Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * queryjumblefuncs.c
4 : * Query normalization and fingerprinting.
5 : *
6 : * Normalization is a process whereby similar queries, typically differing only
7 : * in their constants (though the exact rules are somewhat more subtle than
8 : * that) are recognized as equivalent, and are tracked as a single entry. This
9 : * is particularly useful for non-prepared queries.
10 : *
11 : * Normalization is implemented by fingerprinting queries, selectively
12 : * serializing those fields of each query tree's nodes that are judged to be
13 : * essential to the query. This is referred to as a query jumble. This is
14 : * distinct from a regular serialization in that various extraneous
15 : * information is ignored as irrelevant or not essential to the query, such
16 : * as the collations of Vars and, most notably, the values of constants.
17 : *
18 : * This jumble is acquired at the end of parse analysis of each query, and
19 : * a 64-bit hash of it is stored into the query's Query.queryId field.
20 : * The server then copies this value around, making it available in plan
21 : * tree(s) generated from the query. The executor can then use this value
22 : * to blame query costs on the proper queryId.
23 : *
24 : * Arrays of two or more constants and PARAM_EXTERN parameters are "squashed"
25 : * and contribute only once to the jumble. This has the effect that queries
26 : * that differ only on the length of such lists have the same queryId.
27 : *
28 : *
29 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
30 : * Portions Copyright (c) 1994, Regents of the University of California
31 : *
32 : *
33 : * IDENTIFICATION
34 : * src/backend/nodes/queryjumblefuncs.c
35 : *
36 : *-------------------------------------------------------------------------
37 : */
38 : #include "postgres.h"
39 :
40 : #include "access/transam.h"
41 : #include "catalog/pg_proc.h"
42 : #include "common/hashfn.h"
43 : #include "common/int.h"
44 : #include "miscadmin.h"
45 : #include "nodes/nodeFuncs.h"
46 : #include "nodes/queryjumble.h"
47 : #include "utils/lsyscache.h"
48 : #include "parser/scanner.h"
49 : #include "parser/scansup.h"
50 :
51 : #define JUMBLE_SIZE 1024 /* query serialization buffer size */
52 :
53 : /* GUC parameters */
54 : int compute_query_id = COMPUTE_QUERY_ID_AUTO;
55 :
56 : /*
57 : * True when compute_query_id is ON or AUTO, and a module requests them.
58 : *
59 : * Note that IsQueryIdEnabled() should be used instead of checking
60 : * query_id_enabled or compute_query_id directly when we want to know
61 : * whether query identifiers are computed in the core or not.
62 : */
63 : bool query_id_enabled = false;
64 :
65 : static JumbleState *InitJumble(void);
66 : static int64 DoJumble(JumbleState *jstate, Node *node);
67 : static void AppendJumble(JumbleState *jstate,
68 : const unsigned char *value, Size size);
69 : static void FlushPendingNulls(JumbleState *jstate);
70 : static void RecordConstLocation(JumbleState *jstate,
71 : bool extern_param,
72 : int location, int len);
73 : static void _jumbleNode(JumbleState *jstate, Node *node);
74 : static void _jumbleList(JumbleState *jstate, Node *node);
75 : static void _jumbleElements(JumbleState *jstate, List *elements, Node *node);
76 : static void _jumbleParam(JumbleState *jstate, Node *node);
77 : static void _jumbleA_Const(JumbleState *jstate, Node *node);
78 : static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
79 : static void _jumbleRangeTblEntry_eref(JumbleState *jstate,
80 : RangeTblEntry *rte,
81 : Alias *expr);
82 :
83 : /*
84 : * Given a possibly multi-statement source string, confine our attention to the
85 : * relevant part of the string.
86 : */
87 : const char *
88 104208 : CleanQuerytext(const char *query, int *location, int *len)
89 : {
90 104208 : int query_location = *location;
91 104208 : int query_len = *len;
92 :
93 : /* First apply starting offset, unless it's -1 (unknown). */
94 104208 : if (query_location >= 0)
95 : {
96 : Assert(query_location <= strlen(query));
97 104017 : query += query_location;
98 : /* Length of 0 (or -1) means "rest of string" */
99 104017 : if (query_len <= 0)
100 16635 : query_len = strlen(query);
101 : else
102 : Assert(query_len <= strlen(query));
103 : }
104 : else
105 : {
106 : /* If query location is unknown, distrust query_len as well */
107 191 : query_location = 0;
108 191 : query_len = strlen(query);
109 : }
110 :
111 : /*
112 : * Discard leading and trailing whitespace, too. Use scanner_isspace()
113 : * not libc's isspace(), because we want to match the lexer's behavior.
114 : *
115 : * Note: the parser now strips leading comments and whitespace from the
116 : * reported stmt_location, so this first loop will only iterate in the
117 : * unusual case that the location didn't propagate to here. But the
118 : * statement length will extend to the end-of-string or terminating
119 : * semicolon, so the second loop often does something useful.
120 : */
121 104209 : while (query_len > 0 && scanner_isspace(query[0]))
122 1 : query++, query_location++, query_len--;
123 104972 : while (query_len > 0 && scanner_isspace(query[query_len - 1]))
124 764 : query_len--;
125 :
126 104208 : *location = query_location;
127 104208 : *len = query_len;
128 :
129 104208 : return query;
130 : }
131 :
132 : /*
133 : * JumbleQuery
134 : * Recursively process the given Query producing a 64-bit hash value by
135 : * hashing the relevant fields and record that value in the Query's queryId
136 : * field. Return the JumbleState object used for jumbling the query.
137 : */
138 : JumbleState *
139 84257 : JumbleQuery(Query *query)
140 : {
141 : JumbleState *jstate;
142 :
143 : Assert(IsQueryIdEnabled());
144 :
145 84257 : jstate = InitJumble();
146 :
147 84257 : query->queryId = DoJumble(jstate, (Node *) query);
148 :
149 : /*
150 : * If we are unlucky enough to get a hash of zero, use 1 instead for
151 : * normal statements and 2 for utility queries.
152 : */
153 84257 : if (query->queryId == INT64CONST(0))
154 : {
155 0 : if (query->utilityStmt)
156 0 : query->queryId = INT64CONST(2);
157 : else
158 0 : query->queryId = INT64CONST(1);
159 : }
160 :
161 84257 : return jstate;
162 : }
163 :
164 : /*
165 : * Enables query identifier computation.
166 : *
167 : * Third-party plugins can use this function to inform core that they require
168 : * a query identifier to be computed.
169 : */
170 : void
171 15 : EnableQueryId(void)
172 : {
173 15 : if (compute_query_id != COMPUTE_QUERY_ID_OFF)
174 15 : query_id_enabled = true;
175 15 : }
176 :
177 : /*
178 : * InitJumble
179 : * Allocate a JumbleState object and make it ready to jumble.
180 : */
181 : static JumbleState *
182 84257 : InitJumble(void)
183 : {
184 : JumbleState *jstate;
185 :
186 84257 : jstate = palloc_object(JumbleState);
187 :
188 : /* Set up workspace for query jumbling */
189 84257 : jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
190 84257 : jstate->jumble_len = 0;
191 84257 : jstate->clocations_buf_size = 32;
192 84257 : jstate->clocations = (LocationLen *) palloc(jstate->clocations_buf_size *
193 : sizeof(LocationLen));
194 84257 : jstate->clocations_count = 0;
195 84257 : jstate->highest_extern_param_id = 0;
196 84257 : jstate->pending_nulls = 0;
197 84257 : jstate->has_squashed_lists = false;
198 : #ifdef USE_ASSERT_CHECKING
199 : jstate->total_jumble_len = 0;
200 : #endif
201 :
202 84257 : return jstate;
203 : }
204 :
205 : /*
206 : * DoJumble
207 : * Jumble the given Node using the given JumbleState and return the resulting
208 : * jumble hash.
209 : */
210 : static int64
211 84257 : DoJumble(JumbleState *jstate, Node *node)
212 : {
213 : /* Jumble the given node */
214 84257 : _jumbleNode(jstate, node);
215 :
216 : /* Flush any pending NULLs before doing the final hash */
217 84257 : if (jstate->pending_nulls > 0)
218 83501 : FlushPendingNulls(jstate);
219 :
220 : /* Squashed list found, reset highest_extern_param_id */
221 84257 : if (jstate->has_squashed_lists)
222 1489 : jstate->highest_extern_param_id = 0;
223 :
224 : /* Process the jumble buffer and produce the hash value */
225 84257 : return DatumGetInt64(hash_any_extended(jstate->jumble,
226 84257 : jstate->jumble_len,
227 : 0));
228 : }
229 :
230 : /*
231 : * AppendJumbleInternal: Internal function for appending to the jumble buffer
232 : *
233 : * Note: Callers must ensure that size > 0.
234 : */
235 : static pg_attribute_always_inline void
236 6119369 : AppendJumbleInternal(JumbleState *jstate, const unsigned char *item,
237 : Size size)
238 : {
239 6119369 : unsigned char *jumble = jstate->jumble;
240 6119369 : Size jumble_len = jstate->jumble_len;
241 :
242 : /* Ensure the caller didn't mess up */
243 : Assert(size > 0);
244 :
245 : /*
246 : * Fast path for when there's enough space left in the buffer. This is
247 : * worthwhile as means the memcpy can be inlined into very efficient code
248 : * when 'size' is a compile-time constant.
249 : */
250 6119369 : if (likely(size <= JUMBLE_SIZE - jumble_len))
251 : {
252 6116059 : memcpy(jumble + jumble_len, item, size);
253 6116059 : jstate->jumble_len += size;
254 :
255 : #ifdef USE_ASSERT_CHECKING
256 : jstate->total_jumble_len += size;
257 : #endif
258 :
259 6116059 : return;
260 : }
261 :
262 : /*
263 : * Whenever the jumble buffer is full, we hash the current contents and
264 : * reset the buffer to contain just that hash value, thus relying on the
265 : * hash to summarize everything so far.
266 : */
267 : do
268 : {
269 : Size part_size;
270 :
271 5379 : if (unlikely(jumble_len >= JUMBLE_SIZE))
272 : {
273 : int64 start_hash;
274 :
275 3409 : start_hash = DatumGetInt64(hash_any_extended(jumble,
276 : JUMBLE_SIZE, 0));
277 3409 : memcpy(jumble, &start_hash, sizeof(start_hash));
278 3409 : jumble_len = sizeof(start_hash);
279 : }
280 5379 : part_size = Min(size, JUMBLE_SIZE - jumble_len);
281 5379 : memcpy(jumble + jumble_len, item, part_size);
282 5379 : jumble_len += part_size;
283 5379 : item += part_size;
284 5379 : size -= part_size;
285 :
286 : #ifdef USE_ASSERT_CHECKING
287 : jstate->total_jumble_len += part_size;
288 : #endif
289 5379 : } while (size > 0);
290 :
291 3310 : jstate->jumble_len = jumble_len;
292 : }
293 :
294 : /*
295 : * AppendJumble
296 : * Add 'size' bytes of the given jumble 'value' to the jumble state
297 : */
298 : static pg_noinline void
299 208538 : AppendJumble(JumbleState *jstate, const unsigned char *value, Size size)
300 : {
301 208538 : if (jstate->pending_nulls > 0)
302 31689 : FlushPendingNulls(jstate);
303 :
304 208538 : AppendJumbleInternal(jstate, value, size);
305 208538 : }
306 :
307 : /*
308 : * AppendJumbleNull
309 : * For jumbling NULL pointers
310 : */
311 : static pg_attribute_always_inline void
312 3072592 : AppendJumbleNull(JumbleState *jstate)
313 : {
314 3072592 : jstate->pending_nulls++;
315 3072592 : }
316 :
317 : /*
318 : * AppendJumble8
319 : * Add the first byte from the given 'value' pointer to the jumble state
320 : */
321 : static pg_noinline void
322 671488 : AppendJumble8(JumbleState *jstate, const unsigned char *value)
323 : {
324 671488 : if (jstate->pending_nulls > 0)
325 224476 : FlushPendingNulls(jstate);
326 :
327 671488 : AppendJumbleInternal(jstate, value, 1);
328 671488 : }
329 :
330 : /*
331 : * AppendJumble16
332 : * Add the first 2 bytes from the given 'value' pointer to the jumble
333 : * state.
334 : */
335 : static pg_noinline void
336 439548 : AppendJumble16(JumbleState *jstate, const unsigned char *value)
337 : {
338 439548 : if (jstate->pending_nulls > 0)
339 20100 : FlushPendingNulls(jstate);
340 :
341 439548 : AppendJumbleInternal(jstate, value, 2);
342 439548 : }
343 :
344 : /*
345 : * AppendJumble32
346 : * Add the first 4 bytes from the given 'value' pointer to the jumble
347 : * state.
348 : */
349 : static pg_noinline void
350 3810230 : AppendJumble32(JumbleState *jstate, const unsigned char *value)
351 : {
352 3810230 : if (jstate->pending_nulls > 0)
353 629799 : FlushPendingNulls(jstate);
354 :
355 3810230 : AppendJumbleInternal(jstate, value, 4);
356 3810230 : }
357 :
358 : /*
359 : * AppendJumble64
360 : * Add the first 8 bytes from the given 'value' pointer to the jumble
361 : * state.
362 : */
363 : static pg_noinline void
364 0 : AppendJumble64(JumbleState *jstate, const unsigned char *value)
365 : {
366 0 : if (jstate->pending_nulls > 0)
367 0 : FlushPendingNulls(jstate);
368 :
369 0 : AppendJumbleInternal(jstate, value, 8);
370 0 : }
371 :
372 : /*
373 : * FlushPendingNulls
374 : * Incorporate the pending_nulls value into the jumble buffer.
375 : *
376 : * Note: Callers must ensure that there's at least 1 pending NULL.
377 : */
378 : static pg_attribute_always_inline void
379 989565 : FlushPendingNulls(JumbleState *jstate)
380 : {
381 : Assert(jstate->pending_nulls > 0);
382 :
383 989565 : AppendJumbleInternal(jstate,
384 989565 : (const unsigned char *) &jstate->pending_nulls, 4);
385 989565 : jstate->pending_nulls = 0;
386 989565 : }
387 :
388 :
389 : /*
390 : * Record the location of some kind of constant within a query string.
391 : * These are not only bare constants but also expressions that ultimately
392 : * constitute a constant, such as those inside casts and simple function
393 : * calls; if extern_param, then it corresponds to a PARAM_EXTERN Param.
394 : *
395 : * If length is -1, it indicates a single such constant element. If
396 : * it's a positive integer, it indicates the length of a squashable
397 : * list of them.
398 : */
399 : static void
400 136983 : RecordConstLocation(JumbleState *jstate, bool extern_param, int location, int len)
401 : {
402 : /* -1 indicates unknown or undefined location */
403 136983 : if (location >= 0)
404 : {
405 : /* enlarge array if needed */
406 129066 : if (jstate->clocations_count >= jstate->clocations_buf_size)
407 : {
408 78 : jstate->clocations_buf_size *= 2;
409 78 : jstate->clocations = (LocationLen *)
410 78 : repalloc(jstate->clocations,
411 78 : jstate->clocations_buf_size *
412 : sizeof(LocationLen));
413 : }
414 129066 : jstate->clocations[jstate->clocations_count].location = location;
415 :
416 : /*
417 : * Lengths are either positive integers (indicating a squashable
418 : * list), or -1.
419 : */
420 : Assert(len > -1 || len == -1);
421 129066 : jstate->clocations[jstate->clocations_count].length = len;
422 129066 : jstate->clocations[jstate->clocations_count].squashed = (len > -1);
423 129066 : jstate->clocations[jstate->clocations_count].extern_param = extern_param;
424 129066 : jstate->clocations_count++;
425 : }
426 136983 : }
427 :
428 : /*
429 : * Subroutine for _jumbleElements: Verify a few simple cases where we can
430 : * deduce that the expression is a constant:
431 : *
432 : * - See through any wrapping RelabelType and CoerceViaIO layers.
433 : * - If it's a FuncExpr, check that the function is a builtin
434 : * cast and its arguments are Const.
435 : * - Otherwise test if the expression is a simple Const or a
436 : * PARAM_EXTERN param.
437 : */
438 : static bool
439 6715 : IsSquashableConstant(Node *element)
440 : {
441 299 : restart:
442 7014 : switch (nodeTag(element))
443 : {
444 229 : case T_RelabelType:
445 : /* Unwrap RelabelType */
446 229 : element = (Node *) ((RelabelType *) element)->arg;
447 229 : goto restart;
448 :
449 70 : case T_CoerceViaIO:
450 : /* Unwrap CoerceViaIO */
451 70 : element = (Node *) ((CoerceViaIO *) element)->arg;
452 70 : goto restart;
453 :
454 6204 : case T_Const:
455 6204 : return true;
456 :
457 80 : case T_Param:
458 80 : return castNode(Param, element)->paramkind == PARAM_EXTERN;
459 :
460 324 : case T_FuncExpr:
461 : {
462 324 : FuncExpr *func = (FuncExpr *) element;
463 : ListCell *temp;
464 :
465 324 : if (func->funcformat != COERCE_IMPLICIT_CAST &&
466 208 : func->funcformat != COERCE_EXPLICIT_CAST)
467 145 : return false;
468 :
469 179 : if (func->funcid > FirstGenbkiObjectId)
470 0 : return false;
471 :
472 : /*
473 : * We can check function arguments recursively, being careful
474 : * about recursing too deep. At each recursion level it's
475 : * enough to test the stack on the first element. (Note that
476 : * I wasn't able to hit this without bloating the stack
477 : * artificially in this function: the parser errors out before
478 : * stack size becomes a problem here.)
479 : */
480 355 : foreach(temp, func->args)
481 : {
482 179 : Node *arg = lfirst(temp);
483 :
484 179 : if (!IsA(arg, Const))
485 : {
486 14 : if (foreach_current_index(temp) == 0 &&
487 7 : stack_is_too_deep())
488 3 : return false;
489 7 : else if (!IsSquashableConstant(arg))
490 3 : return false;
491 : }
492 : }
493 :
494 176 : return true;
495 : }
496 :
497 107 : default:
498 107 : return false;
499 : }
500 : }
501 :
502 : /*
503 : * Subroutine for _jumbleElements: Verify whether the provided list
504 : * can be squashed, meaning it contains only constant expressions.
505 : *
506 : * Return value indicates if squashing is possible.
507 : *
508 : * Note that this function searches only for explicit Const nodes with
509 : * possibly very simple decorations on top and PARAM_EXTERN parameters,
510 : * and does not try to simplify expressions.
511 : */
512 : static bool
513 2559 : IsSquashableConstantList(List *elements)
514 : {
515 : ListCell *temp;
516 :
517 : /* If the list is too short, we don't try to squash it. */
518 2559 : if (list_length(elements) < 2)
519 252 : return false;
520 :
521 8763 : foreach(temp, elements)
522 : {
523 6708 : if (!IsSquashableConstant(lfirst(temp)))
524 252 : return false;
525 : }
526 :
527 2055 : return true;
528 : }
529 :
530 : #define JUMBLE_NODE(item) \
531 : _jumbleNode(jstate, (Node *) expr->item)
532 : #define JUMBLE_ELEMENTS(list, node) \
533 : _jumbleElements(jstate, (List *) expr->list, node)
534 : #define JUMBLE_LOCATION(location) \
535 : RecordConstLocation(jstate, false, expr->location, -1)
536 : #define JUMBLE_FIELD(item) \
537 : do { \
538 : if (sizeof(expr->item) == 8) \
539 : AppendJumble64(jstate, (const unsigned char *) &(expr->item)); \
540 : else if (sizeof(expr->item) == 4) \
541 : AppendJumble32(jstate, (const unsigned char *) &(expr->item)); \
542 : else if (sizeof(expr->item) == 2) \
543 : AppendJumble16(jstate, (const unsigned char *) &(expr->item)); \
544 : else if (sizeof(expr->item) == 1) \
545 : AppendJumble8(jstate, (const unsigned char *) &(expr->item)); \
546 : else \
547 : AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item)); \
548 : } while (0)
549 : #define JUMBLE_STRING(str) \
550 : do { \
551 : if (expr->str) \
552 : AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
553 : else \
554 : AppendJumbleNull(jstate); \
555 : } while(0)
556 : /* Function name used for the node field attribute custom_query_jumble. */
557 : #define JUMBLE_CUSTOM(nodetype, item) \
558 : _jumble##nodetype##_##item(jstate, expr, expr->item)
559 :
560 : #include "queryjumblefuncs.funcs.c"
561 :
562 : static void
563 4531443 : _jumbleNode(JumbleState *jstate, Node *node)
564 : {
565 4531443 : Node *expr = node;
566 : #ifdef USE_ASSERT_CHECKING
567 : Size prev_jumble_len = jstate->total_jumble_len;
568 : #endif
569 :
570 4531443 : if (expr == NULL)
571 : {
572 2805280 : AppendJumbleNull(jstate);
573 2805280 : return;
574 : }
575 :
576 : /* Guard against stack overflow due to overly complex expressions */
577 1726163 : check_stack_depth();
578 :
579 : /*
580 : * We always emit the node's NodeTag, then any additional fields that are
581 : * considered significant, and then we recurse to any child nodes.
582 : */
583 1726163 : JUMBLE_FIELD(type);
584 :
585 1726163 : switch (nodeTag(expr))
586 : {
587 : #include "queryjumblefuncs.switch.c"
588 :
589 416562 : case T_List:
590 : case T_IntList:
591 : case T_OidList:
592 : case T_XidList:
593 416562 : _jumbleList(jstate, expr);
594 416562 : break;
595 :
596 0 : default:
597 : /* Only a warning, since we can stumble along anyway */
598 0 : elog(WARNING, "unrecognized node type: %d",
599 : (int) nodeTag(expr));
600 0 : break;
601 : }
602 :
603 : /* Ensure we added something to the jumble buffer */
604 : Assert(jstate->total_jumble_len > prev_jumble_len);
605 : }
606 :
607 : static void
608 416562 : _jumbleList(JumbleState *jstate, Node *node)
609 : {
610 416562 : List *expr = (List *) node;
611 : ListCell *l;
612 :
613 416562 : switch (expr->type)
614 : {
615 415997 : case T_List:
616 1165359 : foreach(l, expr)
617 749362 : _jumbleNode(jstate, lfirst(l));
618 415997 : break;
619 565 : case T_IntList:
620 1242 : foreach(l, expr)
621 677 : AppendJumble32(jstate, (const unsigned char *) &lfirst_int(l));
622 565 : break;
623 0 : case T_OidList:
624 0 : foreach(l, expr)
625 0 : AppendJumble32(jstate, (const unsigned char *) &lfirst_oid(l));
626 0 : break;
627 0 : case T_XidList:
628 0 : foreach(l, expr)
629 0 : AppendJumble32(jstate, (const unsigned char *) &lfirst_xid(l));
630 0 : break;
631 0 : default:
632 0 : elog(ERROR, "unrecognized list node type: %d",
633 : (int) expr->type);
634 : return;
635 : }
636 : }
637 :
638 : /*
639 : * We try to jumble lists of expressions as one individual item regardless
640 : * of how many elements are in the list. This is know as squashing, which
641 : * results in different queries jumbling to the same query_id, if the only
642 : * difference is the number of elements in the list.
643 : *
644 : * We allow constants and PARAM_EXTERN parameters to be squashed. To normalize
645 : * such queries, we use the start and end locations of the list of elements in
646 : * a list.
647 : */
648 : static void
649 2559 : _jumbleElements(JumbleState *jstate, List *elements, Node *node)
650 : {
651 2559 : bool normalize_list = false;
652 :
653 2559 : if (IsSquashableConstantList(elements))
654 : {
655 2055 : if (IsA(node, ArrayExpr))
656 : {
657 2055 : ArrayExpr *aexpr = (ArrayExpr *) node;
658 :
659 2055 : if (aexpr->list_start > 0 && aexpr->list_end > 0)
660 : {
661 2003 : RecordConstLocation(jstate,
662 : false,
663 2003 : aexpr->list_start + 1,
664 2003 : (aexpr->list_end - aexpr->list_start) - 1);
665 2003 : normalize_list = true;
666 2003 : jstate->has_squashed_lists = true;
667 : }
668 : }
669 : }
670 :
671 2559 : if (!normalize_list)
672 : {
673 556 : _jumbleNode(jstate, (Node *) elements);
674 : }
675 2559 : }
676 :
677 : /*
678 : * We store the highest param ID of extern params. This can later be used
679 : * to start the numbering of the placeholder for squashed lists.
680 : */
681 : static void
682 5837 : _jumbleParam(JumbleState *jstate, Node *node)
683 : {
684 5837 : Param *expr = (Param *) node;
685 :
686 5837 : JUMBLE_FIELD(paramkind);
687 5837 : JUMBLE_FIELD(paramid);
688 5837 : JUMBLE_FIELD(paramtype);
689 : /* paramtypmod and paramcollid are ignored */
690 :
691 5837 : if (expr->paramkind == PARAM_EXTERN)
692 : {
693 : /*
694 : * At this point, only external parameter locations outside of
695 : * squashable lists will be recorded.
696 : */
697 4879 : RecordConstLocation(jstate, true, expr->location, -1);
698 :
699 : /*
700 : * Update the highest Param id seen, in order to start normalization
701 : * correctly.
702 : *
703 : * Note: This value is reset at the end of jumbling if there exists a
704 : * squashable list. See the comment in the definition of JumbleState.
705 : */
706 4879 : if (expr->paramid > jstate->highest_extern_param_id)
707 4207 : jstate->highest_extern_param_id = expr->paramid;
708 : }
709 5837 : }
710 :
711 : static void
712 9973 : _jumbleA_Const(JumbleState *jstate, Node *node)
713 : {
714 9973 : A_Const *expr = (A_Const *) node;
715 :
716 9973 : JUMBLE_FIELD(isnull);
717 9973 : if (!expr->isnull)
718 : {
719 9881 : JUMBLE_FIELD(val.node.type);
720 9881 : switch (nodeTag(&expr->val))
721 : {
722 4626 : case T_Integer:
723 4626 : JUMBLE_FIELD(val.ival.ival);
724 4626 : break;
725 30 : case T_Float:
726 30 : JUMBLE_STRING(val.fval.fval);
727 30 : break;
728 138 : case T_Boolean:
729 138 : JUMBLE_FIELD(val.boolval.boolval);
730 138 : break;
731 5085 : case T_String:
732 5085 : JUMBLE_STRING(val.sval.sval);
733 5085 : break;
734 2 : case T_BitString:
735 2 : JUMBLE_STRING(val.bsval.bsval);
736 2 : break;
737 0 : default:
738 0 : elog(ERROR, "unrecognized node type: %d",
739 : (int) nodeTag(&expr->val));
740 : break;
741 : }
742 : }
743 9973 : }
744 :
745 : static void
746 2925 : _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
747 : {
748 2925 : VariableSetStmt *expr = (VariableSetStmt *) node;
749 :
750 2925 : JUMBLE_FIELD(kind);
751 2925 : JUMBLE_STRING(name);
752 :
753 : /*
754 : * Account for the list of arguments in query jumbling only if told by the
755 : * parser.
756 : */
757 2925 : if (expr->jumble_args)
758 60 : JUMBLE_NODE(args);
759 2925 : JUMBLE_FIELD(is_local);
760 2925 : JUMBLE_LOCATION(location);
761 2925 : }
762 :
763 : /*
764 : * Custom query jumble function for RangeTblEntry.eref.
765 : */
766 : static void
767 85310 : _jumbleRangeTblEntry_eref(JumbleState *jstate,
768 : RangeTblEntry *rte,
769 : Alias *expr)
770 : {
771 85310 : JUMBLE_FIELD(type);
772 :
773 : /*
774 : * This includes only the table name, the list of column names is ignored.
775 : */
776 85310 : JUMBLE_STRING(aliasname);
777 85310 : }
778 :
779 : /*
780 : * CompLocation: comparator for qsorting LocationLen structs by location
781 : */
782 : static int
783 38679 : CompLocation(const void *a, const void *b)
784 : {
785 38679 : int l = ((const LocationLen *) a)->location;
786 38679 : int r = ((const LocationLen *) b)->location;
787 :
788 38679 : return pg_cmp_s32(l, r);
789 : }
790 :
791 : /*
792 : * Given a valid SQL string and an array of constant-location records, return
793 : * the textual lengths of those constants in a newly allocated LocationLen
794 : * array, or NULL if there are no constants.
795 : *
796 : * The constants may use any allowed constant syntax, such as float literals,
797 : * bit-strings, single-quoted strings and dollar-quoted strings. This is
798 : * accomplished by using the public API for the core scanner.
799 : *
800 : * It is the caller's job to ensure that the string is a valid SQL statement
801 : * with constants at the indicated locations. Since in practice the string
802 : * has already been parsed, and the locations that the caller provides will
803 : * have originated from within the authoritative parser, this should not be
804 : * a problem.
805 : *
806 : * Multiple constants can have the same location. We reset lengths of those
807 : * past the first to -1 so that they can later be ignored.
808 : *
809 : * If query_loc > 0, then "query" has been advanced by that much compared to
810 : * the original string start, as is the case with multi-statement strings, so
811 : * we need to translate the provided locations to compensate. (This lets us
812 : * avoid re-scanning statements before the one of interest, so it's worth
813 : * doing.)
814 : *
815 : * N.B. There is an assumption that a '-' character at a Const location begins
816 : * a negative numeric constant. This precludes there ever being another
817 : * reason for a constant to start with a '-'.
818 : *
819 : * It is the caller's responsibility to free the result, if necessary.
820 : */
821 : LocationLen *
822 11403 : ComputeConstantLengths(const JumbleState *jstate, const char *query,
823 : int query_loc)
824 : {
825 : LocationLen *locs;
826 : core_yyscan_t yyscanner;
827 : core_yy_extra_type yyextra;
828 : core_YYSTYPE yylval;
829 : YYLTYPE yylloc;
830 :
831 11403 : if (jstate->clocations_count == 0)
832 0 : return NULL;
833 :
834 : /* Copy constant locations to avoid modifying jstate */
835 11403 : locs = palloc_array(LocationLen, jstate->clocations_count);
836 11403 : memcpy(locs, jstate->clocations, jstate->clocations_count * sizeof(LocationLen));
837 :
838 : /*
839 : * Sort the records by location so that we can process them in order while
840 : * scanning the query text.
841 : */
842 11403 : if (jstate->clocations_count > 1)
843 7228 : qsort(locs, jstate->clocations_count,
844 : sizeof(LocationLen), CompLocation);
845 :
846 : /* initialize the flex scanner --- should match raw_parser() */
847 11403 : yyscanner = scanner_init(query,
848 : &yyextra,
849 : &ScanKeywords,
850 : ScanKeywordTokens);
851 :
852 : /* Search for each constant, in sequence */
853 45757 : for (int i = 0; i < jstate->clocations_count; i++)
854 : {
855 : int loc;
856 : int tok;
857 :
858 : /* Ignore constants after the first one in the same location */
859 34354 : if (i > 0 && locs[i].location == locs[i - 1].location)
860 : {
861 508 : locs[i].length = -1;
862 508 : continue;
863 : }
864 :
865 33846 : if (locs[i].squashed)
866 665 : continue; /* squashable list, ignore */
867 :
868 : /*
869 : * Adjust the constant's location using the provided starting location
870 : * of the current statement. This allows us to avoid scanning a
871 : * multi-statement string from the beginning.
872 : */
873 33181 : loc = locs[i].location - query_loc;
874 : Assert(loc >= 0);
875 :
876 : /*
877 : * We have a valid location for a constant that's not a dupe. Lex
878 : * tokens until we find the desired constant.
879 : */
880 : for (;;)
881 : {
882 255152 : tok = core_yylex(&yylval, &yylloc, yyscanner);
883 :
884 : /* We should not hit end-of-string, but if we do, behave sanely */
885 255152 : if (tok == 0)
886 0 : break; /* out of inner for-loop */
887 :
888 : /*
889 : * We should find the token position exactly, but if we somehow
890 : * run past it, work with that.
891 : */
892 255152 : if (yylloc >= loc)
893 : {
894 33181 : if (query[loc] == '-')
895 : {
896 : /*
897 : * It's a negative value - this is the one and only case
898 : * where we replace more than a single token.
899 : *
900 : * Do not compensate for the special-case adjustment of
901 : * location to that of the leading '-' operator in the
902 : * event of a negative constant (see doNegate() in
903 : * gram.y). It is also useful for our purposes to start
904 : * from the minus symbol. In this way, queries like
905 : * "select * from foo where bar = 1" and "select * from
906 : * foo where bar = -2" can be treated similarly.
907 : */
908 379 : tok = core_yylex(&yylval, &yylloc, yyscanner);
909 379 : if (tok == 0)
910 0 : break; /* out of inner for-loop */
911 : }
912 :
913 : /*
914 : * We now rely on the assumption that flex has placed a zero
915 : * byte after the text of the current token in scanbuf.
916 : */
917 33181 : locs[i].length = strlen(yyextra.scanbuf + loc);
918 33181 : break; /* out of inner for-loop */
919 : }
920 : }
921 :
922 : /* If we hit end-of-string, give up, leaving remaining lengths -1 */
923 33181 : if (tok == 0)
924 0 : break;
925 : }
926 :
927 11403 : scanner_finish(yyscanner);
928 :
929 11403 : return locs;
930 : }
|