LCOV - code coverage report
Current view: top level - src/backend/nodes - queryjumblefuncs.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 89.9 % 278 250
Test Date: 2026-04-16 16:16:26 Functions: 96.0 % 25 24
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * queryjumblefuncs.c
       4              :  *   Query normalization and fingerprinting.
       5              :  *
       6              :  * Normalization is a process whereby similar queries, typically differing only
       7              :  * in their constants (though the exact rules are somewhat more subtle than
       8              :  * that) are recognized as equivalent, and are tracked as a single entry.  This
       9              :  * is particularly useful for non-prepared queries.
      10              :  *
      11              :  * Normalization is implemented by fingerprinting queries, selectively
      12              :  * serializing those fields of each query tree's nodes that are judged to be
      13              :  * essential to the query.  This is referred to as a query jumble.  This is
      14              :  * distinct from a regular serialization in that various extraneous
      15              :  * information is ignored as irrelevant or not essential to the query, such
      16              :  * as the collations of Vars and, most notably, the values of constants.
      17              :  *
      18              :  * This jumble is acquired at the end of parse analysis of each query, and
      19              :  * a 64-bit hash of it is stored into the query's Query.queryId field.
      20              :  * The server then copies this value around, making it available in plan
      21              :  * tree(s) generated from the query.  The executor can then use this value
      22              :  * to blame query costs on the proper queryId.
      23              :  *
      24              :  * Arrays of two or more constants and PARAM_EXTERN parameters are "squashed"
      25              :  * and contribute only once to the jumble.  This has the effect that queries
      26              :  * that differ only on the length of such lists have the same queryId.
      27              :  *
      28              :  *
      29              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      30              :  * Portions Copyright (c) 1994, Regents of the University of California
      31              :  *
      32              :  *
      33              :  * IDENTIFICATION
      34              :  *    src/backend/nodes/queryjumblefuncs.c
      35              :  *
      36              :  *-------------------------------------------------------------------------
      37              :  */
      38              : #include "postgres.h"
      39              : 
      40              : #include "access/transam.h"
      41              : #include "catalog/pg_proc.h"
      42              : #include "common/hashfn.h"
      43              : #include "common/int.h"
      44              : #include "miscadmin.h"
      45              : #include "nodes/nodeFuncs.h"
      46              : #include "nodes/queryjumble.h"
      47              : #include "utils/lsyscache.h"
      48              : #include "parser/scanner.h"
      49              : #include "parser/scansup.h"
      50              : 
      51              : #define JUMBLE_SIZE             1024    /* query serialization buffer size */
      52              : 
      53              : /* GUC parameters */
      54              : int         compute_query_id = COMPUTE_QUERY_ID_AUTO;
      55              : 
      56              : /*
      57              :  * True when compute_query_id is ON or AUTO, and a module requests query identifiers.
      58              :  *
      59              :  * Note that IsQueryIdEnabled() should be used instead of checking
      60              :  * query_id_enabled or compute_query_id directly when we want to know
      61              :  * whether query identifiers are computed in the core or not.
      62              :  */
      63              : bool        query_id_enabled = false;
      64              : 
      65              : static JumbleState *InitJumble(void);
      66              : static int64 DoJumble(JumbleState *jstate, Node *node);
      67              : static void AppendJumble(JumbleState *jstate,
      68              :                          const unsigned char *value, Size size);
      69              : static void FlushPendingNulls(JumbleState *jstate);
      70              : static void RecordConstLocation(JumbleState *jstate,
      71              :                                 bool extern_param,
      72              :                                 int location, int len);
      73              : static void _jumbleNode(JumbleState *jstate, Node *node);
      74              : static void _jumbleList(JumbleState *jstate, Node *node);
      75              : static void _jumbleElements(JumbleState *jstate, List *elements, Node *node);
      76              : static void _jumbleParam(JumbleState *jstate, Node *node);
      77              : static void _jumbleA_Const(JumbleState *jstate, Node *node);
      78              : static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
      79              : static void _jumbleRangeTblEntry_eref(JumbleState *jstate,
      80              :                                       RangeTblEntry *rte,
      81              :                                       Alias *expr);
      82              : 
      83              : /*
      84              :  * Given a possibly multi-statement source string, confine our attention to the
      85              :  * relevant part of the string.
      86              :  */
      87              : const char *
      88       104208 : CleanQuerytext(const char *query, int *location, int *len)
      89              : {
      90       104208 :     int         query_location = *location;
      91       104208 :     int         query_len = *len;
      92              : 
      93              :     /* First apply starting offset, unless it's -1 (unknown). */
      94       104208 :     if (query_location >= 0)
      95              :     {
      96              :         Assert(query_location <= strlen(query));
      97       104017 :         query += query_location;
      98              :         /* Length of 0 (or -1) means "rest of string" */
      99       104017 :         if (query_len <= 0)
     100        16635 :             query_len = strlen(query);
     101              :         else
     102              :             Assert(query_len <= strlen(query));
     103              :     }
     104              :     else
     105              :     {
     106              :         /* If query location is unknown, distrust query_len as well */
     107          191 :         query_location = 0;
     108          191 :         query_len = strlen(query);
     109              :     }
     110              : 
     111              :     /*
     112              :      * Discard leading and trailing whitespace, too.  Use scanner_isspace()
     113              :      * not libc's isspace(), because we want to match the lexer's behavior.
     114              :      *
     115              :      * Note: the parser now strips leading comments and whitespace from the
     116              :      * reported stmt_location, so this first loop will only iterate in the
     117              :      * unusual case that the location didn't propagate to here.  But the
     118              :      * statement length will extend to the end-of-string or terminating
     119              :      * semicolon, so the second loop often does something useful.
     120              :      */
     121       104209 :     while (query_len > 0 && scanner_isspace(query[0]))
     122            1 :         query++, query_location++, query_len--;
     123       104972 :     while (query_len > 0 && scanner_isspace(query[query_len - 1]))
     124          764 :         query_len--;
     125              : 
     126       104208 :     *location = query_location;
     127       104208 :     *len = query_len;
     128              : 
     129       104208 :     return query;
     130              : }
     131              : 
     132              : /*
     133              :  * JumbleQuery
     134              :  *      Recursively process the given Query producing a 64-bit hash value by
     135              :  *      hashing the relevant fields and record that value in the Query's queryId
     136              :  *      field.  Return the JumbleState object used for jumbling the query.
     137              :  */
     138              : JumbleState *
     139        84257 : JumbleQuery(Query *query)
     140              : {
     141              :     JumbleState *jstate;
     142              : 
     143              :     Assert(IsQueryIdEnabled());
     144              : 
     145        84257 :     jstate = InitJumble();
     146              : 
     147        84257 :     query->queryId = DoJumble(jstate, (Node *) query);
     148              : 
     149              :     /*
     150              :      * If we are unlucky enough to get a hash of zero, use 1 instead for
     151              :      * normal statements and 2 for utility queries.
     152              :      */
     153        84257 :     if (query->queryId == INT64CONST(0))
     154              :     {
     155            0 :         if (query->utilityStmt)
     156            0 :             query->queryId = INT64CONST(2);
     157              :         else
     158            0 :             query->queryId = INT64CONST(1);
     159              :     }
     160              : 
     161        84257 :     return jstate;
     162              : }
     163              : 
     164              : /*
     165              :  * Enables query identifier computation.
     166              :  *
     167              :  * Third-party plugins can use this function to inform core that they require
     168              :  * a query identifier to be computed.
     169              :  */
     170              : void
     171           15 : EnableQueryId(void)
     172              : {
     173           15 :     if (compute_query_id != COMPUTE_QUERY_ID_OFF)
     174           15 :         query_id_enabled = true;
     175           15 : }
     176              : 
     177              : /*
     178              :  * InitJumble
     179              :  *      Allocate a JumbleState object and make it ready to jumble.
     180              :  */
     181              : static JumbleState *
     182        84257 : InitJumble(void)
     183              : {
     184              :     JumbleState *jstate;
     185              : 
     186        84257 :     jstate = palloc_object(JumbleState);
     187              : 
     188              :     /* Set up workspace for query jumbling */
     189        84257 :     jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
     190        84257 :     jstate->jumble_len = 0;
     191        84257 :     jstate->clocations_buf_size = 32;
     192        84257 :     jstate->clocations = (LocationLen *) palloc(jstate->clocations_buf_size *
     193              :                                                 sizeof(LocationLen));
     194        84257 :     jstate->clocations_count = 0;
     195        84257 :     jstate->highest_extern_param_id = 0;
     196        84257 :     jstate->pending_nulls = 0;
     197        84257 :     jstate->has_squashed_lists = false;
     198              : #ifdef USE_ASSERT_CHECKING
     199              :     jstate->total_jumble_len = 0;
     200              : #endif
     201              : 
     202        84257 :     return jstate;
     203              : }
     204              : 
     205              : /*
     206              :  * DoJumble
     207              :  *      Jumble the given Node using the given JumbleState and return the resulting
     208              :  *      jumble hash.
     209              :  */
     210              : static int64
     211        84257 : DoJumble(JumbleState *jstate, Node *node)
     212              : {
     213              :     /* Jumble the given node */
     214        84257 :     _jumbleNode(jstate, node);
     215              : 
     216              :     /* Flush any pending NULLs before doing the final hash */
     217        84257 :     if (jstate->pending_nulls > 0)
     218        83501 :         FlushPendingNulls(jstate);
     219              : 
     220              :     /* Squashed list found, reset highest_extern_param_id */
     221        84257 :     if (jstate->has_squashed_lists)
     222         1489 :         jstate->highest_extern_param_id = 0;
     223              : 
     224              :     /* Process the jumble buffer and produce the hash value */
     225        84257 :     return DatumGetInt64(hash_any_extended(jstate->jumble,
     226        84257 :                                            jstate->jumble_len,
     227              :                                            0));
     228              : }
     229              : 
     230              : /*
     231              :  * AppendJumbleInternal: Internal function for appending to the jumble buffer
     232              :  *
     233              :  * Note: Callers must ensure that size > 0.
     234              :  */
     235              : static pg_attribute_always_inline void
     236      6119369 : AppendJumbleInternal(JumbleState *jstate, const unsigned char *item,
     237              :                      Size size)
     238              : {
     239      6119369 :     unsigned char *jumble = jstate->jumble;
     240      6119369 :     Size        jumble_len = jstate->jumble_len;
     241              : 
     242              :     /* Ensure the caller didn't mess up */
     243              :     Assert(size > 0);
     244              : 
     245              :     /*
     246              :      * Fast path for when there's enough space left in the buffer.  This is
     247              :      * worthwhile as it means the memcpy can be inlined into very efficient code
     248              :      * when 'size' is a compile-time constant.
     249              :      */
     250      6119369 :     if (likely(size <= JUMBLE_SIZE - jumble_len))
     251              :     {
     252      6116059 :         memcpy(jumble + jumble_len, item, size);
     253      6116059 :         jstate->jumble_len += size;
     254              : 
     255              : #ifdef USE_ASSERT_CHECKING
     256              :         jstate->total_jumble_len += size;
     257              : #endif
     258              : 
     259      6116059 :         return;
     260              :     }
     261              : 
     262              :     /*
     263              :      * Whenever the jumble buffer is full, we hash the current contents and
     264              :      * reset the buffer to contain just that hash value, thus relying on the
     265              :      * hash to summarize everything so far.
     266              :      */
     267              :     do
     268              :     {
     269              :         Size        part_size;
     270              : 
     271         5379 :         if (unlikely(jumble_len >= JUMBLE_SIZE))
     272              :         {
     273              :             int64       start_hash;
     274              : 
     275         3409 :             start_hash = DatumGetInt64(hash_any_extended(jumble,
     276              :                                                          JUMBLE_SIZE, 0));
     277         3409 :             memcpy(jumble, &start_hash, sizeof(start_hash));
     278         3409 :             jumble_len = sizeof(start_hash);
     279              :         }
     280         5379 :         part_size = Min(size, JUMBLE_SIZE - jumble_len);
     281         5379 :         memcpy(jumble + jumble_len, item, part_size);
     282         5379 :         jumble_len += part_size;
     283         5379 :         item += part_size;
     284         5379 :         size -= part_size;
     285              : 
     286              : #ifdef USE_ASSERT_CHECKING
     287              :         jstate->total_jumble_len += part_size;
     288              : #endif
     289         5379 :     } while (size > 0);
     290              : 
     291         3310 :     jstate->jumble_len = jumble_len;
     292              : }
     293              : 
     294              : /*
     295              :  * AppendJumble
     296              :  *      Add 'size' bytes of the given jumble 'value' to the jumble state
     297              :  */
     298              : static pg_noinline void
     299       208538 : AppendJumble(JumbleState *jstate, const unsigned char *value, Size size)
     300              : {
     301       208538 :     if (jstate->pending_nulls > 0)
     302        31689 :         FlushPendingNulls(jstate);
     303              : 
     304       208538 :     AppendJumbleInternal(jstate, value, size);
     305       208538 : }
     306              : 
     307              : /*
     308              :  * AppendJumbleNull
     309              :  *      For jumbling NULL pointers
     310              :  */
     311              : static pg_attribute_always_inline void
     312      3072592 : AppendJumbleNull(JumbleState *jstate)
     313              : {
     314      3072592 :     jstate->pending_nulls++;
     315      3072592 : }
     316              : 
     317              : /*
     318              :  * AppendJumble8
     319              :  *      Add the first byte from the given 'value' pointer to the jumble state
     320              :  */
     321              : static pg_noinline void
     322       671488 : AppendJumble8(JumbleState *jstate, const unsigned char *value)
     323              : {
     324       671488 :     if (jstate->pending_nulls > 0)
     325       224476 :         FlushPendingNulls(jstate);
     326              : 
     327       671488 :     AppendJumbleInternal(jstate, value, 1);
     328       671488 : }
     329              : 
     330              : /*
     331              :  * AppendJumble16
     332              :  *      Add the first 2 bytes from the given 'value' pointer to the jumble
     333              :  *      state.
     334              :  */
     335              : static pg_noinline void
     336       439548 : AppendJumble16(JumbleState *jstate, const unsigned char *value)
     337              : {
     338       439548 :     if (jstate->pending_nulls > 0)
     339        20100 :         FlushPendingNulls(jstate);
     340              : 
     341       439548 :     AppendJumbleInternal(jstate, value, 2);
     342       439548 : }
     343              : 
     344              : /*
     345              :  * AppendJumble32
     346              :  *      Add the first 4 bytes from the given 'value' pointer to the jumble
     347              :  *      state.
     348              :  */
     349              : static pg_noinline void
     350      3810230 : AppendJumble32(JumbleState *jstate, const unsigned char *value)
     351              : {
     352      3810230 :     if (jstate->pending_nulls > 0)
     353       629799 :         FlushPendingNulls(jstate);
     354              : 
     355      3810230 :     AppendJumbleInternal(jstate, value, 4);
     356      3810230 : }
     357              : 
     358              : /*
     359              :  * AppendJumble64
     360              :  *      Add the first 8 bytes from the given 'value' pointer to the jumble
     361              :  *      state.
     362              :  */
     363              : static pg_noinline void
     364            0 : AppendJumble64(JumbleState *jstate, const unsigned char *value)
     365              : {
     366            0 :     if (jstate->pending_nulls > 0)
     367            0 :         FlushPendingNulls(jstate);
     368              : 
     369            0 :     AppendJumbleInternal(jstate, value, 8);
     370            0 : }
     371              : 
     372              : /*
     373              :  * FlushPendingNulls
     374              :  *      Incorporate the pending_nulls value into the jumble buffer.
     375              :  *
     376              :  * Note: Callers must ensure that there's at least 1 pending NULL.
     377              :  */
     378              : static pg_attribute_always_inline void
     379       989565 : FlushPendingNulls(JumbleState *jstate)
     380              : {
     381              :     Assert(jstate->pending_nulls > 0);
     382              : 
     383       989565 :     AppendJumbleInternal(jstate,
     384       989565 :                          (const unsigned char *) &jstate->pending_nulls, 4);
     385       989565 :     jstate->pending_nulls = 0;
     386       989565 : }
     387              : 
     388              : 
     389              : /*
     390              :  * Record the location of some kind of constant within a query string.
     391              :  * These are not only bare constants but also expressions that ultimately
     392              :  * constitute a constant, such as those inside casts and simple function
     393              :  * calls; if extern_param, then it corresponds to a PARAM_EXTERN Param.
     394              :  *
     395              :  * If length is -1, it indicates a single such constant element.  If
     396              :  * it's a positive integer, it indicates the length of a squashable
     397              :  * list of them.
     398              :  */
     399              : static void
     400       136983 : RecordConstLocation(JumbleState *jstate, bool extern_param, int location, int len)
     401              : {
     402              :     /* -1 indicates unknown or undefined location */
     403       136983 :     if (location >= 0)
     404              :     {
     405              :         /* enlarge array if needed */
     406       129066 :         if (jstate->clocations_count >= jstate->clocations_buf_size)
     407              :         {
     408           78 :             jstate->clocations_buf_size *= 2;
     409           78 :             jstate->clocations = (LocationLen *)
     410           78 :                 repalloc(jstate->clocations,
     411           78 :                          jstate->clocations_buf_size *
     412              :                          sizeof(LocationLen));
     413              :         }
     414       129066 :         jstate->clocations[jstate->clocations_count].location = location;
     415              : 
     416              :         /*
     417              :          * Lengths are either positive integers (indicating a squashable
     418              :          * list), or -1.
     419              :          */
     420              :         Assert(len > -1 || len == -1);
     421       129066 :         jstate->clocations[jstate->clocations_count].length = len;
     422       129066 :         jstate->clocations[jstate->clocations_count].squashed = (len > -1);
     423       129066 :         jstate->clocations[jstate->clocations_count].extern_param = extern_param;
     424       129066 :         jstate->clocations_count++;
     425              :     }
     426       136983 : }
     427              : 
     428              : /*
     429              :  * Subroutine for _jumbleElements: Verify a few simple cases where we can
     430              :  * deduce that the expression is a constant:
     431              :  *
     432              :  * - See through any wrapping RelabelType and CoerceViaIO layers.
     433              :  * - If it's a FuncExpr, check that the function is a builtin
     434              :  *   cast and its arguments are Const.
     435              :  * - Otherwise test if the expression is a simple Const or a
     436              :  *   PARAM_EXTERN param.
     437              :  */
     438              : static bool
     439         6715 : IsSquashableConstant(Node *element)
     440              : {
     441          299 : restart:
     442         7014 :     switch (nodeTag(element))
     443              :     {
     444          229 :         case T_RelabelType:
     445              :             /* Unwrap RelabelType */
     446          229 :             element = (Node *) ((RelabelType *) element)->arg;
     447          229 :             goto restart;
     448              : 
     449           70 :         case T_CoerceViaIO:
     450              :             /* Unwrap CoerceViaIO */
     451           70 :             element = (Node *) ((CoerceViaIO *) element)->arg;
     452           70 :             goto restart;
     453              : 
     454         6204 :         case T_Const:
     455         6204 :             return true;
     456              : 
     457           80 :         case T_Param:
     458           80 :             return castNode(Param, element)->paramkind == PARAM_EXTERN;
     459              : 
     460          324 :         case T_FuncExpr:
     461              :             {
     462          324 :                 FuncExpr   *func = (FuncExpr *) element;
     463              :                 ListCell   *temp;
     464              : 
     465          324 :                 if (func->funcformat != COERCE_IMPLICIT_CAST &&
     466          208 :                     func->funcformat != COERCE_EXPLICIT_CAST)
     467          145 :                     return false;
     468              : 
     469          179 :                 if (func->funcid > FirstGenbkiObjectId)
     470            0 :                     return false;
     471              : 
     472              :                 /*
     473              :                  * We can check function arguments recursively, being careful
     474              :                  * about recursing too deep.  At each recursion level it's
     475              :                  * enough to test the stack on the first element.  (Note that
     476              :                  * I wasn't able to hit this without bloating the stack
     477              :                  * artificially in this function: the parser errors out before
     478              :                  * stack size becomes a problem here.)
     479              :                  */
     480          355 :                 foreach(temp, func->args)
     481              :                 {
     482          179 :                     Node       *arg = lfirst(temp);
     483              : 
     484          179 :                     if (!IsA(arg, Const))
     485              :                     {
     486           14 :                         if (foreach_current_index(temp) == 0 &&
     487            7 :                             stack_is_too_deep())
     488            3 :                             return false;
     489            7 :                         else if (!IsSquashableConstant(arg))
     490            3 :                             return false;
     491              :                     }
     492              :                 }
     493              : 
     494          176 :                 return true;
     495              :             }
     496              : 
     497          107 :         default:
     498          107 :             return false;
     499              :     }
     500              : }
     501              : 
     502              : /*
     503              :  * Subroutine for _jumbleElements: Verify whether the provided list
     504              :  * can be squashed, meaning it contains only constant expressions.
     505              :  *
     506              :  * Return value indicates if squashing is possible.
     507              :  *
     508              :  * Note that this function searches only for explicit Const nodes with
     509              :  * possibly very simple decorations on top and PARAM_EXTERN parameters,
     510              :  * and does not try to simplify expressions.
     511              :  */
     512              : static bool
     513         2559 : IsSquashableConstantList(List *elements)
     514              : {
     515              :     ListCell   *temp;
     516              : 
     517              :     /* If the list is too short, we don't try to squash it. */
     518         2559 :     if (list_length(elements) < 2)
     519          252 :         return false;
     520              : 
     521         8763 :     foreach(temp, elements)
     522              :     {
     523         6708 :         if (!IsSquashableConstant(lfirst(temp)))
     524          252 :             return false;
     525              :     }
     526              : 
     527         2055 :     return true;
     528              : }
     529              : 
     530              : #define JUMBLE_NODE(item) \
     531              :     _jumbleNode(jstate, (Node *) expr->item)
     532              : #define JUMBLE_ELEMENTS(list, node) \
     533              :     _jumbleElements(jstate, (List *) expr->list, node)
     534              : #define JUMBLE_LOCATION(location) \
     535              :     RecordConstLocation(jstate, false, expr->location, -1)
     536              : #define JUMBLE_FIELD(item) \
     537              : do { \
     538              :     if (sizeof(expr->item) == 8) \
     539              :         AppendJumble64(jstate, (const unsigned char *) &(expr->item)); \
     540              :     else if (sizeof(expr->item) == 4) \
     541              :         AppendJumble32(jstate, (const unsigned char *) &(expr->item)); \
     542              :     else if (sizeof(expr->item) == 2) \
     543              :         AppendJumble16(jstate, (const unsigned char *) &(expr->item)); \
     544              :     else if (sizeof(expr->item) == 1) \
     545              :         AppendJumble8(jstate, (const unsigned char *) &(expr->item)); \
     546              :     else \
     547              :         AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item)); \
     548              : } while (0)
     549              : #define JUMBLE_STRING(str) \
     550              : do { \
     551              :     if (expr->str) \
     552              :         AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
     553              :     else \
     554              :         AppendJumbleNull(jstate); \
     555              : } while(0)
     556              : /* Function name used for the node field attribute custom_query_jumble. */
     557              : #define JUMBLE_CUSTOM(nodetype, item) \
     558              :     _jumble##nodetype##_##item(jstate, expr, expr->item)
     559              : 
     560              : #include "queryjumblefuncs.funcs.c"
     561              : /* Jumble one node: NULL goes to AppendJumbleNull(); otherwise emit the NodeTag, then dispatch on nodeTag(). */
     562              : static void
     563      4531443 : _jumbleNode(JumbleState *jstate, Node *node)
     564              : {
     565      4531443 :     Node       *expr = node;    /* the JUMBLE_* macros refer to "expr" */
     566              : #ifdef USE_ASSERT_CHECKING
     567              :     Size        prev_jumble_len = jstate->total_jumble_len;    /* compared by the Assert at the end */
     568              : #endif
     569              : 
     570      4531443 :     if (expr == NULL)
     571              :     {
     572      2805280 :         AppendJumbleNull(jstate);
     573      2805280 :         return;
     574              :     }
     575              : 
     576              :     /* Guard against stack overflow due to overly complex expressions */
     577      1726163 :     check_stack_depth();
     578              : 
     579              :     /*
     580              :      * We always emit the node's NodeTag, then any additional fields that are
     581              :      * considered significant, and then we recurse to any child nodes.
     582              :      */
     583      1726163 :     JUMBLE_FIELD(type);
     584              : 
     585      1726163 :     switch (nodeTag(expr))
     586              :     {
     587              : #include "queryjumblefuncs.switch.c"
     588              :             /* All list flavors go through _jumbleList(), which switches on the list type itself. */
     589       416562 :         case T_List:
     590              :         case T_IntList:
     591              :         case T_OidList:
     592              :         case T_XidList:
     593       416562 :             _jumbleList(jstate, expr);
     594       416562 :             break;
     595              : 
     596            0 :         default:
     597              :             /* Only a warning, since we can stumble along anyway */
     598            0 :             elog(WARNING, "unrecognized node type: %d",
     599              :                  (int) nodeTag(expr));
     600            0 :             break;
     601              :     }
     602              : 
     603              :     /* Ensure we added something to the jumble buffer since entering this function */
     604              :     Assert(jstate->total_jumble_len > prev_jumble_len);
     605              : }
     606              : /* Jumble a list: recurse into each member of a T_List; int/Oid/Xid list members go to AppendJumble32(). */
     607              : static void
     608       416562 : _jumbleList(JumbleState *jstate, Node *node)
     609              : {
     610       416562 :     List       *expr = (List *) node;
     611              :     ListCell   *l;
     612              :     /* Dispatch on the list's own node tag to pick the matching element accessor. */
     613       416562 :     switch (expr->type)
     614              :     {
     615       415997 :         case T_List:
     616      1165359 :             foreach(l, expr)
     617       749362 :                 _jumbleNode(jstate, lfirst(l));
     618       415997 :             break;
     619          565 :         case T_IntList:
     620         1242 :             foreach(l, expr)
     621          677 :                 AppendJumble32(jstate, (const unsigned char *) &lfirst_int(l));
     622          565 :             break;
     623            0 :         case T_OidList:
     624            0 :             foreach(l, expr)
     625            0 :                 AppendJumble32(jstate, (const unsigned char *) &lfirst_oid(l));
     626            0 :             break;
     627            0 :         case T_XidList:
     628            0 :             foreach(l, expr)
     629            0 :                 AppendJumble32(jstate, (const unsigned char *) &lfirst_xid(l));
     630            0 :             break;
     631            0 :         default:
     632            0 :             elog(ERROR, "unrecognized list node type: %d",
     633              :                  (int) expr->type);
     634              :             return;
     635              :     }
     636              : }
     637              : 
     638              : /*
     639              :  * We try to jumble lists of expressions as one individual item regardless
     640              :  * of how many elements are in the list.  This is known as squashing, which
     641              :  * results in different queries jumbling to the same query_id, if the only
     642              :  * difference is the number of elements in the list.
     643              :  *
     644              :  * We allow constants and PARAM_EXTERN parameters to be squashed.  To normalize
     645              :  * such queries, we record the start and end locations of the element list.
     646              :  * Lists that cannot be squashed are jumbled element by element instead.
     647              :  */
     648              : static void
     649         2559 : _jumbleElements(JumbleState *jstate, List *elements, Node *node)
     650              : {
     651         2559 :     bool        normalize_list = false;
     652              : 
     653         2559 :     if (IsSquashableConstantList(elements))
     654              :     {
     655         2055 :         if (IsA(node, ArrayExpr))
     656              :         {
     657         2055 :             ArrayExpr  *aexpr = (ArrayExpr *) node;
     658              :             /* Squash only if both boundary locations are positive; record the text strictly between them. */
     659         2055 :             if (aexpr->list_start > 0 && aexpr->list_end > 0)
     660              :             {
     661         2003 :                 RecordConstLocation(jstate,
     662              :                                     false,
     663         2003 :                                     aexpr->list_start + 1,
     664         2003 :                                     (aexpr->list_end - aexpr->list_start) - 1);
     665         2003 :                 normalize_list = true;
     666         2003 :                 jstate->has_squashed_lists = true;
     667              :             }
     668              :         }
     669              :     }
     670              :     /* Not squashed: fall back to jumbling the whole element list. */
     671         2559 :     if (!normalize_list)
     672              :     {
     673          556 :         _jumbleNode(jstate, (Node *) elements);
     674              :     }
     675         2559 : }
     676              : 
     677              : /*
     678              :  * Jumble a Param.  For extern params, also record its location and remember
     679              :  * the highest param ID, later used to number squashed-list placeholders.
     680              :  */
     681              : static void
     682         5837 : _jumbleParam(JumbleState *jstate, Node *node)
     683              : {
     684         5837 :     Param      *expr = (Param *) node;
     685              : 
     686         5837 :     JUMBLE_FIELD(paramkind);
     687         5837 :     JUMBLE_FIELD(paramid);
     688         5837 :     JUMBLE_FIELD(paramtype);
     689              :     /* paramtypmod and paramcollid are ignored */
     690              : 
     691         5837 :     if (expr->paramkind == PARAM_EXTERN)
     692              :     {
     693              :         /*
     694              :          * At this point, only external parameter locations outside of
     695              :          * squashable lists will be recorded.  The length is passed as -1.
     696              :          */
     697         4879 :         RecordConstLocation(jstate, true, expr->location, -1);
     698              : 
     699              :         /*
     700              :          * Update the highest Param id seen, in order to start normalization
     701              :          * correctly.
     702              :          *
     703              :          * Note: This value is reset at the end of jumbling if there exists a
     704              :          * squashable list. See the comment in the definition of JumbleState.
     705              :          */
     706         4879 :         if (expr->paramid > jstate->highest_extern_param_id)
     707         4207 :             jstate->highest_extern_param_id = expr->paramid;
     708              :     }
     709         5837 : }
     710              : /* Jumble an A_Const: the isnull flag, then for non-null constants the value's node tag and the value itself. */
     711              : static void
     712         9973 : _jumbleA_Const(JumbleState *jstate, Node *node)
     713              : {
     714         9973 :     A_Const    *expr = (A_Const *) node;
     715              : 
     716         9973 :     JUMBLE_FIELD(isnull);
     717         9973 :     if (!expr->isnull)
     718              :     {
     719         9881 :         JUMBLE_FIELD(val.node.type);
     720         9881 :         switch (nodeTag(&expr->val))
     721              :         {
     722         4626 :             case T_Integer:
     723         4626 :                 JUMBLE_FIELD(val.ival.ival);
     724         4626 :                 break;
     725           30 :             case T_Float:
     726           30 :                 JUMBLE_STRING(val.fval.fval);    /* the float value is held as a C string */
     727           30 :                 break;
     728          138 :             case T_Boolean:
     729          138 :                 JUMBLE_FIELD(val.boolval.boolval);
     730          138 :                 break;
     731         5085 :             case T_String:
     732         5085 :                 JUMBLE_STRING(val.sval.sval);
     733         5085 :                 break;
     734            2 :             case T_BitString:
     735            2 :                 JUMBLE_STRING(val.bsval.bsval);    /* likewise held as a C string */
     736            2 :                 break;
     737            0 :             default:
     738            0 :                 elog(ERROR, "unrecognized node type: %d",
     739              :                      (int) nodeTag(&expr->val));
     740              :                 break;
     741              :         }
     742              :     }
     743         9973 : }
     744              : /* Jumble a VariableSetStmt: kind, name, is_local and location; args only when jumble_args is set. */
     745              : static void
     746         2925 : _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
     747              : {
     748         2925 :     VariableSetStmt *expr = (VariableSetStmt *) node;
     749              : 
     750         2925 :     JUMBLE_FIELD(kind);
     751         2925 :     JUMBLE_STRING(name);
     752              : 
     753              :     /*
     754              :      * Account for the list of arguments in query jumbling only if told by the
     755              :      * parser (via the jumble_args flag).
     756              :      */
     757         2925 :     if (expr->jumble_args)
     758           60 :         JUMBLE_NODE(args);
     759         2925 :     JUMBLE_FIELD(is_local);
     760         2925 :     JUMBLE_LOCATION(location);
     761         2925 : }
     762              : 
     763              : /*
     764              :  * Custom query jumble function for RangeTblEntry.eref ("rte" itself is unused).
     765              :  */
     766              : static void
     767        85310 : _jumbleRangeTblEntry_eref(JumbleState *jstate,
     768              :                           RangeTblEntry *rte,
     769              :                           Alias *expr)
     770              : {
     771        85310 :     JUMBLE_FIELD(type);
     772              : 
     773              :     /*
     774              :      * This includes only the table name (aliasname); the list of column names
     775              :      * is ignored.
     776        85310 :     JUMBLE_STRING(aliasname);
     777        85310 : }
     778              : 
     779              : /*
     780              :  * CompLocation: qsort comparator that orders LocationLen structs by their location field
     781              :  */
     782              : static int
     783        38679 : CompLocation(const void *a, const void *b)
     784              : {
     785        38679 :     int         l = ((const LocationLen *) a)->location;
     786        38679 :     int         r = ((const LocationLen *) b)->location;
     787              : 
     788        38679 :     return pg_cmp_s32(l, r);
     789              : }
     790              : 
     791              : /*
     792              :  * Given a valid SQL string and an array of constant-location records, return
     793              :  * the textual lengths of those constants in a newly allocated LocationLen
     794              :  * array, or NULL if there are no constants.
     795              :  *
     796              :  * The constants may use any allowed constant syntax, such as float literals,
     797              :  * bit-strings, single-quoted strings and dollar-quoted strings.  This is
     798              :  * accomplished by using the public API for the core scanner.
     799              :  *
     800              :  * It is the caller's job to ensure that the string is a valid SQL statement
     801              :  * with constants at the indicated locations.  Since in practice the string
     802              :  * has already been parsed, and the locations that the caller provides will
     803              :  * have originated from within the authoritative parser, this should not be
     804              :  * a problem.
     805              :  *
     806              :  * Multiple constants can have the same location.  We reset lengths of those
     807              :  * past the first to -1 so that they can later be ignored.
     808              :  *
     809              :  * If query_loc > 0, then "query" has been advanced by that much compared to
     810              :  * the original string start, as is the case with multi-statement strings, so
     811              :  * we need to translate the provided locations to compensate.  (This lets us
     812              :  * avoid re-scanning statements before the one of interest, so it's worth
     813              :  * doing.)
     814              :  *
     815              :  * N.B. There is an assumption that a '-' character at a Const location begins
     816              :  * a negative numeric constant.  This precludes there ever being another
     817              :  * reason for a constant to start with a '-'.
     818              :  *
     819              :  * The result is palloc'd; it is the caller's responsibility to free it, if necessary.
     820              :  */
     821              : LocationLen *
     822        11403 : ComputeConstantLengths(const JumbleState *jstate, const char *query,
     823              :                        int query_loc)
     824              : {
     825              :     LocationLen *locs;
     826              :     core_yyscan_t yyscanner;
     827              :     core_yy_extra_type yyextra;
     828              :     core_YYSTYPE yylval;
     829              :     YYLTYPE     yylloc;
     830              : 
     831        11403 :     if (jstate->clocations_count == 0)
     832            0 :         return NULL;
     833              : 
     834              :     /* Copy constant locations to avoid modifying jstate */
     835        11403 :     locs = palloc_array(LocationLen, jstate->clocations_count);
     836        11403 :     memcpy(locs, jstate->clocations, jstate->clocations_count * sizeof(LocationLen));
     837              : 
     838              :     /*
     839              :      * Sort the records by location so that we can process them in order while
     840              :      * scanning the query text.
     841              :      */
     842        11403 :     if (jstate->clocations_count > 1)
     843         7228 :         qsort(locs, jstate->clocations_count,
     844              :               sizeof(LocationLen), CompLocation);
     845              : 
     846              :     /* initialize the flex scanner --- should match raw_parser() */
     847        11403 :     yyscanner = scanner_init(query,
     848              :                              &yyextra,
     849              :                              &ScanKeywords,
     850              :                              ScanKeywordTokens);
     851              : 
     852              :     /* Search for each constant, in sequence */
     853        45757 :     for (int i = 0; i < jstate->clocations_count; i++)
     854              :     {
     855              :         int         loc;
     856              :         int         tok;
     857              : 
     858              :         /* Ignore constants after the first one in the same location */
     859        34354 :         if (i > 0 && locs[i].location == locs[i - 1].location)
     860              :         {
     861          508 :             locs[i].length = -1;
     862          508 :             continue;
     863              :         }
     864              : 
     865        33846 :         if (locs[i].squashed)
     866          665 :             continue;           /* squashed list: keep its copied length, nothing to lex */
     867              : 
     868              :         /*
     869              :          * Adjust the constant's location using the provided starting location
     870              :          * of the current statement.  This allows us to avoid scanning a
     871              :          * multi-statement string from the beginning.
     872              :          */
     873        33181 :         loc = locs[i].location - query_loc;
     874              :         Assert(loc >= 0);
     875              : 
     876              :         /*
     877              :          * We have a valid location for a constant that's not a dupe. Lex
     878              :          * tokens until we find the desired constant.
     879              :          */
     880              :         for (;;)
     881              :         {
     882       255152 :             tok = core_yylex(&yylval, &yylloc, yyscanner);
     883              : 
     884              :             /* We should not hit end-of-string, but if we do, behave sanely */
     885       255152 :             if (tok == 0)
     886            0 :                 break;          /* out of inner for-loop */
     887              : 
     888              :             /*
     889              :              * We should find the token position exactly, but if we somehow
     890              :              * run past it, work with that.
     891              :              */
     892       255152 :             if (yylloc >= loc)
     893              :             {
     894        33181 :                 if (query[loc] == '-')
     895              :                 {
     896              :                     /*
     897              :                      * It's a negative value - this is the one and only case
     898              :                      * where we replace more than a single token.
     899              :                      *
     900              :                      * Do not compensate for the special-case adjustment of
     901              :                      * location to that of the leading '-' operator in the
     902              :                      * event of a negative constant (see doNegate() in
     903              :                      * gram.y).  It is also useful for our purposes to start
     904              :                      * from the minus symbol.  In this way, queries like
     905              :                      * "select * from foo where bar = 1" and "select * from
     906              :                      * foo where bar = -2" can be treated similarly.
     907              :                      */
     908          379 :                     tok = core_yylex(&yylval, &yylloc, yyscanner);
     909          379 :                     if (tok == 0)
     910            0 :                         break;  /* out of inner for-loop */
     911              :                 }
     912              : 
     913              :                 /*
     914              :                  * We now rely on the assumption that flex has placed a zero
     915              :                  * byte after the text of the current token in scanbuf.
     916              :                  */
     917        33181 :                 locs[i].length = strlen(yyextra.scanbuf + loc);    /* measured from loc, so a leading '-' is counted */
     918        33181 :                 break;          /* out of inner for-loop */
     919              :             }
     920              :         }
     921              : 
     922              :         /* If we hit end-of-string, give up, leaving the remaining lengths as copied from jstate */
     923        33181 :         if (tok == 0)
     924            0 :             break;
     925              :     }
     926              : 
     927        11403 :     scanner_finish(yyscanner);
     928              : 
     929        11403 :     return locs;
     930              : }
        

Generated by: LCOV version 2.0-1