LCOV - code coverage report
Current view: top level - src/backend/nodes - queryjumblefuncs.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 123 141 87.2 %
Date: 2025-01-28 22:15:38 Functions: 9 9 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * queryjumblefuncs.c
       4             :  *   Query normalization and fingerprinting.
       5             :  *
       6             :  * Normalization is a process whereby similar queries, typically differing only
       7             :  * in their constants (though the exact rules are somewhat more subtle than
       8             :  * that) are recognized as equivalent, and are tracked as a single entry.  This
       9             :  * is particularly useful for non-prepared queries.
      10             :  *
      11             :  * Normalization is implemented by fingerprinting queries, selectively
      12             :  * serializing those fields of each query tree's nodes that are judged to be
      13             :  * essential to the query.  This is referred to as a query jumble.  This is
      14             :  * distinct from a regular serialization in that various extraneous
      15             :  * information is ignored as irrelevant or not essential to the query, such
      16             :  * as the collations of Vars and, most notably, the values of constants.
      17             :  *
      18             :  * This jumble is acquired at the end of parse analysis of each query, and
      19             :  * a 64-bit hash of it is stored into the query's Query.queryId field.
      20             :  * The server then copies this value around, making it available in plan
      21             :  * tree(s) generated from the query.  The executor can then use this value
      22             :  * to blame query costs on the proper queryId.
      23             :  *
      24             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      25             :  * Portions Copyright (c) 1994, Regents of the University of California
      26             :  *
      27             :  *
      28             :  * IDENTIFICATION
      29             :  *    src/backend/nodes/queryjumblefuncs.c
      30             :  *
      31             :  *-------------------------------------------------------------------------
      32             :  */
      33             : #include "postgres.h"
      34             : 
      35             : #include "common/hashfn.h"
      36             : #include "miscadmin.h"
      37             : #include "nodes/queryjumble.h"
      38             : #include "parser/scansup.h"
      39             : 
      40             : #define JUMBLE_SIZE             1024    /* query serialization buffer size */
      41             : 
      42             : /* GUC parameters */
      43             : int         compute_query_id = COMPUTE_QUERY_ID_AUTO;
      44             : 
      45             : /*
      46             :  * True when compute_query_id is ON or AUTO, and a module requests them.
      47             :  *
      48             :  * Note that IsQueryIdEnabled() should be used instead of checking
      49             :  * query_id_enabled or compute_query_id directly when we want to know
      50             :  * whether query identifiers are computed in the core or not.
      51             :  */
      52             : bool        query_id_enabled = false;
      53             : 
      54             : static void AppendJumble(JumbleState *jstate,
      55             :                          const unsigned char *item, Size size);
      56             : static void RecordConstLocation(JumbleState *jstate, int location);
      57             : static void _jumbleNode(JumbleState *jstate, Node *node);
      58             : static void _jumbleA_Const(JumbleState *jstate, Node *node);
      59             : static void _jumbleList(JumbleState *jstate, Node *node);
      60             : static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
      61             : 
      62             : /*
      63             :  * Given a possibly multi-statement source string, confine our attention to the
      64             :  * relevant part of the string.
      65             :  */
      66             : const char *
      67      181378 : CleanQuerytext(const char *query, int *location, int *len)
      68             : {
      69      181378 :     int         query_location = *location;
      70      181378 :     int         query_len = *len;
      71             : 
      72             :     /* First apply starting offset, unless it's -1 (unknown). */
      73      181378 :     if (query_location >= 0)
      74             :     {
      75             :         Assert(query_location <= strlen(query));
      76      181004 :         query += query_location;
      77             :         /* Length of 0 (or -1) means "rest of string" */
      78      181004 :         if (query_len <= 0)
      79       29474 :             query_len = strlen(query);
      80             :         else
      81             :             Assert(query_len <= strlen(query));
      82             :     }
      83             :     else
      84             :     {
      85             :         /* If query location is unknown, distrust query_len as well */
      86         374 :         query_location = 0;
      87         374 :         query_len = strlen(query);
      88             :     }
      89             : 
      90             :     /*
      91             :      * Discard leading and trailing whitespace, too.  Use scanner_isspace()
      92             :      * not libc's isspace(), because we want to match the lexer's behavior.
      93             :      *
      94             :      * Note: the parser now strips leading comments and whitespace from the
      95             :      * reported stmt_location, so this first loop will only iterate in the
      96             :      * unusual case that the location didn't propagate to here.  But the
      97             :      * statement length will extend to the end-of-string or terminating
      98             :      * semicolon, so the second loop often does something useful.
      99             :      */
     100      181400 :     while (query_len > 0 && scanner_isspace(query[0]))
     101          22 :         query++, query_location++, query_len--;
     102      182366 :     while (query_len > 0 && scanner_isspace(query[query_len - 1]))
     103         988 :         query_len--;
     104             : 
     105      181378 :     *location = query_location;
     106      181378 :     *len = query_len;
     107             : 
     108      181378 :     return query;
     109             : }
     110             : 
     111             : JumbleState *
     112      149166 : JumbleQuery(Query *query)
     113             : {
     114      149166 :     JumbleState *jstate = NULL;
     115             : 
     116             :     Assert(IsQueryIdEnabled());
     117             : 
     118      149166 :     jstate = (JumbleState *) palloc(sizeof(JumbleState));
     119             : 
     120             :     /* Set up workspace for query jumbling */
     121      149166 :     jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
     122      149166 :     jstate->jumble_len = 0;
     123      149166 :     jstate->clocations_buf_size = 32;
     124      149166 :     jstate->clocations = (LocationLen *)
     125      149166 :         palloc(jstate->clocations_buf_size * sizeof(LocationLen));
     126      149166 :     jstate->clocations_count = 0;
     127      149166 :     jstate->highest_extern_param_id = 0;
     128             : 
     129             :     /* Compute query ID and mark the Query node with it */
     130      149166 :     _jumbleNode(jstate, (Node *) query);
     131      149166 :     query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
     132      149166 :                                                       jstate->jumble_len,
     133             :                                                       0));
     134             : 
     135             :     /*
     136             :      * If we are unlucky enough to get a hash of zero, use 1 instead for
     137             :      * normal statements and 2 for utility queries.
     138             :      */
     139      149166 :     if (query->queryId == UINT64CONST(0))
     140             :     {
     141           0 :         if (query->utilityStmt)
     142           0 :             query->queryId = UINT64CONST(2);
     143             :         else
     144           0 :             query->queryId = UINT64CONST(1);
     145             :     }
     146             : 
     147      149166 :     return jstate;
     148             : }
     149             : 
     150             : /*
     151             :  * Enables query identifier computation.
     152             :  *
     153             :  * Third-party plugins can use this function to inform core that they require
     154             :  * a query identifier to be computed.
     155             :  */
     156             : void
     157          14 : EnableQueryId(void)
     158             : {
     159          14 :     if (compute_query_id != COMPUTE_QUERY_ID_OFF)
     160          14 :         query_id_enabled = true;
     161          14 : }
     162             : 
     163             : /*
     164             :  * AppendJumble: Append a value that is substantive in a given query to
     165             :  * the current jumble.
     166             :  */
     167             : static void
     168     8415426 : AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
     169             : {
     170     8415426 :     unsigned char *jumble = jstate->jumble;
     171     8415426 :     Size        jumble_len = jstate->jumble_len;
     172             : 
     173             :     /*
     174             :      * Whenever the jumble buffer is full, we hash the current contents and
     175             :      * reset the buffer to contain just that hash value, thus relying on the
     176             :      * hash to summarize everything so far.
     177             :      */
     178    16833066 :     while (size > 0)
     179             :     {
     180             :         Size        part_size;
     181             : 
     182     8417640 :         if (jumble_len >= JUMBLE_SIZE)
     183             :         {
     184             :             uint64      start_hash;
     185             : 
     186        2962 :             start_hash = DatumGetUInt64(hash_any_extended(jumble,
     187             :                                                           JUMBLE_SIZE, 0));
     188        2962 :             memcpy(jumble, &start_hash, sizeof(start_hash));
     189        2962 :             jumble_len = sizeof(start_hash);
     190             :         }
     191     8417640 :         part_size = Min(size, JUMBLE_SIZE - jumble_len);
     192     8417640 :         memcpy(jumble + jumble_len, item, part_size);
     193     8417640 :         jumble_len += part_size;
     194     8417640 :         item += part_size;
     195     8417640 :         size -= part_size;
     196             :     }
     197     8415426 :     jstate->jumble_len = jumble_len;
     198     8415426 : }
     199             : 
     200             : /*
     201             :  * Record location of constant within query string of query tree
     202             :  * that is currently being walked.
     203             :  */
     204             : static void
     205      228100 : RecordConstLocation(JumbleState *jstate, int location)
     206             : {
     207             :     /* -1 indicates unknown or undefined location */
     208      228100 :     if (location >= 0)
     209             :     {
     210             :         /* enlarge array if needed */
     211      215190 :         if (jstate->clocations_count >= jstate->clocations_buf_size)
     212             :         {
     213         126 :             jstate->clocations_buf_size *= 2;
     214         126 :             jstate->clocations = (LocationLen *)
     215         126 :                 repalloc(jstate->clocations,
     216         126 :                          jstate->clocations_buf_size *
     217             :                          sizeof(LocationLen));
     218             :         }
     219      215190 :         jstate->clocations[jstate->clocations_count].location = location;
     220             :         /* initialize lengths to -1 to simplify third-party module usage */
     221      215190 :         jstate->clocations[jstate->clocations_count].length = -1;
     222      215190 :         jstate->clocations_count++;
     223             :     }
     224      228100 : }
     225             : 
     226             : #define JUMBLE_NODE(item) \
     227             :     _jumbleNode(jstate, (Node *) expr->item)
     228             : #define JUMBLE_LOCATION(location) \
     229             :     RecordConstLocation(jstate, expr->location)
     230             : #define JUMBLE_FIELD(item) \
     231             :     AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
     232             : #define JUMBLE_FIELD_SINGLE(item) \
     233             :     AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
     234             : #define JUMBLE_STRING(str) \
     235             : do { \
     236             :     if (expr->str) \
     237             :         AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
     238             : } while(0)
     239             : 
     240             : #include "queryjumblefuncs.funcs.c"
     241             : 
     242             : static void
     243     7189692 : _jumbleNode(JumbleState *jstate, Node *node)
     244             : {
     245     7189692 :     Node       *expr = node;
     246             : 
     247     7189692 :     if (expr == NULL)
     248     4244284 :         return;
     249             : 
     250             :     /* Guard against stack overflow due to overly complex expressions */
     251     2945408 :     check_stack_depth();
     252             : 
     253             :     /*
     254             :      * We always emit the node's NodeTag, then any additional fields that are
     255             :      * considered significant, and then we recurse to any child nodes.
     256             :      */
     257     2945408 :     JUMBLE_FIELD(type);
     258             : 
     259     2945408 :     switch (nodeTag(expr))
     260             :     {
     261             : #include "queryjumblefuncs.switch.c"
     262             : 
     263      703918 :         case T_List:
     264             :         case T_IntList:
     265             :         case T_OidList:
     266             :         case T_XidList:
     267      703918 :             _jumbleList(jstate, expr);
     268      703918 :             break;
     269             : 
     270           0 :         default:
     271             :             /* Only a warning, since we can stumble along anyway */
     272           0 :             elog(WARNING, "unrecognized node type: %d",
     273             :                  (int) nodeTag(expr));
     274           0 :             break;
     275             :     }
     276             : 
     277             :     /* Special cases to handle outside the automated code */
     278     2945408 :     switch (nodeTag(expr))
     279             :     {
     280       10686 :         case T_Param:
     281             :             {
     282       10686 :                 Param      *p = (Param *) node;
     283             : 
     284             :                 /*
     285             :                  * Update the highest Param id seen, in order to start
     286             :                  * normalization correctly.
     287             :                  */
     288       10686 :                 if (p->paramkind == PARAM_EXTERN &&
     289        9922 :                     p->paramid > jstate->highest_extern_param_id)
     290        8582 :                     jstate->highest_extern_param_id = p->paramid;
     291             :             }
     292       10686 :             break;
     293     2934722 :         default:
     294     2934722 :             break;
     295             :     }
     296             : }
     297             : 
     298             : static void
     299      703918 : _jumbleList(JumbleState *jstate, Node *node)
     300             : {
     301      703918 :     List       *expr = (List *) node;
     302             :     ListCell   *l;
     303             : 
     304      703918 :     switch (expr->type)
     305             :     {
     306      703008 :         case T_List:
     307     1978334 :             foreach(l, expr)
     308     1275326 :                 _jumbleNode(jstate, lfirst(l));
     309      703008 :             break;
     310         910 :         case T_IntList:
     311        2036 :             foreach(l, expr)
     312        1126 :                 JUMBLE_FIELD_SINGLE(lfirst_int(l));
     313         910 :             break;
     314           0 :         case T_OidList:
     315           0 :             foreach(l, expr)
     316           0 :                 JUMBLE_FIELD_SINGLE(lfirst_oid(l));
     317           0 :             break;
     318           0 :         case T_XidList:
     319           0 :             foreach(l, expr)
     320           0 :                 JUMBLE_FIELD_SINGLE(lfirst_xid(l));
     321           0 :             break;
     322           0 :         default:
     323           0 :             elog(ERROR, "unrecognized list node type: %d",
     324             :                  (int) expr->type);
     325             :             return;
     326             :     }
     327             : }
     328             : 
     329             : static void
     330       16158 : _jumbleA_Const(JumbleState *jstate, Node *node)
     331             : {
     332       16158 :     A_Const    *expr = (A_Const *) node;
     333             : 
     334       16158 :     JUMBLE_FIELD(isnull);
     335       16158 :     if (!expr->isnull)
     336             :     {
     337       16012 :         JUMBLE_FIELD(val.node.type);
     338       16012 :         switch (nodeTag(&expr->val))
     339             :         {
     340        7148 :             case T_Integer:
     341        7148 :                 JUMBLE_FIELD(val.ival.ival);
     342        7148 :                 break;
     343          42 :             case T_Float:
     344          42 :                 JUMBLE_STRING(val.fval.fval);
     345          42 :                 break;
     346         232 :             case T_Boolean:
     347         232 :                 JUMBLE_FIELD(val.boolval.boolval);
     348         232 :                 break;
     349        8586 :             case T_String:
     350        8586 :                 JUMBLE_STRING(val.sval.sval);
     351        8586 :                 break;
     352           4 :             case T_BitString:
     353           4 :                 JUMBLE_STRING(val.bsval.bsval);
     354           4 :                 break;
     355           0 :             default:
     356           0 :                 elog(ERROR, "unrecognized node type: %d",
     357             :                      (int) nodeTag(&expr->val));
     358             :                 break;
     359             :         }
     360         146 :     }
     361       16158 : }
     362             : 
     363             : static void
     364        4992 : _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
     365             : {
     366        4992 :     VariableSetStmt *expr = (VariableSetStmt *) node;
     367             : 
     368        4992 :     JUMBLE_FIELD(kind);
     369        4992 :     JUMBLE_STRING(name);
     370             : 
     371             :     /*
     372             :      * Account for the list of arguments in query jumbling only if told by the
     373             :      * parser.
     374             :      */
     375        4992 :     if (expr->jumble_args)
     376         120 :         JUMBLE_NODE(args);
     377        4992 :     JUMBLE_FIELD(is_local);
     378        4992 :     JUMBLE_LOCATION(location);
     379        4992 : }

Generated by: LCOV version 1.14