LCOV - code coverage report
Current view: top level - src/backend/nodes - queryjumblefuncs.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 114 132 86.4 %
Date: 2024-04-19 05:11:10 Functions: 8 8 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * queryjumblefuncs.c
       4             :  *   Query normalization and fingerprinting.
       5             :  *
       6             :  * Normalization is a process whereby similar queries, typically differing only
       7             :  * in their constants (though the exact rules are somewhat more subtle than
       8             :  * that) are recognized as equivalent, and are tracked as a single entry.  This
       9             :  * is particularly useful for non-prepared queries.
      10             :  *
      11             :  * Normalization is implemented by fingerprinting queries, selectively
      12             :  * serializing those fields of each query tree's nodes that are judged to be
      13             :  * essential to the query.  This is referred to as a query jumble.  This is
      14             :  * distinct from a regular serialization in that various extraneous
      15             :  * information is ignored as irrelevant or not essential to the query, such
      16             :  * as the collations of Vars and, most notably, the values of constants.
      17             :  *
      18             :  * This jumble is acquired at the end of parse analysis of each query, and
      19             :  * a 64-bit hash of it is stored into the query's Query.queryId field.
      20             :  * The server then copies this value around, making it available in plan
      21             :  * tree(s) generated from the query.  The executor can then use this value
      22             :  * to blame query costs on the proper queryId.
      23             :  *
      24             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      25             :  * Portions Copyright (c) 1994, Regents of the University of California
      26             :  *
      27             :  *
      28             :  * IDENTIFICATION
      29             :  *    src/backend/nodes/queryjumblefuncs.c
      30             :  *
      31             :  *-------------------------------------------------------------------------
      32             :  */
      33             : #include "postgres.h"
      34             : 
      35             : #include "common/hashfn.h"
      36             : #include "miscadmin.h"
      37             : #include "nodes/queryjumble.h"
      38             : #include "parser/scansup.h"
      39             : 
      40             : #define JUMBLE_SIZE             1024    /* query serialization buffer size */
      41             : 
      42             : /* GUC parameters */
      43             : int         compute_query_id = COMPUTE_QUERY_ID_AUTO;
      44             : 
      45             : /*
      46             :  * True when compute_query_id is ON or AUTO, and a module requests them.
      47             :  *
      48             :  * Note that IsQueryIdEnabled() should be used instead of checking
      49             :  * query_id_enabled or compute_query_id directly when we want to know
      50             :  * whether query identifiers are computed in the core or not.
      51             :  */
      52             : bool        query_id_enabled = false;
      53             : 
      54             : static void AppendJumble(JumbleState *jstate,
      55             :                          const unsigned char *item, Size size);
      56             : static void RecordConstLocation(JumbleState *jstate, int location);
      57             : static void _jumbleNode(JumbleState *jstate, Node *node);
      58             : static void _jumbleA_Const(JumbleState *jstate, Node *node);
      59             : static void _jumbleList(JumbleState *jstate, Node *node);
      60             : static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
      61             : 
      62             : /*
      63             :  * Given a possibly multi-statement source string, confine our attention to the
      64             :  * relevant part of the string.
      65             :  */
      66             : const char *
      67      152230 : CleanQuerytext(const char *query, int *location, int *len)
      68             : {
      69      152230 :     int         query_location = *location;
      70      152230 :     int         query_len = *len;
      71             : 
      72             :     /* First apply starting offset, unless it's -1 (unknown). */
      73      152230 :     if (query_location >= 0)
      74             :     {
      75             :         Assert(query_location <= strlen(query));
      76      151862 :         query += query_location;
      77             :         /* Length of 0 (or -1) means "rest of string" */
      78      151862 :         if (query_len <= 0)
      79        9062 :             query_len = strlen(query);
      80             :         else
      81             :             Assert(query_len <= strlen(query));
      82             :     }
      83             :     else
      84             :     {
      85             :         /* If query location is unknown, distrust query_len as well */
      86         368 :         query_location = 0;
      87         368 :         query_len = strlen(query);
      88             :     }
      89             : 
      90             :     /*
      91             :      * Discard leading and trailing whitespace, too.  Use scanner_isspace()
      92             :      * not libc's isspace(), because we want to match the lexer's behavior.
      93             :      */
      94      152746 :     while (query_len > 0 && scanner_isspace(query[0]))
      95         516 :         query++, query_location++, query_len--;
      96      153136 :     while (query_len > 0 && scanner_isspace(query[query_len - 1]))
      97         906 :         query_len--;
      98             : 
      99      152230 :     *location = query_location;
     100      152230 :     *len = query_len;
     101             : 
     102      152230 :     return query;
     103             : }
     104             : 
     105             : JumbleState *
     106      132696 : JumbleQuery(Query *query)
     107             : {
     108      132696 :     JumbleState *jstate = NULL;
     109             : 
     110             :     Assert(IsQueryIdEnabled());
     111             : 
     112      132696 :     jstate = (JumbleState *) palloc(sizeof(JumbleState));
     113             : 
     114             :     /* Set up workspace for query jumbling */
     115      132696 :     jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
     116      132696 :     jstate->jumble_len = 0;
     117      132696 :     jstate->clocations_buf_size = 32;
     118      132696 :     jstate->clocations = (LocationLen *)
     119      132696 :         palloc(jstate->clocations_buf_size * sizeof(LocationLen));
     120      132696 :     jstate->clocations_count = 0;
     121      132696 :     jstate->highest_extern_param_id = 0;
     122             : 
     123             :     /* Compute query ID and mark the Query node with it */
     124      132696 :     _jumbleNode(jstate, (Node *) query);
     125      132696 :     query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
     126      132696 :                                                       jstate->jumble_len,
     127             :                                                       0));
     128             : 
     129             :     /*
     130             :      * If we are unlucky enough to get a hash of zero, use 1 instead for
     131             :      * normal statements and 2 for utility queries.
     132             :      */
     133      132696 :     if (query->queryId == UINT64CONST(0))
     134             :     {
     135           0 :         if (query->utilityStmt)
     136           0 :             query->queryId = UINT64CONST(2);
     137             :         else
     138           0 :             query->queryId = UINT64CONST(1);
     139             :     }
     140             : 
     141      132696 :     return jstate;
     142             : }
     143             : 
     144             : /*
     145             :  * Enables query identifier computation.
     146             :  *
     147             :  * Third-party plugins can use this function to inform core that they require
     148             :  * a query identifier to be computed.
     149             :  */
     150             : void
     151          12 : EnableQueryId(void)
     152             : {
     153          12 :     if (compute_query_id != COMPUTE_QUERY_ID_OFF)
     154          12 :         query_id_enabled = true;
     155          12 : }
     156             : 
     157             : /*
     158             :  * AppendJumble: Append a value that is substantive in a given query to
     159             :  * the current jumble.
     160             :  */
     161             : static void
     162     6737390 : AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
     163             : {
     164     6737390 :     unsigned char *jumble = jstate->jumble;
     165     6737390 :     Size        jumble_len = jstate->jumble_len;
     166             : 
     167             :     /*
     168             :      * Whenever the jumble buffer is full, we hash the current contents and
     169             :      * reset the buffer to contain just that hash value, thus relying on the
     170             :      * hash to summarize everything so far.
     171             :      */
     172    13476582 :     while (size > 0)
     173             :     {
     174             :         Size        part_size;
     175             : 
     176     6739192 :         if (jumble_len >= JUMBLE_SIZE)
     177             :         {
     178             :             uint64      start_hash;
     179             : 
     180        2216 :             start_hash = DatumGetUInt64(hash_any_extended(jumble,
     181             :                                                           JUMBLE_SIZE, 0));
     182        2216 :             memcpy(jumble, &start_hash, sizeof(start_hash));
     183        2216 :             jumble_len = sizeof(start_hash);
     184             :         }
     185     6739192 :         part_size = Min(size, JUMBLE_SIZE - jumble_len);
     186     6739192 :         memcpy(jumble + jumble_len, item, part_size);
     187     6739192 :         jumble_len += part_size;
     188     6739192 :         item += part_size;
     189     6739192 :         size -= part_size;
     190             :     }
     191     6737390 :     jstate->jumble_len = jumble_len;
     192     6737390 : }
     193             : 
     194             : /*
     195             :  * Record location of constant within query string of query tree
     196             :  * that is currently being walked.
     197             :  */
     198             : static void
     199      199316 : RecordConstLocation(JumbleState *jstate, int location)
     200             : {
     201             :     /* -1 indicates unknown or undefined location */
     202      199316 :     if (location >= 0)
     203             :     {
     204             :         /* enlarge array if needed */
     205      187534 :         if (jstate->clocations_count >= jstate->clocations_buf_size)
     206             :         {
     207         114 :             jstate->clocations_buf_size *= 2;
     208         114 :             jstate->clocations = (LocationLen *)
     209         114 :                 repalloc(jstate->clocations,
     210         114 :                          jstate->clocations_buf_size *
     211             :                          sizeof(LocationLen));
     212             :         }
     213      187534 :         jstate->clocations[jstate->clocations_count].location = location;
     214             :         /* initialize lengths to -1 to simplify third-party module usage */
     215      187534 :         jstate->clocations[jstate->clocations_count].length = -1;
     216      187534 :         jstate->clocations_count++;
     217             :     }
     218      199316 : }
     219             : 
     220             : #define JUMBLE_NODE(item) \
     221             :     _jumbleNode(jstate, (Node *) expr->item)
     222             : #define JUMBLE_LOCATION(location) \
     223             :     RecordConstLocation(jstate, expr->location)
     224             : #define JUMBLE_FIELD(item) \
     225             :     AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
     226             : #define JUMBLE_FIELD_SINGLE(item) \
     227             :     AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
     228             : #define JUMBLE_STRING(str) \
     229             : do { \
     230             :     if (expr->str) \
     231             :         AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
     232             : } while(0)
     233             : 
     234             : #include "queryjumblefuncs.funcs.c"
     235             : 
     236             : static void
     237     6278870 : _jumbleNode(JumbleState *jstate, Node *node)
     238             : {
     239     6278870 :     Node       *expr = node;
     240             : 
     241     6278870 :     if (expr == NULL)
     242     3764806 :         return;
     243             : 
     244             :     /* Guard against stack overflow due to overly complex expressions */
     245     2514064 :     check_stack_depth();
     246             : 
     247             :     /*
     248             :      * We always emit the node's NodeTag, then any additional fields that are
     249             :      * considered significant, and then we recurse to any child nodes.
     250             :      */
     251     2514064 :     JUMBLE_FIELD(type);
     252             : 
     253     2514064 :     switch (nodeTag(expr))
     254             :     {
     255             : #include "queryjumblefuncs.switch.c"
     256             : 
     257      617050 :         case T_List:
     258             :         case T_IntList:
     259             :         case T_OidList:
     260             :         case T_XidList:
     261      617050 :             _jumbleList(jstate, expr);
     262      617050 :             break;
     263             : 
     264           0 :         default:
     265             :             /* Only a warning, since we can stumble along anyway */
     266           0 :             elog(WARNING, "unrecognized node type: %d",
     267             :                  (int) nodeTag(expr));
     268           0 :             break;
     269             :     }
     270             : 
     271             :     /* Special cases to handle outside the automated code */
     272     2514064 :     switch (nodeTag(expr))
     273             :     {
     274       11064 :         case T_Param:
     275             :             {
     276       11064 :                 Param      *p = (Param *) node;
     277             : 
     278             :                 /*
     279             :                  * Update the highest Param id seen, in order to start
     280             :                  * normalization correctly.
     281             :                  */
     282       11064 :                 if (p->paramkind == PARAM_EXTERN &&
     283       10318 :                     p->paramid > jstate->highest_extern_param_id)
     284        9016 :                     jstate->highest_extern_param_id = p->paramid;
     285             :             }
     286       11064 :             break;
     287     2503000 :         default:
     288     2503000 :             break;
     289             :     }
     290             : }
     291             : 
     292             : static void
     293      617050 : _jumbleList(JumbleState *jstate, Node *node)
     294             : {
     295      617050 :     List       *expr = (List *) node;
     296             :     ListCell   *l;
     297             : 
     298      617050 :     switch (expr->type)
     299             :     {
     300      616278 :         case T_List:
     301     1696504 :             foreach(l, expr)
     302     1080226 :                 _jumbleNode(jstate, lfirst(l));
     303      616278 :             break;
     304         772 :         case T_IntList:
     305        1730 :             foreach(l, expr)
     306         958 :                 JUMBLE_FIELD_SINGLE(lfirst_int(l));
     307         772 :             break;
     308           0 :         case T_OidList:
     309           0 :             foreach(l, expr)
     310           0 :                 JUMBLE_FIELD_SINGLE(lfirst_oid(l));
     311           0 :             break;
     312           0 :         case T_XidList:
     313           0 :             foreach(l, expr)
     314           0 :                 JUMBLE_FIELD_SINGLE(lfirst_xid(l));
     315           0 :             break;
     316           0 :         default:
     317           0 :             elog(ERROR, "unrecognized list node type: %d",
     318             :                  (int) expr->type);
     319             :             return;
     320             :     }
     321             : }
     322             : 
     323             : static void
     324       16466 : _jumbleA_Const(JumbleState *jstate, Node *node)
     325             : {
     326       16466 :     A_Const    *expr = (A_Const *) node;
     327             : 
     328       16466 :     JUMBLE_FIELD(isnull);
     329       16466 :     if (!expr->isnull)
     330             :     {
     331       16322 :         JUMBLE_FIELD(val.node.type);
     332       16322 :         switch (nodeTag(&expr->val))
     333             :         {
     334        7686 :             case T_Integer:
     335        7686 :                 JUMBLE_FIELD(val.ival.ival);
     336        7686 :                 break;
     337          84 :             case T_Float:
     338          84 :                 JUMBLE_STRING(val.fval.fval);
     339          84 :                 break;
     340         224 :             case T_Boolean:
     341         224 :                 JUMBLE_FIELD(val.boolval.boolval);
     342         224 :                 break;
     343        8324 :             case T_String:
     344        8324 :                 JUMBLE_STRING(val.sval.sval);
     345        8324 :                 break;
     346           4 :             case T_BitString:
     347           4 :                 JUMBLE_STRING(val.bsval.bsval);
     348           4 :                 break;
     349           0 :             default:
     350           0 :                 elog(ERROR, "unrecognized node type: %d",
     351             :                      (int) nodeTag(&expr->val));
     352             :                 break;
     353             :         }
     354         144 :     }
     355       16466 : }

Generated by: LCOV version 1.14