LCOV - code coverage report
Current view: top level - src/backend/nodes - queryjumblefuncs.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17beta1 Lines: 114 132 86.4 %
Date: 2024-05-29 11:10:47 Functions: 8 8 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * queryjumblefuncs.c
       4             :  *   Query normalization and fingerprinting.
       5             :  *
       6             :  * Normalization is a process whereby similar queries, typically differing only
       7             :  * in their constants (though the exact rules are somewhat more subtle than
       8             :  * that) are recognized as equivalent, and are tracked as a single entry.  This
       9             :  * is particularly useful for non-prepared queries.
      10             :  *
      11             :  * Normalization is implemented by fingerprinting queries, selectively
      12             :  * serializing those fields of each query tree's nodes that are judged to be
      13             :  * essential to the query.  This is referred to as a query jumble.  This is
      14             :  * distinct from a regular serialization in that various extraneous
      15             :  * information is ignored as irrelevant or not essential to the query, such
      16             :  * as the collations of Vars and, most notably, the values of constants.
      17             :  *
      18             :  * This jumble is acquired at the end of parse analysis of each query, and
      19             :  * a 64-bit hash of it is stored into the query's Query.queryId field.
      20             :  * The server then copies this value around, making it available in plan
      21             :  * tree(s) generated from the query.  The executor can then use this value
      22             :  * to blame query costs on the proper queryId.
      23             :  *
      24             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      25             :  * Portions Copyright (c) 1994, Regents of the University of California
      26             :  *
      27             :  *
      28             :  * IDENTIFICATION
      29             :  *    src/backend/nodes/queryjumblefuncs.c
      30             :  *
      31             :  *-------------------------------------------------------------------------
      32             :  */
      33             : #include "postgres.h"
      34             : 
      35             : #include "common/hashfn.h"
      36             : #include "miscadmin.h"
      37             : #include "nodes/queryjumble.h"
      38             : #include "parser/scansup.h"
      39             : 
      40             : #define JUMBLE_SIZE             1024    /* query serialization buffer size */
      41             : 
      42             : /* GUC parameters */
      43             : int         compute_query_id = COMPUTE_QUERY_ID_AUTO;
      44             : 
      45             : /*
      46             :  * True when compute_query_id is ON or AUTO, and a module requests them.
      47             :  *
      48             :  * Note that IsQueryIdEnabled() should be used instead of checking
      49             :  * query_id_enabled or compute_query_id directly when we want to know
      50             :  * whether query identifiers are computed in the core or not.
      51             :  */
      52             : bool        query_id_enabled = false;
      53             : 
      54             : static void AppendJumble(JumbleState *jstate,
      55             :                          const unsigned char *item, Size size);
      56             : static void RecordConstLocation(JumbleState *jstate, int location);
      57             : static void _jumbleNode(JumbleState *jstate, Node *node);
      58             : static void _jumbleA_Const(JumbleState *jstate, Node *node);
      59             : static void _jumbleList(JumbleState *jstate, Node *node);
      60             : 
      61             : /*
      62             :  * Given a possibly multi-statement source string, confine our attention to the
      63             :  * relevant part of the string.
      64             :  */
      65             : const char *
      66      148148 : CleanQuerytext(const char *query, int *location, int *len)
      67             : {
      68      148148 :     int         query_location = *location;
      69      148148 :     int         query_len = *len;
      70             : 
      71             :     /* First apply starting offset, unless it's -1 (unknown). */
      72      148148 :     if (query_location >= 0)
      73             :     {
      74             :         Assert(query_location <= strlen(query));
      75      147780 :         query += query_location;
      76             :         /* Length of 0 (or -1) means "rest of string" */
      77      147780 :         if (query_len <= 0)
      78        8186 :             query_len = strlen(query);
      79             :         else
      80             :             Assert(query_len <= strlen(query));
      81             :     }
      82             :     else
      83             :     {
      84             :         /* If query location is unknown, distrust query_len as well */
      85         368 :         query_location = 0;
      86         368 :         query_len = strlen(query);
      87             :     }
      88             : 
      89             :     /*
      90             :      * Discard leading and trailing whitespace, too.  Use scanner_isspace()
      91             :      * not libc's isspace(), because we want to match the lexer's behavior.
      92             :      */
      93      148664 :     while (query_len > 0 && scanner_isspace(query[0]))
      94         516 :         query++, query_location++, query_len--;
      95      149050 :     while (query_len > 0 && scanner_isspace(query[query_len - 1]))
      96         902 :         query_len--;
      97             : 
      98      148148 :     *location = query_location;
      99      148148 :     *len = query_len;
     100             : 
     101      148148 :     return query;
     102             : }
     103             : 
     104             : JumbleState *
     105      129942 : JumbleQuery(Query *query)
     106             : {
     107      129942 :     JumbleState *jstate = NULL;
     108             : 
     109             :     Assert(IsQueryIdEnabled());
     110             : 
     111      129942 :     jstate = (JumbleState *) palloc(sizeof(JumbleState));
     112             : 
     113             :     /* Set up workspace for query jumbling */
     114      129942 :     jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
     115      129942 :     jstate->jumble_len = 0;
     116      129942 :     jstate->clocations_buf_size = 32;
     117      129942 :     jstate->clocations = (LocationLen *)
     118      129942 :         palloc(jstate->clocations_buf_size * sizeof(LocationLen));
     119      129942 :     jstate->clocations_count = 0;
     120      129942 :     jstate->highest_extern_param_id = 0;
     121             : 
     122             :     /* Compute query ID and mark the Query node with it */
     123      129942 :     _jumbleNode(jstate, (Node *) query);
     124      129942 :     query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
     125      129942 :                                                       jstate->jumble_len,
     126             :                                                       0));
     127             : 
     128             :     /*
     129             :      * If we are unlucky enough to get a hash of zero, use 1 instead for
     130             :      * normal statements and 2 for utility queries.
     131             :      */
     132      129942 :     if (query->queryId == UINT64CONST(0))
     133             :     {
     134           0 :         if (query->utilityStmt)
     135           0 :             query->queryId = UINT64CONST(2);
     136             :         else
     137           0 :             query->queryId = UINT64CONST(1);
     138             :     }
     139             : 
     140      129942 :     return jstate;
     141             : }
     142             : 
     143             : /*
     144             :  * Enables query identifier computation.
     145             :  *
     146             :  * Third-party plugins can use this function to inform core that they require
     147             :  * a query identifier to be computed.
     148             :  */
     149             : void
     150          12 : EnableQueryId(void)
     151             : {
     152          12 :     if (compute_query_id != COMPUTE_QUERY_ID_OFF)
     153          12 :         query_id_enabled = true;
     154          12 : }
     155             : 
     156             : /*
     157             :  * AppendJumble: Append a value that is substantive in a given query to
     158             :  * the current jumble.
     159             :  */
     160             : static void
     161     6387538 : AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
     162             : {
     163     6387538 :     unsigned char *jumble = jstate->jumble;
     164     6387538 :     Size        jumble_len = jstate->jumble_len;
     165             : 
     166             :     /*
     167             :      * Whenever the jumble buffer is full, we hash the current contents and
     168             :      * reset the buffer to contain just that hash value, thus relying on the
     169             :      * hash to summarize everything so far.
     170             :      */
     171    12776684 :     while (size > 0)
     172             :     {
     173             :         Size        part_size;
     174             : 
     175     6389146 :         if (jumble_len >= JUMBLE_SIZE)
     176             :         {
     177             :             uint64      start_hash;
     178             : 
     179        2022 :             start_hash = DatumGetUInt64(hash_any_extended(jumble,
     180             :                                                           JUMBLE_SIZE, 0));
     181        2022 :             memcpy(jumble, &start_hash, sizeof(start_hash));
     182        2022 :             jumble_len = sizeof(start_hash);
     183             :         }
     184     6389146 :         part_size = Min(size, JUMBLE_SIZE - jumble_len);
     185     6389146 :         memcpy(jumble + jumble_len, item, part_size);
     186     6389146 :         jumble_len += part_size;
     187     6389146 :         item += part_size;
     188     6389146 :         size -= part_size;
     189             :     }
     190     6387538 :     jstate->jumble_len = jumble_len;
     191     6387538 : }
     192             : 
     193             : /*
     194             :  * Record location of constant within query string of query tree
     195             :  * that is currently being walked.
     196             :  */
     197             : static void
     198      190608 : RecordConstLocation(JumbleState *jstate, int location)
     199             : {
     200             :     /* -1 indicates unknown or undefined location */
     201      190608 :     if (location >= 0)
     202             :     {
     203             :         /* enlarge array if needed */
     204      178940 :         if (jstate->clocations_count >= jstate->clocations_buf_size)
     205             :         {
     206         114 :             jstate->clocations_buf_size *= 2;
     207         114 :             jstate->clocations = (LocationLen *)
     208         114 :                 repalloc(jstate->clocations,
     209         114 :                          jstate->clocations_buf_size *
     210             :                          sizeof(LocationLen));
     211             :         }
     212      178940 :         jstate->clocations[jstate->clocations_count].location = location;
     213             :         /* initialize lengths to -1 to simplify third-party module usage */
     214      178940 :         jstate->clocations[jstate->clocations_count].length = -1;
     215      178940 :         jstate->clocations_count++;
     216             :     }
     217      190608 : }
     218             : 
     219             : #define JUMBLE_NODE(item) \
     220             :     _jumbleNode(jstate, (Node *) expr->item)
     221             : #define JUMBLE_LOCATION(location) \
     222             :     RecordConstLocation(jstate, expr->location)
     223             : #define JUMBLE_FIELD(item) \
     224             :     AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
     225             : #define JUMBLE_FIELD_SINGLE(item) \
     226             :     AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
     227             : #define JUMBLE_STRING(str) \
     228             : do { \
     229             :     if (expr->str) \
     230             :         AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
     231             : } while(0)
     232             : 
     233             : #include "queryjumblefuncs.funcs.c"
     234             : 
     235             : static void
     236     6037112 : _jumbleNode(JumbleState *jstate, Node *node)
     237             : {
     238     6037112 :     Node       *expr = node;
     239             : 
     240     6037112 :     if (expr == NULL)
     241     3643228 :         return;
     242             : 
     243             :     /* Guard against stack overflow due to overly complex expressions */
     244     2393884 :     check_stack_depth();
     245             : 
     246             :     /*
     247             :      * We always emit the node's NodeTag, then any additional fields that are
     248             :      * considered significant, and then we recurse to any child nodes.
     249             :      */
     250     2393884 :     JUMBLE_FIELD(type);
     251             : 
     252     2393884 :     switch (nodeTag(expr))
     253             :     {
     254             : #include "queryjumblefuncs.switch.c"
     255             : 
     256      590014 :         case T_List:
     257             :         case T_IntList:
     258             :         case T_OidList:
     259             :         case T_XidList:
     260      590014 :             _jumbleList(jstate, expr);
     261      590014 :             break;
     262             : 
     263           0 :         default:
     264             :             /* Only a warning, since we can stumble along anyway */
     265           0 :             elog(WARNING, "unrecognized node type: %d",
     266             :                  (int) nodeTag(expr));
     267           0 :             break;
     268             :     }
     269             : 
     270             :     /* Special cases to handle outside the automated code */
     271     2393884 :     switch (nodeTag(expr))
     272             :     {
     273       11468 :         case T_Param:
     274             :             {
     275       11468 :                 Param      *p = (Param *) node;
     276             : 
     277             :                 /*
     278             :                  * Update the highest Param id seen, in order to start
     279             :                  * normalization correctly.
     280             :                  */
     281       11468 :                 if (p->paramkind == PARAM_EXTERN &&
     282       10746 :                     p->paramid > jstate->highest_extern_param_id)
     283        8770 :                     jstate->highest_extern_param_id = p->paramid;
     284             :             }
     285       11468 :             break;
     286     2382416 :         default:
     287     2382416 :             break;
     288             :     }
     289             : }
     290             : 
     291             : static void
     292      590014 : _jumbleList(JumbleState *jstate, Node *node)
     293             : {
     294      590014 :     List       *expr = (List *) node;
     295             :     ListCell   *l;
     296             : 
     297      590014 :     switch (expr->type)
     298             :     {
     299      589242 :         case T_List:
     300     1612516 :             foreach(l, expr)
     301     1023274 :                 _jumbleNode(jstate, lfirst(l));
     302      589242 :             break;
     303         772 :         case T_IntList:
     304        1730 :             foreach(l, expr)
     305         958 :                 JUMBLE_FIELD_SINGLE(lfirst_int(l));
     306         772 :             break;
     307           0 :         case T_OidList:
     308           0 :             foreach(l, expr)
     309           0 :                 JUMBLE_FIELD_SINGLE(lfirst_oid(l));
     310           0 :             break;
     311           0 :         case T_XidList:
     312           0 :             foreach(l, expr)
     313           0 :                 JUMBLE_FIELD_SINGLE(lfirst_xid(l));
     314           0 :             break;
     315           0 :         default:
     316           0 :             elog(ERROR, "unrecognized list node type: %d",
     317             :                  (int) expr->type);
     318             :             return;
     319             :     }
     320             : }
     321             : 
     322             : static void
     323       16588 : _jumbleA_Const(JumbleState *jstate, Node *node)
     324             : {
     325       16588 :     A_Const    *expr = (A_Const *) node;
     326             : 
     327       16588 :     JUMBLE_FIELD(isnull);
     328       16588 :     if (!expr->isnull)
     329             :     {
     330       16446 :         JUMBLE_FIELD(val.node.type);
     331       16446 :         switch (nodeTag(&expr->val))
     332             :         {
     333        7788 :             case T_Integer:
     334        7788 :                 JUMBLE_FIELD(val.ival.ival);
     335        7788 :                 break;
     336          84 :             case T_Float:
     337          84 :                 JUMBLE_STRING(val.fval.fval);
     338          84 :                 break;
     339         224 :             case T_Boolean:
     340         224 :                 JUMBLE_FIELD(val.boolval.boolval);
     341         224 :                 break;
     342        8346 :             case T_String:
     343        8346 :                 JUMBLE_STRING(val.sval.sval);
     344        8346 :                 break;
     345           4 :             case T_BitString:
     346           4 :                 JUMBLE_STRING(val.bsval.bsval);
     347           4 :                 break;
     348           0 :             default:
     349           0 :                 elog(ERROR, "unrecognized node type: %d",
     350             :                      (int) nodeTag(&expr->val));
     351             :                 break;
     352             :         }
     353         142 :     }
     354       16588 : }

Generated by: LCOV version 1.14