LCOV - code coverage report
Current view: top level - src/backend/nodes - queryjumblefuncs.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 146 168 86.9 %
Date: 2023-11-29 04:11:06 Functions: 9 9 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * queryjumblefuncs.c
       4             :  *   Query normalization and fingerprinting.
       5             :  *
       6             :  * Normalization is a process whereby similar queries, typically differing only
       7             :  * in their constants (though the exact rules are somewhat more subtle than
       8             :  * that) are recognized as equivalent, and are tracked as a single entry.  This
       9             :  * is particularly useful for non-prepared queries.
      10             :  *
      11             :  * Normalization is implemented by fingerprinting queries, selectively
      12             :  * serializing those fields of each query tree's nodes that are judged to be
      13             :  * essential to the query.  This is referred to as a query jumble.  This is
      14             :  * distinct from a regular serialization in that various extraneous
      15             :  * information is ignored as irrelevant or not essential to the query, such
      16             :  * as the collations of Vars and, most notably, the values of constants.
      17             :  *
      18             :  * This jumble is acquired at the end of parse analysis of each query, and
      19             :  * a 64-bit hash of it is stored into the query's Query.queryId field.
      20             :  * The server then copies this value around, making it available in plan
      21             :  * tree(s) generated from the query.  The executor can then use this value
      22             :  * to blame query costs on the proper queryId.
      23             :  *
      24             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      25             :  * Portions Copyright (c) 1994, Regents of the University of California
      26             :  *
      27             :  *
      28             :  * IDENTIFICATION
      29             :  *    src/backend/nodes/queryjumblefuncs.c
      30             :  *
      31             :  *-------------------------------------------------------------------------
      32             :  */
      33             : #include "postgres.h"
      34             : 
      35             : #include "common/hashfn.h"
      36             : #include "miscadmin.h"
      37             : #include "nodes/queryjumble.h"
      38             : #include "parser/scansup.h"
      39             : 
      40             : #define JUMBLE_SIZE             1024    /* query serialization buffer size */
      41             : 
      42             : /* GUC parameters */
      43             : int         compute_query_id = COMPUTE_QUERY_ID_AUTO;
      44             : 
      45             : /* True when compute_query_id is ON, or AUTO and a module requests them */
      46             : bool        query_id_enabled = false;
      47             : 
      48             : static void AppendJumble(JumbleState *jstate,
      49             :                          const unsigned char *item, Size size);
      50             : static void RecordConstLocation(JumbleState *jstate, int location);
      51             : static void _jumbleNode(JumbleState *jstate, Node *node);
      52             : static void _jumbleA_Const(JumbleState *jstate, Node *node);
      53             : static void _jumbleList(JumbleState *jstate, Node *node);
      54             : static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
      55             : 
      56             : /*
      57             :  * Given a possibly multi-statement source string, confine our attention to the
      58             :  * relevant part of the string.
      59             :  */
      60             : const char *
      61      141330 : CleanQuerytext(const char *query, int *location, int *len)
      62             : {
      63      141330 :     int         query_location = *location;
      64      141330 :     int         query_len = *len;
      65             : 
      66             :     /* First apply starting offset, unless it's -1 (unknown). */
      67      141330 :     if (query_location >= 0)
      68             :     {
      69             :         Assert(query_location <= strlen(query));
      70      140962 :         query += query_location;
      71             :         /* Length of 0 (or -1) means "rest of string" */
      72      140962 :         if (query_len <= 0)
      73        8788 :             query_len = strlen(query);
      74             :         else
      75             :             Assert(query_len <= strlen(query));
      76             :     }
      77             :     else
      78             :     {
      79             :         /* If query location is unknown, distrust query_len as well */
      80         368 :         query_location = 0;
      81         368 :         query_len = strlen(query);
      82             :     }
      83             : 
      84             :     /*
      85             :      * Discard leading and trailing whitespace, too.  Use scanner_isspace()
      86             :      * not libc's isspace(), because we want to match the lexer's behavior.
      87             :      */
      88      141846 :     while (query_len > 0 && scanner_isspace(query[0]))
      89         516 :         query++, query_location++, query_len--;
      90      142254 :     while (query_len > 0 && scanner_isspace(query[query_len - 1]))
      91         924 :         query_len--;
      92             : 
      93      141330 :     *location = query_location;
      94      141330 :     *len = query_len;
      95             : 
      96      141330 :     return query;
      97             : }
      98             : 
      99             : JumbleState *
     100      123882 : JumbleQuery(Query *query)
     101             : {
     102      123882 :     JumbleState *jstate = NULL;
     103             : 
     104             :     Assert(IsQueryIdEnabled());
     105             : 
     106      123882 :     jstate = (JumbleState *) palloc(sizeof(JumbleState));
     107             : 
     108             :     /* Set up workspace for query jumbling */
     109      123882 :     jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
     110      123882 :     jstate->jumble_len = 0;
     111      123882 :     jstate->clocations_buf_size = 32;
     112      123882 :     jstate->clocations = (LocationLen *)
     113      123882 :         palloc(jstate->clocations_buf_size * sizeof(LocationLen));
     114      123882 :     jstate->clocations_count = 0;
     115      123882 :     jstate->highest_extern_param_id = 0;
     116             : 
     117             :     /* Compute query ID and mark the Query node with it */
     118      123882 :     _jumbleNode(jstate, (Node *) query);
     119      123882 :     query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
     120      123882 :                                                       jstate->jumble_len,
     121             :                                                       0));
     122             : 
     123             :     /*
     124             :      * If we are unlucky enough to get a hash of zero, use 1 instead for
     125             :      * normal statements and 2 for utility queries.
     126             :      */
     127      123882 :     if (query->queryId == UINT64CONST(0))
     128             :     {
     129           0 :         if (query->utilityStmt)
     130           0 :             query->queryId = UINT64CONST(2);
     131             :         else
     132           0 :             query->queryId = UINT64CONST(1);
     133             :     }
     134             : 
     135      123882 :     return jstate;
     136             : }
     137             : 
     138             : /*
     139             :  * Enables query identifier computation.
     140             :  *
     141             :  * Third-party plugins can use this function to inform core that they require
     142             :  * a query identifier to be computed.
     143             :  */
     144             : void
     145           6 : EnableQueryId(void)
     146             : {
     147           6 :     if (compute_query_id != COMPUTE_QUERY_ID_OFF)
     148           6 :         query_id_enabled = true;
     149           6 : }
     150             : 
     151             : /*
     152             :  * AppendJumble: Append a value that is substantive in a given query to
     153             :  * the current jumble.
     154             :  */
     155             : static void
     156     5838520 : AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
     157             : {
     158     5838520 :     unsigned char *jumble = jstate->jumble;
     159     5838520 :     Size        jumble_len = jstate->jumble_len;
     160             : 
     161             :     /*
     162             :      * Whenever the jumble buffer is full, we hash the current contents and
     163             :      * reset the buffer to contain just that hash value, thus relying on the
     164             :      * hash to summarize everything so far.
     165             :      */
     166    11677600 :     while (size > 0)
     167             :     {
     168             :         Size        part_size;
     169             : 
     170     5839080 :         if (jumble_len >= JUMBLE_SIZE)
     171             :         {
     172             :             uint64      start_hash;
     173             : 
     174        1688 :             start_hash = DatumGetUInt64(hash_any_extended(jumble,
     175             :                                                           JUMBLE_SIZE, 0));
     176        1688 :             memcpy(jumble, &start_hash, sizeof(start_hash));
     177        1688 :             jumble_len = sizeof(start_hash);
     178             :         }
     179     5839080 :         part_size = Min(size, JUMBLE_SIZE - jumble_len);
     180     5839080 :         memcpy(jumble + jumble_len, item, part_size);
     181     5839080 :         jumble_len += part_size;
     182     5839080 :         item += part_size;
     183     5839080 :         size -= part_size;
     184             :     }
     185     5838520 :     jstate->jumble_len = jumble_len;
     186     5838520 : }
     187             : 
     188             : /*
     189             :  * Record location of constant within query string of query tree
     190             :  * that is currently being walked.
     191             :  */
     192             : static void
     193      182876 : RecordConstLocation(JumbleState *jstate, int location)
     194             : {
     195             :     /* -1 indicates unknown or undefined location */
     196      182876 :     if (location >= 0)
     197             :     {
     198             :         /* enlarge array if needed */
     199      172858 :         if (jstate->clocations_count >= jstate->clocations_buf_size)
     200             :         {
     201         114 :             jstate->clocations_buf_size *= 2;
     202         114 :             jstate->clocations = (LocationLen *)
     203         114 :                 repalloc(jstate->clocations,
     204         114 :                          jstate->clocations_buf_size *
     205             :                          sizeof(LocationLen));
     206             :         }
     207      172858 :         jstate->clocations[jstate->clocations_count].location = location;
     208             :         /* initialize lengths to -1 to simplify third-party module usage */
     209      172858 :         jstate->clocations[jstate->clocations_count].length = -1;
     210      172858 :         jstate->clocations_count++;
     211             :     }
     212      182876 : }
     213             : 
     214             : #define JUMBLE_NODE(item) \
     215             :     _jumbleNode(jstate, (Node *) expr->item)
     216             : #define JUMBLE_LOCATION(location) \
     217             :     RecordConstLocation(jstate, expr->location)
     218             : #define JUMBLE_FIELD(item) \
     219             :     AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
     220             : #define JUMBLE_FIELD_SINGLE(item) \
     221             :     AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
     222             : #define JUMBLE_STRING(str) \
     223             : do { \
     224             :     if (expr->str) \
     225             :         AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
     226             : } while(0)
     227             : 
     228             : #include "queryjumblefuncs.funcs.c"
     229             : 
     230             : static void
     231     5234296 : _jumbleNode(JumbleState *jstate, Node *node)
     232             : {
     233     5234296 :     Node       *expr = node;
     234             : 
     235     5234296 :     if (expr == NULL)
     236     2906474 :         return;
     237             : 
     238             :     /* Guard against stack overflow due to overly complex expressions */
     239     2327822 :     check_stack_depth();
     240             : 
     241             :     /*
     242             :      * We always emit the node's NodeTag, then any additional fields that are
     243             :      * considered significant, and then we recurse to any child nodes.
     244             :      */
     245     2327822 :     JUMBLE_FIELD(type);
     246             : 
     247     2327822 :     switch (nodeTag(expr))
     248             :     {
     249             : #include "queryjumblefuncs.switch.c"
     250             : 
     251      573722 :         case T_List:
     252             :         case T_IntList:
     253             :         case T_OidList:
     254             :         case T_XidList:
     255      573722 :             _jumbleList(jstate, expr);
     256      573722 :             break;
     257             : 
     258           0 :         default:
     259             :             /* Only a warning, since we can stumble along anyway */
     260           0 :             elog(WARNING, "unrecognized node type: %d",
     261             :                  (int) nodeTag(expr));
     262           0 :             break;
     263             :     }
     264             : 
     265             :     /* Special cases to handle outside the automated code */
     266     2327822 :     switch (nodeTag(expr))
     267             :     {
     268       10562 :         case T_Param:
     269             :             {
     270       10562 :                 Param      *p = (Param *) node;
     271             : 
     272             :                 /*
     273             :                  * Update the highest Param id seen, in order to start
     274             :                  * normalization correctly.
     275             :                  */
     276       10562 :                 if (p->paramkind == PARAM_EXTERN &&
     277        9934 :                     p->paramid > jstate->highest_extern_param_id)
     278        8252 :                     jstate->highest_extern_param_id = p->paramid;
     279             :             }
     280       10562 :             break;
     281     2317260 :         default:
     282     2317260 :             break;
     283             :     }
     284             : }
     285             : 
     286             : static void
     287      573722 : _jumbleList(JumbleState *jstate, Node *node)
     288             : {
     289      573722 :     List       *expr = (List *) node;
     290             :     ListCell   *l;
     291             : 
     292      573722 :     switch (expr->type)
     293             :     {
     294      572950 :         case T_List:
     295     1574406 :             foreach(l, expr)
     296     1001456 :                 _jumbleNode(jstate, lfirst(l));
     297      572950 :             break;
     298         772 :         case T_IntList:
     299        1730 :             foreach(l, expr)
     300         958 :                 JUMBLE_FIELD_SINGLE(lfirst_int(l));
     301         772 :             break;
     302           0 :         case T_OidList:
     303           0 :             foreach(l, expr)
     304           0 :                 JUMBLE_FIELD_SINGLE(lfirst_oid(l));
     305           0 :             break;
     306           0 :         case T_XidList:
     307           0 :             foreach(l, expr)
     308           0 :                 JUMBLE_FIELD_SINGLE(lfirst_xid(l));
     309           0 :             break;
     310           0 :         default:
     311           0 :             elog(ERROR, "unrecognized list node type: %d",
     312             :                  (int) expr->type);
     313             :             return;
     314             :     }
     315             : }
     316             : 
     317             : static void
     318       14638 : _jumbleA_Const(JumbleState *jstate, Node *node)
     319             : {
     320       14638 :     A_Const    *expr = (A_Const *) node;
     321             : 
     322       14638 :     JUMBLE_FIELD(isnull);
     323       14638 :     if (!expr->isnull)
     324             :     {
     325       14506 :         JUMBLE_FIELD(val.node.type);
     326       14506 :         switch (nodeTag(&expr->val))
     327             :         {
     328        7104 :             case T_Integer:
     329        7104 :                 JUMBLE_FIELD(val.ival.ival);
     330        7104 :                 break;
     331          84 :             case T_Float:
     332          84 :                 JUMBLE_STRING(val.fval.fval);
     333          84 :                 break;
     334         212 :             case T_Boolean:
     335         212 :                 JUMBLE_FIELD(val.boolval.boolval);
     336         212 :                 break;
     337        7102 :             case T_String:
     338        7102 :                 JUMBLE_STRING(val.sval.sval);
     339        7102 :                 break;
     340           4 :             case T_BitString:
     341           4 :                 JUMBLE_STRING(val.bsval.bsval);
     342           4 :                 break;
     343           0 :             default:
     344           0 :                 elog(ERROR, "unrecognized node type: %d",
     345             :                      (int) nodeTag(&expr->val));
     346             :                 break;
     347             :         }
     348         132 :     }
     349       14638 : }
     350             : 
     351             : static void
     352      108820 : _jumbleRangeTblEntry(JumbleState *jstate, Node *node)
     353             : {
     354      108820 :     RangeTblEntry *expr = (RangeTblEntry *) node;
     355             : 
     356      108820 :     JUMBLE_FIELD(rtekind);
     357      108820 :     switch (expr->rtekind)
     358             :     {
     359       77398 :         case RTE_RELATION:
     360       77398 :             JUMBLE_FIELD(relid);
     361       77398 :             JUMBLE_NODE(tablesample);
     362       77398 :             JUMBLE_FIELD(inh);
     363       77398 :             break;
     364        9376 :         case RTE_SUBQUERY:
     365        9376 :             JUMBLE_NODE(subquery);
     366        9376 :             break;
     367       12512 :         case RTE_JOIN:
     368       12512 :             JUMBLE_FIELD(jointype);
     369       12512 :             break;
     370        5996 :         case RTE_FUNCTION:
     371        5996 :             JUMBLE_NODE(functions);
     372        5996 :             break;
     373          74 :         case RTE_TABLEFUNC:
     374          74 :             JUMBLE_NODE(tablefunc);
     375          74 :             break;
     376        2388 :         case RTE_VALUES:
     377        2388 :             JUMBLE_NODE(values_lists);
     378        2388 :             break;
     379         926 :         case RTE_CTE:
     380             : 
     381             :             /*
     382             :              * Depending on the CTE name here isn't ideal, but it's the only
     383             :              * info we have to identify the referenced WITH item.
     384             :              */
     385         926 :             JUMBLE_STRING(ctename);
     386         926 :             JUMBLE_FIELD(ctelevelsup);
     387         926 :             break;
     388         150 :         case RTE_NAMEDTUPLESTORE:
     389         150 :             JUMBLE_STRING(enrname);
     390         150 :             break;
     391           0 :         case RTE_RESULT:
     392           0 :             break;
     393           0 :         default:
     394           0 :             elog(ERROR, "unrecognized RTE kind: %d", (int) expr->rtekind);
     395             :             break;
     396             :     }
     397      108820 : }

Generated by: LCOV version 1.14