LCOV - code coverage report
Current view: top level - src/backend/nodes - read.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 138 179 77.1 %
Date: 2025-01-18 04:15:08 Functions: 7 7 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * read.c
       4             :  *    routines to convert a string (legal ascii representation of node) back
       5             :  *    to nodes
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  *
      11             :  * IDENTIFICATION
      12             :  *    src/backend/nodes/read.c
      13             :  *
      14             :  * HISTORY
      15             :  *    AUTHOR            DATE            MAJOR EVENT
      16             :  *    Andrew Yu         Nov 2, 1994     file creation
      17             :  *
      18             :  *-------------------------------------------------------------------------
      19             :  */
      20             : #include "postgres.h"
      21             : 
      22             : #include <ctype.h>
      23             : 
      24             : #include "common/string.h"
      25             : #include "nodes/bitmapset.h"
      26             : #include "nodes/pg_list.h"
      27             : #include "nodes/readfuncs.h"
      28             : #include "nodes/value.h"
      29             : 
      30             : 
      31             : /* Static state for pg_strtok */
      32             : static const char *pg_strtok_ptr = NULL;
      33             : 
      34             : /* State flag that determines how readfuncs.c should treat location fields */
      35             : #ifdef DEBUG_NODE_TESTS_ENABLED
      36             : bool        restore_location_fields = false;
      37             : #endif
      38             : 
      39             : 
      40             : /*
      41             :  * stringToNode -
      42             :  *    builds a Node tree from its string representation (assumed valid)
      43             :  *
      44             :  * restore_loc_fields instructs readfuncs.c whether to restore location
      45             :  * fields rather than set them to -1.  This is currently only supported
      46             :  * in builds with DEBUG_NODE_TESTS_ENABLED defined.
      47             :  */
      48             : static void *
      49     2206764 : stringToNodeInternal(const char *str, bool restore_loc_fields)
      50             : {
      51             :     void       *retval;
      52             :     const char *save_strtok;
      53             : #ifdef DEBUG_NODE_TESTS_ENABLED
      54             :     bool        save_restore_location_fields;
      55             : #endif
      56             : 
      57             :     /*
      58             :      * We save and restore the pre-existing state of pg_strtok. This makes the
      59             :      * world safe for re-entrant invocation of stringToNode, without incurring
      60             :      * a lot of notational overhead by having to pass the next-character
      61             :      * pointer around through all the readfuncs.c code.
      62             :      */
      63     2206764 :     save_strtok = pg_strtok_ptr;
      64             : 
      65     2206764 :     pg_strtok_ptr = str;        /* point pg_strtok at the string to read */
      66             : 
      67             :     /*
      68             :      * If enabled, likewise save/restore the location field handling flag.
      69             :      */
      70             : #ifdef DEBUG_NODE_TESTS_ENABLED
      71     2206764 :     save_restore_location_fields = restore_location_fields;
      72     2206764 :     restore_location_fields = restore_loc_fields;
      73             : #endif
      74             : 
      75     2206764 :     retval = nodeRead(NULL, 0); /* do the reading */
      76             : 
      77     2206764 :     pg_strtok_ptr = save_strtok;
      78             : 
      79             : #ifdef DEBUG_NODE_TESTS_ENABLED
      80     2206764 :     restore_location_fields = save_restore_location_fields;
      81             : #endif
      82             : 
      83     2206764 :     return retval;
      84             : }
      85             : 
      86             : /*
      87             :  * Externally visible entry points
      88             :  */
      89             : void *
      90      327120 : stringToNode(const char *str)
      91             : {
      92      327120 :     return stringToNodeInternal(str, false);
      93             : }
      94             : 
      95             : #ifdef DEBUG_NODE_TESTS_ENABLED
      96             : 
      97             : void *
      98     1879644 : stringToNodeWithLocations(const char *str)
      99             : {
     100     1879644 :     return stringToNodeInternal(str, true);
     101             : }
     102             : 
     103             : #endif
     104             : 
     105             : 
     106             : /*****************************************************************************
     107             :  *
     108             :  * the lisp token parser
     109             :  *
     110             :  *****************************************************************************/
     111             : 
     112             : /*
     113             :  * pg_strtok --- retrieve next "token" from a string.
     114             :  *
     115             :  * Works kinda like strtok, except it never modifies the source string.
     116             :  * (Instead of storing nulls into the string, the length of the token
     117             :  * is returned to the caller.)
     118             :  * Also, the rules about what is a token are hard-wired rather than being
     119             :  * configured by passing a set of terminating characters.
     120             :  *
     121             :  * The string is assumed to have been initialized already by stringToNode.
     122             :  *
     123             :  * The rules for tokens are:
     124             :  *  * Whitespace (space, tab, newline) always separates tokens.
     125             :  *  * The characters '(', ')', '{', '}' form individual tokens even
     126             :  *    without any whitespace around them.
     127             :  *  * Otherwise, a token is all the characters up to the next whitespace
     128             :  *    or occurrence of one of the four special characters.
     129             :  *  * A backslash '\' can be used to quote whitespace or one of the four
     130             :  *    special characters, so that it is treated as a plain token character.
     131             :  *    Backslashes themselves must also be backslashed for consistency.
     132             :  *    Any other character can be, but need not be, backslashed as well.
     133             :  *  * If the resulting token is '<>' (with no backslash), it is returned
     134             :  *    as a non-NULL pointer to the token but with length == 0.  Note that
     135             :  *    there is no other way to get a zero-length token.
     136             :  *
     137             :  * Returns a pointer to the start of the next token, and the length of the
     138             :  * token (including any embedded backslashes!) in *length.  If there are
     139             :  * no more tokens, NULL and 0 are returned.
     140             :  *
     141             :  * NOTE: this routine doesn't remove backslashes; the caller must do so
     142             :  * if necessary (see "debackslash").
     143             :  *
     144             :  * NOTE: prior to release 7.0, this routine also had a special case to treat
     145             :  * a token starting with '"' as extending to the next '"'.  This code was
     146             :  * broken, however, since it would fail to cope with a string containing an
     147             :  * embedded '"'.  I have therefore removed this special case, and instead
     148             :  * introduced rules for using backslashes to quote characters.  Higher-level
     149             :  * code should add backslashes to a string constant to ensure it is treated
     150             :  * as a single token.
     151             :  */
     152             : const char *
     153  1148956086 : pg_strtok(int *length)
     154             : {
     155             :     const char *local_str;      /* working pointer to string */
     156             :     const char *ret_str;        /* start of token to return */
     157             : 
     158  1148956086 :     local_str = pg_strtok_ptr;
     159             : 
     160  2133201378 :     while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t')
     161   984245292 :         local_str++;
     162             : 
     163  1148956086 :     if (*local_str == '\0')
     164             :     {
     165           0 :         *length = 0;
     166           0 :         pg_strtok_ptr = local_str;
     167           0 :         return NULL;            /* no more tokens */
     168             :     }
     169             : 
     170             :     /*
     171             :      * Now pointing at start of next token.
     172             :      */
     173  1148956086 :     ret_str = local_str;
     174             : 
     175  1148956086 :     if (*local_str == '(' || *local_str == ')' ||
     176  1078424170 :         *local_str == '{' || *local_str == '}')
     177             :     {
     178             :         /* special 1-character token */
     179   162917252 :         local_str++;
     180             :     }
     181             :     else
     182             :     {
     183             :         /* Normal token, possibly containing backslashes */
     184  6967450036 :         while (*local_str != '\0' &&
     185  6967416016 :                *local_str != ' ' && *local_str != '\n' &&
     186  6048815358 :                *local_str != '\t' &&
     187  6048815358 :                *local_str != '(' && *local_str != ')' &&
     188  6024319210 :                *local_str != '{' && *local_str != '}')
     189             :         {
     190  5981411202 :             if (*local_str == '\\' && local_str[1] != '\0')
     191     1686874 :                 local_str += 2;
     192             :             else
     193  5979724328 :                 local_str++;
     194             :         }
     195             :     }
     196             : 
     197  1148956086 :     *length = local_str - ret_str;
     198             : 
     199             :     /* Recognize special case for "empty" token */
     200  1148956086 :     if (*length == 2 && ret_str[0] == '<' && ret_str[1] == '>')
     201    60902720 :         *length = 0;
     202             : 
     203  1148956086 :     pg_strtok_ptr = local_str;
     204             : 
     205  1148956086 :     return ret_str;
     206             : }
     207             : 
     208             : /*
     209             :  * debackslash -
     210             :  *    create a palloc'd string holding the given token.
     211             :  *    any protective backslashes in the token are removed.
     212             :  */
     213             : char *
     214    33582262 : debackslash(const char *token, int length)
     215             : {
     216    33582262 :     char       *result = palloc(length + 1);
     217    33582262 :     char       *ptr = result;
     218             : 
     219   315070578 :     while (length > 0)
     220             :     {
     221   281488316 :         if (*token == '\\' && length > 1)
     222     1686874 :             token++, length--;
     223   281488316 :         *ptr++ = *token++;
     224   281488316 :         length--;
     225             :     }
     226    33582262 :     *ptr = '\0';
     227    33582262 :     return result;
     228             : }
     229             : 
     230             : #define RIGHT_PAREN (1000000 + 1)
     231             : #define LEFT_PAREN  (1000000 + 2)
     232             : #define LEFT_BRACE  (1000000 + 3)
     233             : #define OTHER_TOKEN (1000000 + 4)
     234             : 
     235             : /*
     236             :  * nodeTokenType -
     237             :  *    returns the type of the node token contained in token.
     238             :  *    It returns one of the following valid NodeTags:
     239             :  *      T_Integer, T_Float, T_Boolean, T_String, T_BitString
     240             :  *    and some of its own:
     241             :  *      RIGHT_PAREN, LEFT_PAREN, LEFT_BRACE, OTHER_TOKEN
     242             :  *
     243             :  *    Assumption: the ascii representation is legal
     244             :  */
     245             : static NodeTag
     246   140302868 : nodeTokenType(const char *token, int length)
     247             : {
     248             :     NodeTag     retval;
     249             :     const char *numptr;
     250             :     int         numlen;
     251             : 
     252             :     /*
     253             :      * Check if the token is a number
     254             :      */
     255   140302868 :     numptr = token;
     256   140302868 :     numlen = length;
     257   140302868 :     if (*numptr == '+' || *numptr == '-')
     258       46252 :         numptr++, numlen--;
     259   140302868 :     if ((numlen > 0 && isdigit((unsigned char) *numptr)) ||
     260    25399870 :         (numlen > 1 && *numptr == '.' && isdigit((unsigned char) numptr[1])))
     261             :     {
     262             :         /*
     263             :          * Yes.  Figure out whether it is integral or float; this requires
     264             :          * both a syntax check and a range check. strtoint() can do both for
     265             :          * us. We know the token will end at a character that strtoint will
     266             :          * stop at, so we do not need to modify the string.
     267             :          */
     268             :         char       *endptr;
     269             : 
     270      513014 :         errno = 0;
     271      513014 :         (void) strtoint(numptr, &endptr, 10);
     272      513014 :         if (endptr != token + length || errno == ERANGE)
     273       12378 :             return T_Float;
     274      500636 :         return T_Integer;
     275             :     }
     276             : 
     277             :     /*
     278             :      * these three cases do not need length checks, since pg_strtok() will
     279             :      * always treat them as single-byte tokens
     280             :      */
     281   139789854 :     else if (*token == '(')
     282    17476962 :         retval = LEFT_PAREN;
     283   122312892 :     else if (*token == ')')
     284           0 :         retval = RIGHT_PAREN;
     285   122312892 :     else if (*token == '{')
     286    46192668 :         retval = LEFT_BRACE;
     287    76120224 :     else if ((length == 4 && strncmp(token, "true", 4) == 0) ||
     288     1664848 :              (length == 5 && strncmp(token, "false", 5) == 0))
     289       89786 :         retval = T_Boolean;
     290    76030438 :     else if (*token == '"' && length > 1 && token[length - 1] == '"')
     291    25305994 :         retval = T_String;
     292    50724444 :     else if (*token == 'b' || *token == 'x')
     293        4068 :         retval = T_BitString;
     294             :     else
     295    50720376 :         retval = OTHER_TOKEN;
     296   139789854 :     return retval;
     297             : }
     298             : 
     299             : /*
     300             :  * nodeRead -
     301             :  *    Slightly higher-level reader.
     302             :  *
     303             :  * This routine applies some semantic knowledge on top of the purely
     304             :  * lexical tokenizer pg_strtok().   It can read
     305             :  *  * Value token nodes (integers, floats, booleans, or strings);
     306             :  *  * General nodes (via parseNodeString() from readfuncs.c);
     307             :  *  * Lists of the above;
     308             :  *  * Lists of integers, OIDs, or TransactionIds.
     309             :  * The return value is declared void *, not Node *, to avoid having to
     310             :  * cast it explicitly in callers that assign to fields of different types.
     311             :  *
     312             :  * External callers should always pass NULL/0 for the arguments.  Internally
     313             :  * a non-NULL token may be passed when the upper recursion level has already
     314             :  * scanned the first token of a node's representation.
     315             :  *
     316             :  * We assume pg_strtok is already initialized with a string to read (hence
     317             :  * this should only be invoked from within a stringToNode operation).
     318             :  */
     319             : void *
     320   140302868 : nodeRead(const char *token, int tok_len)
     321             : {
     322             :     Node       *result;
     323             :     NodeTag     type;
     324             : 
     325   140302868 :     if (token == NULL)          /* need to read a token? */
     326             :     {
     327    89850364 :         token = pg_strtok(&tok_len);
     328             : 
     329    89850364 :         if (token == NULL)      /* end of input */
     330           0 :             return NULL;
     331             :     }
     332             : 
     333   140302868 :     type = nodeTokenType(token, tok_len);
     334             : 
     335   140302868 :     switch ((int) type)
     336             :     {
     337    46192668 :         case LEFT_BRACE:
     338    46192668 :             result = parseNodeString();
     339    46192668 :             token = pg_strtok(&tok_len);
     340    46192668 :             if (token == NULL || token[0] != '}')
     341           0 :                 elog(ERROR, "did not find '}' at end of input node");
     342    46192668 :             break;
     343    17476962 :         case LEFT_PAREN:
     344             :             {
     345    17476962 :                 List       *l = NIL;
     346             : 
     347             :                 /*----------
     348             :                  * Could be an integer list:    (i int int ...)
     349             :                  * or an OID list:              (o int int ...)
     350             :                  * or an XID list:              (x int int ...)
     351             :                  * or a bitmapset:              (b int int ...)
     352             :                  * or a list of nodes/values:   (node node ...)
     353             :                  *----------
     354             :                  */
     355    17476962 :                 token = pg_strtok(&tok_len);
     356    17476962 :                 if (token == NULL)
     357           0 :                     elog(ERROR, "unterminated List structure");
     358    17476962 :                 if (tok_len == 1 && token[0] == 'i')
     359             :                 {
     360             :                     /* List of integers */
     361             :                     for (;;)
     362     4327060 :                     {
     363             :                         int         val;
     364             :                         char       *endptr;
     365             : 
     366     4815064 :                         token = pg_strtok(&tok_len);
     367     4815064 :                         if (token == NULL)
     368           0 :                             elog(ERROR, "unterminated List structure");
     369     4815064 :                         if (token[0] == ')')
     370      488004 :                             break;
     371     4327060 :                         val = (int) strtol(token, &endptr, 10);
     372     4327060 :                         if (endptr != token + tok_len)
     373           0 :                             elog(ERROR, "unrecognized integer: \"%.*s\"",
     374             :                                  tok_len, token);
     375     4327060 :                         l = lappend_int(l, val);
     376             :                     }
     377      488004 :                     result = (Node *) l;
     378             :                 }
     379    16988958 :                 else if (tok_len == 1 && token[0] == 'o')
     380             :                 {
     381             :                     /* List of OIDs */
     382             :                     for (;;)
     383     1302370 :                     {
     384             :                         Oid         val;
     385             :                         char       *endptr;
     386             : 
     387     1968568 :                         token = pg_strtok(&tok_len);
     388     1968568 :                         if (token == NULL)
     389           0 :                             elog(ERROR, "unterminated List structure");
     390     1968568 :                         if (token[0] == ')')
     391      666198 :                             break;
     392     1302370 :                         val = (Oid) strtoul(token, &endptr, 10);
     393     1302370 :                         if (endptr != token + tok_len)
     394           0 :                             elog(ERROR, "unrecognized OID: \"%.*s\"",
     395             :                                  tok_len, token);
     396     1302370 :                         l = lappend_oid(l, val);
     397             :                     }
     398      666198 :                     result = (Node *) l;
     399             :                 }
     400    16322760 :                 else if (tok_len == 1 && token[0] == 'x')
     401             :                 {
     402             :                     /* List of TransactionIds */
     403             :                     for (;;)
     404           0 :                     {
     405             :                         TransactionId val;
     406             :                         char       *endptr;
     407             : 
     408           0 :                         token = pg_strtok(&tok_len);
     409           0 :                         if (token == NULL)
     410           0 :                             elog(ERROR, "unterminated List structure");
     411           0 :                         if (token[0] == ')')
     412           0 :                             break;
     413           0 :                         val = (TransactionId) strtoul(token, &endptr, 10);
     414           0 :                         if (endptr != token + tok_len)
     415           0 :                             elog(ERROR, "unrecognized Xid: \"%.*s\"",
     416             :                                  tok_len, token);
     417           0 :                         l = lappend_xid(l, val);
     418             :                     }
     419           0 :                     result = (Node *) l;
     420             :                 }
     421    16322760 :                 else if (tok_len == 1 && token[0] == 'b')
     422           0 :                 {
     423             :                     /* Bitmapset -- see also _readBitmapset() */
     424           0 :                     Bitmapset  *bms = NULL;
     425             : 
     426             :                     for (;;)
     427           0 :                     {
     428             :                         int         val;
     429             :                         char       *endptr;
     430             : 
     431           0 :                         token = pg_strtok(&tok_len);
     432           0 :                         if (token == NULL)
     433           0 :                             elog(ERROR, "unterminated Bitmapset structure");
     434           0 :                         if (tok_len == 1 && token[0] == ')')
     435           0 :                             break;
     436           0 :                         val = (int) strtol(token, &endptr, 10);
     437           0 :                         if (endptr != token + tok_len)
     438           0 :                             elog(ERROR, "unrecognized integer: \"%.*s\"",
     439             :                                  tok_len, token);
     440           0 :                         bms = bms_add_member(bms, val);
     441             :                     }
     442           0 :                     result = (Node *) bms;
     443             :                 }
     444             :                 else
     445             :                 {
     446             :                     /* List of other node types */
     447             :                     for (;;)
     448             :                     {
     449             :                         /* We have already scanned next token... */
     450    66775264 :                         if (token[0] == ')')
     451    16322760 :                             break;
     452    50452504 :                         l = lappend(l, nodeRead(token, tok_len));
     453    50452504 :                         token = pg_strtok(&tok_len);
     454    50452504 :                         if (token == NULL)
     455           0 :                             elog(ERROR, "unterminated List structure");
     456             :                     }
     457    16322760 :                     result = (Node *) l;
     458             :                 }
     459    17476962 :                 break;
     460             :             }
     461           0 :         case RIGHT_PAREN:
     462           0 :             elog(ERROR, "unexpected right parenthesis");
     463             :             result = NULL;      /* keep compiler happy */
     464             :             break;
     465    50720376 :         case OTHER_TOKEN:
     466    50720376 :             if (tok_len == 0)
     467             :             {
     468             :                 /* must be "<>" --- represents a null pointer */
     469    50720376 :                 result = NULL;
     470             :             }
     471             :             else
     472             :             {
     473           0 :                 elog(ERROR, "unrecognized token: \"%.*s\"", tok_len, token);
     474             :                 result = NULL;  /* keep compiler happy */
     475             :             }
     476    50720376 :             break;
     477      500636 :         case T_Integer:
     478             : 
     479             :             /*
     480             :              * we know that the token terminates on a char atoi will stop at
     481             :              */
     482      500636 :             result = (Node *) makeInteger(atoi(token));
     483      500636 :             break;
     484       12378 :         case T_Float:
     485             :             {
     486       12378 :                 char       *fval = (char *) palloc(tok_len + 1);
     487             : 
     488       12378 :                 memcpy(fval, token, tok_len);
     489       12378 :                 fval[tok_len] = '\0';
     490       12378 :                 result = (Node *) makeFloat(fval);
     491             :             }
     492       12378 :             break;
     493       89786 :         case T_Boolean:
     494       89786 :             result = (Node *) makeBoolean(token[0] == 't');
     495       89786 :             break;
     496    25305994 :         case T_String:
     497             :             /* need to remove leading and trailing quotes, and backslashes */
     498    25305994 :             result = (Node *) makeString(debackslash(token + 1, tok_len - 2));
     499    25305994 :             break;
     500        4068 :         case T_BitString:
     501             :             /* need to remove backslashes, but there are no quotes */
     502        4068 :             result = (Node *) makeBitString(debackslash(token, tok_len));
     503        4068 :             break;
     504           0 :         default:
     505           0 :             elog(ERROR, "unrecognized node type: %d", (int) type);
     506             :             result = NULL;      /* keep compiler happy */
     507             :             break;
     508             :     }
     509             : 
     510   140302868 :     return result;
     511             : }

Generated by: LCOV version 1.14