LCOV - code coverage report
Current view: top level - src/backend/nodes - read.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 83.2 % 179 149
Test Date: 2026-03-03 13:15:30 Functions: 100.0 % 7 7
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * read.c
       4              :  *    routines to convert a string (legal ascii representation of node) back
       5              :  *    to nodes
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  * Portions Copyright (c) 1994, Regents of the University of California
       9              :  *
      10              :  *
      11              :  * IDENTIFICATION
      12              :  *    src/backend/nodes/read.c
      13              :  *
      14              :  * HISTORY
      15              :  *    AUTHOR            DATE            MAJOR EVENT
      16              :  *    Andrew Yu         Nov 2, 1994     file creation
      17              :  *
      18              :  *-------------------------------------------------------------------------
      19              :  */
      20              : #include "postgres.h"
      21              : 
      22              : #include <ctype.h>
      23              : 
      24              : #include "common/string.h"
      25              : #include "nodes/bitmapset.h"
      26              : #include "nodes/pg_list.h"
      27              : #include "nodes/readfuncs.h"
      28              : #include "nodes/value.h"
      29              : 
      30              : 
      31              : /* Static state for pg_strtok */
      32              : static const char *pg_strtok_ptr = NULL;
      33              : 
      34              : /* State flag that determines how readfuncs.c should treat location fields */
      35              : #ifdef DEBUG_NODE_TESTS_ENABLED
      36              : bool        restore_location_fields = false;
      37              : #endif
      38              : 
      39              : 
      40              : /*
      41              :  * stringToNode -
      42              :  *    builds a Node tree from its string representation (assumed valid)
      43              :  *
      44              :  * restore_loc_fields instructs readfuncs.c whether to restore location
      45              :  * fields rather than set them to -1.  This is currently only supported
      46              :  * in builds with DEBUG_NODE_TESTS_ENABLED defined.
      47              :  */
      48              : static void *
      49      1199203 : stringToNodeInternal(const char *str, bool restore_loc_fields)
      50              : {
      51              :     void       *retval;
      52              :     const char *save_strtok;
      53              : #ifdef DEBUG_NODE_TESTS_ENABLED
      54              :     bool        save_restore_location_fields;
      55              : #endif
      56              : 
      57              :     /*
      58              :      * We save and restore the pre-existing state of pg_strtok. This makes the
      59              :      * world safe for re-entrant invocation of stringToNode, without incurring
      60              :      * a lot of notational overhead by having to pass the next-character
      61              :      * pointer around through all the readfuncs.c code.
      62              :      */
      63      1199203 :     save_strtok = pg_strtok_ptr;
      64              : 
      65      1199203 :     pg_strtok_ptr = str;        /* point pg_strtok at the string to read */
      66              : 
      67              :     /*
      68              :      * If enabled, likewise save/restore the location field handling flag.
      69              :      */
      70              : #ifdef DEBUG_NODE_TESTS_ENABLED
      71      1199203 :     save_restore_location_fields = restore_location_fields;
      72      1199203 :     restore_location_fields = restore_loc_fields;
      73              : #endif
      74              : 
      75      1199203 :     retval = nodeRead(NULL, 0); /* do the reading */
      76              : 
      77      1199203 :     pg_strtok_ptr = save_strtok;
      78              : 
      79              : #ifdef DEBUG_NODE_TESTS_ENABLED
      80      1199203 :     restore_location_fields = save_restore_location_fields;
      81              : #endif
      82              : 
      83      1199203 :     return retval;
      84              : }
      85              : 
      86              : /*
      87              :  * Externally visible entry points
      88              :  */
      89              : void *
      90       177466 : stringToNode(const char *str)
      91              : {
      92       177466 :     return stringToNodeInternal(str, false);
      93              : }
      94              : 
      95              : #ifdef DEBUG_NODE_TESTS_ENABLED
      96              : 
      97              : void *
      98      1021737 : stringToNodeWithLocations(const char *str)
      99              : {
     100      1021737 :     return stringToNodeInternal(str, true);
     101              : }
     102              : 
     103              : #endif
     104              : 
     105              : 
     106              : /*****************************************************************************
     107              :  *
     108              :  * the lisp token parser
     109              :  *
     110              :  *****************************************************************************/
     111              : 
     112              : /*
     113              :  * pg_strtok --- retrieve next "token" from a string.
     114              :  *
     115              :  * Works kinda like strtok, except it never modifies the source string.
     116              :  * (Instead of storing nulls into the string, the length of the token
     117              :  * is returned to the caller.)
     118              :  * Also, the rules about what is a token are hard-wired rather than being
     119              :  * configured by passing a set of terminating characters.
     120              :  *
     121              :  * The string is assumed to have been initialized already by stringToNode.
     122              :  *
     123              :  * The rules for tokens are:
     124              :  *  * Whitespace (space, tab, newline) always separates tokens.
     125              :  *  * The characters '(', ')', '{', '}' form individual tokens even
     126              :  *    without any whitespace around them.
     127              :  *  * Otherwise, a token is all the characters up to the next whitespace
     128              :  *    or occurrence of one of the four special characters.
     129              :  *  * A backslash '\' can be used to quote whitespace or one of the four
     130              :  *    special characters, so that it is treated as a plain token character.
     131              :  *    Backslashes themselves must also be backslashed for consistency.
     132              :  *    Any other character can be, but need not be, backslashed as well.
     133              :  *  * If the resulting token is '<>' (with no backslash), it is returned
     134              :  *    as a non-NULL pointer to the token but with length == 0.  Note that
     135              :  *    there is no other way to get a zero-length token.
     136              :  *
     137              :  * Returns a pointer to the start of the next token, and the length of the
     138              :  * token (including any embedded backslashes!) in *length.  If there are
     139              :  * no more tokens, NULL and 0 are returned.
     140              :  *
     141              :  * NOTE: this routine doesn't remove backslashes; the caller must do so
     142              :  * if necessary (see "debackslash").
     143              :  *
     144              :  * NOTE: prior to release 7.0, this routine also had a special case to treat
     145              :  * a token starting with '"' as extending to the next '"'.  This code was
     146              :  * broken, however, since it would fail to cope with a string containing an
     147              :  * embedded '"'.  I have therefore removed this special case, and instead
     148              :  * introduced rules for using backslashes to quote characters.  Higher-level
     149              :  * code should add backslashes to a string constant to ensure it is treated
     150              :  * as a single token.
     151              :  */
     152              : const char *
     153    700953405 : pg_strtok(int *length)
     154              : {
     155              :     const char *local_str;      /* working pointer to string */
     156              :     const char *ret_str;        /* start of token to return */
     157              : 
     158    700953405 :     local_str = pg_strtok_ptr;
     159              : 
     160   1300715245 :     while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t')
     161    599761840 :         local_str++;
     162              : 
     163    700953405 :     if (*local_str == '\0')
     164              :     {
     165            0 :         *length = 0;
     166            0 :         pg_strtok_ptr = local_str;
     167            0 :         return NULL;            /* no more tokens */
     168              :     }
     169              : 
     170              :     /*
     171              :      * Now pointing at start of next token.
     172              :      */
     173    700953405 :     ret_str = local_str;
     174              : 
     175    700953405 :     if (*local_str == '(' || *local_str == ')' ||
     176    656941295 :         *local_str == '{' || *local_str == '}')
     177              :     {
     178              :         /* special 1-character token */
     179    100249162 :         local_str++;
     180              :     }
     181              :     else
     182              :     {
     183              :         /* Normal token, possibly containing backslashes */
     184    600704243 :         while (*local_str != '\0' &&
     185   4287848765 :                *local_str != ' ' && *local_str != '\n' &&
     186   3728803210 :                *local_str != '\t' &&
     187   3728803210 :                *local_str != '(' && *local_str != ')' &&
     188   8001001555 :                *local_str != '{' && *local_str != '}')
     189              :         {
     190   3687164956 :             if (*local_str == '\\' && local_str[1] != '\0')
     191       884311 :                 local_str += 2;
     192              :             else
     193   3686280645 :                 local_str++;
     194              :         }
     195              :     }
     196              : 
     197    700953405 :     *length = local_str - ret_str;
     198              : 
     199              :     /* Recognize special case for "empty" token */
     200    700953405 :     if (*length == 2 && ret_str[0] == '<' && ret_str[1] == '>')
     201     35801894 :         *length = 0;
     202              : 
     203    700953405 :     pg_strtok_ptr = local_str;
     204              : 
     205    700953405 :     return ret_str;
     206              : }
     207              : 
     208              : /*
     209              :  * debackslash -
     210              :  *    create a palloc'd string holding the given token.
     211              :  *    any protective backslashes in the token are removed.
     212              :  */
     213              : char *
     214     21136636 : debackslash(const char *token, int length)
     215              : {
     216     21136636 :     char       *result = palloc(length + 1);
     217     21136636 :     char       *ptr = result;
     218              : 
     219    201638022 :     while (length > 0)
     220              :     {
     221    180501386 :         if (*token == '\\' && length > 1)
     222       884311 :             token++, length--;
     223    180501386 :         *ptr++ = *token++;
     224    180501386 :         length--;
     225              :     }
     226     21136636 :     *ptr = '\0';
     227     21136636 :     return result;
     228              : }
     229              : 
     230              : #define RIGHT_PAREN (1000000 + 1)
     231              : #define LEFT_PAREN  (1000000 + 2)
     232              : #define LEFT_BRACE  (1000000 + 3)
     233              : #define OTHER_TOKEN (1000000 + 4)
     234              : 
     235              : /*
     236              :  * nodeTokenType -
     237              :  *    returns the type of the node token contained in token.
     238              :  *    It returns one of the following valid NodeTags:
     239              :  *      T_Integer, T_Float, T_Boolean, T_String, T_BitString
     240              :  *    and some of its own:
     241              :  *      RIGHT_PAREN, LEFT_PAREN, LEFT_BRACE, OTHER_TOKEN
     242              :  *
     243              :  *    Assumption: the ascii representation is legal
     244              :  */
     245              : static NodeTag
     246     84610263 : nodeTokenType(const char *token, int length)
     247              : {
     248              :     NodeTag     retval;
     249              :     const char *numptr;
     250              :     int         numlen;
     251              : 
     252              :     /*
     253              :      * Check if the token is a number
     254              :      */
     255     84610263 :     numptr = token;
     256     84610263 :     numlen = length;
     257     84610263 :     if (*numptr == '+' || *numptr == '-')
     258        15433 :         numptr++, numlen--;
     259     84610263 :     if ((numlen > 0 && isdigit((unsigned char) *numptr)) ||
     260     16187837 :         (numlen > 1 && *numptr == '.' && isdigit((unsigned char) numptr[1])))
     261              :     {
     262              :         /*
     263              :          * Yes.  Figure out whether it is integral or float; this requires
     264              :          * both a syntax check and a range check. strtoint() can do both for
     265              :          * us. We know the token will end at a character that strtoint will
     266              :          * stop at, so we do not need to modify the string.
     267              :          */
     268              :         char       *endptr;
     269              : 
     270       257732 :         errno = 0;
     271       257732 :         (void) strtoint(numptr, &endptr, 10);
     272       257732 :         if (endptr != token + length || errno == ERANGE)
     273         6085 :             return T_Float;
     274       251647 :         return T_Integer;
     275              :     }
     276              : 
     277              :     /*
     278              :      * these three cases do not need length checks, since pg_strtok() will
     279              :      * always treat them as single-byte tokens
     280              :      */
     281     84352531 :     else if (*token == '(')
     282     10323273 :         retval = LEFT_PAREN;
     283     74029258 :     else if (*token == ')')
     284            0 :         retval = RIGHT_PAREN;
     285     74029258 :     else if (*token == '{')
     286     28118526 :         retval = LEFT_BRACE;
     287     45910732 :     else if ((length == 4 && strncmp(token, "true", 4) == 0) ||
     288      1001780 :              (length == 5 && strncmp(token, "false", 5) == 0))
     289        49236 :         retval = T_Boolean;
     290     45861496 :     else if (*token == '"' && length > 1 && token[length - 1] == '"')
     291     16136540 :         retval = T_String;
     292     29724956 :     else if (*token == 'b' || *token == 'x')
     293         2050 :         retval = T_BitString;
     294              :     else
     295     29722906 :         retval = OTHER_TOKEN;
     296     84352531 :     return retval;
     297              : }
     298              : 
     299              : /*
     300              :  * nodeRead -
     301              :  *    Slightly higher-level reader.
     302              :  *
     303              :  * This routine applies some semantic knowledge on top of the purely
     304              :  * lexical tokenizer pg_strtok().   It can read
     305              :  *  * Value token nodes (integers, floats, booleans, or strings);
     306              :  *  * General nodes (via parseNodeString() from readfuncs.c);
     307              :  *  * Lists of the above;
     308              :  *  * Lists of integers, OIDs, or TransactionIds.
     309              :  * The return value is declared void *, not Node *, to avoid having to
     310              :  * cast it explicitly in callers that assign to fields of different types.
     311              :  *
     312              :  * External callers should always pass NULL/0 for the arguments.  Internally
     313              :  * a non-NULL token may be passed when the upper recursion level has already
     314              :  * scanned the first token of a node's representation.
     315              :  *
     316              :  * We assume pg_strtok is already initialized with a string to read (hence
     317              :  * this should only be invoked from within a stringToNode operation).
     318              :  */
     319              : void *
     320     84610263 : nodeRead(const char *token, int tok_len)
     321              : {
     322              :     Node       *result;
     323              :     NodeTag     type;
     324              : 
     325     84610263 :     if (token == NULL)          /* need to read a token? */
     326              :     {
     327     52867043 :         token = pg_strtok(&tok_len);
     328              : 
     329     52867043 :         if (token == NULL)      /* end of input */
     330            0 :             return NULL;
     331              :     }
     332              : 
     333     84610263 :     type = nodeTokenType(token, tok_len);
     334              : 
     335     84610263 :     switch ((int) type)
     336              :     {
     337     28118526 :         case LEFT_BRACE:
     338     28118526 :             result = parseNodeString();
     339     28118526 :             token = pg_strtok(&tok_len);
     340     28118526 :             if (token == NULL || token[0] != '}')
     341            0 :                 elog(ERROR, "did not find '}' at end of input node");
     342     28118526 :             break;
     343     10323273 :         case LEFT_PAREN:
     344              :             {
     345     10323273 :                 List       *l = NIL;
     346              : 
     347              :                 /*----------
     348              :                  * Could be an integer list:    (i int int ...)
     349              :                  * or an OID list:              (o int int ...)
     350              :                  * or an XID list:              (x int int ...)
     351              :                  * or a bitmapset:              (b int int ...)
     352              :                  * or a list of nodes/values:   (node node ...)
     353              :                  *----------
     354              :                  */
     355     10323273 :                 token = pg_strtok(&tok_len);
     356     10323273 :                 if (token == NULL)
     357            0 :                     elog(ERROR, "unterminated List structure");
     358     10323273 :                 if (tok_len == 1 && token[0] == 'i')
     359              :                 {
     360              :                     /* List of integers */
     361              :                     for (;;)
     362      3038094 :                     {
     363              :                         int         val;
     364              :                         char       *endptr;
     365              : 
     366      3342603 :                         token = pg_strtok(&tok_len);
     367      3342603 :                         if (token == NULL)
     368            0 :                             elog(ERROR, "unterminated List structure");
     369      3342603 :                         if (token[0] == ')')
     370       304509 :                             break;
     371      3038094 :                         val = (int) strtol(token, &endptr, 10);
     372      3038094 :                         if (endptr != token + tok_len)
     373            0 :                             elog(ERROR, "unrecognized integer: \"%.*s\"",
     374              :                                  tok_len, token);
     375      3038094 :                         l = lappend_int(l, val);
     376              :                     }
     377       304509 :                     result = (Node *) l;
     378              :                 }
     379     10018764 :                 else if (tok_len == 1 && token[0] == 'o')
     380              :                 {
     381              :                     /* List of OIDs */
     382              :                     for (;;)
     383       871659 :                     {
     384              :                         Oid         val;
     385              :                         char       *endptr;
     386              : 
     387      1270243 :                         token = pg_strtok(&tok_len);
     388      1270243 :                         if (token == NULL)
     389            0 :                             elog(ERROR, "unterminated List structure");
     390      1270243 :                         if (token[0] == ')')
     391       398584 :                             break;
     392       871659 :                         val = (Oid) strtoul(token, &endptr, 10);
     393       871659 :                         if (endptr != token + tok_len)
     394            0 :                             elog(ERROR, "unrecognized OID: \"%.*s\"",
     395              :                                  tok_len, token);
     396       871659 :                         l = lappend_oid(l, val);
     397              :                     }
     398       398584 :                     result = (Node *) l;
     399              :                 }
     400      9620180 :                 else if (tok_len == 1 && token[0] == 'x')
     401              :                 {
     402              :                     /* List of TransactionIds */
     403              :                     for (;;)
     404            0 :                     {
     405              :                         TransactionId val;
     406              :                         char       *endptr;
     407              : 
     408            0 :                         token = pg_strtok(&tok_len);
     409            0 :                         if (token == NULL)
     410            0 :                             elog(ERROR, "unterminated List structure");
     411            0 :                         if (token[0] == ')')
     412            0 :                             break;
     413            0 :                         val = (TransactionId) strtoul(token, &endptr, 10);
     414            0 :                         if (endptr != token + tok_len)
     415            0 :                             elog(ERROR, "unrecognized Xid: \"%.*s\"",
     416              :                                  tok_len, token);
     417            0 :                         l = lappend_xid(l, val);
     418              :                     }
     419            0 :                     result = (Node *) l;
     420              :                 }
     421      9620180 :                 else if (tok_len == 1 && token[0] == 'b')
     422         1716 :                 {
     423              :                     /* Bitmapset -- see also _readBitmapset() */
     424         1716 :                     Bitmapset  *bms = NULL;
     425              : 
     426              :                     for (;;)
     427         2650 :                     {
     428              :                         int         val;
     429              :                         char       *endptr;
     430              : 
     431         4366 :                         token = pg_strtok(&tok_len);
     432         4366 :                         if (token == NULL)
     433            0 :                             elog(ERROR, "unterminated Bitmapset structure");
     434         4366 :                         if (tok_len == 1 && token[0] == ')')
     435         1716 :                             break;
     436         2650 :                         val = (int) strtol(token, &endptr, 10);
     437         2650 :                         if (endptr != token + tok_len)
     438            0 :                             elog(ERROR, "unrecognized integer: \"%.*s\"",
     439              :                                  tok_len, token);
     440         2650 :                         bms = bms_add_member(bms, val);
     441              :                     }
     442         1716 :                     result = (Node *) bms;
     443              :                 }
     444              :                 else
     445              :                 {
     446              :                     /* List of other node types */
     447              :                     for (;;)
     448              :                     {
     449              :                         /* We have already scanned next token... */
     450     41361684 :                         if (token[0] == ')')
     451      9618464 :                             break;
     452     31743220 :                         l = lappend(l, nodeRead(token, tok_len));
     453     31743220 :                         token = pg_strtok(&tok_len);
     454     31743220 :                         if (token == NULL)
     455            0 :                             elog(ERROR, "unterminated List structure");
     456              :                     }
     457      9618464 :                     result = (Node *) l;
     458              :                 }
     459     10323273 :                 break;
     460              :             }
     461            0 :         case RIGHT_PAREN:
     462            0 :             elog(ERROR, "unexpected right parenthesis");
     463              :             result = NULL;      /* keep compiler happy */
     464              :             break;
     465     29722906 :         case OTHER_TOKEN:
     466     29722906 :             if (tok_len == 0)
     467              :             {
     468              :                 /* must be "<>" --- represents a null pointer */
     469     29722906 :                 result = NULL;
     470              :             }
     471              :             else
     472              :             {
     473            0 :                 elog(ERROR, "unrecognized token: \"%.*s\"", tok_len, token);
     474              :                 result = NULL;  /* keep compiler happy */
     475              :             }
     476     29722906 :             break;
     477       251647 :         case T_Integer:
     478              : 
     479              :             /*
     480              :              * we know that the token terminates on a char atoi will stop at
     481              :              */
     482       251647 :             result = (Node *) makeInteger(atoi(token));
     483       251647 :             break;
     484         6085 :         case T_Float:
     485              :             {
     486         6085 :                 char       *fval = (char *) palloc(tok_len + 1);
     487              : 
     488         6085 :                 memcpy(fval, token, tok_len);
     489         6085 :                 fval[tok_len] = '\0';
     490         6085 :                 result = (Node *) makeFloat(fval);
     491              :             }
     492         6085 :             break;
     493        49236 :         case T_Boolean:
     494        49236 :             result = (Node *) makeBoolean(token[0] == 't');
     495        49236 :             break;
     496     16136540 :         case T_String:
     497              :             /* need to remove leading and trailing quotes, and backslashes */
     498     16136540 :             result = (Node *) makeString(debackslash(token + 1, tok_len - 2));
     499     16136540 :             break;
     500         2050 :         case T_BitString:
     501              :             /* need to remove backslashes, but there are no quotes */
     502         2050 :             result = (Node *) makeBitString(debackslash(token, tok_len));
     503         2050 :             break;
     504            0 :         default:
     505            0 :             elog(ERROR, "unrecognized node type: %d", (int) type);
     506              :             result = NULL;      /* keep compiler happy */
     507              :             break;
     508              :     }
     509              : 
     510     84610263 :     return result;
     511              : }
        

Generated by: LCOV version 2.0-1