LCOV - code coverage report
Current view: top level - src/backend/nodes - read.c (source / functions) Hit Total Coverage
Test: PostgreSQL 12beta2 Lines: 111 139 79.9 %
Date: 2019-06-19 14:06:47 Functions: 7 7 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * read.c
       4             :  *    routines to convert a string (legal ascii representation of node) back
       5             :  *    to nodes
       6             :  *
       7             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  *
      11             :  * IDENTIFICATION
      12             :  *    src/backend/nodes/read.c
      13             :  *
      14             :  * HISTORY
      15             :  *    AUTHOR            DATE            MAJOR EVENT
      16             :  *    Andrew Yu         Nov 2, 1994     file creation
      17             :  *
      18             :  *-------------------------------------------------------------------------
      19             :  */
      20             : #include "postgres.h"
      21             : 
      22             : #include <ctype.h>
      23             : 
      24             : #include "common/string.h"
      25             : #include "nodes/pg_list.h"
      26             : #include "nodes/readfuncs.h"
      27             : #include "nodes/value.h"
      28             : 
      29             : 
      30             : /* Static state for pg_strtok */
      31             : static const char *pg_strtok_ptr = NULL;
      32             : 
      33             : /* State flag that determines how readfuncs.c should treat location fields */
      34             : #ifdef WRITE_READ_PARSE_PLAN_TREES
      35             : bool        restore_location_fields = false;
      36             : #endif
      37             : 
      38             : 
      39             : /*
      40             :  * stringToNode -
      41             :  *    builds a Node tree from its string representation (assumed valid)
      42             :  *
      43             :  * restore_loc_fields instructs readfuncs.c whether to restore location
      44             :  * fields rather than set them to -1.  This is currently only supported
      45             :  * in builds with the WRITE_READ_PARSE_PLAN_TREES debugging flag set.
      46             :  */
      47             : static void *
      48      639166 : stringToNodeInternal(const char *str, bool restore_loc_fields)
      49             : {
      50             :     void       *retval;
      51             :     const char *save_strtok;
      52             : #ifdef WRITE_READ_PARSE_PLAN_TREES
      53             :     bool        save_restore_location_fields;
      54             : #endif
      55             : 
      56             :     /*
      57             :      * We save and restore the pre-existing state of pg_strtok. This makes the
      58             :      * world safe for re-entrant invocation of stringToNode, without incurring
      59             :      * a lot of notational overhead by having to pass the next-character
      60             :      * pointer around through all the readfuncs.c code.
      61             :      */
      62      639166 :     save_strtok = pg_strtok_ptr;
      63             : 
      64      639166 :     pg_strtok_ptr = str;        /* point pg_strtok at the string to read */
      65             : 
      66             :     /*
      67             :      * If enabled, likewise save/restore the location field handling flag.
      68             :      */
      69             : #ifdef WRITE_READ_PARSE_PLAN_TREES
      70      639166 :     save_restore_location_fields = restore_location_fields;
      71      639166 :     restore_location_fields = restore_loc_fields;
      72             : #endif
      73             : 
      74      639166 :     retval = nodeRead(NULL, 0); /* do the reading */
      75             : 
      76      639166 :     pg_strtok_ptr = save_strtok;
      77             : 
      78             : #ifdef WRITE_READ_PARSE_PLAN_TREES
      79      639166 :     restore_location_fields = save_restore_location_fields;
      80             : #endif
      81             : 
      82      639166 :     return retval;
      83             : }
      84             : 
      85             : /*
      86             :  * Externally visible entry points
      87             :  */
      88             : void *
      89      165368 : stringToNode(const char *str)
      90             : {
      91      165368 :     return stringToNodeInternal(str, false);
      92             : }
      93             : 
      94             : #ifdef WRITE_READ_PARSE_PLAN_TREES
      95             : 
      96             : void *
      97      473798 : stringToNodeWithLocations(const char *str)
      98             : {
      99      473798 :     return stringToNodeInternal(str, true);
     100             : }
     101             : 
     102             : #endif
     103             : 
     104             : 
     105             : /*****************************************************************************
     106             :  *
     107             :  * the lisp token parser
     108             :  *
     109             :  *****************************************************************************/
     110             : 
     111             : /*
     112             :  * pg_strtok --- retrieve next "token" from a string.
     113             :  *
     114             :  * Works kinda like strtok, except it never modifies the source string.
     115             :  * (Instead of storing nulls into the string, the length of the token
     116             :  * is returned to the caller.)
     117             :  * Also, the rules about what is a token are hard-wired rather than being
     118             :  * configured by passing a set of terminating characters.
     119             :  *
     120             :  * The string is assumed to have been initialized already by stringToNode.
     121             :  *
     122             :  * The rules for tokens are:
     123             :  *  * Whitespace (space, tab, newline) always separates tokens.
     124             :  *  * The characters '(', ')', '{', '}' form individual tokens even
     125             :  *    without any whitespace around them.
     126             :  *  * Otherwise, a token is all the characters up to the next whitespace
     127             :  *    or occurrence of one of the four special characters.
     128             :  *  * A backslash '\' can be used to quote whitespace or one of the four
     129             :  *    special characters, so that it is treated as a plain token character.
     130             :  *    Backslashes themselves must also be backslashed for consistency.
     131             :  *    Any other character can be, but need not be, backslashed as well.
     132             :  *  * If the resulting token is '<>' (with no backslash), it is returned
     133             :  *    as a non-NULL pointer to the token but with length == 0.  Note that
     134             :  *    there is no other way to get a zero-length token.
     135             :  *
     136             :  * Returns a pointer to the start of the next token, and the length of the
     137             :  * token (including any embedded backslashes!) in *length.  If there are
     138             :  * no more tokens, NULL and 0 are returned.
     139             :  *
     140             :  * NOTE: this routine doesn't remove backslashes; the caller must do so
     141             :  * if necessary (see "debackslash").
     142             :  *
     143             :  * NOTE: prior to release 7.0, this routine also had a special case to treat
     144             :  * a token starting with '"' as extending to the next '"'.  This code was
     145             :  * broken, however, since it would fail to cope with a string containing an
     146             :  * embedded '"'.  I have therefore removed this special case, and instead
     147             :  * introduced rules for using backslashes to quote characters.  Higher-level
     148             :  * code should add backslashes to a string constant to ensure it is treated
     149             :  * as a single token.
     150             :  */
     151             : const char *
     152   639357618 : pg_strtok(int *length)
     153             : {
     154             :     const char *local_str;      /* working pointer to string */
     155             :     const char *ret_str;        /* start of token to return */
     156             : 
     157   639357618 :     local_str = pg_strtok_ptr;
     158             : 
     159  1834060032 :     while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t')
     160   555344796 :         local_str++;
     161             : 
     162   639357618 :     if (*local_str == '\0')
     163             :     {
     164           0 :         *length = 0;
     165           0 :         pg_strtok_ptr = local_str;
     166           0 :         return NULL;            /* no more tokens */
     167             :     }
     168             : 
     169             :     /*
     170             :      * Now pointing at start of next token.
     171             :      */
     172   639357618 :     ret_str = local_str;
     173             : 
     174  1249796460 :     if (*local_str == '(' || *local_str == ')' ||
     175  1193537546 :         *local_str == '{' || *local_str == '}')
     176             :     {
     177             :         /* special 1-character token */
     178    83599052 :         local_str++;
     179             :     }
     180             :     else
     181             :     {
     182             :         /* Normal token, possibly containing backslashes */
     183  8236354948 :         while (*local_str != '\0' &&
     184 10479391748 :                *local_str != ' ' && *local_str != '\n' &&
     185  6639121548 :                *local_str != '\t' &&
     186  9949300962 :                *local_str != '(' && *local_str != ')' &&
     187  6620358828 :                *local_str != '{' && *local_str != '}')
     188             :         {
     189  3284567616 :             if (*local_str == '\\' && local_str[1] != '\0')
     190      214852 :                 local_str += 2;
     191             :             else
     192  3284352764 :                 local_str++;
     193             :         }
     194             :     }
     195             : 
     196   639357618 :     *length = local_str - ret_str;
     197             : 
     198             :     /* Recognize special case for "empty" token */
     199   639357618 :     if (*length == 2 && ret_str[0] == '<' && ret_str[1] == '>')
     200    19628546 :         *length = 0;
     201             : 
     202   639357618 :     pg_strtok_ptr = local_str;
     203             : 
     204   639357618 :     return ret_str;
     205             : }
     206             : 
     207             : /*
     208             :  * debackslash -
     209             :  *    create a palloc'd string holding the given token.
     210             :  *    any protective backslashes in the token are removed.
     211             :  */
     212             : char *
     213    24265510 : debackslash(const char *token, int length)
     214             : {
     215    24265510 :     char       *result = palloc(length + 1);
     216    24265510 :     char       *ptr = result;
     217             : 
     218   273896432 :     while (length > 0)
     219             :     {
     220   225365412 :         if (*token == '\\' && length > 1)
     221      214852 :             token++, length--;
     222   225365412 :         *ptr++ = *token++;
     223   225365412 :         length--;
     224             :     }
     225    24265510 :     *ptr = '\0';
     226    24265510 :     return result;
     227             : }
     228             : 
     229             : #define RIGHT_PAREN (1000000 + 1)
     230             : #define LEFT_PAREN  (1000000 + 2)
     231             : #define LEFT_BRACE  (1000000 + 3)
     232             : #define OTHER_TOKEN (1000000 + 4)
     233             : 
     234             : /*
     235             :  * nodeTokenType -
     236             :  *    returns the type of the node token contained in token.
     237             :  *    It returns one of the following valid NodeTags:
     238             :  *      T_Integer, T_Float, T_String, T_BitString
     239             :  *    and some of its own:
     240             :  *      RIGHT_PAREN, LEFT_PAREN, LEFT_BRACE, OTHER_TOKEN
     241             :  *
     242             :  *    Assumption: the ascii representation is legal
     243             :  */
     244             : static NodeTag
     245    71132756 : nodeTokenType(const char *token, int length)
     246             : {
     247             :     NodeTag     retval;
     248             :     const char *numptr;
     249             :     int         numlen;
     250             : 
     251             :     /*
     252             :      * Check if the token is a number
     253             :      */
     254    71132756 :     numptr = token;
     255    71132756 :     numlen = length;
     256    71132756 :     if (*numptr == '+' || *numptr == '-')
     257           0 :         numptr++, numlen--;
     258    71132756 :     if ((numlen > 0 && isdigit((unsigned char) *numptr)) ||
     259    19076898 :         (numlen > 1 && *numptr == '.' && isdigit((unsigned char) numptr[1])))
     260             :     {
     261             :         /*
     262             :          * Yes.  Figure out whether it is integral or float; this requires
     263             :          * both a syntax check and a range check. strtoint() can do both for
     264             :          * us. We know the token will end at a character that strtoint will
     265             :          * stop at, so we do not need to modify the string.
     266             :          */
     267             :         char       *endptr;
     268             : 
     269        1650 :         errno = 0;
     270        1650 :         (void) strtoint(token, &endptr, 10);
     271        1650 :         if (endptr != token + length || errno == ERANGE)
     272           0 :             return T_Float;
     273        1650 :         return T_Integer;
     274             :     }
     275             : 
     276             :     /*
     277             :      * these three cases do not need length checks, since pg_strtok() will
     278             :      * always treat them as single-byte tokens
     279             :      */
     280    71131106 :     else if (*token == '(')
     281     7139310 :         retval = LEFT_PAREN;
     282    63991796 :     else if (*token == ')')
     283           0 :         retval = RIGHT_PAREN;
     284    63991796 :     else if (*token == '{')
     285    27340138 :         retval = LEFT_BRACE;
     286    36651658 :     else if (*token == '"' && length > 1 && token[length - 1] == '"')
     287    19076898 :         retval = T_String;
     288    17574760 :     else if (*token == 'b')
     289           0 :         retval = T_BitString;
     290             :     else
     291    17574760 :         retval = OTHER_TOKEN;
     292    71131106 :     return retval;
     293             : }
     294             : 
     295             : /*
     296             :  * nodeRead -
     297             :  *    Slightly higher-level reader.
     298             :  *
     299             :  * This routine applies some semantic knowledge on top of the purely
     300             :  * lexical tokenizer pg_strtok().   It can read
     301             :  *  * Value token nodes (integers, floats, or strings);
     302             :  *  * General nodes (via parseNodeString() from readfuncs.c);
     303             :  *  * Lists of the above;
     304             :  *  * Lists of integers or OIDs.
     305             :  * The return value is declared void *, not Node *, to avoid having to
     306             :  * cast it explicitly in callers that assign to fields of different types.
     307             :  *
     308             :  * External callers should always pass NULL/0 for the arguments.  Internally
     309             :  * a non-NULL token may be passed when the upper recursion level has already
     310             :  * scanned the first token of a node's representation.
     311             :  *
     312             :  * We assume pg_strtok is already initialized with a string to read (hence
     313             :  * this should only be invoked from within a stringToNode operation).
     314             :  */
     315             : void *
     316    71132756 : nodeRead(const char *token, int tok_len)
     317             : {
     318             :     Node       *result;
     319             :     NodeTag     type;
     320             : 
     321    71132756 :     if (token == NULL)          /* need to read a token? */
     322             :     {
     323    34939298 :         token = pg_strtok(&tok_len);
     324             : 
     325    34939298 :         if (token == NULL)      /* end of input */
     326           0 :             return NULL;
     327             :     }
     328             : 
     329    71132756 :     type = nodeTokenType(token, tok_len);
     330             : 
     331    71132756 :     switch ((int) type)
     332             :     {
     333             :         case LEFT_BRACE:
     334    27340138 :             result = parseNodeString();
     335    27340138 :             token = pg_strtok(&tok_len);
     336    27340138 :             if (token == NULL || token[0] != '}')
     337           0 :                 elog(ERROR, "did not find '}' at end of input node");
     338    27340138 :             break;
     339             :         case LEFT_PAREN:
     340             :             {
     341     7139310 :                 List       *l = NIL;
     342             : 
     343             :                 /*----------
     344             :                  * Could be an integer list:    (i int int ...)
     345             :                  * or an OID list:              (o int int ...)
     346             :                  * or a list of nodes/values:   (node node ...)
     347             :                  *----------
     348             :                  */
     349     7139310 :                 token = pg_strtok(&tok_len);
     350     7139310 :                 if (token == NULL)
     351           0 :                     elog(ERROR, "unterminated List structure");
     352     7367024 :                 if (tok_len == 1 && token[0] == 'i')
     353             :                 {
     354             :                     /* List of integers */
     355             :                     for (;;)
     356      340790 :                     {
     357             :                         int         val;
     358             :                         char       *endptr;
     359             : 
     360      568504 :                         token = pg_strtok(&tok_len);
     361      568504 :                         if (token == NULL)
     362           0 :                             elog(ERROR, "unterminated List structure");
     363      568504 :                         if (token[0] == ')')
     364      227714 :                             break;
     365      340790 :                         val = (int) strtol(token, &endptr, 10);
     366      340790 :                         if (endptr != token + tok_len)
     367           0 :                             elog(ERROR, "unrecognized integer: \"%.*s\"",
     368             :                                  tok_len, token);
     369      340790 :                         l = lappend_int(l, val);
     370             :                     }
     371             :                 }
     372     7315512 :                 else if (tok_len == 1 && token[0] == 'o')
     373             :                 {
     374             :                     /* List of OIDs */
     375             :                     for (;;)
     376      933826 :                     {
     377             :                         Oid         val;
     378             :                         char       *endptr;
     379             : 
     380     1337742 :                         token = pg_strtok(&tok_len);
     381     1337742 :                         if (token == NULL)
     382           0 :                             elog(ERROR, "unterminated List structure");
     383     1337742 :                         if (token[0] == ')')
     384      403916 :                             break;
     385      933826 :                         val = (Oid) strtoul(token, &endptr, 10);
     386      933826 :                         if (endptr != token + tok_len)
     387           0 :                             elog(ERROR, "unrecognized OID: \"%.*s\"",
     388             :                                  tok_len, token);
     389      933826 :                         l = lappend_oid(l, val);
     390             :                     }
     391             :                 }
     392             :                 else
     393             :                 {
     394             :                     /* List of other node types */
     395             :                     for (;;)
     396             :                     {
     397             :                         /* We have already scanned next token... */
     398    78894596 :                         if (token[0] == ')')
     399     6507680 :                             break;
     400    36193458 :                         l = lappend(l, nodeRead(token, tok_len));
     401    36193458 :                         token = pg_strtok(&tok_len);
     402    36193458 :                         if (token == NULL)
     403           0 :                             elog(ERROR, "unterminated List structure");
     404             :                     }
     405             :                 }
     406     7139310 :                 result = (Node *) l;
     407     7139310 :                 break;
     408             :             }
     409             :         case RIGHT_PAREN:
     410           0 :             elog(ERROR, "unexpected right parenthesis");
     411             :             result = NULL;      /* keep compiler happy */
     412             :             break;
     413             :         case OTHER_TOKEN:
     414    17574760 :             if (tok_len == 0)
     415             :             {
     416             :                 /* must be "<>" --- represents a null pointer */
     417    17574760 :                 result = NULL;
     418             :             }
     419             :             else
     420             :             {
     421           0 :                 elog(ERROR, "unrecognized token: \"%.*s\"", tok_len, token);
     422             :                 result = NULL;  /* keep compiler happy */
     423             :             }
     424    17574760 :             break;
     425             :         case T_Integer:
     426             : 
     427             :             /*
     428             :              * we know that the token terminates on a char atoi will stop at
     429             :              */
     430        1650 :             result = (Node *) makeInteger(atoi(token));
     431        1650 :             break;
     432             :         case T_Float:
     433             :             {
     434           0 :                 char       *fval = (char *) palloc(tok_len + 1);
     435             : 
     436           0 :                 memcpy(fval, token, tok_len);
     437           0 :                 fval[tok_len] = '\0';
     438           0 :                 result = (Node *) makeFloat(fval);
     439             :             }
     440           0 :             break;
     441             :         case T_String:
     442             :             /* need to remove leading and trailing quotes, and backslashes */
     443    19076898 :             result = (Node *) makeString(debackslash(token + 1, tok_len - 2));
     444    19076898 :             break;
     445             :         case T_BitString:
     446             :             {
     447           0 :                 char       *val = palloc(tok_len);
     448             : 
     449             :                 /* skip leading 'b' */
     450           0 :                 memcpy(val, token + 1, tok_len - 1);
     451           0 :                 val[tok_len - 1] = '\0';
     452           0 :                 result = (Node *) makeBitString(val);
     453           0 :                 break;
     454             :             }
     455             :         default:
     456           0 :             elog(ERROR, "unrecognized node type: %d", (int) type);
     457             :             result = NULL;      /* keep compiler happy */
     458             :             break;
     459             :     }
     460             : 
     461    71132756 :     return (void *) result;
     462             : }

Generated by: LCOV version 1.13