LCOV - code coverage report
Current view: top level - src/common - jsonapi.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 463 520 89.0 %
Date: 2023-11-29 05:10:53 Functions: 18 18 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * jsonapi.c
       4             :  *      JSON parser and lexer interfaces
       5             :  *
       6             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/common/jsonapi.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #ifndef FRONTEND
      15             : #include "postgres.h"
      16             : #else
      17             : #include "postgres_fe.h"
      18             : #endif
      19             : 
      20             : #include "common/jsonapi.h"
      21             : #include "mb/pg_wchar.h"
      22             : #include "port/pg_lfind.h"
      23             : 
      24             : #ifndef FRONTEND
      25             : #include "miscadmin.h"
      26             : #endif
      27             : 
      28             : /*
      29             :  * The context of the parser is maintained by the recursive descent
      30             :  * mechanism, but is passed explicitly to the error reporting routine
      31             :  * for better diagnostics.
                     :  *
                     :  * Each value names a position in the grammar: what the parser had just
                     :  * seen and what it expected next.  The parse routines hand one of these
                     :  * to report_parse_error(), either directly or through lex_expect().
      32             :  */
      33             : typedef enum                    /* contexts of JSON parser */
      34             : {
      35             :     JSON_PARSE_VALUE,           /* expecting a value */
      36             :     JSON_PARSE_STRING,          /* expecting a string (for a field name) */
      37             :     JSON_PARSE_ARRAY_START,     /* saw '[', expecting value or ']' */
      38             :     JSON_PARSE_ARRAY_NEXT,      /* saw array element, expecting ',' or ']' */
      39             :     JSON_PARSE_OBJECT_START,    /* saw '{', expecting label or '}' */
      40             :     JSON_PARSE_OBJECT_LABEL,    /* saw object label, expecting ':' */
      41             :     JSON_PARSE_OBJECT_NEXT,     /* saw object value, expecting ',' or '}' */
      42             :     JSON_PARSE_OBJECT_COMMA,    /* saw object ',', expecting next label */
      43             :     JSON_PARSE_END,             /* saw the end of a document, expect nothing */
      44             : } JsonParseContext;
      45             : 
      46             : static inline JsonParseErrorType json_lex_string(JsonLexContext *lex);
      47             : static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, char *s,
      48             :                                                  bool *num_err, int *total_len);
      49             : static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, JsonSemAction *sem);
      50             : static JsonParseErrorType parse_object_field(JsonLexContext *lex, JsonSemAction *sem);
      51             : static JsonParseErrorType parse_object(JsonLexContext *lex, JsonSemAction *sem);
      52             : static JsonParseErrorType parse_array_element(JsonLexContext *lex, JsonSemAction *sem);
      53             : static JsonParseErrorType parse_array(JsonLexContext *lex, JsonSemAction *sem);
      54             : static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
      55             : 
      56             : /*
                     :  * The null action object used for pure validation.
                     :  *
                     :  * All ten initializer entries are NULL.  The parse routines in this file
                     :  * test each callback pointer for NULL before calling it, so parsing with
                     :  * this object runs no semantic actions and only checks syntax.
                     :  */
      57             : JsonSemAction nullSemAction =
      58             : {
      59             :     NULL, NULL, NULL, NULL, NULL,
      60             :     NULL, NULL, NULL, NULL, NULL
      61             : };
      62             : 
      63             : /* Recursive Descent parser support routines */
      64             : 
      65             : /*
      66             :  * lex_peek
      67             :  *
      68             :  * Return the type of the current look-ahead token (lex->token_type).
                     :  * The token is not consumed and the lexer state is not modified.
      69             :  */
      70             : static inline JsonTokenType
      71     4962466 : lex_peek(JsonLexContext *lex)
      72             : {
      73     4962466 :     return lex->token_type;
      74             : }
      75             : 
      76             : /*
      77             :  * lex_expect
      78             :  *
      79             :  * Move the lexer to the next token if the current look-ahead token matches
      80             :  * the parameter token. Otherwise, report an error.
                     :  *
                     :  * ctx is not examined here; it is only forwarded to report_parse_error()
                     :  * when the token does not match.
                     :  *
                     :  * Returns the result of json_lex() on a match, and the result of
                     :  * report_parse_error() otherwise.
      81             :  */
      82             : static inline JsonParseErrorType
      83      933456 : lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
      84             : {
      85      933456 :     if (lex_peek(lex) == token)
      86      933348 :         return json_lex(lex);
      87             :     else
      88         108 :         return report_parse_error(ctx, lex);
      89             : }
      90             : 
      91             : /*
                     :  * Chars to consider as part of an alphanumeric token: ASCII letters,
                     :  * ASCII digits, underscore, and any char for which IS_HIGHBIT_SET() is
                     :  * true.
                     :  *
                     :  * The argument is evaluated more than once, so it must not have side
                     :  * effects.
                     :  */
      92             : #define JSON_ALPHANUMERIC_CHAR(c)  \
      93             :     (((c) >= 'a' && (c) <= 'z') || \
      94             :      ((c) >= 'A' && (c) <= 'Z') || \
      95             :      ((c) >= '0' && (c) <= '9') || \
      96             :      (c) == '_' || \
      97             :      IS_HIGHBIT_SET(c))
      98             : 
      99             : /*
     100             :  * Utility function to check if a string is a valid JSON number.
     101             :  *
     102             :  * str is of length len, and need not be null-terminated.
                     :  *
                     :  * Returns false when len <= 0.  Otherwise returns true only if
                     :  * json_lex_number() flags no numeric error and the length it reports
                     :  * equals the whole input (not counting one optional leading '-').
     103             :  */
     104             : bool
     105        2962 : IsValidJsonNumber(const char *str, int len)
     106             : {
     107             :     bool        numeric_error;
     108             :     int         total_len;
     109             :     JsonLexContext dummy_lex;
     110             : 
     111        2962 :     if (len <= 0)
     112           0 :         return false;
     113             : 
     114             :     /*
     115             :      * json_lex_number expects a leading  '-' to have been eaten already.
     116             :      *
     117             :      * having to cast away the constness of str is ugly, but there's not much
     118             :      * easy alternative.
                     :      *
                     :      * Only ->input and ->input_length of dummy_lex are filled in; every
                     :      * other field stays uninitialized.  NOTE(review): this presumably
                     :      * relies on json_lex_number() touching nothing else when num_err is
                     :      * supplied -- confirm against its definition.
     119             :      */
     120        2962 :     if (*str == '-')
     121             :     {
     122          58 :         dummy_lex.input = unconstify(char *, str) + 1;
     123          58 :         dummy_lex.input_length = len - 1;
     124             :     }
     125             :     else
     126             :     {
     127        2904 :         dummy_lex.input = unconstify(char *, str);
     128        2904 :         dummy_lex.input_length = len;
     129             :     }
     130             : 
                     :     /*
                     :      * The function's own return value is deliberately not used; the verdict
                     :      * comes from numeric_error and total_len.
                     :      */
     131        2962 :     json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
     132             : 
     133        2962 :     return (!numeric_error) && (total_len == dummy_lex.input_length);
     134             : }
     135             : 
     136             : /*
     137             :  * makeJsonLexContextCstringLen
     138             :  *      Initialize the given JsonLexContext object, or create one
     139             :  *
     140             :  * If a valid 'lex' pointer is given, it is initialized.  This can
     141             :  * be used for stack-allocated structs, saving overhead.  If NULL is
     142             :  * given, a new struct is allocated.
     143             :  *
                     :  * 'json' is the input text and 'len' its length; the text is not copied,
                     :  * the context merely keeps pointers into it, so the buffer has to outlive
                     :  * the context.  'encoding' is stored in ->input_encoding.  Line counting
                     :  * starts at 1.
                     :  *
     144             :  * If need_escapes is true, ->strval stores the unescaped lexemes.
     145             :  * Unescaping is expensive, so only request it when necessary.
     146             :  *
     147             :  * If need_escapes is true or lex was given as NULL, then caller is
     148             :  * responsible for freeing the returned struct, either by calling
     149             :  * freeJsonLexContext() or (in backend environment) via memory context
     150             :  * cleanup.
                     :  *
                     :  * Returns the initialized context: 'lex' itself when one was supplied,
                     :  * otherwise the newly allocated struct.
     151             :  */
     152             : JsonLexContext *
     153       30932 : makeJsonLexContextCstringLen(JsonLexContext *lex, char *json,
     154             :                              int len, int encoding, bool need_escapes)
     155             : {
     156       30932 :     if (lex == NULL)
     157             :     {
                     :         /* flag the struct so freeJsonLexContext() knows to pfree it */
     158        5634 :         lex = palloc0(sizeof(JsonLexContext));
     159        5634 :         lex->flags |= JSONLEX_FREE_STRUCT;
     160             :     }
     161             :     else
     162       25298 :         memset(lex, 0, sizeof(JsonLexContext));
     163             : 
     164       30932 :     lex->input = lex->token_terminator = lex->line_start = json;
     165       30932 :     lex->line_number = 1;
     166       30932 :     lex->input_length = len;
     167       30932 :     lex->input_encoding = encoding;
     168       30932 :     if (need_escapes)
     169             :     {
                     :         /* flag the buffer so freeJsonLexContext() knows to release it */
     170       23584 :         lex->strval = makeStringInfo();
     171       23584 :         lex->flags |= JSONLEX_FREE_STRVAL;
     172             :     }
     173             : 
     174       30932 :     return lex;
     175             : }
     176             : 
     177             : /*
     178             :  * Free memory in a JsonLexContext.  There's no need for this if a *lex
     179             :  * pointer was given when the object was made and need_escapes was false,
     180             :  * or (in backend environment) a memory context delete/reset is imminent.
                     :  *
                     :  * What gets released is decided by lex->flags: the strval buffer when
                     :  * JSONLEX_FREE_STRVAL is set, and the struct itself when
                     :  * JSONLEX_FREE_STRUCT is set.  In the latter case lex is dangling on
                     :  * return.  lex must not be NULL; it is dereferenced unconditionally.
     181             :  */
     182             : void
     183        6382 : freeJsonLexContext(JsonLexContext *lex)
     184             : {
     185        6382 :     if (lex->flags & JSONLEX_FREE_STRVAL)
     186             :     {
     187        5992 :         pfree(lex->strval->data);
     188        5992 :         pfree(lex->strval);
     189             :     }
                     :     /* must come last: the checks above still read from *lex */
     190        6382 :     if (lex->flags & JSONLEX_FREE_STRUCT)
     191        5404 :         pfree(lex);
     192        6382 : }
     193             : 
     194             : /*
     195             :  * pg_parse_json
     196             :  *
     197             :  * Publicly visible entry point for the JSON parser.
     198             :  *
     199             :  * lex is a lexing context, set up for the json to be processed by calling
     200             :  * makeJsonLexContextCstringLen() (or a wrapper around it). sem is a
     201             :  * structure of function pointers to semantic action routines to be called
     202             :  * at appropriate spots during parsing, and a pointer to a state object to
                     :  * be passed to those routines.
                     :  *
                     :  * The top-level value may be an object, an array or a bare scalar.  After
                     :  * it has been parsed successfully, the next token must be the end of
                     :  * input.
                     :  *
                     :  * Returns JSON_SUCCESS, or the first non-success result produced by the
                     :  * lexer, a parse routine or a semantic action.
     203             :  */
     204             : JsonParseErrorType
     205       30194 : pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
     206             : {
     207             :     JsonTokenType tok;
     208             :     JsonParseErrorType result;
     209             : 
     210             :     /* get the initial token */
     211       30194 :     result = json_lex(lex);
     212       30194 :     if (result != JSON_SUCCESS)
     213         214 :         return result;
     214             : 
     215       29980 :     tok = lex_peek(lex);
     216             : 
     217             :     /* parse by recursive descent */
     218       29980 :     switch (tok)
     219             :     {
     220       19166 :         case JSON_TOKEN_OBJECT_START:
     221       19166 :             result = parse_object(lex, sem);
     222       19100 :             break;
     223        5242 :         case JSON_TOKEN_ARRAY_START:
     224        5242 :             result = parse_array(lex, sem);
     225        5138 :             break;
     226        5572 :         default:
     227        5572 :             result = parse_scalar(lex, sem);    /* json can be a bare scalar */
     228             :     }
     229             : 
                     :     /* nothing but end of input may follow the top-level value */
     230       29738 :     if (result == JSON_SUCCESS)
     231       29280 :         result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
     232             : 
     233       29738 :     return result;
     234             : }
     235             : 
     236             : /*
     237             :  * json_count_array_elements
     238             :  *
     239             :  * Returns number of array elements in lex context at start of array token
     240             :  * until end of array token at same nesting level.
     241             :  *
     242             :  * Designed to be called from array_start routines.
                     :  *
                     :  * The count is stored in *elements and JSON_SUCCESS is returned.  If the
                     :  * lexer or parser reports a problem, that result is returned instead and
                     :  * *elements is left untouched.  All lexing is done on a private copy of
                     :  * the context, so the caller's lex is not advanced, and the elements are
                     :  * parsed with nullSemAction, so no semantic actions run.
     243             :  */
     244             : JsonParseErrorType
     245           6 : json_count_array_elements(JsonLexContext *lex, int *elements)
     246             : {
     247             :     JsonLexContext copylex;
     248             :     int         count;
     249             :     JsonParseErrorType result;
     250             : 
     251             :     /*
     252             :      * It's safe to do this with a shallow copy because the lexical routines
     253             :      * don't scribble on the input. They do scribble on the other pointers
     254             :      * etc, so doing this with a copy makes that safe.
                     :      *
                     :      * NOTE(review): the lex_level bump on the copy presumably mirrors what
                     :      * parse_array() does on entering an array -- confirm.
     255             :      */
     256           6 :     memcpy(&copylex, lex, sizeof(JsonLexContext));
     257           6 :     copylex.strval = NULL;      /* not interested in values here */
     258           6 :     copylex.lex_level++;
     259             : 
     260           6 :     count = 0;
     261           6 :     result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
     262             :                         JSON_TOKEN_ARRAY_START);
     263           6 :     if (result != JSON_SUCCESS)
     264           0 :         return result;
                     :     /* an empty array skips the loop and leaves count at zero */
     265           6 :     if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
     266             :     {
     267             :         while (1)
     268             :         {
     269          48 :             count++;
     270          48 :             result = parse_array_element(&copylex, &nullSemAction);
     271          48 :             if (result != JSON_SUCCESS)
     272           0 :                 return result;
                     :             /* anything but a comma ends the element list */
     273          48 :             if (copylex.token_type != JSON_TOKEN_COMMA)
     274           6 :                 break;
     275          42 :             result = json_lex(&copylex);
     276          42 :             if (result != JSON_SUCCESS)
     277           0 :                 return result;
     278             :         }
     279             :     }
     280           6 :     result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
     281             :                         JSON_TOKEN_ARRAY_END);
     282           6 :     if (result != JSON_SUCCESS)
     283           0 :         return result;
     284             : 
     285           6 :     *elements = count;
     286           6 :     return JSON_SUCCESS;
     287             : }
     288             : 
     289             : /*
     290             :  *  Recursive Descent parse routines. There is one for each structural
     291             :  *  element in a json document:
     292             :  *    - scalar (string, number, true, false, null)
     293             :  *    - array  ( [ ] )
     294             :  *    - array element
     295             :  *    - object ( { } )
     296             :  *    - object field
                     :  *
                     :  *  parse_scalar, immediately below, handles the scalar case.  It reports
                     :  *  a parse error for any look-ahead token that is not a string, number,
                     :  *  true, false or null.  Otherwise it consumes the token and, if
                     :  *  sem->scalar is set, calls it with the token type and a freshly
                     :  *  allocated copy of the value: the contents of lex->strval for a string
                     :  *  token (NULL when lex->strval is NULL), the raw lexeme for anything
                     :  *  else.  The callback's result is returned.
     297             :  */
     298             : static inline JsonParseErrorType
     299      711458 : parse_scalar(JsonLexContext *lex, JsonSemAction *sem)
     300             : {
     301      711458 :     char       *val = NULL;
     302      711458 :     json_scalar_action sfunc = sem->scalar;
     303      711458 :     JsonTokenType tok = lex_peek(lex);
     304             :     JsonParseErrorType result;
     305             : 
     306             :     /* a scalar must be a string, a number, true, false, or null */
     307      711458 :     if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
     308       23740 :         tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
     309             :         tok != JSON_TOKEN_NULL)
     310         180 :         return report_parse_error(JSON_PARSE_VALUE, lex);
     311             : 
     312             :     /* if no semantic function, just consume the token */
     313      711278 :     if (sfunc == NULL)
     314       10562 :         return json_lex(lex);
     315             : 
     316             :     /* extract the de-escaped string value, or the raw lexeme */
     317      700716 :     if (lex_peek(lex) == JSON_TOKEN_STRING)
     318             :     {
     319      457118 :         if (lex->strval != NULL)
     320      451310 :             val = pstrdup(lex->strval->data);
     321             :     }
     322             :     else
     323             :     {
     324      243598 :         int         len = (lex->token_terminator - lex->token_start);
     325             : 
     326      243598 :         val = palloc(len + 1);
     327      243598 :         memcpy(val, lex->token_start, len);
     328      243598 :         val[len] = '\0';
     329             :     }
     330             : 
     331             :     /*
                     :      * Consume the token.  This happens before the callback runs, so the
                     :      * callback is given the token type saved in tok above.  If lexing the
                     :      * following token fails we return without freeing val.
                     :      */
     332      700716 :     result = json_lex(lex);
     333      700716 :     if (result != JSON_SUCCESS)
     334           0 :         return result;
     335             : 
     336             :     /* invoke the callback */
     337      700716 :     result = (*sfunc) (sem->semstate, val, tok);
     338             : 
     339      700622 :     return result;
     340             : }
     341             : 
     342             : static JsonParseErrorType
     343      713302 : parse_object_field(JsonLexContext *lex, JsonSemAction *sem)
     344             : {
     345             :     /*
     346             :      * An object field is "fieldname" : value where value can be a scalar,
     347             :      * object or array.  Note: in user-facing docs and error messages, we
     348             :      * generally call a field name a "key".
                     :      *
                     :      * sem->object_field_start is called after the ':' has been consumed
                     :      * and before the value is parsed; sem->object_field_end is called
                     :      * after the value has been parsed successfully.  Both receive the
                     :      * same fname and isnull.  fname is a copy of lex->strval's contents,
                     :      * made only when at least one of the two callbacks is set and
                     :      * lex->strval is not NULL; otherwise it is NULL.  isnull is true when
                     :      * the value's first token is the literal null.
                     :      *
                     :      * Returns JSON_SUCCESS, or the first non-success result from the
                     :      * lexer, a callback or a nested parse routine.
     349             :      */
     350             : 
     351      713302 :     char       *fname = NULL;   /* keep compiler quiet */
     352      713302 :     json_ofield_action ostart = sem->object_field_start;
     353      713302 :     json_ofield_action oend = sem->object_field_end;
     354             :     bool        isnull;
     355             :     JsonTokenType tok;
     356             :     JsonParseErrorType result;
     357             : 
     358      713302 :     if (lex_peek(lex) != JSON_TOKEN_STRING)
     359          12 :         return report_parse_error(JSON_PARSE_STRING, lex);
                     :     /* copy the key before the lexer moves on to the next token */
     360      713290 :     if ((ostart != NULL || oend != NULL) && lex->strval != NULL)
     361      667270 :         fname = pstrdup(lex->strval->data);
     362      713290 :     result = json_lex(lex);
     363      713290 :     if (result != JSON_SUCCESS)
     364          12 :         return result;
     365             : 
     366      713278 :     result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
     367      713278 :     if (result != JSON_SUCCESS)
     368         114 :         return result;
     369             : 
                     :     /* look-ahead is now the first token of the field's value */
     370      713164 :     tok = lex_peek(lex);
     371      713164 :     isnull = tok == JSON_TOKEN_NULL;
     372             : 
     373      713164 :     if (ostart != NULL)
     374             :     {
     375      667162 :         result = (*ostart) (sem->semstate, fname, isnull);
     376      667154 :         if (result != JSON_SUCCESS)
     377           0 :             return result;
     378             :     }
     379             : 
     380      713156 :     switch (tok)
     381             :     {
     382       11760 :         case JSON_TOKEN_OBJECT_START:
     383       11760 :             result = parse_object(lex, sem);
     384        3952 :             break;
     385       14922 :         case JSON_TOKEN_ARRAY_START:
     386       14922 :             result = parse_array(lex, sem);
     387       14888 :             break;
     388      686474 :         default:
     389      686474 :             result = parse_scalar(lex, sem);
     390             :     }
     391      705310 :     if (result != JSON_SUCCESS)
     392          42 :         return result;
     393             : 
     394      705268 :     if (oend != NULL)
     395             :     {
     396      113750 :         result = (*oend) (sem->semstate, fname, isnull);
     397      113750 :         if (result != JSON_SUCCESS)
     398           0 :             return result;
     399             :     }
     400             : 
     401      705268 :     return JSON_SUCCESS;
     402             : }
     403             : 
     404             : static JsonParseErrorType
     405      143694 : parse_object(JsonLexContext *lex, JsonSemAction *sem)
     406             : {
     407             :     /*
     408             :      * an object is a possibly empty sequence of object fields, separated by
     409             :      * commas and surrounded by curly braces.
                     :      *
                     :      * The caller must have positioned the lexer on the '{' token (this is
                     :      * only asserted, not checked at run time).  sem->object_start is
                     :      * called before the '{' is consumed and sem->object_end after the '}'
                     :      * has been consumed, when they are set.
                     :      *
                     :      * Returns JSON_SUCCESS, or the first non-success result from the
                     :      * lexer, a callback or a nested parse routine.
     410             :      */
     411      143694 :     json_struct_action ostart = sem->object_start;
     412      143694 :     json_struct_action oend = sem->object_end;
     413             :     JsonTokenType tok;
     414             :     JsonParseErrorType result;
     415             : 
                     :     /*
                     :      * Backend builds only.  NOTE(review): check_stack_depth() is defined
                     :      * elsewhere; presumably it guards this recursion against running out
                     :      * of stack -- confirm.
                     :      */
     416             : #ifndef FRONTEND
     417       42808 :     check_stack_depth();
     418             : #endif
     419             : 
     420      143682 :     if (ostart != NULL)
     421             :     {
     422      124058 :         result = (*ostart) (sem->semstate);
     423      124038 :         if (result != JSON_SUCCESS)
     424           0 :             return result;
     425             :     }
     426             : 
     427             :     /*
     428             :      * Data inside an object is at a higher nesting level than the object
     429             :      * itself. Note that we increment this after we call the semantic routine
     430             :      * for the object start and restore it before we call the routine for the
     431             :      * object end.
                     :      *
                     :      * The error returns between here and the decrement further down leave
                     :      * lex_level incremented.
     432             :      */
     433      143662 :     lex->lex_level++;
     434             : 
     435             :     Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START);
     436      143662 :     result = json_lex(lex);
     437      143662 :     if (result != JSON_SUCCESS)
     438          60 :         return result;
     439             : 
     440      143602 :     tok = lex_peek(lex);
     441      143602 :     switch (tok)
     442             :     {
     443      141324 :         case JSON_TOKEN_STRING:
                     :             /* first field, then one more field after every comma */
     444      141324 :             result = parse_object_field(lex, sem);
     445      705448 :             while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
     446             :             {
     447      571978 :                 result = json_lex(lex);
     448      571978 :                 if (result != JSON_SUCCESS)
     449           0 :                     break;
     450      571978 :                 result = parse_object_field(lex, sem);
     451             :             }
     452      133470 :             break;
     453        2264 :         case JSON_TOKEN_OBJECT_END:
                     :             /* empty object; the '}' is consumed by lex_expect below */
     454        2264 :             break;
     455          14 :         default:
     456             :             /* case of an invalid initial token inside the object */
     457          14 :             result = report_parse_error(JSON_PARSE_OBJECT_START, lex);
     458             :     }
     459      135748 :     if (result != JSON_SUCCESS)
     460         194 :         return result;
     461             : 
     462      135554 :     result = lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
     463      135554 :     if (result != JSON_SUCCESS)
     464          36 :         return result;
     465             : 
     466      135518 :     lex->lex_level--;
     467             : 
     468      135518 :     if (oend != NULL)
     469             :     {
     470      117594 :         result = (*oend) (sem->semstate);
     471      117548 :         if (result != JSON_SUCCESS)
     472           0 :             return result;
     473             :     }
     474             : 
     475      135472 :     return JSON_SUCCESS;
     476             : }
     477             : 
     478             : static JsonParseErrorType
     479      144188 : parse_array_element(JsonLexContext *lex, JsonSemAction *sem)
     480             : {
     481      144188 :     json_aelem_action astart = sem->array_element_start;
     482      144188 :     json_aelem_action aend = sem->array_element_end;
     483      144188 :     JsonTokenType tok = lex_peek(lex);
     484             :     JsonParseErrorType result;
     485             :     bool        isnull;
     486             :     /*
                     :      * Parse one array element, starting at the current look-ahead token.
                     :      *
                     :      * sem->array_element_start is called before the element is parsed and
                     :      * sem->array_element_end after it has been parsed successfully, when
                     :      * they are set.  Both receive isnull, which is true when the element's
                     :      * first token is the literal null.
                     :      *
                     :      * Returns JSON_SUCCESS, or the first non-success result from a
                     :      * callback or a nested parse routine.
                     :      */
     487      144188 :     isnull = tok == JSON_TOKEN_NULL;
     488             : 
     489      144188 :     if (astart != NULL)
     490             :     {
     491        7780 :         result = (*astart) (sem->semstate, isnull);
     492        7780 :         if (result != JSON_SUCCESS)
     493           0 :             return result;
     494             :     }
     495             : 
     496             :     /* an array element is any object, array or scalar */
     497      144188 :     switch (tok)
     498             :     {
     499      112768 :         case JSON_TOKEN_OBJECT_START:
     500      112768 :             result = parse_object(lex, sem);
     501      112710 :             break;
     502       12008 :         case JSON_TOKEN_ARRAY_START:
     503       12008 :             result = parse_array(lex, sem);
     504        3202 :             break;
     505       19412 :         default:
                     :             /* parse_scalar reports the error if this is not a scalar token */
     506       19412 :             result = parse_scalar(lex, sem);
     507             :     }
     508             : 
     509      135306 :     if (result != JSON_SUCCESS)
     510          66 :         return result;
     511             : 
     512      135240 :     if (aend != NULL)
     513             :     {
     514        7270 :         result = (*aend) (sem->semstate, isnull);
     515        7258 :         if (result != JSON_SUCCESS)
     516           0 :             return result;
     517             :     }
     518             : 
     519      135228 :     return JSON_SUCCESS;
     520             : }
     521             : 
     522             : static JsonParseErrorType
     523       32172 : parse_array(JsonLexContext *lex, JsonSemAction *sem)
     524             : {
     525             :     /*
     526             :      * an array is a possibly empty sequence of array elements, separated by
     527             :      * commas and surrounded by square brackets.
                     :      *
                     :      * sem->array_start is called before the '[' is consumed and
                     :      * sem->array_end after the ']' has been consumed, when they are set.
                     :      *
                     :      * Returns JSON_SUCCESS, or the first non-success result from the
                     :      * lexer, a callback or a nested parse routine.
     528             :      */
     529       32172 :     json_struct_action astart = sem->array_start;
     530       32172 :     json_struct_action aend = sem->array_end;
     531             :     JsonParseErrorType result;
     532             : 
                     :     /*
                     :      * Backend builds only.  NOTE(review): check_stack_depth() is defined
                     :      * elsewhere; presumably it guards this recursion against running out
                     :      * of stack -- confirm.
                     :      */
     533             : #ifndef FRONTEND
     534       31920 :     check_stack_depth();
     535             : #endif
     536             : 
     537       32160 :     if (astart != NULL)
     538             :     {
     539       14008 :         result = (*astart) (sem->semstate);
     540       13994 :         if (result != JSON_SUCCESS)
     541           0 :             return result;
     542             :     }
     543             : 
     544             :     /*
     545             :      * Data inside an array is at a higher nesting level than the array
     546             :      * itself. Note that we increment this after we call the semantic routine
     547             :      * for the array start and restore it before we call the routine for the
     548             :      * array end.
                     :      *
                     :      * The error returns between here and the decrement further down leave
                     :      * lex_level incremented.
     549             :      */
     550       32146 :     lex->lex_level++;
     551             : 
     552       32146 :     result = lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
                     :     /* an empty array skips straight to the ']' check below */
     553       32146 :     if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
     554             :     {
     555       25408 :         result = parse_array_element(lex, sem);
     556             : 
                     :         /* one more element after every comma */
     557      135246 :         while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
     558             :         {
     559      118732 :             result = json_lex(lex);
     560      118732 :             if (result != JSON_SUCCESS)
     561           0 :                 break;
     562      118732 :             result = parse_array_element(lex, sem);
     563             :         }
     564             :     }
     565       23252 :     if (result != JSON_SUCCESS)
     566          66 :         return result;
     567             : 
     568       23186 :     result = lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
     569       23186 :     if (result != JSON_SUCCESS)
     570          24 :         return result;
     571             : 
     572       23162 :     lex->lex_level--;
     573             : 
     574       23162 :     if (aend != NULL)
     575             :     {
     576        7388 :         result = (*aend) (sem->semstate);
     577        7364 :         if (result != JSON_SUCCESS)
     578           0 :             return result;
     579             :     }
     580             : 
     581       23138 :     return JSON_SUCCESS;
     582             : }
     583             : 
     584             : /*
     585             :  * Lex one token from the input stream.
                     :  *
                     :  * Scanning resumes at lex->token_terminator.  Spaces, tabs, newlines and
                     :  * carriage returns are skipped first; each newline bumps lex->line_number
                     :  * and moves lex->line_start to the character after it.
                     :  *
                     :  * On success, JSON_SUCCESS is returned and lex->token_type,
                     :  * lex->token_start, lex->token_terminator and lex->prev_token_terminator
                     :  * describe the token.  At end of input the token type is JSON_TOKEN_END
                     :  * and lex->token_start is set to NULL.
                     :  *
                     :  * On failure an error code is returned without updating lex->token_type:
                     :  * either the code from json_lex_string() or json_lex_number(), or
                     :  * JSON_INVALID_TOKEN for an unexpected character or word.
     586             :  */
     587             : JsonParseErrorType
     588     3223262 : json_lex(JsonLexContext *lex)
     589             : {
     590             :     char       *s;
     591     3223262 :     char       *const end = lex->input + lex->input_length;
     592             :     JsonParseErrorType result;
     593             : 
     594             :     /* Skip leading whitespace. */
     595     3223262 :     s = lex->token_terminator;  /* pick up where the last token ended */
     596     7694448 :     while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
     597             :     {
     598     4471186 :         if (*s++ == '\n')
     599             :         {
     600      272576 :             ++lex->line_number;
     601      272576 :             lex->line_start = s;
     602             :         }
     603             :     }
     604     3223262 :     lex->token_start = s;
     605             : 
     606             :     /* Determine token type. */
     607     3223262 :     if (s >= end)
     608             :     {
     609       58740 :         lex->token_start = NULL;    /* no token text at end of input */
     610       58740 :         lex->prev_token_terminator = lex->token_terminator;
     611       58740 :         lex->token_terminator = s;
     612       58740 :         lex->token_type = JSON_TOKEN_END;
     613             :     }
     614             :     else
     615             :     {
     616     3164522 :         switch (*s)
     617             :         {
     618             :                 /* Single-character token, some kind of punctuation mark. */
     619      144006 :             case '{':
     620      144006 :                 lex->prev_token_terminator = lex->token_terminator;
     621      144006 :                 lex->token_terminator = s + 1;
     622      144006 :                 lex->token_type = JSON_TOKEN_OBJECT_START;
     623      144006 :                 break;
     624      135552 :             case '}':
     625      135552 :                 lex->prev_token_terminator = lex->token_terminator;
     626      135552 :                 lex->token_terminator = s + 1;
     627      135552 :                 lex->token_type = JSON_TOKEN_OBJECT_END;
     628      135552 :                 break;
     629       32310 :             case '[':
     630       32310 :                 lex->prev_token_terminator = lex->token_terminator;
     631       32310 :                 lex->token_terminator = s + 1;
     632       32310 :                 lex->token_type = JSON_TOKEN_ARRAY_START;
     633       32310 :                 break;
     634       23312 :             case ']':
     635       23312 :                 lex->prev_token_terminator = lex->token_terminator;
     636       23312 :                 lex->token_terminator = s + 1;
     637       23312 :                 lex->token_type = JSON_TOKEN_ARRAY_END;
     638       23312 :                 break;
     639      690806 :             case ',':
     640      690806 :                 lex->prev_token_terminator = lex->token_terminator;
     641      690806 :                 lex->token_terminator = s + 1;
     642      690806 :                 lex->token_type = JSON_TOKEN_COMMA;
     643      690806 :                 break;
     644      713278 :             case ':':
     645      713278 :                 lex->prev_token_terminator = lex->token_terminator;
     646      713278 :                 lex->token_terminator = s + 1;
     647      713278 :                 lex->token_type = JSON_TOKEN_COLON;
     648      713278 :                 break;
     649     1173484 :             case '"':
     650             :                 /* string */
     651     1173484 :                 result = json_lex_string(lex);
     652     1173484 :                 if (result != JSON_SUCCESS)
     653         172 :                     return result;
     654     1173312 :                 lex->token_type = JSON_TOKEN_STRING;
     655     1173312 :                 break;
     656         142 :             case '-':
     657             :                 /* Negative number. */
     658         142 :                 result = json_lex_number(lex, s + 1, NULL, NULL);   /* s + 1 skips the sign */
     659         142 :                 if (result != JSON_SUCCESS)
     660           0 :                     return result;
     661         142 :                 lex->token_type = JSON_TOKEN_NUMBER;
     662         142 :                 break;
     663      220512 :             case '0':
     664             :             case '1':
     665             :             case '2':
     666             :             case '3':
     667             :             case '4':
     668             :             case '5':
     669             :             case '6':
     670             :             case '7':
     671             :             case '8':
     672             :             case '9':
     673             :                 /* Positive number. */
     674      220512 :                 result = json_lex_number(lex, s, NULL, NULL);
     675      220512 :                 if (result != JSON_SUCCESS)
     676          48 :                     return result;
     677      220464 :                 lex->token_type = JSON_TOKEN_NUMBER;
     678      220464 :                 break;
     679       31120 :             default:
     680             :                 {
     681             :                     char       *p;
     682             : 
     683             :                     /*
     684             :                      * We're not dealing with a string, number, legal
     685             :                      * punctuation mark, or end of string.  The only legal
     686             :                      * tokens we might find here are true, false, and null,
     687             :                      * but for error reporting purposes we scan until we see a
     688             :                      * non-alphanumeric character.  That way, we can report
     689             :                      * the whole word as an unexpected token, rather than just
     690             :                      * some unintuitive prefix thereof.
     691             :                      */
     692      174216 :                     for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++)
     693             :                          /* skip */ ;
     694             : 
     695             :                     /*
     696             :                      * We got some sort of unexpected punctuation or an
     697             :                      * otherwise unexpected character, so just complain about
     698             :                      * that one character.
     699             :                      */
     700       31120 :                     if (p == s)
     701             :                     {
     702          24 :                         lex->prev_token_terminator = lex->token_terminator;
     703          24 :                         lex->token_terminator = s + 1;
     704          24 :                         return JSON_INVALID_TOKEN;
     705             :                     }
     706             : 
     707             :                     /*
     708             :                      * We've got a real alphanumeric token here.  If it
     709             :                      * happens to be true, false, or null, all is well.  If
     710             :                      * not, error out.
     711             :                      */
     712       31096 :                     lex->prev_token_terminator = lex->token_terminator;
     713       31096 :                     lex->token_terminator = p;
     714       31096 :                     if (p - s == 4)
     715             :                     {
     716       12048 :                         if (memcmp(s, "true", 4) == 0)
     717        7308 :                             lex->token_type = JSON_TOKEN_TRUE;
     718        4740 :                         else if (memcmp(s, "null", 4) == 0)
     719        4728 :                             lex->token_type = JSON_TOKEN_NULL;
     720             :                         else
     721          12 :                             return JSON_INVALID_TOKEN;
     722             :                     }
     723       19048 :                     else if (p - s == 5 && memcmp(s, "false", 5) == 0)
     724       18910 :                         lex->token_type = JSON_TOKEN_FALSE;
     725             :                     else
     726         138 :                         return JSON_INVALID_TOKEN;
     727             :                 }
     728             :         }                       /* end of switch */
     729             :     }
     730             : 
     731     3222868 :     return JSON_SUCCESS;
     732             : }
     733             : 
     734             : /*
     735             :  * The next token in the input stream is known to be a string; lex it.
     736             :  *
     737             :  * If lex->strval isn't NULL, fill it with the decoded string.
     738             :  * Set lex->token_terminator to the end of the decoded input, and in
     739             :  * success cases, transfer its previous value to lex->prev_token_terminator.
     740             :  * Return JSON_SUCCESS or an error code.
     741             :  *
     742             :  * Note: be careful that all error exits advance lex->token_terminator
     743             :  * to the point after the character we detected the error on.
                     :  *
                     :  * On entry lex->token_start must point at the opening double quote;
                     :  * scanning starts at the following character and ends at the matching
                     :  * unescaped closing quote.
                     :  *
                     :  * When lex->strval is NULL the string is only scanned: backslash-u escapes
                     :  * are checked for four hex digits and other escapes for a legal escape
                     :  * character, but surrogate pairing, code point zero and encoding
                     :  * conversion are not checked.
     744             :  */
     745             : static inline JsonParseErrorType
     746     1173484 : json_lex_string(JsonLexContext *lex)
     747             : {
     748             :     char       *s;
     749     1173484 :     char       *const end = lex->input + lex->input_length;
     750     1173484 :     int         hi_surrogate = -1;  /* pending high surrogate, -1 if none */
     751             : 
     752             :     /* Convenience macros for error exits */
     753             : #define FAIL_AT_CHAR_START(code) \
     754             :     do { \
     755             :         lex->token_terminator = s; \
     756             :         return code; \
     757             :     } while (0)
     758             : #define FAIL_AT_CHAR_END(code) \
     759             :     do { \
     760             :         lex->token_terminator = \
     761             :             s + pg_encoding_mblen_bounded(lex->input_encoding, s); \
     762             :         return code; \
     763             :     } while (0)
     764             : 
     765     1173484 :     if (lex->strval != NULL)
     766     1127022 :         resetStringInfo(lex->strval);
     767             : 
     768             :     Assert(lex->input_length > 0);
     769     1173484 :     s = lex->token_start;
     770             :     for (;;)
     771             :     {
     772     2347690 :         s++;
     773             :         /* Premature end of the string. */
     774     2347690 :         if (s >= end)
     775          12 :             FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
     776     2347678 :         else if (*s == '"')
     777     1173312 :             break;
     778     1174366 :         else if (*s == '\\')
     779             :         {
     780             :             /* OK, we have an escape character. */
     781         738 :             s++;
     782         738 :             if (s >= end)
     783           0 :                 FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
     784         738 :             else if (*s == 'u')
     785             :             {
     786             :                 int         i;
     787         324 :                 int         ch = 0;
     788             : 
     789        1536 :                 for (i = 1; i <= 4; i++)
     790             :                 {
     791        1248 :                     s++;
     792        1248 :                     if (s >= end)
     793           0 :                         FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
     794        1248 :                     else if (*s >= '0' && *s <= '9')
     795         798 :                         ch = (ch * 16) + (*s - '0');
     796         450 :                     else if (*s >= 'a' && *s <= 'f')
     797         390 :                         ch = (ch * 16) + (*s - 'a') + 10;
     798          60 :                     else if (*s >= 'A' && *s <= 'F')
     799          24 :                         ch = (ch * 16) + (*s - 'A') + 10;
     800             :                     else
     801          36 :                         FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT);
     802             :                 }
     803         288 :                 if (lex->strval != NULL)
     804             :                 {
     805             :                     /*
     806             :                      * Combine surrogate pairs.
     807             :                      */
     808         180 :                     if (is_utf16_surrogate_first(ch))
     809             :                     {
     810          60 :                         if (hi_surrogate != -1)
     811          12 :                             FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE);
     812          48 :                         hi_surrogate = ch;
     813          48 :                         continue;   /* a low surrogate escape must follow */
     814             :                     }
     815         120 :                     else if (is_utf16_surrogate_second(ch))
     816             :                     {
     817          48 :                         if (hi_surrogate == -1)
     818          24 :                             FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
     819          24 :                         ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
     820          24 :                         hi_surrogate = -1;
     821             :                     }
     822             : 
     823          96 :                     if (hi_surrogate != -1)
     824           0 :                         FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
     825             : 
     826             :                     /*
     827             :                      * Reject invalid cases.  We can't have a value above
     828             :                      * 0xFFFF here (since we only accepted 4 hex digits
     829             :                      * above), so no need to test for out-of-range chars.
     830             :                      */
     831          96 :                     if (ch == 0)
     832             :                     {
     833             :                         /* We can't allow this, since our TEXT type doesn't */
     834          24 :                         FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO);
     835             :                     }
     836             : 
     837             :                     /*
     838             :                      * Add the represented character to lex->strval.  In the
     839             :                      * backend, we can let pg_unicode_to_server_noerror()
     840             :                      * handle any required character set conversion; in
     841             :                      * frontend, we can only deal with trivial conversions.
     842             :                      */
     843             : #ifndef FRONTEND
     844             :                     {
     845             :                         char        cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
     846             : 
     847          72 :                         if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
     848          28 :                             FAIL_AT_CHAR_END(JSON_UNICODE_UNTRANSLATABLE);
     849          44 :                         appendStringInfoString(lex->strval, cbuf);
     850             :                     }
     851             : #else
     852           0 :                     if (lex->input_encoding == PG_UTF8)
     853             :                     {
     854             :                         /* OK, we can map the code point to UTF8 easily */
     855             :                         char        utf8str[5];
     856             :                         int         utf8len;
     857             : 
     858           0 :                         unicode_to_utf8(ch, (unsigned char *) utf8str);
     859           0 :                         utf8len = pg_utf_mblen((unsigned char *) utf8str);
     860           0 :                         appendBinaryStringInfo(lex->strval, utf8str, utf8len);
     861             :                     }
     862           0 :                     else if (ch <= 0x007f)
     863             :                     {
     864             :                         /* The ASCII range is the same in all encodings */
     865           0 :                         appendStringInfoChar(lex->strval, (char) ch);
     866             :                     }
     867             :                     else
     868           0 :                         FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE);
     869             : #endif                          /* FRONTEND */
     870             :                 }
     871             :             }
     872         414 :             else if (lex->strval != NULL)
     873             :             {
     874         300 :                 if (hi_surrogate != -1)
     875           0 :                     FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
     876             : 
     877         300 :                 switch (*s)
     878             :                 {
     879         204 :                     case '"':
     880             :                     case '\\':
     881             :                     case '/':
     882         204 :                         appendStringInfoChar(lex->strval, *s);
     883         204 :                         break;
     884          36 :                     case 'b':
     885          36 :                         appendStringInfoChar(lex->strval, '\b');
     886          36 :                         break;
     887           0 :                     case 'f':
     888           0 :                         appendStringInfoChar(lex->strval, '\f');
     889           0 :                         break;
     890          54 :                     case 'n':
     891          54 :                         appendStringInfoChar(lex->strval, '\n');
     892          54 :                         break;
     893           0 :                     case 'r':
     894           0 :                         appendStringInfoChar(lex->strval, '\r');
     895           0 :                         break;
     896           0 :                     case 't':
     897           0 :                         appendStringInfoChar(lex->strval, '\t');
     898           0 :                         break;
     899           6 :                     default:
     900             : 
     901             :                         /*
     902             :                          * Not a valid string escape, so signal error.  We
     903             :                          * adjust token_start so that just the escape sequence
     904             :                          * is reported, not the whole string.
     905             :                          */
     906           6 :                         lex->token_start = s;
     907           6 :                         FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
     908             :                 }
     909             :             }
     910         114 :             else if (strchr("\"\\/bfnrt", *s) == NULL)
     911             :             {
     912             :                 /*
     913             :                  * Simpler processing if we're not bothered about de-escaping
     914             :                  *
     915             :                  * It's very tempting to remove the strchr() call here and
     916             :                  * replace it with a switch statement, but testing so far has
     917             :                  * shown it's not a performance win.
     918             :                  */
     919           6 :                 lex->token_start = s;
     920           6 :                 FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
     921             :             }
     922             :         }
     923             :         else
     924             :         {
     925     1173628 :             char       *p = s;
     926             : 
     927     1173628 :             if (hi_surrogate != -1)
     928          12 :                 FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
     929             : 
     930             :             /*
     931             :              * Skip to the first byte that requires special handling, so we
     932             :              * can batch calls to appendBinaryStringInfo.
                     :              *
                     :              * NOTE(review): end - sizeof(Vector8) lies before lex->input
                     :              * when the input is shorter than a Vector8; confirm that
                     :              * forming this pointer is acceptable.
     933             :              */
     934     1459832 :             while (p < end - sizeof(Vector8) &&
     935     1431700 :                    !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
     936     1431208 :                    !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
     937      286216 :                    !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
     938      286216 :                 p += sizeof(Vector8);
     939             : 
     940     9208948 :             for (; p < end; p++)
     941             :             {
     942     9208936 :                 if (*p == '\\' || *p == '"')
     943             :                     break;
     944     8035344 :                 else if ((unsigned char) *p <= 31)
     945             :                 {
     946             :                     /* Per RFC4627, these characters MUST be escaped. */
     947             :                     /*
     948             :                      * Since *p isn't printable, exclude it from the context
     949             :                      * string
     950             :                      */
     951          12 :                     lex->token_terminator = p;
     952          12 :                     return JSON_ESCAPING_REQUIRED;
     953             :                 }
     954             :             }
     955             : 
     956     1173604 :             if (lex->strval != NULL)
     957     1127106 :                 appendBinaryStringInfo(lex->strval, s, p - s);
     958             : 
     959             :             /*
     960             :              * s will be incremented at the top of the loop, so set it to just
     961             :              * behind our lookahead position
     962             :              */
     963     1173604 :             s = p - 1;
     964             :         }
     965             :     }
     966             : 
     967     1173312 :     if (hi_surrogate != -1)
     968             :     {
     969           0 :         lex->token_terminator = s + 1;
     970           0 :         return JSON_UNICODE_LOW_SURROGATE;
     971             :     }
     972             : 
     973             :     /* Hooray, we found the end of the string! */
     974     1173312 :     lex->prev_token_terminator = lex->token_terminator;
     975     1173312 :     lex->token_terminator = s + 1;
     976     1173312 :     return JSON_SUCCESS;
     977             : 
     978             : #undef FAIL_AT_CHAR_START
     979             : #undef FAIL_AT_CHAR_END
     980             : }
     981             : 
     982             : /*
     983             :  * The next token in the input stream is known to be a number; lex it.
     984             :  *
     985             :  * In JSON, a number consists of four parts:
     986             :  *
     987             :  * (1) An optional minus sign ('-').
     988             :  *
     989             :  * (2) Either a single '0', or a string of one or more digits that does not
     990             :  *     begin with a '0'.
     991             :  *
     992             :  * (3) An optional decimal part, consisting of a period ('.') followed by
     993             :  *     one or more digits.  (Note: While this part can be omitted
     994             :  *     completely, it's not OK to have only the decimal point without
     995             :  *     any digits afterwards.)
     996             :  *
     997             :  * (4) An optional exponent part, consisting of 'e' or 'E', optionally
     998             :  *     followed by '+' or '-', followed by one or more digits.  (Note:
     999             :  *     As with the decimal part, if 'e' or 'E' is present, it must be
    1000             :  *     followed by at least one digit.)
    1001             :  *
    1002             :  * The 's' argument to this function points to the ostensible beginning
    1003             :  * of part 2 - i.e. the character after any optional minus sign, or the
    1004             :  * first character of the string if there is none.
    1005             :  *
    1006             :  * If num_err is not NULL, we return an error flag to *num_err rather than
    1007             :  * raising an error for a badly-formed number.  Also, if total_len is not NULL
    1008             :  * the distance from lex->input to the token end+1 is returned to *total_len.
                     :  *
                     :  * When num_err is NULL, lex->prev_token_terminator takes the old value of
                     :  * lex->token_terminator, lex->token_terminator is set to the first
                     :  * character after the number (including any trailing alphanumeric
                     :  * garbage), and JSON_INVALID_TOKEN is returned for a malformed number.
                     :  * When num_err is not NULL, the lexer state is left untouched and
                     :  * JSON_SUCCESS is always returned.
    1009             :  */
    1010             : static inline JsonParseErrorType
    1011      223616 : json_lex_number(JsonLexContext *lex, char *s,
    1012             :                 bool *num_err, int *total_len)
    1013             : {
    1014      223616 :     bool        error = false;
    1015      223616 :     int         len = s - lex->input;   /* offset of s from start of input */
    1016             : 
    1017             :     /* Part (1): leading sign indicator. */
    1018             :     /* Caller already did this for us; so do nothing. */
    1019             : 
    1020             :     /* Part (2): parse main digit string. */
    1021      223616 :     if (len < lex->input_length && *s == '0')
    1022             :     {
    1023       33438 :         s++;
    1024       33438 :         len++;
    1025             :     }
    1026      190178 :     else if (len < lex->input_length && *s >= '1' && *s <= '9')
    1027             :     {
    1028             :         do
    1029             :         {
    1030      608568 :             s++;
    1031      608568 :             len++;
    1032      608568 :         } while (len < lex->input_length && *s >= '0' && *s <= '9');
    1033             :     }
    1034             :     else
    1035          20 :         error = true;           /* no leading digit */
    1036             : 
    1037             :     /* Part (3): parse optional decimal portion. */
    1038      223616 :     if (len < lex->input_length && *s == '.')
    1039             :     {
    1040       37310 :         s++;
    1041       37310 :         len++;
    1042       37310 :         if (len == lex->input_length || *s < '0' || *s > '9')
    1043          12 :             error = true;
    1044             :         else
    1045             :         {
    1046             :             do
    1047             :             {
    1048       91744 :                 s++;
    1049       91744 :                 len++;
    1050       91744 :             } while (len < lex->input_length && *s >= '0' && *s <= '9');
    1051             :         }
    1052             :     }
    1053             : 
    1054             :     /* Part (4): parse optional exponent. */
    1055      223616 :     if (len < lex->input_length && (*s == 'e' || *s == 'E'))
    1056             :     {
    1057          64 :         s++;
    1058          64 :         len++;
    1059          64 :         if (len < lex->input_length && (*s == '+' || *s == '-'))
    1060             :         {
    1061          10 :             s++;
    1062          10 :             len++;
    1063             :         }
    1064          64 :         if (len == lex->input_length || *s < '0' || *s > '9')
    1065          12 :             error = true;
    1066             :         else
    1067             :         {
    1068             :             do
    1069             :             {
    1070         164 :                 s++;
    1071         164 :                 len++;
    1072         164 :             } while (len < lex->input_length && *s >= '0' && *s <= '9');
    1073             :         }
    1074             :     }
    1075             : 
    1076             :     /*
    1077             :      * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff
    1078             :      * here should be considered part of the token for error-reporting
    1079             :      * purposes.
    1080             :      */
    1081      223886 :     for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
    1082         270 :         error = true;
    1083             : 
    1084      223616 :     if (total_len != NULL)
    1085        2962 :         *total_len = len;
    1086             : 
    1087      223616 :     if (num_err != NULL)
    1088             :     {
    1089             :         /* let the caller handle any error */
    1090        2962 :         *num_err = error;
    1091             :     }
    1092             :     else
    1093             :     {
    1094             :         /* return token endpoint */
    1095      220654 :         lex->prev_token_terminator = lex->token_terminator;
    1096      220654 :         lex->token_terminator = s;
    1097             :         /* handle error if any */
    1098      220654 :         if (error)
    1099          48 :             return JSON_INVALID_TOKEN;
    1100             :     }
    1101             : 
    1102      223568 :     return JSON_SUCCESS;
    1103             : }
    1104             : 
    1105             : /*
    1106             :  * Report a parse error.
    1107             :  *
    1108             :  * lex->token_start and lex->token_terminator must identify the current token.
                     :  *
                     :  * This only selects the JsonParseErrorType matching the parser context
                     :  * ctx; nothing is raised or logged here.  If the input ended (token_start
                     :  * is NULL or the token type is JSON_TOKEN_END), JSON_EXPECTED_MORE is
                     :  * returned regardless of ctx.
    1109             :  */
    1110             : static JsonParseErrorType
    1111         314 : report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
    1112             : {
    1113             :     /* Handle case where the input ended prematurely. */
    1114         314 :     if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
    1115         128 :         return JSON_EXPECTED_MORE;
    1116             : 
    1117             :     /* Otherwise choose the error type based on the parsing context. */
    1118         186 :     switch (ctx)
    1119             :     {
    1120          24 :         case JSON_PARSE_END:
    1121          24 :             return JSON_EXPECTED_END;
    1122         102 :         case JSON_PARSE_VALUE:
    1123         102 :             return JSON_EXPECTED_JSON;
    1124          12 :         case JSON_PARSE_STRING:
    1125          12 :             return JSON_EXPECTED_STRING;
    1126           0 :         case JSON_PARSE_ARRAY_START:
    1127           0 :             return JSON_EXPECTED_ARRAY_FIRST;
    1128           0 :         case JSON_PARSE_ARRAY_NEXT:
    1129           0 :             return JSON_EXPECTED_ARRAY_NEXT;
    1130          12 :         case JSON_PARSE_OBJECT_START:
    1131          12 :             return JSON_EXPECTED_OBJECT_FIRST;
    1132          24 :         case JSON_PARSE_OBJECT_LABEL:
    1133          24 :             return JSON_EXPECTED_COLON;
    1134          12 :         case JSON_PARSE_OBJECT_NEXT:
    1135          12 :             return JSON_EXPECTED_OBJECT_NEXT;
    1136           0 :         case JSON_PARSE_OBJECT_COMMA:
    1137           0 :             return JSON_EXPECTED_STRING;    /* same code as JSON_PARSE_STRING */
    1138             :     }
    1139             : 
    1140             :     /*
    1141             :      * We don't use a default: case, so that the compiler will warn about
    1142             :      * unhandled enum values.
    1143             :      */
    1144             :     Assert(false);
    1145           0 :     return JSON_SUCCESS;        /* silence stupider compilers */
    1146             : }
    1147             : 
    1148             : 
    1149             : #ifndef FRONTEND
    1150             : /*
    1151             :  * Extract the current token from a lexing context, for error reporting.
                     :  *
                     :  * Returns a palloc'd, NUL-terminated copy of the bytes from
                     :  * lex->token_start up to (not including) lex->token_terminator.
                     :  *
                     :  * NOTE(review): assumes token_start is non-NULL and does not lie past
                     :  * token_terminator; json_lex() sets token_start to NULL at end of input,
                     :  * so confirm callers never reach here in that state.
    1152             :  */
    1153             : static char *
    1154         258 : extract_token(JsonLexContext *lex)
    1155             : {
    1156         258 :     int         toklen = lex->token_terminator - lex->token_start;
    1157         258 :     char       *token = palloc(toklen + 1);
    1158             : 
    1159         258 :     memcpy(token, lex->token_start, toklen);
    1160         258 :     token[toklen] = '\0';
    1161         258 :     return token;
    1162             : }
    1163             : 
    1164             : /*
    1165             :  * Construct an (already translated) detail message for a JSON error.
    1166             :  *
                     :  * error: the parse error type to describe.
                     :  * lex:   the lexing context; used to obtain the offending token (via
                     :  *        extract_token) or, for JSON_ESCAPING_REQUIRED, the byte at
                     :  *        lex->token_terminator.
                     :  *
                     :  * Messages that embed a token, a byte value, or the database encoding name
                     :  * are built with psprintf(); fixed messages are returned directly as the
                     :  * result of _().  The token copies made by extract_token() are not freed
                     :  * here.
                     :  *
                     :  * JSON_SUCCESS and JSON_SEM_ACTION_FAILED have no detail message: they
                     :  * break out of the switch and reach the elog(ERROR) at the bottom, as does
                     :  * any value that is not a member of the enum.
                     :  *
    1167             :  * Note that the error message generated by this routine may not be
    1168             :  * palloc'd, making it unsafe for frontend code as there is no way to
    1169             :  * know if this can be safely pfree'd or not.
    1170             :  */
    1171             : char *
    1172         466 : json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
    1173             : {
    1174         466 :     switch (error)
    1175             :     {
    1176           0 :         case JSON_SUCCESS:
    1177             :             /* fall through to the error code after switch */
    1178           0 :             break;
    1179          12 :         case JSON_ESCAPING_INVALID:
    1180          12 :             return psprintf(_("Escape sequence \"\\%s\" is invalid."),
    1181             :                             extract_token(lex));
    1182          12 :         case JSON_ESCAPING_REQUIRED:
    1183          12 :             return psprintf(_("Character with value 0x%02x must be escaped."),
    1184          12 :                             (unsigned char) *(lex->token_terminator));
    1185          24 :         case JSON_EXPECTED_END:
    1186          24 :             return psprintf(_("Expected end of input, but found \"%s\"."),
    1187             :                             extract_token(lex));
    1188           0 :         case JSON_EXPECTED_ARRAY_FIRST:
    1189           0 :             return psprintf(_("Expected array element or \"]\", but found \"%s\"."),
    1190             :                             extract_token(lex));
    1191           0 :         case JSON_EXPECTED_ARRAY_NEXT:
    1192           0 :             return psprintf(_("Expected \",\" or \"]\", but found \"%s\"."),
    1193             :                             extract_token(lex));
    1194          24 :         case JSON_EXPECTED_COLON:
    1195          24 :             return psprintf(_("Expected \":\", but found \"%s\"."),
    1196             :                             extract_token(lex));
    1197          48 :         case JSON_EXPECTED_JSON:
    1198          48 :             return psprintf(_("Expected JSON value, but found \"%s\"."),
    1199             :                             extract_token(lex));
    1200          60 :         case JSON_EXPECTED_MORE:
    1201          60 :             return _("The input string ended unexpectedly.");
    1202          12 :         case JSON_EXPECTED_OBJECT_FIRST:
    1203          12 :             return psprintf(_("Expected string or \"}\", but found \"%s\"."),
    1204             :                             extract_token(lex));
    1205          12 :         case JSON_EXPECTED_OBJECT_NEXT:
    1206          12 :             return psprintf(_("Expected \",\" or \"}\", but found \"%s\"."),
    1207             :                             extract_token(lex));
    1208          12 :         case JSON_EXPECTED_STRING:
    1209          12 :             return psprintf(_("Expected string, but found \"%s\"."),
    1210             :                             extract_token(lex));
    1211         114 :         case JSON_INVALID_TOKEN:
    1212         114 :             return psprintf(_("Token \"%s\" is invalid."),
    1213             :                             extract_token(lex));
    1214          24 :         case JSON_UNICODE_CODE_POINT_ZERO:
    1215          24 :             return _("\\u0000 cannot be converted to text.");
    1216          36 :         case JSON_UNICODE_ESCAPE_FORMAT:
    1217          36 :             return _("\"\\u\" must be followed by four hexadecimal digits.");
    1218           0 :         case JSON_UNICODE_HIGH_ESCAPE:
    1219             :             /* note: this case is only reachable in frontend not backend */
    1220           0 :             return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
    1221          28 :         case JSON_UNICODE_UNTRANSLATABLE:
    1222             :             /* note: this case is only reachable in backend not frontend */
    1223          28 :             return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."),
    1224             :                             GetDatabaseEncodingName());
    1225          12 :         case JSON_UNICODE_HIGH_SURROGATE:
    1226          12 :             return _("Unicode high surrogate must not follow a high surrogate.");
    1227          36 :         case JSON_UNICODE_LOW_SURROGATE:
    1228          36 :             return _("Unicode low surrogate must follow a high surrogate.");
    1229           0 :         case JSON_SEM_ACTION_FAILED:
    1230             :             /* fall through to the error code after switch */
    1231           0 :             break;
    1232             :     }
    1233             : 
    1234             :     /*
    1235             :      * We don't use a default: case, so that the compiler will warn about
    1236             :      * unhandled enum values.  But this needs to be here anyway to cover the
    1237             :      * possibility of an incorrect input.
                     :      *
                     :      * NOTE(review): the return after elog(ERROR) is presumably never
                     :      * reached and exists only to give every path a return value -- confirm.
    1238             :      */
    1239           0 :     elog(ERROR, "unexpected json parse error type: %d", (int) error);
    1240             :     return NULL;
    1241             : }
    1242             : #endif

Generated by: LCOV version 1.14