LCOV - PostgreSQL 19devel - src/common/jsonapi.c

LCOV - code coverage report

Current view:	top level - src/common - jsonapi.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL 19devel	Lines:	849	963	88.2 %
Date:	2025-08-15 00:17:59	Functions:	31	31	100.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * jsonapi.c
       4             :  *      JSON parser and lexer interfaces
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/common/jsonapi.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #ifndef FRONTEND
      15             : #include "postgres.h"
      16             : #else
      17             : #include "postgres_fe.h"
      18             : #endif
      19             : 
      20             : #include "common/jsonapi.h"
      21             : #include "mb/pg_wchar.h"
      22             : #include "port/pg_lfind.h"
      23             : 
      24             : #ifdef JSONAPI_USE_PQEXPBUFFER
      25             : #include "pqexpbuffer.h"
      26             : #else
      27             : #include "lib/stringinfo.h"
      28             : #include "miscadmin.h"
      29             : #endif
      30             : 
      31             : /*
      32             :  * By default, we will use palloc/pfree along with StringInfo.  In libpq,
      33             :  * use malloc and PQExpBuffer, and return JSON_OUT_OF_MEMORY on out-of-memory.
      34             :  */
      35             : #ifdef JSONAPI_USE_PQEXPBUFFER
      36             : 
      37             : #define STRDUP(s) strdup(s)
      38             : #define ALLOC(size) malloc(size)
      39             : #define ALLOC0(size) calloc(1, size)
      40             : #define REALLOC realloc
      41             : #define FREE(s) free(s)
      42             : 
      43             : #define jsonapi_appendStringInfo            appendPQExpBuffer
      44             : #define jsonapi_appendBinaryStringInfo      appendBinaryPQExpBuffer
      45             : #define jsonapi_appendStringInfoChar        appendPQExpBufferChar
      46             : /* XXX should we add a macro version to PQExpBuffer? */
      47             : #define jsonapi_appendStringInfoCharMacro   appendPQExpBufferChar
      48             : #define jsonapi_makeStringInfo              createPQExpBuffer
      49             : #define jsonapi_initStringInfo              initPQExpBuffer
      50             : #define jsonapi_resetStringInfo             resetPQExpBuffer
      51             : #define jsonapi_termStringInfo              termPQExpBuffer
      52             : #define jsonapi_destroyStringInfo           destroyPQExpBuffer
      53             : 
      54             : #else                           /* !JSONAPI_USE_PQEXPBUFFER */
      55             : 
      56             : #define STRDUP(s) pstrdup(s)
      57             : #define ALLOC(size) palloc(size)
      58             : #define ALLOC0(size) palloc0(size)
      59             : #define REALLOC repalloc
      60             : 
      61             : #ifdef FRONTEND
      62             : #define FREE pfree
      63             : #else
      64             : /*
      65             :  * Backend pfree() doesn't handle NULL pointers like the frontend's does; smooth
      66             :  * that over to reduce mental gymnastics. Avoid multiple evaluation of the macro
      67             :  * argument to avoid future hair-pulling.
      68             :  */
      69             : #define FREE(s) do {    \
      70             :     void *__v = (s);    \
      71             :     if (__v)            \
      72             :         pfree(__v);     \
      73             : } while (0)
      74             : #endif
      75             : 
      76             : #define jsonapi_appendStringInfo            appendStringInfo
      77             : #define jsonapi_appendBinaryStringInfo      appendBinaryStringInfo
      78             : #define jsonapi_appendStringInfoChar        appendStringInfoChar
      79             : #define jsonapi_appendStringInfoCharMacro   appendStringInfoCharMacro
      80             : #define jsonapi_makeStringInfo              makeStringInfo
      81             : #define jsonapi_initStringInfo              initStringInfo
      82             : #define jsonapi_resetStringInfo             resetStringInfo
      83             : #define jsonapi_termStringInfo(s)           pfree((s)->data)
      84             : #define jsonapi_destroyStringInfo           destroyStringInfo
      85             : 
      86             : #endif                          /* JSONAPI_USE_PQEXPBUFFER */
      87             : 
      88             : /*
      89             :  * The context of the parser is maintained by the recursive descent
      90             :  * mechanism, but is passed explicitly to the error reporting routine
      91             :  * for better diagnostics.
      92             :  */
      93             : typedef enum                    /* contexts of JSON parser */
      94             : {
      95             :     JSON_PARSE_VALUE,           /* expecting a value */
      96             :     JSON_PARSE_STRING,          /* expecting a string (for a field name) */
      97             :     JSON_PARSE_ARRAY_START,     /* saw '[', expecting value or ']' */
      98             :     JSON_PARSE_ARRAY_NEXT,      /* saw array element, expecting ',' or ']' */
      99             :     JSON_PARSE_OBJECT_START,    /* saw '{', expecting label or '}' */
     100             :     JSON_PARSE_OBJECT_LABEL,    /* saw object label, expecting ':' */
     101             :     JSON_PARSE_OBJECT_NEXT,     /* saw object value, expecting ',' or '}' */
     102             :     JSON_PARSE_OBJECT_COMMA,    /* saw object ',', expecting next label */
     103             :     JSON_PARSE_END,             /* saw the end of a document, expect nothing */
     104             : } JsonParseContext;
     105             : 
     106             : /*
     107             :  * Setup for table-driven parser.
     108             :  * These enums need to be separate from the JsonTokenType and from each other
     109             :  * so we can have all of them on the prediction stack, which consists of
     110             :  * tokens, non-terminals, and semantic action markers.
     111             :  */
     112             : 
     113             : enum JsonNonTerminal
     114             : {
     115             :     JSON_NT_JSON = 32,
     116             :     JSON_NT_ARRAY_ELEMENTS,
     117             :     JSON_NT_MORE_ARRAY_ELEMENTS,
     118             :     JSON_NT_KEY_PAIRS,
     119             :     JSON_NT_MORE_KEY_PAIRS,
     120             : };
     121             : 
     122             : enum JsonParserSem
     123             : {
     124             :     JSON_SEM_OSTART = 64,
     125             :     JSON_SEM_OEND,
     126             :     JSON_SEM_ASTART,
     127             :     JSON_SEM_AEND,
     128             :     JSON_SEM_OFIELD_INIT,
     129             :     JSON_SEM_OFIELD_START,
     130             :     JSON_SEM_OFIELD_END,
     131             :     JSON_SEM_AELEM_START,
     132             :     JSON_SEM_AELEM_END,
     133             :     JSON_SEM_SCALAR_INIT,
     134             :     JSON_SEM_SCALAR_CALL,
     135             : };
     136             : 
     137             : /*
     138             :  * struct containing the 3 stacks used in non-recursive parsing,
     139             :  * and the token and value for scalars that need to be preserved
     140             :  * across calls.
     141             :  *
     142             :  * typedef appears in jsonapi.h
     143             :  */
     144             : struct JsonParserStack
     145             : {
     146             :     int         stack_size;
     147             :     char       *prediction;
     148             :     size_t      pred_index;
     149             :     /* these two are indexed by lex_level */
     150             :     char      **fnames;
     151             :     bool       *fnull;
     152             :     JsonTokenType scalar_tok;
     153             :     char       *scalar_val;
     154             : };
     155             : 
     156             : /*
     157             :  * struct containing state used when there is a possible partial token at the
     158             :  * end of a json chunk when we are doing incremental parsing.
     159             :  *
     160             :  * typedef appears in jsonapi.h
     161             :  */
     162             : struct JsonIncrementalState
     163             : {
     164             :     bool        started;
     165             :     bool        is_last_chunk;
     166             :     bool        partial_completed;
     167             :     jsonapi_StrValType partial_token;
     168             : };
     169             : 
     170             : /*
     171             :  * constants and macros used in the nonrecursive parser
     172             :  */
     173             : #define JSON_NUM_TERMINALS 13
     174             : #define JSON_NUM_NONTERMINALS 5
     175             : #define JSON_NT_OFFSET JSON_NT_JSON
     176             : /* for indexing the table */
     177             : #define OFS(NT) (NT) - JSON_NT_OFFSET
     178             : /* classify items we get off the stack */
     179             : #define IS_SEM(x) ((x) & 0x40)
     180             : #define IS_NT(x)  ((x) & 0x20)
     181             : 
     182             : /*
     183             :  * These productions are stored in reverse order right to left so that when
     184             :  * they are pushed on the stack what we expect next is at the top of the stack.
     185             :  */
     186             : static char JSON_PROD_EPSILON[] = {0};  /* epsilon - an empty production */
     187             : 
     188             : /* JSON -> string */
     189             : static char JSON_PROD_SCALAR_STRING[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_STRING, JSON_SEM_SCALAR_INIT, 0};
     190             : 
     191             : /* JSON -> number */
     192             : static char JSON_PROD_SCALAR_NUMBER[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NUMBER, JSON_SEM_SCALAR_INIT, 0};
     193             : 
     194             : /* JSON -> 'true' */
     195             : static char JSON_PROD_SCALAR_TRUE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_TRUE, JSON_SEM_SCALAR_INIT, 0};
     196             : 
     197             : /* JSON -> 'false' */
     198             : static char JSON_PROD_SCALAR_FALSE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_FALSE, JSON_SEM_SCALAR_INIT, 0};
     199             : 
     200             : /* JSON -> 'null' */
     201             : static char JSON_PROD_SCALAR_NULL[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NULL, JSON_SEM_SCALAR_INIT, 0};
     202             : 
     203             : /* JSON -> '{' KEY_PAIRS '}' */
     204             : static char JSON_PROD_OBJECT[] = {JSON_SEM_OEND, JSON_TOKEN_OBJECT_END, JSON_NT_KEY_PAIRS, JSON_TOKEN_OBJECT_START, JSON_SEM_OSTART, 0};
     205             : 
     206             : /* JSON -> '[' ARRAY_ELEMENTS ']' */
     207             : static char JSON_PROD_ARRAY[] = {JSON_SEM_AEND, JSON_TOKEN_ARRAY_END, JSON_NT_ARRAY_ELEMENTS, JSON_TOKEN_ARRAY_START, JSON_SEM_ASTART, 0};
     208             : 
     209             : /* ARRAY_ELEMENTS -> JSON MORE_ARRAY_ELEMENTS */
     210             : static char JSON_PROD_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, 0};
     211             : 
     212             : /* MORE_ARRAY_ELEMENTS -> ',' JSON MORE_ARRAY_ELEMENTS */
     213             : static char JSON_PROD_MORE_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, JSON_TOKEN_COMMA, 0};
     214             : 
     215             : /* KEY_PAIRS -> string ':' JSON MORE_KEY_PAIRS */
     216             : static char JSON_PROD_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, 0};
     217             : 
     218             : /* MORE_KEY_PAIRS -> ',' string ':'  JSON MORE_KEY_PAIRS */
     219             : static char JSON_PROD_MORE_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, JSON_TOKEN_COMMA, 0};
     220             : 
     221             : /*
     222             :  * Note: there are also epsilon productions for ARRAY_ELEMENTS,
     223             :  * MORE_ARRAY_ELEMENTS, KEY_PAIRS and MORE_KEY_PAIRS
     224             :  * They are all the same as none require any semantic actions.
     225             :  */
     226             : 
     227             : /*
     228             :  * Table connecting the productions with their director sets of
     229             :  * terminal symbols.
     230             :  * Any combination not specified here represents an error.
     231             :  */
     232             : 
     233             : typedef struct
     234             : {
     235             :     size_t      len;
     236             :     char       *prod;
     237             : } td_entry;
     238             : 
     239             : #define TD_ENTRY(PROD) { sizeof(PROD) - 1, (PROD) }
     240             : 
     241             : static td_entry td_parser_table[JSON_NUM_NONTERMINALS][JSON_NUM_TERMINALS] =
     242             : {
     243             :     /* JSON */
     244             :     [OFS(JSON_NT_JSON)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_SCALAR_STRING),
     245             :     [OFS(JSON_NT_JSON)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_SCALAR_NUMBER),
     246             :     [OFS(JSON_NT_JSON)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_SCALAR_TRUE),
     247             :     [OFS(JSON_NT_JSON)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_SCALAR_FALSE),
     248             :     [OFS(JSON_NT_JSON)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_SCALAR_NULL),
     249             :     [OFS(JSON_NT_JSON)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY),
     250             :     [OFS(JSON_NT_JSON)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_OBJECT),
     251             :     /* ARRAY_ELEMENTS */
     252             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     253             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     254             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     255             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     256             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     257             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     258             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     259             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
     260             :     /* MORE_ARRAY_ELEMENTS */
     261             :     [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_ARRAY_ELEMENTS),
     262             :     [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
     263             :     /* KEY_PAIRS */
     264             :     [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_KEY_PAIRS),
     265             :     [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
     266             :     /* MORE_KEY_PAIRS */
     267             :     [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_KEY_PAIRS),
     268             :     [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
     269             : };
     270             : 
     271             : /* the GOAL production. Not stored in the table, but will be the initial contents of the prediction stack */
     272             : static char JSON_PROD_GOAL[] = {JSON_TOKEN_END, JSON_NT_JSON, 0};
     273             : 
     274             : static inline JsonParseErrorType json_lex_string(JsonLexContext *lex);
     275             : static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, const char *s,
     276             :                                                  bool *num_err, size_t *total_len);
     277             : static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, const JsonSemAction *sem);
     278             : static JsonParseErrorType parse_object_field(JsonLexContext *lex, const JsonSemAction *sem);
     279             : static JsonParseErrorType parse_object(JsonLexContext *lex, const JsonSemAction *sem);
     280             : static JsonParseErrorType parse_array_element(JsonLexContext *lex, const JsonSemAction *sem);
     281             : static JsonParseErrorType parse_array(JsonLexContext *lex, const JsonSemAction *sem);
     282             : static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
     283             : static bool allocate_incremental_state(JsonLexContext *lex);
     284             : static inline void set_fname(JsonLexContext *lex, char *fname);
     285             : 
     286             : /* the null action object used for pure validation */
     287             : const JsonSemAction nullSemAction =
     288             : {
     289             :     NULL, NULL, NULL, NULL, NULL,
     290             :     NULL, NULL, NULL, NULL, NULL
     291             : };
     292             : 
     293             : /* sentinels used for out-of-memory conditions */
     294             : static JsonLexContext failed_oom;
     295             : static JsonIncrementalState failed_inc_oom;
     296             : 
     297             : /* Parser support routines */
     298             : 
     299             : /*
     300             :  * lex_peek
     301             :  *
     302             :  * what is the current look_ahead token?
     303             : */
     304             : static inline JsonTokenType
     305    14337318 : lex_peek(JsonLexContext *lex)
     306             : {
     307    14337318 :     return lex->token_type;
     308             : }
     309             : 
     310             : /*
     311             :  * lex_expect
     312             :  *
     313             :  * move the lexer to the next token if the current look_ahead token matches
     314             :  * the parameter token. Otherwise, report an error.
     315             :  */
     316             : static inline JsonParseErrorType
     317      469878 : lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
     318             : {
     319      469878 :     if (lex_peek(lex) == token)
     320      469770 :         return json_lex(lex);
     321             :     else
     322         108 :         return report_parse_error(ctx, lex);
     323             : }
     324             : 
     325             : /* chars to consider as part of an alphanumeric token */
     326             : #define JSON_ALPHANUMERIC_CHAR(c)  \
     327             :     (((c) >= 'a' && (c) <= 'z') || \
     328             :      ((c) >= 'A' && (c) <= 'Z') || \
     329             :      ((c) >= '0' && (c) <= '9') || \
     330             :      (c) == '_' || \
     331             :      IS_HIGHBIT_SET(c))
     332             : 
     333             : /*
     334             :  * Utility function to check if a string is a valid JSON number.
     335             :  *
     336             :  * str is of length len, and need not be null-terminated.
     337             :  */
     338             : bool
     339          46 : IsValidJsonNumber(const char *str, size_t len)
     340             : {
     341             :     bool        numeric_error;
     342             :     size_t      total_len;
     343          46 :     JsonLexContext dummy_lex = {0};
     344             : 
     345          46 :     if (len <= 0)
     346           0 :         return false;
     347             : 
     348             :     /*
     349             :      * json_lex_number expects a leading  '-' to have been eaten already.
     350             :      *
     351             :      * having to cast away the constness of str is ugly, but there's not much
     352             :      * easy alternative.
     353             :      */
     354          46 :     if (*str == '-')
     355             :     {
     356           4 :         dummy_lex.input = str + 1;
     357           4 :         dummy_lex.input_length = len - 1;
     358             :     }
     359             :     else
     360             :     {
     361          42 :         dummy_lex.input = str;
     362          42 :         dummy_lex.input_length = len;
     363             :     }
     364             : 
     365          46 :     dummy_lex.token_start = dummy_lex.input;
     366             : 
     367          46 :     json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
     368             : 
     369          46 :     return (!numeric_error) && (total_len == dummy_lex.input_length);
     370             : }
     371             : 
     372             : /*
     373             :  * makeJsonLexContextCstringLen
     374             :  *      Initialize the given JsonLexContext object, or create one
     375             :  *
     376             :  * If a valid 'lex' pointer is given, it is initialized.  This can
     377             :  * be used for stack-allocated structs, saving overhead.  If NULL is
     378             :  * given, a new struct is allocated.
     379             :  *
     380             :  * If need_escapes is true, ->strval stores the unescaped lexemes.
     381             :  * Unescaping is expensive, so only request it when necessary.
     382             :  *
     383             :  * If need_escapes is true or lex was given as NULL, then caller is
     384             :  * responsible for freeing the returned struct, either by calling
     385             :  * freeJsonLexContext() or (in backend environment) via memory context
     386             :  * cleanup.
     387             :  *
     388             :  * In shlib code, any out-of-memory failures will be deferred to time
     389             :  * of use; this function is guaranteed to return a valid JsonLexContext.
     390             :  */
     391             : JsonLexContext *
     392       39410 : makeJsonLexContextCstringLen(JsonLexContext *lex, const char *json,
     393             :                              size_t len, int encoding, bool need_escapes)
     394             : {
     395       39410 :     if (lex == NULL)
     396             :     {
     397        5530 :         lex = ALLOC0(sizeof(JsonLexContext));
     398        5530 :         if (!lex)
     399           0 :             return &failed_oom;
     400        5530 :         lex->flags |= JSONLEX_FREE_STRUCT;
     401             :     }
     402             :     else
     403       33880 :         memset(lex, 0, sizeof(JsonLexContext));
     404             : 
     405       39410 :     lex->errormsg = NULL;
     406       39410 :     lex->input = lex->token_terminator = lex->line_start = json;
     407       39410 :     lex->line_number = 1;
     408       39410 :     lex->input_length = len;
     409       39410 :     lex->input_encoding = encoding;
     410       39410 :     lex->need_escapes = need_escapes;
     411       39410 :     if (need_escapes)
     412             :     {
     413             :         /*
     414             :          * This call can fail in shlib code. We defer error handling to time
     415             :          * of use (json_lex_string()) since we might not need to parse any
     416             :          * strings anyway.
     417             :          */
     418       30828 :         lex->strval = jsonapi_makeStringInfo();
     419       30828 :         lex->flags |= JSONLEX_FREE_STRVAL;
     420             :     }
     421             : 
     422       39410 :     return lex;
     423             : }
     424             : 
     425             : /*
     426             :  * Allocates the internal bookkeeping structures for incremental parsing. This
     427             :  * can only fail in-band with shlib code.
     428             :  */
     429             : #define JS_STACK_CHUNK_SIZE 64
     430             : #define JS_MAX_PROD_LEN 10      /* more than we need */
     431             : #define JSON_TD_MAX_STACK 6400  /* hard coded for now - this is a REALLY high
     432             :                                  * number */
     433             : static bool
     434        4158 : allocate_incremental_state(JsonLexContext *lex)
     435             : {
     436             :     void       *pstack,
     437             :                *prediction,
     438             :                *fnames,
     439             :                *fnull;
     440             : 
     441        4158 :     lex->inc_state = ALLOC0(sizeof(JsonIncrementalState));
     442        4158 :     pstack = ALLOC0(sizeof(JsonParserStack));
     443        4158 :     prediction = ALLOC(JS_STACK_CHUNK_SIZE * JS_MAX_PROD_LEN);
     444        4158 :     fnames = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(char *));
     445        4158 :     fnull = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(bool));
     446             : 
     447             : #ifdef JSONAPI_USE_PQEXPBUFFER
     448        1960 :     if (!lex->inc_state
     449        1960 :         || !pstack
     450        1960 :         || !prediction
     451        1960 :         || !fnames
     452        1960 :         || !fnull)
     453             :     {
     454           0 :         FREE(lex->inc_state);
     455           0 :         FREE(pstack);
     456           0 :         FREE(prediction);
     457           0 :         FREE(fnames);
     458           0 :         FREE(fnull);
     459             : 
     460           0 :         lex->inc_state = &failed_inc_oom;
     461           0 :         return false;
     462             :     }
     463             : #endif
     464             : 
     465        4158 :     jsonapi_initStringInfo(&(lex->inc_state->partial_token));
     466        4158 :     lex->pstack = pstack;
     467        4158 :     lex->pstack->stack_size = JS_STACK_CHUNK_SIZE;
     468        4158 :     lex->pstack->prediction = prediction;
     469        4158 :     lex->pstack->fnames = fnames;
     470        4158 :     lex->pstack->fnull = fnull;
     471             : 
     472             :     /*
     473             :      * fnames between 0 and lex_level must always be defined so that
     474             :      * freeJsonLexContext() can handle them safely. inc/dec_lex_level() handle
     475             :      * the rest.
     476             :      */
     477             :     Assert(lex->lex_level == 0);
     478        4158 :     lex->pstack->fnames[0] = NULL;
     479             : 
     480        4158 :     lex->incremental = true;
     481        4158 :     return true;
     482             : }
     483             : 
     484             : 
     485             : /*
     486             :  * makeJsonLexContextIncremental
     487             :  *
     488             :  * Similar to above but set up for use in incremental parsing. That means we
     489             :  * need explicit stacks for predictions, field names and null indicators, but
     490             :  * we don't need the input, that will be handed in bit by bit to the
     491             :  * parse routine. We also need an accumulator for partial tokens in case
     492             :  * the boundary between chunks happens to fall in the middle of a token.
     493             :  *
     494             :  * In shlib code, any out-of-memory failures will be deferred to time of use;
     495             :  * this function is guaranteed to return a valid JsonLexContext.
     496             :  */
     497             : JsonLexContext *
     498        4158 : makeJsonLexContextIncremental(JsonLexContext *lex, int encoding,
     499             :                               bool need_escapes)
     500             : {
     501        4158 :     if (lex == NULL)
     502             :     {
     503           2 :         lex = ALLOC0(sizeof(JsonLexContext));
     504           2 :         if (!lex)
     505           0 :             return &failed_oom;
     506             : 
     507           2 :         lex->flags |= JSONLEX_FREE_STRUCT;
     508             :     }
     509             :     else
     510        4156 :         memset(lex, 0, sizeof(JsonLexContext));
     511             : 
     512        4158 :     lex->line_number = 1;
     513        4158 :     lex->input_encoding = encoding;
     514             : 
     515        4158 :     if (!allocate_incremental_state(lex))
     516             :     {
     517           0 :         if (lex->flags & JSONLEX_FREE_STRUCT)
     518             :         {
     519           0 :             FREE(lex);
     520           0 :             return &failed_oom;
     521             :         }
     522             : 
     523             :         /* lex->inc_state tracks the OOM failure; we can return here. */
     524           0 :         return lex;
     525             :     }
     526             : 
     527        4158 :     lex->need_escapes = need_escapes;
     528        4158 :     if (need_escapes)
     529             :     {
     530             :         /*
     531             :          * This call can fail in shlib code. We defer error handling to time
     532             :          * of use (json_lex_string()) since we might not need to parse any
     533             :          * strings anyway.
     534             :          */
     535         244 :         lex->strval = jsonapi_makeStringInfo();
     536         244 :         lex->flags |= JSONLEX_FREE_STRVAL;
     537             :     }
     538             : 
     539        4158 :     return lex;
     540             : }
     541             : 
     542             : void
     543        3920 : setJsonLexContextOwnsTokens(JsonLexContext *lex, bool owned_by_context)
     544             : {
     545        3920 :     if (lex->incremental && lex->inc_state->started)
     546             :     {
     547             :         /*
     548             :          * Switching this flag after parsing has already started is a
     549             :          * programming error.
     550             :          */
     551             :         Assert(false);
     552           0 :         return;
     553             :     }
     554             : 
     555        3920 :     if (owned_by_context)
     556        1960 :         lex->flags |= JSONLEX_CTX_OWNS_TOKENS;
     557             :     else
     558        1960 :         lex->flags &= ~JSONLEX_CTX_OWNS_TOKENS;
     559             : }
     560             : 
     561             : static inline bool
     562     5174820 : inc_lex_level(JsonLexContext *lex)
     563             : {
     564     5174820 :     if (lex->incremental && (lex->lex_level + 1) >= lex->pstack->stack_size)
     565             :     {
     566             :         size_t      new_stack_size;
     567             :         char       *new_prediction;
     568             :         char      **new_fnames;
     569             :         bool       *new_fnull;
     570             : 
     571       76800 :         new_stack_size = lex->pstack->stack_size + JS_STACK_CHUNK_SIZE;
     572             : 
     573       76800 :         new_prediction = REALLOC(lex->pstack->prediction,
     574             :                                  new_stack_size * JS_MAX_PROD_LEN);
     575             : #ifdef JSONAPI_USE_PQEXPBUFFER
     576       38400 :         if (!new_prediction)
     577           0 :             return false;
     578             : #endif
     579       76800 :         lex->pstack->prediction = new_prediction;
     580             : 
     581       76800 :         new_fnames = REALLOC(lex->pstack->fnames,
     582             :                              new_stack_size * sizeof(char *));
     583             : #ifdef JSONAPI_USE_PQEXPBUFFER
     584       38400 :         if (!new_fnames)
     585           0 :             return false;
     586             : #endif
     587       76800 :         lex->pstack->fnames = new_fnames;
     588             : 
     589       76800 :         new_fnull = REALLOC(lex->pstack->fnull, new_stack_size * sizeof(bool));
     590             : #ifdef JSONAPI_USE_PQEXPBUFFER
     591       38400 :         if (!new_fnull)
     592           0 :             return false;
     593             : #endif
     594       76800 :         lex->pstack->fnull = new_fnull;
     595             : 
     596       76800 :         lex->pstack->stack_size = new_stack_size;
     597             :     }
     598             : 
     599     5174820 :     lex->lex_level += 1;
     600             : 
     601     5174820 :     if (lex->incremental)
     602             :     {
     603             :         /*
     604             :          * Ensure freeJsonLexContext() remains safe even if no fname is
     605             :          * assigned at this level.
     606             :          */
     607     5174820 :         lex->pstack->fnames[lex->lex_level] = NULL;
     608             :     }
     609             : 
     610     5174820 :     return true;
     611             : }
     612             : 
     613             : static inline void
     614     1897018 : dec_lex_level(JsonLexContext *lex)
     615             : {
     616     1897018 :     set_fname(lex, NULL);       /* free the current level's fname, if needed */
     617     1897018 :     lex->lex_level -= 1;
     618     1897018 : }
     619             : 
     620             : static inline void
     621    14701792 : push_prediction(JsonParserStack *pstack, td_entry entry)
     622             : {
     623    14701792 :     memcpy(pstack->prediction + pstack->pred_index, entry.prod, entry.len);
     624    14701792 :     pstack->pred_index += entry.len;
     625    14701792 : }
     626             : 
     627             : static inline char
     628    46984410 : pop_prediction(JsonParserStack *pstack)
     629             : {
     630             :     Assert(pstack->pred_index > 0);
     631    46984410 :     return pstack->prediction[--pstack->pred_index];
     632             : }
     633             : 
     634             : static inline char
     635         152 : next_prediction(JsonParserStack *pstack)
     636             : {
     637             :     Assert(pstack->pred_index > 0);
     638         152 :     return pstack->prediction[pstack->pred_index - 1];
     639             : }
     640             : 
     641             : static inline bool
     642    47588988 : have_prediction(JsonParserStack *pstack)
     643             : {
     644    47588988 :     return pstack->pred_index > 0;
     645             : }
     646             : 
     647             : static inline void
     648     3134104 : set_fname(JsonLexContext *lex, char *fname)
     649             : {
     650     3134104 :     if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
     651             :     {
     652             :         /*
     653             :          * Don't leak prior fnames. If one hasn't been assigned yet,
     654             :          * inc_lex_level ensured that it's NULL (and therefore safe to free).
     655             :          */
     656      873724 :         FREE(lex->pstack->fnames[lex->lex_level]);
     657             :     }
     658             : 
     659     3134104 :     lex->pstack->fnames[lex->lex_level] = fname;
     660     3134104 : }
     661             : 
     662             : static inline char *
     663     1157574 : get_fname(JsonLexContext *lex)
     664             : {
     665     1157574 :     return lex->pstack->fnames[lex->lex_level];
     666             : }
     667             : 
     668             : static inline void
     669     6398506 : set_fnull(JsonLexContext *lex, bool fnull)
     670             : {
     671     6398506 :     lex->pstack->fnull[lex->lex_level] = fnull;
     672     6398506 : }
     673             : 
     674             : static inline bool
     675        1488 : get_fnull(JsonLexContext *lex)
     676             : {
     677        1488 :     return lex->pstack->fnull[lex->lex_level];
     678             : }
     679             : 
     680             : /*
     681             :  * Free memory in a JsonLexContext.
     682             :  *
     683             :  * There's no need for this if a *lex pointer was given when the object was
     684             :  * made, need_escapes was false, and json_errdetail() was not called; or if (in
     685             :  * backend environment) a memory context delete/reset is imminent.
     686             :  */
     687             : void
     688       10432 : freeJsonLexContext(JsonLexContext *lex)
     689             : {
     690             :     static const JsonLexContext empty = {0};
     691             : 
     692       10432 :     if (!lex || lex == &failed_oom)
     693           0 :         return;
     694             : 
     695       10432 :     if (lex->flags & JSONLEX_FREE_STRVAL)
     696        6130 :         jsonapi_destroyStringInfo(lex->strval);
     697             : 
     698       10432 :     if (lex->errormsg)
     699        1906 :         jsonapi_destroyStringInfo(lex->errormsg);
     700             : 
     701       10432 :     if (lex->incremental)
     702             :     {
     703        4152 :         jsonapi_termStringInfo(&lex->inc_state->partial_token);
     704        4152 :         FREE(lex->inc_state);
     705        4152 :         FREE(lex->pstack->prediction);
     706             : 
     707        4152 :         if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
     708             :         {
     709             :             int         i;
     710             : 
     711             :             /* Clean up any tokens that were left behind. */
     712     1642820 :             for (i = 0; i <= lex->lex_level; i++)
     713     1640860 :                 FREE(lex->pstack->fnames[i]);
     714             :         }
     715             : 
     716        4152 :         FREE(lex->pstack->fnames);
     717        4152 :         FREE(lex->pstack->fnull);
     718        4152 :         FREE(lex->pstack->scalar_val);
     719        4152 :         FREE(lex->pstack);
     720             :     }
     721             : 
     722       10432 :     if (lex->flags & JSONLEX_FREE_STRUCT)
     723        5310 :         FREE(lex);
     724             :     else
     725        5122 :         *lex = empty;
     726             : }
     727             : 
     728             : /*
     729             :  * pg_parse_json
     730             :  *
     731             :  * Publicly visible entry point for the JSON parser.
     732             :  *
     733             :  * lex is a lexing context, set up for the json to be processed by calling
     734             :  * makeJsonLexContext(). sem is a structure of function pointers to semantic
     735             :  * action routines to be called at appropriate spots during parsing, and a
     736             :  * pointer to a state object to be passed to those routines.
     737             :  *
     738             :  * If FORCE_JSON_PSTACK is defined then the routine will call the non-recursive
     739             :  * JSON parser. This is a useful way to validate that it's doing the right
     740             :  * thing at least for non-incremental cases. If this is on we expect to see
     741             :  * regression diffs relating to error messages about stack depth, but no
     742             :  * other differences.
     743             :  */
     744             : JsonParseErrorType
     745       38672 : pg_parse_json(JsonLexContext *lex, const JsonSemAction *sem)
     746             : {
     747             : #ifdef FORCE_JSON_PSTACK
     748             :     /*
     749             :      * We don't need partial token processing, there is only one chunk. But we
     750             :      * still need to init the partial token string so that freeJsonLexContext
     751             :      * works, so perform the full incremental initialization.
     752             :      */
     753             :     if (!allocate_incremental_state(lex))
     754             :         return JSON_OUT_OF_MEMORY;
     755             : 
     756             :     return pg_parse_json_incremental(lex, sem, lex->input, lex->input_length, true);
     757             : 
     758             : #else
     759             : 
     760             :     JsonTokenType tok;
     761             :     JsonParseErrorType result;
     762             : 
     763       38672 :     if (lex == &failed_oom)
     764           0 :         return JSON_OUT_OF_MEMORY;
     765       38672 :     if (lex->incremental)
     766           0 :         return JSON_INVALID_LEXER_TYPE;
     767             : 
     768             :     /* get the initial token */
     769       38672 :     result = json_lex(lex);
     770       38672 :     if (result != JSON_SUCCESS)
     771         246 :         return result;
     772             : 
     773       38426 :     tok = lex_peek(lex);
     774             : 
     775             :     /* parse by recursive descent */
     776       38426 :     switch (tok)
     777             :     {
     778       20796 :         case JSON_TOKEN_OBJECT_START:
     779       20796 :             result = parse_object(lex, sem);
     780       20728 :             break;
     781        7432 :         case JSON_TOKEN_ARRAY_START:
     782        7432 :             result = parse_array(lex, sem);
     783        7328 :             break;
     784       10198 :         default:
     785       10198 :             result = parse_scalar(lex, sem);    /* json can be a bare scalar */
     786             :     }
     787             : 
     788       38182 :     if (result == JSON_SUCCESS)
     789       37738 :         result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
     790             : 
     791       38182 :     return result;
     792             : #endif
     793             : }
     794             : 
     795             : /*
     796             :  * json_count_array_elements
     797             :  *
     798             :  * Returns number of array elements in lex context at start of array token
     799             :  * until end of array token at same nesting level.
     800             :  *
     801             :  * Designed to be called from array_start routines.
     802             :  */
     803             : JsonParseErrorType
     804           6 : json_count_array_elements(JsonLexContext *lex, int *elements)
     805             : {
     806             :     JsonLexContext copylex;
     807             :     int         count;
     808             :     JsonParseErrorType result;
     809             : 
     810           6 :     if (lex == &failed_oom)
     811           0 :         return JSON_OUT_OF_MEMORY;
     812             : 
     813             :     /*
     814             :      * It's safe to do this with a shallow copy because the lexical routines
     815             :      * don't scribble on the input. They do scribble on the other pointers
     816             :      * etc, so doing this with a copy makes that safe.
     817             :      */
     818           6 :     memcpy(&copylex, lex, sizeof(JsonLexContext));
     819           6 :     copylex.need_escapes = false;   /* not interested in values here */
     820           6 :     copylex.lex_level++;
     821             : 
     822           6 :     count = 0;
     823           6 :     result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
     824             :                         JSON_TOKEN_ARRAY_START);
     825           6 :     if (result != JSON_SUCCESS)
     826           0 :         return result;
     827           6 :     if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
     828             :     {
     829             :         while (1)
     830             :         {
     831          48 :             count++;
     832          48 :             result = parse_array_element(&copylex, &nullSemAction);
     833          48 :             if (result != JSON_SUCCESS)
     834           0 :                 return result;
     835          48 :             if (copylex.token_type != JSON_TOKEN_COMMA)
     836           6 :                 break;
     837          42 :             result = json_lex(&copylex);
     838          42 :             if (result != JSON_SUCCESS)
     839           0 :                 return result;
     840             :         }
     841             :     }
     842           6 :     result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
     843             :                         JSON_TOKEN_ARRAY_END);
     844           6 :     if (result != JSON_SUCCESS)
     845           0 :         return result;
     846             : 
     847           6 :     *elements = count;
     848           6 :     return JSON_SUCCESS;
     849             : }
     850             : 
     851             : /*
     852             :  * pg_parse_json_incremental
     853             :  *
     854             :  * Routine for incremental parsing of json. This uses the non-recursive top
     855             :  * down method of the Dragon Book Algorithm 4.3. It's somewhat slower than
     856             :  * the Recursive Descent pattern used above, so we only use it for incremental
     857             :  * parsing of JSON.
     858             :  *
     859             :  * The lexing context needs to be set up by a call to
     860             :  * makeJsonLexContextIncremental(). sem is a structure of function pointers
     861             :  * to semantic action routines, which should function exactly as those used
     862             :  * in the recursive descent parser.
     863             :  *
     864             :  * This routine can be called repeatedly with chunks of JSON. On the final
     865             :  * chunk is_last must be set to true. len is the length of the json chunk,
     866             :  * which does not need to be null terminated.
     867             :  */
     868             : JsonParseErrorType
     869      745888 : pg_parse_json_incremental(JsonLexContext *lex,
     870             :                           const JsonSemAction *sem,
     871             :                           const char *json,
     872             :                           size_t len,
     873             :                           bool is_last)
     874             : {
     875             :     JsonTokenType tok;
     876             :     JsonParseErrorType result;
     877      745888 :     JsonParseContext ctx = JSON_PARSE_VALUE;
     878      745888 :     JsonParserStack *pstack = lex->pstack;
     879             : 
     880      745888 :     if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
     881           0 :         return JSON_OUT_OF_MEMORY;
     882      745888 :     if (!lex->incremental)
     883           0 :         return JSON_INVALID_LEXER_TYPE;
     884             : 
     885      745888 :     lex->input = lex->token_terminator = lex->line_start = json;
     886      745888 :     lex->input_length = len;
     887      745888 :     lex->inc_state->is_last_chunk = is_last;
     888      745888 :     lex->inc_state->started = true;
     889             : 
     890             :     /* get the initial token */
     891      745888 :     result = json_lex(lex);
     892      745888 :     if (result != JSON_SUCCESS)
     893      143560 :         return result;
     894             : 
     895      602328 :     tok = lex_peek(lex);
     896             : 
     897             :     /* use prediction stack for incremental parsing */
     898             : 
     899      602328 :     if (!have_prediction(pstack))
     900             :     {
     901        3790 :         td_entry    goal = TD_ENTRY(JSON_PROD_GOAL);
     902             : 
     903        3790 :         push_prediction(pstack, goal);
     904             :     }
     905             : 
     906    46986660 :     while (have_prediction(pstack))
     907             :     {
     908    46984410 :         char        top = pop_prediction(pstack);
     909             :         td_entry    entry;
     910             : 
     911             :         /*
     912             :          * these first two branches are the guts of the Table Driven method
     913             :          */
     914    46984410 :         if (top == tok)
     915             :         {
     916             :             /*
     917             :              * tok can only be a terminal symbol, so top must be too. the
     918             :              * token matches the top of the stack, so get the next token.
     919             :              */
     920    12002172 :             if (tok < JSON_TOKEN_END)
     921             :             {
     922    11999922 :                 result = json_lex(lex);
     923    11999922 :                 if (result != JSON_SUCCESS)
     924      600076 :                     return result;
     925    11401240 :                 tok = lex_peek(lex);
     926             :             }
     927             :         }
     928    34982238 :         else if (IS_NT(top) && (entry = td_parser_table[OFS(top)][tok]).prod != NULL)
     929             :         {
     930             :             /*
     931             :              * the token is in the director set for a production of the
     932             :              * non-terminal at the top of the stack, so push the reversed RHS
     933             :              * of the production onto the stack.
     934             :              */
     935    14698002 :             push_prediction(pstack, entry);
     936             :         }
     937    20284236 :         else if (IS_SEM(top))
     938             :         {
     939             :             /*
     940             :              * top is a semantic action marker, so take action accordingly.
     941             :              * It's important to have these markers in the prediction stack
     942             :              * before any token they might need so we don't advance the token
     943             :              * prematurely. Note in a couple of cases we need to do something
     944             :              * both before and after the token.
     945             :              */
     946    20283354 :             switch (top)
     947             :             {
     948      252784 :                 case JSON_SEM_OSTART:
     949             :                     {
     950      252784 :                         json_struct_action ostart = sem->object_start;
     951             : 
     952      252784 :                         if (lex->lex_level >= JSON_TD_MAX_STACK)
     953           0 :                             return JSON_NESTING_TOO_DEEP;
     954             : 
     955      252784 :                         if (ostart != NULL)
     956             :                         {
     957      231432 :                             result = (*ostart) (sem->semstate);
     958      231432 :                             if (result != JSON_SUCCESS)
     959           0 :                                 return result;
     960             :                         }
     961             : 
     962      252784 :                         if (!inc_lex_level(lex))
     963           0 :                             return JSON_OUT_OF_MEMORY;
     964             :                     }
     965      252784 :                     break;
     966      252054 :                 case JSON_SEM_OEND:
     967             :                     {
     968      252054 :                         json_struct_action oend = sem->object_end;
     969             : 
     970      252054 :                         dec_lex_level(lex);
     971      252054 :                         if (oend != NULL)
     972             :                         {
     973      231430 :                             result = (*oend) (sem->semstate);
     974      231430 :                             if (result != JSON_SUCCESS)
     975           0 :                                 return result;
     976             :                         }
     977             :                     }
     978      252054 :                     break;
     979     4922548 :                 case JSON_SEM_ASTART:
     980             :                     {
     981     4922548 :                         json_struct_action astart = sem->array_start;
     982             : 
     983     4922548 :                         if (lex->lex_level >= JSON_TD_MAX_STACK)
     984         512 :                             return JSON_NESTING_TOO_DEEP;
     985             : 
     986     4922036 :                         if (astart != NULL)
     987             :                         {
     988         556 :                             result = (*astart) (sem->semstate);
     989         556 :                             if (result != JSON_SUCCESS)
     990           0 :                                 return result;
     991             :                         }
     992             : 
     993     4922036 :                         if (!inc_lex_level(lex))
     994           0 :                             return JSON_OUT_OF_MEMORY;
     995             :                     }
     996     4922036 :                     break;
     997     1644964 :                 case JSON_SEM_AEND:
     998             :                     {
     999     1644964 :                         json_struct_action aend = sem->array_end;
    1000             : 
    1001     1644964 :                         dec_lex_level(lex);
    1002     1644964 :                         if (aend != NULL)
    1003             :                         {
    1004         556 :                             result = (*aend) (sem->semstate);
    1005         556 :                             if (result != JSON_SUCCESS)
    1006           0 :                                 return result;
    1007             :                         }
    1008             :                     }
    1009     1644964 :                     break;
    1010     1237086 :                 case JSON_SEM_OFIELD_INIT:
    1011             :                     {
    1012             :                         /*
    1013             :                          * all we do here is save out the field name. We have
    1014             :                          * to wait to get past the ':' to see if the next
    1015             :                          * value is null so we can call the semantic routine
    1016             :                          */
    1017     1237086 :                         char       *fname = NULL;
    1018     1237086 :                         json_ofield_action ostart = sem->object_field_start;
    1019     1237086 :                         json_ofield_action oend = sem->object_field_end;
    1020             : 
    1021     1237086 :                         if ((ostart != NULL || oend != NULL) && lex->need_escapes)
    1022             :                         {
    1023     1156326 :                             fname = STRDUP(lex->strval->data);
    1024     1156326 :                             if (fname == NULL)
    1025           0 :                                 return JSON_OUT_OF_MEMORY;
    1026             :                         }
    1027     1237086 :                         set_fname(lex, fname);
    1028             :                     }
    1029     1237086 :                     break;
    1030     1236830 :                 case JSON_SEM_OFIELD_START:
    1031             :                     {
    1032             :                         /*
    1033             :                          * the current token should be the first token of the
    1034             :                          * value
    1035             :                          */
    1036     1236830 :                         bool        isnull = tok == JSON_TOKEN_NULL;
    1037     1236830 :                         json_ofield_action ostart = sem->object_field_start;
    1038             : 
    1039     1236830 :                         set_fnull(lex, isnull);
    1040             : 
    1041     1236830 :                         if (ostart != NULL)
    1042             :                         {
    1043     1156326 :                             char       *fname = get_fname(lex);
    1044             : 
    1045     1156326 :                             result = (*ostart) (sem->semstate, fname, isnull);
    1046     1156326 :                             if (result != JSON_SUCCESS)
    1047           0 :                                 return result;
    1048             :                         }
    1049             :                     }
    1050     1236830 :                     break;
    1051     1236756 :                 case JSON_SEM_OFIELD_END:
    1052             :                     {
    1053     1236756 :                         json_ofield_action oend = sem->object_field_end;
    1054             : 
    1055     1236756 :                         if (oend != NULL)
    1056             :                         {
    1057        1248 :                             char       *fname = get_fname(lex);
    1058        1248 :                             bool        isnull = get_fnull(lex);
    1059             : 
    1060        1248 :                             result = (*oend) (sem->semstate, fname, isnull);
    1061        1248 :                             if (result != JSON_SUCCESS)
    1062           0 :                                 return result;
    1063             :                         }
    1064             :                     }
    1065     1236756 :                     break;
    1066     5161676 :                 case JSON_SEM_AELEM_START:
    1067             :                     {
    1068     5161676 :                         json_aelem_action astart = sem->array_element_start;
    1069     5161676 :                         bool        isnull = tok == JSON_TOKEN_NULL;
    1070             : 
    1071     5161676 :                         set_fnull(lex, isnull);
    1072             : 
    1073     5161676 :                         if (astart != NULL)
    1074             :                         {
    1075         240 :                             result = (*astart) (sem->semstate, isnull);
    1076         240 :                             if (result != JSON_SUCCESS)
    1077           0 :                                 return result;
    1078             :                         }
    1079             :                     }
    1080     5161676 :                     break;
    1081     1884876 :                 case JSON_SEM_AELEM_END:
    1082             :                     {
    1083     1884876 :                         json_aelem_action aend = sem->array_element_end;
    1084             : 
    1085     1884876 :                         if (aend != NULL)
    1086             :                         {
    1087         240 :                             bool        isnull = get_fnull(lex);
    1088             : 
    1089         240 :                             result = (*aend) (sem->semstate, isnull);
    1090         240 :                             if (result != JSON_SUCCESS)
    1091           0 :                                 return result;
    1092             :                         }
    1093             :                     }
    1094     1884876 :                     break;
    1095     1226890 :                 case JSON_SEM_SCALAR_INIT:
    1096             :                     {
    1097     1226890 :                         json_scalar_action sfunc = sem->scalar;
    1098             : 
    1099     1226890 :                         pstack->scalar_val = NULL;
    1100             : 
    1101     1226890 :                         if (sfunc != NULL)
    1102             :                         {
    1103             :                             /*
    1104             :                              * extract the de-escaped string value, or the raw
    1105             :                              * lexeme
    1106             :                              */
    1107             :                             /*
    1108             :                              * XXX copied from RD parser but looks like a
    1109             :                              * buglet
    1110             :                              */
    1111     1155698 :                             if (tok == JSON_TOKEN_STRING)
    1112             :                             {
    1113      924110 :                                 if (lex->need_escapes)
    1114             :                                 {
    1115      924110 :                                     pstack->scalar_val = STRDUP(lex->strval->data);
    1116      924110 :                                     if (pstack->scalar_val == NULL)
    1117           0 :                                         return JSON_OUT_OF_MEMORY;
    1118             :                                 }
    1119             :                             }
    1120             :                             else
    1121             :                             {
    1122      231588 :                                 ptrdiff_t   tlen = (lex->token_terminator - lex->token_start);
    1123             : 
    1124      231588 :                                 pstack->scalar_val = ALLOC(tlen + 1);
    1125      231588 :                                 if (pstack->scalar_val == NULL)
    1126           0 :                                     return JSON_OUT_OF_MEMORY;
    1127             : 
    1128      231588 :                                 memcpy(pstack->scalar_val, lex->token_start, tlen);
    1129      231588 :                                 pstack->scalar_val[tlen] = '\0';
    1130             :                             }
    1131     1155698 :                             pstack->scalar_tok = tok;
    1132             :                         }
    1133             :                     }
    1134     1226890 :                     break;
    1135     1226890 :                 case JSON_SEM_SCALAR_CALL:
    1136             :                     {
    1137             :                         /*
    1138             :                          * We'd like to be able to get rid of this business of
    1139             :                          * two bits of scalar action, but we can't. It breaks
    1140             :                          * certain semantic actions which expect that when
    1141             :                          * called the lexer has consumed the item. See for
    1142             :                          * example get_scalar() in jsonfuncs.c.
    1143             :                          */
    1144     1226890 :                         json_scalar_action sfunc = sem->scalar;
    1145             : 
    1146     1226890 :                         if (sfunc != NULL)
    1147             :                         {
    1148     1155698 :                             result = (*sfunc) (sem->semstate, pstack->scalar_val, pstack->scalar_tok);
    1149             : 
    1150             :                             /*
    1151             :                              * Either ownership of the token passed to the
    1152             :                              * callback, or we need to free it now. Either
    1153             :                              * way, clear our pointer to it so it doesn't get
    1154             :                              * freed in the future.
    1155             :                              */
    1156     1155696 :                             if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
    1157         544 :                                 FREE(pstack->scalar_val);
    1158     1155696 :                             pstack->scalar_val = NULL;
    1159             : 
    1160     1155696 :                             if (result != JSON_SUCCESS)
    1161           0 :                                 return result;
    1162             :                         }
    1163             :                     }
    1164     1226888 :                     break;
    1165           0 :                 default:
    1166             :                     /* should not happen */
    1167           0 :                     break;
    1168             :             }
    1169             :         }
    1170             :         else
    1171             :         {
    1172             :             /*
    1173             :              * The token didn't match the stack top if it's a terminal nor a
    1174             :              * production for the stack top if it's a non-terminal.
    1175             :              *
    1176             :              * Various cases here are Asserted to be not possible, as the
    1177             :              * token would not appear at the top of the prediction stack
    1178             :              * unless the lookahead matched.
    1179             :              */
    1180         882 :             switch (top)
    1181             :             {
    1182         152 :                 case JSON_TOKEN_STRING:
    1183         152 :                     if (next_prediction(pstack) == JSON_TOKEN_COLON)
    1184         152 :                         ctx = JSON_PARSE_STRING;
    1185             :                     else
    1186             :                     {
    1187             :                         Assert(false);
    1188           0 :                         ctx = JSON_PARSE_VALUE;
    1189             :                     }
    1190         152 :                     break;
    1191           0 :                 case JSON_TOKEN_NUMBER:
    1192             :                 case JSON_TOKEN_TRUE:
    1193             :                 case JSON_TOKEN_FALSE:
    1194             :                 case JSON_TOKEN_NULL:
    1195             :                 case JSON_TOKEN_ARRAY_START:
    1196             :                 case JSON_TOKEN_OBJECT_START:
    1197             :                     Assert(false);
    1198           0 :                     ctx = JSON_PARSE_VALUE;
    1199           0 :                     break;
    1200           0 :                 case JSON_TOKEN_ARRAY_END:
    1201             :                     Assert(false);
    1202           0 :                     ctx = JSON_PARSE_ARRAY_NEXT;
    1203           0 :                     break;
    1204           0 :                 case JSON_TOKEN_OBJECT_END:
    1205             :                     Assert(false);
    1206           0 :                     ctx = JSON_PARSE_OBJECT_NEXT;
    1207           0 :                     break;
    1208           0 :                 case JSON_TOKEN_COMMA:
    1209             :                     Assert(false);
    1210           0 :                     if (next_prediction(pstack) == JSON_TOKEN_STRING)
    1211           0 :                         ctx = JSON_PARSE_OBJECT_NEXT;
    1212             :                     else
    1213           0 :                         ctx = JSON_PARSE_ARRAY_NEXT;
    1214           0 :                     break;
    1215         104 :                 case JSON_TOKEN_COLON:
    1216         104 :                     ctx = JSON_PARSE_OBJECT_LABEL;
    1217         104 :                     break;
    1218          24 :                 case JSON_TOKEN_END:
    1219          24 :                     ctx = JSON_PARSE_END;
    1220          24 :                     break;
    1221          72 :                 case JSON_NT_MORE_ARRAY_ELEMENTS:
    1222          72 :                     ctx = JSON_PARSE_ARRAY_NEXT;
    1223          72 :                     break;
    1224          56 :                 case JSON_NT_ARRAY_ELEMENTS:
    1225          56 :                     ctx = JSON_PARSE_ARRAY_START;
    1226          56 :                     break;
    1227         280 :                 case JSON_NT_MORE_KEY_PAIRS:
    1228         280 :                     ctx = JSON_PARSE_OBJECT_NEXT;
    1229         280 :                     break;
    1230         120 :                 case JSON_NT_KEY_PAIRS:
    1231         120 :                     ctx = JSON_PARSE_OBJECT_START;
    1232         120 :                     break;
    1233          74 :                 default:
    1234          74 :                     ctx = JSON_PARSE_VALUE;
    1235             :             }
    1236         882 :             return report_parse_error(ctx, lex);
    1237             :         }
    1238             :     }
    1239             : 
    1240        2250 :     return JSON_SUCCESS;
    1241             : }
    1242             : 
    1243             : /*
    1244             :  *  Recursive Descent parse routines. There is one for each structural
    1245             :  *  element in a json document:
    1246             :  *    - scalar (string, number, true, false, null)
    1247             :  *    - array  ( [ ] )
    1248             :  *    - array element
    1249             :  *    - object ( { } )
    1250             :  *    - object field
    1251             :  */
    1252             : static inline JsonParseErrorType
    1253      336096 : parse_scalar(JsonLexContext *lex, const JsonSemAction *sem)
    1254             : {
    1255      336096 :     char       *val = NULL;
    1256      336096 :     json_scalar_action sfunc = sem->scalar;
    1257      336096 :     JsonTokenType tok = lex_peek(lex);
    1258             :     JsonParseErrorType result;
    1259             : 
    1260             :     /* a scalar must be a string, a number, true, false, or null */
    1261      336096 :     if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
    1262       33122 :         tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
    1263             :         tok != JSON_TOKEN_NULL)
    1264         188 :         return report_parse_error(JSON_PARSE_VALUE, lex);
    1265             : 
    1266             :     /* if no semantic function, just consume the token */
    1267      335908 :     if (sfunc == NULL)
    1268       11894 :         return json_lex(lex);
    1269             : 
    1270             :     /* extract the de-escaped string value, or the raw lexeme */
    1271      324014 :     if (lex_peek(lex) == JSON_TOKEN_STRING)
    1272             :     {
    1273       75800 :         if (lex->need_escapes)
    1274             :         {
    1275       70124 :             val = STRDUP(lex->strval->data);
    1276       70124 :             if (val == NULL)
    1277           0 :                 return JSON_OUT_OF_MEMORY;
    1278             :         }
    1279             :     }
    1280             :     else
    1281             :     {
    1282      248214 :         int         len = (lex->token_terminator - lex->token_start);
    1283             : 
    1284      248214 :         val = ALLOC(len + 1);
    1285      248214 :         if (val == NULL)
    1286           0 :             return JSON_OUT_OF_MEMORY;
    1287             : 
    1288      248214 :         memcpy(val, lex->token_start, len);
    1289      248214 :         val[len] = '\0';
    1290             :     }
    1291             : 
    1292             :     /* consume the token */
    1293      324014 :     result = json_lex(lex);
    1294      324014 :     if (result != JSON_SUCCESS)
    1295             :     {
    1296           0 :         FREE(val);
    1297           0 :         return result;
    1298             :     }
    1299             : 
    1300             :     /*
    1301             :      * invoke the callback, which may take ownership of val. For string
    1302             :      * values, val is NULL if need_escapes is false.
    1303             :      */
    1304      324014 :     result = (*sfunc) (sem->semstate, val, tok);
    1305             : 
    1306      323918 :     if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
    1307           0 :         FREE(val);
    1308             : 
    1309      323918 :     return result;
    1310             : }
    1311             : 
    1312             : static JsonParseErrorType
    1313      325786 : parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
    1314             : {
    1315             :     /*
    1316             :      * An object field is "fieldname" : value where value can be a scalar,
    1317             :      * object or array.  Note: in user-facing docs and error messages, we
    1318             :      * generally call a field name a "key".
    1319             :      */
    1320             : 
    1321      325786 :     char       *fname = NULL;
    1322      325786 :     json_ofield_action ostart = sem->object_field_start;
    1323      325786 :     json_ofield_action oend = sem->object_field_end;
    1324             :     bool        isnull;
    1325             :     JsonTokenType tok;
    1326             :     JsonParseErrorType result;
    1327             : 
    1328      325786 :     if (lex_peek(lex) != JSON_TOKEN_STRING)
    1329          12 :         return report_parse_error(JSON_PARSE_STRING, lex);
    1330      325774 :     if ((ostart != NULL || oend != NULL) && lex->need_escapes)
    1331             :     {
    1332             :         /* fname is NULL if need_escapes is false */
    1333      257556 :         fname = STRDUP(lex->strval->data);
    1334      257556 :         if (fname == NULL)
    1335           0 :             return JSON_OUT_OF_MEMORY;
    1336             :     }
    1337      325774 :     result = json_lex(lex);
    1338      325774 :     if (result != JSON_SUCCESS)
    1339             :     {
    1340          12 :         FREE(fname);
    1341          12 :         return result;
    1342             :     }
    1343             : 
    1344      325762 :     result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
    1345      325762 :     if (result != JSON_SUCCESS)
    1346             :     {
    1347          90 :         FREE(fname);
    1348          90 :         return result;
    1349             :     }
    1350             : 
    1351      325672 :     tok = lex_peek(lex);
    1352      325672 :     isnull = tok == JSON_TOKEN_NULL;
    1353             : 
    1354      325672 :     if (ostart != NULL)
    1355             :     {
    1356      257472 :         result = (*ostart) (sem->semstate, fname, isnull);
    1357      257464 :         if (result != JSON_SUCCESS)
    1358           0 :             goto ofield_cleanup;
    1359             :     }
    1360             : 
    1361      325664 :     switch (tok)
    1362             :     {
    1363       11276 :         case JSON_TOKEN_OBJECT_START:
    1364       11276 :             result = parse_object(lex, sem);
    1365        4414 :             break;
    1366       15348 :         case JSON_TOKEN_ARRAY_START:
    1367       15348 :             result = parse_array(lex, sem);
    1368       15314 :             break;
    1369      299040 :         default:
    1370      299040 :             result = parse_scalar(lex, sem);
    1371             :     }
    1372      318762 :     if (result != JSON_SUCCESS)
    1373          42 :         goto ofield_cleanup;
    1374             : 
    1375      318720 :     if (oend != NULL)
    1376             :     {
    1377      183770 :         result = (*oend) (sem->semstate, fname, isnull);
    1378      183770 :         if (result != JSON_SUCCESS)
    1379           0 :             goto ofield_cleanup;
    1380             :     }
    1381             : 
    1382      318720 : ofield_cleanup:
    1383      318762 :     if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
    1384           0 :         FREE(fname);
    1385      318762 :     return result;
    1386             : }
    1387             : 
    1388             : static JsonParseErrorType
    1389       51030 : parse_object(JsonLexContext *lex, const JsonSemAction *sem)
    1390             : {
    1391             :     /*
    1392             :      * an object is a possibly empty sequence of object fields, separated by
    1393             :      * commas and surrounded by curly braces.
    1394             :      */
    1395       51030 :     json_struct_action ostart = sem->object_start;
    1396       51030 :     json_struct_action oend = sem->object_end;
    1397             :     JsonTokenType tok;
    1398             :     JsonParseErrorType result;
    1399             : 
    1400             : #ifndef FRONTEND
    1401             : 
    1402             :     /*
    1403             :      * TODO: clients need some way to put a bound on stack growth. Parse level
    1404             :      * limits maybe?
    1405             :      */
    1406       45106 :     check_stack_depth();
    1407             : #endif
    1408             : 
    1409       51018 :     if (ostart != NULL)
    1410             :     {
    1411       31462 :         result = (*ostart) (sem->semstate);
    1412       31442 :         if (result != JSON_SUCCESS)
    1413           0 :             return result;
    1414             :     }
    1415             : 
    1416             :     /*
    1417             :      * Data inside an object is at a higher nesting level than the object
    1418             :      * itself. Note that we increment this after we call the semantic routine
    1419             :      * for the object start and restore it before we call the routine for the
    1420             :      * object end.
    1421             :      */
    1422       50998 :     lex->lex_level++;
    1423             : 
    1424             :     Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START);
    1425       50998 :     result = json_lex(lex);
    1426       50998 :     if (result != JSON_SUCCESS)
    1427          62 :         return result;
    1428             : 
    1429       50936 :     tok = lex_peek(lex);
    1430       50936 :     switch (tok)
    1431             :     {
    1432       47956 :         case JSON_TOKEN_STRING:
    1433       47956 :             result = parse_object_field(lex, sem);
    1434      318876 :             while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
    1435             :             {
    1436      277830 :                 result = json_lex(lex);
    1437      277830 :                 if (result != JSON_SUCCESS)
    1438           0 :                     break;
    1439      277830 :                 result = parse_object_field(lex, sem);
    1440             :             }
    1441       41046 :             break;
    1442        2966 :         case JSON_TOKEN_OBJECT_END:
    1443        2966 :             break;
    1444          14 :         default:
    1445             :             /* case of an invalid initial token inside the object */
    1446          14 :             result = report_parse_error(JSON_PARSE_OBJECT_START, lex);
    1447             :     }
    1448       44026 :     if (result != JSON_SUCCESS)
    1449         170 :         return result;
    1450             : 
    1451       43856 :     result = lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
    1452       43856 :     if (result != JSON_SUCCESS)
    1453          36 :         return result;
    1454             : 
    1455       43820 :     lex->lex_level--;
    1456             : 
    1457       43820 :     if (oend != NULL)
    1458             :     {
    1459       25522 :         result = (*oend) (sem->semstate);
    1460       25476 :         if (result != JSON_SUCCESS)
    1461           0 :             return result;
    1462             :     }
    1463             : 
    1464       43774 :     return JSON_SUCCESS;
    1465             : }
    1466             : 
    1467             : static JsonParseErrorType
    1468       58692 : parse_array_element(JsonLexContext *lex, const JsonSemAction *sem)
    1469             : {
    1470       58692 :     json_aelem_action astart = sem->array_element_start;
    1471       58692 :     json_aelem_action aend = sem->array_element_end;
    1472       58692 :     JsonTokenType tok = lex_peek(lex);
    1473             :     JsonParseErrorType result;
    1474             :     bool        isnull;
    1475             : 
    1476       58692 :     isnull = tok == JSON_TOKEN_NULL;
    1477             : 
    1478       58692 :     if (astart != NULL)
    1479             :     {
    1480        7804 :         result = (*astart) (sem->semstate, isnull);
    1481        7804 :         if (result != JSON_SUCCESS)
    1482           0 :             return result;
    1483             :     }
    1484             : 
    1485             :     /* an array element is any object, array or scalar */
    1486       58692 :     switch (tok)
    1487             :     {
    1488       18958 :         case JSON_TOKEN_OBJECT_START:
    1489       18958 :             result = parse_object(lex, sem);
    1490       18900 :             break;
    1491       12876 :         case JSON_TOKEN_ARRAY_START:
    1492       12876 :             result = parse_array(lex, sem);
    1493        4280 :             break;
    1494       26858 :         default:
    1495       26858 :             result = parse_scalar(lex, sem);
    1496             :     }
    1497             : 
    1498       50020 :     if (result != JSON_SUCCESS)
    1499          66 :         return result;
    1500             : 
    1501       49954 :     if (aend != NULL)
    1502             :     {
    1503        7228 :         result = (*aend) (sem->semstate, isnull);
    1504        7216 :         if (result != JSON_SUCCESS)
    1505           0 :             return result;
    1506             :     }
    1507             : 
    1508       49942 :     return JSON_SUCCESS;
    1509             : }
    1510             : 
    1511             : static JsonParseErrorType
    1512       35656 : parse_array(JsonLexContext *lex, const JsonSemAction *sem)
    1513             : {
    1514             :     /*
    1515             :      * an array is a possibly empty sequence of array elements, separated by
    1516             :      * commas and surrounded by square brackets.
    1517             :      */
    1518       35656 :     json_struct_action astart = sem->array_start;
    1519       35656 :     json_struct_action aend = sem->array_end;
    1520             :     JsonParseErrorType result;
    1521             : 
    1522             : #ifndef FRONTEND
    1523       35600 :     check_stack_depth();
    1524             : #endif
    1525             : 
    1526       35644 :     if (astart != NULL)
    1527             :     {
    1528       17748 :         result = (*astart) (sem->semstate);
    1529       17734 :         if (result != JSON_SUCCESS)
    1530           0 :             return result;
    1531             :     }
    1532             : 
    1533             :     /*
    1534             :      * Data inside an array is at a higher nesting level than the array
    1535             :      * itself. Note that we increment this after we call the semantic routine
    1536             :      * for the array start and restore it before we call the routine for the
    1537             :      * array end.
    1538             :      */
    1539       35630 :     lex->lex_level++;
    1540             : 
    1541       35630 :     result = lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
    1542       35630 :     if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
    1543             :     {
    1544       28152 :         result = parse_array_element(lex, sem);
    1545             : 
    1546       49960 :         while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
    1547             :         {
    1548       30492 :             result = json_lex(lex);
    1549       30492 :             if (result != JSON_SUCCESS)
    1550           0 :                 break;
    1551       30492 :             result = parse_array_element(lex, sem);
    1552             :         }
    1553             :     }
    1554       26946 :     if (result != JSON_SUCCESS)
    1555          66 :         return result;
    1556             : 
    1557       26880 :     result = lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
    1558       26880 :     if (result != JSON_SUCCESS)
    1559          24 :         return result;
    1560             : 
    1561       26856 :     lex->lex_level--;
    1562             : 
    1563       26856 :     if (aend != NULL)
    1564             :     {
    1565       11244 :         result = (*aend) (sem->semstate);
    1566       11220 :         if (result != JSON_SUCCESS)
    1567           0 :             return result;
    1568             :     }
    1569             : 
    1570       26832 :     return JSON_SUCCESS;
    1571             : }
    1572             : 
    1573             : /*
    1574             :  * Lex one token from the input stream.
    1575             :  *
    1576             :  * When doing incremental parsing, we can reach the end of the input string
    1577             :  * without having (or knowing we have) a complete token. If it's not the
    1578             :  * final chunk of input, the partial token is then saved to the lex
    1579             :  * structure's ptok StringInfo. On subsequent calls input is appended to this
    1580             :  * buffer until we have something that we think is a complete token,
    1581             :  * which is then lexed using a recursive call to json_lex. Processing then
    1582             :  * continues as normal on subsequent calls.
    1583             :  *
    1584             :  * Note than when doing incremental processing, the lex.prev_token_terminator
    1585             :  * should not be relied on. It could point into a previous input chunk or
    1586             :  * worse.
    1587             :  */
    1588             : JsonParseErrorType
    1589    14345482 : json_lex(JsonLexContext *lex)
    1590             : {
    1591             :     const char *s;
    1592    14345482 :     const char *const end = lex->input + lex->input_length;
    1593             :     JsonParseErrorType result;
    1594             : 
    1595    14345482 :     if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
    1596           0 :         return JSON_OUT_OF_MEMORY;
    1597             : 
    1598    14345482 :     if (lex->incremental)
    1599             :     {
    1600    12745810 :         if (lex->inc_state->partial_completed)
    1601             :         {
    1602             :             /*
    1603             :              * We just lexed a completed partial token on the last call, so
    1604             :              * reset everything
    1605             :              */
    1606       68856 :             jsonapi_resetStringInfo(&(lex->inc_state->partial_token));
    1607       68856 :             lex->token_terminator = lex->input;
    1608       68856 :             lex->inc_state->partial_completed = false;
    1609             :         }
    1610             : 
    1611             : #ifdef JSONAPI_USE_PQEXPBUFFER
    1612             :         /* Make sure our partial token buffer is valid before using it below. */
    1613     3831520 :         if (PQExpBufferDataBroken(lex->inc_state->partial_token))
    1614           0 :             return JSON_OUT_OF_MEMORY;
    1615             : #endif
    1616             :     }
    1617             : 
    1618    14345482 :     s = lex->token_terminator;
    1619             : 
    1620    14345482 :     if (lex->incremental && lex->inc_state->partial_token.len)
    1621             :     {
    1622             :         /*
    1623             :          * We have a partial token. Extend it and if completed lex it by a
    1624             :          * recursive call
    1625             :          */
    1626      166400 :         jsonapi_StrValType *ptok = &(lex->inc_state->partial_token);
    1627      166400 :         size_t      added = 0;
    1628      166400 :         bool        tok_done = false;
    1629      166400 :         JsonLexContext dummy_lex = {0};
    1630             :         JsonParseErrorType partial_result;
    1631             : 
    1632      166400 :         if (ptok->data[0] == '"')
    1633             :         {
    1634             :             /*
    1635             :              * It's a string. Accumulate characters until we reach an
    1636             :              * unescaped '"'.
    1637             :              */
    1638      160152 :             int         escapes = 0;
    1639             : 
    1640      162720 :             for (int i = ptok->len - 1; i > 0; i--)
    1641             :             {
    1642             :                 /* count the trailing backslashes on the partial token */
    1643      151340 :                 if (ptok->data[i] == '\\')
    1644        2568 :                     escapes++;
    1645             :                 else
    1646      148772 :                     break;
    1647             :             }
    1648             : 
    1649     1214554 :             for (size_t i = 0; i < lex->input_length; i++)
    1650             :             {
    1651     1119410 :                 char        c = lex->input[i];
    1652             : 
    1653     1119410 :                 jsonapi_appendStringInfoCharMacro(ptok, c);
    1654     1119410 :                 added++;
    1655     1119410 :                 if (c == '"' && escapes % 2 == 0)
    1656             :                 {
    1657       65008 :                     tok_done = true;
    1658       65008 :                     break;
    1659             :                 }
    1660     1054402 :                 if (c == '\\')
    1661        4096 :                     escapes++;
    1662             :                 else
    1663     1050306 :                     escapes = 0;
    1664             :             }
    1665             :         }
    1666             :         else
    1667             :         {
    1668             :             /* not a string */
    1669        6248 :             char        c = ptok->data[0];
    1670             : 
    1671        6248 :             if (c == '-' || (c >= '0' && c <= '9'))
    1672             :             {
    1673             :                 /* for numbers look for possible numeric continuations */
    1674             : 
    1675        1312 :                 bool        numend = false;
    1676             : 
    1677        3680 :                 for (size_t i = 0; i < lex->input_length && !numend; i++)
    1678             :                 {
    1679        2368 :                     char        cc = lex->input[i];
    1680             : 
    1681        2368 :                     switch (cc)
    1682             :                     {
    1683        1592 :                         case '+':
    1684             :                         case '-':
    1685             :                         case 'e':
    1686             :                         case 'E':
    1687             :                         case '0':
    1688             :                         case '1':
    1689             :                         case '2':
    1690             :                         case '3':
    1691             :                         case '4':
    1692             :                         case '5':
    1693             :                         case '6':
    1694             :                         case '7':
    1695             :                         case '8':
    1696             :                         case '9':
    1697             :                             {
    1698        1592 :                                 jsonapi_appendStringInfoCharMacro(ptok, cc);
    1699        1592 :                                 added++;
    1700             :                             }
    1701        1592 :                             break;
    1702         776 :                         default:
    1703         776 :                             numend = true;
    1704             :                     }
    1705             :                 }
    1706             :             }
    1707             : 
    1708             :             /*
    1709             :              * Add any remaining alphanumeric chars. This takes care of the
    1710             :              * {null, false, true} literals as well as any trailing
    1711             :              * alphanumeric junk on non-string tokens.
    1712             :              */
    1713       12896 :             for (size_t i = added; i < lex->input_length; i++)
    1714             :             {
    1715       10936 :                 char        cc = lex->input[i];
    1716             : 
    1717       10936 :                 if (JSON_ALPHANUMERIC_CHAR(cc))
    1718             :                 {
    1719        6648 :                     jsonapi_appendStringInfoCharMacro(ptok, cc);
    1720        6648 :                     added++;
    1721             :                 }
    1722             :                 else
    1723             :                 {
    1724        4288 :                     tok_done = true;
    1725        4288 :                     break;
    1726             :                 }
    1727             :             }
    1728        6248 :             if (added == lex->input_length &&
    1729        1960 :                 lex->inc_state->is_last_chunk)
    1730             :             {
    1731         152 :                 tok_done = true;
    1732             :             }
    1733             :         }
    1734             : 
    1735      166400 :         if (!tok_done)
    1736             :         {
    1737             :             /* We should have consumed the whole chunk in this case. */
    1738             :             Assert(added == lex->input_length);
    1739             : 
    1740       96952 :             if (!lex->inc_state->is_last_chunk)
    1741       96872 :                 return JSON_INCOMPLETE;
    1742             : 
    1743             :             /* json_errdetail() needs access to the accumulated token. */
    1744          80 :             lex->token_start = ptok->data;
    1745          80 :             lex->token_terminator = ptok->data + ptok->len;
    1746          80 :             return JSON_INVALID_TOKEN;
    1747             :         }
    1748             : 
    1749             :         /*
    1750             :          * Everything up to lex->input[added] has been added to the partial
    1751             :          * token, so move the input past it.
    1752             :          */
    1753       69448 :         lex->input += added;
    1754       69448 :         lex->input_length -= added;
    1755             : 
    1756       69448 :         dummy_lex.input = dummy_lex.token_terminator =
    1757       69448 :             dummy_lex.line_start = ptok->data;
    1758       69448 :         dummy_lex.line_number = lex->line_number;
    1759       69448 :         dummy_lex.input_length = ptok->len;
    1760       69448 :         dummy_lex.input_encoding = lex->input_encoding;
    1761       69448 :         dummy_lex.incremental = false;
    1762       69448 :         dummy_lex.need_escapes = lex->need_escapes;
    1763       69448 :         dummy_lex.strval = lex->strval;
    1764             : 
    1765       69448 :         partial_result = json_lex(&dummy_lex);
    1766             : 
    1767             :         /*
    1768             :          * We either have a complete token or an error. In either case we need
    1769             :          * to point to the partial token data for the semantic or error
    1770             :          * routines. If it's not an error we'll readjust on the next call to
    1771             :          * json_lex.
    1772             :          */
    1773       69448 :         lex->token_type = dummy_lex.token_type;
    1774       69448 :         lex->line_number = dummy_lex.line_number;
    1775             : 
    1776             :         /*
    1777             :          * We know the prev_token_terminator must be back in some previous
    1778             :          * piece of input, so we just make it NULL.
    1779             :          */
    1780       69448 :         lex->prev_token_terminator = NULL;
    1781             : 
    1782             :         /*
    1783             :          * Normally token_start would be ptok->data, but it could be later,
    1784             :          * see json_lex_string's handling of invalid escapes.
    1785             :          */
    1786       69448 :         lex->token_start = dummy_lex.token_start;
    1787       69448 :         lex->token_terminator = dummy_lex.token_terminator;
    1788       69448 :         if (partial_result == JSON_SUCCESS)
    1789             :         {
    1790             :             /* make sure we've used all the input */
    1791       69232 :             if (lex->token_terminator - lex->token_start != ptok->len)
    1792             :             {
    1793             :                 Assert(false);
    1794           0 :                 return JSON_INVALID_TOKEN;
    1795             :             }
    1796             : 
    1797       69232 :             lex->inc_state->partial_completed = true;
    1798             :         }
    1799       69448 :         return partial_result;
    1800             :         /* end of partial token processing */
    1801             :     }
    1802             : 
    1803             :     /* Skip leading whitespace. */
    1804    22869490 :     while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
    1805             :     {
    1806     8690408 :         if (*s++ == '\n')
    1807             :         {
    1808      577080 :             ++lex->line_number;
    1809      577080 :             lex->line_start = s;
    1810             :         }
    1811             :     }
    1812    14179082 :     lex->token_start = s;
    1813             : 
    1814             :     /* Determine token type. */
    1815    14179082 :     if (s >= end)
    1816             :     {
    1817      653374 :         lex->token_start = NULL;
    1818      653374 :         lex->prev_token_terminator = lex->token_terminator;
    1819      653374 :         lex->token_terminator = s;
    1820      653374 :         lex->token_type = JSON_TOKEN_END;
    1821             :     }
    1822             :     else
    1823             :     {
    1824    13525708 :         switch (*s)
    1825             :         {
    1826             :                 /* Single-character token, some kind of punctuation mark. */
    1827      304166 :             case '{':
    1828      304166 :                 lex->prev_token_terminator = lex->token_terminator;
    1829      304166 :                 lex->token_terminator = s + 1;
    1830      304166 :                 lex->token_type = JSON_TOKEN_OBJECT_START;
    1831      304166 :                 break;
    1832      296038 :             case '}':
    1833      296038 :                 lex->prev_token_terminator = lex->token_terminator;
    1834      296038 :                 lex->token_terminator = s + 1;
    1835      296038 :                 lex->token_type = JSON_TOKEN_OBJECT_END;
    1836      296038 :                 break;
    1837     4958342 :             case '[':
    1838     4958342 :                 lex->prev_token_terminator = lex->token_terminator;
    1839     4958342 :                 lex->token_terminator = s + 1;
    1840     4958342 :                 lex->token_type = JSON_TOKEN_ARRAY_START;
    1841     4958342 :                 break;
    1842     1671994 :             case ']':
    1843     1671994 :                 lex->prev_token_terminator = lex->token_terminator;
    1844     1671994 :                 lex->token_terminator = s + 1;
    1845     1671994 :                 lex->token_type = JSON_TOKEN_ARRAY_END;
    1846     1671994 :                 break;
    1847     1535850 :             case ',':
    1848     1535850 :                 lex->prev_token_terminator = lex->token_terminator;
    1849     1535850 :                 lex->token_terminator = s + 1;
    1850     1535850 :                 lex->token_type = JSON_TOKEN_COMMA;
    1851     1535850 :                 break;
    1852     1562592 :             case ':':
    1853     1562592 :                 lex->prev_token_terminator = lex->token_terminator;
    1854     1562592 :                 lex->token_terminator = s + 1;
    1855     1562592 :                 lex->token_type = JSON_TOKEN_COLON;
    1856     1562592 :                 break;
    1857     2686442 :             case '"':
    1858             :                 /* string */
    1859     2686442 :                 result = json_lex_string(lex);
    1860     2686442 :                 if (result != JSON_SUCCESS)
    1861       65466 :                     return result;
    1862     2620976 :                 lex->token_type = JSON_TOKEN_STRING;
    1863     2620976 :                 break;
    1864         372 :             case '-':
    1865             :                 /* Negative number. */
    1866         372 :                 result = json_lex_number(lex, s + 1, NULL, NULL);
    1867         372 :                 if (result != JSON_SUCCESS)
    1868           0 :                     return result;
    1869         372 :                 lex->token_type = JSON_TOKEN_NUMBER;
    1870         372 :                 break;
    1871      449658 :             case '0':
    1872             :             case '1':
    1873             :             case '2':
    1874             :             case '3':
    1875             :             case '4':
    1876             :             case '5':
    1877             :             case '6':
    1878             :             case '7':
    1879             :             case '8':
    1880             :             case '9':
    1881             :                 /* Positive number. */
    1882      449658 :                 result = json_lex_number(lex, s, NULL, NULL);
    1883      449658 :                 if (result != JSON_SUCCESS)
    1884         904 :                     return result;
    1885      448754 :                 lex->token_type = JSON_TOKEN_NUMBER;
    1886      448754 :                 break;
    1887       60254 :             default:
    1888             :                 {
    1889             :                     const char *p;
    1890             : 
    1891             :                     /*
    1892             :                      * We're not dealing with a string, number, legal
    1893             :                      * punctuation mark, or end of string.  The only legal
    1894             :                      * tokens we might find here are true, false, and null,
    1895             :                      * but for error reporting purposes we scan until we see a
    1896             :                      * non-alphanumeric character.  That way, we can report
    1897             :                      * the whole word as an unexpected token, rather than just
    1898             :                      * some unintuitive prefix thereof.
    1899             :                      */
    1900      324992 :                     for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++)
    1901             :                          /* skip */ ;
    1902             : 
    1903             :                     /*
    1904             :                      * We got some sort of unexpected punctuation or an
    1905             :                      * otherwise unexpected character, so just complain about
    1906             :                      * that one character.
    1907             :                      */
    1908       60254 :                     if (p == s)
    1909             :                     {
    1910          80 :                         lex->prev_token_terminator = lex->token_terminator;
    1911          80 :                         lex->token_terminator = s + 1;
    1912          80 :                         return JSON_INVALID_TOKEN;
    1913             :                     }
    1914             : 
    1915       60174 :                     if (lex->incremental && !lex->inc_state->is_last_chunk &&
    1916       15664 :                         p == lex->input + lex->input_length)
    1917             :                     {
    1918        3664 :                         jsonapi_appendBinaryStringInfo(&(lex->inc_state->partial_token), s, end - s);
    1919        3664 :                         return JSON_INCOMPLETE;
    1920             :                     }
    1921             : 
    1922             :                     /*
    1923             :                      * We've got a real alphanumeric token here.  If it
    1924             :                      * happens to be true, false, or null, all is well.  If
    1925             :                      * not, error out.
    1926             :                      */
    1927       56510 :                     lex->prev_token_terminator = lex->token_terminator;
    1928       56510 :                     lex->token_terminator = p;
    1929       56510 :                     if (p - s == 4)
    1930             :                     {
    1931       26084 :                         if (memcmp(s, "true", 4) == 0)
    1932        7472 :                             lex->token_type = JSON_TOKEN_TRUE;
    1933       18612 :                         else if (memcmp(s, "null", 4) == 0)
    1934       18600 :                             lex->token_type = JSON_TOKEN_NULL;
    1935             :                         else
    1936          12 :                             return JSON_INVALID_TOKEN;
    1937             :                     }
    1938       30426 :                     else if (p - s == 5 && memcmp(s, "false", 5) == 0)
    1939       30188 :                         lex->token_type = JSON_TOKEN_FALSE;
    1940             :                     else
    1941         238 :                         return JSON_INVALID_TOKEN;
    1942             :                 }
    1943             :         }                       /* end of switch */
    1944             :     }
    1945             : 
    1946    14108718 :     if (lex->incremental && lex->token_type == JSON_TOKEN_END && !lex->inc_state->is_last_chunk)
    1947      575330 :         return JSON_INCOMPLETE;
    1948             :     else
    1949    13533388 :         return JSON_SUCCESS;
    1950             : }
    1951             : 
    1952             : /*
    1953             :  * The next token in the input stream is known to be a string; lex it.
    1954             :  *
    1955             :  * If lex->strval isn't NULL, fill it with the decoded string.
    1956             :  * Set lex->token_terminator to the end of the decoded input, and in
    1957             :  * success cases, transfer its previous value to lex->prev_token_terminator.
    1958             :  * Return JSON_SUCCESS or an error code.
    1959             :  *
    1960             :  * Note: be careful that all error exits advance lex->token_terminator
    1961             :  * to the point after the character we detected the error on.
    1962             :  */
    1963             : static inline JsonParseErrorType
    1964     2686442 : json_lex_string(JsonLexContext *lex)
    1965             : {
    1966             :     const char *s;
    1967     2686442 :     const char *const end = lex->input + lex->input_length;
    1968     2686442 :     int         hi_surrogate = -1;
    1969             : 
    1970             :     /* Convenience macros for error exits */
    1971             : #define FAIL_OR_INCOMPLETE_AT_CHAR_START(code) \
    1972             :     do { \
    1973             :         if (lex->incremental && !lex->inc_state->is_last_chunk) \
    1974             :         { \
    1975             :             jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token, \
    1976             :                                            lex->token_start, \
    1977             :                                            end - lex->token_start); \
    1978             :             return JSON_INCOMPLETE; \
    1979             :         } \
    1980             :         lex->token_terminator = s; \
    1981             :         return code; \
    1982             :     } while (0)
    1983             : #define FAIL_AT_CHAR_END(code) \
    1984             :     do { \
    1985             :         ptrdiff_t   remaining = end - s; \
    1986             :         int         charlen; \
    1987             :         charlen = pg_encoding_mblen_or_incomplete(lex->input_encoding, \
    1988             :                                                   s, remaining); \
    1989             :         lex->token_terminator = (charlen <= remaining) ? s + charlen : end; \
    1990             :         return code; \
    1991             :     } while (0)
    1992             : 
    1993     2686442 :     if (lex->need_escapes)
    1994             :     {
    1995             : #ifdef JSONAPI_USE_PQEXPBUFFER
    1996             :         /* make sure initialization succeeded */
    1997        1336 :         if (lex->strval == NULL)
    1998           0 :             return JSON_OUT_OF_MEMORY;
    1999             : #endif
    2000     2422138 :         jsonapi_resetStringInfo(lex->strval);
    2001             :     }
    2002             : 
    2003             :     Assert(lex->input_length > 0);
    2004     2686442 :     s = lex->token_start;
    2005             :     for (;;)
    2006             :     {
    2007     5373382 :         s++;
    2008             :         /* Premature end of the string. */
    2009     5373382 :         if (s >= end)
    2010       64660 :             FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
    2011     5308722 :         else if (*s == '"')
    2012     2620976 :             break;
    2013     2687746 :         else if (*s == '\\')
    2014             :         {
    2015             :             /* OK, we have an escape character. */
    2016       10258 :             s++;
    2017       10258 :             if (s >= end)
    2018         192 :                 FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
    2019       10066 :             else if (*s == 'u')
    2020             :             {
    2021             :                 int         i;
    2022        3886 :                 int         ch = 0;
    2023             : 
    2024       18682 :                 for (i = 1; i <= 4; i++)
    2025             :                 {
    2026       15090 :                     s++;
    2027       15090 :                     if (s >= end)
    2028         256 :                         FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
    2029       14834 :                     else if (*s >= '0' && *s <= '9')
    2030        9230 :                         ch = (ch * 16) + (*s - '0');
    2031        5604 :                     else if (*s >= 'a' && *s <= 'f')
    2032        5542 :                         ch = (ch * 16) + (*s - 'a') + 10;
    2033          62 :                     else if (*s >= 'A' && *s <= 'F')
    2034          24 :                         ch = (ch * 16) + (*s - 'A') + 10;
    2035             :                     else
    2036          38 :                         FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT);
    2037             :                 }
    2038        3592 :                 if (lex->need_escapes)
    2039             :                 {
    2040             :                     /*
    2041             :                      * Combine surrogate pairs.
    2042             :                      */
    2043         260 :                     if (is_utf16_surrogate_first(ch))
    2044             :                     {
    2045          72 :                         if (hi_surrogate != -1)
    2046          12 :                             FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE);
    2047          60 :                         hi_surrogate = ch;
    2048          60 :                         continue;
    2049             :                     }
    2050         188 :                     else if (is_utf16_surrogate_second(ch))
    2051             :                     {
    2052          60 :                         if (hi_surrogate == -1)
    2053          24 :                             FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2054          36 :                         ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
    2055          36 :                         hi_surrogate = -1;
    2056             :                     }
    2057             : 
    2058         164 :                     if (hi_surrogate != -1)
    2059           0 :                         FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2060             : 
    2061             :                     /*
    2062             :                      * Reject invalid cases.  We can't have a value above
    2063             :                      * 0xFFFF here (since we only accepted 4 hex digits
    2064             :                      * above), so no need to test for out-of-range chars.
    2065             :                      */
    2066         164 :                     if (ch == 0)
    2067             :                     {
    2068             :                         /* We can't allow this, since our TEXT type doesn't */
    2069          24 :                         FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO);
    2070             :                     }
    2071             : 
    2072             :                     /*
    2073             :                      * Add the represented character to lex->strval.  In the
    2074             :                      * backend, we can let pg_unicode_to_server_noerror()
    2075             :                      * handle any required character set conversion; in
    2076             :                      * frontend, we can only deal with trivial conversions.
    2077             :                      */
    2078             : #ifndef FRONTEND
    2079             :                     {
    2080             :                         char        cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
    2081             : 
    2082          84 :                         if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
    2083           0 :                             FAIL_AT_CHAR_END(JSON_UNICODE_UNTRANSLATABLE);
    2084          84 :                         appendStringInfoString(lex->strval, cbuf);
    2085             :                     }
    2086             : #else
    2087          56 :                     if (lex->input_encoding == PG_UTF8)
    2088             :                     {
    2089             :                         /* OK, we can map the code point to UTF8 easily */
    2090             :                         char        utf8str[5];
    2091             :                         int         utf8len;
    2092             : 
    2093          56 :                         unicode_to_utf8(ch, (unsigned char *) utf8str);
    2094          56 :                         utf8len = pg_utf_mblen((unsigned char *) utf8str);
    2095          56 :                         jsonapi_appendBinaryStringInfo(lex->strval, utf8str, utf8len);
    2096             :                     }
    2097           0 :                     else if (ch <= 0x007f)
    2098             :                     {
    2099             :                         /* The ASCII range is the same in all encodings */
    2100           0 :                         jsonapi_appendStringInfoChar(lex->strval, (char) ch);
    2101             :                     }
    2102             :                     else
    2103           0 :                         FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE);
    2104             : #endif                          /* FRONTEND */
    2105             :                 }
    2106             :             }
    2107        6180 :             else if (lex->need_escapes)
    2108             :             {
    2109         606 :                 if (hi_surrogate != -1)
    2110           0 :                     FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2111             : 
    2112         606 :                 switch (*s)
    2113             :                 {
    2114         386 :                     case '"':
    2115             :                     case '\\':
    2116             :                     case '/':
    2117         386 :                         jsonapi_appendStringInfoChar(lex->strval, *s);
    2118         386 :                         break;
    2119          44 :                     case 'b':
    2120          44 :                         jsonapi_appendStringInfoChar(lex->strval, '\b');
    2121          44 :                         break;
    2122           8 :                     case 'f':
    2123           8 :                         jsonapi_appendStringInfoChar(lex->strval, '\f');
    2124           8 :                         break;
    2125          62 :                     case 'n':
    2126          62 :                         jsonapi_appendStringInfoChar(lex->strval, '\n');
    2127          62 :                         break;
    2128           8 :                     case 'r':
    2129           8 :                         jsonapi_appendStringInfoChar(lex->strval, '\r');
    2130           8 :                         break;
    2131          92 :                     case 't':
    2132          92 :                         jsonapi_appendStringInfoChar(lex->strval, '\t');
    2133          92 :                         break;
    2134           6 :                     default:
    2135             : 
    2136             :                         /*
    2137             :                          * Not a valid string escape, so signal error.  We
    2138             :                          * adjust token_start so that just the escape sequence
    2139             :                          * is reported, not the whole string.
    2140             :                          */
    2141           6 :                         lex->token_start = s;
    2142           6 :                         FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
    2143             :                 }
    2144             :             }
    2145        5574 :             else if (strchr("\"\\/bfnrt", *s) == NULL)
    2146             :             {
    2147             :                 /*
    2148             :                  * Simpler processing if we're not bothered about de-escaping
    2149             :                  *
    2150             :                  * It's very tempting to remove the strchr() call here and
    2151             :                  * replace it with a switch statement, but testing so far has
    2152             :                  * shown it's not a performance win.
    2153             :                  */
    2154         126 :                 lex->token_start = s;
    2155         126 :                 FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
    2156             :             }
    2157             :         }
    2158             :         else
    2159             :         {
    2160     2677488 :             const char *p = s;
    2161             : 
    2162     2677488 :             if (hi_surrogate != -1)
    2163          12 :                 FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2164             : 
    2165             :             /*
    2166             :              * Skip to the first byte that requires special handling, so we
    2167             :              * can batch calls to jsonapi_appendBinaryStringInfo.
    2168             :              */
    2169     2677476 :             while (p < end - sizeof(Vector8) &&
    2170     3287344 :                    !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
    2171     6751944 :                    !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
    2172      790742 :                    !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
    2173      790742 :                 p += sizeof(Vector8);
    2174             : 
    2175    20724614 :             for (; p < end; p++)
    2176             :             {
    2177    20671142 :                 if (*p == '\\' || *p == '"')
    2178             :                     break;
    2179    18047254 :                 else if ((unsigned char) *p <= 31)
    2180             :                 {
    2181             :                     /* Per RFC4627, these characters MUST be escaped. */
    2182             :                     /*
    2183             :                      * Since *p isn't printable, exclude it from the context
    2184             :                      * string
    2185             :                      */
    2186         116 :                     lex->token_terminator = p;
    2187         116 :                     return JSON_ESCAPING_REQUIRED;
    2188             :                 }
    2189             :             }
    2190             : 
    2191     2677360 :             if (lex->need_escapes)
    2192     2422326 :                 jsonapi_appendBinaryStringInfo(lex->strval, s, p - s);
    2193             : 
    2194             :             /*
    2195             :              * s will be incremented at the top of the loop, so set it to just
    2196             :              * behind our lookahead position
    2197             :              */
    2198     2677360 :             s = p - 1;
    2199             :         }
    2200             :     }
    2201             : 
    2202     2620976 :     if (hi_surrogate != -1)
    2203             :     {
    2204           0 :         lex->token_terminator = s + 1;
    2205           0 :         return JSON_UNICODE_LOW_SURROGATE;
    2206             :     }
    2207             : 
    2208             : #ifdef JSONAPI_USE_PQEXPBUFFER
    2209       68896 :     if (lex->need_escapes && PQExpBufferBroken(lex->strval))
    2210           0 :         return JSON_OUT_OF_MEMORY;
    2211             : #endif
    2212             : 
    2213             :     /* Hooray, we found the end of the string! */
    2214     2620976 :     lex->prev_token_terminator = lex->token_terminator;
    2215     2620976 :     lex->token_terminator = s + 1;
    2216     2620976 :     return JSON_SUCCESS;
    2217             : 
    2218             : #undef FAIL_OR_INCOMPLETE_AT_CHAR_START
    2219             : #undef FAIL_AT_CHAR_END
    2220             : }
    2221             : 
    2222             : /*
    2223             :  * The next token in the input stream is known to be a number; lex it.
    2224             :  *
    2225             :  * In JSON, a number consists of four parts:
    2226             :  *
    2227             :  * (1) An optional minus sign ('-').
    2228             :  *
    2229             :  * (2) Either a single '0', or a string of one or more digits that does not
    2230             :  *     begin with a '0'.
    2231             :  *
    2232             :  * (3) An optional decimal part, consisting of a period ('.') followed by
    2233             :  *     one or more digits.  (Note: While this part can be omitted
    2234             :  *     completely, it's not OK to have only the decimal point without
    2235             :  *     any digits afterwards.)
    2236             :  *
    2237             :  * (4) An optional exponent part, consisting of 'e' or 'E', optionally
    2238             :  *     followed by '+' or '-', followed by one or more digits.  (Note:
    2239             :  *     As with the decimal part, if 'e' or 'E' is present, it must be
    2240             :  *     followed by at least one digit.)
    2241             :  *
    2242             :  * The 's' argument to this function points to the ostensible beginning
    2243             :  * of part 2 - i.e. the character after any optional minus sign, or the
    2244             :  * first character of the string if there is none.
    2245             :  *
    2246             :  * If num_err is not NULL, we return an error flag to *num_err rather than
    2247             :  * raising an error for a badly-formed number.  Also, if total_len is not NULL
    2248             :  * the distance from lex->input to the token end+1 is returned to *total_len.
    2249             :  */
    2250             : static inline JsonParseErrorType
    2251      450076 : json_lex_number(JsonLexContext *lex, const char *s,
    2252             :                 bool *num_err, size_t *total_len)
    2253             : {
    2254      450076 :     bool        error = false;
    2255      450076 :     int         len = s - lex->input;
    2256             : 
    2257             :     /* Part (1): leading sign indicator. */
    2258             :     /* Caller already did this for us; so do nothing. */
    2259             : 
    2260             :     /* Part (2): parse main digit string. */
    2261      450076 :     if (len < lex->input_length && *s == '0')
    2262             :     {
    2263      127872 :         s++;
    2264      127872 :         len++;
    2265             :     }
    2266      322204 :     else if (len < lex->input_length && *s >= '1' && *s <= '9')
    2267             :     {
    2268             :         do
    2269             :         {
    2270     1105494 :             s++;
    2271     1105494 :             len++;
    2272     1105494 :         } while (len < lex->input_length && *s >= '0' && *s <= '9');
    2273             :     }
    2274             :     else
    2275           2 :         error = true;
    2276             : 
    2277             :     /* Part (3): parse optional decimal portion. */
    2278      450076 :     if (len < lex->input_length && *s == '.')
    2279             :     {
    2280       45532 :         s++;
    2281       45532 :         len++;
    2282       45532 :         if (len == lex->input_length || *s < '0' || *s > '9')
    2283          12 :             error = true;
    2284             :         else
    2285             :         {
    2286             :             do
    2287             :             {
    2288      108026 :                 s++;
    2289      108026 :                 len++;
    2290      108026 :             } while (len < lex->input_length && *s >= '0' && *s <= '9');
    2291             :         }
    2292             :     }
    2293             : 
    2294             :     /* Part (4): parse optional exponent. */
    2295      450076 :     if (len < lex->input_length && (*s == 'e' || *s == 'E'))
    2296             :     {
    2297          94 :         s++;
    2298          94 :         len++;
    2299          94 :         if (len < lex->input_length && (*s == '+' || *s == '-'))
    2300             :         {
    2301          10 :             s++;
    2302          10 :             len++;
    2303             :         }
    2304          94 :         if (len == lex->input_length || *s < '0' || *s > '9')
    2305          12 :             error = true;
    2306             :         else
    2307             :         {
    2308             :             do
    2309             :             {
    2310         284 :                 s++;
    2311         284 :                 len++;
    2312         284 :             } while (len < lex->input_length && *s >= '0' && *s <= '9');
    2313             :         }
    2314             :     }
    2315             : 
    2316             :     /*
    2317             :      * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff
    2318             :      * here should be considered part of the token for error-reporting
    2319             :      * purposes.
    2320             :      */
    2321      450424 :     for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
    2322         348 :         error = true;
    2323             : 
    2324      450076 :     if (total_len != NULL)
    2325          46 :         *total_len = len;
    2326             : 
    2327      450076 :     if (lex->incremental && !lex->inc_state->is_last_chunk &&
    2328      125494 :         len >= lex->input_length)
    2329             :     {
    2330         776 :         jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token,
    2331         776 :                                        lex->token_start, s - lex->token_start);
    2332         776 :         if (num_err != NULL)
    2333           0 :             *num_err = error;
    2334             : 
    2335         776 :         return JSON_INCOMPLETE;
    2336             :     }
    2337      449300 :     else if (num_err != NULL)
    2338             :     {
    2339             :         /* let the caller handle any error */
    2340          46 :         *num_err = error;
    2341             :     }
    2342             :     else
    2343             :     {
    2344             :         /* return token endpoint */
    2345      449254 :         lex->prev_token_terminator = lex->token_terminator;
    2346      449254 :         lex->token_terminator = s;
    2347             :         /* handle error if any */
    2348      449254 :         if (error)
    2349         128 :             return JSON_INVALID_TOKEN;
    2350             :     }
    2351             : 
    2352      449172 :     return JSON_SUCCESS;
    2353             : }
    2354             : 
    2355             : /*
    2356             :  * Report a parse error.
    2357             :  *
    2358             :  * lex->token_start and lex->token_terminator must identify the current token.
    2359             :  */
    2360             : static JsonParseErrorType
    2361        1204 : report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
    2362             : {
    2363             :     /* Handle case where the input ended prematurely. */
    2364        1204 :     if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
    2365         266 :         return JSON_EXPECTED_MORE;
    2366             : 
    2367             :     /* Otherwise choose the error type based on the parsing context. */
    2368         938 :     switch (ctx)
    2369             :     {
    2370          48 :         case JSON_PARSE_END:
    2371          48 :             return JSON_EXPECTED_END;
    2372         174 :         case JSON_PARSE_VALUE:
    2373         174 :             return JSON_EXPECTED_JSON;
    2374         164 :         case JSON_PARSE_STRING:
    2375         164 :             return JSON_EXPECTED_STRING;
    2376          56 :         case JSON_PARSE_ARRAY_START:
    2377          56 :             return JSON_EXPECTED_ARRAY_FIRST;
    2378          72 :         case JSON_PARSE_ARRAY_NEXT:
    2379          72 :             return JSON_EXPECTED_ARRAY_NEXT;
    2380         124 :         case JSON_PARSE_OBJECT_START:
    2381         124 :             return JSON_EXPECTED_OBJECT_FIRST;
    2382         128 :         case JSON_PARSE_OBJECT_LABEL:
    2383         128 :             return JSON_EXPECTED_COLON;
    2384         172 :         case JSON_PARSE_OBJECT_NEXT:
    2385         172 :             return JSON_EXPECTED_OBJECT_NEXT;
    2386           0 :         case JSON_PARSE_OBJECT_COMMA:
    2387           0 :             return JSON_EXPECTED_STRING;
    2388             :     }
    2389             : 
    2390             :     /*
    2391             :      * We don't use a default: case, so that the compiler will warn about
    2392             :      * unhandled enum values.
    2393             :      */
    2394             :     Assert(false);
    2395           0 :     return JSON_SUCCESS;        /* silence stupider compilers */
    2396             : }
    2397             : 
    2398             : /*
    2399             :  * Construct an (already translated) detail message for a JSON error.
    2400             :  *
    2401             :  * The returned pointer should not be freed, the allocation is either static
    2402             :  * or owned by the JsonLexContext.
    2403             :  */
    2404             : char *
    2405        2358 : json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
    2406             : {
    2407        2358 :     if (error == JSON_OUT_OF_MEMORY || lex == &failed_oom)
    2408             :     {
    2409             :         /* Short circuit. Allocating anything for this case is unhelpful. */
    2410           0 :         return _("out of memory");
    2411             :     }
    2412             : 
    2413        2358 :     if (lex->errormsg)
    2414           0 :         jsonapi_resetStringInfo(lex->errormsg);
    2415             :     else
    2416        2358 :         lex->errormsg = jsonapi_makeStringInfo();
    2417             : 
    2418             :     /*
    2419             :      * A helper for error messages that should print the current token. The
    2420             :      * format must contain exactly one %.*s specifier.
    2421             :      */
    2422             : #define json_token_error(lex, format) \
    2423             :     jsonapi_appendStringInfo((lex)->errormsg, _(format), \
    2424             :                              (int) ((lex)->token_terminator - (lex)->token_start), \
    2425             :                              (lex)->token_start);
    2426             : 
    2427        2358 :     switch (error)
    2428             :     {
    2429           0 :         case JSON_INCOMPLETE:
    2430             :         case JSON_SUCCESS:
    2431             :             /* fall through to the error code after switch */
    2432           0 :             break;
    2433           0 :         case JSON_INVALID_LEXER_TYPE:
    2434           0 :             if (lex->incremental)
    2435           0 :                 return _("Recursive descent parser cannot use incremental lexer.");
    2436             :             else
    2437           0 :                 return _("Incremental parser requires incremental lexer.");
    2438         512 :         case JSON_NESTING_TOO_DEEP:
    2439         512 :             return (_("JSON nested too deep, maximum permitted depth is 6400."));
    2440         132 :         case JSON_ESCAPING_INVALID:
    2441         132 :             json_token_error(lex, "Escape sequence \"\\%.*s\" is invalid.");
    2442         132 :             break;
    2443         116 :         case JSON_ESCAPING_REQUIRED:
    2444         116 :             jsonapi_appendStringInfo(lex->errormsg,
    2445         116 :                                      _("Character with value 0x%02x must be escaped."),
    2446         116 :                                      (unsigned char) *(lex->token_terminator));
    2447         116 :             break;
    2448          48 :         case JSON_EXPECTED_END:
    2449          48 :             json_token_error(lex, "Expected end of input, but found \"%.*s\".");
    2450          48 :             break;
    2451          56 :         case JSON_EXPECTED_ARRAY_FIRST:
    2452          56 :             json_token_error(lex, "Expected array element or \"]\", but found \"%.*s\".");
    2453          56 :             break;
    2454          72 :         case JSON_EXPECTED_ARRAY_NEXT:
    2455          72 :             json_token_error(lex, "Expected \",\" or \"]\", but found \"%.*s\".");
    2456          72 :             break;
    2457         128 :         case JSON_EXPECTED_COLON:
    2458         128 :             json_token_error(lex, "Expected \":\", but found \"%.*s\".");
    2459         128 :             break;
    2460         120 :         case JSON_EXPECTED_JSON:
    2461         120 :             json_token_error(lex, "Expected JSON value, but found \"%.*s\".");
    2462         120 :             break;
    2463         190 :         case JSON_EXPECTED_MORE:
    2464         190 :             return _("The input string ended unexpectedly.");
    2465         124 :         case JSON_EXPECTED_OBJECT_FIRST:
    2466         124 :             json_token_error(lex, "Expected string or \"}\", but found \"%.*s\".");
    2467         124 :             break;
    2468         172 :         case JSON_EXPECTED_OBJECT_NEXT:
    2469         172 :             json_token_error(lex, "Expected \",\" or \"}\", but found \"%.*s\".");
    2470         172 :             break;
    2471         164 :         case JSON_EXPECTED_STRING:
    2472         164 :             json_token_error(lex, "Expected string, but found \"%.*s\".");
    2473         164 :             break;
    2474         414 :         case JSON_INVALID_TOKEN:
    2475         414 :             json_token_error(lex, "Token \"%.*s\" is invalid.");
    2476         414 :             break;
    2477           0 :         case JSON_OUT_OF_MEMORY:
    2478             :             /* should have been handled above; use the error path */
    2479           0 :             break;
    2480          24 :         case JSON_UNICODE_CODE_POINT_ZERO:
    2481          24 :             return _("\\u0000 cannot be converted to text.");
    2482          38 :         case JSON_UNICODE_ESCAPE_FORMAT:
    2483          38 :             return _("\"\\u\" must be followed by four hexadecimal digits.");
    2484           0 :         case JSON_UNICODE_HIGH_ESCAPE:
    2485             :             /* note: this case is only reachable in frontend not backend */
    2486           0 :             return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
    2487           0 :         case JSON_UNICODE_UNTRANSLATABLE:
    2488             : 
    2489             :             /*
    2490             :              * Note: this case is only reachable in backend and not frontend.
    2491             :              * #ifdef it away so the frontend doesn't try to link against
    2492             :              * backend functionality.
    2493             :              */
    2494             : #ifndef FRONTEND
    2495           0 :             return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."),
    2496             :                             GetDatabaseEncodingName());
    2497             : #else
    2498             :             Assert(false);
    2499           0 :             break;
    2500             : #endif
    2501          12 :         case JSON_UNICODE_HIGH_SURROGATE:
    2502          12 :             return _("Unicode high surrogate must not follow a high surrogate.");
    2503          36 :         case JSON_UNICODE_LOW_SURROGATE:
    2504          36 :             return _("Unicode low surrogate must follow a high surrogate.");
    2505           0 :         case JSON_SEM_ACTION_FAILED:
    2506             :             /* fall through to the error code after switch */
    2507           0 :             break;
    2508             :     }
    2509             : #undef json_token_error
    2510             : 
    2511             :     /* Note that lex->errormsg can be NULL in shlib code. */
    2512        1546 :     if (lex->errormsg && lex->errormsg->len == 0)
    2513             :     {
    2514             :         /*
    2515             :          * We don't use a default: case, so that the compiler will warn about
    2516             :          * unhandled enum values.  But this needs to be here anyway to cover
    2517             :          * the possibility of an incorrect input.
    2518             :          */
    2519           0 :         jsonapi_appendStringInfo(lex->errormsg,
    2520             :                                  "unexpected json parse error type: %d",
    2521             :                                  (int) error);
    2522             :     }
    2523             : 
    2524             : #ifdef JSONAPI_USE_PQEXPBUFFER
    2525         632 :     if (PQExpBufferBroken(lex->errormsg))
    2526           0 :         return _("out of memory while constructing error description");
    2527             : #endif
    2528             : 
    2529        1546 :     return lex->errormsg->data;
    2530             : }

Generated by: LCOV version 1.16