LCOV - code coverage report
Current view: top level - src/common - jsonapi.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 849 963 88.2 %
Date: 2024-12-03 08:15:25 Functions: 31 31 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * jsonapi.c
       4             :  *      JSON parser and lexer interfaces
       5             :  *
       6             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/common/jsonapi.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #ifndef FRONTEND
      15             : #include "postgres.h"
      16             : #else
      17             : #include "postgres_fe.h"
      18             : #endif
      19             : 
      20             : #include "common/jsonapi.h"
      21             : #include "mb/pg_wchar.h"
      22             : #include "port/pg_lfind.h"
      23             : 
      24             : #ifdef JSONAPI_USE_PQEXPBUFFER
      25             : #include "pqexpbuffer.h"
      26             : #else
      27             : #include "lib/stringinfo.h"
      28             : #include "miscadmin.h"
      29             : #endif
      30             : 
      31             : /*
      32             :  * By default, we will use palloc/pfree along with StringInfo.  In libpq,
      33             :  * use malloc and PQExpBuffer, and return JSON_OUT_OF_MEMORY on out-of-memory.
      34             :  */
      35             : #ifdef JSONAPI_USE_PQEXPBUFFER
      36             : 
      37             : #define STRDUP(s) strdup(s)
      38             : #define ALLOC(size) malloc(size)
      39             : #define ALLOC0(size) calloc(1, size)
      40             : #define REALLOC realloc
      41             : #define FREE(s) free(s)
      42             : 
      43             : #define jsonapi_appendStringInfo            appendPQExpBuffer
      44             : #define jsonapi_appendBinaryStringInfo      appendBinaryPQExpBuffer
      45             : #define jsonapi_appendStringInfoChar        appendPQExpBufferChar
      46             : /* XXX should we add a macro version to PQExpBuffer? */
      47             : #define jsonapi_appendStringInfoCharMacro   appendPQExpBufferChar
      48             : #define jsonapi_makeStringInfo              createPQExpBuffer
      49             : #define jsonapi_initStringInfo              initPQExpBuffer
      50             : #define jsonapi_resetStringInfo             resetPQExpBuffer
      51             : #define jsonapi_termStringInfo              termPQExpBuffer
      52             : #define jsonapi_destroyStringInfo           destroyPQExpBuffer
      53             : 
      54             : #else                           /* !JSONAPI_USE_PQEXPBUFFER */
      55             : 
      56             : #define STRDUP(s) pstrdup(s)
      57             : #define ALLOC(size) palloc(size)
      58             : #define ALLOC0(size) palloc0(size)
      59             : #define REALLOC repalloc
      60             : 
      61             : #ifdef FRONTEND
      62             : #define FREE pfree
      63             : #else
      64             : /*
      65             :  * Backend pfree() doesn't handle NULL pointers like the frontend's does; smooth
      66             :  * that over to reduce mental gymnastics. Avoid multiple evaluation of the macro
      67             :  * argument to avoid future hair-pulling.
      68             :  */
      69             : #define FREE(s) do {    \
      70             :     void *__v = (s);    \
      71             :     if (__v)            \
      72             :         pfree(__v);     \
      73             : } while (0)
      74             : #endif
      75             : 
      76             : #define jsonapi_appendStringInfo            appendStringInfo
      77             : #define jsonapi_appendBinaryStringInfo      appendBinaryStringInfo
      78             : #define jsonapi_appendStringInfoChar        appendStringInfoChar
      79             : #define jsonapi_appendStringInfoCharMacro   appendStringInfoCharMacro
      80             : #define jsonapi_makeStringInfo              makeStringInfo
      81             : #define jsonapi_initStringInfo              initStringInfo
      82             : #define jsonapi_resetStringInfo             resetStringInfo
      83             : #define jsonapi_termStringInfo(s)           pfree((s)->data)
      84             : #define jsonapi_destroyStringInfo           destroyStringInfo
      85             : 
      86             : #endif                          /* JSONAPI_USE_PQEXPBUFFER */
      87             : 
      88             : /*
      89             :  * The context of the parser is maintained by the recursive descent
      90             :  * mechanism, but is passed explicitly to the error reporting routine
      91             :  * for better diagnostics.
      92             :  */
      93             : typedef enum                    /* contexts of JSON parser */
      94             : {
      95             :     JSON_PARSE_VALUE,           /* expecting a value */
      96             :     JSON_PARSE_STRING,          /* expecting a string (for a field name) */
      97             :     JSON_PARSE_ARRAY_START,     /* saw '[', expecting value or ']' */
      98             :     JSON_PARSE_ARRAY_NEXT,      /* saw array element, expecting ',' or ']' */
      99             :     JSON_PARSE_OBJECT_START,    /* saw '{', expecting label or '}' */
     100             :     JSON_PARSE_OBJECT_LABEL,    /* saw object label, expecting ':' */
     101             :     JSON_PARSE_OBJECT_NEXT,     /* saw object value, expecting ',' or '}' */
     102             :     JSON_PARSE_OBJECT_COMMA,    /* saw object ',', expecting next label */
     103             :     JSON_PARSE_END,             /* saw the end of a document, expect nothing */
     104             : } JsonParseContext;
     105             : 
     106             : /*
     107             :  * Setup for table-driven parser.
     108             :  * These enums need to be separate from the JsonTokenType and from each other
     109             :  * so we can have all of them on the prediction stack, which consists of
     110             :  * tokens, non-terminals, and semantic action markers.
     111             :  */
     112             : 
     113             : enum JsonNonTerminal
     114             : {
     115             :     JSON_NT_JSON = 32,
     116             :     JSON_NT_ARRAY_ELEMENTS,
     117             :     JSON_NT_MORE_ARRAY_ELEMENTS,
     118             :     JSON_NT_KEY_PAIRS,
     119             :     JSON_NT_MORE_KEY_PAIRS,
     120             : };
     121             : 
     122             : enum JsonParserSem
     123             : {
     124             :     JSON_SEM_OSTART = 64,
     125             :     JSON_SEM_OEND,
     126             :     JSON_SEM_ASTART,
     127             :     JSON_SEM_AEND,
     128             :     JSON_SEM_OFIELD_INIT,
     129             :     JSON_SEM_OFIELD_START,
     130             :     JSON_SEM_OFIELD_END,
     131             :     JSON_SEM_AELEM_START,
     132             :     JSON_SEM_AELEM_END,
     133             :     JSON_SEM_SCALAR_INIT,
     134             :     JSON_SEM_SCALAR_CALL,
     135             : };
     136             : 
/*
 * struct containing the 3 stacks used in non-recursive parsing,
 * and the token and value for scalars that need to be preserved
 * across calls.
 *
 * typedef appears in jsonapi.h
 */
struct JsonParserStack
{
    int         stack_size;     /* allocated entries per stack; grows in
                                 * JS_STACK_CHUNK_SIZE increments */
    char       *prediction;     /* prediction stack: holds tokens,
                                 * non-terminals, and semantic action markers */
    size_t      pred_index;     /* current top-of-stack index into prediction */
    /* these two are indexed by lex_level */
    char      **fnames;         /* object field name at each nesting level */
    bool       *fnull;          /* field-is-null flag at each nesting level */
    JsonTokenType scalar_tok;   /* token type of a scalar preserved across
                                 * incremental-parse calls */
    char       *scalar_val;     /* its value; ownership per
                                 * JSONLEX_CTX_OWNS_TOKENS */
};
     155             : 
/*
 * struct containing state used when there is a possible partial token at the
 * end of a json chunk when we are doing incremental parsing.
 *
 * typedef appears in jsonapi.h
 */
struct JsonIncrementalState
{
    bool        started;        /* true once parsing has consumed input;
                                 * guards against late config changes (see
                                 * setJsonLexContextOwnsTokens) */
    bool        is_last_chunk;  /* presumably set when the caller supplies the
                                 * final chunk -- usage not visible here */
    bool        partial_completed;  /* NOTE(review): appears to flag that a
                                     * saved partial token was finished by a
                                     * later chunk -- confirm at use sites */
    jsonapi_StrValType partial_token;   /* accumulates a token split across
                                         * chunk boundaries */
};
     169             : 
     170             : /*
     171             :  * constants and macros used in the nonrecursive parser
     172             :  */
     173             : #define JSON_NUM_TERMINALS 13
     174             : #define JSON_NUM_NONTERMINALS 5
     175             : #define JSON_NT_OFFSET JSON_NT_JSON
     176             : /* for indexing the table */
     177             : #define OFS(NT) (NT) - JSON_NT_OFFSET
     178             : /* classify items we get off the stack */
     179             : #define IS_SEM(x) ((x) & 0x40)
     180             : #define IS_NT(x)  ((x) & 0x20)
     181             : 
     182             : /*
     183             :  * These productions are stored in reverse order right to left so that when
     184             :  * they are pushed on the stack what we expect next is at the top of the stack.
     185             :  */
     186             : static char JSON_PROD_EPSILON[] = {0};  /* epsilon - an empty production */
     187             : 
     188             : /* JSON -> string */
     189             : static char JSON_PROD_SCALAR_STRING[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_STRING, JSON_SEM_SCALAR_INIT, 0};
     190             : 
     191             : /* JSON -> number */
     192             : static char JSON_PROD_SCALAR_NUMBER[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NUMBER, JSON_SEM_SCALAR_INIT, 0};
     193             : 
     194             : /* JSON -> 'true' */
     195             : static char JSON_PROD_SCALAR_TRUE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_TRUE, JSON_SEM_SCALAR_INIT, 0};
     196             : 
     197             : /* JSON -> 'false' */
     198             : static char JSON_PROD_SCALAR_FALSE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_FALSE, JSON_SEM_SCALAR_INIT, 0};
     199             : 
     200             : /* JSON -> 'null' */
     201             : static char JSON_PROD_SCALAR_NULL[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NULL, JSON_SEM_SCALAR_INIT, 0};
     202             : 
     203             : /* JSON -> '{' KEY_PAIRS '}' */
     204             : static char JSON_PROD_OBJECT[] = {JSON_SEM_OEND, JSON_TOKEN_OBJECT_END, JSON_NT_KEY_PAIRS, JSON_TOKEN_OBJECT_START, JSON_SEM_OSTART, 0};
     205             : 
     206             : /* JSON -> '[' ARRAY_ELEMENTS ']' */
     207             : static char JSON_PROD_ARRAY[] = {JSON_SEM_AEND, JSON_TOKEN_ARRAY_END, JSON_NT_ARRAY_ELEMENTS, JSON_TOKEN_ARRAY_START, JSON_SEM_ASTART, 0};
     208             : 
     209             : /* ARRAY_ELEMENTS -> JSON MORE_ARRAY_ELEMENTS */
     210             : static char JSON_PROD_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, 0};
     211             : 
     212             : /* MORE_ARRAY_ELEMENTS -> ',' JSON MORE_ARRAY_ELEMENTS */
     213             : static char JSON_PROD_MORE_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, JSON_TOKEN_COMMA, 0};
     214             : 
     215             : /* KEY_PAIRS -> string ':' JSON MORE_KEY_PAIRS */
     216             : static char JSON_PROD_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, 0};
     217             : 
     218             : /* MORE_KEY_PAIRS -> ',' string ':'  JSON MORE_KEY_PAIRS */
     219             : static char JSON_PROD_MORE_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, JSON_TOKEN_COMMA, 0};
     220             : 
     221             : /*
     222             :  * Note: there are also epsilon productions for ARRAY_ELEMENTS,
     223             :  * MORE_ARRAY_ELEMENTS, KEY_PAIRS and MORE_KEY_PAIRS
     224             :  * They are all the same as none require any semantic actions.
     225             :  */
     226             : 
     227             : /*
     228             :  * Table connecting the productions with their director sets of
     229             :  * terminal symbols.
     230             :  * Any combination not specified here represents an error.
     231             :  */
     232             : 
     233             : typedef struct
     234             : {
     235             :     size_t      len;
     236             :     char       *prod;
     237             : } td_entry;
     238             : 
     239             : #define TD_ENTRY(PROD) { sizeof(PROD) - 1, (PROD) }
     240             : 
     241             : static td_entry td_parser_table[JSON_NUM_NONTERMINALS][JSON_NUM_TERMINALS] =
     242             : {
     243             :     /* JSON */
     244             :     [OFS(JSON_NT_JSON)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_SCALAR_STRING),
     245             :     [OFS(JSON_NT_JSON)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_SCALAR_NUMBER),
     246             :     [OFS(JSON_NT_JSON)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_SCALAR_TRUE),
     247             :     [OFS(JSON_NT_JSON)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_SCALAR_FALSE),
     248             :     [OFS(JSON_NT_JSON)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_SCALAR_NULL),
     249             :     [OFS(JSON_NT_JSON)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY),
     250             :     [OFS(JSON_NT_JSON)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_OBJECT),
     251             :     /* ARRAY_ELEMENTS */
     252             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     253             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     254             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     255             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     256             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     257             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     258             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     259             :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
     260             :     /* MORE_ARRAY_ELEMENTS */
     261             :     [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_ARRAY_ELEMENTS),
     262             :     [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
     263             :     /* KEY_PAIRS */
     264             :     [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_KEY_PAIRS),
     265             :     [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
     266             :     /* MORE_KEY_PAIRS */
     267             :     [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_KEY_PAIRS),
     268             :     [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
     269             : };
     270             : 
     271             : /* the GOAL production. Not stored in the table, but will be the initial contents of the prediction stack */
     272             : static char JSON_PROD_GOAL[] = {JSON_TOKEN_END, JSON_NT_JSON, 0};
     273             : 
     274             : static inline JsonParseErrorType json_lex_string(JsonLexContext *lex);
     275             : static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, const char *s,
     276             :                                                  bool *num_err, size_t *total_len);
     277             : static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, const JsonSemAction *sem);
     278             : static JsonParseErrorType parse_object_field(JsonLexContext *lex, const JsonSemAction *sem);
     279             : static JsonParseErrorType parse_object(JsonLexContext *lex, const JsonSemAction *sem);
     280             : static JsonParseErrorType parse_array_element(JsonLexContext *lex, const JsonSemAction *sem);
     281             : static JsonParseErrorType parse_array(JsonLexContext *lex, const JsonSemAction *sem);
     282             : static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
     283             : static bool allocate_incremental_state(JsonLexContext *lex);
     284             : static inline void set_fname(JsonLexContext *lex, char *fname);
     285             : 
     286             : /* the null action object used for pure validation */
     287             : const JsonSemAction nullSemAction =
     288             : {
     289             :     NULL, NULL, NULL, NULL, NULL,
     290             :     NULL, NULL, NULL, NULL, NULL
     291             : };
     292             : 
     293             : /* sentinels used for out-of-memory conditions */
     294             : static JsonLexContext failed_oom;
     295             : static JsonIncrementalState failed_inc_oom;
     296             : 
     297             : /* Parser support routines */
     298             : 
     299             : /*
     300             :  * lex_peek
     301             :  *
     302             :  * what is the current look_ahead token?
     303             : */
     304             : static inline JsonTokenType
     305    13579536 : lex_peek(JsonLexContext *lex)
     306             : {
     307    13579536 :     return lex->token_type;
     308             : }
     309             : 
     310             : /*
     311             :  * lex_expect
     312             :  *
     313             :  * move the lexer to the next token if the current look_ahead token matches
     314             :  * the parameter token. Otherwise, report an error.
     315             :  */
     316             : static inline JsonParseErrorType
     317      375092 : lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
     318             : {
     319      375092 :     if (lex_peek(lex) == token)
     320      374984 :         return json_lex(lex);
     321             :     else
     322         108 :         return report_parse_error(ctx, lex);
     323             : }
     324             : 
     325             : /* chars to consider as part of an alphanumeric token */
     326             : #define JSON_ALPHANUMERIC_CHAR(c)  \
     327             :     (((c) >= 'a' && (c) <= 'z') || \
     328             :      ((c) >= 'A' && (c) <= 'Z') || \
     329             :      ((c) >= '0' && (c) <= '9') || \
     330             :      (c) == '_' || \
     331             :      IS_HIGHBIT_SET(c))
     332             : 
     333             : /*
     334             :  * Utility function to check if a string is a valid JSON number.
     335             :  *
     336             :  * str is of length len, and need not be null-terminated.
     337             :  */
     338             : bool
     339          46 : IsValidJsonNumber(const char *str, size_t len)
     340             : {
     341             :     bool        numeric_error;
     342             :     size_t      total_len;
     343          46 :     JsonLexContext dummy_lex = {0};
     344             : 
     345          46 :     if (len <= 0)
     346           0 :         return false;
     347             : 
     348             :     /*
     349             :      * json_lex_number expects a leading  '-' to have been eaten already.
     350             :      *
     351             :      * having to cast away the constness of str is ugly, but there's not much
     352             :      * easy alternative.
     353             :      */
     354          46 :     if (*str == '-')
     355             :     {
     356           4 :         dummy_lex.input = str + 1;
     357           4 :         dummy_lex.input_length = len - 1;
     358             :     }
     359             :     else
     360             :     {
     361          42 :         dummy_lex.input = str;
     362          42 :         dummy_lex.input_length = len;
     363             :     }
     364             : 
     365          46 :     dummy_lex.token_start = dummy_lex.input;
     366             : 
     367          46 :     json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
     368             : 
     369          46 :     return (!numeric_error) && (total_len == dummy_lex.input_length);
     370             : }
     371             : 
     372             : /*
     373             :  * makeJsonLexContextCstringLen
     374             :  *      Initialize the given JsonLexContext object, or create one
     375             :  *
     376             :  * If a valid 'lex' pointer is given, it is initialized.  This can
     377             :  * be used for stack-allocated structs, saving overhead.  If NULL is
     378             :  * given, a new struct is allocated.
     379             :  *
     380             :  * If need_escapes is true, ->strval stores the unescaped lexemes.
     381             :  * Unescaping is expensive, so only request it when necessary.
     382             :  *
     383             :  * If need_escapes is true or lex was given as NULL, then caller is
     384             :  * responsible for freeing the returned struct, either by calling
     385             :  * freeJsonLexContext() or (in backend environment) via memory context
     386             :  * cleanup.
     387             :  *
     388             :  * In shlib code, any out-of-memory failures will be deferred to time
     389             :  * of use; this function is guaranteed to return a valid JsonLexContext.
     390             :  */
     391             : JsonLexContext *
     392       38012 : makeJsonLexContextCstringLen(JsonLexContext *lex, const char *json,
     393             :                              size_t len, int encoding, bool need_escapes)
     394             : {
     395       38012 :     if (lex == NULL)
     396             :     {
     397        5528 :         lex = ALLOC0(sizeof(JsonLexContext));
     398        5528 :         if (!lex)
     399           0 :             return &failed_oom;
     400        5528 :         lex->flags |= JSONLEX_FREE_STRUCT;
     401             :     }
     402             :     else
     403       32484 :         memset(lex, 0, sizeof(JsonLexContext));
     404             : 
     405       38012 :     lex->errormsg = NULL;
     406       38012 :     lex->input = lex->token_terminator = lex->line_start = json;
     407       38012 :     lex->line_number = 1;
     408       38012 :     lex->input_length = len;
     409       38012 :     lex->input_encoding = encoding;
     410       38012 :     lex->need_escapes = need_escapes;
     411       38012 :     if (need_escapes)
     412             :     {
     413             :         /*
     414             :          * This call can fail in shlib code. We defer error handling to time
     415             :          * of use (json_lex_string()) since we might not need to parse any
     416             :          * strings anyway.
     417             :          */
     418       29474 :         lex->strval = jsonapi_makeStringInfo();
     419       29474 :         lex->flags |= JSONLEX_FREE_STRVAL;
     420             :     }
     421             : 
     422       38012 :     return lex;
     423             : }
     424             : 
/*
 * Allocates the internal bookkeeping structures for incremental parsing. This
 * can only fail in-band with shlib code.
 */
#define JS_STACK_CHUNK_SIZE 64
#define JS_MAX_PROD_LEN 10      /* more than we need */
#define JSON_TD_MAX_STACK 6400  /* hard coded for now - this is a REALLY high
                                 * number */
static bool
allocate_incremental_state(JsonLexContext *lex)
{
    void       *pstack,
               *prediction,
               *fnames,
               *fnull;

    /*
     * Allocate everything up front.  In non-shlib builds ALLOC/ALLOC0 are
     * palloc-based (see macros at top of file) and cannot return NULL; in
     * shlib (PQExpBUFFER) builds they are malloc-based, so the results are
     * checked below.
     */
    lex->inc_state = ALLOC0(sizeof(JsonIncrementalState));
    pstack = ALLOC0(sizeof(JsonParserStack));
    prediction = ALLOC(JS_STACK_CHUNK_SIZE * JS_MAX_PROD_LEN);
    fnames = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(char *));
    fnull = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(bool));

#ifdef JSONAPI_USE_PQEXPBUFFER
    if (!lex->inc_state
        || !pstack
        || !prediction
        || !fnames
        || !fnull)
    {
        /* Release whatever did succeed; FREE maps to free() here, so NULLs
         * are tolerated. */
        FREE(lex->inc_state);
        FREE(pstack);
        FREE(prediction);
        FREE(fnames);
        FREE(fnull);

        /* Sentinel records the OOM so later use can report it in-band. */
        lex->inc_state = &failed_inc_oom;
        return false;
    }
#endif

    jsonapi_initStringInfo(&(lex->inc_state->partial_token));
    lex->pstack = pstack;
    lex->pstack->stack_size = JS_STACK_CHUNK_SIZE;
    lex->pstack->prediction = prediction;
    lex->pstack->fnames = fnames;
    lex->pstack->fnull = fnull;

    /*
     * fnames between 0 and lex_level must always be defined so that
     * freeJsonLexContext() can handle them safely. inc/dec_lex_level() handle
     * the rest.
     */
    Assert(lex->lex_level == 0);
    lex->pstack->fnames[0] = NULL;

    lex->incremental = true;
    return true;
}
     483             : 
     484             : 
     485             : /*
     486             :  * makeJsonLexContextIncremental
     487             :  *
     488             :  * Similar to above but set up for use in incremental parsing. That means we
     489             :  * need explicit stacks for predictions, field names and null indicators, but
     490             :  * we don't need the input, that will be handed in bit by bit to the
     491             :  * parse routine. We also need an accumulator for partial tokens in case
     492             :  * the boundary between chunks happens to fall in the middle of a token.
     493             :  *
     494             :  * In shlib code, any out-of-memory failures will be deferred to time of use;
     495             :  * this function is guaranteed to return a valid JsonLexContext.
     496             :  */
     497             : JsonLexContext *
     498        4148 : makeJsonLexContextIncremental(JsonLexContext *lex, int encoding,
     499             :                               bool need_escapes)
     500             : {
     501        4148 :     if (lex == NULL)
     502             :     {
     503           2 :         lex = ALLOC0(sizeof(JsonLexContext));
     504           2 :         if (!lex)
     505           0 :             return &failed_oom;
     506             : 
     507           2 :         lex->flags |= JSONLEX_FREE_STRUCT;
     508             :     }
     509             :     else
     510        4146 :         memset(lex, 0, sizeof(JsonLexContext));
     511             : 
     512        4148 :     lex->line_number = 1;
     513        4148 :     lex->input_encoding = encoding;
     514             : 
     515        4148 :     if (!allocate_incremental_state(lex))
     516             :     {
     517           0 :         if (lex->flags & JSONLEX_FREE_STRUCT)
     518             :         {
     519           0 :             FREE(lex);
     520           0 :             return &failed_oom;
     521             :         }
     522             : 
     523             :         /* lex->inc_state tracks the OOM failure; we can return here. */
     524           0 :         return lex;
     525             :     }
     526             : 
     527        4148 :     lex->need_escapes = need_escapes;
     528        4148 :     if (need_escapes)
     529             :     {
     530             :         /*
     531             :          * This call can fail in shlib code. We defer error handling to time
     532             :          * of use (json_lex_string()) since we might not need to parse any
     533             :          * strings anyway.
     534             :          */
     535         234 :         lex->strval = jsonapi_makeStringInfo();
     536         234 :         lex->flags |= JSONLEX_FREE_STRVAL;
     537             :     }
     538             : 
     539        4148 :     return lex;
     540             : }
     541             : 
     542             : void
     543        3920 : setJsonLexContextOwnsTokens(JsonLexContext *lex, bool owned_by_context)
     544             : {
     545        3920 :     if (lex->incremental && lex->inc_state->started)
     546             :     {
     547             :         /*
     548             :          * Switching this flag after parsing has already started is a
     549             :          * programming error.
     550             :          */
     551             :         Assert(false);
     552           0 :         return;
     553             :     }
     554             : 
     555        3920 :     if (owned_by_context)
     556        1960 :         lex->flags |= JSONLEX_CTX_OWNS_TOKENS;
     557             :     else
     558        1960 :         lex->flags &= ~JSONLEX_CTX_OWNS_TOKENS;
     559             : }
     560             : 
/*
 * Enter one nesting level in the incremental parser, growing the prediction,
 * fname, and fnull stacks by JS_STACK_CHUNK_SIZE entries when the next level
 * would not fit.
 *
 * Returns false only on reallocation failure, which is possible only in the
 * shared-library (JSONAPI_USE_PQEXPBUFFER) build; otherwise always true.
 * On a partial failure, pointers already reallocated have been stored back,
 * and the untouched ones keep their old (still valid) allocations, so the
 * context remains safe to free.
 */
static inline bool
inc_lex_level(JsonLexContext *lex)
{
	if (lex->incremental && (lex->lex_level + 1) >= lex->pstack->stack_size)
	{
		size_t		new_stack_size;
		char	   *new_prediction;
		char	  **new_fnames;
		bool	   *new_fnull;

		new_stack_size = lex->pstack->stack_size + JS_STACK_CHUNK_SIZE;

		/*
		 * Each REALLOC result is checked before being stored, so a failure
		 * leaves the previous buffer in place rather than clobbering the
		 * pointer with NULL.
		 */
		new_prediction = REALLOC(lex->pstack->prediction,
								 new_stack_size * JS_MAX_PROD_LEN);
#ifdef JSONAPI_USE_PQEXPBUFFER
		if (!new_prediction)
			return false;
#endif
		lex->pstack->prediction = new_prediction;

		new_fnames = REALLOC(lex->pstack->fnames,
							 new_stack_size * sizeof(char *));
#ifdef JSONAPI_USE_PQEXPBUFFER
		if (!new_fnames)
			return false;
#endif
		lex->pstack->fnames = new_fnames;

		new_fnull = REALLOC(lex->pstack->fnull, new_stack_size * sizeof(bool));
#ifdef JSONAPI_USE_PQEXPBUFFER
		if (!new_fnull)
			return false;
#endif
		lex->pstack->fnull = new_fnull;

		/* All three stacks grew successfully; record the new capacity. */
		lex->pstack->stack_size = new_stack_size;
	}

	lex->lex_level += 1;

	if (lex->incremental)
	{
		/*
		 * Ensure freeJsonLexContext() remains safe even if no fname is
		 * assigned at this level.
		 */
		lex->pstack->fnames[lex->lex_level] = NULL;
	}

	return true;
}
     612             : 
     613             : static inline void
     614     1887684 : dec_lex_level(JsonLexContext *lex)
     615             : {
     616     1887684 :     set_fname(lex, NULL);       /* free the current level's fname, if needed */
     617     1887684 :     lex->lex_level -= 1;
     618     1887684 : }
     619             : 
     620             : static inline void
     621    14580722 : push_prediction(JsonParserStack *pstack, td_entry entry)
     622             : {
     623    14580722 :     memcpy(pstack->prediction + pstack->pred_index, entry.prod, entry.len);
     624    14580722 :     pstack->pred_index += entry.len;
     625    14580722 : }
     626             : 
     627             : static inline char
     628    46388500 : pop_prediction(JsonParserStack *pstack)
     629             : {
     630             :     Assert(pstack->pred_index > 0);
     631    46388500 :     return pstack->prediction[--pstack->pred_index];
     632             : }
     633             : 
     634             : static inline char
     635         152 : next_prediction(JsonParserStack *pstack)
     636             : {
     637             :     Assert(pstack->pred_index > 0);
     638         152 :     return pstack->prediction[pstack->pred_index - 1];
     639             : }
     640             : 
     641             : static inline bool
     642    46993048 : have_prediction(JsonParserStack *pstack)
     643             : {
     644    46993048 :     return pstack->pred_index > 0;
     645             : }
     646             : 
     647             : static inline void
     648     3078216 : set_fname(JsonLexContext *lex, char *fname)
     649             : {
     650     3078216 :     if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
     651             :     {
     652             :         /*
     653             :          * Don't leak prior fnames. If one hasn't been assigned yet,
     654             :          * inc_lex_level ensured that it's NULL (and therefore safe to free).
     655             :          */
     656      873724 :         FREE(lex->pstack->fnames[lex->lex_level]);
     657             :     }
     658             : 
     659     3078216 :     lex->pstack->fnames[lex->lex_level] = fname;
     660     3078216 : }
     661             : 
     662             : static inline char *
     663     1111020 : get_fname(JsonLexContext *lex)
     664             : {
     665     1111020 :     return lex->pstack->fnames[lex->lex_level];
     666             : }
     667             : 
     668             : static inline void
     669     6342648 : set_fnull(JsonLexContext *lex, bool fnull)
     670             : {
     671     6342648 :     lex->pstack->fnull[lex->lex_level] = fnull;
     672     6342648 : }
     673             : 
     674             : static inline bool
     675        1488 : get_fnull(JsonLexContext *lex)
     676             : {
     677        1488 :     return lex->pstack->fnull[lex->lex_level];
     678             : }
     679             : 
/*
 * Free memory in a JsonLexContext.
 *
 * There's no need for this if a *lex pointer was given when the object was
 * made, need_escapes was false, and json_errdetail() was not called; or if (in
 * backend environment) a memory context delete/reset is imminent.
 */
void
freeJsonLexContext(JsonLexContext *lex)
{
	static const JsonLexContext empty = {0};

	/* The static OOM sentinel must never be freed or scribbled on. */
	if (!lex || lex == &failed_oom)
		return;

	/* strval is freed only if this context allocated it itself. */
	if (lex->flags & JSONLEX_FREE_STRVAL)
		jsonapi_destroyStringInfo(lex->strval);

	/* errormsg is lazily created by error reporting; may be absent. */
	if (lex->errormsg)
		jsonapi_destroyStringInfo(lex->errormsg);

	if (lex->incremental)
	{
		jsonapi_termStringInfo(&lex->inc_state->partial_token);
		FREE(lex->inc_state);
		FREE(lex->pstack->prediction);

		if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
		{
			int			i;

			/*
			 * Clean up any tokens that were left behind.  Levels up to and
			 * including lex_level are guaranteed to hold either a valid
			 * fname or NULL (see inc_lex_level), so FREE is always safe.
			 */
			for (i = 0; i <= lex->lex_level; i++)
				FREE(lex->pstack->fnames[i]);
		}

		FREE(lex->pstack->fnames);
		FREE(lex->pstack->fnull);
		FREE(lex->pstack->scalar_val);
		FREE(lex->pstack);
	}

	/*
	 * Free the struct itself only if we allocated it; otherwise zero it so
	 * the caller-owned object can be safely reused or re-freed.
	 */
	if (lex->flags & JSONLEX_FREE_STRUCT)
		FREE(lex);
	else
		*lex = empty;
}
     727             : 
/*
 * pg_parse_json
 *
 * Publicly visible entry point for the JSON parser.
 *
 * lex is a lexing context, set up for the json to be processed by calling
 * makeJsonLexContext(). sem is a structure of function pointers to semantic
 * action routines to be called at appropriate spots during parsing, and a
 * pointer to a state object to be passed to those routines.
 *
 * If FORCE_JSON_PSTACK is defined then the routine will call the non-recursive
 * JSON parser. This is a useful way to validate that it's doing the right
 * thing at least for non-incremental cases. If this is on we expect to see
 * regression diffs relating to error messages about stack depth, but no
 * other differences.
 *
 * Returns JSON_SUCCESS on a complete, valid document, or the error code of
 * the first failure encountered.
 */
JsonParseErrorType
pg_parse_json(JsonLexContext *lex, const JsonSemAction *sem)
{
#ifdef FORCE_JSON_PSTACK
	/*
	 * We don't need partial token processing, there is only one chunk. But we
	 * still need to init the partial token string so that freeJsonLexContext
	 * works, so perform the full incremental initialization.
	 */
	if (!allocate_incremental_state(lex))
		return JSON_OUT_OF_MEMORY;

	return pg_parse_json_incremental(lex, sem, lex->input, lex->input_length, true);

#else

	JsonTokenType tok;
	JsonParseErrorType result;

	/* A context that failed allocation can only report OOM. */
	if (lex == &failed_oom)
		return JSON_OUT_OF_MEMORY;
	/* Incremental contexts must go through pg_parse_json_incremental(). */
	if (lex->incremental)
		return JSON_INVALID_LEXER_TYPE;

	/* get the initial token */
	result = json_lex(lex);
	if (result != JSON_SUCCESS)
		return result;

	tok = lex_peek(lex);

	/* parse by recursive descent */
	switch (tok)
	{
		case JSON_TOKEN_OBJECT_START:
			result = parse_object(lex, sem);
			break;
		case JSON_TOKEN_ARRAY_START:
			result = parse_array(lex, sem);
			break;
		default:
			result = parse_scalar(lex, sem);	/* json can be a bare scalar */
	}

	/* A valid document must be followed by end-of-input, nothing else. */
	if (result == JSON_SUCCESS)
		result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);

	return result;
#endif
}
     794             : 
     795             : /*
     796             :  * json_count_array_elements
     797             :  *
     798             :  * Returns number of array elements in lex context at start of array token
     799             :  * until end of array token at same nesting level.
     800             :  *
     801             :  * Designed to be called from array_start routines.
     802             :  */
     803             : JsonParseErrorType
     804           6 : json_count_array_elements(JsonLexContext *lex, int *elements)
     805             : {
     806             :     JsonLexContext copylex;
     807             :     int         count;
     808             :     JsonParseErrorType result;
     809             : 
     810           6 :     if (lex == &failed_oom)
     811           0 :         return JSON_OUT_OF_MEMORY;
     812             : 
     813             :     /*
     814             :      * It's safe to do this with a shallow copy because the lexical routines
     815             :      * don't scribble on the input. They do scribble on the other pointers
     816             :      * etc, so doing this with a copy makes that safe.
     817             :      */
     818           6 :     memcpy(&copylex, lex, sizeof(JsonLexContext));
     819           6 :     copylex.need_escapes = false;   /* not interested in values here */
     820           6 :     copylex.lex_level++;
     821             : 
     822           6 :     count = 0;
     823           6 :     result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
     824             :                         JSON_TOKEN_ARRAY_START);
     825           6 :     if (result != JSON_SUCCESS)
     826           0 :         return result;
     827           6 :     if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
     828             :     {
     829             :         while (1)
     830             :         {
     831          48 :             count++;
     832          48 :             result = parse_array_element(&copylex, &nullSemAction);
     833          48 :             if (result != JSON_SUCCESS)
     834           0 :                 return result;
     835          48 :             if (copylex.token_type != JSON_TOKEN_COMMA)
     836           6 :                 break;
     837          42 :             result = json_lex(&copylex);
     838          42 :             if (result != JSON_SUCCESS)
     839           0 :                 return result;
     840             :         }
     841             :     }
     842           6 :     result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
     843             :                         JSON_TOKEN_ARRAY_END);
     844           6 :     if (result != JSON_SUCCESS)
     845           0 :         return result;
     846             : 
     847           6 :     *elements = count;
     848           6 :     return JSON_SUCCESS;
     849             : }
     850             : 
     851             : /*
     852             :  * pg_parse_json_incremental
     853             :  *
     854             :  * Routine for incremental parsing of json. This uses the non-recursive top
     855             :  * down method of the Dragon Book Algorithm 4.3. It's somewhat slower than
     856             :  * the Recursive Descent pattern used above, so we only use it for incremental
     857             :  * parsing of JSON.
     858             :  *
     859             :  * The lexing context needs to be set up by a call to
     860             :  * makeJsonLexContextIncremental(). sem is a structure of function pointers
     861             :  * to semantic action routines, which should function exactly as those used
     862             :  * in the recursive descent parser.
     863             :  *
     864             :  * This routine can be called repeatedly with chunks of JSON. On the final
     865             :  * chunk is_last must be set to true. len is the length of the json chunk,
     866             :  * which does not need to be null terminated.
     867             :  */
     868             : JsonParseErrorType
     869      745868 : pg_parse_json_incremental(JsonLexContext *lex,
     870             :                           const JsonSemAction *sem,
     871             :                           const char *json,
     872             :                           size_t len,
     873             :                           bool is_last)
     874             : {
     875             :     JsonTokenType tok;
     876             :     JsonParseErrorType result;
     877      745868 :     JsonParseContext ctx = JSON_PARSE_VALUE;
     878      745868 :     JsonParserStack *pstack = lex->pstack;
     879             : 
     880      745868 :     if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
     881           0 :         return JSON_OUT_OF_MEMORY;
     882      745868 :     if (!lex->incremental)
     883           0 :         return JSON_INVALID_LEXER_TYPE;
     884             : 
     885      745868 :     lex->input = lex->token_terminator = lex->line_start = json;
     886      745868 :     lex->input_length = len;
     887      745868 :     lex->inc_state->is_last_chunk = is_last;
     888      745868 :     lex->inc_state->started = true;
     889             : 
     890             :     /* get the initial token */
     891      745868 :     result = json_lex(lex);
     892      745868 :     if (result != JSON_SUCCESS)
     893      143560 :         return result;
     894             : 
     895      602308 :     tok = lex_peek(lex);
     896             : 
     897             :     /* use prediction stack for incremental parsing */
     898             : 
     899      602308 :     if (!have_prediction(pstack))
     900             :     {
     901        3780 :         td_entry    goal = TD_ENTRY(JSON_PROD_GOAL);
     902             : 
     903        3780 :         push_prediction(pstack, goal);
     904             :     }
     905             : 
     906    46390740 :     while (have_prediction(pstack))
     907             :     {
     908    46388500 :         char        top = pop_prediction(pstack);
     909             :         td_entry    entry;
     910             : 
     911             :         /*
     912             :          * these first two branches are the guts of the Table Driven method
     913             :          */
     914    46388500 :         if (top == tok)
     915             :         {
     916             :             /*
     917             :              * tok can only be a terminal symbol, so top must be too. the
     918             :              * token matches the top of the stack, so get the next token.
     919             :              */
     920    11797328 :             if (tok < JSON_TOKEN_END)
     921             :             {
     922    11795088 :                 result = json_lex(lex);
     923    11795088 :                 if (result != JSON_SUCCESS)
     924      600066 :                     return result;
     925    11196416 :                 tok = lex_peek(lex);
     926             :             }
     927             :         }
     928    34591172 :         else if (IS_NT(top) && (entry = td_parser_table[OFS(top)][tok]).prod != NULL)
     929             :         {
     930             :             /*
     931             :              * the token is in the director set for a production of the
     932             :              * non-terminal at the top of the stack, so push the reversed RHS
     933             :              * of the production onto the stack.
     934             :              */
     935    14576942 :             push_prediction(pstack, entry);
     936             :         }
     937    20014230 :         else if (IS_SEM(top))
     938             :         {
     939             :             /*
     940             :              * top is a semantic action marker, so take action accordingly.
     941             :              * It's important to have these markers in the prediction stack
     942             :              * before any token they might need so we don't advance the token
     943             :              * prematurely. Note in a couple of cases we need to do something
     944             :              * both before and after the token.
     945             :              */
     946    20013348 :             switch (top)
     947             :             {
     948      243470 :                 case JSON_SEM_OSTART:
     949             :                     {
     950      243470 :                         json_struct_action ostart = sem->object_start;
     951             : 
     952      243470 :                         if (lex->lex_level >= JSON_TD_MAX_STACK)
     953           0 :                             return JSON_NESTING_TOO_DEEP;
     954             : 
     955      243470 :                         if (ostart != NULL)
     956             :                         {
     957      222118 :                             result = (*ostart) (sem->semstate);
     958      222118 :                             if (result != JSON_SUCCESS)
     959           0 :                                 return result;
     960             :                         }
     961             : 
     962      243470 :                         if (!inc_lex_level(lex))
     963           0 :                             return JSON_OUT_OF_MEMORY;
     964             :                     }
     965      243470 :                     break;
     966      242740 :                 case JSON_SEM_OEND:
     967             :                     {
     968      242740 :                         json_struct_action oend = sem->object_end;
     969             : 
     970      242740 :                         dec_lex_level(lex);
     971      242740 :                         if (oend != NULL)
     972             :                         {
     973      222116 :                             result = (*oend) (sem->semstate);
     974      222116 :                             if (result != JSON_SUCCESS)
     975           0 :                                 return result;
     976             :                         }
     977             :                     }
     978      242740 :                     break;
     979     4922528 :                 case JSON_SEM_ASTART:
     980             :                     {
     981     4922528 :                         json_struct_action astart = sem->array_start;
     982             : 
     983     4922528 :                         if (lex->lex_level >= JSON_TD_MAX_STACK)
     984         512 :                             return JSON_NESTING_TOO_DEEP;
     985             : 
     986     4922016 :                         if (astart != NULL)
     987             :                         {
     988         536 :                             result = (*astart) (sem->semstate);
     989         536 :                             if (result != JSON_SUCCESS)
     990           0 :                                 return result;
     991             :                         }
     992             : 
     993     4922016 :                         if (!inc_lex_level(lex))
     994           0 :                             return JSON_OUT_OF_MEMORY;
     995             :                     }
     996     4922016 :                     break;
     997     1644944 :                 case JSON_SEM_AEND:
     998             :                     {
     999     1644944 :                         json_struct_action aend = sem->array_end;
    1000             : 
    1001     1644944 :                         dec_lex_level(lex);
    1002     1644944 :                         if (aend != NULL)
    1003             :                         {
    1004         536 :                             result = (*aend) (sem->semstate);
    1005         536 :                             if (result != JSON_SUCCESS)
    1006           0 :                                 return result;
    1007             :                         }
    1008             :                     }
    1009     1644944 :                     break;
    1010     1190532 :                 case JSON_SEM_OFIELD_INIT:
    1011             :                     {
    1012             :                         /*
    1013             :                          * all we do here is save out the field name. We have
    1014             :                          * to wait to get past the ':' to see if the next
    1015             :                          * value is null so we can call the semantic routine
    1016             :                          */
    1017     1190532 :                         char       *fname = NULL;
    1018     1190532 :                         json_ofield_action ostart = sem->object_field_start;
    1019     1190532 :                         json_ofield_action oend = sem->object_field_end;
    1020             : 
    1021     1190532 :                         if ((ostart != NULL || oend != NULL) && lex->need_escapes)
    1022             :                         {
    1023     1109772 :                             fname = STRDUP(lex->strval->data);
    1024     1109772 :                             if (fname == NULL)
    1025           0 :                                 return JSON_OUT_OF_MEMORY;
    1026             :                         }
    1027     1190532 :                         set_fname(lex, fname);
    1028             :                     }
    1029     1190532 :                     break;
    1030     1190276 :                 case JSON_SEM_OFIELD_START:
    1031             :                     {
    1032             :                         /*
    1033             :                          * the current token should be the first token of the
    1034             :                          * value
    1035             :                          */
    1036     1190276 :                         bool        isnull = tok == JSON_TOKEN_NULL;
    1037     1190276 :                         json_ofield_action ostart = sem->object_field_start;
    1038             : 
    1039     1190276 :                         set_fnull(lex, isnull);
    1040             : 
    1041     1190276 :                         if (ostart != NULL)
    1042             :                         {
    1043     1109772 :                             char       *fname = get_fname(lex);
    1044             : 
    1045     1109772 :                             result = (*ostart) (sem->semstate, fname, isnull);
    1046     1109772 :                             if (result != JSON_SUCCESS)
    1047           0 :                                 return result;
    1048             :                         }
    1049             :                     }
    1050     1190276 :                     break;
    1051     1190202 :                 case JSON_SEM_OFIELD_END:
    1052             :                     {
    1053     1190202 :                         json_ofield_action oend = sem->object_field_end;
    1054             : 
    1055     1190202 :                         if (oend != NULL)
    1056             :                         {
    1057        1248 :                             char       *fname = get_fname(lex);
    1058        1248 :                             bool        isnull = get_fnull(lex);
    1059             : 
    1060        1248 :                             result = (*oend) (sem->semstate, fname, isnull);
    1061        1248 :                             if (result != JSON_SUCCESS)
    1062           0 :                                 return result;
    1063             :                         }
    1064             :                     }
    1065     1190202 :                     break;
    1066     5152372 :                 case JSON_SEM_AELEM_START:
    1067             :                     {
    1068     5152372 :                         json_aelem_action astart = sem->array_element_start;
    1069     5152372 :                         bool        isnull = tok == JSON_TOKEN_NULL;
    1070             : 
    1071     5152372 :                         set_fnull(lex, isnull);
    1072             : 
    1073     5152372 :                         if (astart != NULL)
    1074             :                         {
    1075         240 :                             result = (*astart) (sem->semstate, isnull);
    1076         240 :                             if (result != JSON_SUCCESS)
    1077           0 :                                 return result;
    1078             :                         }
    1079             :                     }
    1080     5152372 :                     break;
    1081     1875572 :                 case JSON_SEM_AELEM_END:
    1082             :                     {
    1083     1875572 :                         json_aelem_action aend = sem->array_element_end;
    1084             : 
    1085     1875572 :                         if (aend != NULL)
    1086             :                         {
    1087         240 :                             bool        isnull = get_fnull(lex);
    1088             : 
    1089         240 :                             result = (*aend) (sem->semstate, isnull);
    1090         240 :                             if (result != JSON_SUCCESS)
    1091           0 :                                 return result;
    1092             :                         }
    1093             :                     }
    1094     1875572 :                     break;
    1095     1180356 :                 case JSON_SEM_SCALAR_INIT:
    1096             :                     {
    1097     1180356 :                         json_scalar_action sfunc = sem->scalar;
    1098             : 
    1099     1180356 :                         pstack->scalar_val = NULL;
    1100             : 
    1101     1180356 :                         if (sfunc != NULL)
    1102             :                         {
    1103             :                             /*
    1104             :                              * extract the de-escaped string value, or the raw
    1105             :                              * lexeme
    1106             :                              */
    1107             :                             /*
    1108             :                              * XXX copied from RD parser but looks like a
    1109             :                              * buglet
    1110             :                              */
    1111     1109164 :                             if (tok == JSON_TOKEN_STRING)
    1112             :                             {
    1113      886900 :                                 if (lex->need_escapes)
    1114             :                                 {
    1115      886900 :                                     pstack->scalar_val = STRDUP(lex->strval->data);
    1116      886900 :                                     if (pstack->scalar_val == NULL)
    1117           0 :                                         return JSON_OUT_OF_MEMORY;
    1118             :                                 }
    1119             :                             }
    1120             :                             else
    1121             :                             {
    1122      222264 :                                 ptrdiff_t   tlen = (lex->token_terminator - lex->token_start);
    1123             : 
    1124      222264 :                                 pstack->scalar_val = ALLOC(tlen + 1);
    1125      222264 :                                 if (pstack->scalar_val == NULL)
    1126           0 :                                     return JSON_OUT_OF_MEMORY;
    1127             : 
    1128      222264 :                                 memcpy(pstack->scalar_val, lex->token_start, tlen);
    1129      222264 :                                 pstack->scalar_val[tlen] = '\0';
    1130             :                             }
    1131     1109164 :                             pstack->scalar_tok = tok;
    1132             :                         }
    1133             :                     }
    1134     1180356 :                     break;
    1135     1180356 :                 case JSON_SEM_SCALAR_CALL:
    1136             :                     {
    1137             :                         /*
    1138             :                          * We'd like to be able to get rid of this business of
    1139             :                          * two bits of scalar action, but we can't. It breaks
    1140             :                          * certain semantic actions which expect that when
    1141             :                          * called the lexer has consumed the item. See for
    1142             :                          * example get_scalar() in jsonfuncs.c.
    1143             :                          */
    1144     1180356 :                         json_scalar_action sfunc = sem->scalar;
    1145             : 
    1146     1180356 :                         if (sfunc != NULL)
    1147             :                         {
    1148     1109164 :                             result = (*sfunc) (sem->semstate, pstack->scalar_val, pstack->scalar_tok);
    1149             : 
    1150             :                             /*
    1151             :                              * Either ownership of the token passed to the
    1152             :                              * callback, or we need to free it now. Either
    1153             :                              * way, clear our pointer to it so it doesn't get
    1154             :                              * freed in the future.
    1155             :                              */
    1156     1109162 :                             if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
    1157         544 :                                 FREE(pstack->scalar_val);
    1158     1109162 :                             pstack->scalar_val = NULL;
    1159             : 
    1160     1109162 :                             if (result != JSON_SUCCESS)
    1161           0 :                                 return result;
    1162             :                         }
    1163             :                     }
    1164     1180354 :                     break;
    1165           0 :                 default:
    1166             :                     /* should not happen */
    1167           0 :                     break;
    1168             :             }
    1169             :         }
    1170             :         else
    1171             :         {
    1172             :             /*
    1173             :              * The token matched neither the stack top (when it is a
    1174             :              * terminal) nor any production for the stack top (when it is a
    1175             :              * non-terminal).
    1175             :              *
    1176             :              * Various cases here are Asserted to be not possible, as the
    1177             :              * token would not appear at the top of the prediction stack
    1178             :              * unless the lookahead matched.
    1179             :              */
    1180         882 :             switch (top)
    1181             :             {
    1182         152 :                 case JSON_TOKEN_STRING:
    1183         152 :                     if (next_prediction(pstack) == JSON_TOKEN_COLON)
    1184         152 :                         ctx = JSON_PARSE_STRING;
    1185             :                     else
    1186             :                     {
    1187             :                         Assert(false);
    1188           0 :                         ctx = JSON_PARSE_VALUE;
    1189             :                     }
    1190         152 :                     break;
    1191           0 :                 case JSON_TOKEN_NUMBER:
    1192             :                 case JSON_TOKEN_TRUE:
    1193             :                 case JSON_TOKEN_FALSE:
    1194             :                 case JSON_TOKEN_NULL:
    1195             :                 case JSON_TOKEN_ARRAY_START:
    1196             :                 case JSON_TOKEN_OBJECT_START:
    1197             :                     Assert(false);
    1198           0 :                     ctx = JSON_PARSE_VALUE;
    1199           0 :                     break;
    1200           0 :                 case JSON_TOKEN_ARRAY_END:
    1201             :                     Assert(false);
    1202           0 :                     ctx = JSON_PARSE_ARRAY_NEXT;
    1203           0 :                     break;
    1204           0 :                 case JSON_TOKEN_OBJECT_END:
    1205             :                     Assert(false);
    1206           0 :                     ctx = JSON_PARSE_OBJECT_NEXT;
    1207           0 :                     break;
    1208           0 :                 case JSON_TOKEN_COMMA:
    1209             :                     Assert(false);
    1210           0 :                     if (next_prediction(pstack) == JSON_TOKEN_STRING)
    1211           0 :                         ctx = JSON_PARSE_OBJECT_NEXT;
    1212             :                     else
    1213           0 :                         ctx = JSON_PARSE_ARRAY_NEXT;
    1214           0 :                     break;
    1215         104 :                 case JSON_TOKEN_COLON:
    1216         104 :                     ctx = JSON_PARSE_OBJECT_LABEL;
    1217         104 :                     break;
    1218          24 :                 case JSON_TOKEN_END:
    1219          24 :                     ctx = JSON_PARSE_END;
    1220          24 :                     break;
    1221          72 :                 case JSON_NT_MORE_ARRAY_ELEMENTS:
    1222          72 :                     ctx = JSON_PARSE_ARRAY_NEXT;
    1223          72 :                     break;
    1224          56 :                 case JSON_NT_ARRAY_ELEMENTS:
    1225          56 :                     ctx = JSON_PARSE_ARRAY_START;
    1226          56 :                     break;
    1227         280 :                 case JSON_NT_MORE_KEY_PAIRS:
    1228         280 :                     ctx = JSON_PARSE_OBJECT_NEXT;
    1229         280 :                     break;
    1230         120 :                 case JSON_NT_KEY_PAIRS:
    1231         120 :                     ctx = JSON_PARSE_OBJECT_START;
    1232         120 :                     break;
    1233          74 :                 default:
    1234          74 :                     ctx = JSON_PARSE_VALUE;
    1235             :             }
    1236         882 :             return report_parse_error(ctx, lex);
    1237             :         }
    1238             :     }
    1239             : 
    1240        2240 :     return JSON_SUCCESS;
    1241             : }
    1242             : 
    1243             : /*
    1244             :  *  Recursive Descent parse routines. There is one for each structural
    1245             :  *  element in a json document:
    1246             :  *    - scalar (string, number, true, false, null)
    1247             :  *    - array  ( [ ] )
    1248             :  *    - array element
    1249             :  *    - object ( { } )
    1250             :  *    - object field
    1251             :  */
/*
 * Parse a single JSON scalar value (string, number, true, false, or null)
 * and hand it to the scalar semantic callback, if one is registered.  On
 * success the scalar token has been consumed from the lexer.
 *
 * Returns JSON_SUCCESS, a parse/lex error code, JSON_OUT_OF_MEMORY, or
 * whatever the callback returns.
 */
static inline JsonParseErrorType
parse_scalar(JsonLexContext *lex, const JsonSemAction *sem)
{
    char       *val = NULL;
    json_scalar_action sfunc = sem->scalar;
    JsonTokenType tok = lex_peek(lex);
    JsonParseErrorType result;

    /* a scalar must be a string, a number, true, false, or null */
    if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
        tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
        tok != JSON_TOKEN_NULL)
        return report_parse_error(JSON_PARSE_VALUE, lex);

    /* if no semantic function, just consume the token */
    if (sfunc == NULL)
        return json_lex(lex);

    /*
     * Extract the de-escaped string value, or the raw lexeme.  For a string
     * token, lex->strval holds the de-escaped text only when need_escapes
     * was requested; otherwise val stays NULL.  For any other token type,
     * copy the raw token text out of the input buffer.
     */
    if (lex_peek(lex) == JSON_TOKEN_STRING)
    {
        if (lex->need_escapes)
        {
            val = STRDUP(lex->strval->data);
            if (val == NULL)
                return JSON_OUT_OF_MEMORY;
        }
    }
    else
    {
        int         len = (lex->token_terminator - lex->token_start);

        val = ALLOC(len + 1);
        if (val == NULL)
            return JSON_OUT_OF_MEMORY;

        memcpy(val, lex->token_start, len);
        val[len] = '\0';
    }

    /* consume the token */
    result = json_lex(lex);
    if (result != JSON_SUCCESS)
    {
        FREE(val);
        return result;
    }

    /* invoke the callback, which may take ownership of val */
    result = (*sfunc) (sem->semstate, val, tok);

    /*
     * When the lex context owns its tokens, the callback did not take
     * ownership of val, so release our copy here.
     */
    if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
        FREE(val);

    return result;
}
    1308             : 
/*
 * Parse one "fieldname" : value pair inside an object, invoking the
 * object_field_start/object_field_end callbacks (when present) around the
 * parsing of the value.
 */
static JsonParseErrorType
parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
{
    /*
     * An object field is "fieldname" : value where value can be a scalar,
     * object or array.  Note: in user-facing docs and error messages, we
     * generally call a field name a "key".
     */

    char       *fname = NULL;
    json_ofield_action ostart = sem->object_field_start;
    json_ofield_action oend = sem->object_field_end;
    bool        isnull;
    JsonTokenType tok;
    JsonParseErrorType result;

    if (lex_peek(lex) != JSON_TOKEN_STRING)
        return report_parse_error(JSON_PARSE_STRING, lex);
    /* copy the field name only if some callback will actually receive it */
    if ((ostart != NULL || oend != NULL) && lex->need_escapes)
    {
        fname = STRDUP(lex->strval->data);
        if (fname == NULL)
            return JSON_OUT_OF_MEMORY;
    }
    /* consume the field-name token */
    result = json_lex(lex);
    if (result != JSON_SUCCESS)
    {
        FREE(fname);
        return result;
    }

    result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
    if (result != JSON_SUCCESS)
    {
        FREE(fname);
        return result;
    }

    tok = lex_peek(lex);
    isnull = tok == JSON_TOKEN_NULL;

    if (ostart != NULL)
    {
        result = (*ostart) (sem->semstate, fname, isnull);
        if (result != JSON_SUCCESS)
            goto ofield_cleanup;
    }

    /* the field's value may be any JSON value */
    switch (tok)
    {
        case JSON_TOKEN_OBJECT_START:
            result = parse_object(lex, sem);
            break;
        case JSON_TOKEN_ARRAY_START:
            result = parse_array(lex, sem);
            break;
        default:
            result = parse_scalar(lex, sem);
    }
    if (result != JSON_SUCCESS)
        goto ofield_cleanup;

    if (oend != NULL)
    {
        result = (*oend) (sem->semstate, fname, isnull);
        if (result != JSON_SUCCESS)
            goto ofield_cleanup;
    }

ofield_cleanup:
    /*
     * Free our copy of fname only when the lex context retains token
     * ownership; otherwise the callbacks have taken ownership of it.
     */
    if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
        FREE(fname);
    return result;
}
    1383             : 
/*
 * Parse an object: '{' [ field { ',' field } ] '}', invoking the
 * object_start/object_end callbacks (when present) around the contents.
 */
static JsonParseErrorType
parse_object(JsonLexContext *lex, const JsonSemAction *sem)
{
    /*
     * an object is a possibly empty sequence of object fields, separated by
     * commas and surrounded by curly braces.
     */
    json_struct_action ostart = sem->object_start;
    json_struct_action oend = sem->object_end;
    JsonTokenType tok;
    JsonParseErrorType result;

#ifndef FRONTEND

    /*
     * TODO: clients need some way to put a bound on stack growth. Parse level
     * limits maybe?
     */
    check_stack_depth();
#endif

    if (ostart != NULL)
    {
        result = (*ostart) (sem->semstate);
        if (result != JSON_SUCCESS)
            return result;
    }

    /*
     * Data inside an object is at a higher nesting level than the object
     * itself. Note that we increment this after we call the semantic routine
     * for the object start and restore it before we call the routine for the
     * object end.  (The error returns below do not restore lex_level;
     * presumably the parse is abandoned on failure.)
     */
    lex->lex_level++;

    Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START);
    /* consume the '{' */
    result = json_lex(lex);
    if (result != JSON_SUCCESS)
        return result;

    /* the object body must begin with a field name or a closing brace */
    tok = lex_peek(lex);
    switch (tok)
    {
        case JSON_TOKEN_STRING:
            result = parse_object_field(lex, sem);
            /* additional fields are separated by commas */
            while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
            {
                result = json_lex(lex);
                if (result != JSON_SUCCESS)
                    break;
                result = parse_object_field(lex, sem);
            }
            break;
        case JSON_TOKEN_OBJECT_END:
            break;
        default:
            /* case of an invalid initial token inside the object */
            result = report_parse_error(JSON_PARSE_OBJECT_START, lex);
    }
    if (result != JSON_SUCCESS)
        return result;

    result = lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
    if (result != JSON_SUCCESS)
        return result;

    lex->lex_level--;

    if (oend != NULL)
    {
        result = (*oend) (sem->semstate);
        if (result != JSON_SUCCESS)
            return result;
    }

    return JSON_SUCCESS;
}
    1462             : 
    1463             : static JsonParseErrorType
    1464       54114 : parse_array_element(JsonLexContext *lex, const JsonSemAction *sem)
    1465             : {
    1466       54114 :     json_aelem_action astart = sem->array_element_start;
    1467       54114 :     json_aelem_action aend = sem->array_element_end;
    1468       54114 :     JsonTokenType tok = lex_peek(lex);
    1469             :     JsonParseErrorType result;
    1470             :     bool        isnull;
    1471             : 
    1472       54114 :     isnull = tok == JSON_TOKEN_NULL;
    1473             : 
    1474       54114 :     if (astart != NULL)
    1475             :     {
    1476        7738 :         result = (*astart) (sem->semstate, isnull);
    1477        7738 :         if (result != JSON_SUCCESS)
    1478           0 :             return result;
    1479             :     }
    1480             : 
    1481             :     /* an array element is any object, array or scalar */
    1482       54114 :     switch (tok)
    1483             :     {
    1484       18124 :         case JSON_TOKEN_OBJECT_START:
    1485       18124 :             result = parse_object(lex, sem);
    1486       18066 :             break;
    1487       12114 :         case JSON_TOKEN_ARRAY_START:
    1488       12114 :             result = parse_array(lex, sem);
    1489        3292 :             break;
    1490       23876 :         default:
    1491       23876 :             result = parse_scalar(lex, sem);
    1492             :     }
    1493             : 
    1494       45216 :     if (result != JSON_SUCCESS)
    1495          66 :         return result;
    1496             : 
    1497       45150 :     if (aend != NULL)
    1498             :     {
    1499        7228 :         result = (*aend) (sem->semstate, isnull);
    1500        7216 :         if (result != JSON_SUCCESS)
    1501           0 :             return result;
    1502             :     }
    1503             : 
    1504       45138 :     return JSON_SUCCESS;
    1505             : }
    1506             : 
    1507             : static JsonParseErrorType
    1508       33792 : parse_array(JsonLexContext *lex, const JsonSemAction *sem)
    1509             : {
    1510             :     /*
    1511             :      * an array is a possibly empty sequence of array elements, separated by
    1512             :      * commas and surrounded by square brackets.
    1513             :      */
    1514       33792 :     json_struct_action astart = sem->array_start;
    1515       33792 :     json_struct_action aend = sem->array_end;
    1516             :     JsonParseErrorType result;
    1517             : 
    1518             : #ifndef FRONTEND
    1519       33736 :     check_stack_depth();
    1520             : #endif
    1521             : 
    1522       33780 :     if (astart != NULL)
    1523             :     {
    1524       15790 :         result = (*astart) (sem->semstate);
    1525       15776 :         if (result != JSON_SUCCESS)
    1526           0 :             return result;
    1527             :     }
    1528             : 
    1529             :     /*
    1530             :      * Data inside an array is at a higher nesting level than the array
    1531             :      * itself. Note that we increment this after we call the semantic routine
    1532             :      * for the array start and restore it before we call the routine for the
    1533             :      * array end.
    1534             :      */
    1535       33766 :     lex->lex_level++;
    1536             : 
    1537       33766 :     result = lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
    1538       33766 :     if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
    1539             :     {
    1540       26292 :         result = parse_array_element(lex, sem);
    1541             : 
    1542       45156 :         while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
    1543             :         {
    1544       27774 :             result = json_lex(lex);
    1545       27774 :             if (result != JSON_SUCCESS)
    1546           0 :                 break;
    1547       27774 :             result = parse_array_element(lex, sem);
    1548             :         }
    1549             :     }
    1550       24856 :     if (result != JSON_SUCCESS)
    1551          66 :         return result;
    1552             : 
    1553       24790 :     result = lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
    1554       24790 :     if (result != JSON_SUCCESS)
    1555          24 :         return result;
    1556             : 
    1557       24766 :     lex->lex_level--;
    1558             : 
    1559       24766 :     if (aend != NULL)
    1560             :     {
    1561        9172 :         result = (*aend) (sem->semstate);
    1562        9148 :         if (result != JSON_SUCCESS)
    1563           0 :             return result;
    1564             :     }
    1565             : 
    1566       24742 :     return JSON_SUCCESS;
    1567             : }
    1568             : 
    1569             : /*
    1570             :  * Lex one token from the input stream.
    1571             :  *
    1572             :  * When doing incremental parsing, we can reach the end of the input string
    1573             :  * without having (or knowing we have) a complete token. If it's not the
    1574             :  * final chunk of input, the partial token is then saved to the lex
    1575             :  * structure's ptok StringInfo. On subsequent calls input is appended to this
    1576             :  * buffer until we have something that we think is a complete token,
    1577             :  * which is then lexed using a recursive call to json_lex. Processing then
    1578             :  * continues as normal on subsequent calls.
    1579             :  *
    1580             :  * Note that when doing incremental processing, the lex.prev_token_terminator
    1581             :  * should not be relied on. It could point into a previous input chunk or
    1582             :  * worse.
    1583             :  */
    1584             : JsonParseErrorType
    1585    13775662 : json_lex(JsonLexContext *lex)
    1586             : {
    1587             :     const char *s;
    1588    13775662 :     const char *const end = lex->input + lex->input_length;
    1589             :     JsonParseErrorType result;
    1590             : 
    1591    13775662 :     if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
    1592           0 :         return JSON_OUT_OF_MEMORY;
    1593             : 
    1594    13775662 :     if (lex->incremental)
    1595             :     {
    1596    12540956 :         if (lex->inc_state->partial_completed)
    1597             :         {
    1598             :             /*
    1599             :              * We just lexed a completed partial token on the last call, so
    1600             :              * reset everything
    1601             :              */
    1602       68764 :             jsonapi_resetStringInfo(&(lex->inc_state->partial_token));
    1603       68764 :             lex->token_terminator = lex->input;
    1604       68764 :             lex->inc_state->partial_completed = false;
    1605             :         }
    1606             : 
    1607             : #ifdef JSONAPI_USE_PQEXPBUFFER
    1608             :         /* Make sure our partial token buffer is valid before using it below. */
    1609     3831520 :         if (PQExpBufferDataBroken(lex->inc_state->partial_token))
    1610           0 :             return JSON_OUT_OF_MEMORY;
    1611             : #endif
    1612             :     }
    1613             : 
    1614    13775662 :     s = lex->token_terminator;
    1615             : 
    1616    13775662 :     if (lex->incremental && lex->inc_state->partial_token.len)
    1617             :     {
    1618             :         /*
    1619             :          * We have a partial token. Extend it and if completed lex it by a
    1620             :          * recursive call
    1621             :          */
    1622      166308 :         jsonapi_StrValType *ptok = &(lex->inc_state->partial_token);
    1623      166308 :         size_t      added = 0;
    1624      166308 :         bool        tok_done = false;
    1625      166308 :         JsonLexContext dummy_lex = {0};
    1626             :         JsonParseErrorType partial_result;
    1627             : 
    1628      166308 :         if (ptok->data[0] == '"')
    1629             :         {
    1630             :             /*
    1631             :              * It's a string. Accumulate characters until we reach an
    1632             :              * unescaped '"'.
    1633             :              */
    1634      160060 :             int         escapes = 0;
    1635             : 
    1636      162628 :             for (int i = ptok->len - 1; i > 0; i--)
    1637             :             {
    1638             :                 /* count the trailing backslashes on the partial token */
    1639      151242 :                 if (ptok->data[i] == '\\')
    1640        2568 :                     escapes++;
    1641             :                 else
    1642      148674 :                     break;
    1643             :             }
    1644             : 
    1645     1213838 :             for (size_t i = 0; i < lex->input_length; i++)
    1646             :             {
    1647     1118694 :                 char        c = lex->input[i];
    1648             : 
    1649     1118694 :                 jsonapi_appendStringInfoCharMacro(ptok, c);
    1650     1118694 :                 added++;
    1651     1118694 :                 if (c == '"' && escapes % 2 == 0)
    1652             :                 {
    1653       64916 :                     tok_done = true;
    1654       64916 :                     break;
    1655             :                 }
    1656     1053778 :                 if (c == '\\')
    1657        4096 :                     escapes++;
    1658             :                 else
    1659     1049682 :                     escapes = 0;
    1660             :             }
    1661             :         }
    1662             :         else
    1663             :         {
    1664             :             /* not a string */
    1665        6248 :             char        c = ptok->data[0];
    1666             : 
    1667        6248 :             if (c == '-' || (c >= '0' && c <= '9'))
    1668             :             {
    1669             :                 /* for numbers look for possible numeric continuations */
    1670             : 
    1671        1312 :                 bool        numend = false;
    1672             : 
    1673        3680 :                 for (size_t i = 0; i < lex->input_length && !numend; i++)
    1674             :                 {
    1675        2368 :                     char        cc = lex->input[i];
    1676             : 
    1677        2368 :                     switch (cc)
    1678             :                     {
    1679        1592 :                         case '+':
    1680             :                         case '-':
    1681             :                         case 'e':
    1682             :                         case 'E':
    1683             :                         case '0':
    1684             :                         case '1':
    1685             :                         case '2':
    1686             :                         case '3':
    1687             :                         case '4':
    1688             :                         case '5':
    1689             :                         case '6':
    1690             :                         case '7':
    1691             :                         case '8':
    1692             :                         case '9':
    1693             :                             {
    1694        1592 :                                 jsonapi_appendStringInfoCharMacro(ptok, cc);
    1695        1592 :                                 added++;
    1696             :                             }
    1697        1592 :                             break;
    1698         776 :                         default:
    1699         776 :                             numend = true;
    1700             :                     }
    1701             :                 }
    1702             :             }
    1703             : 
    1704             :             /*
    1705             :              * Add any remaining alphanumeric chars. This takes care of the
    1706             :              * {null, false, true} literals as well as any trailing
    1707             :              * alphanumeric junk on non-string tokens.
    1708             :              */
    1709       12896 :             for (size_t i = added; i < lex->input_length; i++)
    1710             :             {
    1711       10936 :                 char        cc = lex->input[i];
    1712             : 
    1713       10936 :                 if (JSON_ALPHANUMERIC_CHAR(cc))
    1714             :                 {
    1715        6648 :                     jsonapi_appendStringInfoCharMacro(ptok, cc);
    1716        6648 :                     added++;
    1717             :                 }
    1718             :                 else
    1719             :                 {
    1720        4288 :                     tok_done = true;
    1721        4288 :                     break;
    1722             :                 }
    1723             :             }
    1724        6248 :             if (added == lex->input_length &&
    1725        1960 :                 lex->inc_state->is_last_chunk)
    1726             :             {
    1727         152 :                 tok_done = true;
    1728             :             }
    1729             :         }
    1730             : 
    1731      166308 :         if (!tok_done)
    1732             :         {
    1733             :             /* We should have consumed the whole chunk in this case. */
    1734             :             Assert(added == lex->input_length);
    1735             : 
    1736       96952 :             if (!lex->inc_state->is_last_chunk)
    1737       96872 :                 return JSON_INCOMPLETE;
    1738             : 
    1739             :             /* json_errdetail() needs access to the accumulated token. */
    1740          80 :             lex->token_start = ptok->data;
    1741          80 :             lex->token_terminator = ptok->data + ptok->len;
    1742          80 :             return JSON_INVALID_TOKEN;
    1743             :         }
    1744             : 
    1745             :         /*
    1746             :          * Everything up to lex->input[added] has been added to the partial
    1747             :          * token, so move the input past it.
    1748             :          */
    1749       69356 :         lex->input += added;
    1750       69356 :         lex->input_length -= added;
    1751             : 
    1752       69356 :         dummy_lex.input = dummy_lex.token_terminator =
    1753       69356 :             dummy_lex.line_start = ptok->data;
    1754       69356 :         dummy_lex.line_number = lex->line_number;
    1755       69356 :         dummy_lex.input_length = ptok->len;
    1756       69356 :         dummy_lex.input_encoding = lex->input_encoding;
    1757       69356 :         dummy_lex.incremental = false;
    1758       69356 :         dummy_lex.need_escapes = lex->need_escapes;
    1759       69356 :         dummy_lex.strval = lex->strval;
    1760             : 
    1761       69356 :         partial_result = json_lex(&dummy_lex);
    1762             : 
    1763             :         /*
    1764             :          * We either have a complete token or an error. In either case we need
    1765             :          * to point to the partial token data for the semantic or error
    1766             :          * routines. If it's not an error we'll readjust on the next call to
    1767             :          * json_lex.
    1768             :          */
    1769       69356 :         lex->token_type = dummy_lex.token_type;
    1770       69356 :         lex->line_number = dummy_lex.line_number;
    1771             : 
    1772             :         /*
    1773             :          * We know the prev_token_terminator must be back in some previous
    1774             :          * piece of input, so we just make it NULL.
    1775             :          */
    1776       69356 :         lex->prev_token_terminator = NULL;
    1777             : 
    1778             :         /*
    1779             :          * Normally token_start would be ptok->data, but it could be later,
    1780             :          * see json_lex_string's handling of invalid escapes.
    1781             :          */
    1782       69356 :         lex->token_start = dummy_lex.token_start;
    1783       69356 :         lex->token_terminator = dummy_lex.token_terminator;
    1784       69356 :         if (partial_result == JSON_SUCCESS)
    1785             :         {
    1786             :             /* make sure we've used all the input */
    1787       69140 :             if (lex->token_terminator - lex->token_start != ptok->len)
    1788             :             {
    1789             :                 Assert(false);
    1790           0 :                 return JSON_INVALID_TOKEN;
    1791             :             }
    1792             : 
    1793       69140 :             lex->inc_state->partial_completed = true;
    1794             :         }
    1795       69356 :         return partial_result;
    1796             :         /* end of partial token processing */
    1797             :     }
    1798             : 
    1799             :     /* Skip leading whitespace. */
    1800    20553448 :     while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
    1801             :     {
    1802     6944094 :         if (*s++ == '\n')
    1803             :         {
    1804      481102 :             ++lex->line_number;
    1805      481102 :             lex->line_start = s;
    1806             :         }
    1807             :     }
    1808    13609354 :     lex->token_start = s;
    1809             : 
    1810             :     /* Determine token type. */
    1811    13609354 :     if (s >= end)
    1812             :     {
    1813      650654 :         lex->token_start = NULL;
    1814      650654 :         lex->prev_token_terminator = lex->token_terminator;
    1815      650654 :         lex->token_terminator = s;
    1816      650654 :         lex->token_type = JSON_TOKEN_END;
    1817             :     }
    1818             :     else
    1819             :     {
    1820    12958700 :         switch (*s)
    1821             :         {
    1822             :                 /* Single-character token, some kind of punctuation mark. */
    1823      293342 :             case '{':
    1824      293342 :                 lex->prev_token_terminator = lex->token_terminator;
    1825      293342 :                 lex->token_terminator = s + 1;
    1826      293342 :                 lex->token_type = JSON_TOKEN_OBJECT_START;
    1827      293342 :                 break;
    1828      285038 :             case '}':
    1829      285038 :                 lex->prev_token_terminator = lex->token_terminator;
    1830      285038 :                 lex->token_terminator = s + 1;
    1831      285038 :                 lex->token_type = JSON_TOKEN_OBJECT_END;
    1832      285038 :                 break;
    1833     4956458 :             case '[':
    1834     4956458 :                 lex->prev_token_terminator = lex->token_terminator;
    1835     4956458 :                 lex->token_terminator = s + 1;
    1836     4956458 :                 lex->token_type = JSON_TOKEN_ARRAY_START;
    1837     4956458 :                 break;
    1838     1669884 :             case ']':
    1839     1669884 :                 lex->prev_token_terminator = lex->token_terminator;
    1840     1669884 :                 lex->token_terminator = s + 1;
    1841     1669884 :                 lex->token_type = JSON_TOKEN_ARRAY_END;
    1842     1669884 :                 break;
    1843     1400248 :             case ',':
    1844     1400248 :                 lex->prev_token_terminator = lex->token_terminator;
    1845     1400248 :                 lex->token_terminator = s + 1;
    1846     1400248 :                 lex->token_type = JSON_TOKEN_COMMA;
    1847     1400248 :                 break;
    1848     1428288 :             case ':':
    1849     1428288 :                 lex->prev_token_terminator = lex->token_terminator;
    1850     1428288 :                 lex->token_terminator = s + 1;
    1851     1428288 :                 lex->token_type = JSON_TOKEN_COLON;
    1852     1428288 :                 break;
    1853     2513966 :             case '"':
    1854             :                 /* string */
    1855     2513966 :                 result = json_lex_string(lex);
    1856     2513966 :                 if (result != JSON_SUCCESS)
    1857       65372 :                     return result;
    1858     2448594 :                 lex->token_type = JSON_TOKEN_STRING;
    1859     2448594 :                 break;
    1860         184 :             case '-':
    1861             :                 /* Negative number. */
    1862         184 :                 result = json_lex_number(lex, s + 1, NULL, NULL);
    1863         184 :                 if (result != JSON_SUCCESS)
    1864           0 :                     return result;
    1865         184 :                 lex->token_type = JSON_TOKEN_NUMBER;
    1866         184 :                 break;
    1867      351598 :             case '0':
    1868             :             case '1':
    1869             :             case '2':
    1870             :             case '3':
    1871             :             case '4':
    1872             :             case '5':
    1873             :             case '6':
    1874             :             case '7':
    1875             :             case '8':
    1876             :             case '9':
    1877             :                 /* Positive number. */
    1878      351598 :                 result = json_lex_number(lex, s, NULL, NULL);
    1879      351598 :                 if (result != JSON_SUCCESS)
    1880         904 :                     return result;
    1881      350694 :                 lex->token_type = JSON_TOKEN_NUMBER;
    1882      350694 :                 break;
    1883       59694 :             default:
    1884             :                 {
    1885             :                     const char *p;
    1886             : 
    1887             :                     /*
    1888             :                      * We're not dealing with a string, number, legal
    1889             :                      * punctuation mark, or end of string.  The only legal
    1890             :                      * tokens we might find here are true, false, and null,
    1891             :                      * but for error reporting purposes we scan until we see a
    1892             :                      * non-alphanumeric character.  That way, we can report
    1893             :                      * the whole word as an unexpected token, rather than just
    1894             :                      * some unintuitive prefix thereof.
    1895             :                      */
    1896      322004 :                     for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++)
    1897             :                          /* skip */ ;
    1898             : 
    1899             :                     /*
    1900             :                      * We got some sort of unexpected punctuation or an
    1901             :                      * otherwise unexpected character, so just complain about
    1902             :                      * that one character.
    1903             :                      */
    1904       59694 :                     if (p == s)
    1905             :                     {
    1906          80 :                         lex->prev_token_terminator = lex->token_terminator;
    1907          80 :                         lex->token_terminator = s + 1;
    1908          80 :                         return JSON_INVALID_TOKEN;
    1909             :                     }
    1910             : 
    1911       59614 :                     if (lex->incremental && !lex->inc_state->is_last_chunk &&
    1912       15664 :                         p == lex->input + lex->input_length)
    1913             :                     {
    1914        3664 :                         jsonapi_appendBinaryStringInfo(&(lex->inc_state->partial_token), s, end - s);
    1915        3664 :                         return JSON_INCOMPLETE;
    1916             :                     }
    1917             : 
    1918             :                     /*
    1919             :                      * We've got a real alphanumeric token here.  If it
    1920             :                      * happens to be true, false, or null, all is well.  If
    1921             :                      * not, error out.
    1922             :                      */
    1923       55950 :                     lex->prev_token_terminator = lex->token_terminator;
    1924       55950 :                     lex->token_terminator = p;
    1925       55950 :                     if (p - s == 4)
    1926             :                     {
    1927       25712 :                         if (memcmp(s, "true", 4) == 0)
    1928        7478 :                             lex->token_type = JSON_TOKEN_TRUE;
    1929       18234 :                         else if (memcmp(s, "null", 4) == 0)
    1930       18222 :                             lex->token_type = JSON_TOKEN_NULL;
    1931             :                         else
    1932          12 :                             return JSON_INVALID_TOKEN;
    1933             :                     }
    1934       30238 :                     else if (p - s == 5 && memcmp(s, "false", 5) == 0)
    1935       30000 :                         lex->token_type = JSON_TOKEN_FALSE;
    1936             :                     else
    1937         238 :                         return JSON_INVALID_TOKEN;
    1938             :                 }
    1939             :         }                       /* end of switch */
    1940             :     }
    1941             : 
    1942    13539084 :     if (lex->incremental && lex->token_type == JSON_TOKEN_END && !lex->inc_state->is_last_chunk)
    1943      575412 :         return JSON_INCOMPLETE;
    1944             :     else
    1945    12963672 :         return JSON_SUCCESS;
    1946             : }
    1947             : 
    1948             : /*
    1949             :  * The next token in the input stream is known to be a string; lex it.
    1950             :  *
    1951             :  * If lex->strval isn't NULL, fill it with the decoded string.
    1952             :  * Set lex->token_terminator to the end of the decoded input, and in
    1953             :  * success cases, transfer its previous value to lex->prev_token_terminator.
    1954             :  * Return JSON_SUCCESS or an error code.
    1955             :  *
    1956             :  * Note: be careful that all error exits advance lex->token_terminator
    1957             :  * to the point after the character we detected the error on.
    1958             :  */
    1959             : static inline JsonParseErrorType
    1960     2513966 : json_lex_string(JsonLexContext *lex)
    1961             : {
    1962             :     const char *s;
    1963     2513966 :     const char *const end = lex->input + lex->input_length;
    1964     2513966 :     int         hi_surrogate = -1;
    1965             : 
    1966             :     /* Convenience macros for error exits */
    1967             : #define FAIL_OR_INCOMPLETE_AT_CHAR_START(code) \
    1968             :     do { \
    1969             :         if (lex->incremental && !lex->inc_state->is_last_chunk) \
    1970             :         { \
    1971             :             jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token, \
    1972             :                                            lex->token_start, \
    1973             :                                            end - lex->token_start); \
    1974             :             return JSON_INCOMPLETE; \
    1975             :         } \
    1976             :         lex->token_terminator = s; \
    1977             :         return code; \
    1978             :     } while (0)
    1979             : #define FAIL_AT_CHAR_END(code) \
    1980             :     do { \
    1981             :         const char     *term = s + pg_encoding_mblen(lex->input_encoding, s); \
    1982             :         lex->token_terminator = (term <= end) ? term : end; \
    1983             :         return code; \
    1984             :     } while (0)
    1985             : 
    1986     2513966 :     if (lex->need_escapes)
    1987             :     {
    1988             : #ifdef JSONAPI_USE_PQEXPBUFFER
    1989             :         /* make sure initialization succeeded */
    1990        1336 :         if (lex->strval == NULL)
    1991           0 :             return JSON_OUT_OF_MEMORY;
    1992             : #endif
    1993     2266220 :         jsonapi_resetStringInfo(lex->strval);
    1994             :     }
    1995             : 
    1996             :     Assert(lex->input_length > 0);
    1997     2513966 :     s = lex->token_start;
    1998             :     for (;;)
    1999             :     {
    2000     5028426 :         s++;
    2001             :         /* Premature end of the string. */
    2002     5028426 :         if (s >= end)
    2003       64568 :             FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
    2004     4963858 :         else if (*s == '"')
    2005     2448594 :             break;
    2006     2515264 :         else if (*s == '\\')
    2007             :         {
    2008             :             /* OK, we have an escape character. */
    2009       10256 :             s++;
    2010       10256 :             if (s >= end)
    2011         192 :                 FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
    2012       10064 :             else if (*s == 'u')
    2013             :             {
    2014             :                 int         i;
    2015        3884 :                 int         ch = 0;
    2016             : 
    2017       18680 :                 for (i = 1; i <= 4; i++)
    2018             :                 {
    2019       15088 :                     s++;
    2020       15088 :                     if (s >= end)
    2021         256 :                         FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
    2022       14832 :                     else if (*s >= '0' && *s <= '9')
    2023        9230 :                         ch = (ch * 16) + (*s - '0');
    2024        5602 :                     else if (*s >= 'a' && *s <= 'f')
    2025        5542 :                         ch = (ch * 16) + (*s - 'a') + 10;
    2026          60 :                     else if (*s >= 'A' && *s <= 'F')
    2027          24 :                         ch = (ch * 16) + (*s - 'A') + 10;
    2028             :                     else
    2029          36 :                         FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT);
    2030             :                 }
    2031        3592 :                 if (lex->need_escapes)
    2032             :                 {
    2033             :                     /*
    2034             :                      * Combine surrogate pairs.
    2035             :                      */
    2036         260 :                     if (is_utf16_surrogate_first(ch))
    2037             :                     {
    2038          72 :                         if (hi_surrogate != -1)
    2039          12 :                             FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE);
    2040          60 :                         hi_surrogate = ch;
    2041          60 :                         continue;
    2042             :                     }
    2043         188 :                     else if (is_utf16_surrogate_second(ch))
    2044             :                     {
    2045          60 :                         if (hi_surrogate == -1)
    2046          24 :                             FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2047          36 :                         ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
    2048          36 :                         hi_surrogate = -1;
    2049             :                     }
    2050             : 
    2051         164 :                     if (hi_surrogate != -1)
    2052           0 :                         FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2053             : 
    2054             :                     /*
    2055             :                      * Reject invalid cases.  We can't have a value above
    2056             :                      * 0xFFFF here (since we only accepted 4 hex digits
    2057             :                      * above), so no need to test for out-of-range chars.
    2058             :                      */
    2059         164 :                     if (ch == 0)
    2060             :                     {
    2061             :                         /* We can't allow this, since our TEXT type doesn't */
    2062          24 :                         FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO);
    2063             :                     }
    2064             : 
    2065             :                     /*
    2066             :                      * Add the represented character to lex->strval.  In the
    2067             :                      * backend, we can let pg_unicode_to_server_noerror()
    2068             :                      * handle any required character set conversion; in
    2069             :                      * frontend, we can only deal with trivial conversions.
    2070             :                      */
    2071             : #ifndef FRONTEND
    2072             :                     {
    2073             :                         char        cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
    2074             : 
    2075          84 :                         if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
    2076           0 :                             FAIL_AT_CHAR_END(JSON_UNICODE_UNTRANSLATABLE);
    2077          84 :                         appendStringInfoString(lex->strval, cbuf);
    2078             :                     }
    2079             : #else
    2080          56 :                     if (lex->input_encoding == PG_UTF8)
    2081             :                     {
    2082             :                         /* OK, we can map the code point to UTF8 easily */
    2083             :                         char        utf8str[5];
    2084             :                         int         utf8len;
    2085             : 
    2086          56 :                         unicode_to_utf8(ch, (unsigned char *) utf8str);
    2087          56 :                         utf8len = pg_utf_mblen((unsigned char *) utf8str);
    2088          56 :                         jsonapi_appendBinaryStringInfo(lex->strval, utf8str, utf8len);
    2089             :                     }
    2090           0 :                     else if (ch <= 0x007f)
    2091             :                     {
    2092             :                         /* The ASCII range is the same in all encodings */
    2093           0 :                         jsonapi_appendStringInfoChar(lex->strval, (char) ch);
    2094             :                     }
    2095             :                     else
    2096           0 :                         FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE);
    2097             : #endif                          /* FRONTEND */
    2098             :                 }
    2099             :             }
    2100        6180 :             else if (lex->need_escapes)
    2101             :             {
    2102         606 :                 if (hi_surrogate != -1)
    2103           0 :                     FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2104             : 
    2105         606 :                 switch (*s)
    2106             :                 {
    2107         386 :                     case '"':
    2108             :                     case '\\':
    2109             :                     case '/':
    2110         386 :                         jsonapi_appendStringInfoChar(lex->strval, *s);
    2111         386 :                         break;
    2112          44 :                     case 'b':
    2113          44 :                         jsonapi_appendStringInfoChar(lex->strval, '\b');
    2114          44 :                         break;
    2115           8 :                     case 'f':
    2116           8 :                         jsonapi_appendStringInfoChar(lex->strval, '\f');
    2117           8 :                         break;
    2118          62 :                     case 'n':
    2119          62 :                         jsonapi_appendStringInfoChar(lex->strval, '\n');
    2120          62 :                         break;
    2121           8 :                     case 'r':
    2122           8 :                         jsonapi_appendStringInfoChar(lex->strval, '\r');
    2123           8 :                         break;
    2124          92 :                     case 't':
    2125          92 :                         jsonapi_appendStringInfoChar(lex->strval, '\t');
    2126          92 :                         break;
    2127           6 :                     default:
    2128             : 
    2129             :                         /*
    2130             :                          * Not a valid string escape, so signal error.  We
    2131             :                          * adjust token_start so that just the escape sequence
    2132             :                          * is reported, not the whole string.
    2133             :                          */
    2134           6 :                         lex->token_start = s;
    2135           6 :                         FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
    2136             :                 }
    2137             :             }
    2138        5574 :             else if (strchr("\"\\/bfnrt", *s) == NULL)
    2139             :             {
    2140             :                 /*
    2141             :                  * Simpler processing if we're not bothered about de-escaping
    2142             :                  *
    2143             :                  * It's very tempting to remove the strchr() call here and
    2144             :                  * replace it with a switch statement, but testing so far has
    2145             :                  * shown it's not a performance win.
    2146             :                  */
    2147         126 :                 lex->token_start = s;
    2148         126 :                 FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
    2149             :             }
    2150             :         }
    2151             :         else
    2152             :         {
    2153     2505008 :             const char *p = s;
    2154             : 
    2155     2505008 :             if (hi_surrogate != -1)
    2156          12 :                 FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2157             : 
    2158             :             /*
    2159             :              * Skip to the first byte that requires special handling, so we
    2160             :              * can batch calls to jsonapi_appendBinaryStringInfo.
    2161             :              */
    2162     3190374 :             while (p < end - sizeof(Vector8) &&
    2163     3010616 :                    !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
    2164     3006998 :                    !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
    2165      685378 :                    !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
    2166      685378 :                 p += sizeof(Vector8);
    2167             : 
    2168    19736850 :             for (; p < end; p++)
    2169             :             {
    2170    19683476 :                 if (*p == '\\' || *p == '"')
    2171             :                     break;
    2172    17231970 :                 else if ((unsigned char) *p <= 31)
    2173             :                 {
    2174             :                     /* Per RFC4627, these characters MUST be escaped. */
    2175             :                     /*
    2176             :                      * Since *p isn't printable, exclude it from the context
    2177             :                      * string
    2178             :                      */
    2179         116 :                     lex->token_terminator = p;
    2180         116 :                     return JSON_ESCAPING_REQUIRED;
    2181             :                 }
    2182             :             }
    2183             : 
    2184     2504880 :             if (lex->need_escapes)
    2185     2266402 :                 jsonapi_appendBinaryStringInfo(lex->strval, s, p - s);
    2186             : 
    2187             :             /*
    2188             :              * s will be incremented at the top of the loop, so set it to just
    2189             :              * behind our lookahead position
    2190             :              */
    2191     2504880 :             s = p - 1;
    2192             :         }
    2193             :     }
    2194             : 
    2195     2448594 :     if (hi_surrogate != -1)
    2196             :     {
    2197           0 :         lex->token_terminator = s + 1;
    2198           0 :         return JSON_UNICODE_LOW_SURROGATE;
    2199             :     }
    2200             : 
    2201             : #ifdef JSONAPI_USE_PQEXPBUFFER
    2202       68896 :     if (lex->need_escapes && PQExpBufferBroken(lex->strval))
    2203           0 :         return JSON_OUT_OF_MEMORY;
    2204             : #endif
    2205             : 
    2206             :     /* Hooray, we found the end of the string! */
    2207     2448594 :     lex->prev_token_terminator = lex->token_terminator;
    2208     2448594 :     lex->token_terminator = s + 1;
    2209     2448594 :     return JSON_SUCCESS;
    2210             : 
    2211             : #undef FAIL_OR_INCOMPLETE_AT_CHAR_START
    2212             : #undef FAIL_AT_CHAR_END
    2213             : }
    2214             : 
    2215             : /*
    2216             :  * The next token in the input stream is known to be a number; lex it.
    2217             :  *
    2218             :  * In JSON, a number consists of four parts:
    2219             :  *
    2220             :  * (1) An optional minus sign ('-').
    2221             :  *
    2222             :  * (2) Either a single '0', or a string of one or more digits that does not
    2223             :  *     begin with a '0'.
    2224             :  *
    2225             :  * (3) An optional decimal part, consisting of a period ('.') followed by
    2226             :  *     one or more digits.  (Note: While this part can be omitted
    2227             :  *     completely, it's not OK to have only the decimal point without
    2228             :  *     any digits afterwards.)
    2229             :  *
    2230             :  * (4) An optional exponent part, consisting of 'e' or 'E', optionally
    2231             :  *     followed by '+' or '-', followed by one or more digits.  (Note:
    2232             :  *     As with the decimal part, if 'e' or 'E' is present, it must be
    2233             :  *     followed by at least one digit.)
    2234             :  *
    2235             :  * The 's' argument to this function points to the ostensible beginning
    2236             :  * of part 2 - i.e. the character after any optional minus sign, or the
    2237             :  * first character of the string if there is none.
    2238             :  *
    2239             :  * If num_err is not NULL, we return an error flag to *num_err rather than
    2240             :  * raising an error for a badly-formed number.  Also, if total_len is not NULL
    2241             :  * the distance from lex->input to the token end+1 is returned to *total_len.
    2242             :  */
    2243             : static inline JsonParseErrorType
    2244      351828 : json_lex_number(JsonLexContext *lex, const char *s,
    2245             :                 bool *num_err, size_t *total_len)
    2246             : {
    2247      351828 :     bool        error = false;
    2248      351828 :     int         len = s - lex->input;
    2249             : 
    2250             :     /* Part (1): leading sign indicator. */
    2251             :     /* Caller already did this for us; so do nothing. */
    2252             : 
    2253             :     /* Part (2): parse main digit string. */
                     :     /*
                     :      * A lone '0' is legal; any digit following it is not consumed here and
                     :      * will be flagged by the trailing-garbage check below.
                     :      */
    2254      351828 :     if (len < lex->input_length && *s == '0')
    2255             :     {
    2256       56066 :         s++;
    2257       56066 :         len++;
    2258             :     }
    2259      295762 :     else if (len < lex->input_length && *s >= '1' && *s <= '9')
    2260             :     {
    2261             :         do
    2262             :         {
    2263     1028828 :             s++;
    2264     1028828 :             len++;
    2265     1028828 :         } while (len < lex->input_length && *s >= '0' && *s <= '9');
    2266             :     }
    2267             :     else
                     :         /* no digit at all: remember the error but keep scanning the token */
    2268           2 :         error = true;
    2269             : 
    2270             :     /* Part (3): parse optional decimal portion. */
    2271      351828 :     if (len < lex->input_length && *s == '.')
    2272             :     {
    2273       37054 :         s++;
    2274       37054 :         len++;
                     :         /* at least one digit must follow the decimal point */
    2275       37054 :         if (len == lex->input_length || *s < '0' || *s > '9')
    2276          12 :             error = true;
    2277             :         else
    2278             :         {
    2279             :             do
    2280             :             {
    2281       91148 :                 s++;
    2282       91148 :                 len++;
    2283       91148 :             } while (len < lex->input_length && *s >= '0' && *s <= '9');
    2284             :         }
    2285             :     }
    2286             : 
    2287             :     /* Part (4): parse optional exponent. */
    2288      351828 :     if (len < lex->input_length && (*s == 'e' || *s == 'E'))
    2289             :     {
    2290          94 :         s++;
    2291          94 :         len++;
    2292          94 :         if (len < lex->input_length && (*s == '+' || *s == '-'))
    2293             :         {
    2294          10 :             s++;
    2295          10 :             len++;
    2296             :         }
                     :         /* at least one digit must follow 'e'/'E' (and any sign) */
    2297          94 :         if (len == lex->input_length || *s < '0' || *s > '9')
    2298          12 :             error = true;
    2299             :         else
    2300             :         {
    2301             :             do
    2302             :             {
    2303         284 :                 s++;
    2304         284 :                 len++;
    2305         284 :             } while (len < lex->input_length && *s >= '0' && *s <= '9');
    2306             :         }
    2307             :     }
    2308             : 
    2309             :     /*
    2310             :      * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff
    2311             :      * here should be considered part of the token for error-reporting
    2312             :      * purposes.
    2313             :      */
    2314      352176 :     for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
    2315         348 :         error = true;
    2316             : 
    2317      351828 :     if (total_len != NULL)
    2318          46 :         *total_len = len;
    2319             : 
                     :     /*
                     :      * When lexing incrementally and the token ran to the end of a non-final
                     :      * chunk, stash what we have as a partial token and ask the caller to
                     :      * resume with more input.
                     :      */
    2320      351828 :     if (lex->incremental && !lex->inc_state->is_last_chunk &&
    2321      120008 :         len >= lex->input_length)
    2322             :     {
    2323         776 :         jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token,
    2324         776 :                                        lex->token_start, s - lex->token_start);
    2325         776 :         if (num_err != NULL)
    2326           0 :             *num_err = error;
    2327             : 
    2328         776 :         return JSON_INCOMPLETE;
    2329             :     }
    2330      351052 :     else if (num_err != NULL)
    2331             :     {
    2332             :         /* let the caller handle any error */
    2333          46 :         *num_err = error;
    2334             :     }
    2335             :     else
    2336             :     {
    2337             :         /* return token endpoint */
    2338      351006 :         lex->prev_token_terminator = lex->token_terminator;
    2339      351006 :         lex->token_terminator = s;
    2340             :         /* handle error if any */
    2341      351006 :         if (error)
    2342         128 :             return JSON_INVALID_TOKEN;
    2343             :     }
    2344             : 
    2345      350924 :     return JSON_SUCCESS;
    2346             : }
    2347             : 
    2348             : /*
    2349             :  * Report a parse error.
    2350             :  *
    2351             :  * lex->token_start and lex->token_terminator must identify the current token.
    2352             :  */
    2353             : static JsonParseErrorType
    2354        1204 : report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
    2355             : {
    2356             :     /* Handle case where the input ended prematurely. */
    2357        1204 :     if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
    2358         266 :         return JSON_EXPECTED_MORE;
    2359             : 
    2360             :     /* Otherwise choose the error type based on the parsing context. */
    2361         938 :     switch (ctx)
    2362             :     {
    2363          48 :         case JSON_PARSE_END:
    2364          48 :             return JSON_EXPECTED_END;
    2365         174 :         case JSON_PARSE_VALUE:
    2366         174 :             return JSON_EXPECTED_JSON;
    2367         164 :         case JSON_PARSE_STRING:
    2368         164 :             return JSON_EXPECTED_STRING;
    2369          56 :         case JSON_PARSE_ARRAY_START:
    2370          56 :             return JSON_EXPECTED_ARRAY_FIRST;
    2371          72 :         case JSON_PARSE_ARRAY_NEXT:
    2372          72 :             return JSON_EXPECTED_ARRAY_NEXT;
    2373         124 :         case JSON_PARSE_OBJECT_START:
    2374         124 :             return JSON_EXPECTED_OBJECT_FIRST;
    2375         128 :         case JSON_PARSE_OBJECT_LABEL:
    2376         128 :             return JSON_EXPECTED_COLON;
    2377         172 :         case JSON_PARSE_OBJECT_NEXT:
    2378         172 :             return JSON_EXPECTED_OBJECT_NEXT;
                     :         /* after a comma in an object, only a field name (string) is legal */
    2379           0 :         case JSON_PARSE_OBJECT_COMMA:
    2380           0 :             return JSON_EXPECTED_STRING;
    2381             :     }
    2382             : 
    2383             :     /*
    2384             :      * We don't use a default: case, so that the compiler will warn about
    2385             :      * unhandled enum values.
    2386             :      */
    2387             :     Assert(false);
    2388           0 :     return JSON_SUCCESS;        /* silence stupider compilers */
    2389             : }
    2390             : 
    2391             : /*
    2392             :  * Construct an (already translated) detail message for a JSON error.
    2393             :  *
    2394             :  * The returned pointer should not be freed, the allocation is either static
    2395             :  * or owned by the JsonLexContext.
    2396             :  */
    2397             : char *
    2398        2356 : json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
    2399             : {
    2400        2356 :     if (error == JSON_OUT_OF_MEMORY || lex == &failed_oom)
    2401             :     {
    2402             :         /* Short circuit. Allocating anything for this case is unhelpful. */
    2403           0 :         return _("out of memory");
    2404             :     }
    2405             : 
                     :     /* Lazily create (or reset) the message buffer owned by the lexer. */
    2406        2356 :     if (lex->errormsg)
    2407           0 :         jsonapi_resetStringInfo(lex->errormsg);
    2408             :     else
    2409        2356 :         lex->errormsg = jsonapi_makeStringInfo();
    2410             : 
    2411             :     /*
    2412             :      * A helper for error messages that should print the current token. The
    2413             :      * format must contain exactly one %.*s specifier.
    2414             :      */
    2415             : #define json_token_error(lex, format) \
    2416             :     jsonapi_appendStringInfo((lex)->errormsg, _(format), \
    2417             :                              (int) ((lex)->token_terminator - (lex)->token_start), \
    2418             :                              (lex)->token_start);
    2419             : 
                     :     /*
                     :      * Cases that return directly hand back a static (translated) string and
                     :      * leave lex->errormsg empty; the rest build the message in the buffer
                     :      * and fall through to the common return at the bottom.
                     :      */
    2420        2356 :     switch (error)
    2421             :     {
    2422           0 :         case JSON_INCOMPLETE:
    2423             :         case JSON_SUCCESS:
    2424             :             /* fall through to the error code after switch */
    2425           0 :             break;
    2426           0 :         case JSON_INVALID_LEXER_TYPE:
    2427           0 :             if (lex->incremental)
    2428           0 :                 return _("Recursive descent parser cannot use incremental lexer.");
    2429             :             else
    2430           0 :                 return _("Incremental parser requires incremental lexer.");
    2431         512 :         case JSON_NESTING_TOO_DEEP:
    2432         512 :             return (_("JSON nested too deep, maximum permitted depth is 6400."));
    2433         132 :         case JSON_ESCAPING_INVALID:
    2434         132 :             json_token_error(lex, "Escape sequence \"\\%.*s\" is invalid.");
    2435         132 :             break;
    2436         116 :         case JSON_ESCAPING_REQUIRED:
    2437         116 :             jsonapi_appendStringInfo(lex->errormsg,
    2438         116 :                                      _("Character with value 0x%02x must be escaped."),
    2439         116 :                                      (unsigned char) *(lex->token_terminator));
    2440         116 :             break;
    2441          48 :         case JSON_EXPECTED_END:
    2442          48 :             json_token_error(lex, "Expected end of input, but found \"%.*s\".");
    2443          48 :             break;
    2444          56 :         case JSON_EXPECTED_ARRAY_FIRST:
    2445          56 :             json_token_error(lex, "Expected array element or \"]\", but found \"%.*s\".");
    2446          56 :             break;
    2447          72 :         case JSON_EXPECTED_ARRAY_NEXT:
    2448          72 :             json_token_error(lex, "Expected \",\" or \"]\", but found \"%.*s\".");
    2449          72 :             break;
    2450         128 :         case JSON_EXPECTED_COLON:
    2451         128 :             json_token_error(lex, "Expected \":\", but found \"%.*s\".");
    2452         128 :             break;
    2453         120 :         case JSON_EXPECTED_JSON:
    2454         120 :             json_token_error(lex, "Expected JSON value, but found \"%.*s\".");
    2455         120 :             break;
    2456         190 :         case JSON_EXPECTED_MORE:
    2457         190 :             return _("The input string ended unexpectedly.");
    2458         124 :         case JSON_EXPECTED_OBJECT_FIRST:
    2459         124 :             json_token_error(lex, "Expected string or \"}\", but found \"%.*s\".");
    2460         124 :             break;
    2461         172 :         case JSON_EXPECTED_OBJECT_NEXT:
    2462         172 :             json_token_error(lex, "Expected \",\" or \"}\", but found \"%.*s\".");
    2463         172 :             break;
    2464         164 :         case JSON_EXPECTED_STRING:
    2465         164 :             json_token_error(lex, "Expected string, but found \"%.*s\".");
    2466         164 :             break;
    2467         414 :         case JSON_INVALID_TOKEN:
    2468         414 :             json_token_error(lex, "Token \"%.*s\" is invalid.");
    2469         414 :             break;
    2470           0 :         case JSON_OUT_OF_MEMORY:
    2471             :             /* should have been handled above; use the error path */
    2472           0 :             break;
    2473          24 :         case JSON_UNICODE_CODE_POINT_ZERO:
    2474          24 :             return _("\\u0000 cannot be converted to text.");
    2475          36 :         case JSON_UNICODE_ESCAPE_FORMAT:
    2476          36 :             return _("\"\\u\" must be followed by four hexadecimal digits.");
    2477           0 :         case JSON_UNICODE_HIGH_ESCAPE:
    2478             :             /* note: this case is only reachable in frontend not backend */
    2479           0 :             return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
    2480           0 :         case JSON_UNICODE_UNTRANSLATABLE:
    2481             : 
    2482             :             /*
    2483             :              * Note: this case is only reachable in backend and not frontend.
    2484             :              * #ifdef it away so the frontend doesn't try to link against
    2485             :              * backend functionality.
    2486             :              */
    2487             : #ifndef FRONTEND
    2488           0 :             return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."),
    2489             :                             GetDatabaseEncodingName());
    2490             : #else
    2491             :             Assert(false);
    2492           0 :             break;
    2493             : #endif
    2494          12 :         case JSON_UNICODE_HIGH_SURROGATE:
    2495          12 :             return _("Unicode high surrogate must not follow a high surrogate.");
    2496          36 :         case JSON_UNICODE_LOW_SURROGATE:
    2497          36 :             return _("Unicode low surrogate must follow a high surrogate.");
    2498           0 :         case JSON_SEM_ACTION_FAILED:
    2499             :             /* fall through to the error code after switch */
    2500           0 :             break;
    2501             :     }
    2502             : #undef json_token_error
    2503             : 
    2504             :     /* Note that lex->errormsg can be NULL in shlib code. */
    2505        1546 :     if (lex->errormsg && lex->errormsg->len == 0)
    2506             :     {
    2507             :         /*
    2508             :          * We don't use a default: case, so that the compiler will warn about
    2509             :          * unhandled enum values.  But this needs to be here anyway to cover
    2510             :          * the possibility of an incorrect input.
    2511             :          */
    2512           0 :         jsonapi_appendStringInfo(lex->errormsg,
    2513             :                                  "unexpected json parse error type: %d",
    2514             :                                  (int) error);
    2515             :     }
    2516             : 
    2517             : #ifdef JSONAPI_USE_PQEXPBUFFER
                     :     /* in libpq builds the buffer may be broken by OOM; report that instead */
    2518         632 :     if (PQExpBufferBroken(lex->errormsg))
    2519           0 :         return _("out of memory while constructing error description");
    2520             : #endif
    2521             : 
    2522        1546 :     return lex->errormsg->data;
    2523             : }

Generated by: LCOV version 1.14