LCOV - code coverage report
Current view: top level - src/common - jsonapi.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 89.2 % 963 859
Test Date: 2026-03-12 06:14:44 Functions: 100.0 % 31 31
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * jsonapi.c
       4              :  *      JSON parser and lexer interfaces
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  * IDENTIFICATION
      10              :  *    src/common/jsonapi.c
      11              :  *
      12              :  *-------------------------------------------------------------------------
      13              :  */
      14              : #ifndef FRONTEND
      15              : #include "postgres.h"
      16              : #else
      17              : #include "postgres_fe.h"
      18              : #endif
      19              : 
      20              : #include "common/jsonapi.h"
      21              : #include "mb/pg_wchar.h"
      22              : #include "port/pg_lfind.h"
      23              : 
      24              : #ifdef JSONAPI_USE_PQEXPBUFFER
      25              : #include "pqexpbuffer.h"
      26              : #else
      27              : #include "lib/stringinfo.h"
      28              : #include "miscadmin.h"
      29              : #endif
      30              : 
      31              : /*
      32              :  * By default, we will use palloc/pfree along with StringInfo.  In libpq,
      33              :  * use malloc and PQExpBuffer, and return JSON_OUT_OF_MEMORY on out-of-memory.
      34              :  */
      35              : #ifdef JSONAPI_USE_PQEXPBUFFER
      36              : 
      37              : #define STRDUP(s) strdup(s)
      38              : #define ALLOC(size) malloc(size)
      39              : #define ALLOC0(size) calloc(1, size)
      40              : #define REALLOC realloc
      41              : #define FREE(s) free(s)
      42              : 
      43              : #define jsonapi_appendStringInfo            appendPQExpBuffer
      44              : #define jsonapi_appendBinaryStringInfo      appendBinaryPQExpBuffer
      45              : #define jsonapi_appendStringInfoChar        appendPQExpBufferChar
      46              : /* XXX should we add a macro version to PQExpBuffer? */
      47              : #define jsonapi_appendStringInfoCharMacro   appendPQExpBufferChar
      48              : #define jsonapi_makeStringInfo              createPQExpBuffer
      49              : #define jsonapi_initStringInfo              initPQExpBuffer
      50              : #define jsonapi_resetStringInfo             resetPQExpBuffer
      51              : #define jsonapi_termStringInfo              termPQExpBuffer
      52              : #define jsonapi_destroyStringInfo           destroyPQExpBuffer
      53              : 
      54              : #else                           /* !JSONAPI_USE_PQEXPBUFFER */
      55              : 
      56              : #define STRDUP(s) pstrdup(s)
      57              : #define ALLOC(size) palloc(size)
      58              : #define ALLOC0(size) palloc0(size)
      59              : #define REALLOC repalloc
      60              : 
      61              : #ifdef FRONTEND
      62              : #define FREE pfree
      63              : #else
      64              : /*
      65              :  * Backend pfree() doesn't handle NULL pointers like the frontend's does; smooth
      66              :  * that over to reduce mental gymnastics. Avoid multiple evaluation of the macro
      67              :  * argument to avoid future hair-pulling.
      68              :  */
      69              : #define FREE(s) do {    \
      70              :     void *__v = (s);    \
      71              :     if (__v)            \
      72              :         pfree(__v);     \
      73              : } while (0)
      74              : #endif
      75              : 
      76              : #define jsonapi_appendStringInfo            appendStringInfo
      77              : #define jsonapi_appendBinaryStringInfo      appendBinaryStringInfo
      78              : #define jsonapi_appendStringInfoChar        appendStringInfoChar
      79              : #define jsonapi_appendStringInfoCharMacro   appendStringInfoCharMacro
      80              : #define jsonapi_makeStringInfo              makeStringInfo
      81              : #define jsonapi_initStringInfo              initStringInfo
      82              : #define jsonapi_resetStringInfo             resetStringInfo
      83              : #define jsonapi_termStringInfo(s)           pfree((s)->data)
      84              : #define jsonapi_destroyStringInfo           destroyStringInfo
      85              : 
      86              : #endif                          /* JSONAPI_USE_PQEXPBUFFER */
      87              : 
      88              : /*
      89              :  * The context of the parser is maintained by the recursive descent
      90              :  * mechanism, but is passed explicitly to the error reporting routine
      91              :  * for better diagnostics.
      92              :  */
      93              : typedef enum                    /* contexts of JSON parser */
      94              : {
      95              :     JSON_PARSE_VALUE,           /* expecting a value */
      96              :     JSON_PARSE_STRING,          /* expecting a string (for a field name) */
      97              :     JSON_PARSE_ARRAY_START,     /* saw '[', expecting value or ']' */
      98              :     JSON_PARSE_ARRAY_NEXT,      /* saw array element, expecting ',' or ']' */
      99              :     JSON_PARSE_OBJECT_START,    /* saw '{', expecting label or '}' */
     100              :     JSON_PARSE_OBJECT_LABEL,    /* saw object label, expecting ':' */
     101              :     JSON_PARSE_OBJECT_NEXT,     /* saw object value, expecting ',' or '}' */
     102              :     JSON_PARSE_OBJECT_COMMA,    /* saw object ',', expecting next label */
     103              :     JSON_PARSE_END,             /* saw the end of a document, expect nothing */
     104              : } JsonParseContext;
     105              : 
     106              : /*
     107              :  * Setup for table-driven parser.
     108              :  * These enums need to be separate from the JsonTokenType and from each other
     109              :  * so we can have all of them on the prediction stack, which consists of
     110              :  * tokens, non-terminals, and semantic action markers.
     111              :  */
     112              : 
     113              : enum JsonNonTerminal
     114              : {
     115              :     JSON_NT_JSON = 32,
     116              :     JSON_NT_ARRAY_ELEMENTS,
     117              :     JSON_NT_MORE_ARRAY_ELEMENTS,
     118              :     JSON_NT_KEY_PAIRS,
     119              :     JSON_NT_MORE_KEY_PAIRS,
     120              : };
     121              : 
     122              : enum JsonParserSem
     123              : {
     124              :     JSON_SEM_OSTART = 64,
     125              :     JSON_SEM_OEND,
     126              :     JSON_SEM_ASTART,
     127              :     JSON_SEM_AEND,
     128              :     JSON_SEM_OFIELD_INIT,
     129              :     JSON_SEM_OFIELD_START,
     130              :     JSON_SEM_OFIELD_END,
     131              :     JSON_SEM_AELEM_START,
     132              :     JSON_SEM_AELEM_END,
     133              :     JSON_SEM_SCALAR_INIT,
     134              :     JSON_SEM_SCALAR_CALL,
     135              : };
     136              : 
     137              : /*
     138              :  * struct containing the 3 stacks used in non-recursive parsing,
     139              :  * and the token and value for scalars that need to be preserved
     140              :  * across calls.
     141              :  *
     142              :  * typedef appears in jsonapi.h
     143              :  */
     144              : struct JsonParserStack
     145              : {
     146              :     int         stack_size;
     147              :     char       *prediction;
     148              :     size_t      pred_index;
     149              :     /* these two are indexed by lex_level */
     150              :     char      **fnames;
     151              :     bool       *fnull;
     152              :     JsonTokenType scalar_tok;
     153              :     char       *scalar_val;
     154              : };
     155              : 
     156              : /*
     157              :  * struct containing state used when there is a possible partial token at the
     158              :  * end of a json chunk when we are doing incremental parsing.
     159              :  *
     160              :  * typedef appears in jsonapi.h
     161              :  */
     162              : struct JsonIncrementalState
     163              : {
     164              :     bool        started;
     165              :     bool        is_last_chunk;
     166              :     bool        partial_completed;
     167              :     jsonapi_StrValType partial_token;
     168              : };
     169              : 
     170              : /*
     171              :  * constants and macros used in the nonrecursive parser
     172              :  */
     173              : #define JSON_NUM_TERMINALS 13
     174              : #define JSON_NUM_NONTERMINALS 5
     175              : #define JSON_NT_OFFSET JSON_NT_JSON
     176              : /* for indexing the table */
     177              : #define OFS(NT) (NT) - JSON_NT_OFFSET
     178              : /* classify items we get off the stack */
     179              : #define IS_SEM(x) ((x) & 0x40)
     180              : #define IS_NT(x)  ((x) & 0x20)
     181              : 
     182              : /*
     183              :  * These productions are stored in reverse order (right to left), so that when
     184              :  * they are pushed on the stack, what we expect next is at the top of the stack.
     185              :  */
     186              : static char JSON_PROD_EPSILON[] = {0};  /* epsilon - an empty production */
     187              : 
     188              : /* JSON -> string */
     189              : static char JSON_PROD_SCALAR_STRING[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_STRING, JSON_SEM_SCALAR_INIT, 0};
     190              : 
     191              : /* JSON -> number */
     192              : static char JSON_PROD_SCALAR_NUMBER[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NUMBER, JSON_SEM_SCALAR_INIT, 0};
     193              : 
     194              : /* JSON -> 'true' */
     195              : static char JSON_PROD_SCALAR_TRUE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_TRUE, JSON_SEM_SCALAR_INIT, 0};
     196              : 
     197              : /* JSON -> 'false' */
     198              : static char JSON_PROD_SCALAR_FALSE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_FALSE, JSON_SEM_SCALAR_INIT, 0};
     199              : 
     200              : /* JSON -> 'null' */
     201              : static char JSON_PROD_SCALAR_NULL[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NULL, JSON_SEM_SCALAR_INIT, 0};
     202              : 
     203              : /* JSON -> '{' KEY_PAIRS '}' */
     204              : static char JSON_PROD_OBJECT[] = {JSON_SEM_OEND, JSON_TOKEN_OBJECT_END, JSON_NT_KEY_PAIRS, JSON_TOKEN_OBJECT_START, JSON_SEM_OSTART, 0};
     205              : 
     206              : /* JSON -> '[' ARRAY_ELEMENTS ']' */
     207              : static char JSON_PROD_ARRAY[] = {JSON_SEM_AEND, JSON_TOKEN_ARRAY_END, JSON_NT_ARRAY_ELEMENTS, JSON_TOKEN_ARRAY_START, JSON_SEM_ASTART, 0};
     208              : 
     209              : /* ARRAY_ELEMENTS -> JSON MORE_ARRAY_ELEMENTS */
     210              : static char JSON_PROD_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, 0};
     211              : 
     212              : /* MORE_ARRAY_ELEMENTS -> ',' JSON MORE_ARRAY_ELEMENTS */
     213              : static char JSON_PROD_MORE_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, JSON_TOKEN_COMMA, 0};
     214              : 
     215              : /* KEY_PAIRS -> string ':' JSON MORE_KEY_PAIRS */
     216              : static char JSON_PROD_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, 0};
     217              : 
     218              : /* MORE_KEY_PAIRS -> ',' string ':'  JSON MORE_KEY_PAIRS */
     219              : static char JSON_PROD_MORE_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, JSON_TOKEN_COMMA, 0};
     220              : 
     221              : /*
     222              :  * Note: there are also epsilon productions for ARRAY_ELEMENTS,
     223              :  * MORE_ARRAY_ELEMENTS, KEY_PAIRS and MORE_KEY_PAIRS.
     224              :  * They are all the same, as none require any semantic actions.
     225              :  */
     226              : 
     227              : /*
     228              :  * Table connecting the productions with their director sets of
     229              :  * terminal symbols.
     230              :  * Any combination not specified here represents an error.
     231              :  */
     232              : 
     233              : typedef struct
     234              : {
     235              :     size_t      len;
     236              :     char       *prod;
     237              : } td_entry;
     238              : 
     239              : #define TD_ENTRY(PROD) { sizeof(PROD) - 1, (PROD) }
     240              : 
     241              : static td_entry td_parser_table[JSON_NUM_NONTERMINALS][JSON_NUM_TERMINALS] =
     242              : {
     243              :     /* JSON */
     244              :     [OFS(JSON_NT_JSON)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_SCALAR_STRING),
     245              :     [OFS(JSON_NT_JSON)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_SCALAR_NUMBER),
     246              :     [OFS(JSON_NT_JSON)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_SCALAR_TRUE),
     247              :     [OFS(JSON_NT_JSON)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_SCALAR_FALSE),
     248              :     [OFS(JSON_NT_JSON)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_SCALAR_NULL),
     249              :     [OFS(JSON_NT_JSON)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY),
     250              :     [OFS(JSON_NT_JSON)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_OBJECT),
     251              :     /* ARRAY_ELEMENTS */
     252              :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     253              :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     254              :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     255              :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     256              :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     257              :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     258              :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
     259              :     [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
     260              :     /* MORE_ARRAY_ELEMENTS */
     261              :     [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_ARRAY_ELEMENTS),
     262              :     [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
     263              :     /* KEY_PAIRS */
     264              :     [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_KEY_PAIRS),
     265              :     [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
     266              :     /* MORE_KEY_PAIRS */
     267              :     [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_KEY_PAIRS),
     268              :     [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
     269              : };
     270              : 
     271              : /* the GOAL production. Not stored in the table, but will be the initial contents of the prediction stack */
     272              : static char JSON_PROD_GOAL[] = {JSON_TOKEN_END, JSON_NT_JSON, 0};
     273              : 
     274              : static inline JsonParseErrorType json_lex_string(JsonLexContext *lex);
     275              : static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, const char *s,
     276              :                                                  bool *num_err, size_t *total_len);
     277              : static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, const JsonSemAction *sem);
     278              : static JsonParseErrorType parse_object_field(JsonLexContext *lex, const JsonSemAction *sem);
     279              : static JsonParseErrorType parse_object(JsonLexContext *lex, const JsonSemAction *sem);
     280              : static JsonParseErrorType parse_array_element(JsonLexContext *lex, const JsonSemAction *sem);
     281              : static JsonParseErrorType parse_array(JsonLexContext *lex, const JsonSemAction *sem);
     282              : static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
     283              : static bool allocate_incremental_state(JsonLexContext *lex);
     284              : static inline void set_fname(JsonLexContext *lex, char *fname);
     285              : 
     286              : /* the null action object used for pure validation */
     287              : const JsonSemAction nullSemAction =
     288              : {
     289              :     NULL, NULL, NULL, NULL, NULL,
     290              :     NULL, NULL, NULL, NULL, NULL
     291              : };
     292              : 
     293              : /* sentinels used for out-of-memory conditions */
     294              : static JsonLexContext failed_oom;
     295              : static JsonIncrementalState failed_inc_oom;
     296              : 
     297              : /* Parser support routines */
     298              : 
     299              : /*
     300              :  * lex_peek
     301              :  *
     302              :  * Return the current look-ahead token without advancing the lexer.
     303              :  */
     304              : static inline JsonTokenType
     305      7251891 : lex_peek(JsonLexContext *lex)
     306              : {
     307      7251891 :     return lex->token_type;
     308              : }
     309              : 
     310              : /*
     311              :  * lex_expect
     312              :  *
     313              :  * move the lexer to the next token if the current look_ahead token matches
     314              :  * the parameter token. Otherwise, report an error.
     315              :  */
     316              : static inline JsonParseErrorType
     317       239195 : lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
     318              : {
     319       239195 :     if (lex_peek(lex) == token)
     320       239141 :         return json_lex(lex);
     321              :     else
     322           54 :         return report_parse_error(ctx, lex);
     323              : }
     324              : 
     325              : /* chars to consider as part of an alphanumeric token */
     326              : #define JSON_ALPHANUMERIC_CHAR(c)  \
     327              :     (((c) >= 'a' && (c) <= 'z') || \
     328              :      ((c) >= 'A' && (c) <= 'Z') || \
     329              :      ((c) >= '0' && (c) <= '9') || \
     330              :      (c) == '_' || \
     331              :      IS_HIGHBIT_SET(c))
     332              : 
     333              : /*
     334              :  * Utility function to check if a string is a valid JSON number.
     335              :  *
     336              :  * str is of length len, and need not be null-terminated.
     337              :  */
     338              : bool
     339           23 : IsValidJsonNumber(const char *str, size_t len)
     340              : {
     341              :     bool        numeric_error;
     342              :     size_t      total_len;
     343           23 :     JsonLexContext dummy_lex = {0};
     344              : 
     345           23 :     if (len <= 0)
     346            0 :         return false;
     347              : 
     348              :     /*
     349              :      * json_lex_number expects a leading '-' to have been eaten already.
     350              :      *
     351              :      * Skipping the sign only requires pointing the dummy lexer one byte
     352              :      * further into str; no cast of str is involved.
     353              :      */
     354           23 :     if (*str == '-')
     355              :     {
     356            2 :         dummy_lex.input = str + 1;
     357            2 :         dummy_lex.input_length = len - 1;
     358              :     }
     359              :     else
     360              :     {
     361           21 :         dummy_lex.input = str;
     362           21 :         dummy_lex.input_length = len;
     363              :     }
     364              : 
     365           23 :     dummy_lex.token_start = dummy_lex.input;
     366              : 
     367           23 :     json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
     368              : 
     369           23 :     return (!numeric_error) && (total_len == dummy_lex.input_length);
     370              : }
     371              : 
     372              : /*
     373              :  * makeJsonLexContextCstringLen
     374              :  *      Initialize the given JsonLexContext object, or create one
     375              :  *
     376              :  * If a valid 'lex' pointer is given, it is initialized.  This can
     377              :  * be used for stack-allocated structs, saving overhead.  If NULL is
     378              :  * given, a new struct is allocated.
     379              :  *
     380              :  * If need_escapes is true, ->strval stores the unescaped lexemes.
     381              :  * Unescaping is expensive, so only request it when necessary.
     382              :  *
     383              :  * If need_escapes is true or lex was given as NULL, then caller is
     384              :  * responsible for freeing the returned struct, either by calling
     385              :  * freeJsonLexContext() or (in backend environment) via memory context
     386              :  * cleanup.
     387              :  *
     388              :  * In shlib code, any out-of-memory failures will be deferred to time
     389              :  * of use; this function is guaranteed to return a valid JsonLexContext.
     390              :  */
     391              : JsonLexContext *
     392        20491 : makeJsonLexContextCstringLen(JsonLexContext *lex, const char *json,
     393              :                              size_t len, int encoding, bool need_escapes)
     394              : {
     395        20491 :     if (lex == NULL)
     396              :     {
     397         3312 :         lex = ALLOC0(sizeof(JsonLexContext));
     398         3312 :         if (!lex)
     399            0 :             return &failed_oom;
     400         3312 :         lex->flags |= JSONLEX_FREE_STRUCT;
     401              :     }
     402              :     else
     403        17179 :         memset(lex, 0, sizeof(JsonLexContext));
     404              : 
     405        20491 :     lex->errormsg = NULL;
     406        20491 :     lex->input = lex->token_terminator = lex->line_start = json;
     407        20491 :     lex->line_number = 1;
     408        20491 :     lex->input_length = len;
     409        20491 :     lex->input_encoding = encoding;
     410        20491 :     lex->need_escapes = need_escapes;
     411        20491 :     if (need_escapes)
     412              :     {
     413              :         /*
     414              :          * This call can fail in shlib code. We defer error handling to time
     415              :          * of use (json_lex_string()) since we might not need to parse any
     416              :          * strings anyway.
     417              :          */
     418        16197 :         lex->strval = jsonapi_makeStringInfo();
     419        16197 :         lex->flags |= JSONLEX_FREE_STRVAL;
     420              :     }
     421              : 
     422        20491 :     return lex;
     423              : }
     424              : 
     425              : /*
     426              :  * Allocates the internal bookkeeping structures for incremental parsing. This
     427              :  * can only fail in-band with shlib code.
     428              :  */
     429              : #define JS_STACK_CHUNK_SIZE 64
     430              : #define JS_MAX_PROD_LEN 10      /* more than we need */
     431              : #define JSON_TD_MAX_STACK 6400  /* hard coded for now - this is a REALLY high
     432              :                                  * number */
     433              : static bool
     434         2082 : allocate_incremental_state(JsonLexContext *lex)
     435              : {
     436              :     void       *pstack,
     437              :                *prediction,
     438              :                *fnames,
     439              :                *fnull;
     440              : 
     441         2082 :     lex->inc_state = ALLOC0(sizeof(JsonIncrementalState));
     442         2082 :     pstack = ALLOC0(sizeof(JsonParserStack));
     443         2082 :     prediction = ALLOC(JS_STACK_CHUNK_SIZE * JS_MAX_PROD_LEN);
     444         2082 :     fnames = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(char *));
     445         2082 :     fnull = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(bool));
     446              : 
     447              : #ifdef JSONAPI_USE_PQEXPBUFFER
     448          980 :     if (!lex->inc_state
     449          980 :         || !pstack
     450          980 :         || !prediction
     451          980 :         || !fnames
     452          980 :         || !fnull)
     453              :     {
     454            0 :         FREE(lex->inc_state);
     455            0 :         FREE(pstack);
     456            0 :         FREE(prediction);
     457            0 :         FREE(fnames);
     458            0 :         FREE(fnull);
     459              : 
     460            0 :         lex->inc_state = &failed_inc_oom;
     461            0 :         return false;
     462              :     }
     463              : #endif
     464              : 
     465         2082 :     jsonapi_initStringInfo(&(lex->inc_state->partial_token));
     466         2082 :     lex->pstack = pstack;
     467         2082 :     lex->pstack->stack_size = JS_STACK_CHUNK_SIZE;
     468         2082 :     lex->pstack->prediction = prediction;
     469         2082 :     lex->pstack->fnames = fnames;
     470         2082 :     lex->pstack->fnull = fnull;
     471              : 
     472              :     /*
     473              :      * fnames between 0 and lex_level must always be defined so that
     474              :      * freeJsonLexContext() can handle them safely. inc/dec_lex_level() handle
     475              :      * the rest.
     476              :      */
     477              :     Assert(lex->lex_level == 0);
     478         2082 :     lex->pstack->fnames[0] = NULL;
     479              : 
     480         2082 :     lex->incremental = true;
     481         2082 :     return true;
     482              : }
     483              : 
     484              : 
     485              : /*
     486              :  * makeJsonLexContextIncremental
     487              :  *
     488              :  * Similar to makeJsonLexContextCstringLen(), but set up for incremental
     489              :  * parsing. That means we need explicit stacks for predictions, field names
     490              :  * and null indicators, but we don't need the input up front: it will be
     491              :  * handed to the parse routine bit by bit. We also need an accumulator for
     492              :  * partial tokens, in case a chunk boundary falls in the middle of a token.
     493              :  *
     494              :  * In shlib code, any out-of-memory failures will be deferred to time of use;
     495              :  * this function is guaranteed to return a valid JsonLexContext.
     496              :  */
     497              : JsonLexContext *
     498         2082 : makeJsonLexContextIncremental(JsonLexContext *lex, int encoding,
     499              :                               bool need_escapes)
     500              : {
     501         2082 :     if (lex == NULL)
     502              :     {
     503            1 :         lex = ALLOC0(sizeof(JsonLexContext));
     504            1 :         if (!lex)
     505            0 :             return &failed_oom;
     506              : 
     507            1 :         lex->flags |= JSONLEX_FREE_STRUCT;
     508              :     }
     509              :     else
     510         2081 :         memset(lex, 0, sizeof(JsonLexContext));
     511              : 
     512         2082 :     lex->line_number = 1;
     513         2082 :     lex->input_encoding = encoding;
     514              : 
     515         2082 :     if (!allocate_incremental_state(lex))
     516              :     {
     517            0 :         if (lex->flags & JSONLEX_FREE_STRUCT)
     518              :         {
     519            0 :             FREE(lex);
     520            0 :             return &failed_oom;
     521              :         }
     522              : 
     523              :         /* lex->inc_state tracks the OOM failure; we can return here. */
     524            0 :         return lex;
     525              :     }
     526              : 
     527         2082 :     lex->need_escapes = need_escapes;
     528         2082 :     if (need_escapes)
     529              :     {
     530              :         /*
     531              :          * This call can fail in shlib code. We defer error handling to time
     532              :          * of use (json_lex_string()) since we might not need to parse any
     533              :          * strings anyway.
     534              :          */
     535          125 :         lex->strval = jsonapi_makeStringInfo();
     536          125 :         lex->flags |= JSONLEX_FREE_STRVAL;
     537              :     }
     538              : 
     539         2082 :     return lex;
     540              : }
     541              : 
                        /*
                         * setJsonLexContextOwnsTokens
                         *
                         * Set (owned_by_context = true) or clear (false) the
                         * JSONLEX_CTX_OWNS_TOKENS bit in lex->flags.
                         *
                         * Elsewhere in this file the bit is consulted by set_fname(), which
                         * frees the previously stored field name before overwriting it, and by
                         * freeJsonLexContext(), which frees any field names still stored in an
                         * incremental lexer's stack.
                         *
                         * For an incremental lexer whose inc_state->started is already set, the
                         * call trips Assert(false) and otherwise returns without touching the
                         * flag.
                         */
     542              : void
     543         1960 : setJsonLexContextOwnsTokens(JsonLexContext *lex, bool owned_by_context)
     544              : {
     545         1960 :     if (lex->incremental && lex->inc_state->started)
     546              :     {
     547              :         /*
     548              :          * Switching this flag after parsing has already started is a
     549              :          * programming error.
     550              :          */
     551              :         Assert(false);
     552            0 :         return;
     553              :     }
     554              : 
     555         1960 :     if (owned_by_context)
     556          980 :         lex->flags |= JSONLEX_CTX_OWNS_TOKENS;
     557              :     else
     558          980 :         lex->flags &= ~JSONLEX_CTX_OWNS_TOKENS;
     559              : }
     560              : 
                        /*
                         * inc_lex_level
                         *
                         * Enter one more nesting level.  For an incremental lexer, first make
                         * sure the per-level arrays in lex->pstack (prediction, fnames, fnull)
                         * have room for the new level, growing each of them by
                         * JS_STACK_CHUNK_SIZE entries when lex_level + 1 would reach stack_size.
                         *
                         * Returns true on success.  Returns false only when one of the REALLOC
                         * calls yields NULL, which is checked only in JSONAPI_USE_PQEXPBUFFER
                         * builds; in that case lex_level and stack_size are left unchanged.
                         */
     561              : static inline bool
     562      2590335 : inc_lex_level(JsonLexContext *lex)
     563              : {
     564      2590335 :     if (lex->incremental && (lex->lex_level + 1) >= lex->pstack->stack_size)
     565              :     {
     566              :         size_t      new_stack_size;
     567              :         char       *new_prediction;
     568              :         char      **new_fnames;
     569              :         bool       *new_fnull;
     570              : 
     571        38400 :         new_stack_size = lex->pstack->stack_size + JS_STACK_CHUNK_SIZE;
     572              : 
                                /*
                                 * Each array is reallocated into a temporary and stored back
                                 * only after the NULL check, so a failed REALLOC never
                                 * overwrites the pointer still held in pstack.  Arrays grown
                                 * before a later failure keep their new, larger allocation.
                                 */
     573        38400 :         new_prediction = REALLOC(lex->pstack->prediction,
     574              :                                  new_stack_size * JS_MAX_PROD_LEN);
     575              : #ifdef JSONAPI_USE_PQEXPBUFFER
     576        19200 :         if (!new_prediction)
     577            0 :             return false;
     578              : #endif
     579        38400 :         lex->pstack->prediction = new_prediction;
     580              : 
     581        38400 :         new_fnames = REALLOC(lex->pstack->fnames,
     582              :                              new_stack_size * sizeof(char *));
     583              : #ifdef JSONAPI_USE_PQEXPBUFFER
     584        19200 :         if (!new_fnames)
     585            0 :             return false;
     586              : #endif
     587        38400 :         lex->pstack->fnames = new_fnames;
     588              : 
     589        38400 :         new_fnull = REALLOC(lex->pstack->fnull, new_stack_size * sizeof(bool));
     590              : #ifdef JSONAPI_USE_PQEXPBUFFER
     591        19200 :         if (!new_fnull)
     592            0 :             return false;
     593              : #endif
     594        38400 :         lex->pstack->fnull = new_fnull;
     595              : 
                                /* Publish the new capacity only once all three arrays have grown. */
     596        38400 :         lex->pstack->stack_size = new_stack_size;
     597              :     }
     598              : 
     599      2590335 :     lex->lex_level += 1;
     600              : 
     601      2590335 :     if (lex->incremental)
     602              :     {
     603              :         /*
     604              :          * Ensure freeJsonLexContext() remains safe even if no fname is
     605              :          * assigned at this level.
     606              :          */
     607      2590335 :         lex->pstack->fnames[lex->lex_level] = NULL;
     608              :     }
     609              : 
     610      2590335 :     return true;
     611              : }
     612              : 
                        /*
                         * dec_lex_level
                         *
                         * Leave the current nesting level: reset this level's stored field name
                         * to NULL through set_fname() (which also frees the old name when the
                         * context owns tokens), then decrement lex_level.
                         *
                         * set_fname() dereferences lex->pstack unconditionally, and no check
                         * against lex_level underflow is made here.
                         */
     613              : static inline void
     614       951434 : dec_lex_level(JsonLexContext *lex)
     615              : {
     616       951434 :     set_fname(lex, NULL);       /* free the current level's fname, if needed */
     617       951434 :     lex->lex_level -= 1;
     618       951434 : }
     619              : 
                        /*
                         * push_prediction
                         *
                         * Copy the entry.len bytes of entry.prod onto the top of the prediction
                         * stack and advance pred_index past them.
                         *
                         * No capacity check is made here.
                         * NOTE(review): presumably the stack_size * JS_MAX_PROD_LEN sizing done
                         * in inc_lex_level() guarantees enough room -- confirm.
                         */
     620              : static inline void
     621      7388837 : push_prediction(JsonParserStack *pstack, td_entry entry)
     622              : {
     623      7388837 :     memcpy(pstack->prediction + pstack->pred_index, entry.prod, entry.len);
     624      7388837 :     pstack->pred_index += entry.len;
     625      7388837 : }
     626              : 
                        /*
                         * pop_prediction
                         *
                         * Remove the topmost symbol from the prediction stack and return it.
                         * The stack must not be empty; this is only verified by the Assert.
                         */
     627              : static inline char
     628     23678952 : pop_prediction(JsonParserStack *pstack)
     629              : {
     630              :     Assert(pstack->pred_index > 0);
     631     23678952 :     return pstack->prediction[--pstack->pred_index];
     632              : }
     633              : 
                        /*
                         * next_prediction
                         *
                         * Return the topmost symbol of the prediction stack without removing
                         * it.  The stack must not be empty; this is only verified by the Assert.
                         */
     634              : static inline char
     635           76 : next_prediction(JsonParserStack *pstack)
     636              : {
     637              :     Assert(pstack->pred_index > 0);
     638           76 :     return pstack->prediction[pstack->pred_index - 1];
     639              : }
     640              : 
                        /*
                         * have_prediction
                         *
                         * Report whether the prediction stack holds at least one symbol.
                         */
     641              : static inline bool
     642     23981250 : have_prediction(JsonParserStack *pstack)
     643              : {
     644     23981250 :     return pstack->pred_index > 0;
     645              : }
     646              : 
                        /*
                         * set_fname
                         *
                         * Record fname as the field name for the current nesting level
                         * (lex->lex_level).  The pointer is stored as-is, not copied.  When the
                         * context owns tokens, whatever was stored at this level before is
                         * freed first; passing NULL therefore just releases the old name.
                         */
     647              : static inline void
     648      1584566 : set_fname(JsonLexContext *lex, char *fname)
     649              : {
     650      1584566 :     if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
     651              :     {
     652              :         /*
     653              :          * Don't leak prior fnames. If one hasn't been assigned yet,
     654              :          * inc_lex_level ensured that it's NULL (and therefore safe to free).
     655              :          */
     656       436862 :         FREE(lex->pstack->fnames[lex->lex_level]);
     657              :     }
     658              : 
     659      1584566 :     lex->pstack->fnames[lex->lex_level] = fname;
     660      1584566 : }
     661              : 
                        /*
                         * get_fname
                         *
                         * Return the field name stored for the current nesting level; this is
                         * the stored pointer itself, not a copy, and may be NULL.
                         */
     662              : static inline char *
     663       593376 : get_fname(JsonLexContext *lex)
     664              : {
     665       593376 :     return lex->pstack->fnames[lex->lex_level];
     666              : }
     667              : 
                        /*
                         * set_fnull
                         *
                         * Record the "value is null" flag for the current nesting level so that
                         * it can be read back later with get_fnull().
                         */
     668              : static inline void
     669      3216758 : set_fnull(JsonLexContext *lex, bool fnull)
     670              : {
     671      3216758 :     lex->pstack->fnull[lex->lex_level] = fnull;
     672      3216758 : }
     673              : 
                        /*
                         * get_fnull
                         *
                         * Return the "value is null" flag last stored for the current nesting
                         * level by set_fnull().
                         */
     674              : static inline bool
     675          744 : get_fnull(JsonLexContext *lex)
     676              : {
     677          744 :     return lex->pstack->fnull[lex->lex_level];
     678              : }
     679              : 
     680              : /*
     681              :  * Free memory in a JsonLexContext.
     682              :  *
     683              :  * There's no need for this if a *lex pointer was given when the object was
     684              :  * made, need_escapes was false, and json_errdetail() was not called; or if (in
     685              :  * backend environment) a memory context delete/reset is imminent.
                         *
                         * A NULL pointer and the shared failed_oom sentinel are both ignored.
                         * Otherwise the function releases, in order: strval (only if the
                         * JSONLEX_FREE_STRVAL flag is set), errormsg (if any), and, for an
                         * incremental lexer, inc_state and everything hanging off pstack.
                         * Finally the struct itself is freed when JSONLEX_FREE_STRUCT is set;
                         * if not, it is overwritten with zeroes, so a repeated call on the same
                         * struct finds nothing left to free.
     686              :  */
     687              : void
     688         5565 : freeJsonLexContext(JsonLexContext *lex)
     689              : {
     690              :     static const JsonLexContext empty = {0};
     691              : 
     692         5565 :     if (!lex || lex == &failed_oom)
     693            0 :         return;
     694              : 
     695         5565 :     if (lex->flags & JSONLEX_FREE_STRVAL)
     696         3414 :         jsonapi_destroyStringInfo(lex->strval);
     697              : 
     698         5565 :     if (lex->errormsg)
     699          953 :         jsonapi_destroyStringInfo(lex->errormsg);
     700              : 
     701         5565 :     if (lex->incremental)
     702              :     {
     703         2079 :         jsonapi_termStringInfo(&lex->inc_state->partial_token);
     704         2079 :         FREE(lex->inc_state);
     705         2079 :         FREE(lex->pstack->prediction);
     706              : 
     707         2079 :         if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
     708              :         {
     709              :             int         i;
     710              : 
     711              :             /* Clean up any tokens that were left behind. */
                                    /* Levels 0 .. lex_level inclusive are visited. */
     712       821410 :             for (i = 0; i <= lex->lex_level; i++)
     713       820430 :                 FREE(lex->pstack->fnames[i]);
     714              :         }
     715              : 
     716         2079 :         FREE(lex->pstack->fnames);
     717         2079 :         FREE(lex->pstack->fnull);
     718         2079 :         FREE(lex->pstack->scalar_val);
     719         2079 :         FREE(lex->pstack);
     720              :     }
     721              : 
     722         5565 :     if (lex->flags & JSONLEX_FREE_STRUCT)
     723         3001 :         FREE(lex);
     724              :     else
     725         2564 :         *lex = empty;
     726              : }
     727              : 
     728              : /*
     729              :  * pg_parse_json
     730              :  *
     731              :  * Publicly visible entry point for the JSON parser.
     732              :  *
     733              :  * lex is a lexing context, set up for the json to be processed by calling
     734              :  * makeJsonLexContext(). sem is a structure of function pointers to semantic
     735              :  * action routines to be called at appropriate spots during parsing, and a
     736              :  * pointer to a state object to be passed to those routines.
     737              :  *
     738              :  * If FORCE_JSON_PSTACK is defined then the routine will call the non-recursive
     739              :  * JSON parser. This is a useful way to validate that it's doing the right
     740              :  * thing at least for non-incremental cases. If this is on we expect to see
     741              :  * regression diffs relating to error messages about stack depth, but no
     742              :  * other differences.
                         *
                         * In the default (recursive descent) build the result is:
                         *   JSON_OUT_OF_MEMORY       if lex is the failed_oom sentinel;
                         *   JSON_INVALID_LEXER_TYPE  if lex is an incremental lexer;
                         *   the first error reported by the lexer or by the object/array/scalar
                         *   parse routines; otherwise the result of checking that the input
                         *   ends (JSON_TOKEN_END) right after the top-level value.
     743              :  */
     744              : JsonParseErrorType
     745        20122 : pg_parse_json(JsonLexContext *lex, const JsonSemAction *sem)
     746              : {
     747              : #ifdef FORCE_JSON_PSTACK
     748              :     /*
     749              :      * We don't need partial token processing, there is only one chunk. But we
     750              :      * still need to init the partial token string so that freeJsonLexContext
     751              :      * works, so perform the full incremental initialization.
     752              :      */
     753              :     if (!allocate_incremental_state(lex))
     754              :         return JSON_OUT_OF_MEMORY;
     755              : 
     756              :     return pg_parse_json_incremental(lex, sem, lex->input, lex->input_length, true);
     757              : 
     758              : #else
     759              : 
     760              :     JsonTokenType tok;
     761              :     JsonParseErrorType result;
     762              : 
     763        20122 :     if (lex == &failed_oom)
     764            0 :         return JSON_OUT_OF_MEMORY;
     765        20122 :     if (lex->incremental)
     766            0 :         return JSON_INVALID_LEXER_TYPE;
     767              : 
     768              :     /* get the initial token */
     769        20122 :     result = json_lex(lex);
     770        20122 :     if (result != JSON_SUCCESS)
     771          126 :         return result;
     772              : 
     773        19996 :     tok = lex_peek(lex);
     774              : 
     775              :     /* parse by recursive descent */
     776        19996 :     switch (tok)
     777              :     {
     778        10539 :         case JSON_TOKEN_OBJECT_START:
     779        10539 :             result = parse_object(lex, sem);
     780        10493 :             break;
     781         4328 :         case JSON_TOKEN_ARRAY_START:
     782         4328 :             result = parse_array(lex, sem);
     783         4093 :             break;
     784         5129 :         default:
     785         5129 :             result = parse_scalar(lex, sem);    /* json can be a bare scalar */
     786              :     }
     787              : 
                            /* Nothing but end-of-input may follow the top-level value. */
     788        19673 :     if (result == JSON_SUCCESS)
     789        19193 :         result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
     790              : 
     791        19673 :     return result;
     792              : #endif
     793              : }
     794              : 
     795              : /*
     796              :  * json_count_array_elements
     797              :  *
     798              :  * Returns number of array elements in lex context at start of array token
     799              :  * until end of array token at same nesting level.
     800              :  *
     801              :  * Designed to be called from array_start routines.
                         *
                         * On success the count is stored in *elements and JSON_SUCCESS is
                         * returned.  On any lexing or parsing error that error code is returned
                         * and *elements is left untouched.  JSON_OUT_OF_MEMORY is returned
                         * immediately if lex is the failed_oom sentinel.
                         *
                         * All scanning is done on a local copy of *lex, so the caller's lexer
                         * struct itself is not advanced.
     802              :  */
     803              : JsonParseErrorType
     804            3 : json_count_array_elements(JsonLexContext *lex, int *elements)
     805              : {
     806              :     JsonLexContext copylex;
     807              :     int         count;
     808              :     JsonParseErrorType result;
     809              : 
     810            3 :     if (lex == &failed_oom)
     811            0 :         return JSON_OUT_OF_MEMORY;
     812              : 
     813              :     /*
     814              :      * It's safe to do this with a shallow copy because the lexical routines
     815              :      * don't scribble on the input. They do scribble on the other pointers
     816              :      * etc, so doing this with a copy makes that safe.
     817              :      */
     818            3 :     memcpy(&copylex, lex, sizeof(JsonLexContext));
     819            3 :     copylex.need_escapes = false;   /* not interested in values here */
     820            3 :     copylex.lex_level++;
     821              : 
     822            3 :     count = 0;
     823            3 :     result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
     824              :                         JSON_TOKEN_ARRAY_START);
     825            3 :     if (result != JSON_SUCCESS)
     826            0 :         return result;
                            /* An immediately closing bracket means an empty array: count stays 0. */
     827            3 :     if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
     828              :     {
     829              :         while (1)
     830              :         {
     831           24 :             count++;
                                    /* nullSemAction: the element is parsed without semantic callbacks. */
     832           24 :             result = parse_array_element(&copylex, &nullSemAction);
     833           24 :             if (result != JSON_SUCCESS)
     834            0 :                 return result;
                                    /* Anything other than a comma ends the element list. */
     835           24 :             if (copylex.token_type != JSON_TOKEN_COMMA)
     836            3 :                 break;
     837           21 :             result = json_lex(&copylex);
     838           21 :             if (result != JSON_SUCCESS)
     839            0 :                 return result;
     840              :         }
     841              :     }
     842            3 :     result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
     843              :                         JSON_TOKEN_ARRAY_END);
     844            3 :     if (result != JSON_SUCCESS)
     845            0 :         return result;
     846              : 
     847            3 :     *elements = count;
     848            3 :     return JSON_SUCCESS;
     849              : }
     850              : 
     851              : /*
     852              :  * pg_parse_json_incremental
     853              :  *
     854              :  * Routine for incremental parsing of json. This uses the non-recursive top
     855              :  * down method of the Dragon Book Algorithm 4.3. It's somewhat slower than
     856              :  * the Recursive Descent pattern used above, so we only use it for incremental
     857              :  * parsing of JSON.
     858              :  *
     859              :  * The lexing context needs to be set up by a call to
     860              :  * makeJsonLexContextIncremental(). sem is a structure of function pointers
     861              :  * to semantic action routines, which should function exactly as those used
     862              :  * in the recursive descent parser.
     863              :  *
     864              :  * This routine can be called repeatedly with chunks of JSON. On the final
     865              :  * chunk is_last must be set to true. len is the length of the json chunk,
     866              :  * which does not need to be null terminated.
     867              :  */
     868              : JsonParseErrorType
     869       372950 : pg_parse_json_incremental(JsonLexContext *lex,
     870              :                           const JsonSemAction *sem,
     871              :                           const char *json,
     872              :                           size_t len,
     873              :                           bool is_last)
     874              : {
     875              :     JsonTokenType tok;
     876              :     JsonParseErrorType result;
     877       372950 :     JsonParseContext ctx = JSON_PARSE_VALUE;
     878       372950 :     JsonParserStack *pstack = lex->pstack;
     879              : 
     880       372950 :     if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
     881            0 :         return JSON_OUT_OF_MEMORY;
     882       372950 :     if (!lex->incremental)
     883            0 :         return JSON_INVALID_LEXER_TYPE;
     884              : 
     885       372950 :     lex->input = lex->token_terminator = lex->line_start = json;
     886       372950 :     lex->input_length = len;
     887       372950 :     lex->inc_state->is_last_chunk = is_last;
     888       372950 :     lex->inc_state->started = true;
     889              : 
     890              :     /* get the initial token */
     891       372950 :     result = json_lex(lex);
     892       372950 :     if (result != JSON_SUCCESS)
     893        71780 :         return result;
     894              : 
     895       301170 :     tok = lex_peek(lex);
     896              : 
     897              :     /* use prediction stack for incremental parsing */
     898              : 
     899       301170 :     if (!have_prediction(pstack))
     900              :     {
     901         1898 :         td_entry    goal = TD_ENTRY(JSON_PROD_GOAL);
     902              : 
     903         1898 :         push_prediction(pstack, goal);
     904              :     }
     905              : 
     906     23680080 :     while (have_prediction(pstack))
     907              :     {
     908     23678952 :         char        top = pop_prediction(pstack);
     909              :         td_entry    entry;
     910              : 
     911              :         /*
     912              :          * these first two branches are the guts of the Table Driven method
     913              :          */
     914     23678952 :         if (top == tok)
     915              :         {
     916              :             /*
     917              :              * tok can only be a terminal symbol, so top must be too. the
     918              :              * token matches the top of the stack, so get the next token.
     919              :              */
     920      6065280 :             if (tok < JSON_TOKEN_END)
     921              :             {
     922      6064152 :                 result = json_lex(lex);
     923      6064152 :                 if (result != JSON_SUCCESS)
     924       300041 :                     return result;
     925      5764808 :                 tok = lex_peek(lex);
     926              :             }
     927              :         }
     928     17613672 :         else if (IS_NT(top) && (entry = td_parser_table[OFS(top)][tok]).prod != NULL)
     929              :         {
     930              :             /*
     931              :              * the token is in the director set for a production of the
     932              :              * non-terminal at the top of the stack, so push the reversed RHS
     933              :              * of the production onto the stack.
     934              :              */
     935      7386939 :             push_prediction(pstack, entry);
     936              :         }
     937     10226733 :         else if (IS_SEM(top))
     938              :         {
     939              :             /*
     940              :              * top is a semantic action marker, so take action accordingly.
     941              :              * It's important to have these markers in the prediction stack
     942              :              * before any token they might need so we don't advance the token
     943              :              * prematurely. Note in a couple of cases we need to do something
     944              :              * both before and after the token.
     945              :              */
     946     10226292 :             switch (top)
     947              :             {
     948       129311 :                 case JSON_SEM_OSTART:
     949              :                     {
     950       129311 :                         json_struct_action ostart = sem->object_start;
     951              : 
     952       129311 :                         if (lex->lex_level >= JSON_TD_MAX_STACK)
     953            0 :                             return JSON_NESTING_TOO_DEEP;
     954              : 
     955       129311 :                         if (ostart != NULL)
     956              :                         {
     957       118635 :                             result = (*ostart) (sem->semstate);
     958       118635 :                             if (result != JSON_SUCCESS)
     959            0 :                                 return result;
     960              :                         }
     961              : 
     962       129311 :                         if (!inc_lex_level(lex))
     963            0 :                             return JSON_OUT_OF_MEMORY;
     964              :                     }
     965       129311 :                     break;
     966       128946 :                 case JSON_SEM_OEND:
     967              :                     {
     968       128946 :                         json_struct_action oend = sem->object_end;
     969              : 
     970       128946 :                         dec_lex_level(lex);
     971       128946 :                         if (oend != NULL)
     972              :                         {
     973       118634 :                             result = (*oend) (sem->semstate);
     974       118634 :                             if (result != JSON_SUCCESS)
     975            0 :                                 return result;
     976              :                         }
     977              :                     }
     978       128946 :                     break;
     979      2461280 :                 case JSON_SEM_ASTART:
     980              :                     {
     981      2461280 :                         json_struct_action astart = sem->array_start;
     982              : 
     983      2461280 :                         if (lex->lex_level >= JSON_TD_MAX_STACK)
     984          256 :                             return JSON_NESTING_TOO_DEEP;
     985              : 
     986      2461024 :                         if (astart != NULL)
     987              :                         {
     988          284 :                             result = (*astart) (sem->semstate);
     989          284 :                             if (result != JSON_SUCCESS)
     990            0 :                                 return result;
     991              :                         }
     992              : 
     993      2461024 :                         if (!inc_lex_level(lex))
     994            0 :                             return JSON_OUT_OF_MEMORY;
     995              :                     }
     996      2461024 :                     break;
     997       822488 :                 case JSON_SEM_AEND:
     998              :                     {
     999       822488 :                         json_struct_action aend = sem->array_end;
    1000              : 
    1001       822488 :                         dec_lex_level(lex);
    1002       822488 :                         if (aend != NULL)
    1003              :                         {
    1004          284 :                             result = (*aend) (sem->semstate);
    1005          284 :                             if (result != JSON_SUCCESS)
    1006            0 :                                 return result;
    1007              :                         }
    1008              :                     }
    1009       822488 :                     break;
    1010       633132 :                 case JSON_SEM_OFIELD_INIT:
    1011              :                     {
    1012              :                         /*
    1013              :                          * all we do here is save out the field name. We have
    1014              :                          * to wait to get past the ':' to see if the next
    1015              :                          * value is null so we can call the semantic routine
    1016              :                          */
    1017       633132 :                         char       *fname = NULL;
    1018       633132 :                         json_ofield_action ostart = sem->object_field_start;
    1019       633132 :                         json_ofield_action oend = sem->object_field_end;
    1020              : 
    1021       633132 :                         if ((ostart != NULL || oend != NULL) && lex->need_escapes)
    1022              :                         {
    1023       592752 :                             fname = STRDUP(lex->strval->data);
    1024       592752 :                             if (fname == NULL)
    1025            0 :                                 return JSON_OUT_OF_MEMORY;
    1026              :                         }
    1027       633132 :                         set_fname(lex, fname);
    1028              :                     }
    1029       633132 :                     break;
    1030       633004 :                 case JSON_SEM_OFIELD_START:
    1031              :                     {
    1032              :                         /*
    1033              :                          * the current token should be the first token of the
    1034              :                          * value
    1035              :                          */
    1036       633004 :                         bool        isnull = tok == JSON_TOKEN_NULL;
    1037       633004 :                         json_ofield_action ostart = sem->object_field_start;
    1038              : 
    1039       633004 :                         set_fnull(lex, isnull);
    1040              : 
    1041       633004 :                         if (ostart != NULL)
    1042              :                         {
    1043       592752 :                             char       *fname = get_fname(lex);
    1044              : 
    1045       592752 :                             result = (*ostart) (sem->semstate, fname, isnull);
    1046       592752 :                             if (result != JSON_SUCCESS)
    1047            0 :                                 return result;
    1048              :                         }
    1049              :                     }
    1050       633004 :                     break;
    1051       632967 :                 case JSON_SEM_OFIELD_END:
    1052              :                     {
    1053       632967 :                         json_ofield_action oend = sem->object_field_end;
    1054              : 
    1055       632967 :                         if (oend != NULL)
    1056              :                         {
    1057          624 :                             char       *fname = get_fname(lex);
    1058          624 :                             bool        isnull = get_fnull(lex);
    1059              : 
    1060          624 :                             result = (*oend) (sem->semstate, fname, isnull);
    1061          624 :                             if (result != JSON_SUCCESS)
    1062            0 :                                 return result;
    1063              :                         }
    1064              :                     }
    1065       632967 :                     break;
    1066      2583754 :                 case JSON_SEM_AELEM_START:
    1067              :                     {
    1068      2583754 :                         json_aelem_action astart = sem->array_element_start;
    1069      2583754 :                         bool        isnull = tok == JSON_TOKEN_NULL;
    1070              : 
    1071      2583754 :                         set_fnull(lex, isnull);
    1072              : 
    1073      2583754 :                         if (astart != NULL)
    1074              :                         {
    1075          120 :                             result = (*astart) (sem->semstate, isnull);
    1076          120 :                             if (result != JSON_SUCCESS)
    1077            0 :                                 return result;
    1078              :                         }
    1079              :                     }
    1080      2583754 :                     break;
    1081       945354 :                 case JSON_SEM_AELEM_END:
    1082              :                     {
    1083       945354 :                         json_aelem_action aend = sem->array_element_end;
    1084              : 
    1085       945354 :                         if (aend != NULL)
    1086              :                         {
    1087          120 :                             bool        isnull = get_fnull(lex);
    1088              : 
    1089          120 :                             result = (*aend) (sem->semstate, isnull);
    1090          120 :                             if (result != JSON_SUCCESS)
    1091            0 :                                 return result;
    1092              :                         }
    1093              :                     }
    1094       945354 :                     break;
    1095       628028 :                 case JSON_SEM_SCALAR_INIT:
    1096              :                     {
    1097       628028 :                         json_scalar_action sfunc = sem->scalar;
    1098              : 
    1099       628028 :                         pstack->scalar_val = NULL;
    1100              : 
    1101       628028 :                         if (sfunc != NULL)
    1102              :                         {
    1103              :                             /*
    1104              :                              * extract the de-escaped string value, or the raw
    1105              :                              * lexeme
    1106              :                              */
    1107              :                             /*
    1108              :                              * XXX copied from RD parser but looks like a
    1109              :                              * buglet
    1110              :                              */
    1111       592432 :                             if (tok == JSON_TOKEN_STRING)
    1112              :                             {
    1113       473716 :                                 if (lex->need_escapes)
    1114              :                                 {
    1115       473716 :                                     pstack->scalar_val = STRDUP(lex->strval->data);
    1116       473716 :                                     if (pstack->scalar_val == NULL)
    1117            0 :                                         return JSON_OUT_OF_MEMORY;
    1118              :                                 }
    1119              :                             }
    1120              :                             else
    1121              :                             {
    1122       118716 :                                 ptrdiff_t   tlen = (lex->token_terminator - lex->token_start);
    1123              : 
    1124       118716 :                                 pstack->scalar_val = ALLOC(tlen + 1);
    1125       118716 :                                 if (pstack->scalar_val == NULL)
    1126            0 :                                     return JSON_OUT_OF_MEMORY;
    1127              : 
    1128       118716 :                                 memcpy(pstack->scalar_val, lex->token_start, tlen);
    1129       118716 :                                 pstack->scalar_val[tlen] = '\0';
    1130              :                             }
    1131       592432 :                             pstack->scalar_tok = tok;
    1132              :                         }
    1133              :                     }
    1134       628028 :                     break;
    1135       628028 :                 case JSON_SEM_SCALAR_CALL:
    1136              :                     {
    1137              :                         /*
    1138              :                          * We'd like to be able to get rid of this business of
    1139              :                          * two bits of scalar action, but we can't. It breaks
    1140              :                          * certain semantic actions which expect that when
    1141              :                          * called the lexer has consumed the item. See for
    1142              :                          * example get_scalar() in jsonfuncs.c.
    1143              :                          */
    1144       628028 :                         json_scalar_action sfunc = sem->scalar;
    1145              : 
    1146       628028 :                         if (sfunc != NULL)
    1147              :                         {
    1148       592432 :                             result = (*sfunc) (sem->semstate, pstack->scalar_val, pstack->scalar_tok);
    1149              : 
    1150              :                             /*
    1151              :                              * Either ownership of the token passed to the
    1152              :                              * callback, or we need to free it now. Either
    1153              :                              * way, clear our pointer to it so it doesn't get
    1154              :                              * freed in the future.
    1155              :                              */
    1156       592431 :                             if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
    1157          272 :                                 FREE(pstack->scalar_val);
    1158       592431 :                             pstack->scalar_val = NULL;
    1159              : 
    1160       592431 :                             if (result != JSON_SUCCESS)
    1161            0 :                                 return result;
    1162              :                         }
    1163              :                     }
    1164       628027 :                     break;
    1165            0 :                 default:
    1166              :                     /* should not happen */
    1167            0 :                     break;
    1168              :             }
    1169              :         }
    1170              :         else
    1171              :         {
    1172              :             /*
    1173              :              * The token didn't match the stack top if it's a terminal nor a
    1174              :              * production for the stack top if it's a non-terminal.
    1175              :              *
    1176              :              * Various cases here are Asserted to be not possible, as the
    1177              :              * token would not appear at the top of the prediction stack
    1178              :              * unless the lookahead matched.
    1179              :              */
    1180          441 :             switch (top)
    1181              :             {
    1182           76 :                 case JSON_TOKEN_STRING:
    1183           76 :                     if (next_prediction(pstack) == JSON_TOKEN_COLON)
    1184           76 :                         ctx = JSON_PARSE_STRING;
    1185              :                     else
    1186              :                     {
    1187              :                         Assert(false);
    1188            0 :                         ctx = JSON_PARSE_VALUE;
    1189              :                     }
    1190           76 :                     break;
    1191            0 :                 case JSON_TOKEN_NUMBER:
    1192              :                 case JSON_TOKEN_TRUE:
    1193              :                 case JSON_TOKEN_FALSE:
    1194              :                 case JSON_TOKEN_NULL:
    1195              :                 case JSON_TOKEN_ARRAY_START:
    1196              :                 case JSON_TOKEN_OBJECT_START:
    1197              :                     Assert(false);
    1198            0 :                     ctx = JSON_PARSE_VALUE;
    1199            0 :                     break;
    1200            0 :                 case JSON_TOKEN_ARRAY_END:
    1201              :                     Assert(false);
    1202            0 :                     ctx = JSON_PARSE_ARRAY_NEXT;
    1203            0 :                     break;
    1204            0 :                 case JSON_TOKEN_OBJECT_END:
    1205              :                     Assert(false);
    1206            0 :                     ctx = JSON_PARSE_OBJECT_NEXT;
    1207            0 :                     break;
    1208            0 :                 case JSON_TOKEN_COMMA:
    1209              :                     Assert(false);
    1210            0 :                     if (next_prediction(pstack) == JSON_TOKEN_STRING)
    1211            0 :                         ctx = JSON_PARSE_OBJECT_NEXT;
    1212              :                     else
    1213            0 :                         ctx = JSON_PARSE_ARRAY_NEXT;
    1214            0 :                     break;
    1215           52 :                 case JSON_TOKEN_COLON:
    1216           52 :                     ctx = JSON_PARSE_OBJECT_LABEL;
    1217           52 :                     break;
    1218           12 :                 case JSON_TOKEN_END:
    1219           12 :                     ctx = JSON_PARSE_END;
    1220           12 :                     break;
    1221           36 :                 case JSON_NT_MORE_ARRAY_ELEMENTS:
    1222           36 :                     ctx = JSON_PARSE_ARRAY_NEXT;
    1223           36 :                     break;
    1224           28 :                 case JSON_NT_ARRAY_ELEMENTS:
    1225           28 :                     ctx = JSON_PARSE_ARRAY_START;
    1226           28 :                     break;
    1227          140 :                 case JSON_NT_MORE_KEY_PAIRS:
    1228          140 :                     ctx = JSON_PARSE_OBJECT_NEXT;
    1229          140 :                     break;
    1230           60 :                 case JSON_NT_KEY_PAIRS:
    1231           60 :                     ctx = JSON_PARSE_OBJECT_START;
    1232           60 :                     break;
    1233           37 :                 default:
    1234           37 :                     ctx = JSON_PARSE_VALUE;
    1235              :             }
    1236          441 :             return report_parse_error(ctx, lex);
    1237              :         }
    1238              :     }
    1239              : 
    1240         1128 :     return JSON_SUCCESS;
    1241              : }
    1242              : 
    1243              : /*
    1244              :  *  Recursive Descent parse routines. There is one for each structural
    1245              :  *  element in a json document:
    1246              :  *    - scalar (string, number, true, false, null)
    1247              :  *    - array  ( [ ] )
    1248              :  *    - array element
    1249              :  *    - object ( { } )
    1250              :  *    - object field
    1251              :  */
    1252              : static inline JsonParseErrorType
    1253       170156 : parse_scalar(JsonLexContext *lex, const JsonSemAction *sem)
    1254              : {
    1255       170156 :     char       *val = NULL; /* copy of the token text passed to sfunc; may stay NULL */
    1256       170156 :     json_scalar_action sfunc = sem->scalar;
    1257       170156 :     JsonTokenType tok = lex_peek(lex);
    1258              :     JsonParseErrorType result;
    1259              : 
    1260              :     /* a scalar must be a string, a number, true, false, or null; anything else is reported as a JSON_PARSE_VALUE error */
    1261       170156 :     if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
    1262        16706 :         tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
    1263              :         tok != JSON_TOKEN_NULL)
    1264           94 :         return report_parse_error(JSON_PARSE_VALUE, lex);
    1265              : 
    1266              :     /* if no semantic function, just consume the token; its text is not copied */
    1267       170062 :     if (sfunc == NULL)
    1268         5947 :         return json_lex(lex);
    1269              : 
    1270              :     /* extract the de-escaped string value (from lex->strval), or the raw lexeme for non-string tokens */
    1271       164115 :     if (lex_peek(lex) == JSON_TOKEN_STRING)
    1272              :     {
    1273        38456 :         if (lex->need_escapes)
    1274              :         {
    1275        35606 :             val = STRDUP(lex->strval->data);
    1276        35606 :             if (val == NULL)
    1277            0 :                 return JSON_OUT_OF_MEMORY;
    1278              :         }
    1279              :     }
    1280              :     else
    1281              :     {
    1282       125659 :         int         len = (lex->token_terminator - lex->token_start); /* lexeme length in bytes. NOTE(review): the equivalent copy in the JSON_SEM_SCALAR_INIT case keeps this difference in a ptrdiff_t -- confirm int is wide enough here */
    1283              : 
    1284       125659 :         val = ALLOC(len + 1); /* one extra byte for the terminating NUL written below */
    1285       125659 :         if (val == NULL)
    1286            0 :             return JSON_OUT_OF_MEMORY;
    1287              : 
    1288       125659 :         memcpy(val, lex->token_start, len);
    1289       125659 :         val[len] = '\0';
    1290              :     }
    1291              : 
    1292              :     /* consume the token; this happens before the callback is invoked */
    1293       164115 :     result = json_lex(lex);
    1294       164115 :     if (result != JSON_SUCCESS)
    1295              :     {
    1296            0 :         FREE(val); /* the callback was never handed val, so it is released here unconditionally */
    1297            0 :         return result;
    1298              :     }
    1299              : 
    1300              :     /*
    1301              :      * invoke the callback, which may take ownership of val. For string
    1302              :      * values, val is NULL if need_escapes is false.
    1303              :      */
    1304       164115 :     result = (*sfunc) (sem->semstate, val, tok);
    1305              : 
    1306       163998 :     if (lex->flags & JSONLEX_CTX_OWNS_TOKENS) /* after the callback, val is freed here only when the context owns tokens */
    1307            0 :         FREE(val);
    1308              : 
    1309       163998 :     return result; /* the callback's result is returned as-is */
    1310              : }
    1311              : 
    1312              : static JsonParseErrorType
    1313       164547 : parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
    1314              : {
    1315              :     /*
    1316              :      * An object field is "fieldname" : value where value can be a scalar,
    1317              :      * object or array.  Note: in user-facing docs and error messages, we
    1318              :      * generally call a field name a "key".
    1319              :      */
    1320              : 
    1321       164547 :     char       *fname = NULL; /* copy of the field name passed to ostart/oend; may stay NULL */
    1322       164547 :     json_ofield_action ostart = sem->object_field_start;
    1323       164547 :     json_ofield_action oend = sem->object_field_end;
    1324              :     bool        isnull;
    1325              :     JsonTokenType tok;
    1326              :     JsonParseErrorType result;
    1327              : 
    1328       164547 :     if (lex_peek(lex) != JSON_TOKEN_STRING)
    1329            6 :         return report_parse_error(JSON_PARSE_STRING, lex);
    1330       164541 :     if ((ostart != NULL || oend != NULL) && lex->need_escapes)
    1331              :     {
    1332              :         /* fname is NULL if need_escapes is false, or if neither field callback is set */
    1333       130411 :         fname = STRDUP(lex->strval->data);
    1334       130411 :         if (fname == NULL)
    1335            0 :             return JSON_OUT_OF_MEMORY;
    1336              :     }
    1337       164541 :     result = json_lex(lex); /* consume the field-name string token */
    1338       164541 :     if (result != JSON_SUCCESS)
    1339              :     {
    1340            6 :         FREE(fname); /* no callback has been handed fname yet, so free it unconditionally */
    1341            6 :         return result;
    1342              :     }
    1343              : 
    1344       164535 :     result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
    1345       164535 :     if (result != JSON_SUCCESS)
    1346              :     {
    1347           72 :         FREE(fname); /* still before any callback, so free unconditionally here too */
    1348           72 :         return result;
    1349              :     }
    1350              : 
    1351       164463 :     tok = lex_peek(lex); /* first token of the field's value; used for isnull and for the dispatch below */
    1352       164463 :     isnull = tok == JSON_TOKEN_NULL; /* passed to both field callbacks */
    1353              : 
    1354       164463 :     if (ostart != NULL)
    1355              :     {
    1356       130342 :         result = (*ostart) (sem->semstate, fname, isnull);
    1357       130311 :         if (result != JSON_SUCCESS)
    1358           27 :             goto ofield_cleanup;
    1359              :     }
    1360              : 
    1361       164405 :     switch (tok)
    1362              :     {
    1363         5614 :         case JSON_TOKEN_OBJECT_START:
    1364         5614 :             result = parse_object(lex, sem);
    1365         2174 :             break;
    1366         8226 :         case JSON_TOKEN_ARRAY_START:
    1367         8226 :             result = parse_array(lex, sem);
    1368         8149 :             break;
    1369       150565 :         default: /* every other token is handed to parse_scalar, which reports an error for non-scalar tokens */
    1370       150565 :             result = parse_scalar(lex, sem);
    1371              :     }
    1372       160846 :     if (result != JSON_SUCCESS)
    1373          147 :         goto ofield_cleanup; /* oend is not called when the value failed to parse */
    1374              : 
    1375       160699 :     if (oend != NULL)
    1376              :     {
    1377        91777 :         result = (*oend) (sem->semstate, fname, isnull);
    1378        91777 :         if (result != JSON_SUCCESS)
    1379            0 :             goto ofield_cleanup;
    1380              :     }
    1381              : 
    1382       160699 : ofield_cleanup: /* reached on success as well as via the error gotos above; result holds the outcome */
    1383       160873 :     if (lex->flags & JSONLEX_CTX_OWNS_TOKENS) /* once a callback may have seen fname, it is freed here only when the context owns tokens */
    1384            0 :         FREE(fname);
    1385       160873 :     return result;
    1386              : }
    1387              : 
    1388              : static JsonParseErrorType
    1389        26421 : parse_object(JsonLexContext *lex, const JsonSemAction *sem)
    1390              : {
    1391              :     /*
    1392              :      * an object is a possibly empty sequence of object fields, separated by
    1393              :      * commas and surrounded by curly braces.
    1394              :      */
    1395        26421 :     json_struct_action ostart = sem->object_start;
    1396        26421 :     json_struct_action oend = sem->object_end;
    1397              :     JsonTokenType tok;
    1398              :     JsonParseErrorType result;
    1399              : 
    1400              : #ifndef FRONTEND
    1401              : 
    1402              :     /*
    1403              :      * TODO: clients need some way to put a bound on stack growth. Parse level
    1404              :      * limits maybe?
    1405              :      */
    1406        23459 :     check_stack_depth(); /* compiled only when FRONTEND is not defined */
    1407              : #endif
    1408              : 
    1409        26415 :     if (ostart != NULL)
    1410              :     {
    1411        16688 :         result = (*ostart) (sem->semstate);
    1412        16645 :         if (result != JSON_SUCCESS)
    1413           33 :             return result;
    1414              :     }
    1415              : 
    1416              :     /*
    1417              :      * Data inside an object is at a higher nesting level than the object
    1418              :      * itself. Note that we increment this after we call the semantic routine
    1419              :      * for the object start and restore it before we call the routine for the
    1420              :      * object end.
    1421              :      */
    1422        26339 :     lex->lex_level++;
    1423              : 
    1424              :     Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START);
    1425        26339 :     result = json_lex(lex); /* consume the opening brace */
    1426        26339 :     if (result != JSON_SUCCESS)
    1427           43 :         return result; /* note: lex_level is not decremented on this error path */
    1428              : 
    1429        26296 :     tok = lex_peek(lex);
    1430        26296 :     switch (tok)
    1431              :     {
    1432        24743 :         case JSON_TOKEN_STRING:
    1433        24743 :             result = parse_object_field(lex, sem);
    1434       160957 :             while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
    1435              :             {
    1436       139810 :                 result = json_lex(lex); /* consume the comma before parsing the next field */
    1437       139810 :                 if (result != JSON_SUCCESS)
    1438            6 :                     break;
    1439       139804 :                 result = parse_object_field(lex, sem);
    1440              :             }
    1441        21153 :             break;
    1442         1546 :         case JSON_TOKEN_OBJECT_END: /* empty object: no fields to parse */
    1443         1546 :             break;
    1444            7 :         default:
    1445              :             /* case of an invalid initial token inside the object */
    1446            7 :             result = report_parse_error(JSON_PARSE_OBJECT_START, lex);
    1447              :     }
    1448        22706 :     if (result != JSON_SUCCESS)
    1449          271 :         return result; /* error from a field or the initial token; lex_level stays incremented and oend is not called */
    1450              : 
    1451        22435 :     result = lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
    1452        22435 :     if (result != JSON_SUCCESS)
    1453           18 :         return result;
    1454              : 
    1455        22417 :     lex->lex_level--; /* restore the level before the object-end callback runs */
    1456              : 
    1457        22417 :     if (oend != NULL)
    1458              :     {
    1459        13316 :         result = (*oend) (sem->semstate);
    1460        13263 :         if (result != JSON_SUCCESS)
    1461           30 :             return result;
    1462              :     }
    1463              : 
    1464        22334 :     return JSON_SUCCESS;
    1465              : }
    1466              : 
    1467              : static JsonParseErrorType
    1468        31192 : parse_array_element(JsonLexContext *lex, const JsonSemAction *sem)
    1469              : {
    1470        31192 :     json_aelem_action astart = sem->array_element_start;
    1471        31192 :     json_aelem_action aend = sem->array_element_end;
    1472        31192 :     JsonTokenType tok = lex_peek(lex); /* first token of the element; drives isnull and the dispatch below */
    1473              :     JsonParseErrorType result;
    1474              :     bool        isnull;
    1475              : 
    1476        31192 :     isnull = tok == JSON_TOKEN_NULL; /* passed to both element callbacks */
    1477              : 
    1478        31192 :     if (astart != NULL)
    1479              :     {
    1480         5606 :         result = (*astart) (sem->semstate, isnull);
    1481         5594 :         if (result != JSON_SUCCESS)
    1482           12 :             return result;
    1483              :     }
    1484              : 
    1485              :     /* an array element is any object, array or scalar; dispatch on the token peeked above */
    1486        31168 :     switch (tok)
    1487              :     {
    1488        10268 :         case JSON_TOKEN_OBJECT_START:
    1489        10268 :             result = parse_object(lex, sem);
    1490        10062 :             break;
    1491         6438 :         case JSON_TOKEN_ARRAY_START:
    1492         6438 :             result = parse_array(lex, sem);
    1493         2147 :             break;
    1494        14462 :         default: /* every other token is handed to parse_scalar, which reports an error for non-scalar tokens */
    1495        14462 :             result = parse_scalar(lex, sem);
    1496              :     }
    1497              : 
    1498        26638 :     if (result != JSON_SUCCESS)
    1499          288 :         return result; /* aend is not called when the element itself failed to parse */
    1500              : 
    1501        26350 :     if (aend != NULL)
    1502              :     {
    1503         3614 :         result = (*aend) (sem->semstate, isnull);
    1504         3608 :         if (result != JSON_SUCCESS)
    1505            0 :             return result;
    1506              :     }
    1507              : 
    1508        26344 :     return JSON_SUCCESS;
    1509              : }
    1510              : 
    1511              : static JsonParseErrorType
    1512        18992 : parse_array(JsonLexContext *lex, const JsonSemAction *sem)
    1513              : {
    1514              :     /*
    1515              :      * an array is a possibly empty sequence of array elements, separated by
    1516              :      * commas and surrounded by square brackets.
    1517              :      */
    1518        18992 :     json_struct_action astart = sem->array_start;
    1519        18992 :     json_struct_action aend = sem->array_end;
    1520              :     JsonParseErrorType result;
    1521              : 
    1522              : #ifndef FRONTEND
    1523        18964 :     check_stack_depth(); /* compiled only when FRONTEND is not defined */
    1524              : #endif
    1525              : 
    1526        18986 :     if (astart != NULL)
    1527              :     {
    1528        10042 :         result = (*astart) (sem->semstate);
    1529        10017 :         if (result != JSON_SUCCESS)
    1530           18 :             return result;
    1531              :     }
    1532              : 
    1533              :     /*
    1534              :      * Data inside an array is at a higher nesting level than the array
    1535              :      * itself. Note that we increment this after we call the semantic routine
    1536              :      * for the array start and restore it before we call the routine for the
    1537              :      * array end.
    1538              :      */
    1539        18943 :     lex->lex_level++;
    1540              : 
    1541        18943 :     result = lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
    1542        18943 :     if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END) /* element parsing is skipped for an empty array */
    1543              :     {
    1544        15169 :         result = parse_array_element(lex, sem);
    1545              : 
    1546        26620 :         while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
    1547              :         {
    1548        15999 :             result = json_lex(lex); /* consume the comma before parsing the next element */
    1549        15999 :             if (result != JSON_SUCCESS)
    1550            0 :                 break;
    1551        15999 :             result = parse_array_element(lex, sem);
    1552              :         }
    1553              :     }
    1554        14395 :     if (result != JSON_SUCCESS)
    1555          312 :         return result; /* note: lex_level stays incremented and aend is not called on this error path */
    1556              : 
    1557        14083 :     result = lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
    1558        14083 :     if (result != JSON_SUCCESS)
    1559           12 :         return result;
    1560              : 
    1561        14071 :     lex->lex_level--; /* restore the level before the array-end callback runs */
    1562              : 
    1563        14071 :     if (aend != NULL)
    1564              :     {
    1565         6265 :         result = (*aend) (sem->semstate);
    1566         6241 :         if (result != JSON_SUCCESS)
    1567           12 :             return result;
    1568              :     }
    1569              : 
    1570        14035 :     return JSON_SUCCESS;
    1571              : }
    1572              : 
    1573              : /*
    1574              :  * Lex one token from the input stream.
    1575              :  *
    1576              :  * When doing incremental parsing, we can reach the end of the input string
    1577              :  * without having (or knowing we have) a complete token. If it's not the
    1578              :  * final chunk of input, the partial token is then saved to the lex
    1579              :  * structure's ptok StringInfo. On subsequent calls input is appended to this
    1580              :  * buffer until we have something that we think is a complete token,
    1581              :  * which is then lexed using a recursive call to json_lex. Processing then
    1582              :  * continues as normal on subsequent calls.
    1583              :  *
    1584              :  * Note that when doing incremental processing, the lex.prev_token_terminator
    1585              :  * should not be relied on. It could point into a previous input chunk or
    1586              :  * worse.
    1587              :  */
    1588              : JsonParseErrorType
    1589      7248233 : json_lex(JsonLexContext *lex)
    1590              : {
    1591              :     const char *s;
    1592      7248233 :     const char *const end = lex->input + lex->input_length;
    1593              :     JsonParseErrorType result;
    1594              : 
    1595      7248233 :     if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
    1596            0 :         return JSON_OUT_OF_MEMORY;
    1597              : 
    1598      7248233 :     if (lex->incremental)
    1599              :     {
    1600      6437102 :         if (lex->inc_state->partial_completed)
    1601              :         {
    1602              :             /*
    1603              :              * We just lexed a completed partial token on the last call, so
    1604              :              * reset everything
    1605              :              */
    1606        34431 :             jsonapi_resetStringInfo(&(lex->inc_state->partial_token));
    1607        34431 :             lex->token_terminator = lex->input;
    1608        34431 :             lex->inc_state->partial_completed = false;
    1609              :         }
    1610              : 
    1611              : #ifdef JSONAPI_USE_PQEXPBUFFER
    1612              :         /* Make sure our partial token buffer is valid before using it below. */
    1613      1915760 :         if (PQExpBufferDataBroken(lex->inc_state->partial_token))
    1614            0 :             return JSON_OUT_OF_MEMORY;
    1615              : #endif
    1616              :     }
    1617              : 
    1618      7248233 :     s = lex->token_terminator;
    1619              : 
    1620      7248233 :     if (lex->incremental && lex->inc_state->partial_token.len)
    1621              :     {
    1622              :         /*
    1623              :          * We have a partial token. Extend it and if completed lex it by a
    1624              :          * recursive call
    1625              :          */
    1626        83203 :         jsonapi_StrValType *ptok = &(lex->inc_state->partial_token);
    1627        83203 :         size_t      added = 0;
    1628        83203 :         bool        tok_done = false;
    1629        83203 :         JsonLexContext dummy_lex = {0};
    1630              :         JsonParseErrorType partial_result;
    1631              : 
    1632        83203 :         if (ptok->data[0] == '"')
    1633              :         {
    1634              :             /*
    1635              :              * It's a string. Accumulate characters until we reach an
    1636              :              * unescaped '"'.
    1637              :              */
    1638        80078 :             int         escapes = 0;
    1639              : 
    1640        81362 :             for (int i = ptok->len - 1; i > 0; i--)
    1641              :             {
    1642              :                 /* count the trailing backslashes on the partial token */
    1643        75670 :                 if (ptok->data[i] == '\\')
    1644         1284 :                     escapes++;
    1645              :                 else
    1646        74386 :                     break;
    1647              :             }
    1648              : 
    1649       607145 :             for (size_t i = 0; i < lex->input_length; i++)
    1650              :             {
    1651       559573 :                 char        c = lex->input[i];
    1652              : 
    1653       559573 :                 jsonapi_appendStringInfoCharMacro(ptok, c);
    1654       559573 :                 added++;
    1655       559573 :                 if (c == '"' && escapes % 2 == 0)
    1656              :                 {
    1657        32506 :                     tok_done = true;
    1658        32506 :                     break;
    1659              :                 }
    1660       527067 :                 if (c == '\\')
    1661         2048 :                     escapes++;
    1662              :                 else
    1663       525019 :                     escapes = 0;
    1664              :             }
    1665              :         }
    1666              :         else
    1667              :         {
    1668              :             /* not a string */
    1669         3125 :             char        c = ptok->data[0];
    1670              : 
    1671         3125 :             if (c == '-' || (c >= '0' && c <= '9'))
    1672              :             {
    1673              :                 /* for numbers look for possible numeric continuations */
    1674              : 
    1675          657 :                 bool        numend = false;
    1676              : 
    1677         1842 :                 for (size_t i = 0; i < lex->input_length && !numend; i++)
    1678              :                 {
    1679         1185 :                     char        cc = lex->input[i];
    1680              : 
    1681         1185 :                     switch (cc)
    1682              :                     {
    1683          796 :                         case '+':
    1684              :                         case '-':
    1685              :                         case 'e':
    1686              :                         case 'E':
    1687              :                         case '0':
    1688              :                         case '1':
    1689              :                         case '2':
    1690              :                         case '3':
    1691              :                         case '4':
    1692              :                         case '5':
    1693              :                         case '6':
    1694              :                         case '7':
    1695              :                         case '8':
    1696              :                         case '9':
    1697              :                             {
    1698          796 :                                 jsonapi_appendStringInfoCharMacro(ptok, cc);
    1699          796 :                                 added++;
    1700              :                             }
    1701          796 :                             break;
    1702          389 :                         default:
    1703          389 :                             numend = true;
    1704              :                     }
    1705              :                 }
    1706              :             }
    1707              : 
    1708              :             /*
    1709              :              * Add any remaining alphanumeric chars. This takes care of the
    1710              :              * {null, false, true} literals as well as any trailing
    1711              :              * alphanumeric junk on non-string tokens.
    1712              :              */
    1713         6449 :             for (size_t i = added; i < lex->input_length; i++)
    1714              :             {
    1715         5469 :                 char        cc = lex->input[i];
    1716              : 
    1717         5469 :                 if (JSON_ALPHANUMERIC_CHAR(cc))
    1718              :                 {
    1719         3324 :                     jsonapi_appendStringInfoCharMacro(ptok, cc);
    1720         3324 :                     added++;
    1721              :                 }
    1722              :                 else
    1723              :                 {
    1724         2145 :                     tok_done = true;
    1725         2145 :                     break;
    1726              :                 }
    1727              :             }
    1728         3125 :             if (added == lex->input_length &&
    1729          980 :                 lex->inc_state->is_last_chunk)
    1730              :             {
    1731           76 :                 tok_done = true;
    1732              :             }
    1733              :         }
    1734              : 
    1735        83203 :         if (!tok_done)
    1736              :         {
    1737              :             /* We should have consumed the whole chunk in this case. */
    1738              :             Assert(added == lex->input_length);
    1739              : 
    1740        48476 :             if (!lex->inc_state->is_last_chunk)
    1741        48436 :                 return JSON_INCOMPLETE;
    1742              : 
    1743              :             /* json_errdetail() needs access to the accumulated token. */
    1744           40 :             lex->token_start = ptok->data;
    1745           40 :             lex->token_terminator = ptok->data + ptok->len;
    1746           40 :             return JSON_INVALID_TOKEN;
    1747              :         }
    1748              : 
    1749              :         /*
    1750              :          * Everything up to lex->input[added] has been added to the partial
    1751              :          * token, so move the input past it.
    1752              :          */
    1753        34727 :         lex->input += added;
    1754        34727 :         lex->input_length -= added;
    1755              : 
    1756        34727 :         dummy_lex.input = dummy_lex.token_terminator =
    1757        34727 :             dummy_lex.line_start = ptok->data;
    1758        34727 :         dummy_lex.line_number = lex->line_number;
    1759        34727 :         dummy_lex.input_length = ptok->len;
    1760        34727 :         dummy_lex.input_encoding = lex->input_encoding;
    1761        34727 :         dummy_lex.incremental = false;
    1762        34727 :         dummy_lex.need_escapes = lex->need_escapes;
    1763        34727 :         dummy_lex.strval = lex->strval;
    1764              : 
    1765        34727 :         partial_result = json_lex(&dummy_lex);
    1766              : 
    1767              :         /*
    1768              :          * We either have a complete token or an error. In either case we need
    1769              :          * to point to the partial token data for the semantic or error
    1770              :          * routines. If it's not an error we'll readjust on the next call to
    1771              :          * json_lex.
    1772              :          */
    1773        34727 :         lex->token_type = dummy_lex.token_type;
    1774        34727 :         lex->line_number = dummy_lex.line_number;
    1775              : 
    1776              :         /*
    1777              :          * We know the prev_token_terminator must be back in some previous
    1778              :          * piece of input, so we just make it NULL.
    1779              :          */
    1780        34727 :         lex->prev_token_terminator = NULL;
    1781              : 
    1782              :         /*
    1783              :          * Normally token_start would be ptok->data, but it could be later,
    1784              :          * see json_lex_string's handling of invalid escapes.
    1785              :          */
    1786        34727 :         lex->token_start = dummy_lex.token_start;
    1787        34727 :         lex->token_terminator = dummy_lex.token_terminator;
    1788        34727 :         if (partial_result == JSON_SUCCESS)
    1789              :         {
    1790              :             /* make sure we've used all the input */
    1791        34619 :             if (lex->token_terminator - lex->token_start != ptok->len)
    1792              :             {
    1793              :                 Assert(false);
    1794            0 :                 return JSON_INVALID_TOKEN;
    1795              :             }
    1796              : 
    1797        34619 :             lex->inc_state->partial_completed = true;
    1798              :         }
    1799        34727 :         return partial_result;
    1800              :         /* end of partial token processing */
    1801              :     }
    1802              : 
    1803              :     /* Skip leading whitespace. */
    1804     11560687 :     while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
    1805              :     {
    1806      4395657 :         if (*s++ == '\n')
    1807              :         {
    1808       292060 :             ++lex->line_number;
    1809       292060 :             lex->line_start = s;
    1810              :         }
    1811              :     }
    1812      7165030 :     lex->token_start = s;
    1813              : 
    1814              :     /* Determine token type. */
    1815      7165030 :     if (s >= end)
    1816              :     {
    1817       327362 :         lex->token_start = NULL;
    1818       327362 :         lex->prev_token_terminator = lex->token_terminator;
    1819       327362 :         lex->token_terminator = s;
    1820       327362 :         lex->token_type = JSON_TOKEN_END;
    1821              :     }
    1822              :     else
    1823              :     {
    1824      6837668 :         switch (*s)
    1825              :         {
    1826              :                 /* Single-character token, some kind of punctuation mark. */
    1827       155908 :             case '{':
    1828       155908 :                 lex->prev_token_terminator = lex->token_terminator;
    1829       155908 :                 lex->token_terminator = s + 1;
    1830       155908 :                 lex->token_type = JSON_TOKEN_OBJECT_START;
    1831       155908 :                 break;
    1832       151463 :             case '}':
    1833       151463 :                 lex->prev_token_terminator = lex->token_terminator;
    1834       151463 :                 lex->token_terminator = s + 1;
    1835       151463 :                 lex->token_type = JSON_TOKEN_OBJECT_END;
    1836       151463 :                 break;
    1837      2480365 :             case '[':
    1838      2480365 :                 lex->prev_token_terminator = lex->token_terminator;
    1839      2480365 :                 lex->token_terminator = s + 1;
    1840      2480365 :                 lex->token_type = JSON_TOKEN_ARRAY_START;
    1841      2480365 :                 break;
    1842       836742 :             case ']':
    1843       836742 :                 lex->prev_token_terminator = lex->token_terminator;
    1844       836742 :                 lex->token_terminator = s + 1;
    1845       836742 :                 lex->token_type = JSON_TOKEN_ARRAY_END;
    1846       836742 :                 break;
    1847       784237 :             case ',':
    1848       784237 :                 lex->prev_token_terminator = lex->token_terminator;
    1849       784237 :                 lex->token_terminator = s + 1;
    1850       784237 :                 lex->token_type = JSON_TOKEN_COMMA;
    1851       784237 :                 break;
    1852       797539 :             case ':':
    1853       797539 :                 lex->prev_token_terminator = lex->token_terminator;
    1854       797539 :                 lex->token_terminator = s + 1;
    1855       797539 :                 lex->token_type = JSON_TOKEN_COLON;
    1856       797539 :                 break;
    1857      1371731 :             case '"':
    1858              :                 /* string */
    1859      1371731 :                 result = json_lex_string(lex);
    1860      1371731 :                 if (result != JSON_SUCCESS)
    1861        32783 :                     return result;
    1862      1338948 :                 lex->token_type = JSON_TOKEN_STRING;
    1863      1338948 :                 break;
    1864          349 :             case '-':
    1865              :                 /* Negative number. */
    1866          349 :                 result = json_lex_number(lex, s + 1, NULL, NULL);
    1867          349 :                 if (result != JSON_SUCCESS)
    1868            3 :                     return result;
    1869          346 :                 lex->token_type = JSON_TOKEN_NUMBER;
    1870          346 :                 break;
    1871       229065 :             case '0':
    1872              :             case '1':
    1873              :             case '2':
    1874              :             case '3':
    1875              :             case '4':
    1876              :             case '5':
    1877              :             case '6':
    1878              :             case '7':
    1879              :             case '8':
    1880              :             case '9':
    1881              :                 /* Positive number. */
    1882       229065 :                 result = json_lex_number(lex, s, NULL, NULL);
    1883       229065 :                 if (result != JSON_SUCCESS)
    1884          453 :                     return result;
    1885       228612 :                 lex->token_type = JSON_TOKEN_NUMBER;
    1886       228612 :                 break;
    1887        30269 :             default:
    1888              :                 {
    1889              :                     const char *p;
    1890              : 
    1891              :                     /*
    1892              :                      * We're not dealing with a string, number, legal
    1893              :                      * punctuation mark, or end of string.  The only legal
    1894              :                      * tokens we might find here are true, false, and null,
    1895              :                      * but for error reporting purposes we scan until we see a
    1896              :                      * non-alphanumeric character.  That way, we can report
    1897              :                      * the whole word as an unexpected token, rather than just
    1898              :                      * some unintuitive prefix thereof.
    1899              :                      */
    1900       163357 :                     for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++)
    1901              :                          /* skip */ ;
    1902              : 
    1903              :                     /*
    1904              :                      * We got some sort of unexpected punctuation or an
    1905              :                      * otherwise unexpected character, so just complain about
    1906              :                      * that one character.
    1907              :                      */
    1908        30269 :                     if (p == s)
    1909              :                     {
    1910           40 :                         lex->prev_token_terminator = lex->token_terminator;
    1911           40 :                         lex->token_terminator = s + 1;
    1912           40 :                         return JSON_INVALID_TOKEN;
    1913              :                     }
    1914              : 
    1915        30229 :                     if (lex->incremental && !lex->inc_state->is_last_chunk &&
    1916         7832 :                         p == lex->input + lex->input_length)
    1917              :                     {
    1918         1832 :                         jsonapi_appendBinaryStringInfo(&(lex->inc_state->partial_token), s, end - s);
    1919         1832 :                         return JSON_INCOMPLETE;
    1920              :                     }
    1921              : 
    1922              :                     /*
    1923              :                      * We've got a real alphanumeric token here.  If it
    1924              :                      * happens to be true, false, or null, all is well.  If
    1925              :                      * not, error out.
    1926              :                      */
    1927        28397 :                     lex->prev_token_terminator = lex->token_terminator;
    1928        28397 :                     lex->token_terminator = p;
    1929        28397 :                     if (p - s == 4)
    1930              :                     {
    1931        13186 :                         if (memcmp(s, "true", 4) == 0)
    1932         3700 :                             lex->token_type = JSON_TOKEN_TRUE;
    1933         9486 :                         else if (memcmp(s, "null", 4) == 0)
    1934         9480 :                             lex->token_type = JSON_TOKEN_NULL;
    1935              :                         else
    1936            6 :                             return JSON_INVALID_TOKEN;
    1937              :                     }
    1938        15211 :                     else if (p - s == 5 && memcmp(s, "false", 5) == 0)
    1939        15083 :                         lex->token_type = JSON_TOKEN_FALSE;
    1940              :                     else
    1941          128 :                         return JSON_INVALID_TOKEN;
    1942              :                 }
    1943              :         }                       /* end of switch */
    1944              :     }
    1945              : 
    1946      7129785 :     if (lex->incremental && lex->token_type == JSON_TOKEN_END && !lex->inc_state->is_last_chunk)
    1947       287665 :         return JSON_INCOMPLETE;
    1948              :     else
    1949      6842120 :         return JSON_SUCCESS;
    1950              : }
    1951              : 
    1952              : /*
    1953              :  * The next token in the input stream is known to be a string; lex it.
    1954              :  *
    1955              :  * If lex->need_escapes is set, reset lex->strval and fill it with the decoded
    1956              :  * string.  On success, set lex->token_terminator just past the closing quote
    1957              :  * and move its old value to lex->prev_token_terminator.  Return JSON_SUCCESS,
    1958              :  * an error code, or JSON_INCOMPLETE if a non-final chunk ends mid-string.
    1959              :  *
    1960              :  * Note: be careful that all error exits advance lex->token_terminator
    1961              :  * to the point after the character we detected the error on.
    1962              :  */
    1963              : static inline JsonParseErrorType
    1964      1371731 : json_lex_string(JsonLexContext *lex)
    1965              : {
    1966              :     const char *s;
    1967      1371731 :     const char *const end = lex->input + lex->input_length;
    1968      1371731 :     int         hi_surrogate = -1; /* pending high surrogate, or -1 */
    1969              : 
    1970              :     /* Convenience macros for error exits; both return from this function */
    1971              : #define FAIL_OR_INCOMPLETE_AT_CHAR_START(code) \
    1972              :     do { \
    1973              :         if (lex->incremental && !lex->inc_state->is_last_chunk) \
    1974              :         { \
    1975              :             jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token, \
    1976              :                                            lex->token_start, \
    1977              :                                            end - lex->token_start); \
    1978              :             return JSON_INCOMPLETE; \
    1979              :         } \
    1980              :         lex->token_terminator = s; \
    1981              :         return code; \
    1982              :     } while (0)
    1983              : #define FAIL_AT_CHAR_END(code) \
    1984              :     do { \
    1985              :         ptrdiff_t   remaining = end - s; \
    1986              :         int         charlen; \
    1987              :         charlen = pg_encoding_mblen_or_incomplete(lex->input_encoding, \
    1988              :                                                   s, remaining); \
    1989              :         lex->token_terminator = (charlen <= remaining) ? s + charlen : end; \
    1990              :         return code; \
    1991              :     } while (0)
    1992              : 
    1993      1371731 :     if (lex->need_escapes)
    1994              :     {
    1995              : #ifdef JSONAPI_USE_PQEXPBUFFER
    1996              :         /* make sure initialization succeeded */
    1997          668 :         if (lex->strval == NULL)
    1998            0 :             return JSON_OUT_OF_MEMORY;
    1999              : #endif
    2000      1239546 :         jsonapi_resetStringInfo(lex->strval);
    2001              :     }
    2002              : 
    2003              :     Assert(lex->input_length > 0);
    2004      1371731 :     s = lex->token_start;
    2005              :     for (;;)
    2006              :     {
    2007      2743889 :         s++;
    2008              :         /* Premature end of the string. */
    2009      2743889 :         if (s >= end)
    2010        32332 :             FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
    2011      2711557 :         else if (*s == '"')
    2012      1338996 :             break;
    2013      1372561 :         else if (*s == '\\')
    2014              :         {
    2015              :             /* OK, we have an escape character. */
    2016         5267 :             s++;
    2017         5267 :             if (s >= end)
    2018           96 :                 FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
    2019         5171 :             else if (*s == 'u')
    2020              :             {
    2021              :                 int         i;
    2022         1991 :                 int         ch = 0;
    2023              : 
    2024         9581 :                 for (i = 1; i <= 4; i++) /* read four hex digits into ch */
    2025              :                 {
    2026         7737 :                     s++;
    2027         7737 :                     if (s >= end)
    2028          128 :                         FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
    2029         7609 :                     else if (*s >= '0' && *s <= '9')
    2030         4711 :                         ch = (ch * 16) + (*s - '0');
    2031         2898 :                     else if (*s >= 'a' && *s <= 'f')
    2032         2867 :                         ch = (ch * 16) + (*s - 'a') + 10;
    2033           31 :                     else if (*s >= 'A' && *s <= 'F')
    2034           12 :                         ch = (ch * 16) + (*s - 'A') + 10;
    2035              :                     else
    2036           19 :                         FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT);
    2037              :                 }
    2038         1844 :                 if (lex->need_escapes)
    2039              :                 {
    2040              :                     /*
    2041              :                      * Combine surrogate pairs: hold a high half until its low half.
    2042              :                      */
    2043          178 :                     if (is_utf16_surrogate_first(ch))
    2044              :                     {
    2045           84 :                         if (hi_surrogate != -1)
    2046            6 :                             FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE);
    2047           78 :                         hi_surrogate = ch;
    2048           78 :                         continue; /* expect the low half next */
    2049              :                     }
    2050           94 :                     else if (is_utf16_surrogate_second(ch))
    2051              :                     {
    2052           30 :                         if (hi_surrogate == -1)
    2053           12 :                             FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2054           18 :                         ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
    2055           18 :                         hi_surrogate = -1;
    2056              :                     }
    2057              : 
    2058           82 :                     if (hi_surrogate != -1)
    2059            0 :                         FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2060              : 
    2061              :                     /*
    2062              :                      * Reject invalid cases.  We can't have a value above
    2063              :                      * 0xFFFF here (since we only accepted 4 hex digits
    2064              :                      * above), so no need to test for out-of-range chars.
    2065              :                      */
    2066           82 :                     if (ch == 0)
    2067              :                     {
    2068              :                         /* We can't allow code point zero, since our TEXT type doesn't */
    2069           12 :                         FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO);
    2070              :                     }
    2071              : 
    2072              :                     /*
    2073              :                      * Add the represented character to lex->strval.  In the
    2074              :                      * backend, we can let pg_unicode_to_server_noerror()
    2075              :                      * handle any required character set conversion; in
    2076              :                      * frontend, we can only deal with trivial conversions.
    2077              :                      */
    2078              : #ifndef FRONTEND
    2079              :                     {
    2080              :                         char        cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
    2081              : 
    2082           42 :                         if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
    2083            0 :                             FAIL_AT_CHAR_END(JSON_UNICODE_UNTRANSLATABLE);
    2084           42 :                         appendStringInfoString(lex->strval, cbuf);
    2085              :                     }
    2086              : #else
    2087           28 :                     if (lex->input_encoding == PG_UTF8)
    2088              :                     {
    2089              :                         /* OK, we can map the code point to UTF8 easily */
    2090              :                         char        utf8str[5];
    2091              :                         int         utf8len;
    2092              : 
    2093           28 :                         unicode_to_utf8(ch, (unsigned char *) utf8str);
    2094           28 :                         utf8len = pg_utf_mblen((unsigned char *) utf8str);
    2095           28 :                         jsonapi_appendBinaryStringInfo(lex->strval, utf8str, utf8len);
    2096              :                     }
    2097            0 :                     else if (ch <= 0x007f)
    2098              :                     {
    2099              :                         /* The ASCII range is the same in all encodings */
    2100            0 :                         jsonapi_appendStringInfoChar(lex->strval, (char) ch);
    2101              :                     }
    2102              :                     else
    2103            0 :                         FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE);
    2104              : #endif                          /* FRONTEND */
    2105              :                 }
    2106              :             }
    2107         3180 :             else if (lex->need_escapes)
    2108              :             {
    2109          393 :                 if (hi_surrogate != -1)
    2110            0 :                     FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2111              : 
    2112          393 :                 switch (*s)
    2113              :                 {
    2114          283 :                     case '"':
    2115              :                     case '\\':
    2116              :                     case '/':
    2117          283 :                         jsonapi_appendStringInfoChar(lex->strval, *s);
    2118          283 :                         break;
    2119           22 :                     case 'b':
    2120           22 :                         jsonapi_appendStringInfoChar(lex->strval, '\b');
    2121           22 :                         break;
    2122            4 :                     case 'f':
    2123            4 :                         jsonapi_appendStringInfoChar(lex->strval, '\f');
    2124            4 :                         break;
    2125           31 :                     case 'n':
    2126           31 :                         jsonapi_appendStringInfoChar(lex->strval, '\n');
    2127           31 :                         break;
    2128            4 :                     case 'r':
    2129            4 :                         jsonapi_appendStringInfoChar(lex->strval, '\r');
    2130            4 :                         break;
    2131           46 :                     case 't':
    2132           46 :                         jsonapi_appendStringInfoChar(lex->strval, '\t');
    2133           46 :                         break;
    2134            3 :                     default:
    2135              : 
    2136              :                         /*
    2137              :                          * Not a valid string escape, so signal error.  We
    2138              :                          * adjust token_start so that just the escape sequence
    2139              :                          * is reported, not the whole string.
    2140              :                          */
    2141            3 :                         lex->token_start = s;
    2142            3 :                         FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
    2143              :                 }
    2144              :             }
    2145         2787 :             else if (strchr("\"\\/bfnrt", *s) == NULL)
    2146              :             {
    2147              :                 /*
    2148              :                  * Not de-escaping: just reject any invalid escape character.
    2149              :                  *
    2150              :                  * It's very tempting to remove the strchr() call here and
    2151              :                  * replace it with a switch statement, but testing so far has
    2152              :                  * shown it's not a performance win.
    2153              :                  */
    2154           63 :                 lex->token_start = s;
    2155           63 :                 FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
    2156              :             }
    2157              :         }
    2158              :         else
    2159              :         {
    2160      1367294 :             const char *p = s;
    2161              : 
    2162      1367294 :             if (hi_surrogate != -1)
    2163            6 :                 FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2164              : 
    2165              :             /*
    2166              :              * Skip Vector8-sized chunks that need no special handling, so we
    2167              :              * can batch calls to jsonapi_appendBinaryStringInfo.
    2168              :              */
    2169      1367288 :             while (p < end - sizeof(Vector8) &&
    2170      1679240 :                    !pg_lfind8('\\', (const uint8 *) p, sizeof(Vector8)) &&
    2171      3447293 :                    !pg_lfind8('"', (const uint8 *) p, sizeof(Vector8)) &&
    2172       402697 :                    !pg_lfind8_le(31, (const uint8 *) p, sizeof(Vector8)))
    2173       402697 :                 p += sizeof(Vector8);
    2174              : 
    2175     10588239 :             for (; p < end; p++)
    2176              :             {
    2177     10561503 :                 if (*p == '\\' || *p == '"')
    2178              :                     break;
    2179      9221009 :                 else if ((unsigned char) *p <= 31)
    2180              :                 {
    2181              :                     /* Per RFC4627, these characters MUST be escaped. */
    2182              :                     /*
    2183              :                      * Since *p isn't printable, exclude it from the context
    2184              :                      * string
    2185              :                      */
    2186           58 :                     lex->token_terminator = p;
    2187           58 :                     return JSON_ESCAPING_REQUIRED;
    2188              :                 }
    2189              :             }
    2190              : 
    2191      1367230 :             if (lex->need_escapes)
    2192      1239680 :                 jsonapi_appendBinaryStringInfo(lex->strval, s, p - s);
    2193              : 
    2194              :             /*
    2195              :              * s will be incremented at the top of the loop, so set it to just
    2196              :              * behind our lookahead position
    2197              :              */
    2198      1367230 :             s = p - 1;
    2199              :         }
    2200              :     }
    2201              : 
    2202      1338996 :     if (hi_surrogate != -1) /* closing quote came right after a high surrogate */
    2203              :     {
    2204           48 :         lex->token_terminator = s + 1;
    2205           48 :         return JSON_UNICODE_LOW_SURROGATE;
    2206              :     }
    2207              : 
    2208              : #ifdef JSONAPI_USE_PQEXPBUFFER
    2209        34448 :     if (lex->need_escapes && PQExpBufferBroken(lex->strval))
    2210            0 :         return JSON_OUT_OF_MEMORY;
    2211              : #endif
    2212              : 
    2213              :     /* Hooray, we found the end of the string! */
    2214      1338948 :     lex->prev_token_terminator = lex->token_terminator;
    2215      1338948 :     lex->token_terminator = s + 1;
    2216      1338948 :     return JSON_SUCCESS;
    2217              : 
    2218              : #undef FAIL_OR_INCOMPLETE_AT_CHAR_START
    2219              : #undef FAIL_AT_CHAR_END
    2220              : }
    2221              : 
    2222              : /*
    2223              :  * The next token in the input stream is known to be a number; lex it.
    2224              :  *
    2225              :  * In JSON, a number consists of four parts:
    2226              :  *
    2227              :  * (1) An optional minus sign ('-').
    2228              :  *
    2229              :  * (2) Either a single '0', or a string of one or more digits that does not
    2230              :  *     begin with a '0'.
    2231              :  *
    2232              :  * (3) An optional decimal part, consisting of a period ('.') followed by
    2233              :  *     one or more digits.  (Note: While this part can be omitted
    2234              :  *     completely, it's not OK to have only the decimal point without
    2235              :  *     any digits afterwards.)
    2236              :  *
    2237              :  * (4) An optional exponent part, consisting of 'e' or 'E', optionally
    2238              :  *     followed by '+' or '-', followed by one or more digits.  (Note:
    2239              :  *     As with the decimal part, if 'e' or 'E' is present, it must be
    2240              :  *     followed by at least one digit.)
    2241              :  *
    2242              :  * The 's' argument to this function points to the ostensible beginning
    2243              :  * of part 2 - i.e. the character after any optional minus sign, or the
    2244              :  * first character of the string if there is none.
    2245              :  *
    2246              :  * If num_err is not NULL, we store an error flag in *num_err rather than
    2247              :  * returning JSON_INVALID_TOKEN for a bad number.  Also, if total_len is not
    2248              :  * NULL, the distance from lex->input to the token end+1 is stored there.
    2249              :  */
    2250              : static inline JsonParseErrorType
    2251       229437 : json_lex_number(JsonLexContext *lex, const char *s,
    2252              :                 bool *num_err, size_t *total_len)
    2253              : {
    2254       229437 :     bool        error = false;
    2255       229437 :     int         len = s - lex->input;	/* offset of s from the start of the input buffer */
    2256              : 
    2257              :     /* Part (1): leading sign indicator. */
    2258              :     /* Caller already did this for us; so do nothing. */
    2259              : 
    2260              :     /* Part (2): parse main digit string. */
    2261       229437 :     if (len < lex->input_length && *s == '0')
    2262              :     {
    2263        64494 :         s++;	/* a leading zero stands alone; no further digits are consumed here */
    2264        64494 :         len++;
    2265              :     }
    2266       164943 :     else if (len < lex->input_length && *s >= '1' && *s <= '9')
    2267              :     {
    2268              :         do
    2269              :         {
    2270       559995 :             s++;
    2271       559995 :             len++;
    2272       559995 :         } while (len < lex->input_length && *s >= '0' && *s <= '9');
    2273              :     }
    2274              :     else
    2275            4 :         error = true;	/* no digit at all where part (2) should start */
    2276              : 
    2277              :     /* Part (3): parse optional decimal portion. */
    2278       229437 :     if (len < lex->input_length && *s == '.')
    2279              :     {
    2280        22828 :         s++;
    2281        22828 :         len++;
    2282        22828 :         if (len == lex->input_length || *s < '0' || *s > '9')
    2283            6 :             error = true;	/* decimal point not followed by a digit */
    2284              :         else
    2285              :         {
    2286              :             do
    2287              :             {
    2288        54361 :                 s++;
    2289        54361 :                 len++;
    2290        54361 :             } while (len < lex->input_length && *s >= '0' && *s <= '9');
    2291              :         }
    2292              :     }
    2293              : 
    2294              :     /* Part (4): parse optional exponent. */
    2295       229437 :     if (len < lex->input_length && (*s == 'e' || *s == 'E'))
    2296              :     {
    2297           47 :         s++;
    2298           47 :         len++;
    2299           47 :         if (len < lex->input_length && (*s == '+' || *s == '-'))
    2300              :         {
    2301            5 :             s++;
    2302            5 :             len++;
    2303              :         }
    2304           47 :         if (len == lex->input_length || *s < '0' || *s > '9')
    2305            6 :             error = true;	/* exponent marker (and optional sign) not followed by a digit */
    2306              :         else
    2307              :         {
    2308              :             do
    2309              :             {
    2310          142 :                 s++;
    2311          142 :                 len++;
    2312          142 :             } while (len < lex->input_length && *s >= '0' && *s <= '9');
    2313              :         }
    2314              :     }
    2315              : 
    2316              :     /*
    2317              :      * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff
    2318              :      * here should be considered part of the token for error-reporting
    2319              :      * purposes.
    2320              :      */
    2321       229635 :     for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
    2322          198 :         error = true;	/* each trailing byte is swallowed into the token and marks it bad */
    2323              : 
    2324       229437 :     if (total_len != NULL)
    2325           23 :         *total_len = len;	/* stored before any error or incomplete-input handling below */
    2326              : 
    2327       229437 :     if (lex->incremental && !lex->inc_state->is_last_chunk &&
    2328        64641 :         len >= lex->input_length)	/* scan stopped at the end of a chunk that is not the last one */
    2329              :     {
    2330          389 :         jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token,
    2331          389 :                                        lex->token_start, s - lex->token_start);	/* stash the bytes scanned so far */
    2332          389 :         if (num_err != NULL)
    2333            0 :             *num_err = error;
    2334              : 
    2335          389 :         return JSON_INCOMPLETE;	/* returned even if error is already set */
    2336              :     }
    2337       229048 :     else if (num_err != NULL)
    2338              :     {
    2339              :         /* let the caller handle any error; lex's token pointers are not updated on this path */
    2340           23 :         *num_err = error;
    2341              :     }
    2342              :     else
    2343              :     {
    2344              :         /* return token endpoint */
    2345       229025 :         lex->prev_token_terminator = lex->token_terminator;
    2346       229025 :         lex->token_terminator = s;
    2347              :         /* handle error if any */
    2348       229025 :         if (error)
    2349           67 :             return JSON_INVALID_TOKEN;
    2350              :     }
    2351              : 
    2352       228981 :     return JSON_SUCCESS;	/* also the result when num_err was supplied, whatever flag was stored */
    2353              : }
    2354              : 
    2355              : /*
    2356              :  * Pick the JsonParseErrorType that matches the current parsing context.
    2357              :  *
    2358              :  * lex->token_start and lex->token_terminator must identify the current token.
    2359              :  */
    2360              : static JsonParseErrorType
    2361          602 : report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
    2362              : {
    2363              :     /* Handle case where the input ended prematurely. */
    2364          602 :     if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
    2365          133 :         return JSON_EXPECTED_MORE;	/* takes precedence over whatever ctx says */
    2366              : 
    2367              :     /* Otherwise choose the error type based on the parsing context. */
    2368          469 :     switch (ctx)
    2369              :     {
    2370           24 :         case JSON_PARSE_END:
    2371           24 :             return JSON_EXPECTED_END;
    2372           87 :         case JSON_PARSE_VALUE:
    2373           87 :             return JSON_EXPECTED_JSON;
    2374           82 :         case JSON_PARSE_STRING:
    2375           82 :             return JSON_EXPECTED_STRING;
    2376           28 :         case JSON_PARSE_ARRAY_START:
    2377           28 :             return JSON_EXPECTED_ARRAY_FIRST;
    2378           36 :         case JSON_PARSE_ARRAY_NEXT:
    2379           36 :             return JSON_EXPECTED_ARRAY_NEXT;
    2380           62 :         case JSON_PARSE_OBJECT_START:
    2381           62 :             return JSON_EXPECTED_OBJECT_FIRST;
    2382           64 :         case JSON_PARSE_OBJECT_LABEL:
    2383           64 :             return JSON_EXPECTED_COLON;
    2384           86 :         case JSON_PARSE_OBJECT_NEXT:
    2385           86 :             return JSON_EXPECTED_OBJECT_NEXT;
    2386            0 :         case JSON_PARSE_OBJECT_COMMA:
    2387            0 :             return JSON_EXPECTED_STRING;	/* same code as JSON_PARSE_STRING */
    2388              :     }
    2389              : 
    2390              :     /*
    2391              :      * We don't use a default: case, so that the compiler will warn about
    2392              :      * unhandled enum values.
    2393              :      */
    2394              :     Assert(false);
    2395            0 :     return JSON_SUCCESS;        /* silence stupider compilers */
    2396              : }
    2397              : 
    2398              : /*
    2399              :  * Construct an (already translated) detail message for a JSON error.
    2400              :  *
    2401              :  * The returned pointer should not be freed; the allocation is either static
    2402              :  * or owned by the JsonLexContext.
    2403              :  */
    2404              : char *
    2405         1182 : json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
    2406              : {
    2407         1182 :     if (error == JSON_OUT_OF_MEMORY || lex == &failed_oom)
    2408              :     {
    2409              :         /* Short circuit. Allocating anything for this case is unhelpful. */
    2410            0 :         return _("out of memory");
    2411              :     }
    2412              : 
    2413         1182 :     if (lex->errormsg)
    2414            0 :         jsonapi_resetStringInfo(lex->errormsg);	/* reuse the buffer already attached to lex */
    2415              :     else
    2416         1182 :         lex->errormsg = jsonapi_makeStringInfo();	/* none yet: create one and keep it in lex */
    2417              : 
    2418              :     /*
    2419              :      * A helper for error messages that should print the current token. The
    2420              :      * format must contain exactly one %.*s specifier.  (The expansion already ends in a semicolon.)
    2421              :      */
    2422              : #define json_token_error(lex, format) \
    2423              :     jsonapi_appendStringInfo((lex)->errormsg, _(format), \
    2424              :                              (int) ((lex)->token_terminator - (lex)->token_start), \
    2425              :                              (lex)->token_start);
    2426              : 
    2427         1182 :     switch (error)
    2428              :     {
    2429            0 :         case JSON_INCOMPLETE:
    2430              :         case JSON_SUCCESS:
    2431              :             /* fall through to the error code after switch */
    2432            0 :             break;
    2433            0 :         case JSON_INVALID_LEXER_TYPE:
    2434            0 :             if (lex->incremental)
    2435            0 :                 return _("Recursive descent parser cannot use incremental lexer.");
    2436              :             else
    2437            0 :                 return _("Incremental parser requires incremental lexer.");
    2438          256 :         case JSON_NESTING_TOO_DEEP:
    2439          256 :             return (_("JSON nested too deep, maximum permitted depth is 6400."));	/* the limit is spelled out in the message text itself */
    2440           66 :         case JSON_ESCAPING_INVALID:
    2441           66 :             json_token_error(lex, "Escape sequence \"\\%.*s\" is invalid.");
    2442           66 :             break;
    2443           58 :         case JSON_ESCAPING_REQUIRED:
    2444           58 :             jsonapi_appendStringInfo(lex->errormsg,
    2445           58 :                                      _("Character with value 0x%02x must be escaped."),
    2446           58 :                                      (unsigned char) *(lex->token_terminator));	/* reports the byte token_terminator points at */
    2447           58 :             break;
    2448           24 :         case JSON_EXPECTED_END:
    2449           24 :             json_token_error(lex, "Expected end of input, but found \"%.*s\".");
    2450           24 :             break;
    2451           28 :         case JSON_EXPECTED_ARRAY_FIRST:
    2452           28 :             json_token_error(lex, "Expected array element or \"]\", but found \"%.*s\".");
    2453           28 :             break;
    2454           36 :         case JSON_EXPECTED_ARRAY_NEXT:
    2455           36 :             json_token_error(lex, "Expected \",\" or \"]\", but found \"%.*s\".");
    2456           36 :             break;
    2457           64 :         case JSON_EXPECTED_COLON:
    2458           64 :             json_token_error(lex, "Expected \":\", but found \"%.*s\".");
    2459           64 :             break;
    2460           60 :         case JSON_EXPECTED_JSON:
    2461           60 :             json_token_error(lex, "Expected JSON value, but found \"%.*s\".");
    2462           60 :             break;
    2463           95 :         case JSON_EXPECTED_MORE:
    2464           95 :             return _("The input string ended unexpectedly.");
    2465           62 :         case JSON_EXPECTED_OBJECT_FIRST:
    2466           62 :             json_token_error(lex, "Expected string or \"}\", but found \"%.*s\".");
    2467           62 :             break;
    2468           86 :         case JSON_EXPECTED_OBJECT_NEXT:
    2469           86 :             json_token_error(lex, "Expected \",\" or \"}\", but found \"%.*s\".");
    2470           86 :             break;
    2471           82 :         case JSON_EXPECTED_STRING:
    2472           82 :             json_token_error(lex, "Expected string, but found \"%.*s\".");
    2473           82 :             break;
    2474          210 :         case JSON_INVALID_TOKEN:
    2475          210 :             json_token_error(lex, "Token \"%.*s\" is invalid.");
    2476          210 :             break;
    2477            0 :         case JSON_OUT_OF_MEMORY:
    2478              :             /* should have been handled above; use the error path */
    2479            0 :             break;
    2480           12 :         case JSON_UNICODE_CODE_POINT_ZERO:
    2481           12 :             return _("\\u0000 cannot be converted to text.");
    2482           19 :         case JSON_UNICODE_ESCAPE_FORMAT:
    2483           19 :             return _("\"\\u\" must be followed by four hexadecimal digits.");
    2484            0 :         case JSON_UNICODE_HIGH_ESCAPE:
    2485              :             /* note: this case is only reachable in frontend not backend */
    2486            0 :             return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
    2487            0 :         case JSON_UNICODE_UNTRANSLATABLE:
    2488              : 
    2489              :             /*
    2490              :              * Note: this case is only reachable in backend and not frontend.
    2491              :              * #ifdef it away so the frontend doesn't try to link against
    2492              :              * backend functionality.
    2493              :              */
    2494              : #ifndef FRONTEND
    2495            0 :             return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."),
    2496              :                             GetDatabaseEncodingName());	/* NOTE(review): psprintf output looks like a fresh allocation, neither static nor held in lex->errormsg - confirm against the ownership rule in the header */
    2497              : #else
    2498              :             Assert(false);
    2499            0 :             break;	/* leaves errormsg empty, so the generic message after the switch is used */
    2500              : #endif
    2501            6 :         case JSON_UNICODE_HIGH_SURROGATE:
    2502            6 :             return _("Unicode high surrogate must not follow a high surrogate.");
    2503           18 :         case JSON_UNICODE_LOW_SURROGATE:
    2504           18 :             return _("Unicode low surrogate must follow a high surrogate.");
    2505            0 :         case JSON_SEM_ACTION_FAILED:
    2506              :             /* fall through to the error code after switch */
    2507            0 :             break;
    2508              :     }
    2509              : #undef json_token_error
    2510              : 
    2511              :     /* Note that lex->errormsg can be NULL in shlib code. */
    2512          776 :     if (lex->errormsg && lex->errormsg->len == 0)
    2513              :     {
    2514              :         /*
    2515              :          * We don't use a default: case, so that the compiler will warn about
    2516              :          * unhandled enum values.  But this needs to be here anyway to cover
    2517              :          * the possibility of an incorrect input.
    2518              :          */
    2519            0 :         jsonapi_appendStringInfo(lex->errormsg,
    2520              :                                  "unexpected json parse error type: %d",
    2521              :                                  (int) error);	/* this fallback text is not passed through _() */
    2522              :     }
    2523              : 
    2524              : #ifdef JSONAPI_USE_PQEXPBUFFER
    2525          316 :     if (PQExpBufferBroken(lex->errormsg))	/* NOTE(review): presumably also true for a NULL buffer, which the dereference below depends on - confirm */
    2526            0 :         return _("out of memory while constructing error description");
    2527              : #endif
    2528              : 
    2529          776 :     return lex->errormsg->data;	/* points into lex->errormsg, which the next call resets */
    2530              : }
        

Generated by: LCOV version 2.0-1