LCOV - code coverage report
Current view: top level - src/common - jsonapi.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 87.1 % 997 868
Test Date: 2026-05-01 11:16:27 Functions: 100.0 % 31 31
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * jsonapi.c
       4              :  *      JSON parser and lexer interfaces
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  * IDENTIFICATION
      10              :  *    src/common/jsonapi.c
      11              :  *
      12              :  *-------------------------------------------------------------------------
      13              :  */
      14              : #ifndef FRONTEND
      15              : #include "postgres.h"
      16              : #else
      17              : #include "postgres_fe.h"
      18              : #endif
      19              : 
      20              : #include "common/jsonapi.h"
      21              : #include "mb/pg_wchar.h"
      22              : #include "port/pg_lfind.h"
      23              : 
      24              : #ifdef JSONAPI_USE_PQEXPBUFFER
      25              : #include "pqexpbuffer.h"
      26              : #else
      27              : #include "lib/stringinfo.h"
      28              : #include "miscadmin.h"
      29              : #endif
      30              : 
/*
 * By default, we will use palloc/pfree along with StringInfo.  In libpq,
 * use malloc and PQExpBuffer, and return JSON_OUT_OF_MEMORY on out-of-memory.
 */
#ifdef JSONAPI_USE_PQEXPBUFFER

/* shlib (libpq) flavor: plain C allocators, which return NULL on failure */
#define STRDUP(s) strdup(s)
#define ALLOC(size) malloc(size)
#define ALLOC0(size) calloc(1, size)
#define REALLOC realloc
#define FREE(s) free(s)

#define jsonapi_appendStringInfo            appendPQExpBuffer
#define jsonapi_appendBinaryStringInfo      appendBinaryPQExpBuffer
#define jsonapi_appendStringInfoChar        appendPQExpBufferChar
/* XXX should we add a macro version to PQExpBuffer? */
#define jsonapi_appendStringInfoCharMacro   appendPQExpBufferChar
#define jsonapi_makeStringInfo              createPQExpBuffer
#define jsonapi_initStringInfo              initPQExpBuffer
#define jsonapi_resetStringInfo             resetPQExpBuffer
#define jsonapi_termStringInfo              termPQExpBuffer
#define jsonapi_destroyStringInfo           destroyPQExpBuffer

#else                           /* !JSONAPI_USE_PQEXPBUFFER */

/* backend/frontend flavor: palloc-based allocators, which do not return NULL */
#define STRDUP(s) pstrdup(s)
#define ALLOC(size) palloc(size)
#define ALLOC0(size) palloc0(size)
#define REALLOC repalloc

#ifdef FRONTEND
#define FREE pfree
#else
/*
 * Backend pfree() doesn't handle NULL pointers like the frontend's does; smooth
 * that over to reduce mental gymnastics. Avoid multiple evaluation of the macro
 * argument to avoid future hair-pulling.
 */
#define FREE(s) do {    \
    void *__v = (s);    \
    if (__v)            \
        pfree(__v);     \
} while (0)
#endif

#define jsonapi_appendStringInfo            appendStringInfo
#define jsonapi_appendBinaryStringInfo      appendBinaryStringInfo
#define jsonapi_appendStringInfoChar        appendStringInfoChar
#define jsonapi_appendStringInfoCharMacro   appendStringInfoCharMacro
#define jsonapi_makeStringInfo              makeStringInfo
#define jsonapi_initStringInfo              initStringInfo
#define jsonapi_resetStringInfo             resetStringInfo
/* NB: frees only the data buffer, not the StringInfoData itself */
#define jsonapi_termStringInfo(s)           pfree((s)->data)
#define jsonapi_destroyStringInfo           destroyStringInfo

#endif                          /* JSONAPI_USE_PQEXPBUFFER */
      87              : 
/*
 * The context of the parser is maintained by the recursive descent
 * mechanism, but is passed explicitly to the error reporting routine
 * for better diagnostics.
 */
typedef enum                    /* contexts of JSON parser */
{
    JSON_PARSE_VALUE,           /* expecting a value */
    JSON_PARSE_STRING,          /* expecting a string (for a field name) */
    JSON_PARSE_ARRAY_START,     /* saw '[', expecting value or ']' */
    JSON_PARSE_ARRAY_NEXT,      /* saw array element, expecting ',' or ']' */
    JSON_PARSE_OBJECT_START,    /* saw '{', expecting label or '}' */
    JSON_PARSE_OBJECT_LABEL,    /* saw object label, expecting ':' */
    JSON_PARSE_OBJECT_NEXT,     /* saw object value, expecting ',' or '}' */
    JSON_PARSE_OBJECT_COMMA,    /* saw object ',', expecting next label */
    JSON_PARSE_END,             /* saw the end of a document, expect nothing */
} JsonParseContext;
     105              : 
/*
 * Setup for table-driven parser.
 * These enums need to be separate from the JsonTokenType and from each other
 * so we can have all of them on the prediction stack, which consists of
 * tokens, non-terminals, and semantic action markers.
 */

/* Non-terminals start at 32 (0x20), so IS_NT() can test a single bit. */
enum JsonNonTerminal
{
    JSON_NT_JSON = 32,
    JSON_NT_ARRAY_ELEMENTS,
    JSON_NT_MORE_ARRAY_ELEMENTS,
    JSON_NT_KEY_PAIRS,
    JSON_NT_MORE_KEY_PAIRS,
};
     121              : 
/* Semantic action markers start at 64 (0x40), so IS_SEM() can test a single bit. */
enum JsonParserSem
{
    JSON_SEM_OSTART = 64,       /* object start */
    JSON_SEM_OEND,              /* object end */
    JSON_SEM_ASTART,            /* array start */
    JSON_SEM_AEND,              /* array end */
    JSON_SEM_OFIELD_INIT,       /* object field init */
    JSON_SEM_OFIELD_START,      /* object field start */
    JSON_SEM_OFIELD_END,        /* object field end */
    JSON_SEM_AELEM_START,       /* array element start */
    JSON_SEM_AELEM_END,         /* array element end */
    JSON_SEM_SCALAR_INIT,       /* scalar init */
    JSON_SEM_SCALAR_CALL,       /* scalar callback */
};
     136              : 
/*
 * struct containing the 3 stacks used in non-recursive parsing,
 * and the token and value for scalars that need to be preserved
 * across calls.
 *
 * typedef appears in jsonapi.h
 */
struct JsonParserStack
{
    int         stack_size;     /* allocated entries in each of the stacks */
    char       *prediction;     /* prediction stack: tokens, non-terminals,
                                 * and semantic action markers */
    size_t      pred_index;     /* current position in prediction[] */
    /* these two are indexed by lex_level */
    char      **fnames;         /* field name per object nesting level */
    bool       *fnull;          /* per-level null indicator */
    JsonTokenType scalar_tok;   /* token type of pending scalar */
    char       *scalar_val;     /* value of pending scalar */
};
     155              : 
/*
 * struct containing state used when there is a possible partial token at the
 * end of a json chunk when we are doing incremental parsing.
 *
 * typedef appears in jsonapi.h
 */
struct JsonIncrementalState
{
    bool        started;        /* has parsing begun? guards against late
                                 * configuration changes */
    bool        is_last_chunk;  /* is this the final chunk of input? */
    bool        partial_completed;
    jsonapi_StrValType partial_token;   /* accumulates a token that straddles
                                         * a chunk boundary */
};
     169              : 
     170              : /*
     171              :  * constants and macros used in the nonrecursive parser
     172              :  */
     173              : #define JSON_NUM_TERMINALS 13
     174              : #define JSON_NUM_NONTERMINALS 5
     175              : #define JSON_NT_OFFSET JSON_NT_JSON
     176              : /* for indexing the table */
     177              : #define OFS(NT) (NT) - JSON_NT_OFFSET
     178              : /* classify items we get off the stack */
     179              : #define IS_SEM(x) ((x) & 0x40)
     180              : #define IS_NT(x)  ((x) & 0x20)
     181              : 
/*
 * These productions are stored in reverse order right to left so that when
 * they are pushed on the stack what we expect next is at the top of the stack.
 * Each production is terminated by a zero byte (not counted in its td_entry
 * length).
 */
static char JSON_PROD_EPSILON[] = {0};  /* epsilon - an empty production */

/* JSON -> string */
static char JSON_PROD_SCALAR_STRING[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_STRING, JSON_SEM_SCALAR_INIT, 0};

/* JSON -> number */
static char JSON_PROD_SCALAR_NUMBER[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NUMBER, JSON_SEM_SCALAR_INIT, 0};

/* JSON -> 'true' */
static char JSON_PROD_SCALAR_TRUE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_TRUE, JSON_SEM_SCALAR_INIT, 0};

/* JSON -> 'false' */
static char JSON_PROD_SCALAR_FALSE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_FALSE, JSON_SEM_SCALAR_INIT, 0};

/* JSON -> 'null' */
static char JSON_PROD_SCALAR_NULL[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NULL, JSON_SEM_SCALAR_INIT, 0};

/* JSON -> '{' KEY_PAIRS '}' */
static char JSON_PROD_OBJECT[] = {JSON_SEM_OEND, JSON_TOKEN_OBJECT_END, JSON_NT_KEY_PAIRS, JSON_TOKEN_OBJECT_START, JSON_SEM_OSTART, 0};

/* JSON -> '[' ARRAY_ELEMENTS ']' */
static char JSON_PROD_ARRAY[] = {JSON_SEM_AEND, JSON_TOKEN_ARRAY_END, JSON_NT_ARRAY_ELEMENTS, JSON_TOKEN_ARRAY_START, JSON_SEM_ASTART, 0};

/* ARRAY_ELEMENTS -> JSON MORE_ARRAY_ELEMENTS */
static char JSON_PROD_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, 0};

/* MORE_ARRAY_ELEMENTS -> ',' JSON MORE_ARRAY_ELEMENTS */
static char JSON_PROD_MORE_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, JSON_TOKEN_COMMA, 0};

/* KEY_PAIRS -> string ':' JSON MORE_KEY_PAIRS */
static char JSON_PROD_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, 0};

/* MORE_KEY_PAIRS -> ',' string ':'  JSON MORE_KEY_PAIRS */
static char JSON_PROD_MORE_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, JSON_TOKEN_COMMA, 0};

/*
 * Note: there are also epsilon productions for ARRAY_ELEMENTS,
 * MORE_ARRAY_ELEMENTS, KEY_PAIRS and MORE_KEY_PAIRS
 * They are all the same as none require any semantic actions.
 */
     226              : 
/*
 * Table connecting the productions with their director sets of
 * terminal symbols.
 * Any combination not specified here represents an error.
 */

typedef struct
{
    size_t      len;            /* production length, excluding the trailing
                                 * zero terminator */
    char       *prod;           /* production, stored right-to-left */
} td_entry;

/* the "- 1" keeps the trailing 0 byte out of len */
#define TD_ENTRY(PROD) { sizeof(PROD) - 1, (PROD) }
     240              : 
/*
 * Entries not listed below are zero-initialized ({0, NULL}) by C rules,
 * which the parser treats as a syntax error.
 */
static td_entry td_parser_table[JSON_NUM_NONTERMINALS][JSON_NUM_TERMINALS] =
{
    /* JSON */
    [OFS(JSON_NT_JSON)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_SCALAR_STRING),
    [OFS(JSON_NT_JSON)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_SCALAR_NUMBER),
    [OFS(JSON_NT_JSON)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_SCALAR_TRUE),
    [OFS(JSON_NT_JSON)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_SCALAR_FALSE),
    [OFS(JSON_NT_JSON)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_SCALAR_NULL),
    [OFS(JSON_NT_JSON)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY),
    [OFS(JSON_NT_JSON)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_OBJECT),
    /* ARRAY_ELEMENTS */
    [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
    [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
    [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
    [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
    [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
    [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
    [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
    [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
    /* MORE_ARRAY_ELEMENTS */
    [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_ARRAY_ELEMENTS),
    [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
    /* KEY_PAIRS */
    [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_KEY_PAIRS),
    [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
    /* MORE_KEY_PAIRS */
    [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_KEY_PAIRS),
    [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
};

/* the GOAL production. Not stored in the table, but will be the initial contents of the prediction stack */
static char JSON_PROD_GOAL[] = {JSON_TOKEN_END, JSON_NT_JSON, 0};
     273              : 
/* forward declarations for the lexer and recursive-descent parser routines */
static inline JsonParseErrorType json_lex_string(JsonLexContext *lex);
static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, const char *s,
                                                 bool *num_err, size_t *total_len);
static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, const JsonSemAction *sem);
static JsonParseErrorType parse_object_field(JsonLexContext *lex, const JsonSemAction *sem);
static JsonParseErrorType parse_object(JsonLexContext *lex, const JsonSemAction *sem);
static JsonParseErrorType parse_array_element(JsonLexContext *lex, const JsonSemAction *sem);
static JsonParseErrorType parse_array(JsonLexContext *lex, const JsonSemAction *sem);
static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
static bool allocate_incremental_state(JsonLexContext *lex);
static inline void set_fname(JsonLexContext *lex, char *fname);

/* the null action object used for pure validation */
const JsonSemAction nullSemAction =
{
    NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL
};

/* sentinels used for out-of-memory conditions (returned in lieu of NULL) */
static JsonLexContext failed_oom;
static JsonIncrementalState failed_inc_oom;
     296              : 
     297              : /* Parser support routines */
     298              : 
     299              : /*
     300              :  * lex_peek
     301              :  *
     302              :  * what is the current look_ahead token?
     303              : */
     304              : static inline JsonTokenType
     305      7811924 : lex_peek(JsonLexContext *lex)
     306              : {
     307      7811924 :     return lex->token_type;
     308              : }
     309              : 
     310              : /*
     311              :  * lex_expect
     312              :  *
     313              :  * move the lexer to the next token if the current look_ahead token matches
     314              :  * the parameter token. Otherwise, report an error.
     315              :  */
     316              : static inline JsonParseErrorType
     317       315042 : lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
     318              : {
     319       315042 :     if (lex_peek(lex) == token)
     320       314970 :         return json_lex(lex);
     321              :     else
     322           72 :         return report_parse_error(ctx, lex);
     323              : }
     324              : 
/*
 * chars to consider as part of an alphanumeric token
 *
 * Note: evaluates its argument multiple times; don't pass expressions
 * with side effects.
 */
#define JSON_ALPHANUMERIC_CHAR(c)  \
    (((c) >= 'a' && (c) <= 'z') || \
     ((c) >= 'A' && (c) <= 'Z') || \
     ((c) >= '0' && (c) <= '9') || \
     (c) == '_' || \
     IS_HIGHBIT_SET(c))
     332              : 
     333              : /*
     334              :  * Utility function to check if a string is a valid JSON number.
     335              :  *
     336              :  * str is of length len, and need not be null-terminated.
     337              :  */
     338              : bool
     339           23 : IsValidJsonNumber(const char *str, size_t len)
     340              : {
     341              :     bool        numeric_error;
     342              :     size_t      total_len;
     343           23 :     JsonLexContext dummy_lex = {0};
     344              : 
     345           23 :     if (len <= 0)
     346            0 :         return false;
     347              : 
     348              :     /*
     349              :      * json_lex_number expects a leading  '-' to have been eaten already.
     350              :      *
     351              :      * having to cast away the constness of str is ugly, but there's not much
     352              :      * easy alternative.
     353              :      */
     354           23 :     if (*str == '-')
     355              :     {
     356            2 :         dummy_lex.input = str + 1;
     357            2 :         dummy_lex.input_length = len - 1;
     358              :     }
     359              :     else
     360              :     {
     361           21 :         dummy_lex.input = str;
     362           21 :         dummy_lex.input_length = len;
     363              :     }
     364              : 
     365           23 :     dummy_lex.token_start = dummy_lex.input;
     366              : 
     367           23 :     json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
     368              : 
     369           23 :     return (!numeric_error) && (total_len == dummy_lex.input_length);
     370              : }
     371              : 
     372              : /*
     373              :  * makeJsonLexContextCstringLen
     374              :  *      Initialize the given JsonLexContext object, or create one
     375              :  *
     376              :  * If a valid 'lex' pointer is given, it is initialized.  This can
     377              :  * be used for stack-allocated structs, saving overhead.  If NULL is
     378              :  * given, a new struct is allocated.
     379              :  *
     380              :  * If need_escapes is true, ->strval stores the unescaped lexemes.
     381              :  * Unescaping is expensive, so only request it when necessary.
     382              :  *
     383              :  * If need_escapes is true or lex was given as NULL, then caller is
     384              :  * responsible for freeing the returned struct, either by calling
     385              :  * freeJsonLexContext() or (in backend environment) via memory context
     386              :  * cleanup.
     387              :  *
     388              :  * In shlib code, any out-of-memory failures will be deferred to time
     389              :  * of use; this function is guaranteed to return a valid JsonLexContext.
     390              :  */
     391              : JsonLexContext *
     392        27480 : makeJsonLexContextCstringLen(JsonLexContext *lex, const char *json,
     393              :                              size_t len, int encoding, bool need_escapes)
     394              : {
     395        27480 :     if (lex == NULL)
     396              :     {
     397         4475 :         lex = ALLOC0(sizeof(JsonLexContext));
     398         4475 :         if (!lex)
     399            0 :             return &failed_oom;
     400         4475 :         lex->flags |= JSONLEX_FREE_STRUCT;
     401              :     }
     402              :     else
     403        23005 :         memset(lex, 0, sizeof(JsonLexContext));
     404              : 
     405        27480 :     lex->errormsg = NULL;
     406        27480 :     lex->input = lex->token_terminator = lex->line_start = json;
     407        27480 :     lex->line_number = 1;
     408        27480 :     lex->input_length = len;
     409        27480 :     lex->input_encoding = encoding;
     410        27480 :     lex->need_escapes = need_escapes;
     411        27480 :     if (need_escapes)
     412              :     {
     413              :         /*
     414              :          * This call can fail in shlib code. We defer error handling to time
     415              :          * of use (json_lex_string()) since we might not need to parse any
     416              :          * strings anyway.
     417              :          */
     418        21694 :         lex->strval = jsonapi_makeStringInfo();
     419        21694 :         lex->flags |= JSONLEX_FREE_STRVAL;
     420              :     }
     421              : 
     422        27480 :     return lex;
     423              : }
     424              : 
/*
 * Allocates the internal bookkeeping structures for incremental parsing. This
 * can only fail in-band with shlib code.
 *
 * Returns false (and leaves lex->inc_state pointing at the OOM sentinel) on
 * allocation failure in shlib builds; in backend/frontend builds the
 * allocators throw instead, so this always returns true there.
 */
#define JS_STACK_CHUNK_SIZE 64
#define JS_MAX_PROD_LEN 10      /* more than we need */
#define JSON_TD_MAX_STACK 6400  /* hard coded for now - this is a REALLY high
                                 * number */
static bool
allocate_incremental_state(JsonLexContext *lex)
{
    void       *pstack,
               *prediction,
               *fnames,
               *fnull;

    /* allocate everything up front so the failure cleanup is in one place */
    lex->inc_state = ALLOC0(sizeof(JsonIncrementalState));
    pstack = ALLOC0(sizeof(JsonParserStack));
    prediction = ALLOC(JS_STACK_CHUNK_SIZE * JS_MAX_PROD_LEN);
    fnames = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(char *));
    fnull = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(bool));

#ifdef JSONAPI_USE_PQEXPBUFFER
    /* shlib allocators return NULL on OOM; release whatever did succeed */
    if (!lex->inc_state
        || !pstack
        || !prediction
        || !fnames
        || !fnull)
    {
        FREE(lex->inc_state);
        FREE(pstack);
        FREE(prediction);
        FREE(fnames);
        FREE(fnull);

        lex->inc_state = &failed_inc_oom;
        return false;
    }
#endif

    jsonapi_initStringInfo(&(lex->inc_state->partial_token));
    lex->pstack = pstack;
    lex->pstack->stack_size = JS_STACK_CHUNK_SIZE;
    lex->pstack->prediction = prediction;
    lex->pstack->fnames = fnames;
    lex->pstack->fnull = fnull;

    /*
     * fnames between 0 and lex_level must always be defined so that
     * freeJsonLexContext() can handle them safely. inc/dec_lex_level() handle
     * the rest.
     */
    Assert(lex->lex_level == 0);
    lex->pstack->fnames[0] = NULL;

    lex->incremental = true;
    return true;
}
     483              : 
     484              : 
     485              : /*
     486              :  * makeJsonLexContextIncremental
     487              :  *
     488              :  * Similar to above but set up for use in incremental parsing. That means we
     489              :  * need explicit stacks for predictions, field names and null indicators, but
     490              :  * we don't need the input, that will be handed in bit by bit to the
     491              :  * parse routine. We also need an accumulator for partial tokens in case
     492              :  * the boundary between chunks happens to fall in the middle of a token.
     493              :  *
     494              :  * In shlib code, any out-of-memory failures will be deferred to time of use;
     495              :  * this function is guaranteed to return a valid JsonLexContext.
     496              :  */
     497              : JsonLexContext *
     498         2083 : makeJsonLexContextIncremental(JsonLexContext *lex, int encoding,
     499              :                               bool need_escapes)
     500              : {
     501         2083 :     if (lex == NULL)
     502              :     {
     503            1 :         lex = ALLOC0(sizeof(JsonLexContext));
     504            1 :         if (!lex)
     505            0 :             return &failed_oom;
     506              : 
     507            1 :         lex->flags |= JSONLEX_FREE_STRUCT;
     508              :     }
     509              :     else
     510         2082 :         memset(lex, 0, sizeof(JsonLexContext));
     511              : 
     512         2083 :     lex->line_number = 1;
     513         2083 :     lex->input_encoding = encoding;
     514              : 
     515         2083 :     if (!allocate_incremental_state(lex))
     516              :     {
     517            0 :         if (lex->flags & JSONLEX_FREE_STRUCT)
     518              :         {
     519            0 :             FREE(lex);
     520            0 :             return &failed_oom;
     521              :         }
     522              : 
     523              :         /* lex->inc_state tracks the OOM failure; we can return here. */
     524            0 :         return lex;
     525              :     }
     526              : 
     527         2083 :     lex->need_escapes = need_escapes;
     528         2083 :     if (need_escapes)
     529              :     {
     530              :         /*
     531              :          * This call can fail in shlib code. We defer error handling to time
     532              :          * of use (json_lex_string()) since we might not need to parse any
     533              :          * strings anyway.
     534              :          */
     535          126 :         lex->strval = jsonapi_makeStringInfo();
     536          126 :         lex->flags |= JSONLEX_FREE_STRVAL;
     537              :     }
     538              : 
     539         2083 :     return lex;
     540              : }
     541              : 
     542              : void
     543         1960 : setJsonLexContextOwnsTokens(JsonLexContext *lex, bool owned_by_context)
     544              : {
     545         1960 :     if (lex->incremental && lex->inc_state->started)
     546              :     {
     547              :         /*
     548              :          * Switching this flag after parsing has already started is a
     549              :          * programming error.
     550              :          */
     551              :         Assert(false);
     552            0 :         return;
     553              :     }
     554              : 
     555         1960 :     if (owned_by_context)
     556          980 :         lex->flags |= JSONLEX_CTX_OWNS_TOKENS;
     557              :     else
     558          980 :         lex->flags &= ~JSONLEX_CTX_OWNS_TOKENS;
     559              : }
     560              : 
/*
 * Increase the parser's nesting depth by one level.
 *
 * For incremental parsing, the prediction/fnames/fnull stacks are grown
 * first if the new level would not fit, in JS_STACK_CHUNK_SIZE increments.
 * Returns false only on out-of-memory in JSONAPI_USE_PQEXPBUFFER (shlib)
 * builds; otherwise allocation failure is handled by the allocator itself
 * and the function always returns true.
 */
static inline bool
inc_lex_level(JsonLexContext *lex)
{
	if (lex->incremental && (lex->lex_level + 1) >= lex->pstack->stack_size)
	{
		size_t		new_stack_size;
		char	   *new_prediction;
		char	  **new_fnames;
		bool	   *new_fnull;

		new_stack_size = lex->pstack->stack_size + JS_STACK_CHUNK_SIZE;

		/*
		 * Each array is reallocated and checked individually; on failure we
		 * bail out without touching the remaining arrays, leaving the old
		 * (still valid) pointers in place.
		 */
		new_prediction = REALLOC(lex->pstack->prediction,
								 new_stack_size * JS_MAX_PROD_LEN);
#ifdef JSONAPI_USE_PQEXPBUFFER
		if (!new_prediction)
			return false;
#endif
		lex->pstack->prediction = new_prediction;

		new_fnames = REALLOC(lex->pstack->fnames,
							 new_stack_size * sizeof(char *));
#ifdef JSONAPI_USE_PQEXPBUFFER
		if (!new_fnames)
			return false;
#endif
		lex->pstack->fnames = new_fnames;

		new_fnull = REALLOC(lex->pstack->fnull, new_stack_size * sizeof(bool));
#ifdef JSONAPI_USE_PQEXPBUFFER
		if (!new_fnull)
			return false;
#endif
		lex->pstack->fnull = new_fnull;

		lex->pstack->stack_size = new_stack_size;
	}

	lex->lex_level += 1;

	if (lex->incremental)
	{
		/*
		 * Ensure freeJsonLexContext() remains safe even if no fname is
		 * assigned at this level.
		 */
		lex->pstack->fnames[lex->lex_level] = NULL;
	}

	return true;
}
     612              : 
     613              : static inline void
     614       959908 : dec_lex_level(JsonLexContext *lex)
     615              : {
     616       959908 :     set_fname(lex, NULL);       /* free the current level's fname, if needed */
     617       959908 :     lex->lex_level -= 1;
     618       959908 : }
     619              : 
     620              : static inline void
     621      7498971 : push_prediction(JsonParserStack *pstack, td_entry entry)
     622              : {
     623      7498971 :     memcpy(pstack->prediction + pstack->pred_index, entry.prod, entry.len);
     624      7498971 :     pstack->pred_index += entry.len;
     625      7498971 : }
     626              : 
     627              : static inline char
     628     24221137 : pop_prediction(JsonParserStack *pstack)
     629              : {
     630              :     Assert(pstack->pred_index > 0);
     631     24221137 :     return pstack->prediction[--pstack->pred_index];
     632              : }
     633              : 
     634              : static inline char
     635           76 : next_prediction(JsonParserStack *pstack)
     636              : {
     637              :     Assert(pstack->pred_index > 0);
     638           76 :     return pstack->prediction[pstack->pred_index - 1];
     639              : }
     640              : 
     641              : static inline bool
     642     24523440 : have_prediction(JsonParserStack *pstack)
     643              : {
     644     24523440 :     return pstack->pred_index > 0;
     645              : }
     646              : 
     647              : static inline void
     648      1635398 : set_fname(JsonLexContext *lex, char *fname)
     649              : {
     650      1635398 :     if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
     651              :     {
     652              :         /*
     653              :          * Don't leak prior fnames. If one hasn't been assigned yet,
     654              :          * inc_lex_level ensured that it's NULL (and therefore safe to free).
     655              :          */
     656       436862 :         FREE(lex->pstack->fnames[lex->lex_level]);
     657              :     }
     658              : 
     659      1635398 :     lex->pstack->fnames[lex->lex_level] = fname;
     660      1635398 : }
     661              : 
     662              : static inline char *
     663       635734 : get_fname(JsonLexContext *lex)
     664              : {
     665       635734 :     return lex->pstack->fnames[lex->lex_level];
     666              : }
     667              : 
     668              : static inline void
     669      3267587 : set_fnull(JsonLexContext *lex, bool fnull)
     670              : {
     671      3267587 :     lex->pstack->fnull[lex->lex_level] = fnull;
     672      3267587 : }
     673              : 
     674              : static inline bool
     675          744 : get_fnull(JsonLexContext *lex)
     676              : {
     677          744 :     return lex->pstack->fnull[lex->lex_level];
     678              : }
     679              : 
     680              : /*
     681              :  * Free memory in a JsonLexContext.
     682              :  *
     683              :  * There's no need for this if a *lex pointer was given when the object was
     684              :  * made, need_escapes was false, and json_errdetail() was not called; or if (in
     685              :  * backend environment) a memory context delete/reset is imminent.
     686              :  */
     687              : void
     688         6815 : freeJsonLexContext(JsonLexContext *lex)
     689              : {
     690              :     static const JsonLexContext empty = {0};
     691              : 
     692         6815 :     if (!lex || lex == &failed_oom)
     693            0 :         return;
     694              : 
     695         6815 :     if (lex->flags & JSONLEX_FREE_STRVAL)
     696         4600 :         jsonapi_destroyStringInfo(lex->strval);
     697              : 
     698         6815 :     if (lex->errormsg)
     699          953 :         jsonapi_destroyStringInfo(lex->errormsg);
     700              : 
     701         6815 :     if (lex->incremental)
     702              :     {
     703         2080 :         jsonapi_termStringInfo(&lex->inc_state->partial_token);
     704         2080 :         FREE(lex->inc_state);
     705         2080 :         FREE(lex->pstack->prediction);
     706              : 
     707         2080 :         if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
     708              :         {
     709              :             int         i;
     710              : 
     711              :             /* Clean up any tokens that were left behind. */
     712       821410 :             for (i = 0; i <= lex->lex_level; i++)
     713       820430 :                 FREE(lex->pstack->fnames[i]);
     714              :         }
     715              : 
     716         2080 :         FREE(lex->pstack->fnames);
     717         2080 :         FREE(lex->pstack->fnull);
     718         2080 :         FREE(lex->pstack->scalar_val);
     719         2080 :         FREE(lex->pstack);
     720              :     }
     721              : 
     722         6815 :     if (lex->flags & JSONLEX_FREE_STRUCT)
     723         4070 :         FREE(lex);
     724              :     else
     725         2745 :         *lex = empty;
     726              : }
     727              : 
     728              : /*
     729              :  * pg_parse_json
     730              :  *
     731              :  * Publicly visible entry point for the JSON parser.
     732              :  *
     733              :  * lex is a lexing context, set up for the json to be processed by calling
     734              :  * makeJsonLexContext(). sem is a structure of function pointers to semantic
     735              :  * action routines to be called at appropriate spots during parsing, and a
     736              :  * pointer to a state object to be passed to those routines.
     737              :  *
     738              :  * If FORCE_JSON_PSTACK is defined then the routine will call the non-recursive
     739              :  * JSON parser. This is a useful way to validate that it's doing the right
     740              :  * thing at least for non-incremental cases. If this is on we expect to see
     741              :  * regression diffs relating to error messages about stack depth, but no
     742              :  * other differences.
     743              :  */
     744              : JsonParseErrorType
     745        26990 : pg_parse_json(JsonLexContext *lex, const JsonSemAction *sem)
     746              : {
     747              : #ifdef FORCE_JSON_PSTACK
     748              :     /*
     749              :      * We don't need partial token processing, there is only one chunk. But we
     750              :      * still need to init the partial token string so that freeJsonLexContext
     751              :      * works, so perform the full incremental initialization.
     752              :      */
     753              :     if (!allocate_incremental_state(lex))
     754              :         return JSON_OUT_OF_MEMORY;
     755              : 
     756              :     return pg_parse_json_incremental(lex, sem, lex->input, lex->input_length, true);
     757              : 
     758              : #else
     759              : 
     760              :     JsonTokenType tok;
     761              :     JsonParseErrorType result;
     762              : 
     763        26990 :     if (lex == &failed_oom)
     764            0 :         return JSON_OUT_OF_MEMORY;
     765        26990 :     if (lex->incremental)
     766            0 :         return JSON_INVALID_LEXER_TYPE;
     767              : 
     768              :     /* get the initial token */
     769        26990 :     result = json_lex(lex);
     770        26990 :     if (result != JSON_SUCCESS)
     771          168 :         return result;
     772              : 
     773        26822 :     tok = lex_peek(lex);
     774              : 
     775              :     /* parse by recursive descent */
     776        26822 :     switch (tok)
     777              :     {
     778        13859 :         case JSON_TOKEN_OBJECT_START:
     779        13859 :             result = parse_object(lex, sem);
     780        13806 :             break;
     781         5875 :         case JSON_TOKEN_ARRAY_START:
     782         5875 :             result = parse_array(lex, sem);
     783         5562 :             break;
     784         7088 :         default:
     785         7088 :             result = parse_scalar(lex, sem);    /* json can be a bare scalar */
     786              :     }
     787              : 
     788        26400 :     if (result == JSON_SUCCESS)
     789        25762 :         result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
     790              : 
     791        26400 :     return result;
     792              : #endif
     793              : }
     794              : 
     795              : /*
     796              :  * json_count_array_elements
     797              :  *
     798              :  * Returns number of array elements in lex context at start of array token
     799              :  * until end of array token at same nesting level.
     800              :  *
     801              :  * Designed to be called from array_start routines.
     802              :  */
     803              : JsonParseErrorType
     804            4 : json_count_array_elements(JsonLexContext *lex, int *elements)
     805              : {
     806              :     JsonLexContext copylex;
     807              :     int         count;
     808              :     JsonParseErrorType result;
     809              : 
     810            4 :     if (lex == &failed_oom)
     811            0 :         return JSON_OUT_OF_MEMORY;
     812              : 
     813              :     /*
     814              :      * It's safe to do this with a shallow copy because the lexical routines
     815              :      * don't scribble on the input. They do scribble on the other pointers
     816              :      * etc, so doing this with a copy makes that safe.
     817              :      */
     818            4 :     memcpy(&copylex, lex, sizeof(JsonLexContext));
     819            4 :     copylex.need_escapes = false;   /* not interested in values here */
     820            4 :     copylex.lex_level++;
     821              : 
     822            4 :     count = 0;
     823            4 :     result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
     824              :                         JSON_TOKEN_ARRAY_START);
     825            4 :     if (result != JSON_SUCCESS)
     826            0 :         return result;
     827            4 :     if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
     828              :     {
     829              :         while (1)
     830              :         {
     831           32 :             count++;
     832           32 :             result = parse_array_element(&copylex, &nullSemAction);
     833           32 :             if (result != JSON_SUCCESS)
     834            0 :                 return result;
     835           32 :             if (copylex.token_type != JSON_TOKEN_COMMA)
     836            4 :                 break;
     837           28 :             result = json_lex(&copylex);
     838           28 :             if (result != JSON_SUCCESS)
     839            0 :                 return result;
     840              :         }
     841              :     }
     842            4 :     result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
     843              :                         JSON_TOKEN_ARRAY_END);
     844            4 :     if (result != JSON_SUCCESS)
     845            0 :         return result;
     846              : 
     847            4 :     *elements = count;
     848            4 :     return JSON_SUCCESS;
     849              : }
     850              : 
     851              : /*
     852              :  * pg_parse_json_incremental
     853              :  *
     854              :  * Routine for incremental parsing of json. This uses the non-recursive top
     855              :  * down method of the Dragon Book Algorithm 4.3. It's somewhat slower than
     856              :  * the Recursive Descent pattern used above, so we only use it for incremental
     857              :  * parsing of JSON.
     858              :  *
     859              :  * The lexing context needs to be set up by a call to
     860              :  * makeJsonLexContextIncremental(). sem is a structure of function pointers
     861              :  * to semantic action routines, which should function exactly as those used
     862              :  * in the recursive descent parser.
     863              :  *
     864              :  * This routine can be called repeatedly with chunks of JSON. On the final
     865              :  * chunk is_last must be set to true. len is the length of the json chunk,
     866              :  * which does not need to be null terminated.
     867              :  */
     868              : JsonParseErrorType
     869       372954 : pg_parse_json_incremental(JsonLexContext *lex,
     870              :                           const JsonSemAction *sem,
     871              :                           const char *json,
     872              :                           size_t len,
     873              :                           bool is_last)
     874              : {
     875              :     JsonTokenType tok;
     876              :     JsonParseErrorType result;
     877       372954 :     JsonParseContext ctx = JSON_PARSE_VALUE;
     878       372954 :     JsonParserStack *pstack = lex->pstack;
     879              : 
     880       372954 :     if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
     881            0 :         return JSON_OUT_OF_MEMORY;
     882       372954 :     if (!lex->incremental)
     883            0 :         return JSON_INVALID_LEXER_TYPE;
     884              : 
     885       372954 :     lex->input = lex->token_terminator = lex->line_start = json;
     886       372954 :     lex->input_length = len;
     887       372954 :     lex->inc_state->is_last_chunk = is_last;
     888       372954 :     lex->inc_state->started = true;
     889              : 
     890              :     /* get the initial token */
     891       372954 :     result = json_lex(lex);
     892       372954 :     if (result != JSON_SUCCESS)
     893        71780 :         return result;
     894              : 
     895       301174 :     tok = lex_peek(lex);
     896              : 
     897              :     /* use prediction stack for incremental parsing */
     898              : 
     899       301174 :     if (!have_prediction(pstack))
     900              :     {
     901         1899 :         td_entry    goal = TD_ENTRY(JSON_PROD_GOAL);
     902              : 
     903         1899 :         push_prediction(pstack, goal);
     904              :     }
     905              : 
     906     24222266 :     while (have_prediction(pstack))
     907              :     {
     908     24221137 :         char        top = pop_prediction(pstack);
     909              :         td_entry    entry;
     910              : 
     911              :         /*
     912              :          * these first two branches are the guts of the Table Driven method
     913              :          */
     914     24221137 :         if (top == tok)
     915              :         {
     916              :             /*
     917              :              * tok can only be a terminal symbol, so top must be too. the
     918              :              * token matches the top of the stack, so get the next token.
     919              :              */
     920      6251656 :             if (tok < JSON_TOKEN_END)
     921              :             {
     922      6250527 :                 result = json_lex(lex);
     923      6250527 :                 if (result != JSON_SUCCESS)
     924       300044 :                     return result;
     925      5951180 :                 tok = lex_peek(lex);
     926              :             }
     927              :         }
     928     17969481 :         else if (IS_NT(top) && (entry = td_parser_table[OFS(top)][tok]).prod != NULL)
     929              :         {
     930              :             /*
     931              :              * the token is in the director set for a production of the
     932              :              * non-terminal at the top of the stack, so push the reversed RHS
     933              :              * of the production onto the stack.
     934              :              */
     935      7497072 :             push_prediction(pstack, entry);
     936              :         }
     937     10472409 :         else if (IS_SEM(top))
     938              :         {
     939              :             /*
     940              :              * top is a semantic action marker, so take action accordingly.
     941              :              * It's important to have these markers in the prediction stack
     942              :              * before any token they might need so we don't advance the token
     943              :              * prematurely. Note in a couple of cases we need to do something
     944              :              * both before and after the token.
     945              :              */
     946     10471968 :             switch (top)
     947              :             {
     948       137783 :                 case JSON_SEM_OSTART:
     949              :                     {
     950       137783 :                         json_struct_action ostart = sem->object_start;
     951              : 
     952       137783 :                         if (lex->lex_level >= JSON_TD_MAX_STACK)
     953            0 :                             return JSON_NESTING_TOO_DEEP;
     954              : 
     955       137783 :                         if (ostart != NULL)
     956              :                         {
     957       127107 :                             result = (*ostart) (sem->semstate);
     958       127107 :                             if (result != JSON_SUCCESS)
     959            0 :                                 return result;
     960              :                         }
     961              : 
     962       137783 :                         if (!inc_lex_level(lex))
     963            0 :                             return JSON_OUT_OF_MEMORY;
     964              :                     }
     965       137783 :                     break;
     966       137418 :                 case JSON_SEM_OEND:
     967              :                     {
     968       137418 :                         json_struct_action oend = sem->object_end;
     969              : 
     970       137418 :                         dec_lex_level(lex);
     971       137418 :                         if (oend != NULL)
     972              :                         {
     973       127106 :                             result = (*oend) (sem->semstate);
     974       127106 :                             if (result != JSON_SUCCESS)
     975            0 :                                 return result;
     976              :                         }
     977              :                     }
     978       137418 :                     break;
     979      2461282 :                 case JSON_SEM_ASTART:
     980              :                     {
     981      2461282 :                         json_struct_action astart = sem->array_start;
     982              : 
     983      2461282 :                         if (lex->lex_level >= JSON_TD_MAX_STACK)
     984          256 :                             return JSON_NESTING_TOO_DEEP;
     985              : 
     986      2461026 :                         if (astart != NULL)
     987              :                         {
     988          286 :                             result = (*astart) (sem->semstate);
     989          286 :                             if (result != JSON_SUCCESS)
     990            0 :                                 return result;
     991              :                         }
     992              : 
     993      2461026 :                         if (!inc_lex_level(lex))
     994            0 :                             return JSON_OUT_OF_MEMORY;
     995              :                     }
     996      2461026 :                     break;
     997       822490 :                 case JSON_SEM_AEND:
     998              :                     {
     999       822490 :                         json_struct_action aend = sem->array_end;
    1000              : 
    1001       822490 :                         dec_lex_level(lex);
    1002       822490 :                         if (aend != NULL)
    1003              :                         {
    1004          286 :                             result = (*aend) (sem->semstate);
    1005          286 :                             if (result != JSON_SUCCESS)
    1006            0 :                                 return result;
    1007              :                         }
    1008              :                     }
    1009       822490 :                     break;
    1010       675490 :                 case JSON_SEM_OFIELD_INIT:
    1011              :                     {
    1012              :                         /*
    1013              :                          * all we do here is save out the field name. We have
    1014              :                          * to wait to get past the ':' to see if the next
    1015              :                          * value is null so we can call the semantic routine
    1016              :                          */
    1017       675490 :                         char       *fname = NULL;
    1018       675490 :                         json_ofield_action ostart = sem->object_field_start;
    1019       675490 :                         json_ofield_action oend = sem->object_field_end;
    1020              : 
    1021       675490 :                         if ((ostart != NULL || oend != NULL) && lex->need_escapes)
    1022              :                         {
    1023       635110 :                             fname = STRDUP(lex->strval->data);
    1024       635110 :                             if (fname == NULL)
    1025            0 :                                 return JSON_OUT_OF_MEMORY;
    1026              :                         }
    1027       675490 :                         set_fname(lex, fname);
    1028              :                     }
    1029       675490 :                     break;
    1030       675362 :                 case JSON_SEM_OFIELD_START:
    1031              :                     {
    1032              :                         /*
    1033              :                          * the current token should be the first token of the
    1034              :                          * value
    1035              :                          */
    1036       675362 :                         bool        isnull = tok == JSON_TOKEN_NULL;
    1037       675362 :                         json_ofield_action ostart = sem->object_field_start;
    1038              : 
    1039       675362 :                         set_fnull(lex, isnull);
    1040              : 
    1041       675362 :                         if (ostart != NULL)
    1042              :                         {
    1043       635110 :                             char       *fname = get_fname(lex);
    1044              : 
    1045       635110 :                             result = (*ostart) (sem->semstate, fname, isnull);
    1046       635110 :                             if (result != JSON_SUCCESS)
    1047            0 :                                 return result;
    1048              :                         }
    1049              :                     }
    1050       675362 :                     break;
    1051       675325 :                 case JSON_SEM_OFIELD_END:
    1052              :                     {
    1053       675325 :                         json_ofield_action oend = sem->object_field_end;
    1054              : 
    1055       675325 :                         if (oend != NULL)
    1056              :                         {
    1057          624 :                             char       *fname = get_fname(lex);
    1058          624 :                             bool        isnull = get_fnull(lex);
    1059              : 
    1060          624 :                             result = (*oend) (sem->semstate, fname, isnull);
    1061          624 :                             if (result != JSON_SUCCESS)
    1062            0 :                                 return result;
    1063              :                         }
    1064              :                     }
    1065       675325 :                     break;
    1066      2592225 :                 case JSON_SEM_AELEM_START:
    1067              :                     {
    1068      2592225 :                         json_aelem_action astart = sem->array_element_start;
    1069      2592225 :                         bool        isnull = tok == JSON_TOKEN_NULL;
    1070              : 
    1071      2592225 :                         set_fnull(lex, isnull);
    1072              : 
    1073      2592225 :                         if (astart != NULL)
    1074              :                         {
    1075          120 :                             result = (*astart) (sem->semstate, isnull);
    1076          120 :                             if (result != JSON_SUCCESS)
    1077            0 :                                 return result;
    1078              :                         }
    1079              :                     }
    1080      2592225 :                     break;
    1081       953825 :                 case JSON_SEM_AELEM_END:
    1082              :                     {
    1083       953825 :                         json_aelem_action aend = sem->array_element_end;
    1084              : 
    1085       953825 :                         if (aend != NULL)
    1086              :                         {
    1087          120 :                             bool        isnull = get_fnull(lex);
    1088              : 
    1089          120 :                             result = (*aend) (sem->semstate, isnull);
    1090          120 :                             if (result != JSON_SUCCESS)
    1091            0 :                                 return result;
    1092              :                         }
    1093              :                     }
    1094       953825 :                     break;
    1095       670384 :                 case JSON_SEM_SCALAR_INIT:
    1096              :                     {
    1097       670384 :                         json_scalar_action sfunc = sem->scalar;
    1098              : 
    1099       670384 :                         pstack->scalar_val = NULL;
    1100              : 
    1101       670384 :                         if (sfunc != NULL)
    1102              :                         {
    1103              :                             /*
    1104              :                              * extract the de-escaped string value, or the raw
    1105              :                              * lexeme
    1106              :                              */
    1107              :                             /*
    1108              :                              * XXX copied from RD parser but looks like a
    1109              :                              * buglet
    1110              :                              */
    1111       634788 :                             if (tok == JSON_TOKEN_STRING)
    1112              :                             {
    1113       507599 :                                 if (lex->need_escapes)
    1114              :                                 {
    1115       507599 :                                     pstack->scalar_val = STRDUP(lex->strval->data);
    1116       507599 :                                     if (pstack->scalar_val == NULL)
    1117            0 :                                         return JSON_OUT_OF_MEMORY;
    1118              :                                 }
    1119              :                             }
    1120              :                             else
    1121              :                             {
    1122       127189 :                                 ptrdiff_t   tlen = (lex->token_terminator - lex->token_start);
    1123              : 
    1124       127189 :                                 pstack->scalar_val = ALLOC(tlen + 1);
    1125       127189 :                                 if (pstack->scalar_val == NULL)
    1126            0 :                                     return JSON_OUT_OF_MEMORY;
    1127              : 
    1128       127189 :                                 memcpy(pstack->scalar_val, lex->token_start, tlen);
    1129       127189 :                                 pstack->scalar_val[tlen] = '\0';
    1130              :                             }
    1131       634788 :                             pstack->scalar_tok = tok;
    1132              :                         }
    1133              :                     }
    1134       670384 :                     break;
    1135       670384 :                 case JSON_SEM_SCALAR_CALL:
    1136              :                     {
    1137              :                         /*
    1138              :                          * We'd like to be able to get rid of this business of
    1139              :                          * two bits of scalar action, but we can't. It breaks
    1140              :                          * certain semantic actions which expect that when
    1141              :                          * called the lexer has consumed the item. See for
    1142              :                          * example get_scalar() in jsonfuncs.c.
    1143              :                          */
    1144       670384 :                         json_scalar_action sfunc = sem->scalar;
    1145              : 
    1146       670384 :                         if (sfunc != NULL)
    1147              :                         {
    1148       634788 :                             result = (*sfunc) (sem->semstate, pstack->scalar_val, pstack->scalar_tok);
    1149              : 
    1150              :                             /*
    1151              :                              * Either ownership of the token passed to the
    1152              :                              * callback, or we need to free it now. Either
    1153              :                              * way, clear our pointer to it so it doesn't get
    1154              :                              * freed in the future.
    1155              :                              */
    1156       634787 :                             if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
    1157          272 :                                 FREE(pstack->scalar_val);
    1158       634787 :                             pstack->scalar_val = NULL;
    1159              : 
    1160       634787 :                             if (result != JSON_SUCCESS)
    1161            0 :                                 return result;
    1162              :                         }
    1163              :                     }
    1164       670383 :                     break;
    1165            0 :                 default:
    1166              :                     /* should not happen */
    1167            0 :                     break;
    1168              :             }
    1169              :         }
    1170              :         else
    1171              :         {
    1172              :             /*
    1173              :              * The token didn't match the stack top if it's a terminal nor a
    1174              :              * production for the stack top if it's a non-terminal.
    1175              :              *
    1176              :              * Various cases here are Asserted to be not possible, as the
    1177              :              * token would not appear at the top of the prediction stack
    1178              :              * unless the lookahead matched.
    1179              :              */
    1180          441 :             switch (top)
    1181              :             {
    1182           76 :                 case JSON_TOKEN_STRING:
    1183           76 :                     if (next_prediction(pstack) == JSON_TOKEN_COLON)
    1184           76 :                         ctx = JSON_PARSE_STRING;
    1185              :                     else
    1186              :                     {
    1187              :                         Assert(false);
    1188            0 :                         ctx = JSON_PARSE_VALUE;
    1189              :                     }
    1190           76 :                     break;
    1191            0 :                 case JSON_TOKEN_NUMBER:
    1192              :                 case JSON_TOKEN_TRUE:
    1193              :                 case JSON_TOKEN_FALSE:
    1194              :                 case JSON_TOKEN_NULL:
    1195              :                 case JSON_TOKEN_ARRAY_START:
    1196              :                 case JSON_TOKEN_OBJECT_START:
    1197              :                     Assert(false);
    1198            0 :                     ctx = JSON_PARSE_VALUE;
    1199            0 :                     break;
    1200            0 :                 case JSON_TOKEN_ARRAY_END:
    1201              :                     Assert(false);
    1202            0 :                     ctx = JSON_PARSE_ARRAY_NEXT;
    1203            0 :                     break;
    1204            0 :                 case JSON_TOKEN_OBJECT_END:
    1205              :                     Assert(false);
    1206            0 :                     ctx = JSON_PARSE_OBJECT_NEXT;
    1207            0 :                     break;
    1208            0 :                 case JSON_TOKEN_COMMA:
    1209              :                     Assert(false);
    1210            0 :                     if (next_prediction(pstack) == JSON_TOKEN_STRING)
    1211            0 :                         ctx = JSON_PARSE_OBJECT_NEXT;
    1212              :                     else
    1213            0 :                         ctx = JSON_PARSE_ARRAY_NEXT;
    1214            0 :                     break;
    1215           52 :                 case JSON_TOKEN_COLON:
    1216           52 :                     ctx = JSON_PARSE_OBJECT_LABEL;
    1217           52 :                     break;
    1218           12 :                 case JSON_TOKEN_END:
    1219           12 :                     ctx = JSON_PARSE_END;
    1220           12 :                     break;
    1221           36 :                 case JSON_NT_MORE_ARRAY_ELEMENTS:
    1222           36 :                     ctx = JSON_PARSE_ARRAY_NEXT;
    1223           36 :                     break;
    1224           28 :                 case JSON_NT_ARRAY_ELEMENTS:
    1225           28 :                     ctx = JSON_PARSE_ARRAY_START;
    1226           28 :                     break;
    1227          140 :                 case JSON_NT_MORE_KEY_PAIRS:
    1228          140 :                     ctx = JSON_PARSE_OBJECT_NEXT;
    1229          140 :                     break;
    1230           60 :                 case JSON_NT_KEY_PAIRS:
    1231           60 :                     ctx = JSON_PARSE_OBJECT_START;
    1232           60 :                     break;
    1233           37 :                 default:
    1234           37 :                     ctx = JSON_PARSE_VALUE;
    1235              :             }
    1236          441 :             return report_parse_error(ctx, lex);
    1237              :         }
    1238              :     }
    1239              : 
    1240         1129 :     return JSON_SUCCESS;
    1241              : }
    1242              : 
    1243              : /*
    1244              :  *  Recursive Descent parse routines. There is one for each structural
    1245              :  *  element in a json document:
    1246              :  *    - scalar (string, number, true, false, null)
    1247              :  *    - array  ( [ ] )
    1248              :  *    - array element
    1249              :  *    - object ( { } )
    1250              :  *    - object field
    1251              :  */
    1252              : static inline JsonParseErrorType
    1253       224038 : parse_scalar(JsonLexContext *lex, const JsonSemAction *sem)
    1254              : {
    1255       224038 :     char       *val = NULL;
    1256       224038 :     json_scalar_action sfunc = sem->scalar;
    1257       224038 :     JsonTokenType tok = lex_peek(lex);
    1258              :     JsonParseErrorType result;
    1259              : 
    1260              :     /* a scalar must be a string, a number, true, false, or null */
    1261       224038 :     if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
    1262        22324 :         tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
    1263              :         tok != JSON_TOKEN_NULL)
    1264          125 :         return report_parse_error(JSON_PARSE_VALUE, lex);
    1265              : 
    1266              :     /* if no semantic function, just consume the token */
    1267       223913 :     if (sfunc == NULL)
    1268         7989 :         return json_lex(lex);
    1269              : 
    1270              :     /* extract the de-escaped string value, or the raw lexeme */
    1271       215924 :     if (lex_peek(lex) == JSON_TOKEN_STRING)
    1272              :     {
    1273        49904 :         if (lex->need_escapes)
    1274              :         {
    1275        46104 :             val = STRDUP(lex->strval->data);
    1276        46104 :             if (val == NULL)
    1277            0 :                 return JSON_OUT_OF_MEMORY;
    1278              :         }
    1279              :     }
    1280              :     else
    1281              :     {
    1282       166020 :         int         len = (lex->token_terminator - lex->token_start);
    1283              : 
    1284       166020 :         val = ALLOC(len + 1);
    1285       166020 :         if (val == NULL)
    1286            0 :             return JSON_OUT_OF_MEMORY;
    1287              : 
    1288       166020 :         memcpy(val, lex->token_start, len);
    1289       166020 :         val[len] = '\0';
    1290              :     }
    1291              : 
    1292              :     /* consume the token */
    1293       215924 :     result = json_lex(lex);
    1294       215924 :     if (result != JSON_SUCCESS)
    1295              :     {
    1296            0 :         FREE(val);
    1297            0 :         return result;
    1298              :     }
    1299              : 
    1300              :     /*
    1301              :      * invoke the callback, which may take ownership of val. For string
    1302              :      * values, val is NULL if need_escapes is false.
    1303              :      */
    1304       215924 :     result = (*sfunc) (sem->semstate, val, tok);
    1305              : 
    1306       215769 :     if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
    1307            0 :         FREE(val);
    1308              : 
    1309       215769 :     return result;
    1310              : }
    1311              : 
/*
 * parse_object_field: parse one "fieldname" : value pair inside an object.
 *
 * The field name is conditionally copied into fname (only when a field
 * callback exists and de-escaping is on); ownership of fname passes to the
 * callbacks unless JSONLEX_CTX_OWNS_TOKENS is set, in which case it is freed
 * in the cleanup path below.  Returns JSON_SUCCESS or the first error
 * encountered (from the lexer, a callback, or a sub-parse).
 */
static JsonParseErrorType
parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
{
    /*
     * An object field is "fieldname" : value where value can be a scalar,
     * object or array.  Note: in user-facing docs and error messages, we
     * generally call a field name a "key".
     */

    char       *fname = NULL;
    json_ofield_action ostart = sem->object_field_start;
    json_ofield_action oend = sem->object_field_end;
    bool        isnull;
    JsonTokenType tok;
    JsonParseErrorType result;

    /* the field label must be a string token */
    if (lex_peek(lex) != JSON_TOKEN_STRING)
        return report_parse_error(JSON_PARSE_STRING, lex);
    if ((ostart != NULL || oend != NULL) && lex->need_escapes)
    {
        /* fname is NULL if need_escapes is false */
        fname = STRDUP(lex->strval->data);
        if (fname == NULL)
            return JSON_OUT_OF_MEMORY;
    }
    /* consume the label token */
    result = json_lex(lex);
    if (result != JSON_SUCCESS)
    {
        /* no callback has run yet, so fname is still ours to free */
        FREE(fname);
        return result;
    }

    /* the label must be followed by a colon */
    result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
    if (result != JSON_SUCCESS)
    {
        FREE(fname);
        return result;
    }

    /* peek at the value so callbacks can be told whether it is null */
    tok = lex_peek(lex);
    isnull = tok == JSON_TOKEN_NULL;

    if (ostart != NULL)
    {
        result = (*ostart) (sem->semstate, fname, isnull);
        if (result != JSON_SUCCESS)
            goto ofield_cleanup;
    }

    /* the value is any object, array, or scalar */
    switch (tok)
    {
        case JSON_TOKEN_OBJECT_START:
            result = parse_object(lex, sem);
            break;
        case JSON_TOKEN_ARRAY_START:
            result = parse_array(lex, sem);
            break;
        default:
            result = parse_scalar(lex, sem);
    }
    if (result != JSON_SUCCESS)
        goto ofield_cleanup;

    if (oend != NULL)
    {
        result = (*oend) (sem->semstate, fname, isnull);
        if (result != JSON_SUCCESS)
            goto ofield_cleanup;
    }

ofield_cleanup:
    /*
     * If the lex context retains token ownership, free fname here;
     * otherwise the callbacks now own it.
     */
    if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
        FREE(fname);
    return result;
}
    1387              : 
/*
 * parse_object: parse a JSON object, invoking the object_start/object_end
 * semantic callbacks and parse_object_field for each member.
 *
 * lex->lex_level is incremented after the start callback and decremented
 * before the end callback, so both callbacks observe the object's own
 * nesting level.  Error paths return immediately; the abandoned lex_level
 * is harmless because parsing stops there.
 */
static JsonParseErrorType
parse_object(JsonLexContext *lex, const JsonSemAction *sem)
{
    /*
     * an object is a possibly empty sequence of object fields, separated by
     * commas and surrounded by curly braces.
     */
    json_struct_action ostart = sem->object_start;
    json_struct_action oend = sem->object_end;
    JsonTokenType tok;
    JsonParseErrorType result;

#ifndef FRONTEND

    /*
     * TODO: clients need some way to put a bound on stack growth. Parse level
     * limits maybe?
     */
    check_stack_depth();
#endif

    if (ostart != NULL)
    {
        result = (*ostart) (sem->semstate);
        if (result != JSON_SUCCESS)
            return result;
    }

    /*
     * Data inside an object is at a higher nesting level than the object
     * itself. Note that we increment this after we call the semantic routine
     * for the object start and restore it before we call the routine for the
     * object end.
     */
    lex->lex_level++;

    Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START);
    /* consume the opening brace */
    result = json_lex(lex);
    if (result != JSON_SUCCESS)
        return result;

    tok = lex_peek(lex);
    switch (tok)
    {
        case JSON_TOKEN_STRING:
            /* one or more comma-separated fields */
            result = parse_object_field(lex, sem);
            while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
            {
                result = json_lex(lex);
                if (result != JSON_SUCCESS)
                    break;
                result = parse_object_field(lex, sem);
            }
            break;
        case JSON_TOKEN_OBJECT_END:
            /* empty object: fall through to consume the closing brace */
            break;
        default:
            /* case of an invalid initial token inside the object */
            result = report_parse_error(JSON_PARSE_OBJECT_START, lex);
    }
    if (result != JSON_SUCCESS)
        return result;

    result = lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
    if (result != JSON_SUCCESS)
        return result;

    /* restore the nesting level before the end callback */
    lex->lex_level--;

    if (oend != NULL)
    {
        result = (*oend) (sem->semstate);
        if (result != JSON_SUCCESS)
            return result;
    }

    return JSON_SUCCESS;
}
    1466              : 
    1467              : static JsonParseErrorType
    1468        40378 : parse_array_element(JsonLexContext *lex, const JsonSemAction *sem)
    1469              : {
    1470        40378 :     json_aelem_action astart = sem->array_element_start;
    1471        40378 :     json_aelem_action aend = sem->array_element_end;
    1472        40378 :     JsonTokenType tok = lex_peek(lex);
    1473              :     JsonParseErrorType result;
    1474              :     bool        isnull;
    1475              : 
    1476        40378 :     isnull = tok == JSON_TOKEN_NULL;
    1477              : 
    1478        40378 :     if (astart != NULL)
    1479              :     {
    1480         7561 :         result = (*astart) (sem->semstate, isnull);
    1481         7545 :         if (result != JSON_SUCCESS)
    1482           16 :             return result;
    1483              :     }
    1484              : 
    1485              :     /* an array element is any object, array or scalar */
    1486        40346 :     switch (tok)
    1487              :     {
    1488        12832 :         case JSON_TOKEN_OBJECT_START:
    1489        12832 :             result = parse_object(lex, sem);
    1490        12563 :             break;
    1491         8440 :         case JSON_TOKEN_ARRAY_START:
    1492         8440 :             result = parse_array(lex, sem);
    1493         2736 :             break;
    1494        19074 :         default:
    1495        19074 :             result = parse_scalar(lex, sem);
    1496              :     }
    1497              : 
    1498        34329 :     if (result != JSON_SUCCESS)
    1499          384 :         return result;
    1500              : 
    1501        33945 :     if (aend != NULL)
    1502              :     {
    1503         4876 :         result = (*aend) (sem->semstate, isnull);
    1504         4868 :         if (result != JSON_SUCCESS)
    1505            0 :             return result;
    1506              :     }
    1507              : 
    1508        33937 :     return JSON_SUCCESS;
    1509              : }
    1510              : 
/*
 * parse_array: parse a JSON array, invoking the array_start/array_end
 * semantic callbacks and parse_array_element for each element.
 *
 * Mirrors parse_object: lex->lex_level is incremented after the start
 * callback and decremented before the end callback.  Error paths return
 * immediately since parsing is being abandoned.
 */
static JsonParseErrorType
parse_array(JsonLexContext *lex, const JsonSemAction *sem)
{
    /*
     * an array is a possibly empty sequence of array elements, separated by
     * commas and surrounded by square brackets.
     */
    json_struct_action astart = sem->array_start;
    json_struct_action aend = sem->array_end;
    JsonParseErrorType result;

#ifndef FRONTEND
    /* guard against stack overflow from deeply nested input */
    check_stack_depth();
#endif

    if (astart != NULL)
    {
        result = (*astart) (sem->semstate);
        if (result != JSON_SUCCESS)
            return result;
    }

    /*
     * Data inside an array is at a higher nesting level than the array
     * itself. Note that we increment this after we call the semantic routine
     * for the array start and restore it before we call the routine for the
     * array end.
     */
    lex->lex_level++;

    result = lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
    if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
    {
        /* non-empty array: one or more comma-separated elements */
        result = parse_array_element(lex, sem);

        while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
        {
            result = json_lex(lex);
            if (result != JSON_SUCCESS)
                break;
            result = parse_array_element(lex, sem);
        }
    }
    if (result != JSON_SUCCESS)
        return result;

    result = lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
    if (result != JSON_SUCCESS)
        return result;

    /* restore the nesting level before the end callback */
    lex->lex_level--;

    if (aend != NULL)
    {
        result = (*aend) (sem->semstate);
        if (result != JSON_SUCCESS)
            return result;
    }

    return JSON_SUCCESS;
}
    1572              : 
    1573              : /*
    1574              :  * Lex one token from the input stream.
    1575              :  *
    1576              :  * When doing incremental parsing, we can reach the end of the input string
    1577              :  * without having (or knowing we have) a complete token. If it's not the
    1578              :  * final chunk of input, the partial token is then saved to the lex
    1579              :  * structure's ptok StringInfo. On subsequent calls input is appended to this
    1580              :  * buffer until we have something that we think is a complete token,
    1581              :  * which is then lexed using a recursive call to json_lex. Processing then
    1582              :  * continues as normal on subsequent calls.
    1583              :  *
     1584              :  * Note that when doing incremental processing, the lex.prev_token_terminator
    1585              :  * should not be relied on. It could point into a previous input chunk or
    1586              :  * worse.
    1587              :  */
    1588              : JsonParseErrorType
    1589      7679962 : json_lex(JsonLexContext *lex)
    1590              : {
    1591              :     const char *s;
    1592      7679962 :     const char *const end = lex->input + lex->input_length;
    1593              :     JsonParseErrorType result;
    1594              : 
    1595      7679962 :     if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
    1596            0 :         return JSON_OUT_OF_MEMORY;
    1597              : 
    1598      7679962 :     if (lex->incremental)
    1599              :     {
    1600      6623481 :         if (lex->inc_state->partial_completed)
    1601              :         {
    1602              :             /*
    1603              :              * We just lexed a completed partial token on the last call, so
    1604              :              * reset everything
    1605              :              */
    1606        34432 :             jsonapi_resetStringInfo(&(lex->inc_state->partial_token));
    1607        34432 :             lex->token_terminator = lex->input;
    1608        34432 :             lex->inc_state->partial_completed = false;
    1609              :         }
    1610              : 
    1611              : #ifdef JSONAPI_USE_PQEXPBUFFER
    1612              :         /* Make sure our partial token buffer is valid before using it below. */
    1613      1915760 :         if (PQExpBufferDataBroken(lex->inc_state->partial_token))
    1614            0 :             return JSON_OUT_OF_MEMORY;
    1615              : #endif
    1616              :     }
    1617              : 
    1618      7679962 :     s = lex->token_terminator;
    1619              : 
    1620      7679962 :     if (lex->incremental && lex->inc_state->partial_token.len)
    1621              :     {
    1622              :         /*
    1623              :          * We have a partial token. Extend it and if completed lex it by a
    1624              :          * recursive call
    1625              :          */
    1626        83204 :         jsonapi_StrValType *ptok = &(lex->inc_state->partial_token);
    1627        83204 :         size_t      added = 0;
    1628        83204 :         bool        tok_done = false;
    1629        83204 :         JsonLexContext dummy_lex = {0};
    1630              :         JsonParseErrorType partial_result;
    1631              : 
    1632        83204 :         if (ptok->data[0] == '"')
    1633              :         {
    1634              :             /*
    1635              :              * It's a string. Accumulate characters until we reach an
    1636              :              * unescaped '"'.
    1637              :              */
    1638        80079 :             int         escapes = 0;
    1639              : 
    1640        81363 :             for (int i = ptok->len - 1; i > 0; i--)
    1641              :             {
    1642              :                 /* count the trailing backslashes on the partial token */
    1643        75674 :                 if (ptok->data[i] == '\\')
    1644         1284 :                     escapes++;
    1645              :                 else
    1646        74390 :                     break;
    1647              :             }
    1648              : 
    1649       607283 :             for (size_t i = 0; i < lex->input_length; i++)
    1650              :             {
    1651       559711 :                 char        c = lex->input[i];
    1652              : 
    1653       559711 :                 jsonapi_appendStringInfoCharMacro(ptok, c);
    1654       559711 :                 added++;
    1655       559711 :                 if (c == '"' && escapes % 2 == 0)
    1656              :                 {
    1657        32507 :                     tok_done = true;
    1658        32507 :                     break;
    1659              :                 }
    1660       527204 :                 if (c == '\\')
    1661         2048 :                     escapes++;
    1662              :                 else
    1663       525156 :                     escapes = 0;
    1664              :             }
    1665              :         }
    1666              :         else
    1667              :         {
    1668              :             /* not a string */
    1669         3125 :             char        c = ptok->data[0];
    1670              : 
    1671         3125 :             if (c == '-' || (c >= '0' && c <= '9'))
    1672              :             {
    1673              :                 /*
    1674              :                  * Accumulate numeric continuations, respecting JSON number
    1675              :                  * grammar: -? int [frac] [exp]
    1676              :                  *
    1677              :                  * We must track what parts of the number we've already seen
    1678              :                  * so we don't over-consume.  '.' is valid only once and not
    1679              :                  * after 'e'/'E'; 'e'/'E' is valid only once; '+'/'-' are
    1680              :                  * valid only immediately after 'e'/'E'.
    1681              :                  */
    1682          657 :                 bool        numend = false;
    1683          657 :                 bool        seen_dot = false;
    1684          657 :                 bool        seen_exp = false;
    1685              :                 char        prev;
    1686              : 
    1687              :                 /* Scan existing partial token for state */
    1688         2518 :                 for (int j = 0; j < ptok->len; j++)
    1689              :                 {
    1690         1861 :                     char        pc = ptok->data[j];
    1691              : 
    1692         1861 :                     if (pc == '.')
    1693            0 :                         seen_dot = true;
    1694         1861 :                     else if (pc == 'e' || pc == 'E')
    1695            0 :                         seen_exp = true;
    1696              :                 }
    1697          657 :                 prev = ptok->data[ptok->len - 1];
    1698              : 
    1699         1842 :                 for (size_t i = 0; i < lex->input_length && !numend; i++)
    1700              :                 {
    1701         1185 :                     char        cc = lex->input[i];
    1702              : 
    1703         1185 :                     switch (cc)
    1704              :                     {
    1705            0 :                         case '+':
    1706              :                         case '-':
    1707            0 :                             if (prev != 'e' && prev != 'E')
    1708              :                             {
    1709            0 :                                 numend = true;
    1710            0 :                                 break;
    1711              :                             }
    1712            0 :                             jsonapi_appendStringInfoCharMacro(ptok, cc);
    1713            0 :                             added++;
    1714            0 :                             break;
    1715            0 :                         case '.':
    1716            0 :                             if (seen_dot || seen_exp)
    1717              :                             {
    1718            0 :                                 numend = true;
    1719            0 :                                 break;
    1720              :                             }
    1721            0 :                             seen_dot = true;
    1722            0 :                             jsonapi_appendStringInfoCharMacro(ptok, cc);
    1723            0 :                             added++;
    1724            0 :                             break;
    1725            0 :                         case 'e':
    1726              :                         case 'E':
    1727            0 :                             if (seen_exp)
    1728              :                             {
    1729            0 :                                 numend = true;
    1730            0 :                                 break;
    1731              :                             }
    1732            0 :                             seen_exp = true;
    1733            0 :                             jsonapi_appendStringInfoCharMacro(ptok, cc);
    1734            0 :                             added++;
    1735            0 :                             break;
    1736          796 :                         case '0':
    1737              :                         case '1':
    1738              :                         case '2':
    1739              :                         case '3':
    1740              :                         case '4':
    1741              :                         case '5':
    1742              :                         case '6':
    1743              :                         case '7':
    1744              :                         case '8':
    1745              :                         case '9':
    1746          796 :                             jsonapi_appendStringInfoCharMacro(ptok, cc);
    1747          796 :                             added++;
    1748          796 :                             break;
    1749          389 :                         default:
    1750          389 :                             numend = true;
    1751              :                     }
    1752         1185 :                     if (!numend)
    1753          796 :                         prev = cc;
    1754              :                 }
    1755              :             }
    1756              : 
    1757              :             /*
    1758              :              * Add any remaining alphanumeric chars. This takes care of the
    1759              :              * {null, false, true} literals as well as any trailing
    1760              :              * alphanumeric junk on non-string tokens.
    1761              :              */
    1762         6449 :             for (size_t i = added; i < lex->input_length; i++)
    1763              :             {
    1764         5469 :                 char        cc = lex->input[i];
    1765              : 
    1766         5469 :                 if (JSON_ALPHANUMERIC_CHAR(cc))
    1767              :                 {
    1768         3324 :                     jsonapi_appendStringInfoCharMacro(ptok, cc);
    1769         3324 :                     added++;
    1770              :                 }
    1771              :                 else
    1772              :                 {
    1773         2145 :                     tok_done = true;
    1774         2145 :                     break;
    1775              :                 }
    1776              :             }
    1777         3125 :             if (added == lex->input_length &&
    1778          980 :                 lex->inc_state->is_last_chunk)
    1779              :             {
    1780           76 :                 tok_done = true;
    1781              :             }
    1782              :         }
    1783              : 
    1784        83204 :         if (!tok_done)
    1785              :         {
    1786              :             /* We should have consumed the whole chunk in this case. */
    1787              :             Assert(added == lex->input_length);
    1788              : 
    1789        48476 :             if (!lex->inc_state->is_last_chunk)
    1790        48436 :                 return JSON_INCOMPLETE;
    1791              : 
    1792              :             /* json_errdetail() needs access to the accumulated token. */
    1793           40 :             lex->token_start = ptok->data;
    1794           40 :             lex->token_terminator = ptok->data + ptok->len;
    1795           40 :             return JSON_INVALID_TOKEN;
    1796              :         }
    1797              : 
    1798              :         /*
    1799              :          * Everything up to lex->input[added] has been added to the partial
    1800              :          * token, so move the input past it.
    1801              :          */
    1802        34728 :         lex->input += added;
    1803        34728 :         lex->input_length -= added;
    1804              : 
    1805        34728 :         dummy_lex.input = dummy_lex.token_terminator =
    1806        34728 :             dummy_lex.line_start = ptok->data;
    1807        34728 :         dummy_lex.line_number = lex->line_number;
    1808        34728 :         dummy_lex.input_length = ptok->len;
    1809        34728 :         dummy_lex.input_encoding = lex->input_encoding;
    1810        34728 :         dummy_lex.incremental = false;
    1811        34728 :         dummy_lex.need_escapes = lex->need_escapes;
    1812        34728 :         dummy_lex.strval = lex->strval;
    1813              : 
    1814        34728 :         partial_result = json_lex(&dummy_lex);
    1815              : 
    1816              :         /*
    1817              :          * We either have a complete token or an error. In either case we need
    1818              :          * to point to the partial token data for the semantic or error
    1819              :          * routines. If it's not an error we'll readjust on the next call to
    1820              :          * json_lex.
    1821              :          */
    1822        34728 :         lex->token_type = dummy_lex.token_type;
    1823        34728 :         lex->line_number = dummy_lex.line_number;
    1824              : 
    1825              :         /*
    1826              :          * We know the prev_token_terminator must be back in some previous
    1827              :          * piece of input, so we just make it NULL.
    1828              :          */
    1829        34728 :         lex->prev_token_terminator = NULL;
    1830              : 
    1831              :         /*
    1832              :          * Normally token_start would be ptok->data, but it could be later,
    1833              :          * see json_lex_string's handling of invalid escapes.
    1834              :          */
    1835        34728 :         lex->token_start = dummy_lex.token_start;
    1836        34728 :         lex->token_terminator = dummy_lex.token_terminator;
    1837        34728 :         if (partial_result == JSON_SUCCESS)
    1838              :         {
    1839              :             /* make sure we've used all the input */
    1840        34620 :             if (lex->token_terminator - lex->token_start != ptok->len)
    1841              :             {
    1842              :                 Assert(false);
    1843            0 :                 return JSON_INVALID_TOKEN;
    1844              :             }
    1845              : 
    1846        34620 :             lex->inc_state->partial_completed = true;
    1847              :         }
    1848        34728 :         return partial_result;
    1849              :         /* end of partial token processing */
    1850              :     }
    1851              : 
    1852              :     /* Skip leading whitespace. */
    1853     12937929 :     while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
    1854              :     {
    1855      5341171 :         if (*s++ == '\n')
    1856              :         {
    1857       344706 :             ++lex->line_number;
    1858       344706 :             lex->line_start = s;
    1859              :         }
    1860              :     }
    1861      7596758 :     lex->token_start = s;
    1862              : 
    1863              :     /* Determine token type. */
    1864      7596758 :     if (s >= end)
    1865              :     {
    1866       340541 :         lex->token_start = NULL;
    1867       340541 :         lex->prev_token_terminator = lex->token_terminator;
    1868       340541 :         lex->token_terminator = s;
    1869       340541 :         lex->token_type = JSON_TOKEN_END;
    1870              :     }
    1871              :     else
    1872              :     {
    1873      7256217 :         switch (*s)
    1874              :         {
    1875              :                 /* Single-character token, some kind of punctuation mark. */
    1876       172214 :             case '{':
    1877       172214 :                 lex->prev_token_terminator = lex->token_terminator;
    1878       172214 :                 lex->token_terminator = s + 1;
    1879       172214 :                 lex->token_type = JSON_TOKEN_OBJECT_START;
    1880       172214 :                 break;
    1881       166421 :             case '}':
    1882       166421 :                 lex->prev_token_terminator = lex->token_terminator;
    1883       166421 :                 lex->token_terminator = s + 1;
    1884       166421 :                 lex->token_type = JSON_TOKEN_OBJECT_END;
    1885       166421 :                 break;
    1886      2486638 :             case '[':
    1887      2486638 :                 lex->prev_token_terminator = lex->token_terminator;
    1888      2486638 :                 lex->token_terminator = s + 1;
    1889      2486638 :                 lex->token_type = JSON_TOKEN_ARRAY_START;
    1890      2486638 :                 break;
    1891       841419 :             case ']':
    1892       841419 :                 lex->prev_token_terminator = lex->token_terminator;
    1893       841419 :                 lex->token_terminator = s + 1;
    1894       841419 :                 lex->token_type = JSON_TOKEN_ARRAY_END;
    1895       841419 :                 break;
    1896       875606 :             case ',':
    1897       875606 :                 lex->prev_token_terminator = lex->token_terminator;
    1898       875606 :                 lex->token_terminator = s + 1;
    1899       875606 :                 lex->token_type = JSON_TOKEN_COMMA;
    1900       875606 :                 break;
    1901       891841 :             case ':':
    1902       891841 :                 lex->prev_token_terminator = lex->token_terminator;
    1903       891841 :                 lex->token_terminator = s + 1;
    1904       891841 :                 lex->token_type = JSON_TOKEN_COLON;
    1905       891841 :                 break;
    1906      1511958 :             case '"':
    1907              :                 /* string */
    1908      1511958 :                 result = json_lex_string(lex);
    1909      1511958 :                 if (result != JSON_SUCCESS)
    1910        32824 :                     return result;
    1911      1479134 :                 lex->token_type = JSON_TOKEN_STRING;
    1912      1479134 :                 break;
    1913          425 :             case '-':
    1914              :                 /* Negative number. */
    1915          425 :                 result = json_lex_number(lex, s + 1, NULL, NULL);
    1916          425 :                 if (result != JSON_SUCCESS)
    1917            4 :                     return result;
    1918          421 :                 lex->token_type = JSON_TOKEN_NUMBER;
    1919          421 :                 break;
    1920       272509 :             case '0':
    1921              :             case '1':
    1922              :             case '2':
    1923              :             case '3':
    1924              :             case '4':
    1925              :             case '5':
    1926              :             case '6':
    1927              :             case '7':
    1928              :             case '8':
    1929              :             case '9':
    1930              :                 /* Positive number. */
    1931       272509 :                 result = json_lex_number(lex, s, NULL, NULL);
    1932       272509 :                 if (result != JSON_SUCCESS)
    1933          461 :                     return result;
    1934       272048 :                 lex->token_type = JSON_TOKEN_NUMBER;
    1935       272048 :                 break;
    1936        37186 :             default:
    1937              :                 {
    1938              :                     const char *p;
    1939              : 
    1940              :                     /*
    1941              :                      * We're not dealing with a string, number, legal
    1942              :                      * punctuation mark, or end of string.  The only legal
    1943              :                      * tokens we might find here are true, false, and null,
    1944              :                      * but for error reporting purposes we scan until we see a
    1945              :                      * non-alphanumeric character.  That way, we can report
    1946              :                      * the whole word as an unexpected token, rather than just
    1947              :                      * some unintuitive prefix thereof.
    1948              :                      */
    1949       202519 :                     for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++)
    1950              :                          /* skip */ ;
    1951              : 
    1952              :                     /*
    1953              :                      * We got some sort of unexpected punctuation or an
    1954              :                      * otherwise unexpected character, so just complain about
    1955              :                      * that one character.
    1956              :                      */
    1957        37186 :                     if (p == s)
    1958              :                     {
    1959           44 :                         lex->prev_token_terminator = lex->token_terminator;
    1960           44 :                         lex->token_terminator = s + 1;
    1961           44 :                         return JSON_INVALID_TOKEN;
    1962              :                     }
    1963              : 
    1964        37142 :                     if (lex->incremental && !lex->inc_state->is_last_chunk &&
    1965         7832 :                         p == lex->input + lex->input_length)
    1966              :                     {
    1967         1832 :                         jsonapi_appendBinaryStringInfo(&(lex->inc_state->partial_token), s, end - s);
    1968         1832 :                         return JSON_INCOMPLETE;
    1969              :                     }
    1970              : 
    1971              :                     /*
    1972              :                      * We've got a real alphanumeric token here.  If it
    1973              :                      * happens to be true, false, or null, all is well.  If
    1974              :                      * not, error out.
    1975              :                      */
    1976        35310 :                     lex->prev_token_terminator = lex->token_terminator;
    1977        35310 :                     lex->token_terminator = p;
    1978        35310 :                     if (p - s == 4)
    1979              :                     {
    1980        15495 :                         if (memcmp(s, "true", 4) == 0)
    1981         4972 :                             lex->token_type = JSON_TOKEN_TRUE;
    1982        10523 :                         else if (memcmp(s, "null", 4) == 0)
    1983        10515 :                             lex->token_type = JSON_TOKEN_NULL;
    1984              :                         else
    1985            8 :                             return JSON_INVALID_TOKEN;
    1986              :                     }
    1987        19815 :                     else if (p - s == 5 && memcmp(s, "false", 5) == 0)
    1988        19656 :                         lex->token_type = JSON_TOKEN_FALSE;
    1989              :                     else
    1990          159 :                         return JSON_INVALID_TOKEN;
    1991              :                 }
    1992              :         }                       /* end of switch */
    1993              :     }
    1994              : 
    1995      7561426 :     if (lex->incremental && lex->token_type == JSON_TOKEN_END && !lex->inc_state->is_last_chunk)
    1996       287667 :         return JSON_INCOMPLETE;
    1997              :     else
    1998      7273759 :         return JSON_SUCCESS;
    1999              : }
    2000              : 
    2001              : /*
    2002              :  * The next token in the input stream is known to be a string; lex it.
    2003              :  *
     2004              :  * If lex->need_escapes is true, fill lex->strval with the decoded string.
    2005              :  * Set lex->token_terminator to the end of the decoded input, and in
    2006              :  * success cases, transfer its previous value to lex->prev_token_terminator.
    2007              :  * Return JSON_SUCCESS or an error code.
    2008              :  *
    2009              :  * Note: be careful that all error exits advance lex->token_terminator
    2010              :  * to the point after the character we detected the error on.
    2011              :  */
    2012              : static inline JsonParseErrorType
    2013      1511958 : json_lex_string(JsonLexContext *lex)
    2014              : {
    2015              :     const char *s;
    2016      1511958 :     const char *const end = lex->input + lex->input_length;
    2017      1511958 :     int         hi_surrogate = -1;
    2018              : 
    2019              :     /* Convenience macros for error exits */
    2020              : #define FAIL_OR_INCOMPLETE_AT_CHAR_START(code) \
    2021              :     do { \
    2022              :         if (lex->incremental && !lex->inc_state->is_last_chunk) \
    2023              :         { \
    2024              :             jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token, \
    2025              :                                            lex->token_start, \
    2026              :                                            end - lex->token_start); \
    2027              :             return JSON_INCOMPLETE; \
    2028              :         } \
    2029              :         lex->token_terminator = s; \
    2030              :         return code; \
    2031              :     } while (0)
    2032              : #define FAIL_AT_CHAR_END(code) \
    2033              :     do { \
    2034              :         ptrdiff_t   remaining = end - s; \
    2035              :         int         charlen; \
    2036              :         charlen = pg_encoding_mblen_or_incomplete(lex->input_encoding, \
    2037              :                                                   s, remaining); \
    2038              :         lex->token_terminator = (charlen <= remaining) ? s + charlen : end; \
    2039              :         return code; \
    2040              :     } while (0)
    2041              : 
    2042      1511958 :     if (lex->need_escapes)
    2043              :     {
    2044              : #ifdef JSONAPI_USE_PQEXPBUFFER
    2045              :         /* make sure initialization succeeded */
    2046          668 :         if (lex->strval == NULL)
    2047            0 :             return JSON_OUT_OF_MEMORY;
    2048              : #endif
    2049      1369084 :         jsonapi_resetStringInfo(lex->strval);
    2050              :     }
    2051              : 
    2052              :     Assert(lex->input_length > 0);
    2053      1511958 :     s = lex->token_start;
    2054              :     for (;;)
    2055              :     {
    2056      3024610 :         s++;
    2057              :         /* Premature end of the string. */
    2058      3024610 :         if (s >= end)
    2059        32335 :             FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
    2060      2992275 :         else if (*s == '"')
    2061      1479198 :             break;
    2062      1513077 :         else if (*s == '\\')
    2063              :         {
    2064              :             /* OK, we have an escape character. */
    2065         5498 :             s++;
    2066         5498 :             if (s >= end)
    2067           96 :                 FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
    2068         5402 :             else if (*s == 'u')
    2069              :             {
    2070              :                 int         i;
    2071         2071 :                 int         ch = 0;
    2072              : 
    2073         9967 :                 for (i = 1; i <= 4; i++)
    2074              :                 {
    2075         8049 :                     s++;
    2076         8049 :                     if (s >= end)
    2077          128 :                         FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
    2078         7921 :                     else if (*s >= '0' && *s <= '9')
    2079         4899 :                         ch = (ch * 16) + (*s - '0');
    2080         3022 :                     else if (*s >= 'a' && *s <= 'f')
    2081         2981 :                         ch = (ch * 16) + (*s - 'a') + 10;
    2082           41 :                     else if (*s >= 'A' && *s <= 'F')
    2083           16 :                         ch = (ch * 16) + (*s - 'A') + 10;
    2084              :                     else
    2085           25 :                         FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT);
    2086              :                 }
    2087         1918 :                 if (lex->need_escapes)
    2088              :                 {
    2089              :                     /*
    2090              :                      * Combine surrogate pairs.
    2091              :                      */
    2092          234 :                     if (is_utf16_surrogate_first(ch))
    2093              :                     {
    2094          114 :                         if (hi_surrogate != -1)
    2095            8 :                             FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE);
    2096          106 :                         hi_surrogate = ch;
    2097          106 :                         continue;
    2098              :                     }
    2099          120 :                     else if (is_utf16_surrogate_second(ch))
    2100              :                     {
    2101           42 :                         if (hi_surrogate == -1)
    2102           16 :                             FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2103           26 :                         ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
    2104           26 :                         hi_surrogate = -1;
    2105              :                     }
    2106              : 
    2107          104 :                     if (hi_surrogate != -1)
    2108            0 :                         FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2109              : 
    2110              :                     /*
    2111              :                      * Reject invalid cases.  We can't have a value above
    2112              :                      * 0xFFFF here (since we only accepted 4 hex digits
    2113              :                      * above), so no need to test for out-of-range chars.
    2114              :                      */
    2115          104 :                     if (ch == 0)
    2116              :                     {
    2117              :                         /* We can't allow this, since our TEXT type doesn't */
    2118           16 :                         FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO);
    2119              :                     }
    2120              : 
    2121              :                     /*
    2122              :                      * Add the represented character to lex->strval.  In the
    2123              :                      * backend, we can let pg_unicode_to_server_noerror()
    2124              :                      * handle any required character set conversion; in
    2125              :                      * frontend, we can only deal with trivial conversions.
    2126              :                      */
    2127              : #ifndef FRONTEND
    2128              :                     {
    2129              :                         char        cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
    2130              : 
    2131           60 :                         if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
    2132            0 :                             FAIL_AT_CHAR_END(JSON_UNICODE_UNTRANSLATABLE);
    2133           60 :                         appendStringInfoString(lex->strval, cbuf);
    2134              :                     }
    2135              : #else
    2136           28 :                     if (lex->input_encoding == PG_UTF8)
    2137              :                     {
    2138              :                         /* OK, we can map the code point to UTF8 easily */
    2139              :                         char        utf8str[5];
    2140              :                         int         utf8len;
    2141              : 
    2142           28 :                         unicode_to_utf8(ch, (unsigned char *) utf8str);
    2143           28 :                         utf8len = pg_utf_mblen((unsigned char *) utf8str);
    2144           28 :                         jsonapi_appendBinaryStringInfo(lex->strval, utf8str, utf8len);
    2145              :                     }
    2146            0 :                     else if (ch <= 0x007f)
    2147              :                     {
    2148              :                         /* The ASCII range is the same in all encodings */
    2149            0 :                         jsonapi_appendStringInfoChar(lex->strval, (char) ch);
    2150              :                     }
    2151              :                     else
    2152            0 :                         FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE);
    2153              : #endif                          /* FRONTEND */
    2154              :                 }
    2155              :             }
    2156         3331 :             else if (lex->need_escapes)
    2157              :             {
    2158          518 :                 if (hi_surrogate != -1)
    2159            0 :                     FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2160              : 
    2161          518 :                 switch (*s)
    2162              :                 {
    2163          378 :                     case '"':
    2164              :                     case '\\':
    2165              :                     case '/':
    2166          378 :                         jsonapi_appendStringInfoChar(lex->strval, *s);
    2167          378 :                         break;
    2168           28 :                     case 'b':
    2169           28 :                         jsonapi_appendStringInfoChar(lex->strval, '\b');
    2170           28 :                         break;
    2171            4 :                     case 'f':
    2172            4 :                         jsonapi_appendStringInfoChar(lex->strval, '\f');
    2173            4 :                         break;
    2174           40 :                     case 'n':
    2175           40 :                         jsonapi_appendStringInfoChar(lex->strval, '\n');
    2176           40 :                         break;
    2177            4 :                     case 'r':
    2178            4 :                         jsonapi_appendStringInfoChar(lex->strval, '\r');
    2179            4 :                         break;
    2180           60 :                     case 't':
    2181           60 :                         jsonapi_appendStringInfoChar(lex->strval, '\t');
    2182           60 :                         break;
    2183            4 :                     default:
    2184              : 
    2185              :                         /*
    2186              :                          * Not a valid string escape, so signal error.  We
    2187              :                          * adjust token_start so that just the escape sequence
    2188              :                          * is reported, not the whole string.
    2189              :                          */
    2190            4 :                         lex->token_start = s;
    2191            4 :                         FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
    2192              :                 }
    2193              :             }
    2194         2813 :             else if (strchr("\"\\/bfnrt", *s) == NULL)
    2195              :             {
    2196              :                 /*
    2197              :                  * Simpler processing if we're not bothered about de-escaping
    2198              :                  *
    2199              :                  * It's very tempting to remove the strchr() call here and
    2200              :                  * replace it with a switch statement, but testing so far has
    2201              :                  * shown it's not a performance win.
    2202              :                  */
    2203           64 :                 lex->token_start = s;
    2204           64 :                 FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
    2205              :             }
    2206              :         }
    2207              :         else
    2208              :         {
    2209      1507579 :             const char *p = s;
    2210              : 
    2211      1507579 :             if (hi_surrogate != -1)
    2212            8 :                 FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
    2213              : 
    2214              :             /*
    2215              :              * Skip to the first byte that requires special handling, so we
    2216              :              * can batch calls to jsonapi_appendBinaryStringInfo.
    2217              :              */
    2218      1507571 :             while (p < end - sizeof(Vector8) &&
    2219      1856127 :                    !pg_lfind8('\\', (const uint8 *) p, sizeof(Vector8)) &&
    2220      3806621 :                    !pg_lfind8('"', (const uint8 *) p, sizeof(Vector8)) &&
    2221       445000 :                    !pg_lfind8_le(31, (const uint8 *) p, sizeof(Vector8)))
    2222       445000 :                 p += sizeof(Vector8);
    2223              : 
    2224     11606826 :             for (; p < end; p++)
    2225              :             {
    2226     11580084 :                 if (*p == '\\' || *p == '"')
    2227              :                     break;
    2228     10099315 :                 else if ((unsigned char) *p <= 31)
    2229              :                 {
    2230              :                     /* Per RFC4627, these characters MUST be escaped. */
    2231              :                     /*
    2232              :                      * Since *p isn't printable, exclude it from the context
    2233              :                      * string
    2234              :                      */
    2235           60 :                     lex->token_terminator = p;
    2236           60 :                     return JSON_ESCAPING_REQUIRED;
    2237              :                 }
    2238              :             }
    2239              : 
    2240      1507511 :             if (lex->need_escapes)
    2241      1369266 :                 jsonapi_appendBinaryStringInfo(lex->strval, s, p - s);
    2242              : 
    2243              :             /*
    2244              :              * s will be incremented at the top of the loop, so set it to just
    2245              :              * behind our lookahead position
    2246              :              */
    2247      1507511 :             s = p - 1;
    2248              :         }
    2249              :     }
    2250              : 
    2251      1479198 :     if (hi_surrogate != -1)
    2252              :     {
    2253           64 :         lex->token_terminator = s + 1;
    2254           64 :         return JSON_UNICODE_LOW_SURROGATE;
    2255              :     }
    2256              : 
    2257              : #ifdef JSONAPI_USE_PQEXPBUFFER
    2258        34448 :     if (lex->need_escapes && PQExpBufferBroken(lex->strval))
    2259            0 :         return JSON_OUT_OF_MEMORY;
    2260              : #endif
    2261              : 
    2262              :     /* Hooray, we found the end of the string! */
    2263      1479134 :     lex->prev_token_terminator = lex->token_terminator;
    2264      1479134 :     lex->token_terminator = s + 1;
    2265      1479134 :     return JSON_SUCCESS;
    2266              : 
    2267              : #undef FAIL_OR_INCOMPLETE_AT_CHAR_START
    2268              : #undef FAIL_AT_CHAR_END
    2269              : }
    2270              : 
    2271              : /*
    2272              :  * The next token in the input stream is known to be a number; lex it.
    2273              :  *
    2274              :  * In JSON, a number consists of four parts:
    2275              :  *
    2276              :  * (1) An optional minus sign ('-').
    2277              :  *
    2278              :  * (2) Either a single '0', or a string of one or more digits that does not
    2279              :  *     begin with a '0'.
    2280              :  *
    2281              :  * (3) An optional decimal part, consisting of a period ('.') followed by
    2282              :  *     one or more digits.  (Note: While this part can be omitted
    2283              :  *     completely, it's not OK to have only the decimal point without
    2284              :  *     any digits afterwards.)
    2285              :  *
    2286              :  * (4) An optional exponent part, consisting of 'e' or 'E', optionally
    2287              :  *     followed by '+' or '-', followed by one or more digits.  (Note:
    2288              :  *     As with the decimal part, if 'e' or 'E' is present, it must be
    2289              :  *     followed by at least one digit.)
    2290              :  *
    2291              :  * The 's' argument to this function points to the ostensible beginning
    2292              :  * of part 2 - i.e. the character after any optional minus sign, or the
    2293              :  * first character of the string if there is none.
    2294              :  *
    2295              :  * If num_err is not NULL, we return an error flag to *num_err rather than
    2296              :  * raising an error for a badly-formed number.  Also, if total_len is not NULL
    2297              :  * the distance from lex->input to the token end+1 is returned to *total_len.
    2298              :  */
    2299              : static inline JsonParseErrorType
    2300       272957 : json_lex_number(JsonLexContext *lex, const char *s,
    2301              :                 bool *num_err, size_t *total_len)
    2302              : {
    2303       272957 :     bool        error = false;
    2304       272957 :     int         len = s - lex->input;
    2305              : 
    2306              :     /* Part (1): leading sign indicator. */
    2307              :     /* Caller already did this for us; so do nothing. */
    2308              : 
    2309              :     /* Part (2): parse main digit string. */
    2310       272957 :     if (len < lex->input_length && *s == '0')
    2311              :     {
    2312        82102 :         s++;
    2313        82102 :         len++;
    2314              :     }
    2315       190855 :     else if (len < lex->input_length && *s >= '1' && *s <= '9')
    2316              :     {
    2317              :         do
    2318              :         {
    2319       625381 :             s++;
    2320       625381 :             len++;
    2321       625381 :         } while (len < lex->input_length && *s >= '0' && *s <= '9');
    2322              :     }
    2323              :     else
    2324            5 :         error = true;
    2325              : 
    2326              :     /* Part (3): parse optional decimal portion. */
    2327       272957 :     if (len < lex->input_length && *s == '.')
    2328              :     {
    2329        30435 :         s++;
    2330        30435 :         len++;
    2331        30435 :         if (len == lex->input_length || *s < '0' || *s > '9')
    2332            8 :             error = true;
    2333              :         else
    2334              :         {
    2335              :             do
    2336              :             {
    2337        72449 :                 s++;
    2338        72449 :                 len++;
    2339        72449 :             } while (len < lex->input_length && *s >= '0' && *s <= '9');
    2340              :         }
    2341              :     }
    2342              : 
    2343              :     /* Part (4): parse optional exponent. */
    2344       272957 :     if (len < lex->input_length && (*s == 'e' || *s == 'E'))
    2345              :     {
    2346           61 :         s++;
    2347           61 :         len++;
    2348           61 :         if (len < lex->input_length && (*s == '+' || *s == '-'))
    2349              :         {
    2350            5 :             s++;
    2351            5 :             len++;
    2352              :         }
    2353           61 :         if (len == lex->input_length || *s < '0' || *s > '9')
    2354            8 :             error = true;
    2355              :         else
    2356              :         {
    2357              :             do
    2358              :             {
    2359          186 :                 s++;
    2360          186 :                 len++;
    2361          186 :             } while (len < lex->input_length && *s >= '0' && *s <= '9');
    2362              :         }
    2363              :     }
    2364              : 
    2365              :     /*
    2366              :      * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff
    2367              :      * here should be considered part of the token for error-reporting
    2368              :      * purposes.
    2369              :      */
    2370       273181 :     for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
    2371          224 :         error = true;
    2372              : 
    2373       272957 :     if (total_len != NULL)
    2374           23 :         *total_len = len;
    2375              : 
    2376       272957 :     if (lex->incremental && !lex->inc_state->is_last_chunk &&
    2377        69039 :         len >= lex->input_length)
    2378              :     {
    2379          389 :         jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token,
    2380          389 :                                        lex->token_start, s - lex->token_start);
    2381          389 :         if (num_err != NULL)
    2382            0 :             *num_err = error;
    2383              : 
    2384          389 :         return JSON_INCOMPLETE;
    2385              :     }
    2386       272568 :     else if (num_err != NULL)
    2387              :     {
    2388              :         /* let the caller handle any error */
    2389           23 :         *num_err = error;
    2390              :     }
    2391              :     else
    2392              :     {
    2393              :         /* return token endpoint */
    2394       272545 :         lex->prev_token_terminator = lex->token_terminator;
    2395       272545 :         lex->token_terminator = s;
    2396              :         /* handle error if any */
    2397       272545 :         if (error)
    2398           76 :             return JSON_INVALID_TOKEN;
    2399              :     }
    2400              : 
    2401       272492 :     return JSON_SUCCESS;
    2402              : }
    2403              : 
    2404              : /*
    2405              :  * Report a parse error.
    2406              :  *
    2407              :  * lex->token_start and lex->token_terminator must identify the current token.
    2408              :  */
    2409              : static JsonParseErrorType
    2410          655 : report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
    2411              : {
    2412              :     /* Handle case where the input ended prematurely. */
    2413          655 :     if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
    2414          155 :         return JSON_EXPECTED_MORE;
    2415              : 
    2416              :     /* Otherwise choose the error type based on the parsing context. */
    2417          500 :     switch (ctx)
    2418              :     {
    2419           28 :         case JSON_PARSE_END:
    2420           28 :             return JSON_EXPECTED_END;
    2421          104 :         case JSON_PARSE_VALUE:
    2422          104 :             return JSON_EXPECTED_JSON;
    2423           84 :         case JSON_PARSE_STRING:
    2424           84 :             return JSON_EXPECTED_STRING;
    2425           28 :         case JSON_PARSE_ARRAY_START:
    2426           28 :             return JSON_EXPECTED_ARRAY_FIRST;
    2427           36 :         case JSON_PARSE_ARRAY_NEXT:
    2428           36 :             return JSON_EXPECTED_ARRAY_NEXT;
    2429           64 :         case JSON_PARSE_OBJECT_START:
    2430           64 :             return JSON_EXPECTED_OBJECT_FIRST;
    2431           68 :         case JSON_PARSE_OBJECT_LABEL:
    2432           68 :             return JSON_EXPECTED_COLON;
    2433           88 :         case JSON_PARSE_OBJECT_NEXT:
    2434           88 :             return JSON_EXPECTED_OBJECT_NEXT;
    2435            0 :         case JSON_PARSE_OBJECT_COMMA:
    2436            0 :             return JSON_EXPECTED_STRING;
    2437              :     }
    2438              : 
    2439              :     /*
    2440              :      * We don't use a default: case, so that the compiler will warn about
    2441              :      * unhandled enum values.
    2442              :      */
    2443              :     Assert(false);
    2444            0 :     return JSON_SUCCESS;        /* silence stupider compilers */
    2445              : }
    2446              : 
    2447              : /*
    2448              :  * Construct an (already translated) detail message for a JSON error.
    2449              :  *
    2450              :  * The returned pointer should not be freed, the allocation is either static
    2451              :  * or owned by the JsonLexContext.
    2452              :  */
char *
json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
{
    /*
     * OOM is reported with a static string: allocating an error buffer here
     * would likely fail too.  &failed_oom is the sentinel lex returned when
     * the context itself could not be allocated.
     */
    if (error == JSON_OUT_OF_MEMORY || lex == &failed_oom)
    {
        /* Short circuit. Allocating anything for this case is unhelpful. */
        return _("out of memory");
    }

    /* Reuse the lex-owned message buffer, creating it on first use. */
    if (lex->errormsg)
        jsonapi_resetStringInfo(lex->errormsg);
    else
        lex->errormsg = jsonapi_makeStringInfo();

    /*
     * A helper for error messages that should print the current token. The
     * format must contain exactly one %.*s specifier.
     */
#define json_token_error(lex, format) \
    jsonapi_appendStringInfo((lex)->errormsg, _(format), \
                             (int) ((lex)->token_terminator - (lex)->token_start), \
                             (lex)->token_start);

    /*
     * Cases either return a static (translated) string directly, or append
     * to lex->errormsg and break to the common exit path below.
     */
    switch (error)
    {
        case JSON_INCOMPLETE:
        case JSON_SUCCESS:
            /* fall through to the error code after switch */
            break;
        case JSON_INVALID_LEXER_TYPE:
            if (lex->incremental)
                return _("Recursive descent parser cannot use incremental lexer.");
            else
                return _("Incremental parser requires incremental lexer.");
        case JSON_NESTING_TOO_DEEP:
            return (_("JSON nested too deep, maximum permitted depth is 6400."));
        case JSON_ESCAPING_INVALID:
            json_token_error(lex, "Escape sequence \"\\%.*s\" is invalid.");
            break;
        case JSON_ESCAPING_REQUIRED:
            /*
             * The offending byte is not printable, so show its hex value
             * rather than quoting the token text.
             */
            jsonapi_appendStringInfo(lex->errormsg,
                                     _("Character with value 0x%02x must be escaped."),
                                     (unsigned char) *(lex->token_terminator));
            break;
        case JSON_EXPECTED_END:
            json_token_error(lex, "Expected end of input, but found \"%.*s\".");
            break;
        case JSON_EXPECTED_ARRAY_FIRST:
            json_token_error(lex, "Expected array element or \"]\", but found \"%.*s\".");
            break;
        case JSON_EXPECTED_ARRAY_NEXT:
            json_token_error(lex, "Expected \",\" or \"]\", but found \"%.*s\".");
            break;
        case JSON_EXPECTED_COLON:
            json_token_error(lex, "Expected \":\", but found \"%.*s\".");
            break;
        case JSON_EXPECTED_JSON:
            json_token_error(lex, "Expected JSON value, but found \"%.*s\".");
            break;
        case JSON_EXPECTED_MORE:
            return _("The input string ended unexpectedly.");
        case JSON_EXPECTED_OBJECT_FIRST:
            json_token_error(lex, "Expected string or \"}\", but found \"%.*s\".");
            break;
        case JSON_EXPECTED_OBJECT_NEXT:
            json_token_error(lex, "Expected \",\" or \"}\", but found \"%.*s\".");
            break;
        case JSON_EXPECTED_STRING:
            json_token_error(lex, "Expected string, but found \"%.*s\".");
            break;
        case JSON_INVALID_TOKEN:
            json_token_error(lex, "Token \"%.*s\" is invalid.");
            break;
        case JSON_OUT_OF_MEMORY:
            /* should have been handled above; use the error path */
            break;
        case JSON_UNICODE_CODE_POINT_ZERO:
            return _("\\u0000 cannot be converted to text.");
        case JSON_UNICODE_ESCAPE_FORMAT:
            return _("\"\\u\" must be followed by four hexadecimal digits.");
        case JSON_UNICODE_HIGH_ESCAPE:
            /* note: this case is only reachable in frontend not backend */
            return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
        case JSON_UNICODE_UNTRANSLATABLE:

            /*
             * Note: this case is only reachable in backend and not frontend.
             * #ifdef it away so the frontend doesn't try to link against
             * backend functionality.
             */
#ifndef FRONTEND
            return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."),
                            GetDatabaseEncodingName());
#else
            Assert(false);
            break;
#endif
        case JSON_UNICODE_HIGH_SURROGATE:
            return _("Unicode high surrogate must not follow a high surrogate.");
        case JSON_UNICODE_LOW_SURROGATE:
            return _("Unicode low surrogate must follow a high surrogate.");
        case JSON_SEM_ACTION_FAILED:
            /* fall through to the error code after switch */
            break;
    }
#undef json_token_error

    /* Note that lex->errormsg can be NULL in shlib code. */
    if (lex->errormsg && lex->errormsg->len == 0)
    {
        /*
         * We don't use a default: case, so that the compiler will warn about
         * unhandled enum values.  But this needs to be here anyway to cover
         * the possibility of an incorrect input.
         */
        jsonapi_appendStringInfo(lex->errormsg,
                                 "unexpected json parse error type: %d",
                                 (int) error);
    }

#ifdef JSONAPI_USE_PQEXPBUFFER
    /* PQExpBuffer flavor: an append above may have hit OOM; check for it. */
    if (PQExpBufferBroken(lex->errormsg))
        return _("out of memory while constructing error description");
#endif

    return lex->errormsg->data;
}
        

Generated by: LCOV version 2.0-1