Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * jsonapi.c
4 : * JSON parser and lexer interfaces
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/common/jsonapi.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #ifndef FRONTEND
15 : #include "postgres.h"
16 : #else
17 : #include "postgres_fe.h"
18 : #endif
19 :
20 : #include "common/jsonapi.h"
21 : #include "mb/pg_wchar.h"
22 : #include "port/pg_lfind.h"
23 :
24 : #ifdef JSONAPI_USE_PQEXPBUFFER
25 : #include "pqexpbuffer.h"
26 : #else
27 : #include "lib/stringinfo.h"
28 : #include "miscadmin.h"
29 : #endif
30 :
31 : /*
32 : * By default, we will use palloc/pfree along with StringInfo. In libpq,
33 : * use malloc and PQExpBuffer, and return JSON_OUT_OF_MEMORY on out-of-memory.
34 : */
35 : #ifdef JSONAPI_USE_PQEXPBUFFER
36 :
37 : #define STRDUP(s) strdup(s)
38 : #define ALLOC(size) malloc(size)
39 : #define ALLOC0(size) calloc(1, size)
40 : #define REALLOC realloc
41 : #define FREE(s) free(s)
42 :
43 : #define jsonapi_appendStringInfo appendPQExpBuffer
44 : #define jsonapi_appendBinaryStringInfo appendBinaryPQExpBuffer
45 : #define jsonapi_appendStringInfoChar appendPQExpBufferChar
46 : /* XXX should we add a macro version to PQExpBuffer? */
47 : #define jsonapi_appendStringInfoCharMacro appendPQExpBufferChar
48 : #define jsonapi_makeStringInfo createPQExpBuffer
49 : #define jsonapi_initStringInfo initPQExpBuffer
50 : #define jsonapi_resetStringInfo resetPQExpBuffer
51 : #define jsonapi_termStringInfo termPQExpBuffer
52 : #define jsonapi_destroyStringInfo destroyPQExpBuffer
53 :
54 : #else /* !JSONAPI_USE_PQEXPBUFFER */
55 :
56 : #define STRDUP(s) pstrdup(s)
57 : #define ALLOC(size) palloc(size)
58 : #define ALLOC0(size) palloc0(size)
59 : #define REALLOC repalloc
60 :
61 : #ifdef FRONTEND
62 : #define FREE pfree
63 : #else
64 : /*
65 : * Backend pfree() doesn't handle NULL pointers like the frontend's does; smooth
66 : * that over to reduce mental gymnastics. Avoid multiple evaluation of the macro
67 : * argument to avoid future hair-pulling.
68 : */
69 : #define FREE(s) do { \
70 : void *__v = (s); \
71 : if (__v) \
72 : pfree(__v); \
73 : } while (0)
74 : #endif
75 :
76 : #define jsonapi_appendStringInfo appendStringInfo
77 : #define jsonapi_appendBinaryStringInfo appendBinaryStringInfo
78 : #define jsonapi_appendStringInfoChar appendStringInfoChar
79 : #define jsonapi_appendStringInfoCharMacro appendStringInfoCharMacro
80 : #define jsonapi_makeStringInfo makeStringInfo
81 : #define jsonapi_initStringInfo initStringInfo
82 : #define jsonapi_resetStringInfo resetStringInfo
83 : #define jsonapi_termStringInfo(s) pfree((s)->data)
84 : #define jsonapi_destroyStringInfo destroyStringInfo
85 :
86 : #endif /* JSONAPI_USE_PQEXPBUFFER */
87 :
88 : /*
89 : * The context of the parser is maintained by the recursive descent
90 : * mechanism, but is passed explicitly to the error reporting routine
91 : * for better diagnostics.
92 : */
93 : typedef enum /* contexts of JSON parser */
94 : {
95 : JSON_PARSE_VALUE, /* expecting a value */
96 : JSON_PARSE_STRING, /* expecting a string (for a field name) */
97 : JSON_PARSE_ARRAY_START, /* saw '[', expecting value or ']' */
98 : JSON_PARSE_ARRAY_NEXT, /* saw array element, expecting ',' or ']' */
99 : JSON_PARSE_OBJECT_START, /* saw '{', expecting label or '}' */
100 : JSON_PARSE_OBJECT_LABEL, /* saw object label, expecting ':' */
101 : JSON_PARSE_OBJECT_NEXT, /* saw object value, expecting ',' or '}' */
102 : JSON_PARSE_OBJECT_COMMA, /* saw object ',', expecting next label */
103 : JSON_PARSE_END, /* saw the end of a document, expect nothing */
104 : } JsonParseContext;
105 :
106 : /*
107 : * Setup for table-driven parser.
108 : * These enums need to be separate from the JsonTokenType and from each other
109 : * so we can have all of them on the prediction stack, which consists of
110 : * tokens, non-terminals, and semantic action markers.
111 : */
112 :
113 : enum JsonNonTerminal
114 : {
115 : JSON_NT_JSON = 32,
116 : JSON_NT_ARRAY_ELEMENTS,
117 : JSON_NT_MORE_ARRAY_ELEMENTS,
118 : JSON_NT_KEY_PAIRS,
119 : JSON_NT_MORE_KEY_PAIRS,
120 : };
121 :
122 : enum JsonParserSem
123 : {
124 : JSON_SEM_OSTART = 64,
125 : JSON_SEM_OEND,
126 : JSON_SEM_ASTART,
127 : JSON_SEM_AEND,
128 : JSON_SEM_OFIELD_INIT,
129 : JSON_SEM_OFIELD_START,
130 : JSON_SEM_OFIELD_END,
131 : JSON_SEM_AELEM_START,
132 : JSON_SEM_AELEM_END,
133 : JSON_SEM_SCALAR_INIT,
134 : JSON_SEM_SCALAR_CALL,
135 : };
136 :
137 : /*
138 : * struct containing the 3 stacks used in non-recursive parsing,
139 : * and the token and value for scalars that need to be preserved
140 : * across calls.
141 : *
142 : * typedef appears in jsonapi.h
143 : */
144 : struct JsonParserStack
145 : {
146 : int stack_size;
147 : char *prediction;
148 : size_t pred_index;
149 : /* these two are indexed by lex_level */
150 : char **fnames;
151 : bool *fnull;
152 : JsonTokenType scalar_tok;
153 : char *scalar_val;
154 : };
155 :
156 : /*
157 : * struct containing state used when there is a possible partial token at the
158 : * end of a json chunk when we are doing incremental parsing.
159 : *
160 : * typedef appears in jsonapi.h
161 : */
162 : struct JsonIncrementalState
163 : {
164 : bool is_last_chunk;
165 : bool partial_completed;
166 : jsonapi_StrValType partial_token;
167 : };
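/*
 * Illustrative example (not part of the original source): if the input
 * arrives as the two chunks '{"tem' and 'p": 1}', the first chunk ends in
 * the middle of a string token.  json_lex() saves the fragment '"tem' in
 * partial_token and returns JSON_INCOMPLETE; on the next call it appends
 * characters up to the closing unescaped '"', lexes the completed token
 * '"temp"' with a recursive call, and then continues with the rest of the
 * chunk as normal.
 */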
168 :
169 : /*
170 : * constants and macros used in the nonrecursive parser
171 : */
172 : #define JSON_NUM_TERMINALS 13
173 : #define JSON_NUM_NONTERMINALS 5
174 : #define JSON_NT_OFFSET JSON_NT_JSON
175 : /* for indexing the table */
176 : #define OFS(NT) (NT) - JSON_NT_OFFSET
177 : /* classify items we get off the stack */
178 : #define IS_SEM(x) ((x) & 0x40)
179 : #define IS_NT(x) ((x) & 0x20)
180 :
181 : /*
182 : * These productions are stored in reverse order right to left so that when
183 : * they are pushed on the stack what we expect next is at the top of the stack.
184 : */
185 : static char JSON_PROD_EPSILON[] = {0}; /* epsilon - an empty production */
186 :
187 : /* JSON -> string */
188 : static char JSON_PROD_SCALAR_STRING[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_STRING, JSON_SEM_SCALAR_INIT, 0};
189 :
190 : /* JSON -> number */
191 : static char JSON_PROD_SCALAR_NUMBER[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NUMBER, JSON_SEM_SCALAR_INIT, 0};
192 :
193 : /* JSON -> 'true' */
194 : static char JSON_PROD_SCALAR_TRUE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_TRUE, JSON_SEM_SCALAR_INIT, 0};
195 :
196 : /* JSON -> 'false' */
197 : static char JSON_PROD_SCALAR_FALSE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_FALSE, JSON_SEM_SCALAR_INIT, 0};
198 :
199 : /* JSON -> 'null' */
200 : static char JSON_PROD_SCALAR_NULL[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NULL, JSON_SEM_SCALAR_INIT, 0};
201 :
202 : /* JSON -> '{' KEY_PAIRS '}' */
203 : static char JSON_PROD_OBJECT[] = {JSON_SEM_OEND, JSON_TOKEN_OBJECT_END, JSON_NT_KEY_PAIRS, JSON_TOKEN_OBJECT_START, JSON_SEM_OSTART, 0};
204 :
205 : /* JSON -> '[' ARRAY_ELEMENTS ']' */
206 : static char JSON_PROD_ARRAY[] = {JSON_SEM_AEND, JSON_TOKEN_ARRAY_END, JSON_NT_ARRAY_ELEMENTS, JSON_TOKEN_ARRAY_START, JSON_SEM_ASTART, 0};
207 :
208 : /* ARRAY_ELEMENTS -> JSON MORE_ARRAY_ELEMENTS */
209 : static char JSON_PROD_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, 0};
210 :
211 : /* MORE_ARRAY_ELEMENTS -> ',' JSON MORE_ARRAY_ELEMENTS */
212 : static char JSON_PROD_MORE_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, JSON_TOKEN_COMMA, 0};
213 :
214 : /* KEY_PAIRS -> string ':' JSON MORE_KEY_PAIRS */
215 : static char JSON_PROD_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, 0};
216 :
217 : /* MORE_KEY_PAIRS -> ',' string ':' JSON MORE_KEY_PAIRS */
218 : static char JSON_PROD_MORE_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, JSON_TOKEN_COMMA, 0};
219 :
220 : /*
221 : * Note: there are also epsilon productions for ARRAY_ELEMENTS,
222 : * MORE_ARRAY_ELEMENTS, KEY_PAIRS and MORE_KEY_PAIRS.
223 : * They are all the same, as none require any semantic actions.
224 : */
225 :
226 : /*
227 : * Table connecting the productions with their director sets of
228 : * terminal symbols.
229 : * Any combination not specified here represents an error.
230 : */
231 :
232 : typedef struct
233 : {
234 : size_t len;
235 : char *prod;
236 : } td_entry;
237 :
238 : #define TD_ENTRY(PROD) { sizeof(PROD) - 1, (PROD) }
239 :
240 : static td_entry td_parser_table[JSON_NUM_NONTERMINALS][JSON_NUM_TERMINALS] =
241 : {
242 : /* JSON */
243 : [OFS(JSON_NT_JSON)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_SCALAR_STRING),
244 : [OFS(JSON_NT_JSON)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_SCALAR_NUMBER),
245 : [OFS(JSON_NT_JSON)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_SCALAR_TRUE),
246 : [OFS(JSON_NT_JSON)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_SCALAR_FALSE),
247 : [OFS(JSON_NT_JSON)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_SCALAR_NULL),
248 : [OFS(JSON_NT_JSON)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY),
249 : [OFS(JSON_NT_JSON)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_OBJECT),
250 : /* ARRAY_ELEMENTS */
251 : [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
252 : [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
253 : [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
254 : [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
255 : [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
256 : [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
257 : [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
258 : [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
259 : /* MORE_ARRAY_ELEMENTS */
260 : [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_ARRAY_ELEMENTS),
261 : [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
262 : /* KEY_PAIRS */
263 : [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_KEY_PAIRS),
264 : [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
265 : /* MORE_KEY_PAIRS */
266 : [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_KEY_PAIRS),
267 : [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
268 : };
269 :
270 : /* the GOAL production. Not stored in the table, but will be the initial contents of the prediction stack */
271 : static char JSON_PROD_GOAL[] = {JSON_TOKEN_END, JSON_NT_JSON, 0};
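/*
 * Worked example (illustrative, not part of the original source): parsing
 * the document "[1]".  The stack starts as the GOAL production, so
 * JSON_NT_JSON is on top with JSON_TOKEN_END beneath it.  The lookahead '['
 * selects JSON_PROD_ARRAY, whose reversed RHS is pushed so that
 * JSON_SEM_ASTART pops first (firing array_start); JSON_TOKEN_ARRAY_START
 * then matches '[' and the lexer advances to '1'.  JSON_NT_ARRAY_ELEMENTS
 * with lookahead NUMBER selects JSON_PROD_ARRAY_ELEMENTS, whose markers plus
 * JSON_PROD_SCALAR_NUMBER consume the '1' and fire the element and scalar
 * callbacks.  JSON_NT_MORE_ARRAY_ELEMENTS with lookahead ']' selects the
 * epsilon production, JSON_TOKEN_ARRAY_END matches ']', JSON_SEM_AEND fires
 * array_end, and JSON_TOKEN_END matches the end of input, leaving the stack
 * empty: JSON_SUCCESS.
 */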
272 :
273 : static inline JsonParseErrorType json_lex_string(JsonLexContext *lex);
274 : static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, const char *s,
275 : bool *num_err, size_t *total_len);
276 : static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, const JsonSemAction *sem);
277 : static JsonParseErrorType parse_object_field(JsonLexContext *lex, const JsonSemAction *sem);
278 : static JsonParseErrorType parse_object(JsonLexContext *lex, const JsonSemAction *sem);
279 : static JsonParseErrorType parse_array_element(JsonLexContext *lex, const JsonSemAction *sem);
280 : static JsonParseErrorType parse_array(JsonLexContext *lex, const JsonSemAction *sem);
281 : static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
282 : static bool allocate_incremental_state(JsonLexContext *lex);
283 :
284 : /* the null action object used for pure validation */
285 : const JsonSemAction nullSemAction =
286 : {
287 : NULL, NULL, NULL, NULL, NULL,
288 : NULL, NULL, NULL, NULL, NULL
289 : };
290 :
291 : /* sentinels used for out-of-memory conditions */
292 : static JsonLexContext failed_oom;
293 : static JsonIncrementalState failed_inc_oom;
294 :
295 : /* Parser support routines */
296 :
297 : /*
298 : * lex_peek
299 : *
300 : * what is the current look_ahead token?
301 : */
302 : static inline JsonTokenType
303 10122036 : lex_peek(JsonLexContext *lex)
304 : {
305 10122036 : return lex->token_type;
306 : }
307 :
308 : /*
309 : * lex_expect
310 : *
311 : * move the lexer to the next token if the current look_ahead token matches
312 : * the parameter token. Otherwise, report an error.
313 : */
314 : static inline JsonParseErrorType
315 375832 : lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
316 : {
317 375832 : if (lex_peek(lex) == token)
318 375724 : return json_lex(lex);
319 : else
320 108 : return report_parse_error(ctx, lex);
321 : }
322 :
323 : /* chars to consider as part of an alphanumeric token */
324 : #define JSON_ALPHANUMERIC_CHAR(c) \
325 : (((c) >= 'a' && (c) <= 'z') || \
326 : ((c) >= 'A' && (c) <= 'Z') || \
327 : ((c) >= '0' && (c) <= '9') || \
328 : (c) == '_' || \
329 : IS_HIGHBIT_SET(c))
330 :
331 : /*
332 : * Utility function to check if a string is a valid JSON number.
333 : *
334 : * str is of length len, and need not be null-terminated.
335 : */
336 : bool
337 46 : IsValidJsonNumber(const char *str, size_t len)
338 : {
339 : bool numeric_error;
340 : size_t total_len;
341 46 : JsonLexContext dummy_lex = {0};
342 :
343 46 : if (len <= 0)
344 0 : return false;
345 :
346 : /*
347 : * json_lex_number expects a leading '-' to have been eaten already.
348 : *
349 : * having to cast away the constness of str is ugly, but there's not much
350 : * easy alternative.
351 : */
352 46 : if (*str == '-')
353 : {
354 4 : dummy_lex.input = str + 1;
355 4 : dummy_lex.input_length = len - 1;
356 : }
357 : else
358 : {
359 42 : dummy_lex.input = str;
360 42 : dummy_lex.input_length = len;
361 : }
362 :
363 46 : dummy_lex.token_start = dummy_lex.input;
364 :
365 46 : json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
366 :
367 46 : return (!numeric_error) && (total_len == dummy_lex.input_length);
368 : }
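/*
 * Examples (illustrative): IsValidJsonNumber("42", 2) and
 * IsValidJsonNumber("-1.5e2", 6) return true, while IsValidJsonNumber("1.", 2)
 * and IsValidJsonNumber("0x1F", 4) return false, since JSON requires digits
 * after a decimal point and does not allow hexadecimal notation.
 */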
369 :
370 : /*
371 : * makeJsonLexContextCstringLen
372 : * Initialize the given JsonLexContext object, or create one
373 : *
374 : * If a valid 'lex' pointer is given, it is initialized. This can
375 : * be used for stack-allocated structs, saving overhead. If NULL is
376 : * given, a new struct is allocated.
377 : *
378 : * If need_escapes is true, ->strval stores the unescaped lexemes.
379 : * Unescaping is expensive, so only request it when necessary.
380 : *
381 : * If need_escapes is true or lex was given as NULL, then caller is
382 : * responsible for freeing the returned struct, either by calling
383 : * freeJsonLexContext() or (in backend environment) via memory context
384 : * cleanup.
385 : *
386 : * In shlib code, any out-of-memory failures will be deferred to time
387 : * of use; this function is guaranteed to return a valid JsonLexContext.
388 : */
389 : JsonLexContext *
390 37996 : makeJsonLexContextCstringLen(JsonLexContext *lex, const char *json,
391 : size_t len, int encoding, bool need_escapes)
392 : {
393 37996 : if (lex == NULL)
394 : {
395 5528 : lex = ALLOC0(sizeof(JsonLexContext));
396 5528 : if (!lex)
397 0 : return &failed_oom;
398 5528 : lex->flags |= JSONLEX_FREE_STRUCT;
399 : }
400 : else
401 32468 : memset(lex, 0, sizeof(JsonLexContext));
402 :
403 37996 : lex->errormsg = NULL;
404 37996 : lex->input = lex->token_terminator = lex->line_start = json;
405 37996 : lex->line_number = 1;
406 37996 : lex->input_length = len;
407 37996 : lex->input_encoding = encoding;
408 37996 : lex->need_escapes = need_escapes;
409 37996 : if (need_escapes)
410 : {
411 : /*
412 : * This call can fail in shlib code. We defer error handling to time
413 : * of use (json_lex_string()) since we might not need to parse any
414 : * strings anyway.
415 : */
416 29458 : lex->strval = jsonapi_makeStringInfo();
417 29458 : lex->flags |= JSONLEX_FREE_STRVAL;
418 : }
419 :
420 37996 : return lex;
421 : }
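/*
 * Usage sketch (illustrative, backend environment assumed): a
 * stack-allocated context avoids an allocation for the struct itself, but
 * strval still needs freeing when need_escapes is true, hence the
 * freeJsonLexContext() call.
 *
 *		JsonLexContext lex;
 *
 *		makeJsonLexContextCstringLen(&lex, json, strlen(json),
 *									 PG_UTF8, true);
 *		(void) pg_parse_json(&lex, &nullSemAction);
 *		freeJsonLexContext(&lex);
 */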
422 :
423 : /*
424 : * Allocates the internal bookkeeping structures for incremental parsing. This
425 : * can only fail in-band with shlib code.
426 : */
427 : #define JS_STACK_CHUNK_SIZE 64
428 : #define JS_MAX_PROD_LEN 10 /* more than we need */
429 : #define JSON_TD_MAX_STACK 6400 /* hard coded for now - this is a REALLY high
430 : * number */
431 : static bool
432 2188 : allocate_incremental_state(JsonLexContext *lex)
433 : {
434 : void *pstack,
435 : *prediction,
436 : *fnames,
437 : *fnull;
438 :
439 2188 : lex->inc_state = ALLOC0(sizeof(JsonIncrementalState));
440 2188 : pstack = ALLOC(sizeof(JsonParserStack));
441 2188 : prediction = ALLOC(JS_STACK_CHUNK_SIZE * JS_MAX_PROD_LEN);
442 2188 : fnames = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(char *));
443 2188 : fnull = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(bool));
444 :
445 : #ifdef JSONAPI_USE_PQEXPBUFFER
446 980 : if (!lex->inc_state
447 980 : || !pstack
448 980 : || !prediction
449 980 : || !fnames
450 980 : || !fnull)
451 : {
452 0 : FREE(lex->inc_state);
453 0 : FREE(pstack);
454 0 : FREE(prediction);
455 0 : FREE(fnames);
456 0 : FREE(fnull);
457 :
458 0 : lex->inc_state = &failed_inc_oom;
459 0 : return false;
460 : }
461 : #endif
462 :
463 2188 : jsonapi_initStringInfo(&(lex->inc_state->partial_token));
464 2188 : lex->pstack = pstack;
465 2188 : lex->pstack->stack_size = JS_STACK_CHUNK_SIZE;
466 2188 : lex->pstack->prediction = prediction;
467 2188 : lex->pstack->pred_index = 0;
468 2188 : lex->pstack->fnames = fnames;
469 2188 : lex->pstack->fnull = fnull;
470 :
471 2188 : lex->incremental = true;
472 2188 : return true;
473 : }
474 :
475 :
476 : /*
477 : * makeJsonLexContextIncremental
478 : *
479 : * Similar to above but set up for use in incremental parsing. That means we
480 : * need explicit stacks for predictions, field names and null indicators, but
481 : * we don't need the input; that will be handed in bit by bit to the
482 : * parse routine. We also need an accumulator for partial tokens in case
483 : * the boundary between chunks happens to fall in the middle of a token.
484 : *
485 : * In shlib code, any out-of-memory failures will be deferred to time of use;
486 : * this function is guaranteed to return a valid JsonLexContext.
487 : */
488 : JsonLexContext *
489 2188 : makeJsonLexContextIncremental(JsonLexContext *lex, int encoding,
490 : bool need_escapes)
491 : {
492 2188 : if (lex == NULL)
493 : {
494 2 : lex = ALLOC0(sizeof(JsonLexContext));
495 2 : if (!lex)
496 0 : return &failed_oom;
497 :
498 2 : lex->flags |= JSONLEX_FREE_STRUCT;
499 : }
500 : else
501 2186 : memset(lex, 0, sizeof(JsonLexContext));
502 :
503 2188 : lex->line_number = 1;
504 2188 : lex->input_encoding = encoding;
505 :
506 2188 : if (!allocate_incremental_state(lex))
507 : {
508 0 : if (lex->flags & JSONLEX_FREE_STRUCT)
509 : {
510 0 : FREE(lex);
511 0 : return &failed_oom;
512 : }
513 :
514 : /* lex->inc_state tracks the OOM failure; we can return here. */
515 0 : return lex;
516 : }
517 :
518 2188 : lex->need_escapes = need_escapes;
519 2188 : if (need_escapes)
520 : {
521 : /*
522 : * This call can fail in shlib code. We defer error handling to time
523 : * of use (json_lex_string()) since we might not need to parse any
524 : * strings anyway.
525 : */
526 230 : lex->strval = jsonapi_makeStringInfo();
527 230 : lex->flags |= JSONLEX_FREE_STRVAL;
528 : }
529 :
530 2188 : return lex;
531 : }
532 :
533 : static inline bool
534 2693864 : inc_lex_level(JsonLexContext *lex)
535 : {
536 2693864 : if (lex->incremental && (lex->lex_level + 1) >= lex->pstack->stack_size)
537 : {
538 : size_t new_stack_size;
539 : char *new_prediction;
540 : char **new_fnames;
541 : bool *new_fnull;
542 :
543 38400 : new_stack_size = lex->pstack->stack_size + JS_STACK_CHUNK_SIZE;
544 :
545 38400 : new_prediction = REALLOC(lex->pstack->prediction,
546 : new_stack_size * JS_MAX_PROD_LEN);
547 : #ifdef JSONAPI_USE_PQEXPBUFFER
548 19200 : if (!new_prediction)
549 0 : return false;
550 : #endif
551 38400 : lex->pstack->prediction = new_prediction;
552 :
553 38400 : new_fnames = REALLOC(lex->pstack->fnames,
554 : new_stack_size * sizeof(char *));
555 : #ifdef JSONAPI_USE_PQEXPBUFFER
556 19200 : if (!new_fnames)
557 0 : return false;
558 : #endif
559 38400 : lex->pstack->fnames = new_fnames;
560 :
561 38400 : new_fnull = REALLOC(lex->pstack->fnull, new_stack_size * sizeof(bool));
562 : #ifdef JSONAPI_USE_PQEXPBUFFER
563 19200 : if (!new_fnull)
564 0 : return false;
565 : #endif
566 38400 : lex->pstack->fnull = new_fnull;
567 :
568 38400 : lex->pstack->stack_size = new_stack_size;
569 : }
570 :
571 2693864 : lex->lex_level += 1;
572 2693864 : return true;
573 : }
574 :
575 : static inline void
576 1054962 : dec_lex_level(JsonLexContext *lex)
577 : {
578 1054962 : lex->lex_level -= 1;
579 1054962 : }
580 :
581 : static inline void
582 8731788 : push_prediction(JsonParserStack *pstack, td_entry entry)
583 : {
584 8731788 : memcpy(pstack->prediction + pstack->pred_index, entry.prod, entry.len);
585 8731788 : pstack->pred_index += entry.len;
586 8731788 : }
587 :
588 : static inline char
589 30289020 : pop_prediction(JsonParserStack *pstack)
590 : {
591 : Assert(pstack->pred_index > 0);
592 30289020 : return pstack->prediction[--pstack->pred_index];
593 : }
594 :
595 : static inline char
596 76 : next_prediction(JsonParserStack *pstack)
597 : {
598 : Assert(pstack->pred_index > 0);
599 76 : return pstack->prediction[pstack->pred_index - 1];
600 : }
601 :
602 : static inline bool
603 30591632 : have_prediction(JsonParserStack *pstack)
604 : {
605 30591632 : return pstack->pred_index > 0;
606 : }
607 :
608 : static inline void
609 1149522 : set_fname(JsonLexContext *lex, char *fname)
610 : {
611 1149522 : lex->pstack->fnames[lex->lex_level] = fname;
612 1149522 : }
613 :
614 : static inline char *
615 1109766 : get_fname(JsonLexContext *lex)
616 : {
617 1109766 : return lex->pstack->fnames[lex->lex_level];
618 : }
619 :
620 : static inline void
621 3836364 : set_fnull(JsonLexContext *lex, bool fnull)
622 : {
623 3836364 : lex->pstack->fnull[lex->lex_level] = fnull;
624 3836364 : }
625 :
626 : static inline bool
627 744 : get_fnull(JsonLexContext *lex)
628 : {
629 744 : return lex->pstack->fnull[lex->lex_level];
630 : }
631 :
632 : /*
633 : * Free memory in a JsonLexContext.
634 : *
635 : * There's no need for this if a *lex pointer was given when the object was
636 : * made, need_escapes was false, and json_errdetail() was not called; or if (in
637 : * backend environment) a memory context delete/reset is imminent.
638 : */
639 : void
640 6500 : freeJsonLexContext(JsonLexContext *lex)
641 : {
642 : static const JsonLexContext empty = {0};
643 :
644 6500 : if (!lex || lex == &failed_oom)
645 0 : return;
646 :
647 6500 : if (lex->flags & JSONLEX_FREE_STRVAL)
648 6112 : jsonapi_destroyStringInfo(lex->strval);
649 :
650 6500 : if (lex->errormsg)
651 0 : jsonapi_destroyStringInfo(lex->errormsg);
652 :
653 6500 : if (lex->incremental)
654 : {
655 222 : jsonapi_termStringInfo(&lex->inc_state->partial_token);
656 222 : FREE(lex->inc_state);
657 222 : FREE(lex->pstack->prediction);
658 222 : FREE(lex->pstack->fnames);
659 222 : FREE(lex->pstack->fnull);
660 222 : FREE(lex->pstack);
661 : }
662 :
663 6500 : if (lex->flags & JSONLEX_FREE_STRUCT)
664 5308 : FREE(lex);
665 : else
666 1192 : *lex = empty;
667 : }
668 :
669 : /*
670 : * pg_parse_json
671 : *
672 : * Publicly visible entry point for the JSON parser.
673 : *
674 : * lex is a lexing context, set up for the json to be processed by calling
675 : * makeJsonLexContext(). sem is a structure of function pointers to semantic
676 : * action routines to be called at appropriate spots during parsing, and a
677 : * pointer to a state object to be passed to those routines.
678 : *
679 : * If FORCE_JSON_PSTACK is defined then the routine will call the non-recursive
680 : * JSON parser. This is a useful way to validate that it's doing the right
681 : * thing at least for non-incremental cases. If this is on we expect to see
682 : * regression diffs relating to error messages about stack depth, but no
683 : * other differences.
684 : */
685 : JsonParseErrorType
686 37258 : pg_parse_json(JsonLexContext *lex, const JsonSemAction *sem)
687 : {
688 : #ifdef FORCE_JSON_PSTACK
689 : /*
690 : * We don't need partial token processing, there is only one chunk. But we
691 : * still need to init the partial token string so that freeJsonLexContext
692 : * works, so perform the full incremental initialization.
693 : */
694 : if (!allocate_incremental_state(lex))
695 : return JSON_OUT_OF_MEMORY;
696 :
697 : return pg_parse_json_incremental(lex, sem, lex->input, lex->input_length, true);
698 :
699 : #else
700 :
701 : JsonTokenType tok;
702 : JsonParseErrorType result;
703 :
704 37258 : if (lex == &failed_oom)
705 0 : return JSON_OUT_OF_MEMORY;
706 37258 : if (lex->incremental)
707 0 : return JSON_INVALID_LEXER_TYPE;
708 :
709 : /* get the initial token */
710 37258 : result = json_lex(lex);
711 37258 : if (result != JSON_SUCCESS)
712 246 : return result;
713 :
714 37012 : tok = lex_peek(lex);
715 :
716 : /* parse by recursive descent */
717 37012 : switch (tok)
718 : {
719 20344 : case JSON_TOKEN_OBJECT_START:
720 20344 : result = parse_object(lex, sem);
721 20276 : break;
722 6844 : case JSON_TOKEN_ARRAY_START:
723 6844 : result = parse_array(lex, sem);
724 6740 : break;
725 9824 : default:
726 9824 : result = parse_scalar(lex, sem); /* json can be a bare scalar */
727 : }
728 :
729 36768 : if (result == JSON_SUCCESS)
730 36326 : result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
731 :
732 36768 : return result;
733 : #endif
734 : }
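/*
 * Callback sketch (illustrative, not from the original source): count the
 * scalars in a document by filling in only the 'scalar' member of
 * JsonSemAction and stashing the counter in semstate.
 *
 *		static JsonParseErrorType
 *		count_scalar(void *state, char *token, JsonTokenType tokentype)
 *		{
 *			(*(int *) state)++;
 *			return JSON_SUCCESS;
 *		}
 *
 *		...
 *		int			count = 0;
 *		JsonSemAction sem = {0};
 *
 *		sem.semstate = &count;
 *		sem.scalar = count_scalar;
 *		result = pg_parse_json(lex, &sem);
 */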
735 :
736 : /*
737 : * json_count_array_elements
738 : *
739 : * Returns number of array elements in lex context at start of array token
740 : * until end of array token at same nesting level.
741 : *
742 : * Designed to be called from array_start routines.
743 : */
744 : JsonParseErrorType
745 6 : json_count_array_elements(JsonLexContext *lex, int *elements)
746 : {
747 : JsonLexContext copylex;
748 : int count;
749 : JsonParseErrorType result;
750 :
751 6 : if (lex == &failed_oom)
752 0 : return JSON_OUT_OF_MEMORY;
753 :
754 : /*
755 : * It's safe to do this with a shallow copy because the lexical routines
756 : * don't scribble on the input. They do scribble on the other pointers
757 : * etc, so doing this with a copy makes that safe.
758 : */
759 6 : memcpy(&copylex, lex, sizeof(JsonLexContext));
760 6 : copylex.need_escapes = false; /* not interested in values here */
761 6 : copylex.lex_level++;
762 :
763 6 : count = 0;
764 6 : result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
765 : JSON_TOKEN_ARRAY_START);
766 6 : if (result != JSON_SUCCESS)
767 0 : return result;
768 6 : if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
769 : {
770 : while (1)
771 : {
772 48 : count++;
773 48 : result = parse_array_element(&copylex, &nullSemAction);
774 48 : if (result != JSON_SUCCESS)
775 0 : return result;
776 48 : if (copylex.token_type != JSON_TOKEN_COMMA)
777 6 : break;
778 42 : result = json_lex(&copylex);
779 42 : if (result != JSON_SUCCESS)
780 0 : return result;
781 : }
782 : }
783 6 : result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
784 : JSON_TOKEN_ARRAY_END);
785 6 : if (result != JSON_SUCCESS)
786 0 : return result;
787 :
788 6 : *elements = count;
789 6 : return JSON_SUCCESS;
790 : }
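/*
 * Sketch of the intended call pattern (illustrative): an array_start
 * callback that sizes a buffer up front.  MyState is hypothetical; the
 * sketch assumes semstate carries a pointer to the lex context.
 *
 *		static JsonParseErrorType
 *		my_array_start(void *state)
 *		{
 *			MyState    *s = (MyState *) state;
 *			int			nelems;
 *			JsonParseErrorType result;
 *
 *			result = json_count_array_elements(s->lex, &nelems);
 *			if (result != JSON_SUCCESS)
 *				return result;
 *			s->elements = palloc(nelems * sizeof(Datum));
 *			return JSON_SUCCESS;
 *		}
 */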
791 :
792 : /*
793 : * pg_parse_json_incremental
794 : *
795 : * Routine for incremental parsing of json. This uses the non-recursive top
796 : * down method of the Dragon Book Algorithm 4.3. It's somewhat slower than
797 : * the Recursive Descent pattern used above, so we only use it for incremental
798 : * parsing of JSON.
799 : *
800 : * The lexing context needs to be set up by a call to
801 : * makeJsonLexContextIncremental(). sem is a structure of function pointers
802 : * to semantic action routines, which should function exactly as those used
803 : * in the recursive descent parser.
804 : *
805 : * This routine can be called repeatedly with chunks of JSON. On the final
806 : * chunk is_last must be set to true. len is the length of the json chunk,
807 : * which does not need to be null terminated.
808 : */
809 : JsonParseErrorType
810 373160 : pg_parse_json_incremental(JsonLexContext *lex,
811 : const JsonSemAction *sem,
812 : const char *json,
813 : size_t len,
814 : bool is_last)
815 : {
816 : JsonTokenType tok;
817 : JsonParseErrorType result;
818 373160 : JsonParseContext ctx = JSON_PARSE_VALUE;
819 373160 : JsonParserStack *pstack = lex->pstack;
820 :
821 373160 : if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
822 0 : return JSON_OUT_OF_MEMORY;
823 373160 : if (!lex->incremental)
824 0 : return JSON_INVALID_LEXER_TYPE;
825 :
826 373160 : lex->input = lex->token_terminator = lex->line_start = json;
827 373160 : lex->input_length = len;
828 373160 : lex->inc_state->is_last_chunk = is_last;
829 :
830 : /* get the initial token */
831 373160 : result = json_lex(lex);
832 373160 : if (result != JSON_SUCCESS)
833 71780 : return result;
834 :
835 301380 : tok = lex_peek(lex);
836 :
837 : /* use prediction stack for incremental parsing */
838 :
839 301380 : if (!have_prediction(pstack))
840 : {
841 2004 : td_entry goal = TD_ENTRY(JSON_PROD_GOAL);
842 :
843 2004 : push_prediction(pstack, goal);
844 : }
845 :
846 30290252 : while (have_prediction(pstack))
847 : {
848 30289020 : char top = pop_prediction(pstack);
849 : td_entry entry;
850 :
851 : /*
852 : * these first two branches are the guts of the Table Driven method
853 : */
854 30289020 : if (top == tok)
855 : {
856 : /*
857 : * tok can only be a terminal symbol, so top must be too. The
858 : * token matches the top of the stack, so get the next token.
859 : */
860 8337480 : if (tok < JSON_TOKEN_END)
861 : {
862 8336248 : result = json_lex(lex);
863 8336248 : if (result != JSON_SUCCESS)
864 300146 : return result;
865 8036800 : tok = lex_peek(lex);
866 : }
867 : }
868 21951540 : else if (IS_NT(top) && (entry = td_parser_table[OFS(top)][tok]).prod != NULL)
869 : {
870 : /*
871 : * the token is in the director set for a production of the
872 : * non-terminal at the top of the stack, so push the reversed RHS
873 : * of the production onto the stack.
874 : */
875 8729784 : push_prediction(pstack, entry);
876 : }
877 13221756 : else if (IS_SEM(top))
878 : {
879 : /*
880 : * top is a semantic action marker, so take action accordingly.
881 : * It's important to have these markers in the prediction stack
882 : * before any token they might need so we don't advance the token
883 : * prematurely. Note in a couple of cases we need to do something
884 : * both before and after the token.
885 : */
886 13221314 : switch (top)
887 : {
888 232632 : case JSON_SEM_OSTART:
889 : {
890 232632 : json_struct_action ostart = sem->object_start;
891 :
892 232632 : if (lex->lex_level >= JSON_TD_MAX_STACK)
893 0 : return JSON_NESTING_TOO_DEEP;
894 :
895 232632 : if (ostart != NULL)
896 : {
897 221956 : result = (*ostart) (sem->semstate);
898 221956 : if (result != JSON_SUCCESS)
899 0 : return result;
900 : }
901 :
902 232632 : if (!inc_lex_level(lex))
903 0 : return JSON_OUT_OF_MEMORY;
904 : }
905 232632 : break;
906 232266 : case JSON_SEM_OEND:
907 : {
908 232266 : json_struct_action oend = sem->object_end;
909 :
910 232266 : dec_lex_level(lex);
911 232266 : if (oend != NULL)
912 : {
913 221954 : result = (*oend) (sem->semstate);
914 221954 : if (result != JSON_SUCCESS)
915 0 : return result;
916 : }
917 : }
918 232266 : break;
919 2461488 : case JSON_SEM_ASTART:
920 : {
921 2461488 : json_struct_action astart = sem->array_start;
922 :
923 2461488 : if (lex->lex_level >= JSON_TD_MAX_STACK)
924 256 : return JSON_NESTING_TOO_DEEP;
925 :
926 2461232 : if (astart != NULL)
927 : {
928 492 : result = (*astart) (sem->semstate);
929 492 : if (result != JSON_SUCCESS)
930 0 : return result;
931 : }
932 :
933 2461232 : if (!inc_lex_level(lex))
934 0 : return JSON_OUT_OF_MEMORY;
935 : }
936 2461232 : break;
937 822696 : case JSON_SEM_AEND:
938 : {
939 822696 : json_struct_action aend = sem->array_end;
940 :
941 822696 : dec_lex_level(lex);
942 822696 : if (aend != NULL)
943 : {
944 492 : result = (*aend) (sem->semstate);
945 492 : if (result != JSON_SUCCESS)
946 0 : return result;
947 : }
948 : }
949 822696 : break;
950 1149522 : case JSON_SEM_OFIELD_INIT:
951 : {
952 : /*
953 : * all we do here is save out the field name. We have
954 : * to wait to get past the ':' to see if the next
955 : * value is null so we can call the semantic routine.
956 : */
957 1149522 : char *fname = NULL;
958 1149522 : json_ofield_action ostart = sem->object_field_start;
959 1149522 : json_ofield_action oend = sem->object_field_end;
960 :
961 1149522 : if ((ostart != NULL || oend != NULL) && lex->need_escapes)
962 : {
963 1109142 : fname = STRDUP(lex->strval->data);
964 1109142 : if (fname == NULL)
965 0 : return JSON_OUT_OF_MEMORY;
966 : }
967 1149522 : set_fname(lex, fname);
968 : }
969 1149522 : break;
970 1149394 : case JSON_SEM_OFIELD_START:
971 : {
972 : /*
973 : * the current token should be the first token of the
974 : * value
975 : */
976 1149394 : bool isnull = tok == JSON_TOKEN_NULL;
977 1149394 : json_ofield_action ostart = sem->object_field_start;
978 :
979 1149394 : set_fnull(lex, isnull);
980 :
981 1149394 : if (ostart != NULL)
982 : {
983 1109142 : char *fname = get_fname(lex);
984 :
985 1109142 : result = (*ostart) (sem->semstate, fname, isnull);
986 1109142 : if (result != JSON_SUCCESS)
987 0 : return result;
988 : }
989 : }
990 1149394 : break;
991 1149356 : case JSON_SEM_OFIELD_END:
992 : {
993 1149356 : json_ofield_action oend = sem->object_field_end;
994 :
995 1149356 : if (oend != NULL)
996 : {
997 624 : char *fname = get_fname(lex);
998 624 : bool isnull = get_fnull(lex);
999 :
1000 624 : result = (*oend) (sem->semstate, fname, isnull);
1001 624 : if (result != JSON_SUCCESS)
1002 0 : return result;
1003 : }
1004 : }
1005 1149356 : break;
1006 2686970 : case JSON_SEM_AELEM_START:
1007 : {
1008 2686970 : json_aelem_action astart = sem->array_element_start;
1009 2686970 : bool isnull = tok == JSON_TOKEN_NULL;
1010 :
1011 2686970 : set_fnull(lex, isnull);
1012 :
1013 2686970 : if (astart != NULL)
1014 : {
1015 120 : result = (*astart) (sem->semstate, isnull);
1016 120 : if (result != JSON_SUCCESS)
1017 0 : return result;
1018 : }
1019 : }
1020 2686970 : break;
1021 1048570 : case JSON_SEM_AELEM_END:
1022 : {
1023 1048570 : json_aelem_action aend = sem->array_element_end;
1024 :
1025 1048570 : if (aend != NULL)
1026 : {
1027 120 : bool isnull = get_fnull(lex);
1028 :
1029 120 : result = (*aend) (sem->semstate, isnull);
1030 120 : if (result != JSON_SUCCESS)
1031 0 : return result;
1032 : }
1033 : }
1034 1048570 : break;
1035 1144210 : case JSON_SEM_SCALAR_INIT:
1036 : {
1037 1144210 : json_scalar_action sfunc = sem->scalar;
1038 :
1039 1144210 : pstack->scalar_val = NULL;
1040 :
1041 1144210 : if (sfunc != NULL)
1042 : {
1043 : /*
1044 : * extract the de-escaped string value, or the raw
1045 : * lexeme
1046 : */
1047 : /*
1048 : * XXX copied from RD parser but looks like a
1049 : * buglet
1050 : */
1051 1108614 : if (tok == JSON_TOKEN_STRING)
1052 : {
1053 886472 : if (lex->need_escapes)
1054 : {
1055 886472 : pstack->scalar_val = STRDUP(lex->strval->data);
1056 886472 : if (pstack->scalar_val == NULL)
1057 0 : return JSON_OUT_OF_MEMORY;
1058 : }
1059 : }
1060 : else
1061 : {
1062 222142 : ptrdiff_t tlen = (lex->token_terminator - lex->token_start);
1063 :
1064 222142 : pstack->scalar_val = ALLOC(tlen + 1);
1065 222142 : if (pstack->scalar_val == NULL)
1066 0 : return JSON_OUT_OF_MEMORY;
1067 :
1068 222142 : memcpy(pstack->scalar_val, lex->token_start, tlen);
1069 222142 : pstack->scalar_val[tlen] = '\0';
1070 : }
1071 1108614 : pstack->scalar_tok = tok;
1072 : }
1073 : }
1074 1144210 : break;
1075 1144210 : case JSON_SEM_SCALAR_CALL:
1076 : {
1077 : /*
1078 : * We'd like to be able to get rid of this business of
1079 : * two bits of scalar action, but we can't. It breaks
1080 : * certain semantic actions which expect that when
1081 : * called the lexer has consumed the item. See for
1082 : * example get_scalar() in jsonfuncs.c.
1083 : */
1084 1144210 : json_scalar_action sfunc = sem->scalar;
1085 :
1086 1144210 : if (sfunc != NULL)
1087 : {
1088 1108614 : result = (*sfunc) (sem->semstate, pstack->scalar_val, pstack->scalar_tok);
1089 1108612 : if (result != JSON_SUCCESS)
1090 0 : return result;
1091 : }
1092 : }
1093 1144208 : break;
1094 0 : default:
1095 : /* should not happen */
1096 0 : break;
1097 : }
1098 : }
1099 : else
1100 : {
1101 : /*
1102 : * The token matched neither the stack top (if it's a terminal)
1103 : * nor any production for the stack top (if it's a non-terminal).
1104 : *
1105 : * Various cases here are Asserted to be not possible, as the
1106 : * token would not appear at the top of the prediction stack
1107 : * unless the lookahead matched.
1108 : */
1109 442 : switch (top)
1110 : {
1111 76 : case JSON_TOKEN_STRING:
1112 76 : if (next_prediction(pstack) == JSON_TOKEN_COLON)
1113 76 : ctx = JSON_PARSE_STRING;
1114 : else
1115 : {
1116 : Assert(false);
1117 0 : ctx = JSON_PARSE_VALUE;
1118 : }
1119 76 : break;
1120 0 : case JSON_TOKEN_NUMBER:
1121 : case JSON_TOKEN_TRUE:
1122 : case JSON_TOKEN_FALSE:
1123 : case JSON_TOKEN_NULL:
1124 : case JSON_TOKEN_ARRAY_START:
1125 : case JSON_TOKEN_OBJECT_START:
1126 : Assert(false);
1127 0 : ctx = JSON_PARSE_VALUE;
1128 0 : break;
1129 0 : case JSON_TOKEN_ARRAY_END:
1130 : Assert(false);
1131 0 : ctx = JSON_PARSE_ARRAY_NEXT;
1132 0 : break;
1133 0 : case JSON_TOKEN_OBJECT_END:
1134 : Assert(false);
1135 0 : ctx = JSON_PARSE_OBJECT_NEXT;
1136 0 : break;
1137 0 : case JSON_TOKEN_COMMA:
1138 : Assert(false);
1139 0 : if (next_prediction(pstack) == JSON_TOKEN_STRING)
1140 0 : ctx = JSON_PARSE_OBJECT_NEXT;
1141 : else
1142 0 : ctx = JSON_PARSE_ARRAY_NEXT;
1143 0 : break;
1144 52 : case JSON_TOKEN_COLON:
1145 52 : ctx = JSON_PARSE_OBJECT_LABEL;
1146 52 : break;
1147 12 : case JSON_TOKEN_END:
1148 12 : ctx = JSON_PARSE_END;
1149 12 : break;
1150 36 : case JSON_NT_MORE_ARRAY_ELEMENTS:
1151 36 : ctx = JSON_PARSE_ARRAY_NEXT;
1152 36 : break;
1153 28 : case JSON_NT_ARRAY_ELEMENTS:
1154 28 : ctx = JSON_PARSE_ARRAY_START;
1155 28 : break;
1156 140 : case JSON_NT_MORE_KEY_PAIRS:
1157 140 : ctx = JSON_PARSE_OBJECT_NEXT;
1158 140 : break;
1159 60 : case JSON_NT_KEY_PAIRS:
1160 60 : ctx = JSON_PARSE_OBJECT_START;
1161 60 : break;
1162 38 : default:
1163 38 : ctx = JSON_PARSE_VALUE;
1164 : }
1165 442 : return report_parse_error(ctx, lex);
1166 : }
1167 : }
1168 :
1169 1232 : return JSON_SUCCESS;
1170 : }
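/*
 * Usage sketch (illustrative): validating a document that arrives in
 * pieces.  Every call on a non-final chunk is expected to return
 * JSON_INCOMPLETE; only the call with is_last = true can return
 * JSON_SUCCESS.  next_chunk() is a placeholder for the caller's I/O.
 *
 *		JsonLexContext lex;
 *		JsonParseErrorType result = JSON_INCOMPLETE;
 *
 *		makeJsonLexContextIncremental(&lex, PG_UTF8, false);
 *		while (result == JSON_INCOMPLETE &&
 *			   next_chunk(&buf, &len, &is_last))
 *			result = pg_parse_json_incremental(&lex, &nullSemAction,
 *											   buf, len, is_last);
 *		freeJsonLexContext(&lex);
 */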
1171 :
1172 : /*
1173 : * Recursive Descent parse routines. There is one for each structural
1174 : * element in a json document:
1175 : * - scalar (string, number, true, false, null)
1176 : * - array ( [ ] )
1177 : * - array element
1178 : * - object ( { } )
1179 : * - object field
1180 : */
1181 : static inline JsonParseErrorType
1182 245740 : parse_scalar(JsonLexContext *lex, const JsonSemAction *sem)
1183 : {
1184 245740 : char *val = NULL;
1185 245740 : json_scalar_action sfunc = sem->scalar;
1186 245740 : JsonTokenType tok = lex_peek(lex);
1187 : JsonParseErrorType result;
1188 :
1189 : /* a scalar must be a string, a number, true, false, or null */
1190 245740 : if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
1191 32556 : tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
1192 : tok != JSON_TOKEN_NULL)
1193 188 : return report_parse_error(JSON_PARSE_VALUE, lex);
1194 :
1195 : /* if no semantic function, just consume the token */
1196 245552 : if (sfunc == NULL)
1197 11762 : return json_lex(lex);
1198 :
1199 : /* extract the de-escaped string value, or the raw lexeme */
1200 233790 : if (lex_peek(lex) == JSON_TOKEN_STRING)
1201 : {
1202 74934 : if (lex->need_escapes)
1203 : {
1204 69258 : val = STRDUP(lex->strval->data);
1205 69258 : if (val == NULL)
1206 0 : return JSON_OUT_OF_MEMORY;
1207 : }
1208 : }
1209 : else
1210 : {
1211 158856 : int len = (lex->token_terminator - lex->token_start);
1212 :
1213 158856 : val = ALLOC(len + 1);
1214 158856 : if (val == NULL)
1215 0 : return JSON_OUT_OF_MEMORY;
1216 :
1217 158856 : memcpy(val, lex->token_start, len);
1218 158856 : val[len] = '\0';
1219 : }
1220 :
1221 : /* consume the token */
1222 233790 : result = json_lex(lex);
1223 233790 : if (result != JSON_SUCCESS)
1224 0 : return result;
1225 :
1226 : /* invoke the callback */
1227 233790 : result = (*sfunc) (sem->semstate, val, tok);
1228 :
1229 233694 : return result;
1230 : }
1231 :
1232 : static JsonParseErrorType
1233 238816 : parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
1234 : {
1235 : /*
1236 : * An object field is "fieldname" : value where value can be a scalar,
1237 : * object or array. Note: in user-facing docs and error messages, we
1238 : * generally call a field name a "key".
1239 : */
1240 :
1241 238816 : char *fname = NULL; /* keep compiler quiet */
1242 238816 : json_ofield_action ostart = sem->object_field_start;
1243 238816 : json_ofield_action oend = sem->object_field_end;
1244 : bool isnull;
1245 : JsonTokenType tok;
1246 : JsonParseErrorType result;
1247 :
1248 238816 : if (lex_peek(lex) != JSON_TOKEN_STRING)
1249 12 : return report_parse_error(JSON_PARSE_STRING, lex);
1250 238804 : if ((ostart != NULL || oend != NULL) && lex->need_escapes)
1251 : {
1252 191286 : fname = STRDUP(lex->strval->data);
1253 191286 : if (fname == NULL)
1254 0 : return JSON_OUT_OF_MEMORY;
1255 : }
1256 238804 : result = json_lex(lex);
1257 238804 : if (result != JSON_SUCCESS)
1258 12 : return result;
1259 :
1260 238792 : result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
1261 238792 : if (result != JSON_SUCCESS)
1262 90 : return result;
1263 :
1264 238702 : tok = lex_peek(lex);
1265 238702 : isnull = tok == JSON_TOKEN_NULL;
1266 :
1267 238702 : if (ostart != NULL)
1268 : {
1269 191202 : result = (*ostart) (sem->semstate, fname, isnull);
1270 191194 : if (result != JSON_SUCCESS)
1271 0 : return result;
1272 : }
1273 :
1274 238694 : switch (tok)
1275 : {
1276 11832 : case JSON_TOKEN_OBJECT_START:
1277 11832 : result = parse_object(lex, sem);
1278 4012 : break;
1279 14822 : case JSON_TOKEN_ARRAY_START:
1280 14822 : result = parse_array(lex, sem);
1281 14788 : break;
1282 212040 : default:
1283 212040 : result = parse_scalar(lex, sem);
1284 : }
1285 230834 : if (result != JSON_SUCCESS)
1286 42 : return result;
1287 :
1288 230792 : if (oend != NULL)
1289 : {
1290 118602 : result = (*oend) (sem->semstate, fname, isnull);
1291 118602 : if (result != JSON_SUCCESS)
1292 0 : return result;
1293 : }
1294 :
1295 230792 : return JSON_SUCCESS;
1296 : }
1297 :
1298 : static JsonParseErrorType
1299 50300 : parse_object(JsonLexContext *lex, const JsonSemAction *sem)
1300 : {
1301 : /*
1302 : * an object is a possibly empty sequence of object fields, separated by
1303 : * commas and surrounded by curly braces.
1304 : */
1305 50300 : json_struct_action ostart = sem->object_start;
1306 50300 : json_struct_action oend = sem->object_end;
1307 : JsonTokenType tok;
1308 : JsonParseErrorType result;
1309 :
1310 : #ifndef FRONTEND
1311 :
1312 : /*
1313 : * TODO: clients need some way to put a bound on stack growth. Parse level
1314 : * limits maybe?
1315 : */
1316 44366 : check_stack_depth();
1317 : #endif
1318 :
1319 50288 : if (ostart != NULL)
1320 : {
1321 30640 : result = (*ostart) (sem->semstate);
1322 30620 : if (result != JSON_SUCCESS)
1323 0 : return result;
1324 : }
1325 :
1326 : /*
1327 : * Data inside an object is at a higher nesting level than the object
1328 : * itself. Note that we increment this after we call the semantic routine
1329 : * for the object start and restore it before we call the routine for the
1330 : * object end.
1331 : */
1332 50268 : lex->lex_level++;
1333 :
1334 : Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START);
1335 50268 : result = json_lex(lex);
1336 50268 : if (result != JSON_SUCCESS)
1337 60 : return result;
1338 :
1339 50208 : tok = lex_peek(lex);
1340 50208 : switch (tok)
1341 : {
1342 47346 : case JSON_TOKEN_STRING:
1343 47346 : result = parse_object_field(lex, sem);
1344 230948 : while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
1345 : {
1346 191470 : result = json_lex(lex);
1347 191470 : if (result != JSON_SUCCESS)
1348 0 : break;
1349 191470 : result = parse_object_field(lex, sem);
1350 : }
1351 39478 : break;
1352 2848 : case JSON_TOKEN_OBJECT_END:
1353 2848 : break;
1354 14 : default:
1355 : /* case of an invalid initial token inside the object */
1356 14 : result = report_parse_error(JSON_PARSE_OBJECT_START, lex);
1357 : }
1358 42340 : if (result != JSON_SUCCESS)
1359 170 : return result;
1360 :
1361 42170 : result = lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
1362 42170 : if (result != JSON_SUCCESS)
1363 36 : return result;
1364 :
1365 42134 : lex->lex_level--;
1366 :
1367 42134 : if (oend != NULL)
1368 : {
1369 24220 : result = (*oend) (sem->semstate);
1370 24174 : if (result != JSON_SUCCESS)
1371 0 : return result;
1372 : }
1373 :
1374 42088 : return JSON_SUCCESS;
1375 : }
1376 :
1377 : static JsonParseErrorType
1378 54114 : parse_array_element(JsonLexContext *lex, const JsonSemAction *sem)
1379 : {
1380 54114 : json_aelem_action astart = sem->array_element_start;
1381 54114 : json_aelem_action aend = sem->array_element_end;
1382 54114 : JsonTokenType tok = lex_peek(lex);
1383 : JsonParseErrorType result;
1384 : bool isnull;
1385 :
1386 54114 : isnull = tok == JSON_TOKEN_NULL;
1387 :
1388 54114 : if (astart != NULL)
1389 : {
1390 7738 : result = (*astart) (sem->semstate, isnull);
1391 7738 : if (result != JSON_SUCCESS)
1392 0 : return result;
1393 : }
1394 :
1395 : /* an array element is any object, array or scalar */
1396 54114 : switch (tok)
1397 : {
1398 18124 : case JSON_TOKEN_OBJECT_START:
1399 18124 : result = parse_object(lex, sem);
1400 18066 : break;
1401 12114 : case JSON_TOKEN_ARRAY_START:
1402 12114 : result = parse_array(lex, sem);
1403 3292 : break;
1404 23876 : default:
1405 23876 : result = parse_scalar(lex, sem);
1406 : }
1407 :
1408 45216 : if (result != JSON_SUCCESS)
1409 66 : return result;
1410 :
1411 45150 : if (aend != NULL)
1412 : {
1413 7228 : result = (*aend) (sem->semstate, isnull);
1414 7216 : if (result != JSON_SUCCESS)
1415 0 : return result;
1416 : }
1417 :
1418 45138 : return JSON_SUCCESS;
1419 : }
1420 :
1421 : static JsonParseErrorType
1422 33780 : parse_array(JsonLexContext *lex, const JsonSemAction *sem)
1423 : {
1424 : /*
1425 : * an array is a possibly empty sequence of array elements, separated by
1426 : * commas and surrounded by square brackets.
1427 : */
1428 33780 : json_struct_action astart = sem->array_start;
1429 33780 : json_struct_action aend = sem->array_end;
1430 : JsonParseErrorType result;
1431 :
1432 : #ifndef FRONTEND
1433 33724 : check_stack_depth();
1434 : #endif
1435 :
1436 33768 : if (astart != NULL)
1437 : {
1438 15778 : result = (*astart) (sem->semstate);
1439 15764 : if (result != JSON_SUCCESS)
1440 0 : return result;
1441 : }
1442 :
1443 : /*
1444 : * Data inside an array is at a higher nesting level than the array
1445 : * itself. Note that we increment this after we call the semantic routine
1446 : * for the array start and restore it before we call the routine for the
1447 : * array end.
1448 : */
1449 33754 : lex->lex_level++;
1450 :
1451 33754 : result = lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
1452 33754 : if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
1453 : {
1454 26292 : result = parse_array_element(lex, sem);
1455 :
1456 45156 : while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
1457 : {
1458 27774 : result = json_lex(lex);
1459 27774 : if (result != JSON_SUCCESS)
1460 0 : break;
1461 27774 : result = parse_array_element(lex, sem);
1462 : }
1463 : }
1464 24844 : if (result != JSON_SUCCESS)
1465 66 : return result;
1466 :
1467 24778 : result = lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
1468 24778 : if (result != JSON_SUCCESS)
1469 24 : return result;
1470 :
1471 24754 : lex->lex_level--;
1472 :
1473 24754 : if (aend != NULL)
1474 : {
1475 9160 : result = (*aend) (sem->semstate);
1476 9136 : if (result != JSON_SUCCESS)
1477 0 : return result;
1478 : }
1479 :
1480 24730 : return JSON_SUCCESS;
1481 : }
1482 :
1483 : /*
1484 : * Lex one token from the input stream.
1485 : *
1486 : * When doing incremental parsing, we can reach the end of the input string
1487 : * without having (or knowing we have) a complete token. If it's not the
1488 : * final chunk of input, the partial token is then saved to the lex
1489 : * structure's ptok StringInfo. On subsequent calls input is appended to this
1490 : * buffer until we have something that we think is a complete token,
1491 : * which is then lexed using a recursive call to json_lex. Processing then
1492 : * continues as normal on subsequent calls.
1493 : *
1494 : * Note that when doing incremental processing, the lex.prev_token_terminator
1495 : * should not be relied on. It could point into a previous input chunk or
1496 : * worse.
1497 : */
1498 : JsonParseErrorType
1499 9911748 : json_lex(JsonLexContext *lex)
1500 : {
1501 : const char *s;
1502 9911748 : const char *const end = lex->input + lex->input_length;
1503 : JsonParseErrorType result;
1504 :
1505 9911748 : if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
1506 0 : return JSON_OUT_OF_MEMORY;
1507 :
1508 9911748 : if (lex->incremental)
1509 : {
1510 8709408 : if (lex->inc_state->partial_completed)
1511 : {
1512 : /*
1513 : * We just lexed a completed partial token on the last call, so
1514 : * reset everything
1515 : */
1516 34414 : jsonapi_resetStringInfo(&(lex->inc_state->partial_token));
1517 34414 : lex->token_terminator = lex->input;
1518 34414 : lex->inc_state->partial_completed = false;
1519 : }
1520 :
1521 : #ifdef JSONAPI_USE_PQEXPBUFFER
1522 : /* Make sure our partial token buffer is valid before using it below. */
1523 1915760 : if (PQExpBufferDataBroken(lex->inc_state->partial_token))
1524 0 : return JSON_OUT_OF_MEMORY;
1525 : #endif
1526 : }
1527 :
1528 9911748 : s = lex->token_terminator;
1529 :
1530 9911748 : if (lex->incremental && lex->inc_state->partial_token.len)
1531 : {
1532 : /*
1533 : * We have a partial token. Extend it and if completed lex it by a
1534 : * recursive call
1535 : */
1536 83186 : jsonapi_StrValType *ptok = &(lex->inc_state->partial_token);
1537 83186 : size_t added = 0;
1538 83186 : bool tok_done = false;
1539 83186 : JsonLexContext dummy_lex = {0};
1540 : JsonParseErrorType partial_result;
1541 :
1542 83186 : if (ptok->data[0] == '"')
1543 : {
1544 : /*
1545 : * It's a string. Accumulate characters until we reach an
1546 : * unescaped '"'.
1547 : */
1548 80062 : int escapes = 0;
1549 :
1550 81346 : for (int i = ptok->len - 1; i > 0; i--)
1551 : {
1552 : /* count the trailing backslashes on the partial token */
1553 75648 : if (ptok->data[i] == '\\')
1554 1284 : escapes++;
1555 : else
1556 74364 : break;
1557 : }
1558 :
1559 607462 : for (size_t i = 0; i < lex->input_length; i++)
1560 : {
1561 559890 : char c = lex->input[i];
1562 :
1563 559890 : jsonapi_appendStringInfoCharMacro(ptok, c);
1564 559890 : added++;
1565 559890 : if (c == '"' && escapes % 2 == 0)
1566 : {
1567 32490 : tok_done = true;
1568 32490 : break;
1569 : }
1570 527400 : if (c == '\\')
1571 2048 : escapes++;
1572 : else
1573 525352 : escapes = 0;
1574 : }
1575 : }
1576 : else
1577 : {
1578 : /* not a string */
1579 3124 : char c = ptok->data[0];
1580 :
1581 3124 : if (c == '-' || (c >= '0' && c <= '9'))
1582 : {
1583 : /* for numbers look for possible numeric continuations */
1584 :
1585 656 : bool numend = false;
1586 :
1587 1840 : for (size_t i = 0; i < lex->input_length && !numend; i++)
1588 : {
1589 1184 : char cc = lex->input[i];
1590 :
1591 1184 : switch (cc)
1592 : {
1593 796 : case '+':
1594 : case '-':
1595 : case 'e':
1596 : case 'E':
1597 : case '0':
1598 : case '1':
1599 : case '2':
1600 : case '3':
1601 : case '4':
1602 : case '5':
1603 : case '6':
1604 : case '7':
1605 : case '8':
1606 : case '9':
1607 : {
1608 796 : jsonapi_appendStringInfoCharMacro(ptok, cc);
1609 796 : added++;
1610 : }
1611 796 : break;
1612 388 : default:
1613 388 : numend = true;
1614 : }
1615 : }
1616 : }
1617 :
1618 : /*
1619 : * Add any remaining alphanumeric chars. This takes care of the
1620 : * {null, false, true} literals as well as any trailing
1621 : * alphanumeric junk on non-string tokens.
1622 : */
1623 6448 : for (size_t i = added; i < lex->input_length; i++)
1624 : {
1625 5468 : char cc = lex->input[i];
1626 :
1627 5468 : if (JSON_ALPHANUMERIC_CHAR(cc))
1628 : {
1629 3324 : jsonapi_appendStringInfoCharMacro(ptok, cc);
1630 3324 : added++;
1631 : }
1632 : else
1633 : {
1634 2144 : tok_done = true;
1635 2144 : break;
1636 : }
1637 : }
1638 3124 : if (added == lex->input_length &&
1639 980 : lex->inc_state->is_last_chunk)
1640 : {
1641 76 : tok_done = true;
1642 : }
1643 : }
1644 :
1645 83186 : if (!tok_done)
1646 : {
1647 : /* We should have consumed the whole chunk in this case. */
1648 : Assert(added == lex->input_length);
1649 :
1650 48476 : if (!lex->inc_state->is_last_chunk)
1651 48436 : return JSON_INCOMPLETE;
1652 :
1653 : /* json_errdetail() needs access to the accumulated token. */
1654 40 : lex->token_start = ptok->data;
1655 40 : lex->token_terminator = ptok->data + ptok->len;
1656 40 : return JSON_INVALID_TOKEN;
1657 : }
1658 :
1659 : /*
1660 : * Everything up to lex->input[added] has been added to the partial
1661 : * token, so move the input past it.
1662 : */
1663 34710 : lex->input += added;
1664 34710 : lex->input_length -= added;
1665 :
1666 34710 : dummy_lex.input = dummy_lex.token_terminator =
1667 34710 : dummy_lex.line_start = ptok->data;
1668 34710 : dummy_lex.line_number = lex->line_number;
1669 34710 : dummy_lex.input_length = ptok->len;
1670 34710 : dummy_lex.input_encoding = lex->input_encoding;
1671 34710 : dummy_lex.incremental = false;
1672 34710 : dummy_lex.need_escapes = lex->need_escapes;
1673 34710 : dummy_lex.strval = lex->strval;
1674 :
1675 34710 : partial_result = json_lex(&dummy_lex);
1676 :
1677 : /*
1678 : * We either have a complete token or an error. In either case we need
1679 : * to point to the partial token data for the semantic or error
1680 : * routines. If it's not an error we'll readjust on the next call to
1681 : * json_lex.
1682 : */
1683 34710 : lex->token_type = dummy_lex.token_type;
1684 34710 : lex->line_number = dummy_lex.line_number;
1685 :
1686 : /*
1687 : * We know the prev_token_terminator must be back in some previous
1688 : * piece of input, so we just make it NULL.
1689 : */
1690 34710 : lex->prev_token_terminator = NULL;
1691 :
1692 : /*
1693 : * Normally token_start would be ptok->data, but it could be later;
1694 : * see json_lex_string's handling of invalid escapes.
1695 : */
1696 34710 : lex->token_start = dummy_lex.token_start;
1697 34710 : lex->token_terminator = dummy_lex.token_terminator;
1698 34710 : if (partial_result == JSON_SUCCESS)
1699 : {
1700 : /* make sure we've used all the input */
1701 34602 : if (lex->token_terminator - lex->token_start != ptok->len)
1702 : {
1703 : Assert(false);
1704 0 : return JSON_INVALID_TOKEN;
1705 : }
1706 :
1707 34602 : lex->inc_state->partial_completed = true;
1708 : }
1709 34710 : return partial_result;
1710 : /* end of partial token processing */
1711 : }
1712 :
1713 : /* Skip leading whitespace. */
1714 16336304 : while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
1715 : {
1716 6507742 : if (*s++ == '\n')
1717 : {
1718 443660 : ++lex->line_number;
1719 443660 : lex->line_start = s;
1720 : }
1721 : }
1722 9828562 : lex->token_start = s;
1723 :
1724 : /* Determine token type. */
1725 9828562 : if (s >= end)
1726 : {
1727 361924 : lex->token_start = NULL;
1728 361924 : lex->prev_token_terminator = lex->token_terminator;
1729 361924 : lex->token_terminator = s;
1730 361924 : lex->token_type = JSON_TOKEN_END;
1731 : }
1732 : else
1733 : {
1734 9466638 : switch (*s)
1735 : {
1736 : /* Single-character token, some kind of punctuation mark. */
1737 283264 : case '{':
1738 283264 : lex->prev_token_terminator = lex->token_terminator;
1739 283264 : lex->token_terminator = s + 1;
1740 283264 : lex->token_type = JSON_TOKEN_OBJECT_START;
1741 283264 : break;
1742 274500 : case '}':
1743 274500 : lex->prev_token_terminator = lex->token_terminator;
1744 274500 : lex->token_terminator = s + 1;
1745 274500 : lex->token_type = JSON_TOKEN_OBJECT_END;
1746 274500 : break;
1747 2495406 : case '[':
1748 2495406 : lex->prev_token_terminator = lex->token_terminator;
1749 2495406 : lex->token_terminator = s + 1;
1750 2495406 : lex->token_type = JSON_TOKEN_ARRAY_START;
1751 2495406 : break;
1752 847612 : case ']':
1753 847612 : lex->prev_token_terminator = lex->token_terminator;
1754 847612 : lex->token_terminator = s + 1;
1755 847612 : lex->token_type = JSON_TOKEN_ARRAY_END;
1756 847612 : break;
1757 1363714 : case ',':
1758 1363714 : lex->prev_token_terminator = lex->token_terminator;
1759 1363714 : lex->token_terminator = s + 1;
1760 1363714 : lex->token_type = JSON_TOKEN_COMMA;
1761 1363714 : break;
1762 1388186 : case ':':
1763 1388186 : lex->prev_token_terminator = lex->token_terminator;
1764 1388186 : lex->token_terminator = s + 1;
1765 1388186 : lex->token_type = JSON_TOKEN_COLON;
1766 1388186 : break;
1767 2413254 : case '"':
1768 : /* string */
1769 2413254 : result = json_lex_string(lex);
1770 2413254 : if (result != JSON_SUCCESS)
1771 32790 : return result;
1772 2380464 : lex->token_type = JSON_TOKEN_STRING;
1773 2380464 : break;
1774 184 : case '-':
1775 : /* Negative number. */
1776 184 : result = json_lex_number(lex, s + 1, NULL, NULL);
1777 184 : if (result != JSON_SUCCESS)
1778 0 : return result;
1779 184 : lex->token_type = JSON_TOKEN_NUMBER;
1780 184 : break;
1781 350620 : case '0':
1782 : case '1':
1783 : case '2':
1784 : case '3':
1785 : case '4':
1786 : case '5':
1787 : case '6':
1788 : case '7':
1789 : case '8':
1790 : case '9':
1791 : /* Positive number. */
1792 350620 : result = json_lex_number(lex, s, NULL, NULL);
1793 350620 : if (result != JSON_SUCCESS)
1794 476 : return result;
1795 350144 : lex->token_type = JSON_TOKEN_NUMBER;
1796 350144 : break;
1797 49898 : default:
1798 : {
1799 : const char *p;
1800 :
1801 : /*
1802 : * We're not dealing with a string, number, legal
1803 : * punctuation mark, or end of string. The only legal
1804 : * tokens we might find here are true, false, and null,
1805 : * but for error reporting purposes we scan until we see a
1806 : * non-alphanumeric character. That way, we can report
1807 : * the whole word as an unexpected token, rather than just
1808 : * some unintuitive prefix thereof.
1809 : */
1810 274768 : for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++)
1811 : /* skip */ ;
1812 :
1813 : /*
1814 : * We got some sort of unexpected punctuation or an
1815 : * otherwise unexpected character, so just complain about
1816 : * that one character.
1817 : */
1818 49898 : if (p == s)
1819 : {
1820 52 : lex->prev_token_terminator = lex->token_terminator;
1821 52 : lex->token_terminator = s + 1;
1822 52 : return JSON_INVALID_TOKEN;
1823 : }
1824 :
1825 49846 : if (lex->incremental && !lex->inc_state->is_last_chunk &&
1826 7832 : p == lex->input + lex->input_length)
1827 : {
1828 1832 : jsonapi_appendBinaryStringInfo(&(lex->inc_state->partial_token), s, end - s);
1829 1832 : return JSON_INCOMPLETE;
1830 : }
1831 :
1832 : /*
1833 : * We've got a real alphanumeric token here. If it
1834 : * happens to be true, false, or null, all is well. If
1835 : * not, error out.
1836 : */
1837 48014 : lex->prev_token_terminator = lex->token_terminator;
1838 48014 : lex->token_terminator = p;
1839 48014 : if (p - s == 4)
1840 : {
1841 19156 : if (memcmp(s, "true", 4) == 0)
1842 7462 : lex->token_type = JSON_TOKEN_TRUE;
1843 11694 : else if (memcmp(s, "null", 4) == 0)
1844 11682 : lex->token_type = JSON_TOKEN_NULL;
1845 : else
1846 12 : return JSON_INVALID_TOKEN;
1847 : }
1848 28858 : else if (p - s == 5 && memcmp(s, "false", 5) == 0)
1849 28652 : lex->token_type = JSON_TOKEN_FALSE;
1850 : else
1851 206 : return JSON_INVALID_TOKEN;
1852 : }
1853 : } /* end of switch */
1854 : }
1855 :
1856 9793194 : if (lex->incremental && lex->token_type == JSON_TOKEN_END && !lex->inc_state->is_last_chunk)
1857 287786 : return JSON_INCOMPLETE;
1858 : else
1859 9505408 : return JSON_SUCCESS;
1860 : }
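/*
 * Editor's illustrative sketch (not part of the numbered source above): one
 * way a caller might drive json_lex()'s incremental mode through the public
 * entry points. Splitting the input mid-token (e.g. "[tr" then "ue]")
 * exercises the partial-token path at the top of json_lex(). The names
 * makeJsonLexContextIncremental, pg_parse_json_incremental, and
 * nullSemAction are assumed from common/jsonapi.h; verify the exact
 * signatures in your tree.
 */
static JsonParseErrorType
example_parse_in_chunks(const char *const *chunks, const size_t *lens,
                        int nchunks)
{
    JsonLexContext lex;
    JsonParseErrorType result = JSON_INCOMPLETE;

    makeJsonLexContextIncremental(&lex, PG_UTF8, false);
    for (int i = 0; i < nchunks; i++)
    {
        /* is_last is true only for the final chunk */
        result = pg_parse_json_incremental(&lex, &nullSemAction,
                                           chunks[i], lens[i],
                                           i == nchunks - 1);
        if (result != JSON_INCOMPLETE)
            break;              /* JSON_SUCCESS or a hard error */
    }
    freeJsonLexContext(&lex);
    return result;
}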
1861 :
1862 : /*
1863 : * The next token in the input stream is known to be a string; lex it.
1864 : *
1865 : * If lex->strval isn't NULL, fill it with the decoded string.
1866 : * Set lex->token_terminator to the end of the decoded input, and in
1867 : * success cases, transfer its previous value to lex->prev_token_terminator.
1868 : * Return JSON_SUCCESS or an error code.
1869 : *
1870 : * Note: be careful that all error exits advance lex->token_terminator
1871 : * to the point after the character we detected the error on.
1872 : */
1873 : static inline JsonParseErrorType
1874 2413254 : json_lex_string(JsonLexContext *lex)
1875 : {
1876 : const char *s;
1877 2413254 : const char *const end = lex->input + lex->input_length;
1878 2413254 : int hi_surrogate = -1;
1879 :
1880 : /* Convenience macros for error exits */
1881 : #define FAIL_OR_INCOMPLETE_AT_CHAR_START(code) \
1882 : do { \
1883 : if (lex->incremental && !lex->inc_state->is_last_chunk) \
1884 : { \
1885 : jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token, \
1886 : lex->token_start, \
1887 : end - lex->token_start); \
1888 : return JSON_INCOMPLETE; \
1889 : } \
1890 : lex->token_terminator = s; \
1891 : return code; \
1892 : } while (0)
1893 : #define FAIL_AT_CHAR_END(code) \
1894 : do { \
1895 : const char *term = s + pg_encoding_mblen(lex->input_encoding, s); \
1896 : lex->token_terminator = (term <= end) ? term : end; \
1897 : return code; \
1898 : } while (0)
1899 :
1900 2413254 : if (lex->need_escapes)
1901 : {
1902 : #ifdef JSONAPI_USE_PQEXPBUFFER
1903 : /* make sure initialization succeeded */
1904 668 : if (lex->strval == NULL)
1905 0 : return JSON_OUT_OF_MEMORY;
1906 : #endif
1907 2265262 : jsonapi_resetStringInfo(lex->strval);
1908 : }
1909 :
1910 : Assert(lex->input_length > 0);
1911 2413254 : s = lex->token_start;
1912 : for (;;)
1913 : {
1914 4827326 : s++;
1915 : /* Premature end of the string. */
1916 4827326 : if (s >= end)
1917 32322 : FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
1918 4795004 : else if (*s == '"')
1919 2380464 : break;
1920 2414540 : else if (*s == '\\')
1921 : {
1922 : /* OK, we have an escape character. */
1923 5656 : s++;
1924 5656 : if (s >= end)
1925 96 : FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
1926 5560 : else if (*s == 'u')
1927 : {
1928 : int i;
1929 2116 : int ch = 0;
1930 :
1931 10168 : for (i = 1; i <= 4; i++)
1932 : {
1933 8216 : s++;
1934 8216 : if (s >= end)
1935 128 : FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
1936 8088 : else if (*s >= '0' && *s <= '9')
1937 5038 : ch = (ch * 16) + (*s - '0');
1938 3050 : else if (*s >= 'a' && *s <= 'f')
1939 2990 : ch = (ch * 16) + (*s - 'a') + 10;
1940 60 : else if (*s >= 'A' && *s <= 'F')
1941 24 : ch = (ch * 16) + (*s - 'A') + 10;
1942 : else
1943 36 : FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT);
1944 : }
1945 1952 : if (lex->need_escapes)
1946 : {
1947 : /*
1948 : * Combine surrogate pairs.
1949 : */
1950 232 : if (is_utf16_surrogate_first(ch))
1951 : {
1952 72 : if (hi_surrogate != -1)
1953 12 : FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_SURROGATE);
1954 60 : hi_surrogate = ch;
1955 60 : continue;
1956 : }
1957 160 : else if (is_utf16_surrogate_second(ch))
1958 : {
1959 60 : if (hi_surrogate == -1)
1960 24 : FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
1961 36 : ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
1962 36 : hi_surrogate = -1;
1963 : }
1964 :
1965 136 : if (hi_surrogate != -1)
1966 0 : FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
1967 :
1968 : /*
1969 : * Reject invalid cases. We can't have a value above
1970 : * 0xFFFF here (since we only accepted 4 hex digits
1971 : * above), so no need to test for out-of-range chars.
1972 : */
1973 136 : if (ch == 0)
1974 : {
1975 : /* We can't allow this, since our TEXT type doesn't */
1976 24 : FAIL_AT_CHAR_END(JSON_UNICODE_CODE_POINT_ZERO);
1977 : }
1978 :
1979 : /*
1980 : * Add the represented character to lex->strval. In the
1981 : * backend, we can let pg_unicode_to_server_noerror()
1982 : * handle any required character set conversion; in
1983 : * frontend, we can only deal with trivial conversions.
1984 : */
1985 : #ifndef FRONTEND
1986 : {
1987 : char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
1988 :
1989 84 : if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
1990 0 : FAIL_AT_CHAR_END(JSON_UNICODE_UNTRANSLATABLE);
1991 84 : appendStringInfoString(lex->strval, cbuf);
1992 : }
1993 : #else
1994 28 : if (lex->input_encoding == PG_UTF8)
1995 : {
1996 : /* OK, we can map the code point to UTF8 easily */
1997 : char utf8str[5];
1998 : int utf8len;
1999 :
2000 28 : unicode_to_utf8(ch, (unsigned char *) utf8str);
2001 28 : utf8len = pg_utf_mblen((unsigned char *) utf8str);
2002 28 : jsonapi_appendBinaryStringInfo(lex->strval, utf8str, utf8len);
2003 : }
2004 0 : else if (ch <= 0x007f)
2005 : {
2006 : /* The ASCII range is the same in all encodings */
2007 0 : jsonapi_appendStringInfoChar(lex->strval, (char) ch);
2008 : }
2009 : else
2010 0 : FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE);
2011 : #endif /* FRONTEND */
2012 : }
2013 : }
2014 3444 : else if (lex->need_escapes)
2015 : {
2016 582 : if (hi_surrogate != -1)
2017 0 : FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
2018 :
2019 582 : switch (*s)
2020 : {
2021 382 : case '"':
2022 : case '\\':
2023 : case '/':
2024 382 : jsonapi_appendStringInfoChar(lex->strval, *s);
2025 382 : break;
2026 40 : case 'b':
2027 40 : jsonapi_appendStringInfoChar(lex->strval, '\b');
2028 40 : break;
2029 4 : case 'f':
2030 4 : jsonapi_appendStringInfoChar(lex->strval, '\f');
2031 4 : break;
2032 58 : case 'n':
2033 58 : jsonapi_appendStringInfoChar(lex->strval, '\n');
2034 58 : break;
2035 4 : case 'r':
2036 4 : jsonapi_appendStringInfoChar(lex->strval, '\r');
2037 4 : break;
2038 88 : case 't':
2039 88 : jsonapi_appendStringInfoChar(lex->strval, '\t');
2040 88 : break;
2041 6 : default:
2042 :
2043 : /*
2044 : * Not a valid string escape, so signal error. We
2045 : * adjust token_start so that just the escape sequence
2046 : * is reported, not the whole string.
2047 : */
2048 6 : lex->token_start = s;
2049 6 : FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
2050 : }
2051 : }
2052 2862 : else if (strchr("\"\\/bfnrt", *s) == NULL)
2053 : {
2054 : /*
2055 : * Simpler processing if we're not bothered about de-escaping.
2056 : *
2057 : * It's very tempting to remove the strchr() call here and
2058 : * replace it with a switch statement, but testing so far has
2059 : * shown it's not a performance win.
2060 : */
2061 66 : lex->token_start = s;
2062 66 : FAIL_AT_CHAR_END(JSON_ESCAPING_INVALID);
2063 : }
2064 : }
2065 : else
2066 : {
2067 2408884 : const char *p = s;
2068 :
2069 2408884 : if (hi_surrogate != -1)
2070 12 : FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE);
2071 :
2072 : /*
2073 : * Skip to the first byte that requires special handling, so we can
2074 : * batch calls to jsonapi_appendBinaryStringInfo. (A Vector8 covers
2075 : * 8 or 16 bytes at a time, depending on SIMD support.) */
2076 3058154 : while (p < end - sizeof(Vector8) &&
2077 2952058 : !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
2078 2949964 : !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
2079 649282 : !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
2080 649282 : p += sizeof(Vector8);
2081 :
2082 18988700 : for (; p < end; p++)
2083 : {
2084 18961980 : if (*p == '\\' || *p == '"')
2085 : break;
2086 16579892 : else if ((unsigned char) *p <= 31)
2087 : {
2089 : /*
2090 : * Per RFC4627, these characters MUST be escaped. Since *p isn't
2091 : * printable, exclude it from the context string.
2092 : */
2093 64 : lex->token_terminator = p;
2094 64 : return JSON_ESCAPING_REQUIRED;
2095 : }
2096 : }
2097 :
2098 2408808 : if (lex->need_escapes)
2099 2265432 : jsonapi_appendBinaryStringInfo(lex->strval, s, p - s);
2100 :
2101 : /*
2102 : * s will be incremented at the top of the loop, so set it to just
2103 : * behind our lookahead position.
2104 : */
2105 2408808 : s = p - 1;
2106 : }
2107 : }
2108 :
2109 2380464 : if (hi_surrogate != -1)
2110 : {
2111 0 : lex->token_terminator = s + 1;
2112 0 : return JSON_UNICODE_LOW_SURROGATE;
2113 : }
2114 :
2115 : #ifdef JSONAPI_USE_PQEXPBUFFER
2116 34448 : if (lex->need_escapes && PQExpBufferBroken(lex->strval))
2117 0 : return JSON_OUT_OF_MEMORY;
2118 : #endif
2119 :
2120 : /* Hooray, we found the end of the string! */
2121 2380464 : lex->prev_token_terminator = lex->token_terminator;
2122 2380464 : lex->token_terminator = s + 1;
2123 2380464 : return JSON_SUCCESS;
2124 :
2125 : #undef FAIL_OR_INCOMPLETE_AT_CHAR_START
2126 : #undef FAIL_AT_CHAR_END
2127 : }
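/*
 * Editor's illustrative sketch: the UTF-16 surrogate-pair arithmetic relied
 * on above. For example, the escapes \uD83D\uDE00 combine to U+1F600. This
 * mirrors what pg_wchar.h's surrogate_pair_to_codepoint() is expected to
 * compute (the standard combination formula); it is shown here only for
 * clarity.
 */
static inline uint32
example_combine_surrogates(uint32 hi, uint32 lo)
{
    /* hi must be in [0xD800, 0xDBFF] and lo in [0xDC00, 0xDFFF] */
    return ((hi & 0x3FF) << 10) + (lo & 0x3FF) + 0x10000;
}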
2128 :
2129 : /*
2130 : * The next token in the input stream is known to be a number; lex it.
2131 : *
2132 : * In JSON, a number consists of four parts:
2133 : *
2134 : * (1) An optional minus sign ('-').
2135 : *
2136 : * (2) Either a single '0', or a string of one or more digits that does not
2137 : * begin with a '0'.
2138 : *
2139 : * (3) An optional decimal part, consisting of a period ('.') followed by
2140 : * one or more digits. (Note: While this part can be omitted
2141 : * completely, it's not OK to have only the decimal point without
2142 : * any digits afterwards.)
2143 : *
2144 : * (4) An optional exponent part, consisting of 'e' or 'E', optionally
2145 : * followed by '+' or '-', followed by one or more digits. (Note:
2146 : * As with the decimal part, if 'e' or 'E' is present, it must be
2147 : * followed by at least one digit.)
2148 : *
2149 : * The 's' argument to this function points to the ostensible beginning
2150 : * of part 2 - i.e. the character after any optional minus sign, or the
2151 : * first character of the string if there is none.
2152 : *
2153 : * If num_err is not NULL, we store an error flag in *num_err rather than
2154 : * returning JSON_INVALID_TOKEN for a badly-formed number. Also, if total_len
2155 : * is not NULL, *total_len receives the distance from lex->input to the token end+1.
2156 : */
2157 : static inline JsonParseErrorType
2158 350850 : json_lex_number(JsonLexContext *lex, const char *s,
2159 : bool *num_err, size_t *total_len)
2160 : {
2161 350850 : bool error = false;
2162 350850 : int len = s - lex->input;
2163 :
2164 : /* Part (1): leading sign indicator. */
2165 : /* Caller already did this for us; so do nothing. */
2166 :
2167 : /* Part (2): parse main digit string. */
2168 350850 : if (len < lex->input_length && *s == '0')
2169 : {
2170 56072 : s++;
2171 56072 : len++;
2172 : }
2173 294778 : else if (len < lex->input_length && *s >= '1' && *s <= '9')
2174 : {
2175 : do
2176 : {
2177 1024730 : s++;
2178 1024730 : len++;
2179 1024730 : } while (len < lex->input_length && *s >= '0' && *s <= '9');
2180 : }
2181 : else
2182 2 : error = true;
2183 :
2184 : /* Part (3): parse optional decimal portion. */
2185 350850 : if (len < lex->input_length && *s == '.')
2186 : {
2187 37054 : s++;
2188 37054 : len++;
2189 37054 : if (len == lex->input_length || *s < '0' || *s > '9')
2190 12 : error = true;
2191 : else
2192 : {
2193 : do
2194 : {
2195 91148 : s++;
2196 91148 : len++;
2197 91148 : } while (len < lex->input_length && *s >= '0' && *s <= '9');
2198 : }
2199 : }
2200 :
2201 : /* Part (4): parse optional exponent. */
2202 350850 : if (len < lex->input_length && (*s == 'e' || *s == 'E'))
2203 : {
2204 94 : s++;
2205 94 : len++;
2206 94 : if (len < lex->input_length && (*s == '+' || *s == '-'))
2207 : {
2208 10 : s++;
2209 10 : len++;
2210 : }
2211 94 : if (len == lex->input_length || *s < '0' || *s > '9')
2212 12 : error = true;
2213 : else
2214 : {
2215 : do
2216 : {
2217 284 : s++;
2218 284 : len++;
2219 284 : } while (len < lex->input_length && *s >= '0' && *s <= '9');
2220 : }
2221 : }
2222 :
2223 : /*
2224 : * Check for trailing garbage. As in json_lex(), any alphanumeric stuff
2225 : * here should be considered part of the token for error-reporting
2226 : * purposes.
2227 : */
2228 351102 : for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
2229 252 : error = true;
2230 :
2231 350850 : if (total_len != NULL)
2232 46 : *total_len = len;
2233 :
2234 350850 : if (lex->incremental && !lex->inc_state->is_last_chunk &&
2235 119516 : len >= lex->input_length)
2236 : {
2237 388 : jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token,
2238 388 : lex->token_start, s - lex->token_start);
2239 388 : if (num_err != NULL)
2240 0 : *num_err = error;
2241 :
2242 388 : return JSON_INCOMPLETE;
2243 : }
2244 350462 : else if (num_err != NULL)
2245 : {
2246 : /* let the caller handle any error */
2247 46 : *num_err = error;
2248 : }
2249 : else
2250 : {
2251 : /* return token endpoint */
2252 350416 : lex->prev_token_terminator = lex->token_terminator;
2253 350416 : lex->token_terminator = s;
2254 : /* handle error if any */
2255 350416 : if (error)
2256 88 : return JSON_INVALID_TOKEN;
2257 : }
2258 :
2259 350374 : return JSON_SUCCESS;
2260 : }
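/*
 * Editor's illustrative sketch: expectations that follow directly from the
 * number grammar documented above, expressed against IsValidJsonNumber()
 * (declared in common/jsonapi.h and assumed to wrap json_lex_number's
 * num_err convention).
 */
static void
example_number_grammar(void)
{
    Assert(IsValidJsonNumber("0", 1));
    Assert(IsValidJsonNumber("-12", 3));
    Assert(IsValidJsonNumber("3.14", 4));
    Assert(IsValidJsonNumber("4e-2", 4));

    Assert(!IsValidJsonNumber("01", 2));    /* leading zero */
    Assert(!IsValidJsonNumber("1.", 2));    /* '.' without digits */
    Assert(!IsValidJsonNumber(".5", 2));    /* missing integer part */
    Assert(!IsValidJsonNumber("1e+", 3));   /* exponent without digits */
}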
2261 :
2262 : /*
2263 : * Report a parse error.
2264 : *
2265 : * lex->token_start and lex->token_terminator must identify the current token.
2266 : */
2267 : static JsonParseErrorType
2268 764 : report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
2269 : {
2270 : /* Handle case where the input ended prematurely. */
2271 764 : if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
2272 202 : return JSON_EXPECTED_MORE;
2273 :
2274 : /* Otherwise choose the error type based on the parsing context. */
2275 562 : switch (ctx)
2276 : {
2277 36 : case JSON_PARSE_END:
2278 36 : return JSON_EXPECTED_END;
2279 138 : case JSON_PARSE_VALUE:
2280 138 : return JSON_EXPECTED_JSON;
2281 88 : case JSON_PARSE_STRING:
2282 88 : return JSON_EXPECTED_STRING;
2283 28 : case JSON_PARSE_ARRAY_START:
2284 28 : return JSON_EXPECTED_ARRAY_FIRST;
2285 36 : case JSON_PARSE_ARRAY_NEXT:
2286 36 : return JSON_EXPECTED_ARRAY_NEXT;
2287 68 : case JSON_PARSE_OBJECT_START:
2288 68 : return JSON_EXPECTED_OBJECT_FIRST;
2289 76 : case JSON_PARSE_OBJECT_LABEL:
2290 76 : return JSON_EXPECTED_COLON;
2291 92 : case JSON_PARSE_OBJECT_NEXT:
2292 92 : return JSON_EXPECTED_OBJECT_NEXT;
2293 0 : case JSON_PARSE_OBJECT_COMMA:
2294 0 : return JSON_EXPECTED_STRING;
2295 : }
2296 :
2297 : /*
2298 : * We don't use a default: case, so that the compiler will warn about
2299 : * unhandled enum values.
2300 : */
2301 : Assert(false);
2302 0 : return JSON_SUCCESS; /* silence stupider compilers */
2303 : }
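/*
 * Editor's illustrative sketch: how a recursive-descent production is
 * expected to use report_parse_error(), passing its parse context so the
 * resulting error names what was expected. lex_expect(), earlier in this
 * file, follows the same pattern.
 */
static JsonParseErrorType
example_expect_colon(JsonLexContext *lex)
{
    if (lex_peek(lex) != JSON_TOKEN_COLON)
        return report_parse_error(JSON_PARSE_OBJECT_LABEL, lex);
    return json_lex(lex);       /* consume the colon */
}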
2304 :
2305 : /*
2306 : * Construct an (already translated) detail message for a JSON error.
2307 : *
2308 : * The returned pointer should not be freed, the allocation is either static
2309 : * The returned pointer should not be freed; the allocation is either static
2310 : */
2311 : char *
2312 1404 : json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
2313 : {
2314 1404 : if (error == JSON_OUT_OF_MEMORY || lex == &failed_oom)
2315 : {
2316 : /* Short circuit. Allocating anything for this case is unhelpful. */
2317 0 : return _("out of memory");
2318 : }
2319 :
2320 1404 : if (lex->errormsg)
2321 0 : jsonapi_resetStringInfo(lex->errormsg);
2322 : else
2323 1404 : lex->errormsg = jsonapi_makeStringInfo();
2324 :
2325 : /*
2326 : * A helper for error messages that should print the current token. The
2327 : * format must contain exactly one %.*s specifier.
2328 : */
2329 : #define json_token_error(lex, format) \
2330 : jsonapi_appendStringInfo((lex)->errormsg, _(format), \
2331 : (int) ((lex)->token_terminator - (lex)->token_start), \
2332 : (lex)->token_start);
2333 :
2334 1404 : switch (error)
2335 : {
2336 0 : case JSON_INCOMPLETE:
2337 : case JSON_SUCCESS:
2338 : /* fall through to the error code after switch */
2339 0 : break;
2340 0 : case JSON_INVALID_LEXER_TYPE:
2341 0 : if (lex->incremental)
2342 0 : return _("Recursive descent parser cannot use incremental lexer.");
2343 : else
2344 0 : return _("Incremental parser requires incremental lexer.");
2345 256 : case JSON_NESTING_TOO_DEEP:
2346 256 : return (_("JSON nested too deep, maximum permitted depth is 6400."));
2347 72 : case JSON_ESCAPING_INVALID:
2348 72 : json_token_error(lex, "Escape sequence \"\\%.*s\" is invalid.");
2349 72 : break;
2350 64 : case JSON_ESCAPING_REQUIRED:
2351 64 : jsonapi_appendStringInfo(lex->errormsg,
2352 64 : _("Character with value 0x%02x must be escaped."),
2353 64 : (unsigned char) *(lex->token_terminator));
2354 64 : break;
2355 36 : case JSON_EXPECTED_END:
2356 36 : json_token_error(lex, "Expected end of input, but found \"%.*s\".");
2357 36 : break;
2358 28 : case JSON_EXPECTED_ARRAY_FIRST:
2359 28 : json_token_error(lex, "Expected array element or \"]\", but found \"%.*s\".");
2360 28 : break;
2361 36 : case JSON_EXPECTED_ARRAY_NEXT:
2362 36 : json_token_error(lex, "Expected \",\" or \"]\", but found \"%.*s\".");
2363 36 : break;
2364 76 : case JSON_EXPECTED_COLON:
2365 76 : json_token_error(lex, "Expected \":\", but found \"%.*s\".");
2366 76 : break;
2367 84 : case JSON_EXPECTED_JSON:
2368 84 : json_token_error(lex, "Expected JSON value, but found \"%.*s\".");
2369 84 : break;
2370 126 : case JSON_EXPECTED_MORE:
2371 126 : return _("The input string ended unexpectedly.");
2372 68 : case JSON_EXPECTED_OBJECT_FIRST:
2373 68 : json_token_error(lex, "Expected string or \"}\", but found \"%.*s\".");
2374 68 : break;
2375 92 : case JSON_EXPECTED_OBJECT_NEXT:
2376 92 : json_token_error(lex, "Expected \",\" or \"}\", but found \"%.*s\".");
2377 92 : break;
2378 88 : case JSON_EXPECTED_STRING:
2379 88 : json_token_error(lex, "Expected string, but found \"%.*s\".");
2380 88 : break;
2381 270 : case JSON_INVALID_TOKEN:
2382 270 : json_token_error(lex, "Token \"%.*s\" is invalid.");
2383 270 : break;
2384 0 : case JSON_OUT_OF_MEMORY:
2385 : /* should have been handled above; use the error path */
2386 0 : break;
2387 24 : case JSON_UNICODE_CODE_POINT_ZERO:
2388 24 : return _("\\u0000 cannot be converted to text.");
2389 36 : case JSON_UNICODE_ESCAPE_FORMAT:
2390 36 : return _("\"\\u\" must be followed by four hexadecimal digits.");
2391 0 : case JSON_UNICODE_HIGH_ESCAPE:
2392 : /* Note: this case is only reachable in frontend, not backend. */
2393 0 : return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
2394 0 : case JSON_UNICODE_UNTRANSLATABLE:
2395 :
2396 : /*
2397 : * Note: this case is only reachable in backend and not frontend.
2398 : * #ifdef it away so the frontend doesn't try to link against
2399 : * backend functionality.
2400 : */
2401 : #ifndef FRONTEND
2402 0 : return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."),
2403 : GetDatabaseEncodingName());
2404 : #else
2405 : Assert(false);
2406 0 : break;
2407 : #endif
2408 12 : case JSON_UNICODE_HIGH_SURROGATE:
2409 12 : return _("Unicode high surrogate must not follow a high surrogate.");
2410 36 : case JSON_UNICODE_LOW_SURROGATE:
2411 36 : return _("Unicode low surrogate must follow a high surrogate.");
2412 0 : case JSON_SEM_ACTION_FAILED:
2413 : /* fall through to the error code after switch */
2414 0 : break;
2415 : }
2416 : #undef json_token_error
2417 :
2418 : /* Note that lex->errormsg can be NULL in shlib code. */
2419 914 : if (lex->errormsg && lex->errormsg->len == 0)
2420 : {
2421 : /*
2422 : * We don't use a default: case, so that the compiler will warn about
2423 : * unhandled enum values. But this needs to be here anyway to cover
2424 : * the possibility of an incorrect input.
2425 : */
2426 0 : jsonapi_appendStringInfo(lex->errormsg,
2427 : "unexpected json parse error type: %d",
2428 : (int) error);
2429 : }
2430 :
2431 : #ifdef JSONAPI_USE_PQEXPBUFFER
2432 316 : if (PQExpBufferBroken(lex->errormsg))
2433 0 : return _("out of memory while constructing error description");
2434 : #endif
2435 :
2436 914 : return lex->errormsg->data;
2437 : }
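/*
 * Editor's illustrative sketch: a typical end-to-end error-reporting flow
 * for a non-incremental caller, combining pg_parse_json() with
 * json_errdetail(). Backend-only as written (it uses elog); a frontend
 * caller would substitute its own logging. Signatures are assumed from
 * common/jsonapi.h.
 */
#ifndef FRONTEND
static bool
example_check_json(const char *json)
{
    JsonLexContext lex;
    JsonParseErrorType result;

    makeJsonLexContextCstringLen(&lex, json, strlen(json), PG_UTF8, false);
    result = pg_parse_json(&lex, &nullSemAction);
    if (result != JSON_SUCCESS)
        elog(LOG, "invalid JSON: %s", json_errdetail(result, &lex));
    freeJsonLexContext(&lex);
    return (result == JSON_SUCCESS);
}
#endif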
|