Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * parse_manifest.c
4 : * Parse a backup manifest in JSON format.
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * src/common/parse_manifest.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 :
14 : #include "postgres_fe.h"
15 :
16 : #include "common/jsonapi.h"
17 : #include "common/parse_manifest.h"
18 :
19 : /*
20 : * Semantic states for JSON manifest parsing.
 *
 * Each value names what the semantic callbacks expect to see next. A JSON
 * event that arrives in a state which does not allow it is reported through
 * json_manifest_parse_failure().
21 : */
22 : typedef enum
23 : {
24 : JM_EXPECT_TOPLEVEL_START, /* initial state: the manifest's opening object */
25 : JM_EXPECT_TOPLEVEL_END, /* Manifest-Checksum seen: end of the manifest object */
26 : JM_EXPECT_TOPLEVEL_FIELD, /* a key of the top-level object */
27 : JM_EXPECT_VERSION_VALUE, /* value of PostgreSQL-Backup-Manifest-Version */
28 : JM_EXPECT_SYSTEM_IDENTIFIER_VALUE, /* value of System-Identifier */
29 : JM_EXPECT_FILES_START, /* start of the Files array */
30 : JM_EXPECT_FILES_NEXT, /* next file object, or end of the Files array */
31 : JM_EXPECT_THIS_FILE_FIELD, /* a key of the current file object, or its end */
32 : JM_EXPECT_THIS_FILE_VALUE, /* value for the file key just seen */
33 : JM_EXPECT_WAL_RANGES_START, /* start of the WAL-Ranges array */
34 : JM_EXPECT_WAL_RANGES_NEXT, /* next WAL range object, or end of the array */
35 : JM_EXPECT_THIS_WAL_RANGE_FIELD, /* a key of the current WAL range, or its end */
36 : JM_EXPECT_THIS_WAL_RANGE_VALUE, /* value for the WAL range key just seen */
37 : JM_EXPECT_MANIFEST_CHECKSUM_VALUE, /* value of Manifest-Checksum */
38 : JM_EXPECT_EOF, /* manifest object closed; no further events allowed */
39 : } JsonManifestSemanticState;
40 :
41 : /*
42 : * Possible fields for one file as described by the manifest.
 *
 * json_manifest_object_field_start() records which key was seen so that
 * json_manifest_scalar() knows where to store the value that follows.
43 : */
44 : typedef enum
45 : {
46 : JMFF_PATH, /* "Path" */
47 : JMFF_ENCODED_PATH, /* "Encoded-Path" (hex-encoded path name) */
48 : JMFF_SIZE, /* "Size" */
49 : JMFF_LAST_MODIFIED, /* "Last-Modified"; the value is discarded */
50 : JMFF_CHECKSUM_ALGORITHM, /* "Checksum-Algorithm" */
51 : JMFF_CHECKSUM, /* "Checksum" (hex-encoded) */
52 : } JsonManifestFileField;
53 :
54 : /*
55 : * Possible fields for one WAL range as described by the manifest.
56 : */
57 : typedef enum
58 : {
59 : JMWRF_TIMELINE, /* "Timeline" */
60 : JMWRF_START_LSN, /* "Start-LSN" */
61 : JMWRF_END_LSN, /* "End-LSN" */
62 : } JsonManifestWALRangeField;
63 :
64 : /*
65 : * Internal state used while decoding the JSON-format backup manifest.
 *
 * The char * members hold tokens handed to the scalar callback; they are
 * kept as text until the enclosing object ends and is finalized.
66 : */
67 : typedef struct
68 : {
69 : JsonManifestParseContext *context; /* caller's callbacks */
70 : JsonManifestSemanticState state; /* what we expect to see next */
71 :
72 : /* These fields are used for parsing objects in the list of files. */
73 : JsonManifestFileField file_field; /* key whose value is pending */
74 : char *pathname; /* "Path", or the decoded "Encoded-Path" */
75 : char *encoded_pathname; /* "Encoded-Path" as hex text */
76 : char *size; /* "Size" as text */
77 : char *algorithm; /* "Checksum-Algorithm" as text */
78 : pg_checksum_type checksum_algorithm; /* NOTE(review): not referenced in this file */
79 : char *checksum; /* "Checksum" as hex text */
80 :
81 : /* These fields are used for parsing objects in the list of WAL ranges. */
82 : JsonManifestWALRangeField wal_range_field; /* key whose value is pending */
83 : char *timeline; /* "Timeline" as text */
84 : char *start_lsn; /* "Start-LSN" as text */
85 : char *end_lsn; /* "End-LSN" as text */
86 :
87 : /* Miscellaneous other stuff. */
88 : bool saw_version_field; /* set once the first top-level key is examined */
89 : char *manifest_version; /* version indicator as text */
90 : char *manifest_system_identifier; /* "System-Identifier" as text */
91 : char *manifest_checksum; /* "Manifest-Checksum" as hex text */
92 : } JsonManifestParseState;
93 :
94 : /* typedef appears in parse_manifest.h */
/*
 * State carried between calls of json_parse_manifest_incremental_chunk().
 */
95 : struct JsonManifestParseIncrementalState
96 : {
97 : JsonLexContext lex; /* incremental JSON lexer state */
98 : JsonSemAction sem; /* callbacks; semstate is a JsonManifestParseState */
99 : pg_cryptohash_ctx *manifest_ctx; /* SHA-256 context fed with each chunk */
100 : };
101 :
/* Semantic-action callbacks installed into JsonSemAction. */
102 : static JsonParseErrorType json_manifest_object_start(void *state);
103 : static JsonParseErrorType json_manifest_object_end(void *state);
104 : static JsonParseErrorType json_manifest_array_start(void *state);
105 : static JsonParseErrorType json_manifest_array_end(void *state);
106 : static JsonParseErrorType json_manifest_object_field_start(void *state, char *fname,
107 : bool isnull);
108 : static JsonParseErrorType json_manifest_scalar(void *state, char *token,
109 : JsonTokenType tokentype);
/* Validation of collected values, checksum verification, error reporting. */
110 : static void json_manifest_finalize_version(JsonManifestParseState *parse);
111 : static void json_manifest_finalize_system_identifier(JsonManifestParseState *parse);
112 : static void json_manifest_finalize_file(JsonManifestParseState *parse);
113 : static void json_manifest_finalize_wal_range(JsonManifestParseState *parse);
114 : static void verify_manifest_checksum(JsonManifestParseState *parse,
115 : const char *buffer, size_t size,
116 : pg_cryptohash_ctx *incr_ctx);
117 : static void json_manifest_parse_failure(JsonManifestParseContext *context,
118 : char *msg);
119 :
/* Low-level text decoding helpers. */
120 : static int hexdecode_char(char c);
121 : static bool hexdecode_string(uint8 *result, char *input, int nbytes);
122 : static bool parse_xlogrecptr(XLogRecPtr *result, char *input);
123 :
124 : /*
125 : * Set up for incremental parsing of the manifest.
126 : */
127 :
128 : JsonManifestParseIncrementalState *
129 226 : json_parse_manifest_incremental_init(JsonManifestParseContext *context)
130 : {
131 : JsonManifestParseIncrementalState *incstate;
132 : JsonManifestParseState *parse;
133 : pg_cryptohash_ctx *manifest_ctx;
134 :
135 226 : incstate = palloc(sizeof(JsonManifestParseIncrementalState));
136 226 : parse = palloc(sizeof(JsonManifestParseState));
137 :
138 226 : parse->context = context;
139 226 : parse->state = JM_EXPECT_TOPLEVEL_START;
140 226 : parse->saw_version_field = false;
141 :
142 226 : makeJsonLexContextIncremental(&(incstate->lex), PG_UTF8, true);
143 :
144 226 : incstate->sem.semstate = parse;
145 226 : incstate->sem.object_start = json_manifest_object_start;
146 226 : incstate->sem.object_end = json_manifest_object_end;
147 226 : incstate->sem.array_start = json_manifest_array_start;
148 226 : incstate->sem.array_end = json_manifest_array_end;
149 226 : incstate->sem.object_field_start = json_manifest_object_field_start;
150 226 : incstate->sem.object_field_end = NULL;
151 226 : incstate->sem.array_element_start = NULL;
152 226 : incstate->sem.array_element_end = NULL;
153 226 : incstate->sem.scalar = json_manifest_scalar;
154 :
155 226 : manifest_ctx = pg_cryptohash_create(PG_SHA256);
156 226 : if (manifest_ctx == NULL)
157 0 : context->error_cb(context, "out of memory");
158 226 : if (pg_cryptohash_init(manifest_ctx) < 0)
159 0 : context->error_cb(context, "could not initialize checksum of manifest");
160 226 : incstate->manifest_ctx = manifest_ctx;
161 :
162 226 : return incstate;
163 : }
164 :
165 : /*
166 : * Free an incremental state object and its contents.
167 : */
168 : void
169 220 : json_parse_manifest_incremental_shutdown(JsonManifestParseIncrementalState *incstate)
170 : {
171 220 : pfree(incstate->sem.semstate);
172 220 : freeJsonLexContext(&(incstate->lex));
173 : /* incstate->manifest_ctx has already been freed */
174 220 : pfree(incstate);
175 220 : }
176 :
177 : /*
178 : * parse the manifest in pieces.
179 : *
180 : * The caller must ensure that the final piece contains the final lines
181 : * with the complete checksum.
182 : */
183 :
184 : void
185 450 : json_parse_manifest_incremental_chunk(JsonManifestParseIncrementalState *incstate,
186 : const char *chunk, size_t size, bool is_last)
187 : {
188 : JsonParseErrorType res,
189 : expected;
190 450 : JsonManifestParseState *parse = incstate->sem.semstate;
191 450 : JsonManifestParseContext *context = parse->context;
192 :
193 450 : res = pg_parse_json_incremental(&(incstate->lex), &(incstate->sem),
194 : chunk, size, is_last);
195 :
196 448 : expected = is_last ? JSON_SUCCESS : JSON_INCOMPLETE;
197 :
198 448 : if (res != expected)
199 0 : json_manifest_parse_failure(context,
200 : json_errdetail(res, &(incstate->lex)));
201 :
202 448 : if (is_last && parse->state != JM_EXPECT_EOF)
203 0 : json_manifest_parse_failure(context, "manifest ended unexpectedly");
204 :
205 448 : if (!is_last)
206 : {
207 224 : if (pg_cryptohash_update(incstate->manifest_ctx,
208 : (const uint8 *) chunk, size) < 0)
209 0 : context->error_cb(context, "could not update checksum of manifest");
210 : }
211 : else
212 : {
213 224 : verify_manifest_checksum(parse, chunk, size, incstate->manifest_ctx);
214 : }
215 444 : }
216 :
217 :
218 : /*
219 : * Main entrypoint to parse a JSON-format backup manifest.
220 : *
221 : * Caller should set up the parsing context and then invoke this function.
222 : * For each file whose information is extracted from the manifest,
223 : * context->per_file_cb is invoked. In case of trouble, context->error_cb is
224 : * invoked and is expected not to return.
225 : */
226 : void
227 66 : json_parse_manifest(JsonManifestParseContext *context, const char *buffer,
228 : size_t size)
229 : {
230 : JsonLexContext *lex;
231 : JsonParseErrorType json_error;
232 : JsonSemAction sem;
233 : JsonManifestParseState parse;
234 :
235 : /* Set up our private parsing context. */
236 66 : parse.context = context;
237 66 : parse.state = JM_EXPECT_TOPLEVEL_START;
238 66 : parse.saw_version_field = false;
239 :
240 : /* Create a JSON lexing context. */
241 66 : lex = makeJsonLexContextCstringLen(NULL, buffer, size, PG_UTF8, true);
242 :
243 : /* Set up semantic actions. */
244 66 : sem.semstate = &parse;
245 66 : sem.object_start = json_manifest_object_start;
246 66 : sem.object_end = json_manifest_object_end;
247 66 : sem.array_start = json_manifest_array_start;
248 66 : sem.array_end = json_manifest_array_end;
249 66 : sem.object_field_start = json_manifest_object_field_start;
250 66 : sem.object_field_end = NULL;
251 66 : sem.array_element_start = NULL;
252 66 : sem.array_element_end = NULL;
253 66 : sem.scalar = json_manifest_scalar;
254 :
255 : /* Run the actual JSON parser. */
256 66 : json_error = pg_parse_json(lex, &sem);
257 14 : if (json_error != JSON_SUCCESS)
258 2 : json_manifest_parse_failure(context, json_errdetail(json_error, lex));
259 12 : if (parse.state != JM_EXPECT_EOF)
260 0 : json_manifest_parse_failure(context, "manifest ended unexpectedly");
261 :
262 : /* Verify the manifest checksum. */
263 12 : verify_manifest_checksum(&parse, buffer, size, NULL);
264 :
265 6 : freeJsonLexContext(lex);
266 6 : }
267 :
268 : /*
269 : * Invoked at the start of each object in the JSON document.
270 : *
271 : * The document as a whole is expected to be an object; each file and each
272 : * WAL range is also expected to be an object. If we're anywhere else in the
273 : * document, it's an error.
274 : */
275 : static JsonParseErrorType
276 227730 : json_manifest_object_start(void *state)
277 : {
278 227730 : JsonManifestParseState *parse = state;
279 :
280 227730 : switch (parse->state)
281 : {
282 290 : case JM_EXPECT_TOPLEVEL_START:
283 290 : parse->state = JM_EXPECT_TOPLEVEL_FIELD;
284 290 : break;
285 227192 : case JM_EXPECT_FILES_NEXT:
286 227192 : parse->state = JM_EXPECT_THIS_FILE_FIELD;
287 227192 : parse->pathname = NULL;
288 227192 : parse->encoded_pathname = NULL;
289 227192 : parse->size = NULL;
290 227192 : parse->algorithm = NULL;
291 227192 : parse->checksum = NULL;
292 227192 : break;
293 246 : case JM_EXPECT_WAL_RANGES_NEXT:
294 246 : parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
295 246 : parse->timeline = NULL;
296 246 : parse->start_lsn = NULL;
297 246 : parse->end_lsn = NULL;
298 246 : break;
299 2 : default:
300 2 : json_manifest_parse_failure(parse->context,
301 : "unexpected object start");
302 0 : break;
303 : }
304 :
305 227728 : return JSON_SUCCESS;
306 : }
307 :
308 : /*
309 : * Invoked at the end of each object in the JSON document.
310 : *
311 : * The possible cases here are the same as for json_manifest_object_start.
312 : * There's nothing special to do at the end of the document, but when we
313 : * reach the end of an object representing a particular file or WAL range,
314 : * we must call json_manifest_finalize_file() to save the associated details.
315 : */
316 : static JsonParseErrorType
317 227674 : json_manifest_object_end(void *state)
318 : {
319 227674 : JsonManifestParseState *parse = state;
320 :
321 227674 : switch (parse->state)
322 : {
323 236 : case JM_EXPECT_TOPLEVEL_END:
324 236 : parse->state = JM_EXPECT_EOF;
325 236 : break;
326 227190 : case JM_EXPECT_THIS_FILE_FIELD:
327 227190 : json_manifest_finalize_file(parse);
328 227172 : parse->state = JM_EXPECT_FILES_NEXT;
329 227172 : break;
330 244 : case JM_EXPECT_THIS_WAL_RANGE_FIELD:
331 244 : json_manifest_finalize_wal_range(parse);
332 232 : parse->state = JM_EXPECT_WAL_RANGES_NEXT;
333 232 : break;
334 4 : default:
335 4 : json_manifest_parse_failure(parse->context,
336 : "unexpected object end");
337 0 : break;
338 : }
339 :
340 227640 : return JSON_SUCCESS;
341 : }
342 :
343 : /*
344 : * Invoked at the start of each array in the JSON document.
345 : *
346 : * Within the toplevel object, the value associated with the "Files" key
347 : * should be an array. Similarly for the "WAL-Ranges" key. No other arrays
348 : * are expected.
349 : */
350 : static JsonParseErrorType
351 504 : json_manifest_array_start(void *state)
352 : {
353 504 : JsonManifestParseState *parse = state;
354 :
355 504 : switch (parse->state)
356 : {
357 256 : case JM_EXPECT_FILES_START:
358 256 : parse->state = JM_EXPECT_FILES_NEXT;
359 256 : break;
360 246 : case JM_EXPECT_WAL_RANGES_START:
361 246 : parse->state = JM_EXPECT_WAL_RANGES_NEXT;
362 246 : break;
363 2 : default:
364 2 : json_manifest_parse_failure(parse->context,
365 : "unexpected array start");
366 0 : break;
367 : }
368 :
369 502 : return JSON_SUCCESS;
370 : }
371 :
372 : /*
373 : * Invoked at the end of each array in the JSON document.
374 : *
375 : * The cases here are analogous to those in json_manifest_array_start.
376 : */
377 : static JsonParseErrorType
378 468 : json_manifest_array_end(void *state)
379 : {
380 468 : JsonManifestParseState *parse = state;
381 :
382 468 : switch (parse->state)
383 : {
384 468 : case JM_EXPECT_FILES_NEXT:
385 : case JM_EXPECT_WAL_RANGES_NEXT:
386 468 : parse->state = JM_EXPECT_TOPLEVEL_FIELD;
387 468 : break;
388 0 : default:
389 0 : json_manifest_parse_failure(parse->context,
390 : "unexpected array end");
391 0 : break;
392 : }
393 :
394 468 : return JSON_SUCCESS;
395 : }
396 :
397 : /*
398 : * Invoked at the start of each object field in the JSON document.
399 : */
400 : static JsonParseErrorType
401 1126224 : json_manifest_object_field_start(void *state, char *fname, bool isnull)
402 : {
403 1126224 : JsonManifestParseState *parse = state;
404 :
405 1126224 : switch (parse->state)
406 : {
407 1262 : case JM_EXPECT_TOPLEVEL_FIELD:
408 :
409 : /*
410 : * Inside toplevel object. The version indicator should always be
411 : * the first field.
412 : */
413 1262 : if (!parse->saw_version_field)
414 : {
415 286 : if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0)
416 2 : json_manifest_parse_failure(parse->context,
417 : "expected version indicator");
418 284 : parse->state = JM_EXPECT_VERSION_VALUE;
419 284 : parse->saw_version_field = true;
420 284 : break;
421 : }
422 :
423 : /* Is this the system identifier? */
424 976 : if (strcmp(fname, "System-Identifier") == 0)
425 : {
426 232 : parse->state = JM_EXPECT_SYSTEM_IDENTIFIER_VALUE;
427 232 : break;
428 : }
429 :
430 : /* Is this the list of files? */
431 744 : if (strcmp(fname, "Files") == 0)
432 : {
433 260 : parse->state = JM_EXPECT_FILES_START;
434 260 : break;
435 : }
436 :
437 : /* Is this the list of WAL ranges? */
438 484 : if (strcmp(fname, "WAL-Ranges") == 0)
439 : {
440 246 : parse->state = JM_EXPECT_WAL_RANGES_START;
441 246 : break;
442 : }
443 :
444 : /* Is this the manifest checksum? */
445 238 : if (strcmp(fname, "Manifest-Checksum") == 0)
446 : {
447 236 : parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE;
448 236 : break;
449 : }
450 :
451 : /* It's not a field we recognize. */
452 2 : json_manifest_parse_failure(parse->context,
453 : "unrecognized top-level field");
454 0 : break;
455 :
456 1124240 : case JM_EXPECT_THIS_FILE_FIELD:
457 : /* Inside object for one file; which key have we got? */
458 1124240 : if (strcmp(fname, "Path") == 0)
459 225246 : parse->file_field = JMFF_PATH;
460 898994 : else if (strcmp(fname, "Encoded-Path") == 0)
461 1944 : parse->file_field = JMFF_ENCODED_PATH;
462 897050 : else if (strcmp(fname, "Size") == 0)
463 227184 : parse->file_field = JMFF_SIZE;
464 669866 : else if (strcmp(fname, "Last-Modified") == 0)
465 227170 : parse->file_field = JMFF_LAST_MODIFIED;
466 442696 : else if (strcmp(fname, "Checksum-Algorithm") == 0)
467 221346 : parse->file_field = JMFF_CHECKSUM_ALGORITHM;
468 221350 : else if (strcmp(fname, "Checksum") == 0)
469 221348 : parse->file_field = JMFF_CHECKSUM;
470 : else
471 2 : json_manifest_parse_failure(parse->context,
472 : "unexpected file field");
473 1124238 : parse->state = JM_EXPECT_THIS_FILE_VALUE;
474 1124238 : break;
475 :
476 722 : case JM_EXPECT_THIS_WAL_RANGE_FIELD:
477 : /* Inside object for one file; which key have we got? */
478 722 : if (strcmp(fname, "Timeline") == 0)
479 242 : parse->wal_range_field = JMWRF_TIMELINE;
480 480 : else if (strcmp(fname, "Start-LSN") == 0)
481 240 : parse->wal_range_field = JMWRF_START_LSN;
482 240 : else if (strcmp(fname, "End-LSN") == 0)
483 238 : parse->wal_range_field = JMWRF_END_LSN;
484 : else
485 2 : json_manifest_parse_failure(parse->context,
486 : "unexpected WAL range field");
487 720 : parse->state = JM_EXPECT_THIS_WAL_RANGE_VALUE;
488 720 : break;
489 :
490 0 : default:
491 0 : json_manifest_parse_failure(parse->context,
492 : "unexpected object field");
493 0 : break;
494 : }
495 :
496 1126216 : pfree(fname);
497 :
498 1126216 : return JSON_SUCCESS;
499 : }
500 :
501 : /*
502 : * Invoked at the start of each scalar in the JSON document.
503 : *
504 : * Object field names don't reach this code; those are handled by
505 : * json_manifest_object_field_start. When we're inside of the object for
506 : * a particular file or WAL range, that function will have noticed the name
507 : * of the field, and we'll get the corresponding value here. When we're in
508 : * the toplevel object, the parse state itself tells us which field this is.
509 : *
510 : * In all cases except for PostgreSQL-Backup-Manifest-Version, which we
511 : * can just check on the spot, the goal here is just to save the value in
512 : * the parse state for later use. We don't actually do anything until we
513 : * reach either the end of the object representing this file, or the end
514 : * of the manifest, as the case may be.
515 : */
516 : static JsonParseErrorType
517 1125712 : json_manifest_scalar(void *state, char *token, JsonTokenType tokentype)
518 : {
519 1125712 : JsonManifestParseState *parse = state;
520 :
521 1125712 : switch (parse->state)
522 : {
523 284 : case JM_EXPECT_VERSION_VALUE:
524 284 : parse->manifest_version = token;
525 284 : json_manifest_finalize_version(parse);
526 280 : parse->state = JM_EXPECT_TOPLEVEL_FIELD;
527 280 : break;
528 :
529 232 : case JM_EXPECT_SYSTEM_IDENTIFIER_VALUE:
530 232 : parse->manifest_system_identifier = token;
531 232 : json_manifest_finalize_system_identifier(parse);
532 230 : parse->state = JM_EXPECT_TOPLEVEL_FIELD;
533 230 : break;
534 :
535 1124238 : case JM_EXPECT_THIS_FILE_VALUE:
536 1124238 : switch (parse->file_field)
537 : {
538 225246 : case JMFF_PATH:
539 225246 : parse->pathname = token;
540 225246 : break;
541 1944 : case JMFF_ENCODED_PATH:
542 1944 : parse->encoded_pathname = token;
543 1944 : break;
544 227184 : case JMFF_SIZE:
545 227184 : parse->size = token;
546 227184 : break;
547 227170 : case JMFF_LAST_MODIFIED:
548 227170 : pfree(token); /* unused */
549 227170 : break;
550 221346 : case JMFF_CHECKSUM_ALGORITHM:
551 221346 : parse->algorithm = token;
552 221346 : break;
553 221348 : case JMFF_CHECKSUM:
554 221348 : parse->checksum = token;
555 221348 : break;
556 : }
557 1124238 : parse->state = JM_EXPECT_THIS_FILE_FIELD;
558 1124238 : break;
559 :
560 720 : case JM_EXPECT_THIS_WAL_RANGE_VALUE:
561 720 : switch (parse->wal_range_field)
562 : {
563 242 : case JMWRF_TIMELINE:
564 242 : parse->timeline = token;
565 242 : break;
566 240 : case JMWRF_START_LSN:
567 240 : parse->start_lsn = token;
568 240 : break;
569 238 : case JMWRF_END_LSN:
570 238 : parse->end_lsn = token;
571 238 : break;
572 : }
573 720 : parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
574 720 : break;
575 :
576 236 : case JM_EXPECT_MANIFEST_CHECKSUM_VALUE:
577 236 : parse->state = JM_EXPECT_TOPLEVEL_END;
578 236 : parse->manifest_checksum = token;
579 236 : break;
580 :
581 2 : default:
582 2 : json_manifest_parse_failure(parse->context, "unexpected scalar");
583 0 : break;
584 : }
585 :
586 1125704 : return JSON_SUCCESS;
587 : }
588 :
589 : /*
590 : * Do additional parsing and sanity-checking of the manifest version, and invoke
591 : * the callback so that the caller can gets that detail and take actions
592 : * accordingly. This happens for each manifest when the corresponding JSON
593 : * object is completely parsed.
594 : */
595 : static void
596 284 : json_manifest_finalize_version(JsonManifestParseState *parse)
597 : {
598 284 : JsonManifestParseContext *context = parse->context;
599 : int version;
600 : char *ep;
601 :
602 : Assert(parse->saw_version_field);
603 :
604 : /* Parse version. */
605 284 : version = strtoi64(parse->manifest_version, &ep, 10);
606 284 : if (*ep)
607 2 : json_manifest_parse_failure(parse->context,
608 : "manifest version not an integer");
609 :
610 282 : if (version != 1 && version != 2)
611 2 : json_manifest_parse_failure(parse->context,
612 : "unexpected manifest version");
613 :
614 : /* Invoke the callback for version */
615 280 : context->version_cb(context, version);
616 280 : }
617 :
618 : /*
619 : * Do additional parsing and sanity-checking of the system identifier, and
620 : * invoke the callback so that the caller can gets that detail and take actions
621 : * accordingly.
622 : */
623 : static void
624 232 : json_manifest_finalize_system_identifier(JsonManifestParseState *parse)
625 : {
626 232 : JsonManifestParseContext *context = parse->context;
627 : uint64 system_identifier;
628 : char *ep;
629 :
630 : Assert(parse->manifest_system_identifier != NULL);
631 :
632 : /* Parse system identifier. */
633 232 : system_identifier = strtou64(parse->manifest_system_identifier, &ep, 10);
634 232 : if (*ep)
635 0 : json_manifest_parse_failure(parse->context,
636 : "system identifier in manifest not an integer");
637 :
638 : /* Invoke the callback for system identifier */
639 232 : context->system_identifier_cb(context, system_identifier);
640 230 : }
641 :
642 : /*
643 : * Do additional parsing and sanity-checking of the details gathered for one
644 : * file, and invoke the per-file callback so that the caller gets those
645 : * details. This happens for each file when the corresponding JSON object is
646 : * completely parsed.
647 : */
648 : static void
649 227190 : json_manifest_finalize_file(JsonManifestParseState *parse)
650 : {
651 227190 : JsonManifestParseContext *context = parse->context;
652 : uint64 size;
653 : char *ep;
654 : int checksum_string_length;
655 : pg_checksum_type checksum_type;
656 : int checksum_length;
657 : uint8 *checksum_payload;
658 :
659 : /* Pathname and size are required. */
660 227190 : if (parse->pathname == NULL && parse->encoded_pathname == NULL)
661 2 : json_manifest_parse_failure(parse->context, "missing path name");
662 227188 : if (parse->pathname != NULL && parse->encoded_pathname != NULL)
663 2 : json_manifest_parse_failure(parse->context,
664 : "both path name and encoded path name");
665 227186 : if (parse->size == NULL)
666 2 : json_manifest_parse_failure(parse->context, "missing size");
667 227184 : if (parse->algorithm == NULL && parse->checksum != NULL)
668 2 : json_manifest_parse_failure(parse->context,
669 : "checksum without algorithm");
670 :
671 : /* Decode encoded pathname, if that's what we have. */
672 227182 : if (parse->encoded_pathname != NULL)
673 : {
674 1942 : int encoded_length = strlen(parse->encoded_pathname);
675 1942 : int raw_length = encoded_length / 2;
676 :
677 1942 : parse->pathname = palloc(raw_length + 1);
678 1942 : if (encoded_length % 2 != 0 ||
679 1940 : !hexdecode_string((uint8 *) parse->pathname,
680 : parse->encoded_pathname,
681 : raw_length))
682 2 : json_manifest_parse_failure(parse->context,
683 : "could not decode file name");
684 1940 : parse->pathname[raw_length] = '\0';
685 1940 : pfree(parse->encoded_pathname);
686 1940 : parse->encoded_pathname = NULL;
687 : }
688 :
689 : /* Parse size. */
690 227180 : size = strtou64(parse->size, &ep, 10);
691 227180 : if (*ep)
692 2 : json_manifest_parse_failure(parse->context,
693 : "file size is not an integer");
694 :
695 : /* Parse the checksum algorithm, if it's present. */
696 227178 : if (parse->algorithm == NULL)
697 5832 : checksum_type = CHECKSUM_TYPE_NONE;
698 221346 : else if (!pg_checksum_parse_type(parse->algorithm, &checksum_type))
699 2 : context->error_cb(context, "unrecognized checksum algorithm: \"%s\"",
700 : parse->algorithm);
701 :
702 : /* Parse the checksum payload, if it's present. */
703 227176 : checksum_string_length = parse->checksum == NULL ? 0
704 221344 : : strlen(parse->checksum);
705 227176 : if (checksum_string_length == 0)
706 : {
707 5832 : checksum_length = 0;
708 5832 : checksum_payload = NULL;
709 : }
710 : else
711 : {
712 221344 : checksum_length = checksum_string_length / 2;
713 221344 : checksum_payload = palloc(checksum_length);
714 221344 : if (checksum_string_length % 2 != 0 ||
715 221342 : !hexdecode_string(checksum_payload, parse->checksum,
716 : checksum_length))
717 2 : context->error_cb(context,
718 : "invalid checksum for file \"%s\": \"%s\"",
719 : parse->pathname, parse->checksum);
720 : }
721 :
722 : /* Invoke the callback with the details we've gathered. */
723 227174 : context->per_file_cb(context, parse->pathname, size,
724 : checksum_type, checksum_length, checksum_payload);
725 :
726 : /* Free memory we no longer need. */
727 227172 : if (parse->size != NULL)
728 : {
729 227172 : pfree(parse->size);
730 227172 : parse->size = NULL;
731 : }
732 227172 : if (parse->algorithm != NULL)
733 : {
734 221342 : pfree(parse->algorithm);
735 221342 : parse->algorithm = NULL;
736 : }
737 227172 : if (parse->checksum != NULL)
738 : {
739 221342 : pfree(parse->checksum);
740 221342 : parse->checksum = NULL;
741 : }
742 227172 : }
743 :
744 : /*
745 : * Do additional parsing and sanity-checking of the details gathered for one
746 : * WAL range, and invoke the per-WAL-range callback so that the caller gets
747 : * those details. This happens for each WAL range when the corresponding JSON
748 : * object is completely parsed.
749 : */
750 : static void
751 244 : json_manifest_finalize_wal_range(JsonManifestParseState *parse)
752 : {
753 244 : JsonManifestParseContext *context = parse->context;
754 : TimeLineID tli;
755 : XLogRecPtr start_lsn,
756 : end_lsn;
757 : char *ep;
758 :
759 : /* Make sure all fields are present. */
760 244 : if (parse->timeline == NULL)
761 2 : json_manifest_parse_failure(parse->context, "missing timeline");
762 242 : if (parse->start_lsn == NULL)
763 2 : json_manifest_parse_failure(parse->context, "missing start LSN");
764 240 : if (parse->end_lsn == NULL)
765 2 : json_manifest_parse_failure(parse->context, "missing end LSN");
766 :
767 : /* Parse timeline. */
768 238 : tli = strtoul(parse->timeline, &ep, 10);
769 238 : if (*ep)
770 2 : json_manifest_parse_failure(parse->context,
771 : "timeline is not an integer");
772 236 : if (!parse_xlogrecptr(&start_lsn, parse->start_lsn))
773 2 : json_manifest_parse_failure(parse->context,
774 : "could not parse start LSN");
775 234 : if (!parse_xlogrecptr(&end_lsn, parse->end_lsn))
776 2 : json_manifest_parse_failure(parse->context,
777 : "could not parse end LSN");
778 :
779 : /* Invoke the callback with the details we've gathered. */
780 232 : context->per_wal_range_cb(context, tli, start_lsn, end_lsn);
781 :
782 : /* Free memory we no longer need. */
783 232 : if (parse->timeline != NULL)
784 : {
785 232 : pfree(parse->timeline);
786 232 : parse->timeline = NULL;
787 : }
788 232 : if (parse->start_lsn != NULL)
789 : {
790 232 : pfree(parse->start_lsn);
791 232 : parse->start_lsn = NULL;
792 : }
793 232 : if (parse->end_lsn != NULL)
794 : {
795 232 : pfree(parse->end_lsn);
796 232 : parse->end_lsn = NULL;
797 : }
798 232 : }
799 :
800 : /*
801 : * Verify that the manifest checksum is correct.
802 : *
803 : * The last line of the manifest file is excluded from the manifest checksum,
804 : * because the last line is expected to contain the checksum that covers
805 : * the rest of the file.
806 : *
807 : * For an incremental parse, this will just be called on the last chunk of the
808 : * manifest, and the cryptohash context passed in. For a non-incremental
809 : * parse incr_ctx will be NULL.
810 : */
811 : static void
812 236 : verify_manifest_checksum(JsonManifestParseState *parse, const char *buffer,
813 : size_t size, pg_cryptohash_ctx *incr_ctx)
814 : {
815 236 : JsonManifestParseContext *context = parse->context;
816 : size_t i;
817 236 : size_t number_of_newlines = 0;
818 236 : size_t ultimate_newline = 0;
819 236 : size_t penultimate_newline = 0;
820 : pg_cryptohash_ctx *manifest_ctx;
821 : uint8 manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH];
822 : uint8 manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH];
823 :
824 : /* Find the last two newlines in the file. */
825 15913098 : for (i = 0; i < size; ++i)
826 : {
827 15912862 : if (buffer[i] == '\n')
828 : {
829 109932 : ++number_of_newlines;
830 109932 : penultimate_newline = ultimate_newline;
831 109932 : ultimate_newline = i;
832 : }
833 : }
834 :
835 : /*
836 : * Make sure that the last newline is right at the end, and that there are
837 : * at least two lines total. We need this to be true in order for the
838 : * following code, which computes the manifest checksum, to work properly.
839 : */
840 236 : if (number_of_newlines < 2)
841 2 : json_manifest_parse_failure(parse->context,
842 : "expected at least 2 lines");
843 234 : if (ultimate_newline != size - 1)
844 2 : json_manifest_parse_failure(parse->context,
845 : "last line not newline-terminated");
846 :
847 : /* Checksum the rest. */
848 232 : if (incr_ctx == NULL)
849 : {
850 8 : manifest_ctx = pg_cryptohash_create(PG_SHA256);
851 8 : if (manifest_ctx == NULL)
852 0 : context->error_cb(context, "out of memory");
853 8 : if (pg_cryptohash_init(manifest_ctx) < 0)
854 0 : context->error_cb(context, "could not initialize checksum of manifest");
855 : }
856 : else
857 : {
858 224 : manifest_ctx = incr_ctx;
859 : }
860 232 : if (pg_cryptohash_update(manifest_ctx, (const uint8 *) buffer, penultimate_newline + 1) < 0)
861 0 : context->error_cb(context, "could not update checksum of manifest");
862 232 : if (pg_cryptohash_final(manifest_ctx, manifest_checksum_actual,
863 : sizeof(manifest_checksum_actual)) < 0)
864 0 : context->error_cb(context, "could not finalize checksum of manifest");
865 :
866 : /* Now verify it. */
867 232 : if (parse->manifest_checksum == NULL)
868 0 : context->error_cb(parse->context, "manifest has no checksum");
869 232 : if (strlen(parse->manifest_checksum) != PG_SHA256_DIGEST_LENGTH * 2 ||
870 232 : !hexdecode_string(manifest_checksum_expected, parse->manifest_checksum,
871 : PG_SHA256_DIGEST_LENGTH))
872 2 : context->error_cb(context, "invalid manifest checksum: \"%s\"",
873 : parse->manifest_checksum);
874 230 : if (memcmp(manifest_checksum_actual, manifest_checksum_expected,
875 : PG_SHA256_DIGEST_LENGTH) != 0)
876 4 : context->error_cb(context, "manifest checksum mismatch");
877 226 : pg_cryptohash_free(manifest_ctx);
878 226 : }
879 :
880 : /*
881 : * Report a parse error.
882 : *
883 : * This is intended to be used for fairly low-level failures that probably
884 : * shouldn't occur unless somebody has deliberately constructed a bad manifest,
885 : * or unless the server is generating bad manifests due to some bug. msg should
886 : * be a short string giving some hint as to what the problem is.
 *
 * The message is passed to context->error_cb with the prefix "could not
 * parse backup manifest: ". The callers in this file are written on the
 * expectation that error_cb does not return.
887 : */
888 : static void
889 52 : json_manifest_parse_failure(JsonManifestParseContext *context, char *msg)
890 : {
891 52 : context->error_cb(context, "could not parse backup manifest: %s", msg);
892 : }
893 :
/*
 * Return the numeric value (0..15) of a hexadecimal digit character.
 *
 * Both upper-case and lower-case letters are accepted. Any character that is
 * not a hexadecimal digit yields -1.
 */
static int
hexdecode_char(char c)
{
	int			value = -1;

	if (c >= 'A' && c <= 'F')
		value = 10 + (c - 'A');
	else if (c >= 'a' && c <= 'f')
		value = 10 + (c - 'a');
	else if (c >= '0' && c <= '9')
		value = c - '0';

	return value;
}
911 :
912 : /*
913 : * Decode a hex string into a byte string, 2 hex chars per byte.
914 : *
915 : * Returns false if invalid characters are encountered; otherwise true.
916 : */
917 : static bool
918 223514 : hexdecode_string(uint8 *result, char *input, int nbytes)
919 : {
920 : int i;
921 :
922 1791696 : for (i = 0; i < nbytes; ++i)
923 : {
924 1568184 : int n1 = hexdecode_char(input[i * 2]);
925 1568184 : int n2 = hexdecode_char(input[i * 2 + 1]);
926 :
927 1568184 : if (n1 < 0 || n2 < 0)
928 2 : return false;
929 1568182 : result[i] = n1 * 16 + n2;
930 : }
931 :
932 223512 : return true;
933 : }
934 :
935 : /*
936 : * Parse an XLogRecPtr expressed using the usual string format.
937 : */
938 : static bool
939 470 : parse_xlogrecptr(XLogRecPtr *result, char *input)
940 : {
941 : uint32 hi;
942 : uint32 lo;
943 :
944 470 : if (sscanf(input, "%X/%X", &hi, &lo) != 2)
945 4 : return false;
946 466 : *result = ((uint64) hi) << 32 | lo;
947 466 : return true;
948 : }
|