Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * parse_manifest.c
4 : * Parse a backup manifest in JSON format.
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * src/common/parse_manifest.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 :
14 : #include "postgres_fe.h"
15 :
16 : #include "common/jsonapi.h"
17 : #include "common/parse_manifest.h"
18 :
/*
 * Semantic states for JSON manifest parsing.
 *
 * Each value names the event the semantic callbacks expect to see next.
 * A callback that is invoked in a state it does not handle reports a
 * parse failure.
 */
typedef enum JsonManifestSemanticState
{
	JM_EXPECT_TOPLEVEL_START,	/* start of the document's top-level object */
	JM_EXPECT_TOPLEVEL_END,		/* end of the top-level object */
	JM_EXPECT_TOPLEVEL_FIELD,	/* a key inside the top-level object */
	JM_EXPECT_VERSION_VALUE,	/* scalar for the manifest version key */
	JM_EXPECT_SYSTEM_IDENTIFIER_VALUE,	/* scalar for "System-Identifier" */
	JM_EXPECT_FILES_START,		/* start of the "Files" array */
	JM_EXPECT_FILES_NEXT,		/* next file object, or end of "Files" */
	JM_EXPECT_THIS_FILE_FIELD,	/* a key inside one file object, or its end */
	JM_EXPECT_THIS_FILE_VALUE,	/* scalar for the current file key */
	JM_EXPECT_WAL_RANGES_START, /* start of the "WAL-Ranges" array */
	JM_EXPECT_WAL_RANGES_NEXT,	/* next range object, or end of the array */
	JM_EXPECT_THIS_WAL_RANGE_FIELD, /* a key inside one WAL range, or its end */
	JM_EXPECT_THIS_WAL_RANGE_VALUE, /* scalar for the current WAL range key */
	JM_EXPECT_MANIFEST_CHECKSUM_VALUE,	/* scalar for "Manifest-Checksum" */
	JM_EXPECT_EOF,				/* top-level object closed; nothing more */
} JsonManifestSemanticState;
40 :
/*
 * Possible fields for one file as described by the manifest.
 *
 * The value records which key of a file object was seen most recently, so
 * that the scalar callback knows where to store the value that follows.
 */
typedef enum JsonManifestFileField
{
	JMFF_PATH,					/* "Path" */
	JMFF_ENCODED_PATH,			/* "Encoded-Path" */
	JMFF_SIZE,					/* "Size" */
	JMFF_LAST_MODIFIED,			/* "Last-Modified" */
	JMFF_CHECKSUM_ALGORITHM,	/* "Checksum-Algorithm" */
	JMFF_CHECKSUM,				/* "Checksum" */
} JsonManifestFileField;
53 :
/*
 * Possible fields for one WAL range as described by the manifest.
 *
 * The value records which key of a WAL range object was seen most recently,
 * so that the scalar callback knows where to store the value that follows.
 */
typedef enum JsonManifestWALRangeField
{
	JMWRF_TIMELINE,				/* "Timeline" */
	JMWRF_START_LSN,			/* "Start-LSN" */
	JMWRF_END_LSN,				/* "End-LSN" */
} JsonManifestWALRangeField;
63 :
64 : /*
65 : * Internal state used while decoding the JSON-format backup manifest.
66 : */
67 : typedef struct
68 : {
69 : JsonManifestParseContext *context;
70 : JsonManifestSemanticState state;
71 :
72 : /* These fields are used for parsing objects in the list of files. */
73 : JsonManifestFileField file_field;
74 : char *pathname;
75 : char *encoded_pathname;
76 : char *size;
77 : char *algorithm;
78 : pg_checksum_type checksum_algorithm;
79 : char *checksum;
80 :
81 : /* These fields are used for parsing objects in the list of WAL ranges. */
82 : JsonManifestWALRangeField wal_range_field;
83 : char *timeline;
84 : char *start_lsn;
85 : char *end_lsn;
86 :
87 : /* Miscellaneous other stuff. */
88 : bool saw_version_field;
89 : char *manifest_version;
90 : char *manifest_system_identifier;
91 : char *manifest_checksum;
92 : } JsonManifestParseState;
93 :
94 : /* typedef appears in parse_manifest.h */
95 : struct JsonManifestParseIncrementalState
96 : {
97 : JsonLexContext lex;
98 : JsonSemAction sem;
99 : pg_cryptohash_ctx *manifest_ctx;
100 : };
101 :
102 : static JsonParseErrorType json_manifest_object_start(void *state);
103 : static JsonParseErrorType json_manifest_object_end(void *state);
104 : static JsonParseErrorType json_manifest_array_start(void *state);
105 : static JsonParseErrorType json_manifest_array_end(void *state);
106 : static JsonParseErrorType json_manifest_object_field_start(void *state, char *fname,
107 : bool isnull);
108 : static JsonParseErrorType json_manifest_scalar(void *state, char *token,
109 : JsonTokenType tokentype);
110 : static void json_manifest_finalize_version(JsonManifestParseState *parse);
111 : static void json_manifest_finalize_system_identifier(JsonManifestParseState *parse);
112 : static void json_manifest_finalize_file(JsonManifestParseState *parse);
113 : static void json_manifest_finalize_wal_range(JsonManifestParseState *parse);
114 : static void verify_manifest_checksum(JsonManifestParseState *parse,
115 : const char *buffer, size_t size,
116 : pg_cryptohash_ctx *incr_ctx);
117 : static void json_manifest_parse_failure(JsonManifestParseContext *context,
118 : char *msg);
119 :
120 : static int hexdecode_char(char c);
121 : static bool hexdecode_string(uint8 *result, char *input, int nbytes);
122 : static bool parse_xlogrecptr(XLogRecPtr *result, char *input);
123 :
124 : /*
125 : * Set up for incremental parsing of the manifest.
126 : */
127 :
128 : JsonManifestParseIncrementalState *
129 226 : json_parse_manifest_incremental_init(JsonManifestParseContext *context)
130 : {
131 : JsonManifestParseIncrementalState *incstate;
132 : JsonManifestParseState *parse;
133 : pg_cryptohash_ctx *manifest_ctx;
134 :
135 226 : incstate = palloc(sizeof(JsonManifestParseIncrementalState));
136 226 : parse = palloc(sizeof(JsonManifestParseState));
137 :
138 226 : parse->context = context;
139 226 : parse->state = JM_EXPECT_TOPLEVEL_START;
140 226 : parse->saw_version_field = false;
141 :
142 226 : makeJsonLexContextIncremental(&(incstate->lex), PG_UTF8, true);
143 :
144 226 : incstate->sem.semstate = parse;
145 226 : incstate->sem.object_start = json_manifest_object_start;
146 226 : incstate->sem.object_end = json_manifest_object_end;
147 226 : incstate->sem.array_start = json_manifest_array_start;
148 226 : incstate->sem.array_end = json_manifest_array_end;
149 226 : incstate->sem.object_field_start = json_manifest_object_field_start;
150 226 : incstate->sem.object_field_end = NULL;
151 226 : incstate->sem.array_element_start = NULL;
152 226 : incstate->sem.array_element_end = NULL;
153 226 : incstate->sem.scalar = json_manifest_scalar;
154 :
155 226 : manifest_ctx = pg_cryptohash_create(PG_SHA256);
156 226 : if (manifest_ctx == NULL)
157 0 : context->error_cb(context, "out of memory");
158 226 : if (pg_cryptohash_init(manifest_ctx) < 0)
159 0 : context->error_cb(context, "could not initialize checksum of manifest");
160 226 : incstate->manifest_ctx = manifest_ctx;
161 :
162 226 : return incstate;
163 : }
164 :
165 : /*
166 : * Free an incremental state object and its contents.
167 : */
168 : void
169 220 : json_parse_manifest_incremental_shutdown(JsonManifestParseIncrementalState *incstate)
170 : {
171 220 : pfree(incstate->sem.semstate);
172 220 : freeJsonLexContext(&(incstate->lex));
173 : /* incstate->manifest_ctx has already been freed */
174 220 : pfree(incstate);
175 220 : }
176 :
177 : /*
178 : * parse the manifest in pieces.
179 : *
180 : * The caller must ensure that the final piece contains the final lines
181 : * with the complete checksum.
182 : */
183 :
184 : void
185 450 : json_parse_manifest_incremental_chunk(
186 : JsonManifestParseIncrementalState *incstate, const char *chunk, size_t size,
187 : bool is_last)
188 : {
189 : JsonParseErrorType res,
190 : expected;
191 450 : JsonManifestParseState *parse = incstate->sem.semstate;
192 450 : JsonManifestParseContext *context = parse->context;
193 :
194 450 : res = pg_parse_json_incremental(&(incstate->lex), &(incstate->sem),
195 : chunk, size, is_last);
196 :
197 448 : expected = is_last ? JSON_SUCCESS : JSON_INCOMPLETE;
198 :
199 448 : if (res != expected)
200 0 : json_manifest_parse_failure(context,
201 : json_errdetail(res, &(incstate->lex)));
202 :
203 448 : if (is_last && parse->state != JM_EXPECT_EOF)
204 0 : json_manifest_parse_failure(context, "manifest ended unexpectedly");
205 :
206 448 : if (!is_last)
207 : {
208 224 : if (pg_cryptohash_update(incstate->manifest_ctx,
209 : (const uint8 *) chunk, size) < 0)
210 0 : context->error_cb(context, "could not update checksum of manifest");
211 : }
212 : else
213 : {
214 224 : verify_manifest_checksum(parse, chunk, size, incstate->manifest_ctx);
215 : }
216 444 : }
217 :
218 :
219 : /*
220 : * Main entrypoint to parse a JSON-format backup manifest.
221 : *
222 : * Caller should set up the parsing context and then invoke this function.
223 : * For each file whose information is extracted from the manifest,
224 : * context->per_file_cb is invoked. In case of trouble, context->error_cb is
225 : * invoked and is expected not to return.
226 : */
227 : void
228 66 : json_parse_manifest(JsonManifestParseContext *context, const char *buffer,
229 : size_t size)
230 : {
231 : JsonLexContext *lex;
232 : JsonParseErrorType json_error;
233 : JsonSemAction sem;
234 : JsonManifestParseState parse;
235 :
236 : /* Set up our private parsing context. */
237 66 : parse.context = context;
238 66 : parse.state = JM_EXPECT_TOPLEVEL_START;
239 66 : parse.saw_version_field = false;
240 :
241 : /* Create a JSON lexing context. */
242 66 : lex = makeJsonLexContextCstringLen(NULL, buffer, size, PG_UTF8, true);
243 :
244 : /* Set up semantic actions. */
245 66 : sem.semstate = &parse;
246 66 : sem.object_start = json_manifest_object_start;
247 66 : sem.object_end = json_manifest_object_end;
248 66 : sem.array_start = json_manifest_array_start;
249 66 : sem.array_end = json_manifest_array_end;
250 66 : sem.object_field_start = json_manifest_object_field_start;
251 66 : sem.object_field_end = NULL;
252 66 : sem.array_element_start = NULL;
253 66 : sem.array_element_end = NULL;
254 66 : sem.scalar = json_manifest_scalar;
255 :
256 : /* Run the actual JSON parser. */
257 66 : json_error = pg_parse_json(lex, &sem);
258 14 : if (json_error != JSON_SUCCESS)
259 2 : json_manifest_parse_failure(context, json_errdetail(json_error, lex));
260 12 : if (parse.state != JM_EXPECT_EOF)
261 0 : json_manifest_parse_failure(context, "manifest ended unexpectedly");
262 :
263 : /* Verify the manifest checksum. */
264 12 : verify_manifest_checksum(&parse, buffer, size, NULL);
265 :
266 6 : freeJsonLexContext(lex);
267 6 : }
268 :
269 : /*
270 : * Invoked at the start of each object in the JSON document.
271 : *
272 : * The document as a whole is expected to be an object; each file and each
273 : * WAL range is also expected to be an object. If we're anywhere else in the
274 : * document, it's an error.
275 : */
276 : static JsonParseErrorType
277 227730 : json_manifest_object_start(void *state)
278 : {
279 227730 : JsonManifestParseState *parse = state;
280 :
281 227730 : switch (parse->state)
282 : {
283 290 : case JM_EXPECT_TOPLEVEL_START:
284 290 : parse->state = JM_EXPECT_TOPLEVEL_FIELD;
285 290 : break;
286 227192 : case JM_EXPECT_FILES_NEXT:
287 227192 : parse->state = JM_EXPECT_THIS_FILE_FIELD;
288 227192 : parse->pathname = NULL;
289 227192 : parse->encoded_pathname = NULL;
290 227192 : parse->size = NULL;
291 227192 : parse->algorithm = NULL;
292 227192 : parse->checksum = NULL;
293 227192 : break;
294 246 : case JM_EXPECT_WAL_RANGES_NEXT:
295 246 : parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
296 246 : parse->timeline = NULL;
297 246 : parse->start_lsn = NULL;
298 246 : parse->end_lsn = NULL;
299 246 : break;
300 2 : default:
301 2 : json_manifest_parse_failure(parse->context,
302 : "unexpected object start");
303 0 : break;
304 : }
305 :
306 227728 : return JSON_SUCCESS;
307 : }
308 :
309 : /*
310 : * Invoked at the end of each object in the JSON document.
311 : *
312 : * The possible cases here are the same as for json_manifest_object_start.
313 : * There's nothing special to do at the end of the document, but when we
314 : * reach the end of an object representing a particular file or WAL range,
315 : * we must call json_manifest_finalize_file() to save the associated details.
316 : */
317 : static JsonParseErrorType
318 227674 : json_manifest_object_end(void *state)
319 : {
320 227674 : JsonManifestParseState *parse = state;
321 :
322 227674 : switch (parse->state)
323 : {
324 236 : case JM_EXPECT_TOPLEVEL_END:
325 236 : parse->state = JM_EXPECT_EOF;
326 236 : break;
327 227190 : case JM_EXPECT_THIS_FILE_FIELD:
328 227190 : json_manifest_finalize_file(parse);
329 227172 : parse->state = JM_EXPECT_FILES_NEXT;
330 227172 : break;
331 244 : case JM_EXPECT_THIS_WAL_RANGE_FIELD:
332 244 : json_manifest_finalize_wal_range(parse);
333 232 : parse->state = JM_EXPECT_WAL_RANGES_NEXT;
334 232 : break;
335 4 : default:
336 4 : json_manifest_parse_failure(parse->context,
337 : "unexpected object end");
338 0 : break;
339 : }
340 :
341 227640 : return JSON_SUCCESS;
342 : }
343 :
344 : /*
345 : * Invoked at the start of each array in the JSON document.
346 : *
347 : * Within the toplevel object, the value associated with the "Files" key
348 : * should be an array. Similarly for the "WAL-Ranges" key. No other arrays
349 : * are expected.
350 : */
351 : static JsonParseErrorType
352 504 : json_manifest_array_start(void *state)
353 : {
354 504 : JsonManifestParseState *parse = state;
355 :
356 504 : switch (parse->state)
357 : {
358 256 : case JM_EXPECT_FILES_START:
359 256 : parse->state = JM_EXPECT_FILES_NEXT;
360 256 : break;
361 246 : case JM_EXPECT_WAL_RANGES_START:
362 246 : parse->state = JM_EXPECT_WAL_RANGES_NEXT;
363 246 : break;
364 2 : default:
365 2 : json_manifest_parse_failure(parse->context,
366 : "unexpected array start");
367 0 : break;
368 : }
369 :
370 502 : return JSON_SUCCESS;
371 : }
372 :
373 : /*
374 : * Invoked at the end of each array in the JSON document.
375 : *
376 : * The cases here are analogous to those in json_manifest_array_start.
377 : */
378 : static JsonParseErrorType
379 468 : json_manifest_array_end(void *state)
380 : {
381 468 : JsonManifestParseState *parse = state;
382 :
383 468 : switch (parse->state)
384 : {
385 468 : case JM_EXPECT_FILES_NEXT:
386 : case JM_EXPECT_WAL_RANGES_NEXT:
387 468 : parse->state = JM_EXPECT_TOPLEVEL_FIELD;
388 468 : break;
389 0 : default:
390 0 : json_manifest_parse_failure(parse->context,
391 : "unexpected array end");
392 0 : break;
393 : }
394 :
395 468 : return JSON_SUCCESS;
396 : }
397 :
398 : /*
399 : * Invoked at the start of each object field in the JSON document.
400 : */
401 : static JsonParseErrorType
402 1126224 : json_manifest_object_field_start(void *state, char *fname, bool isnull)
403 : {
404 1126224 : JsonManifestParseState *parse = state;
405 :
406 1126224 : switch (parse->state)
407 : {
408 1262 : case JM_EXPECT_TOPLEVEL_FIELD:
409 :
410 : /*
411 : * Inside toplevel object. The version indicator should always be
412 : * the first field.
413 : */
414 1262 : if (!parse->saw_version_field)
415 : {
416 286 : if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0)
417 2 : json_manifest_parse_failure(parse->context,
418 : "expected version indicator");
419 284 : parse->state = JM_EXPECT_VERSION_VALUE;
420 284 : parse->saw_version_field = true;
421 284 : break;
422 : }
423 :
424 : /* Is this the system identifier? */
425 976 : if (strcmp(fname, "System-Identifier") == 0)
426 : {
427 232 : parse->state = JM_EXPECT_SYSTEM_IDENTIFIER_VALUE;
428 232 : break;
429 : }
430 :
431 : /* Is this the list of files? */
432 744 : if (strcmp(fname, "Files") == 0)
433 : {
434 260 : parse->state = JM_EXPECT_FILES_START;
435 260 : break;
436 : }
437 :
438 : /* Is this the list of WAL ranges? */
439 484 : if (strcmp(fname, "WAL-Ranges") == 0)
440 : {
441 246 : parse->state = JM_EXPECT_WAL_RANGES_START;
442 246 : break;
443 : }
444 :
445 : /* Is this the manifest checksum? */
446 238 : if (strcmp(fname, "Manifest-Checksum") == 0)
447 : {
448 236 : parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE;
449 236 : break;
450 : }
451 :
452 : /* It's not a field we recognize. */
453 2 : json_manifest_parse_failure(parse->context,
454 : "unrecognized top-level field");
455 0 : break;
456 :
457 1124240 : case JM_EXPECT_THIS_FILE_FIELD:
458 : /* Inside object for one file; which key have we got? */
459 1124240 : if (strcmp(fname, "Path") == 0)
460 225246 : parse->file_field = JMFF_PATH;
461 898994 : else if (strcmp(fname, "Encoded-Path") == 0)
462 1944 : parse->file_field = JMFF_ENCODED_PATH;
463 897050 : else if (strcmp(fname, "Size") == 0)
464 227184 : parse->file_field = JMFF_SIZE;
465 669866 : else if (strcmp(fname, "Last-Modified") == 0)
466 227170 : parse->file_field = JMFF_LAST_MODIFIED;
467 442696 : else if (strcmp(fname, "Checksum-Algorithm") == 0)
468 221346 : parse->file_field = JMFF_CHECKSUM_ALGORITHM;
469 221350 : else if (strcmp(fname, "Checksum") == 0)
470 221348 : parse->file_field = JMFF_CHECKSUM;
471 : else
472 2 : json_manifest_parse_failure(parse->context,
473 : "unexpected file field");
474 1124238 : parse->state = JM_EXPECT_THIS_FILE_VALUE;
475 1124238 : break;
476 :
477 722 : case JM_EXPECT_THIS_WAL_RANGE_FIELD:
478 : /* Inside object for one file; which key have we got? */
479 722 : if (strcmp(fname, "Timeline") == 0)
480 242 : parse->wal_range_field = JMWRF_TIMELINE;
481 480 : else if (strcmp(fname, "Start-LSN") == 0)
482 240 : parse->wal_range_field = JMWRF_START_LSN;
483 240 : else if (strcmp(fname, "End-LSN") == 0)
484 238 : parse->wal_range_field = JMWRF_END_LSN;
485 : else
486 2 : json_manifest_parse_failure(parse->context,
487 : "unexpected WAL range field");
488 720 : parse->state = JM_EXPECT_THIS_WAL_RANGE_VALUE;
489 720 : break;
490 :
491 0 : default:
492 0 : json_manifest_parse_failure(parse->context,
493 : "unexpected object field");
494 0 : break;
495 : }
496 :
497 1126216 : pfree(fname);
498 :
499 1126216 : return JSON_SUCCESS;
500 : }
501 :
502 : /*
503 : * Invoked at the start of each scalar in the JSON document.
504 : *
505 : * Object field names don't reach this code; those are handled by
506 : * json_manifest_object_field_start. When we're inside of the object for
507 : * a particular file or WAL range, that function will have noticed the name
508 : * of the field, and we'll get the corresponding value here. When we're in
509 : * the toplevel object, the parse state itself tells us which field this is.
510 : *
511 : * In all cases except for PostgreSQL-Backup-Manifest-Version, which we
512 : * can just check on the spot, the goal here is just to save the value in
513 : * the parse state for later use. We don't actually do anything until we
514 : * reach either the end of the object representing this file, or the end
515 : * of the manifest, as the case may be.
516 : */
517 : static JsonParseErrorType
518 1125712 : json_manifest_scalar(void *state, char *token, JsonTokenType tokentype)
519 : {
520 1125712 : JsonManifestParseState *parse = state;
521 :
522 1125712 : switch (parse->state)
523 : {
524 284 : case JM_EXPECT_VERSION_VALUE:
525 284 : parse->manifest_version = token;
526 284 : json_manifest_finalize_version(parse);
527 280 : parse->state = JM_EXPECT_TOPLEVEL_FIELD;
528 280 : break;
529 :
530 232 : case JM_EXPECT_SYSTEM_IDENTIFIER_VALUE:
531 232 : parse->manifest_system_identifier = token;
532 232 : json_manifest_finalize_system_identifier(parse);
533 230 : parse->state = JM_EXPECT_TOPLEVEL_FIELD;
534 230 : break;
535 :
536 1124238 : case JM_EXPECT_THIS_FILE_VALUE:
537 1124238 : switch (parse->file_field)
538 : {
539 225246 : case JMFF_PATH:
540 225246 : parse->pathname = token;
541 225246 : break;
542 1944 : case JMFF_ENCODED_PATH:
543 1944 : parse->encoded_pathname = token;
544 1944 : break;
545 227184 : case JMFF_SIZE:
546 227184 : parse->size = token;
547 227184 : break;
548 227170 : case JMFF_LAST_MODIFIED:
549 227170 : pfree(token); /* unused */
550 227170 : break;
551 221346 : case JMFF_CHECKSUM_ALGORITHM:
552 221346 : parse->algorithm = token;
553 221346 : break;
554 221348 : case JMFF_CHECKSUM:
555 221348 : parse->checksum = token;
556 221348 : break;
557 : }
558 1124238 : parse->state = JM_EXPECT_THIS_FILE_FIELD;
559 1124238 : break;
560 :
561 720 : case JM_EXPECT_THIS_WAL_RANGE_VALUE:
562 720 : switch (parse->wal_range_field)
563 : {
564 242 : case JMWRF_TIMELINE:
565 242 : parse->timeline = token;
566 242 : break;
567 240 : case JMWRF_START_LSN:
568 240 : parse->start_lsn = token;
569 240 : break;
570 238 : case JMWRF_END_LSN:
571 238 : parse->end_lsn = token;
572 238 : break;
573 : }
574 720 : parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
575 720 : break;
576 :
577 236 : case JM_EXPECT_MANIFEST_CHECKSUM_VALUE:
578 236 : parse->state = JM_EXPECT_TOPLEVEL_END;
579 236 : parse->manifest_checksum = token;
580 236 : break;
581 :
582 2 : default:
583 2 : json_manifest_parse_failure(parse->context, "unexpected scalar");
584 0 : break;
585 : }
586 :
587 1125704 : return JSON_SUCCESS;
588 : }
589 :
590 : /*
591 : * Do additional parsing and sanity-checking of the manifest version, and invoke
592 : * the callback so that the caller can gets that detail and take actions
593 : * accordingly. This happens for each manifest when the corresponding JSON
594 : * object is completely parsed.
595 : */
596 : static void
597 284 : json_manifest_finalize_version(JsonManifestParseState *parse)
598 : {
599 284 : JsonManifestParseContext *context = parse->context;
600 : int version;
601 : char *ep;
602 :
603 : Assert(parse->saw_version_field);
604 :
605 : /* Parse version. */
606 284 : version = strtoi64(parse->manifest_version, &ep, 10);
607 284 : if (*ep)
608 2 : json_manifest_parse_failure(parse->context,
609 : "manifest version not an integer");
610 :
611 282 : if (version != 1 && version != 2)
612 2 : json_manifest_parse_failure(parse->context,
613 : "unexpected manifest version");
614 :
615 : /* Invoke the callback for version */
616 280 : context->version_cb(context, version);
617 280 : }
618 :
619 : /*
620 : * Do additional parsing and sanity-checking of the system identifier, and
621 : * invoke the callback so that the caller can gets that detail and take actions
622 : * accordingly.
623 : */
624 : static void
625 232 : json_manifest_finalize_system_identifier(JsonManifestParseState *parse)
626 : {
627 232 : JsonManifestParseContext *context = parse->context;
628 : uint64 system_identifier;
629 : char *ep;
630 :
631 : Assert(parse->manifest_system_identifier != NULL);
632 :
633 : /* Parse system identifier. */
634 232 : system_identifier = strtou64(parse->manifest_system_identifier, &ep, 10);
635 232 : if (*ep)
636 0 : json_manifest_parse_failure(parse->context,
637 : "system identifier in manifest not an integer");
638 :
639 : /* Invoke the callback for system identifier */
640 232 : context->system_identifier_cb(context, system_identifier);
641 230 : }
642 :
643 : /*
644 : * Do additional parsing and sanity-checking of the details gathered for one
645 : * file, and invoke the per-file callback so that the caller gets those
646 : * details. This happens for each file when the corresponding JSON object is
647 : * completely parsed.
648 : */
649 : static void
650 227190 : json_manifest_finalize_file(JsonManifestParseState *parse)
651 : {
652 227190 : JsonManifestParseContext *context = parse->context;
653 : uint64 size;
654 : char *ep;
655 : int checksum_string_length;
656 : pg_checksum_type checksum_type;
657 : int checksum_length;
658 : uint8 *checksum_payload;
659 :
660 : /* Pathname and size are required. */
661 227190 : if (parse->pathname == NULL && parse->encoded_pathname == NULL)
662 2 : json_manifest_parse_failure(parse->context, "missing path name");
663 227188 : if (parse->pathname != NULL && parse->encoded_pathname != NULL)
664 2 : json_manifest_parse_failure(parse->context,
665 : "both path name and encoded path name");
666 227186 : if (parse->size == NULL)
667 2 : json_manifest_parse_failure(parse->context, "missing size");
668 227184 : if (parse->algorithm == NULL && parse->checksum != NULL)
669 2 : json_manifest_parse_failure(parse->context,
670 : "checksum without algorithm");
671 :
672 : /* Decode encoded pathname, if that's what we have. */
673 227182 : if (parse->encoded_pathname != NULL)
674 : {
675 1942 : int encoded_length = strlen(parse->encoded_pathname);
676 1942 : int raw_length = encoded_length / 2;
677 :
678 1942 : parse->pathname = palloc(raw_length + 1);
679 1942 : if (encoded_length % 2 != 0 ||
680 1940 : !hexdecode_string((uint8 *) parse->pathname,
681 : parse->encoded_pathname,
682 : raw_length))
683 2 : json_manifest_parse_failure(parse->context,
684 : "could not decode file name");
685 1940 : parse->pathname[raw_length] = '\0';
686 1940 : pfree(parse->encoded_pathname);
687 1940 : parse->encoded_pathname = NULL;
688 : }
689 :
690 : /* Parse size. */
691 227180 : size = strtou64(parse->size, &ep, 10);
692 227180 : if (*ep)
693 2 : json_manifest_parse_failure(parse->context,
694 : "file size is not an integer");
695 :
696 : /* Parse the checksum algorithm, if it's present. */
697 227178 : if (parse->algorithm == NULL)
698 5832 : checksum_type = CHECKSUM_TYPE_NONE;
699 221346 : else if (!pg_checksum_parse_type(parse->algorithm, &checksum_type))
700 2 : context->error_cb(context, "unrecognized checksum algorithm: \"%s\"",
701 : parse->algorithm);
702 :
703 : /* Parse the checksum payload, if it's present. */
704 227176 : checksum_string_length = parse->checksum == NULL ? 0
705 221344 : : strlen(parse->checksum);
706 227176 : if (checksum_string_length == 0)
707 : {
708 5832 : checksum_length = 0;
709 5832 : checksum_payload = NULL;
710 : }
711 : else
712 : {
713 221344 : checksum_length = checksum_string_length / 2;
714 221344 : checksum_payload = palloc(checksum_length);
715 221344 : if (checksum_string_length % 2 != 0 ||
716 221342 : !hexdecode_string(checksum_payload, parse->checksum,
717 : checksum_length))
718 2 : context->error_cb(context,
719 : "invalid checksum for file \"%s\": \"%s\"",
720 : parse->pathname, parse->checksum);
721 : }
722 :
723 : /* Invoke the callback with the details we've gathered. */
724 227174 : context->per_file_cb(context, parse->pathname, size,
725 : checksum_type, checksum_length, checksum_payload);
726 :
727 : /* Free memory we no longer need. */
728 227172 : if (parse->size != NULL)
729 : {
730 227172 : pfree(parse->size);
731 227172 : parse->size = NULL;
732 : }
733 227172 : if (parse->algorithm != NULL)
734 : {
735 221342 : pfree(parse->algorithm);
736 221342 : parse->algorithm = NULL;
737 : }
738 227172 : if (parse->checksum != NULL)
739 : {
740 221342 : pfree(parse->checksum);
741 221342 : parse->checksum = NULL;
742 : }
743 227172 : }
744 :
745 : /*
746 : * Do additional parsing and sanity-checking of the details gathered for one
747 : * WAL range, and invoke the per-WAL-range callback so that the caller gets
748 : * those details. This happens for each WAL range when the corresponding JSON
749 : * object is completely parsed.
750 : */
751 : static void
752 244 : json_manifest_finalize_wal_range(JsonManifestParseState *parse)
753 : {
754 244 : JsonManifestParseContext *context = parse->context;
755 : TimeLineID tli;
756 : XLogRecPtr start_lsn,
757 : end_lsn;
758 : char *ep;
759 :
760 : /* Make sure all fields are present. */
761 244 : if (parse->timeline == NULL)
762 2 : json_manifest_parse_failure(parse->context, "missing timeline");
763 242 : if (parse->start_lsn == NULL)
764 2 : json_manifest_parse_failure(parse->context, "missing start LSN");
765 240 : if (parse->end_lsn == NULL)
766 2 : json_manifest_parse_failure(parse->context, "missing end LSN");
767 :
768 : /* Parse timeline. */
769 238 : tli = strtoul(parse->timeline, &ep, 10);
770 238 : if (*ep)
771 2 : json_manifest_parse_failure(parse->context,
772 : "timeline is not an integer");
773 236 : if (!parse_xlogrecptr(&start_lsn, parse->start_lsn))
774 2 : json_manifest_parse_failure(parse->context,
775 : "could not parse start LSN");
776 234 : if (!parse_xlogrecptr(&end_lsn, parse->end_lsn))
777 2 : json_manifest_parse_failure(parse->context,
778 : "could not parse end LSN");
779 :
780 : /* Invoke the callback with the details we've gathered. */
781 232 : context->per_wal_range_cb(context, tli, start_lsn, end_lsn);
782 :
783 : /* Free memory we no longer need. */
784 232 : if (parse->timeline != NULL)
785 : {
786 232 : pfree(parse->timeline);
787 232 : parse->timeline = NULL;
788 : }
789 232 : if (parse->start_lsn != NULL)
790 : {
791 232 : pfree(parse->start_lsn);
792 232 : parse->start_lsn = NULL;
793 : }
794 232 : if (parse->end_lsn != NULL)
795 : {
796 232 : pfree(parse->end_lsn);
797 232 : parse->end_lsn = NULL;
798 : }
799 232 : }
800 :
801 : /*
802 : * Verify that the manifest checksum is correct.
803 : *
804 : * The last line of the manifest file is excluded from the manifest checksum,
805 : * because the last line is expected to contain the checksum that covers
806 : * the rest of the file.
807 : *
808 : * For an incremental parse, this will just be called on the last chunk of the
809 : * manifest, and the cryptohash context passed in. For a non-incremental
810 : * parse incr_ctx will be NULL.
811 : */
812 : static void
813 236 : verify_manifest_checksum(JsonManifestParseState *parse, const char *buffer,
814 : size_t size, pg_cryptohash_ctx *incr_ctx)
815 : {
816 236 : JsonManifestParseContext *context = parse->context;
817 : size_t i;
818 236 : size_t number_of_newlines = 0;
819 236 : size_t ultimate_newline = 0;
820 236 : size_t penultimate_newline = 0;
821 : pg_cryptohash_ctx *manifest_ctx;
822 : uint8 manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH];
823 : uint8 manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH];
824 :
825 : /* Find the last two newlines in the file. */
826 15913098 : for (i = 0; i < size; ++i)
827 : {
828 15912862 : if (buffer[i] == '\n')
829 : {
830 109932 : ++number_of_newlines;
831 109932 : penultimate_newline = ultimate_newline;
832 109932 : ultimate_newline = i;
833 : }
834 : }
835 :
836 : /*
837 : * Make sure that the last newline is right at the end, and that there are
838 : * at least two lines total. We need this to be true in order for the
839 : * following code, which computes the manifest checksum, to work properly.
840 : */
841 236 : if (number_of_newlines < 2)
842 2 : json_manifest_parse_failure(parse->context,
843 : "expected at least 2 lines");
844 234 : if (ultimate_newline != size - 1)
845 2 : json_manifest_parse_failure(parse->context,
846 : "last line not newline-terminated");
847 :
848 : /* Checksum the rest. */
849 232 : if (incr_ctx == NULL)
850 : {
851 8 : manifest_ctx = pg_cryptohash_create(PG_SHA256);
852 8 : if (manifest_ctx == NULL)
853 0 : context->error_cb(context, "out of memory");
854 8 : if (pg_cryptohash_init(manifest_ctx) < 0)
855 0 : context->error_cb(context, "could not initialize checksum of manifest");
856 : }
857 : else
858 : {
859 224 : manifest_ctx = incr_ctx;
860 : }
861 232 : if (pg_cryptohash_update(manifest_ctx, (const uint8 *) buffer, penultimate_newline + 1) < 0)
862 0 : context->error_cb(context, "could not update checksum of manifest");
863 232 : if (pg_cryptohash_final(manifest_ctx, manifest_checksum_actual,
864 : sizeof(manifest_checksum_actual)) < 0)
865 0 : context->error_cb(context, "could not finalize checksum of manifest");
866 :
867 : /* Now verify it. */
868 232 : if (parse->manifest_checksum == NULL)
869 0 : context->error_cb(parse->context, "manifest has no checksum");
870 232 : if (strlen(parse->manifest_checksum) != PG_SHA256_DIGEST_LENGTH * 2 ||
871 232 : !hexdecode_string(manifest_checksum_expected, parse->manifest_checksum,
872 : PG_SHA256_DIGEST_LENGTH))
873 2 : context->error_cb(context, "invalid manifest checksum: \"%s\"",
874 : parse->manifest_checksum);
875 230 : if (memcmp(manifest_checksum_actual, manifest_checksum_expected,
876 : PG_SHA256_DIGEST_LENGTH) != 0)
877 4 : context->error_cb(context, "manifest checksum mismatch");
878 226 : pg_cryptohash_free(manifest_ctx);
879 226 : }
880 :
881 : /*
882 : * Report a parse error.
883 : *
884 : * This is intended to be used for fairly low-level failures that probably
885 : * shouldn't occur unless somebody has deliberately constructed a bad manifest,
886 : * or unless the server is generating bad manifests due to some bug. msg should
887 : * be a short string giving some hint as to what the problem is.
888 : */
889 : static void
890 52 : json_manifest_parse_failure(JsonManifestParseContext *context, char *msg)
891 : {
892 52 : context->error_cb(context, "could not parse backup manifest: %s", msg);
893 : }
894 :
/*
 * Convert a character which represents a hexadecimal digit to an integer.
 *
 * Both lower-case and upper-case letters are accepted.
 *
 * Returns the digit's value (0..15), or -1 if the character is not a
 * hexadecimal digit.
 */
static int
hexdecode_char(char c)
{
	int			value = -1;

	if ('0' <= c && c <= '9')
		value = c - '0';
	else if ('a' <= c && c <= 'f')
		value = 10 + (c - 'a');
	else if ('A' <= c && c <= 'F')
		value = 10 + (c - 'A');

	return value;
}
912 :
913 : /*
914 : * Decode a hex string into a byte string, 2 hex chars per byte.
915 : *
916 : * Returns false if invalid characters are encountered; otherwise true.
917 : */
918 : static bool
919 223514 : hexdecode_string(uint8 *result, char *input, int nbytes)
920 : {
921 : int i;
922 :
923 1791696 : for (i = 0; i < nbytes; ++i)
924 : {
925 1568184 : int n1 = hexdecode_char(input[i * 2]);
926 1568184 : int n2 = hexdecode_char(input[i * 2 + 1]);
927 :
928 1568184 : if (n1 < 0 || n2 < 0)
929 2 : return false;
930 1568182 : result[i] = n1 * 16 + n2;
931 : }
932 :
933 223512 : return true;
934 : }
935 :
936 : /*
937 : * Parse an XLogRecPtr expressed using the usual string format.
938 : */
939 : static bool
940 470 : parse_xlogrecptr(XLogRecPtr *result, char *input)
941 : {
942 : uint32 hi;
943 : uint32 lo;
944 :
945 470 : if (sscanf(input, "%X/%X", &hi, &lo) != 2)
946 4 : return false;
947 466 : *result = ((uint64) hi) << 32 | lo;
948 466 : return true;
949 : }
|