Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * parse_manifest.c
4 : * Parse a backup manifest in JSON format.
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * src/common/parse_manifest.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 :
14 : #include "postgres_fe.h"
15 :
16 : #include "common/jsonapi.h"
17 : #include "common/parse_manifest.h"
18 :
/*
 * Semantic states for JSON manifest parsing.
 *
 * Each value names what the semantic callbacks expect to see next.  Every
 * callback switches on the current state and reports a parse failure for
 * any event that is not valid in that state.
 */
typedef enum
{
	/* Nothing seen yet; only the opening of the toplevel object is valid. */
	JM_EXPECT_TOPLEVEL_START,
	/* Manifest-Checksum value seen; only the toplevel object end is valid. */
	JM_EXPECT_TOPLEVEL_END,
	/* Inside the toplevel object, waiting for a key (or end of an array). */
	JM_EXPECT_TOPLEVEL_FIELD,
	/* Saw "PostgreSQL-Backup-Manifest-Version"; its scalar comes next. */
	JM_EXPECT_VERSION_VALUE,
	/* Saw "System-Identifier"; its scalar comes next. */
	JM_EXPECT_SYSTEM_IDENTIFIER_VALUE,
	/* Saw "Files"; the opening of its array comes next. */
	JM_EXPECT_FILES_START,
	/* Inside the "Files" array: next is a file object or the array end. */
	JM_EXPECT_FILES_NEXT,
	/* Inside one file object, waiting for a key or the object end. */
	JM_EXPECT_THIS_FILE_FIELD,
	/* Saw a key inside a file object; its scalar comes next. */
	JM_EXPECT_THIS_FILE_VALUE,
	/* Saw "WAL-Ranges"; the opening of its array comes next. */
	JM_EXPECT_WAL_RANGES_START,
	/* Inside the "WAL-Ranges" array: next is a range object or array end. */
	JM_EXPECT_WAL_RANGES_NEXT,
	/* Inside one WAL range object, waiting for a key or the object end. */
	JM_EXPECT_THIS_WAL_RANGE_FIELD,
	/* Saw a key inside a WAL range object; its scalar comes next. */
	JM_EXPECT_THIS_WAL_RANGE_VALUE,
	/* Saw "Manifest-Checksum"; its scalar comes next. */
	JM_EXPECT_MANIFEST_CHECKSUM_VALUE,
	/* Toplevel object closed; the entry points require this final state. */
	JM_EXPECT_EOF,
} JsonManifestSemanticState;
40 :
/*
 * Possible fields for one file as described by the manifest.
 *
 * json_manifest_object_field_start() records which key it just saw using
 * one of these values, so that json_manifest_scalar() knows where to store
 * the value that follows.
 */
typedef enum
{
	JMFF_PATH,					/* "Path" */
	JMFF_ENCODED_PATH,			/* "Encoded-Path" (hex-encoded path name) */
	JMFF_SIZE,					/* "Size" */
	JMFF_LAST_MODIFIED,			/* "Last-Modified" (value is discarded) */
	JMFF_CHECKSUM_ALGORITHM,	/* "Checksum-Algorithm" */
	JMFF_CHECKSUM,				/* "Checksum" */
} JsonManifestFileField;
53 :
/*
 * Possible fields for one WAL range as described by the manifest.
 *
 * json_manifest_object_field_start() records which key it just saw using
 * one of these values, so that json_manifest_scalar() knows where to store
 * the value that follows.
 */
typedef enum
{
	JMWRF_TIMELINE,				/* "Timeline" */
	JMWRF_START_LSN,			/* "Start-LSN" */
	JMWRF_END_LSN,				/* "End-LSN" */
} JsonManifestWALRangeField;
63 :
/*
 * Internal state used while decoding the JSON-format backup manifest.
 *
 * The string members hold scalar tokens handed to json_manifest_scalar();
 * they stay as raw text until the matching json_manifest_finalize_*()
 * routine converts and validates them.
 */
typedef struct
{
	/* Caller-supplied context: callbacks for results and error reporting. */
	JsonManifestParseContext *context;
	/* What the semantic callbacks expect to see next. */
	JsonManifestSemanticState state;

	/* These fields are used for parsing objects in the list of files. */
	JsonManifestFileField file_field;	/* key most recently seen */
	char	   *pathname;		/* "Path" value, or decoded "Encoded-Path" */
	char	   *encoded_pathname;	/* "Encoded-Path" value (hex text) */
	char	   *size;			/* "Size" value, still as text */
	char	   *algorithm;		/* "Checksum-Algorithm" value, as text */
	/* NOTE(review): not referenced by any code shown in this file. */
	pg_checksum_type checksum_algorithm;
	char	   *checksum;		/* "Checksum" value (hex text) */

	/* These fields are used for parsing objects in the list of WAL ranges. */
	JsonManifestWALRangeField wal_range_field;	/* key most recently seen */
	char	   *timeline;		/* "Timeline" value, still as text */
	char	   *start_lsn;		/* "Start-LSN" value, still as text */
	char	   *end_lsn;		/* "End-LSN" value, still as text */

	/* Miscellaneous other stuff. */
	bool		saw_version_field;	/* first toplevel key already consumed? */
	char	   *manifest_version;	/* version value, as text */
	char	   *manifest_system_identifier; /* system identifier, as text */
	char	   *manifest_checksum;	/* "Manifest-Checksum" value (hex text) */
} JsonManifestParseState;
93 :
/*
 * State carried between calls when the manifest is parsed in chunks.
 *
 * typedef appears in parse_manifest.h
 */
struct JsonManifestParseIncrementalState
{
	JsonLexContext lex;			/* incremental JSON lexer state */
	JsonSemAction sem;			/* callbacks; semstate points at the
								 * JsonManifestParseState */
	pg_cryptohash_ctx *manifest_ctx;	/* running SHA-256 over the chunks
										 * seen so far */
};
101 :
/* Semantic-action callbacks handed to the JSON parser. */
static JsonParseErrorType json_manifest_object_start(void *state);
static JsonParseErrorType json_manifest_object_end(void *state);
static JsonParseErrorType json_manifest_array_start(void *state);
static JsonParseErrorType json_manifest_array_end(void *state);
static JsonParseErrorType json_manifest_object_field_start(void *state, char *fname,
														   bool isnull);
static JsonParseErrorType json_manifest_scalar(void *state, char *token,
											   JsonTokenType tokentype);

/* Routines that validate collected values and invoke the caller's callbacks. */
static void json_manifest_finalize_version(JsonManifestParseState *parse);
static void json_manifest_finalize_system_identifier(JsonManifestParseState *parse);
static void json_manifest_finalize_file(JsonManifestParseState *parse);
static void json_manifest_finalize_wal_range(JsonManifestParseState *parse);
static void verify_manifest_checksum(JsonManifestParseState *parse,
									 char *buffer, size_t size,
									 pg_cryptohash_ctx *incr_ctx);
static void json_manifest_parse_failure(JsonManifestParseContext *context,
										char *msg);

/* Small text-decoding helpers. */
static int	hexdecode_char(char c);
static bool hexdecode_string(uint8 *result, char *input, int nbytes);
static bool parse_xlogrecptr(XLogRecPtr *result, char *input);
123 :
124 : /*
125 : * Set up for incremental parsing of the manifest.
126 : */
127 :
128 : JsonManifestParseIncrementalState *
129 176 : json_parse_manifest_incremental_init(JsonManifestParseContext *context)
130 : {
131 : JsonManifestParseIncrementalState *incstate;
132 : JsonManifestParseState *parse;
133 : pg_cryptohash_ctx *manifest_ctx;
134 :
135 176 : incstate = palloc(sizeof(JsonManifestParseIncrementalState));
136 176 : parse = palloc(sizeof(JsonManifestParseState));
137 :
138 176 : parse->context = context;
139 176 : parse->state = JM_EXPECT_TOPLEVEL_START;
140 176 : parse->saw_version_field = false;
141 :
142 176 : makeJsonLexContextIncremental(&(incstate->lex), PG_UTF8, true);
143 :
144 176 : incstate->sem.semstate = parse;
145 176 : incstate->sem.object_start = json_manifest_object_start;
146 176 : incstate->sem.object_end = json_manifest_object_end;
147 176 : incstate->sem.array_start = json_manifest_array_start;
148 176 : incstate->sem.array_end = json_manifest_array_end;
149 176 : incstate->sem.object_field_start = json_manifest_object_field_start;
150 176 : incstate->sem.object_field_end = NULL;
151 176 : incstate->sem.array_element_start = NULL;
152 176 : incstate->sem.array_element_end = NULL;
153 176 : incstate->sem.scalar = json_manifest_scalar;
154 :
155 176 : manifest_ctx = pg_cryptohash_create(PG_SHA256);
156 176 : if (manifest_ctx == NULL)
157 0 : context->error_cb(context, "out of memory");
158 176 : if (pg_cryptohash_init(manifest_ctx) < 0)
159 0 : context->error_cb(context, "could not initialize checksum of manifest");
160 176 : incstate->manifest_ctx = manifest_ctx;
161 :
162 176 : return incstate;
163 : }
164 :
165 : /*
166 : * Free an incremental state object and its contents.
167 : */
168 : void
169 172 : json_parse_manifest_incremental_shutdown(JsonManifestParseIncrementalState *incstate)
170 : {
171 172 : pfree(incstate->sem.semstate);
172 172 : freeJsonLexContext(&(incstate->lex));
173 : /* incstate->manifest_ctx has already been freed */
174 172 : pfree(incstate);
175 172 : }
176 :
177 : /*
178 : * parse the manifest in pieces.
179 : *
180 : * The caller must ensure that the final piece contains the final lines
181 : * with the complete checksum.
182 : */
183 :
184 : void
185 350 : json_parse_manifest_incremental_chunk(
186 : JsonManifestParseIncrementalState *incstate, char *chunk, int size,
187 : bool is_last)
188 : {
189 : JsonParseErrorType res,
190 : expected;
191 350 : JsonManifestParseState *parse = incstate->sem.semstate;
192 350 : JsonManifestParseContext *context = parse->context;
193 :
194 350 : res = pg_parse_json_incremental(&(incstate->lex), &(incstate->sem),
195 : chunk, size, is_last);
196 :
197 348 : expected = is_last ? JSON_SUCCESS : JSON_INCOMPLETE;
198 :
199 348 : if (res != expected)
200 0 : json_manifest_parse_failure(context,
201 : json_errdetail(res, &(incstate->lex)));
202 :
203 348 : if (is_last && parse->state != JM_EXPECT_EOF)
204 0 : json_manifest_parse_failure(context, "manifest ended unexpectedly");
205 :
206 348 : if (!is_last)
207 : {
208 174 : if (pg_cryptohash_update(incstate->manifest_ctx,
209 : (uint8 *) chunk, size) < 0)
210 0 : context->error_cb(context, "could not update checksum of manifest");
211 : }
212 : else
213 : {
214 174 : verify_manifest_checksum(parse, chunk, size, incstate->manifest_ctx);
215 : }
216 346 : }
217 :
218 :
219 : /*
220 : * Main entrypoint to parse a JSON-format backup manifest.
221 : *
222 : * Caller should set up the parsing context and then invoke this function.
223 : * For each file whose information is extracted from the manifest,
224 : * context->per_file_cb is invoked. In case of trouble, context->error_cb is
225 : * invoked and is expected not to return.
226 : */
227 : void
228 64 : json_parse_manifest(JsonManifestParseContext *context, char *buffer,
229 : size_t size)
230 : {
231 : JsonLexContext *lex;
232 : JsonParseErrorType json_error;
233 : JsonSemAction sem;
234 : JsonManifestParseState parse;
235 :
236 : /* Set up our private parsing context. */
237 64 : parse.context = context;
238 64 : parse.state = JM_EXPECT_TOPLEVEL_START;
239 64 : parse.saw_version_field = false;
240 :
241 : /* Create a JSON lexing context. */
242 64 : lex = makeJsonLexContextCstringLen(NULL, buffer, size, PG_UTF8, true);
243 :
244 : /* Set up semantic actions. */
245 64 : sem.semstate = &parse;
246 64 : sem.object_start = json_manifest_object_start;
247 64 : sem.object_end = json_manifest_object_end;
248 64 : sem.array_start = json_manifest_array_start;
249 64 : sem.array_end = json_manifest_array_end;
250 64 : sem.object_field_start = json_manifest_object_field_start;
251 64 : sem.object_field_end = NULL;
252 64 : sem.array_element_start = NULL;
253 64 : sem.array_element_end = NULL;
254 64 : sem.scalar = json_manifest_scalar;
255 :
256 : /* Run the actual JSON parser. */
257 64 : json_error = pg_parse_json(lex, &sem);
258 12 : if (json_error != JSON_SUCCESS)
259 2 : json_manifest_parse_failure(context, json_errdetail(json_error, lex));
260 10 : if (parse.state != JM_EXPECT_EOF)
261 0 : json_manifest_parse_failure(context, "manifest ended unexpectedly");
262 :
263 : /* Verify the manifest checksum. */
264 10 : verify_manifest_checksum(&parse, buffer, size, NULL);
265 :
266 4 : freeJsonLexContext(lex);
267 4 : }
268 :
269 : /*
270 : * Invoked at the start of each object in the JSON document.
271 : *
272 : * The document as a whole is expected to be an object; each file and each
273 : * WAL range is also expected to be an object. If we're anywhere else in the
274 : * document, it's an error.
275 : */
276 : static JsonParseErrorType
277 176366 : json_manifest_object_start(void *state)
278 : {
279 176366 : JsonManifestParseState *parse = state;
280 :
281 176366 : switch (parse->state)
282 : {
283 238 : case JM_EXPECT_TOPLEVEL_START:
284 238 : parse->state = JM_EXPECT_TOPLEVEL_FIELD;
285 238 : break;
286 175932 : case JM_EXPECT_FILES_NEXT:
287 175932 : parse->state = JM_EXPECT_THIS_FILE_FIELD;
288 175932 : parse->pathname = NULL;
289 175932 : parse->encoded_pathname = NULL;
290 175932 : parse->size = NULL;
291 175932 : parse->algorithm = NULL;
292 175932 : parse->checksum = NULL;
293 175932 : break;
294 194 : case JM_EXPECT_WAL_RANGES_NEXT:
295 194 : parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
296 194 : parse->timeline = NULL;
297 194 : parse->start_lsn = NULL;
298 194 : parse->end_lsn = NULL;
299 194 : break;
300 2 : default:
301 2 : json_manifest_parse_failure(parse->context,
302 : "unexpected object start");
303 0 : break;
304 : }
305 :
306 176364 : return JSON_SUCCESS;
307 : }
308 :
309 : /*
310 : * Invoked at the end of each object in the JSON document.
311 : *
312 : * The possible cases here are the same as for json_manifest_object_start.
313 : * There's nothing special to do at the end of the document, but when we
314 : * reach the end of an object representing a particular file or WAL range,
315 : * we must call json_manifest_finalize_file() to save the associated details.
316 : */
317 : static JsonParseErrorType
318 176310 : json_manifest_object_end(void *state)
319 : {
320 176310 : JsonManifestParseState *parse = state;
321 :
322 176310 : switch (parse->state)
323 : {
324 184 : case JM_EXPECT_TOPLEVEL_END:
325 184 : parse->state = JM_EXPECT_EOF;
326 184 : break;
327 175930 : case JM_EXPECT_THIS_FILE_FIELD:
328 175930 : json_manifest_finalize_file(parse);
329 175912 : parse->state = JM_EXPECT_FILES_NEXT;
330 175912 : break;
331 192 : case JM_EXPECT_THIS_WAL_RANGE_FIELD:
332 192 : json_manifest_finalize_wal_range(parse);
333 180 : parse->state = JM_EXPECT_WAL_RANGES_NEXT;
334 180 : break;
335 4 : default:
336 4 : json_manifest_parse_failure(parse->context,
337 : "unexpected object end");
338 0 : break;
339 : }
340 :
341 176276 : return JSON_SUCCESS;
342 : }
343 :
344 : /*
345 : * Invoked at the start of each array in the JSON document.
346 : *
347 : * Within the toplevel object, the value associated with the "Files" key
348 : * should be an array. Similarly for the "WAL-Ranges" key. No other arrays
349 : * are expected.
350 : */
351 : static JsonParseErrorType
352 400 : json_manifest_array_start(void *state)
353 : {
354 400 : JsonManifestParseState *parse = state;
355 :
356 400 : switch (parse->state)
357 : {
358 204 : case JM_EXPECT_FILES_START:
359 204 : parse->state = JM_EXPECT_FILES_NEXT;
360 204 : break;
361 194 : case JM_EXPECT_WAL_RANGES_START:
362 194 : parse->state = JM_EXPECT_WAL_RANGES_NEXT;
363 194 : break;
364 2 : default:
365 2 : json_manifest_parse_failure(parse->context,
366 : "unexpected array start");
367 0 : break;
368 : }
369 :
370 398 : return JSON_SUCCESS;
371 : }
372 :
373 : /*
374 : * Invoked at the end of each array in the JSON document.
375 : *
376 : * The cases here are analogous to those in json_manifest_array_start.
377 : */
378 : static JsonParseErrorType
379 364 : json_manifest_array_end(void *state)
380 : {
381 364 : JsonManifestParseState *parse = state;
382 :
383 364 : switch (parse->state)
384 : {
385 364 : case JM_EXPECT_FILES_NEXT:
386 : case JM_EXPECT_WAL_RANGES_NEXT:
387 364 : parse->state = JM_EXPECT_TOPLEVEL_FIELD;
388 364 : break;
389 0 : default:
390 0 : json_manifest_parse_failure(parse->context,
391 : "unexpected array end");
392 0 : break;
393 : }
394 :
395 364 : return JSON_SUCCESS;
396 : }
397 :
398 : /*
399 : * Invoked at the start of each object field in the JSON document.
400 : */
401 : static JsonParseErrorType
402 873424 : json_manifest_object_field_start(void *state, char *fname, bool isnull)
403 : {
404 873424 : JsonManifestParseState *parse = state;
405 :
406 873424 : switch (parse->state)
407 : {
408 1002 : case JM_EXPECT_TOPLEVEL_FIELD:
409 :
410 : /*
411 : * Inside toplevel object. The version indicator should always be
412 : * the first field.
413 : */
414 1002 : if (!parse->saw_version_field)
415 : {
416 234 : if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0)
417 2 : json_manifest_parse_failure(parse->context,
418 : "expected version indicator");
419 232 : parse->state = JM_EXPECT_VERSION_VALUE;
420 232 : parse->saw_version_field = true;
421 232 : break;
422 : }
423 :
424 : /* Is this the system identifier? */
425 768 : if (strcmp(fname, "System-Identifier") == 0)
426 : {
427 180 : parse->state = JM_EXPECT_SYSTEM_IDENTIFIER_VALUE;
428 180 : break;
429 : }
430 :
431 : /* Is this the list of files? */
432 588 : if (strcmp(fname, "Files") == 0)
433 : {
434 208 : parse->state = JM_EXPECT_FILES_START;
435 208 : break;
436 : }
437 :
438 : /* Is this the list of WAL ranges? */
439 380 : if (strcmp(fname, "WAL-Ranges") == 0)
440 : {
441 194 : parse->state = JM_EXPECT_WAL_RANGES_START;
442 194 : break;
443 : }
444 :
445 : /* Is this the manifest checksum? */
446 186 : if (strcmp(fname, "Manifest-Checksum") == 0)
447 : {
448 184 : parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE;
449 184 : break;
450 : }
451 :
452 : /* It's not a field we recognize. */
453 2 : json_manifest_parse_failure(parse->context,
454 : "unrecognized top-level field");
455 0 : break;
456 :
457 871856 : case JM_EXPECT_THIS_FILE_FIELD:
458 : /* Inside object for one file; which key have we got? */
459 871856 : if (strcmp(fname, "Path") == 0)
460 173994 : parse->file_field = JMFF_PATH;
461 697862 : else if (strcmp(fname, "Encoded-Path") == 0)
462 1936 : parse->file_field = JMFF_ENCODED_PATH;
463 695926 : else if (strcmp(fname, "Size") == 0)
464 175924 : parse->file_field = JMFF_SIZE;
465 520002 : else if (strcmp(fname, "Last-Modified") == 0)
466 175910 : parse->file_field = JMFF_LAST_MODIFIED;
467 344092 : else if (strcmp(fname, "Checksum-Algorithm") == 0)
468 172044 : parse->file_field = JMFF_CHECKSUM_ALGORITHM;
469 172048 : else if (strcmp(fname, "Checksum") == 0)
470 172046 : parse->file_field = JMFF_CHECKSUM;
471 : else
472 2 : json_manifest_parse_failure(parse->context,
473 : "unexpected file field");
474 871854 : parse->state = JM_EXPECT_THIS_FILE_VALUE;
475 871854 : break;
476 :
477 566 : case JM_EXPECT_THIS_WAL_RANGE_FIELD:
478 : /* Inside object for one file; which key have we got? */
479 566 : if (strcmp(fname, "Timeline") == 0)
480 190 : parse->wal_range_field = JMWRF_TIMELINE;
481 376 : else if (strcmp(fname, "Start-LSN") == 0)
482 188 : parse->wal_range_field = JMWRF_START_LSN;
483 188 : else if (strcmp(fname, "End-LSN") == 0)
484 186 : parse->wal_range_field = JMWRF_END_LSN;
485 : else
486 2 : json_manifest_parse_failure(parse->context,
487 : "unexpected WAL range field");
488 564 : parse->state = JM_EXPECT_THIS_WAL_RANGE_VALUE;
489 564 : break;
490 :
491 0 : default:
492 0 : json_manifest_parse_failure(parse->context,
493 : "unexpected object field");
494 0 : break;
495 : }
496 :
497 873416 : pfree(fname);
498 :
499 873416 : return JSON_SUCCESS;
500 : }
501 :
502 : /*
503 : * Invoked at the start of each scalar in the JSON document.
504 : *
505 : * Object field names don't reach this code; those are handled by
506 : * json_manifest_object_field_start. When we're inside of the object for
507 : * a particular file or WAL range, that function will have noticed the name
508 : * of the field, and we'll get the corresponding value here. When we're in
509 : * the toplevel object, the parse state itself tells us which field this is.
510 : *
511 : * In all cases except for PostgreSQL-Backup-Manifest-Version, which we
512 : * can just check on the spot, the goal here is just to save the value in
513 : * the parse state for later use. We don't actually do anything until we
514 : * reach either the end of the object representing this file, or the end
515 : * of the manifest, as the case may be.
516 : */
517 : static JsonParseErrorType
518 873016 : json_manifest_scalar(void *state, char *token, JsonTokenType tokentype)
519 : {
520 873016 : JsonManifestParseState *parse = state;
521 :
522 873016 : switch (parse->state)
523 : {
524 232 : case JM_EXPECT_VERSION_VALUE:
525 232 : parse->manifest_version = token;
526 232 : json_manifest_finalize_version(parse);
527 228 : parse->state = JM_EXPECT_TOPLEVEL_FIELD;
528 228 : break;
529 :
530 180 : case JM_EXPECT_SYSTEM_IDENTIFIER_VALUE:
531 180 : parse->manifest_system_identifier = token;
532 180 : json_manifest_finalize_system_identifier(parse);
533 178 : parse->state = JM_EXPECT_TOPLEVEL_FIELD;
534 178 : break;
535 :
536 871854 : case JM_EXPECT_THIS_FILE_VALUE:
537 871854 : switch (parse->file_field)
538 : {
539 173994 : case JMFF_PATH:
540 173994 : parse->pathname = token;
541 173994 : break;
542 1936 : case JMFF_ENCODED_PATH:
543 1936 : parse->encoded_pathname = token;
544 1936 : break;
545 175924 : case JMFF_SIZE:
546 175924 : parse->size = token;
547 175924 : break;
548 175910 : case JMFF_LAST_MODIFIED:
549 175910 : pfree(token); /* unused */
550 175910 : break;
551 172044 : case JMFF_CHECKSUM_ALGORITHM:
552 172044 : parse->algorithm = token;
553 172044 : break;
554 172046 : case JMFF_CHECKSUM:
555 172046 : parse->checksum = token;
556 172046 : break;
557 : }
558 871854 : parse->state = JM_EXPECT_THIS_FILE_FIELD;
559 871854 : break;
560 :
561 564 : case JM_EXPECT_THIS_WAL_RANGE_VALUE:
562 564 : switch (parse->wal_range_field)
563 : {
564 190 : case JMWRF_TIMELINE:
565 190 : parse->timeline = token;
566 190 : break;
567 188 : case JMWRF_START_LSN:
568 188 : parse->start_lsn = token;
569 188 : break;
570 186 : case JMWRF_END_LSN:
571 186 : parse->end_lsn = token;
572 186 : break;
573 : }
574 564 : parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
575 564 : break;
576 :
577 184 : case JM_EXPECT_MANIFEST_CHECKSUM_VALUE:
578 184 : parse->state = JM_EXPECT_TOPLEVEL_END;
579 184 : parse->manifest_checksum = token;
580 184 : break;
581 :
582 2 : default:
583 2 : json_manifest_parse_failure(parse->context, "unexpected scalar");
584 0 : break;
585 : }
586 :
587 873008 : return JSON_SUCCESS;
588 : }
589 :
590 : /*
591 : * Do additional parsing and sanity-checking of the manifest version, and invoke
592 : * the callback so that the caller can gets that detail and take actions
593 : * accordingly. This happens for each manifest when the corresponding JSON
594 : * object is completely parsed.
595 : */
596 : static void
597 232 : json_manifest_finalize_version(JsonManifestParseState *parse)
598 : {
599 232 : JsonManifestParseContext *context = parse->context;
600 : int version;
601 : char *ep;
602 :
603 : Assert(parse->saw_version_field);
604 :
605 : /* Parse version. */
606 232 : version = strtoi64(parse->manifest_version, &ep, 10);
607 232 : if (*ep)
608 2 : json_manifest_parse_failure(parse->context,
609 : "manifest version not an integer");
610 :
611 230 : if (version != 1 && version != 2)
612 2 : json_manifest_parse_failure(parse->context,
613 : "unexpected manifest version");
614 :
615 : /* Invoke the callback for version */
616 228 : context->version_cb(context, version);
617 228 : }
618 :
619 : /*
620 : * Do additional parsing and sanity-checking of the system identifier, and
621 : * invoke the callback so that the caller can gets that detail and take actions
622 : * accordingly.
623 : */
624 : static void
625 180 : json_manifest_finalize_system_identifier(JsonManifestParseState *parse)
626 : {
627 180 : JsonManifestParseContext *context = parse->context;
628 : uint64 system_identifier;
629 : char *ep;
630 :
631 : Assert(parse->manifest_system_identifier != NULL);
632 :
633 : /* Parse system identifier. */
634 180 : system_identifier = strtou64(parse->manifest_system_identifier, &ep, 10);
635 180 : if (*ep)
636 0 : json_manifest_parse_failure(parse->context,
637 : "manifest system identifier not an integer");
638 :
639 : /* Invoke the callback for system identifier */
640 180 : context->system_identifier_cb(context, system_identifier);
641 178 : }
642 :
643 : /*
644 : * Do additional parsing and sanity-checking of the details gathered for one
645 : * file, and invoke the per-file callback so that the caller gets those
646 : * details. This happens for each file when the corresponding JSON object is
647 : * completely parsed.
648 : */
649 : static void
650 175930 : json_manifest_finalize_file(JsonManifestParseState *parse)
651 : {
652 175930 : JsonManifestParseContext *context = parse->context;
653 : size_t size;
654 : char *ep;
655 : int checksum_string_length;
656 : pg_checksum_type checksum_type;
657 : int checksum_length;
658 : uint8 *checksum_payload;
659 :
660 : /* Pathname and size are required. */
661 175930 : if (parse->pathname == NULL && parse->encoded_pathname == NULL)
662 2 : json_manifest_parse_failure(parse->context, "missing path name");
663 175928 : if (parse->pathname != NULL && parse->encoded_pathname != NULL)
664 2 : json_manifest_parse_failure(parse->context,
665 : "both path name and encoded path name");
666 175926 : if (parse->size == NULL)
667 2 : json_manifest_parse_failure(parse->context, "missing size");
668 175924 : if (parse->algorithm == NULL && parse->checksum != NULL)
669 2 : json_manifest_parse_failure(parse->context,
670 : "checksum without algorithm");
671 :
672 : /* Decode encoded pathname, if that's what we have. */
673 175922 : if (parse->encoded_pathname != NULL)
674 : {
675 1934 : int encoded_length = strlen(parse->encoded_pathname);
676 1934 : int raw_length = encoded_length / 2;
677 :
678 1934 : parse->pathname = palloc(raw_length + 1);
679 1934 : if (encoded_length % 2 != 0 ||
680 1932 : !hexdecode_string((uint8 *) parse->pathname,
681 : parse->encoded_pathname,
682 : raw_length))
683 2 : json_manifest_parse_failure(parse->context,
684 : "could not decode file name");
685 1932 : parse->pathname[raw_length] = '\0';
686 1932 : pfree(parse->encoded_pathname);
687 1932 : parse->encoded_pathname = NULL;
688 : }
689 :
690 : /* Parse size. */
691 175920 : size = strtoul(parse->size, &ep, 10);
692 175920 : if (*ep)
693 2 : json_manifest_parse_failure(parse->context,
694 : "file size is not an integer");
695 :
696 : /* Parse the checksum algorithm, if it's present. */
697 175918 : if (parse->algorithm == NULL)
698 3874 : checksum_type = CHECKSUM_TYPE_NONE;
699 172044 : else if (!pg_checksum_parse_type(parse->algorithm, &checksum_type))
700 2 : context->error_cb(context, "unrecognized checksum algorithm: \"%s\"",
701 : parse->algorithm);
702 :
703 : /* Parse the checksum payload, if it's present. */
704 175916 : checksum_string_length = parse->checksum == NULL ? 0
705 172042 : : strlen(parse->checksum);
706 175916 : if (checksum_string_length == 0)
707 : {
708 3874 : checksum_length = 0;
709 3874 : checksum_payload = NULL;
710 : }
711 : else
712 : {
713 172042 : checksum_length = checksum_string_length / 2;
714 172042 : checksum_payload = palloc(checksum_length);
715 172042 : if (checksum_string_length % 2 != 0 ||
716 172040 : !hexdecode_string(checksum_payload, parse->checksum,
717 : checksum_length))
718 2 : context->error_cb(context,
719 : "invalid checksum for file \"%s\": \"%s\"",
720 : parse->pathname, parse->checksum);
721 : }
722 :
723 : /* Invoke the callback with the details we've gathered. */
724 175914 : context->per_file_cb(context, parse->pathname, size,
725 : checksum_type, checksum_length, checksum_payload);
726 :
727 : /* Free memory we no longer need. */
728 175912 : if (parse->size != NULL)
729 : {
730 175912 : pfree(parse->size);
731 175912 : parse->size = NULL;
732 : }
733 175912 : if (parse->algorithm != NULL)
734 : {
735 172040 : pfree(parse->algorithm);
736 172040 : parse->algorithm = NULL;
737 : }
738 175912 : if (parse->checksum != NULL)
739 : {
740 172040 : pfree(parse->checksum);
741 172040 : parse->checksum = NULL;
742 : }
743 175912 : }
744 :
745 : /*
746 : * Do additional parsing and sanity-checking of the details gathered for one
747 : * WAL range, and invoke the per-WAL-range callback so that the caller gets
748 : * those details. This happens for each WAL range when the corresponding JSON
749 : * object is completely parsed.
750 : */
751 : static void
752 192 : json_manifest_finalize_wal_range(JsonManifestParseState *parse)
753 : {
754 192 : JsonManifestParseContext *context = parse->context;
755 : TimeLineID tli;
756 : XLogRecPtr start_lsn,
757 : end_lsn;
758 : char *ep;
759 :
760 : /* Make sure all fields are present. */
761 192 : if (parse->timeline == NULL)
762 2 : json_manifest_parse_failure(parse->context, "missing timeline");
763 190 : if (parse->start_lsn == NULL)
764 2 : json_manifest_parse_failure(parse->context, "missing start LSN");
765 188 : if (parse->end_lsn == NULL)
766 2 : json_manifest_parse_failure(parse->context, "missing end LSN");
767 :
768 : /* Parse timeline. */
769 186 : tli = strtoul(parse->timeline, &ep, 10);
770 186 : if (*ep)
771 2 : json_manifest_parse_failure(parse->context,
772 : "timeline is not an integer");
773 184 : if (!parse_xlogrecptr(&start_lsn, parse->start_lsn))
774 2 : json_manifest_parse_failure(parse->context,
775 : "could not parse start LSN");
776 182 : if (!parse_xlogrecptr(&end_lsn, parse->end_lsn))
777 2 : json_manifest_parse_failure(parse->context,
778 : "could not parse end LSN");
779 :
780 : /* Invoke the callback with the details we've gathered. */
781 180 : context->per_wal_range_cb(context, tli, start_lsn, end_lsn);
782 :
783 : /* Free memory we no longer need. */
784 180 : if (parse->timeline != NULL)
785 : {
786 180 : pfree(parse->timeline);
787 180 : parse->timeline = NULL;
788 : }
789 180 : if (parse->start_lsn != NULL)
790 : {
791 180 : pfree(parse->start_lsn);
792 180 : parse->start_lsn = NULL;
793 : }
794 180 : if (parse->end_lsn != NULL)
795 : {
796 180 : pfree(parse->end_lsn);
797 180 : parse->end_lsn = NULL;
798 : }
799 180 : }
800 :
801 : /*
802 : * Verify that the manifest checksum is correct.
803 : *
804 : * The last line of the manifest file is excluded from the manifest checksum,
805 : * because the last line is expected to contain the checksum that covers
806 : * the rest of the file.
807 : *
808 : * For an incremental parse, this will just be called on the last chunk of the
809 : * manifest, and the cryptohash context passed in. For a non-incremental
810 : * parse incr_ctx will be NULL.
811 : */
812 : static void
813 184 : verify_manifest_checksum(JsonManifestParseState *parse, char *buffer,
814 : size_t size, pg_cryptohash_ctx *incr_ctx)
815 : {
816 184 : JsonManifestParseContext *context = parse->context;
817 : size_t i;
818 184 : size_t number_of_newlines = 0;
819 184 : size_t ultimate_newline = 0;
820 184 : size_t penultimate_newline = 0;
821 : pg_cryptohash_ctx *manifest_ctx;
822 : uint8 manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH];
823 : uint8 manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH];
824 :
825 : /* Find the last two newlines in the file. */
826 12283474 : for (i = 0; i < size; ++i)
827 : {
828 12283290 : if (buffer[i] == '\n')
829 : {
830 85588 : ++number_of_newlines;
831 85588 : penultimate_newline = ultimate_newline;
832 85588 : ultimate_newline = i;
833 : }
834 : }
835 :
836 : /*
837 : * Make sure that the last newline is right at the end, and that there are
838 : * at least two lines total. We need this to be true in order for the
839 : * following code, which computes the manifest checksum, to work properly.
840 : */
841 184 : if (number_of_newlines < 2)
842 2 : json_manifest_parse_failure(parse->context,
843 : "expected at least 2 lines");
844 182 : if (ultimate_newline != size - 1)
845 2 : json_manifest_parse_failure(parse->context,
846 : "last line not newline-terminated");
847 :
848 : /* Checksum the rest. */
849 180 : if (incr_ctx == NULL)
850 : {
851 6 : manifest_ctx = pg_cryptohash_create(PG_SHA256);
852 6 : if (manifest_ctx == NULL)
853 0 : context->error_cb(context, "out of memory");
854 6 : if (pg_cryptohash_init(manifest_ctx) < 0)
855 0 : context->error_cb(context, "could not initialize checksum of manifest");
856 : }
857 : else
858 : {
859 174 : manifest_ctx = incr_ctx;
860 : }
861 180 : if (pg_cryptohash_update(manifest_ctx, (uint8 *) buffer, penultimate_newline + 1) < 0)
862 0 : context->error_cb(context, "could not update checksum of manifest");
863 180 : if (pg_cryptohash_final(manifest_ctx, manifest_checksum_actual,
864 : sizeof(manifest_checksum_actual)) < 0)
865 0 : context->error_cb(context, "could not finalize checksum of manifest");
866 :
867 : /* Now verify it. */
868 180 : if (parse->manifest_checksum == NULL)
869 0 : context->error_cb(parse->context, "manifest has no checksum");
870 180 : if (strlen(parse->manifest_checksum) != PG_SHA256_DIGEST_LENGTH * 2 ||
871 180 : !hexdecode_string(manifest_checksum_expected, parse->manifest_checksum,
872 : PG_SHA256_DIGEST_LENGTH))
873 2 : context->error_cb(context, "invalid manifest checksum: \"%s\"",
874 : parse->manifest_checksum);
875 178 : if (memcmp(manifest_checksum_actual, manifest_checksum_expected,
876 : PG_SHA256_DIGEST_LENGTH) != 0)
877 2 : context->error_cb(context, "manifest checksum mismatch");
878 176 : pg_cryptohash_free(manifest_ctx);
879 176 : }
880 :
/*
 * Report a parse error.
 *
 * This is intended to be used for fairly low-level failures that probably
 * shouldn't occur unless somebody has deliberately constructed a bad manifest,
 * or unless the server is generating bad manifests due to some bug. msg should
 * be a short string giving some hint as to what the problem is.
 *
 * The message is forwarded to context->error_cb with a fixed "could not
 * parse backup manifest" prefix.  Callers in this file continue as though
 * this function does not return, relying on error_cb not returning.
 */
static void
json_manifest_parse_failure(JsonManifestParseContext *context, char *msg)
{
	context->error_cb(context, "could not parse backup manifest: %s", msg);
}
894 :
/*
 * Map one hexadecimal digit character to its numeric value (0..15).
 *
 * Both lower-case and upper-case letters are accepted.  Any character that
 * is not a hexadecimal digit yields -1.
 */
static int
hexdecode_char(char c)
{
	int			value = -1;

	if ('0' <= c && c <= '9')
		value = c - '0';
	else if ('a' <= c && c <= 'f')
		value = 10 + (c - 'a');
	else if ('A' <= c && c <= 'F')
		value = 10 + (c - 'A');

	return value;
}
912 :
913 : /*
914 : * Decode a hex string into a byte string, 2 hex chars per byte.
915 : *
916 : * Returns false if invalid characters are encountered; otherwise true.
917 : */
918 : static bool
919 174152 : hexdecode_string(uint8 *result, char *input, int nbytes)
920 : {
921 : int i;
922 :
923 1238982 : for (i = 0; i < nbytes; ++i)
924 : {
925 1064832 : int n1 = hexdecode_char(input[i * 2]);
926 1064832 : int n2 = hexdecode_char(input[i * 2 + 1]);
927 :
928 1064832 : if (n1 < 0 || n2 < 0)
929 2 : return false;
930 1064830 : result[i] = n1 * 16 + n2;
931 : }
932 :
933 174150 : return true;
934 : }
935 :
936 : /*
937 : * Parse an XLogRecPtr expressed using the usual string format.
938 : */
939 : static bool
940 366 : parse_xlogrecptr(XLogRecPtr *result, char *input)
941 : {
942 : uint32 hi;
943 : uint32 lo;
944 :
945 366 : if (sscanf(input, "%X/%X", &hi, &lo) != 2)
946 4 : return false;
947 362 : *result = ((uint64) hi) << 32 | lo;
948 362 : return true;
949 : }
|