Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * astreamer_tar.c
4 : *
5 : * This module implements three types of tar processing. A tar parser
6 : * expects unlabelled chunks of data (i.e. ASTREAMER_UNKNOWN) and splits
7 : * it into labelled chunks (any other value of astreamer_archive_context).
8 : * A tar archiver does the reverse: it takes a bunch of labelled chunks
9 : * and produces a tarfile, optionally replacing member headers and trailers
10 : * so that upstream astreamer objects can perform surgery on the tarfile
11 : * contents without knowing the details of the tar format. A tar terminator
12 : * just adds two blocks of NUL bytes to the end of the file, since older
13 : * server versions produce files with this terminator omitted.
14 : *
15 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
16 : *
17 : * IDENTIFICATION
18 : * src/fe_utils/astreamer_tar.c
19 : *-------------------------------------------------------------------------
20 : */
21 :
22 : #include "postgres_fe.h"
23 :
24 : #include <time.h>
25 :
26 : #include "common/logging.h"
27 : #include "fe_utils/astreamer.h"
28 : #include "pgtar.h"
29 :
30 : typedef struct astreamer_tar_parser
31 : {
32 : astreamer base;
33 : astreamer_archive_context next_context;
34 : astreamer_member member;
35 : size_t file_bytes_sent;
36 : size_t pad_bytes_expected;
37 : } astreamer_tar_parser;
38 :
39 : typedef struct astreamer_tar_archiver
40 : {
41 : astreamer base;
42 : bool rearchive_member;
43 : } astreamer_tar_archiver;
44 :
45 : static void astreamer_tar_parser_content(astreamer *streamer,
46 : astreamer_member *member,
47 : const char *data, int len,
48 : astreamer_archive_context context);
49 : static void astreamer_tar_parser_finalize(astreamer *streamer);
50 : static void astreamer_tar_parser_free(astreamer *streamer);
51 : static bool astreamer_tar_header(astreamer_tar_parser *mystreamer);
52 :
53 : static const astreamer_ops astreamer_tar_parser_ops = {
54 : .content = astreamer_tar_parser_content,
55 : .finalize = astreamer_tar_parser_finalize,
56 : .free = astreamer_tar_parser_free
57 : };
58 :
59 : static void astreamer_tar_archiver_content(astreamer *streamer,
60 : astreamer_member *member,
61 : const char *data, int len,
62 : astreamer_archive_context context);
63 : static void astreamer_tar_archiver_finalize(astreamer *streamer);
64 : static void astreamer_tar_archiver_free(astreamer *streamer);
65 :
66 : static const astreamer_ops astreamer_tar_archiver_ops = {
67 : .content = astreamer_tar_archiver_content,
68 : .finalize = astreamer_tar_archiver_finalize,
69 : .free = astreamer_tar_archiver_free
70 : };
71 :
72 : static void astreamer_tar_terminator_content(astreamer *streamer,
73 : astreamer_member *member,
74 : const char *data, int len,
75 : astreamer_archive_context context);
76 : static void astreamer_tar_terminator_finalize(astreamer *streamer);
77 : static void astreamer_tar_terminator_free(astreamer *streamer);
78 :
79 : static const astreamer_ops astreamer_tar_terminator_ops = {
80 : .content = astreamer_tar_terminator_content,
81 : .finalize = astreamer_tar_terminator_finalize,
82 : .free = astreamer_tar_terminator_free
83 : };
84 :
85 : /*
86 : * Create a astreamer that can parse a stream of content as tar data.
87 : *
88 : * The input should be a series of ASTREAMER_UNKNOWN chunks; the astreamer
89 : * specified by 'next' will receive a series of typed chunks, as per the
90 : * conventions described in astreamer.h.
91 : */
92 : astreamer *
93 257 : astreamer_tar_parser_new(astreamer *next)
94 : {
95 : astreamer_tar_parser *streamer;
96 :
97 257 : streamer = palloc0_object(astreamer_tar_parser);
98 257 : *((const astreamer_ops **) &streamer->base.bbs_ops) =
99 : &astreamer_tar_parser_ops;
100 257 : streamer->base.bbs_next = next;
101 257 : initStringInfo(&streamer->base.bbs_buffer);
102 257 : streamer->next_context = ASTREAMER_MEMBER_HEADER;
103 :
104 257 : return &streamer->base;
105 : }
106 :
107 : /*
108 : * Parse unknown content as tar data.
109 : */
110 : static void
111 347837 : astreamer_tar_parser_content(astreamer *streamer, astreamer_member *member,
112 : const char *data, int len,
113 : astreamer_archive_context context)
114 : {
115 347837 : astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
116 : size_t nbytes;
117 :
118 : /* Expect unparsed input. */
119 : Assert(member == NULL);
120 : Assert(context == ASTREAMER_UNKNOWN);
121 :
122 757003 : while (len > 0)
123 : {
124 409369 : switch (mystreamer->next_context)
125 : {
126 186200 : case ASTREAMER_MEMBER_HEADER:
127 :
128 : /*
129 : * If we're expecting an archive member header, accumulate a
130 : * full block of data before doing anything further.
131 : */
132 186200 : if (!astreamer_buffer_until(streamer, &data, &len,
133 : TAR_BLOCK_SIZE))
134 0 : return;
135 :
136 : /*
137 : * Now we can process the header and get ready to process the
138 : * file contents; however, we might find out that what we
139 : * thought was the next file header is actually the start of
140 : * the archive trailer. Switch modes accordingly.
141 : */
142 186200 : if (astreamer_tar_header(mystreamer))
143 : {
144 185998 : if (mystreamer->member.size == 0)
145 : {
146 : /* No content; trailer is zero-length. */
147 38113 : astreamer_content(mystreamer->base.bbs_next,
148 : &mystreamer->member,
149 : NULL, 0,
150 : ASTREAMER_MEMBER_TRAILER);
151 :
152 : /* Expect next header. */
153 38113 : mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
154 : }
155 : else
156 : {
157 : /* Expect contents. */
158 147885 : mystreamer->next_context = ASTREAMER_MEMBER_CONTENTS;
159 : }
160 185998 : mystreamer->base.bbs_buffer.len = 0;
161 185998 : mystreamer->file_bytes_sent = 0;
162 : }
163 : else
164 202 : mystreamer->next_context = ASTREAMER_ARCHIVE_TRAILER;
165 186200 : break;
166 :
167 212571 : case ASTREAMER_MEMBER_CONTENTS:
168 :
169 : /*
170 : * Send as much content as we have, but not more than the
171 : * remaining file length.
172 : */
173 : Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
174 212571 : nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
175 212571 : nbytes = Min(nbytes, len);
176 : Assert(nbytes > 0);
177 212571 : astreamer_content(mystreamer->base.bbs_next,
178 : &mystreamer->member,
179 : data, nbytes,
180 : ASTREAMER_MEMBER_CONTENTS);
181 212571 : mystreamer->file_bytes_sent += nbytes;
182 212571 : data += nbytes;
183 212571 : len -= nbytes;
184 :
185 : /*
186 : * If we've not yet sent the whole file, then there's more
187 : * content to come; otherwise, it's time to expect the file
188 : * trailer.
189 : */
190 : Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
191 212571 : if (mystreamer->file_bytes_sent == mystreamer->member.size)
192 : {
193 147832 : if (mystreamer->pad_bytes_expected == 0)
194 : {
195 : /* Trailer is zero-length. */
196 137436 : astreamer_content(mystreamer->base.bbs_next,
197 : &mystreamer->member,
198 : NULL, 0,
199 : ASTREAMER_MEMBER_TRAILER);
200 :
201 : /* Expect next header. */
202 137435 : mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
203 : }
204 : else
205 : {
206 : /* Trailer is not zero-length. */
207 10396 : mystreamer->next_context = ASTREAMER_MEMBER_TRAILER;
208 : }
209 147831 : mystreamer->base.bbs_buffer.len = 0;
210 : }
211 212570 : break;
212 :
213 10396 : case ASTREAMER_MEMBER_TRAILER:
214 :
215 : /*
216 : * If we're expecting an archive member trailer, accumulate
217 : * the expected number of padding bytes before sending
218 : * anything onward.
219 : */
220 10396 : if (!astreamer_buffer_until(streamer, &data, &len,
221 10396 : mystreamer->pad_bytes_expected))
222 0 : return;
223 :
224 : /* OK, now we can send it. */
225 10396 : astreamer_content(mystreamer->base.bbs_next,
226 : &mystreamer->member,
227 10396 : mystreamer->base.bbs_buffer.data,
228 10396 : mystreamer->pad_bytes_expected,
229 : ASTREAMER_MEMBER_TRAILER);
230 :
231 : /* Expect next file header. */
232 10396 : mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
233 10396 : mystreamer->base.bbs_buffer.len = 0;
234 10396 : break;
235 :
236 202 : case ASTREAMER_ARCHIVE_TRAILER:
237 :
238 : /*
239 : * We've seen an end-of-archive indicator, so anything more is
240 : * buffered and sent as part of the archive trailer.
241 : *
242 : * Per POSIX, the last physical block of a tar archive is
243 : * always full-sized, so there may be undefined data after the
244 : * two zero blocks that mark end-of-archive. GNU tar, for
245 : * example, zero-pads to a 10kB boundary by default. We just
246 : * buffer whatever we receive and pass it along at finalize
247 : * time.
248 : */
249 202 : astreamer_buffer_bytes(streamer, &data, &len, len);
250 202 : return;
251 :
252 0 : default:
253 : /* Shouldn't happen. */
254 0 : pg_fatal("unexpected state while parsing tar archive");
255 : }
256 : }
257 : }
258 :
259 : /*
260 : * Parse a file header within a tar stream.
261 : *
262 : * The return value is true if we found a file header and passed it on to the
263 : * next astreamer; it is false if we have found the archive trailer.
264 : * We throw error if we see invalid data.
265 : */
266 : static bool
267 186200 : astreamer_tar_header(astreamer_tar_parser *mystreamer)
268 : {
269 186200 : bool has_nonzero_byte = false;
270 : int i;
271 186200 : astreamer_member *member = &mystreamer->member;
272 186200 : char *buffer = mystreamer->base.bbs_buffer.data;
273 :
274 : Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
275 :
276 : /* Zero out fields of *member, just for consistency. */
277 186200 : memset(member, 0, sizeof(astreamer_member));
278 :
279 : /* Check whether we've got a block of all zero bytes. */
280 289624 : for (i = 0; i < TAR_BLOCK_SIZE; ++i)
281 : {
282 289422 : if (buffer[i] != '\0')
283 : {
284 185998 : has_nonzero_byte = true;
285 185998 : break;
286 : }
287 : }
288 :
289 : /*
290 : * If the entire block was zeros, this is the end of the archive, not the
291 : * start of the next file.
292 : */
293 186200 : if (!has_nonzero_byte)
294 202 : return false;
295 :
296 : /*
297 : * Verify that we have a reasonable-looking header.
298 : */
299 185998 : if (!isValidTarHeader(buffer))
300 0 : pg_fatal("input file does not appear to be a valid tar archive");
301 :
302 : /*
303 : * Parse key fields out of the header.
304 : */
305 185998 : strlcpy(member->pathname, &buffer[TAR_OFFSET_NAME], MAXPGPATH);
306 185998 : if (member->pathname[0] == '\0')
307 0 : pg_fatal("tar member has empty name");
308 185998 : member->size = read_tar_number(&buffer[TAR_OFFSET_SIZE], 12);
309 185998 : member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
310 185998 : member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
311 185998 : member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
312 :
313 185998 : switch (buffer[TAR_OFFSET_TYPEFLAG])
314 : {
315 181404 : case TAR_FILETYPE_PLAIN:
316 : case TAR_FILETYPE_PLAIN_OLD:
317 181404 : member->is_regular = true;
318 181404 : break;
319 4578 : case TAR_FILETYPE_DIRECTORY:
320 4578 : member->is_directory = true;
321 4578 : break;
322 16 : case TAR_FILETYPE_SYMLINK:
323 16 : member->is_symlink = true;
324 16 : strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
325 16 : break;
326 0 : case TAR_FILETYPE_PAX_EXTENDED:
327 : case TAR_FILETYPE_PAX_EXTENDED_GLOBAL:
328 0 : pg_fatal("pax extensions to tar format are not supported");
329 : break;
330 0 : default:
331 : /* For special filetypes, set none of the three is_xxx flags */
332 0 : break;
333 : }
334 :
335 : /* Compute number of padding bytes. */
336 185998 : mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
337 :
338 : /* Forward the entire header to the next astreamer. */
339 185998 : astreamer_content(mystreamer->base.bbs_next, member,
340 : buffer, TAR_BLOCK_SIZE,
341 : ASTREAMER_MEMBER_HEADER);
342 :
343 185998 : return true;
344 : }
345 :
346 : /*
347 : * End-of-stream processing for a tar parser.
348 : */
349 : static void
350 202 : astreamer_tar_parser_finalize(astreamer *streamer)
351 : {
352 202 : astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
353 :
354 202 : if (mystreamer->next_context != ASTREAMER_ARCHIVE_TRAILER &&
355 0 : (mystreamer->next_context != ASTREAMER_MEMBER_HEADER ||
356 0 : mystreamer->base.bbs_buffer.len > 0))
357 0 : pg_fatal("COPY stream ended before last file was finished");
358 :
359 : /* Send the archive trailer, even if empty. */
360 202 : astreamer_content(streamer->bbs_next, NULL,
361 202 : streamer->bbs_buffer.data, streamer->bbs_buffer.len,
362 : ASTREAMER_ARCHIVE_TRAILER);
363 :
364 : /* Now finalize successor. */
365 202 : astreamer_finalize(streamer->bbs_next);
366 202 : }
367 :
368 : /*
369 : * Free memory associated with a tar parser.
370 : */
371 : static void
372 250 : astreamer_tar_parser_free(astreamer *streamer)
373 : {
374 250 : pfree(streamer->bbs_buffer.data);
375 250 : astreamer_free(streamer->bbs_next);
376 250 : pfree(streamer);
377 250 : }
378 :
379 : /*
380 : * Create a astreamer that can generate a tar archive.
381 : *
382 : * This is intended to be usable either for generating a brand-new tar archive
383 : * or for modifying one on the fly. The input should be a series of typed
384 : * chunks (i.e. not ASTREAMER_UNKNOWN). See also the comments for
385 : * astreamer_tar_parser_content.
386 : */
387 : astreamer *
388 0 : astreamer_tar_archiver_new(astreamer *next)
389 : {
390 : astreamer_tar_archiver *streamer;
391 :
392 0 : streamer = palloc0_object(astreamer_tar_archiver);
393 0 : *((const astreamer_ops **) &streamer->base.bbs_ops) =
394 : &astreamer_tar_archiver_ops;
395 0 : streamer->base.bbs_next = next;
396 :
397 0 : return &streamer->base;
398 : }
399 :
400 : /*
401 : * Fix up the stream of input chunks to create a valid tar file.
402 : *
403 : * If a ASTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
404 : * newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
405 : * passed through without change. Any other size is a fatal error (and
406 : * indicates a bug).
407 : *
408 : * Whenever a new ASTREAMER_MEMBER_HEADER chunk is constructed, the
409 : * corresponding ASTREAMER_MEMBER_TRAILER chunk is also constructed from
410 : * scratch. Specifically, we construct a block of zero bytes sufficient to
411 : * pad out to a block boundary, as required by the tar format. Other
412 : * ASTREAMER_MEMBER_TRAILER chunks are passed through without change.
413 : *
414 : * Any ASTREAMER_MEMBER_CONTENTS chunks are passed through without change.
415 : *
416 : * The ASTREAMER_ARCHIVE_TRAILER chunk is replaced with two
417 : * blocks of zero bytes. Not all tar programs require this, but apparently
418 : * some do. The server does not supply this trailer. If no archive trailer is
419 : * present, one will be added by astreamer_tar_parser_finalize.
420 : */
421 : static void
422 0 : astreamer_tar_archiver_content(astreamer *streamer,
423 : astreamer_member *member,
424 : const char *data, int len,
425 : astreamer_archive_context context)
426 : {
427 0 : astreamer_tar_archiver *mystreamer = (astreamer_tar_archiver *) streamer;
428 : char buffer[2 * TAR_BLOCK_SIZE];
429 :
430 : Assert(context != ASTREAMER_UNKNOWN);
431 :
432 0 : if (context == ASTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE)
433 : {
434 : Assert(len == 0);
435 :
436 : /* Replace zero-length tar header with a newly constructed one. */
437 0 : tarCreateHeader(buffer, member->pathname, NULL,
438 : member->size, member->mode, member->uid, member->gid,
439 : time(NULL));
440 0 : data = buffer;
441 0 : len = TAR_BLOCK_SIZE;
442 :
443 : /* Also make a note to replace padding, in case size changed. */
444 0 : mystreamer->rearchive_member = true;
445 : }
446 0 : else if (context == ASTREAMER_MEMBER_TRAILER &&
447 0 : mystreamer->rearchive_member)
448 0 : {
449 0 : int pad_bytes = tarPaddingBytesRequired(member->size);
450 :
451 : /* Also replace padding, if we regenerated the header. */
452 0 : memset(buffer, 0, pad_bytes);
453 0 : data = buffer;
454 0 : len = pad_bytes;
455 :
456 : /* Don't do this again unless we replace another header. */
457 0 : mystreamer->rearchive_member = false;
458 : }
459 0 : else if (context == ASTREAMER_ARCHIVE_TRAILER)
460 : {
461 : /* Trailer should always be two blocks of zero bytes. */
462 0 : memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
463 0 : data = buffer;
464 0 : len = 2 * TAR_BLOCK_SIZE;
465 : }
466 :
467 0 : astreamer_content(streamer->bbs_next, member, data, len, context);
468 0 : }
469 :
470 : /*
471 : * End-of-stream processing for a tar archiver.
472 : */
473 : static void
474 0 : astreamer_tar_archiver_finalize(astreamer *streamer)
475 : {
476 0 : astreamer_finalize(streamer->bbs_next);
477 0 : }
478 :
479 : /*
480 : * Free memory associated with a tar archiver.
481 : */
482 : static void
483 0 : astreamer_tar_archiver_free(astreamer *streamer)
484 : {
485 0 : astreamer_free(streamer->bbs_next);
486 0 : pfree(streamer);
487 0 : }
488 :
489 : /*
490 : * Create a astreamer that blindly adds two blocks of NUL bytes to the
491 : * end of an incomplete tarfile that the server might send us.
492 : */
493 : astreamer *
494 0 : astreamer_tar_terminator_new(astreamer *next)
495 : {
496 : astreamer *streamer;
497 :
498 0 : streamer = palloc0_object(astreamer);
499 0 : *((const astreamer_ops **) &streamer->bbs_ops) =
500 : &astreamer_tar_terminator_ops;
501 0 : streamer->bbs_next = next;
502 :
503 0 : return streamer;
504 : }
505 :
506 : /*
507 : * Pass all the content through without change.
508 : */
509 : static void
510 0 : astreamer_tar_terminator_content(astreamer *streamer,
511 : astreamer_member *member,
512 : const char *data, int len,
513 : astreamer_archive_context context)
514 : {
515 : /* Expect unparsed input. */
516 : Assert(member == NULL);
517 : Assert(context == ASTREAMER_UNKNOWN);
518 :
519 : /* Just forward it. */
520 0 : astreamer_content(streamer->bbs_next, member, data, len, context);
521 0 : }
522 :
523 : /*
524 : * At the end, blindly add the two blocks of NUL bytes which the server fails
525 : * to supply.
526 : */
527 : static void
528 0 : astreamer_tar_terminator_finalize(astreamer *streamer)
529 : {
530 : char buffer[2 * TAR_BLOCK_SIZE];
531 :
532 0 : memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
533 0 : astreamer_content(streamer->bbs_next, NULL, buffer,
534 : 2 * TAR_BLOCK_SIZE, ASTREAMER_UNKNOWN);
535 0 : astreamer_finalize(streamer->bbs_next);
536 0 : }
537 :
538 : /*
539 : * Free memory associated with a tar terminator.
540 : */
541 : static void
542 0 : astreamer_tar_terminator_free(astreamer *streamer)
543 : {
544 0 : astreamer_free(streamer->bbs_next);
545 0 : pfree(streamer);
546 0 : }
|