Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * astreamer_tar.c
4 : *
5 : * This module implements three types of tar processing. A tar parser
6 : * expects unlabelled chunks of data (i.e. ASTREAMER_UNKNOWN) and splits
7 : * it into labelled chunks (any other value of astreamer_archive_context).
8 : * A tar archiver does the reverse: it takes a bunch of labelled chunks
9 : * and produces a tarfile, optionally replacing member headers and trailers
10 : * so that upstream astreamer objects can perform surgery on the tarfile
11 : * contents without knowing the details of the tar format. A tar terminator
12 : * just adds two blocks of NUL bytes to the end of the file, since older
13 : * server versions produce files with this terminator omitted.
14 : *
15 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
16 : *
17 : * IDENTIFICATION
18 : * src/fe_utils/astreamer_tar.c
19 : *-------------------------------------------------------------------------
20 : */
21 :
22 : #include "postgres_fe.h"
23 :
24 : #include <time.h>
25 :
26 : #include "common/logging.h"
27 : #include "fe_utils/astreamer.h"
28 : #include "pgtar.h"
29 :
/*
 * State for a tar parser: the generic astreamer plus what we need to
 * remember between calls while walking through the archive.
 */
typedef struct astreamer_tar_parser
{
	astreamer	base;			/* generic streamer; cast target, keep first */
	astreamer_archive_context next_context; /* kind of chunk expected next */
	astreamer_member member;	/* fields parsed from the current header */
	size_t		file_bytes_sent;	/* contents bytes forwarded for 'member' */
	size_t		pad_bytes_expected; /* padding that follows the contents */
} astreamer_tar_parser;
38 :
/*
 * State for a tar archiver: the generic astreamer plus a flag recording
 * whether the current member's header was regenerated.
 */
typedef struct astreamer_tar_archiver
{
	astreamer	base;			/* generic streamer; cast target, keep first */
	bool		rearchive_member;	/* header was rebuilt, so rebuild the
									 * matching trailer (padding) as well */
} astreamer_tar_archiver;
44 :
45 : static void astreamer_tar_parser_content(astreamer *streamer,
46 : astreamer_member *member,
47 : const char *data, int len,
48 : astreamer_archive_context context);
49 : static void astreamer_tar_parser_finalize(astreamer *streamer);
50 : static void astreamer_tar_parser_free(astreamer *streamer);
51 : static bool astreamer_tar_header(astreamer_tar_parser *mystreamer);
52 :
53 : static const astreamer_ops astreamer_tar_parser_ops = {
54 : .content = astreamer_tar_parser_content,
55 : .finalize = astreamer_tar_parser_finalize,
56 : .free = astreamer_tar_parser_free
57 : };
58 :
59 : static void astreamer_tar_archiver_content(astreamer *streamer,
60 : astreamer_member *member,
61 : const char *data, int len,
62 : astreamer_archive_context context);
63 : static void astreamer_tar_archiver_finalize(astreamer *streamer);
64 : static void astreamer_tar_archiver_free(astreamer *streamer);
65 :
66 : static const astreamer_ops astreamer_tar_archiver_ops = {
67 : .content = astreamer_tar_archiver_content,
68 : .finalize = astreamer_tar_archiver_finalize,
69 : .free = astreamer_tar_archiver_free
70 : };
71 :
72 : static void astreamer_tar_terminator_content(astreamer *streamer,
73 : astreamer_member *member,
74 : const char *data, int len,
75 : astreamer_archive_context context);
76 : static void astreamer_tar_terminator_finalize(astreamer *streamer);
77 : static void astreamer_tar_terminator_free(astreamer *streamer);
78 :
79 : static const astreamer_ops astreamer_tar_terminator_ops = {
80 : .content = astreamer_tar_terminator_content,
81 : .finalize = astreamer_tar_terminator_finalize,
82 : .free = astreamer_tar_terminator_free
83 : };
84 :
85 : /*
86 : * Create a astreamer that can parse a stream of content as tar data.
87 : *
88 : * The input should be a series of ASTREAMER_UNKNOWN chunks; the astreamer
89 : * specified by 'next' will receive a series of typed chunks, as per the
90 : * conventions described in astreamer.h.
91 : */
92 : astreamer *
93 256 : astreamer_tar_parser_new(astreamer *next)
94 : {
95 : astreamer_tar_parser *streamer;
96 :
97 256 : streamer = palloc0_object(astreamer_tar_parser);
98 256 : *((const astreamer_ops **) &streamer->base.bbs_ops) =
99 : &astreamer_tar_parser_ops;
100 256 : streamer->base.bbs_next = next;
101 256 : initStringInfo(&streamer->base.bbs_buffer);
102 256 : streamer->next_context = ASTREAMER_MEMBER_HEADER;
103 :
104 256 : return &streamer->base;
105 : }
106 :
107 : /*
108 : * Parse unknown content as tar data.
109 : */
110 : static void
111 346575 : astreamer_tar_parser_content(astreamer *streamer, astreamer_member *member,
112 : const char *data, int len,
113 : astreamer_archive_context context)
114 : {
115 346575 : astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
116 : size_t nbytes;
117 :
118 : /* Expect unparsed input. */
119 : Assert(member == NULL);
120 : Assert(context == ASTREAMER_UNKNOWN);
121 :
122 754627 : while (len > 0)
123 : {
124 408268 : switch (mystreamer->next_context)
125 : {
126 185127 : case ASTREAMER_MEMBER_HEADER:
127 :
128 : /*
129 : * If we're expecting an archive member header, accumulate a
130 : * full block of data before doing anything further.
131 : */
132 185127 : if (!astreamer_buffer_until(streamer, &data, &len,
133 : TAR_BLOCK_SIZE))
134 0 : return;
135 :
136 : /*
137 : * Now we can process the header and get ready to process the
138 : * file contents; however, we might find out that what we
139 : * thought was the next file header is actually the start of
140 : * the archive trailer. Switch modes accordingly.
141 : */
142 185127 : if (astreamer_tar_header(mystreamer))
143 : {
144 184912 : if (mystreamer->member.size == 0)
145 : {
146 : /* No content; trailer is zero-length. */
147 37968 : astreamer_content(mystreamer->base.bbs_next,
148 : &mystreamer->member,
149 : NULL, 0,
150 : ASTREAMER_MEMBER_TRAILER);
151 :
152 : /* Expect next header. */
153 37968 : mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
154 : }
155 : else
156 : {
157 : /* Expect contents. */
158 146944 : mystreamer->next_context = ASTREAMER_MEMBER_CONTENTS;
159 : }
160 184912 : mystreamer->base.bbs_buffer.len = 0;
161 184912 : mystreamer->file_bytes_sent = 0;
162 : }
163 : else
164 215 : mystreamer->next_context = ASTREAMER_ARCHIVE_TRAILER;
165 185127 : break;
166 :
167 212549 : case ASTREAMER_MEMBER_CONTENTS:
168 :
169 : /*
170 : * Send as much content as we have, but not more than the
171 : * remaining file length.
172 : */
173 : Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
174 212549 : nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
175 212549 : nbytes = Min(nbytes, len);
176 : Assert(nbytes > 0);
177 212549 : astreamer_content(mystreamer->base.bbs_next,
178 : &mystreamer->member,
179 : data, nbytes,
180 : ASTREAMER_MEMBER_CONTENTS);
181 212549 : mystreamer->file_bytes_sent += nbytes;
182 212549 : data += nbytes;
183 212549 : len -= nbytes;
184 :
185 : /*
186 : * If we've not yet sent the whole file, then there's more
187 : * content to come; otherwise, it's time to expect the file
188 : * trailer.
189 : */
190 : Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
191 212549 : if (mystreamer->file_bytes_sent == mystreamer->member.size)
192 : {
193 146905 : if (mystreamer->pad_bytes_expected == 0)
194 : {
195 : /* Trailer is zero-length. */
196 136528 : astreamer_content(mystreamer->base.bbs_next,
197 : &mystreamer->member,
198 : NULL, 0,
199 : ASTREAMER_MEMBER_TRAILER);
200 :
201 : /* Expect next header. */
202 136527 : mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
203 : }
204 : else
205 : {
206 : /* Trailer is not zero-length. */
207 10377 : mystreamer->next_context = ASTREAMER_MEMBER_TRAILER;
208 : }
209 146904 : mystreamer->base.bbs_buffer.len = 0;
210 : }
211 212548 : break;
212 :
213 10377 : case ASTREAMER_MEMBER_TRAILER:
214 :
215 : /*
216 : * If we're expecting an archive member trailer, accumulate
217 : * the expected number of padding bytes before sending
218 : * anything onward.
219 : */
220 10377 : if (!astreamer_buffer_until(streamer, &data, &len,
221 10377 : mystreamer->pad_bytes_expected))
222 0 : return;
223 :
224 : /* OK, now we can send it. */
225 10377 : astreamer_content(mystreamer->base.bbs_next,
226 : &mystreamer->member,
227 10377 : data, mystreamer->pad_bytes_expected,
228 : ASTREAMER_MEMBER_TRAILER);
229 :
230 : /* Expect next file header. */
231 10377 : mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
232 10377 : mystreamer->base.bbs_buffer.len = 0;
233 10377 : break;
234 :
235 215 : case ASTREAMER_ARCHIVE_TRAILER:
236 :
237 : /*
238 : * We've seen an end-of-archive indicator, so anything more is
239 : * buffered and sent as part of the archive trailer.
240 : *
241 : * Per POSIX, the last physical block of a tar archive is
242 : * always full-sized, so there may be undefined data after the
243 : * two zero blocks that mark end-of-archive. GNU tar, for
244 : * example, zero-pads to a 10kB boundary by default. We just
245 : * buffer whatever we receive and pass it along at finalize
246 : * time.
247 : */
248 215 : astreamer_buffer_bytes(streamer, &data, &len, len);
249 215 : return;
250 :
251 0 : default:
252 : /* Shouldn't happen. */
253 0 : pg_fatal("unexpected state while parsing tar archive");
254 : }
255 : }
256 : }
257 :
258 : /*
259 : * Parse a file header within a tar stream.
260 : *
261 : * The return value is true if we found a file header and passed it on to the
262 : * next astreamer; it is false if we have reached the archive trailer.
263 : */
264 : static bool
265 185127 : astreamer_tar_header(astreamer_tar_parser *mystreamer)
266 : {
267 185127 : bool has_nonzero_byte = false;
268 : int i;
269 185127 : astreamer_member *member = &mystreamer->member;
270 185127 : char *buffer = mystreamer->base.bbs_buffer.data;
271 :
272 : Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
273 :
274 : /* Check whether we've got a block of all zero bytes. */
275 295207 : for (i = 0; i < TAR_BLOCK_SIZE; ++i)
276 : {
277 294992 : if (buffer[i] != '\0')
278 : {
279 184912 : has_nonzero_byte = true;
280 184912 : break;
281 : }
282 : }
283 :
284 : /*
285 : * If the entire block was zeros, this is the end of the archive, not the
286 : * start of the next file.
287 : */
288 185127 : if (!has_nonzero_byte)
289 215 : return false;
290 :
291 : /*
292 : * Parse key fields out of the header.
293 : */
294 184912 : strlcpy(member->pathname, &buffer[TAR_OFFSET_NAME], MAXPGPATH);
295 184912 : if (member->pathname[0] == '\0')
296 0 : pg_fatal("tar member has empty name");
297 184912 : member->size = read_tar_number(&buffer[TAR_OFFSET_SIZE], 12);
298 184912 : member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
299 184912 : member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
300 184912 : member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
301 184912 : member->is_directory =
302 184912 : (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_DIRECTORY);
303 184912 : member->is_link =
304 184912 : (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_SYMLINK);
305 184912 : if (member->is_link)
306 16 : strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
307 :
308 : /* Compute number of padding bytes. */
309 184912 : mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
310 :
311 : /* Forward the entire header to the next astreamer. */
312 184912 : astreamer_content(mystreamer->base.bbs_next, member,
313 : buffer, TAR_BLOCK_SIZE,
314 : ASTREAMER_MEMBER_HEADER);
315 :
316 184912 : return true;
317 : }
318 :
319 : /*
320 : * End-of-stream processing for a tar parser.
321 : */
322 : static void
323 215 : astreamer_tar_parser_finalize(astreamer *streamer)
324 : {
325 215 : astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
326 :
327 215 : if (mystreamer->next_context != ASTREAMER_ARCHIVE_TRAILER &&
328 0 : (mystreamer->next_context != ASTREAMER_MEMBER_HEADER ||
329 0 : mystreamer->base.bbs_buffer.len > 0))
330 0 : pg_fatal("COPY stream ended before last file was finished");
331 :
332 : /* Send the archive trailer, even if empty. */
333 215 : astreamer_content(streamer->bbs_next, NULL,
334 215 : streamer->bbs_buffer.data, streamer->bbs_buffer.len,
335 : ASTREAMER_ARCHIVE_TRAILER);
336 :
337 : /* Now finalize successor. */
338 215 : astreamer_finalize(streamer->bbs_next);
339 215 : }
340 :
341 : /*
342 : * Free memory associated with a tar parser.
343 : */
344 : static void
345 249 : astreamer_tar_parser_free(astreamer *streamer)
346 : {
347 249 : pfree(streamer->bbs_buffer.data);
348 249 : astreamer_free(streamer->bbs_next);
349 249 : }
350 :
351 : /*
352 : * Create a astreamer that can generate a tar archive.
353 : *
354 : * This is intended to be usable either for generating a brand-new tar archive
355 : * or for modifying one on the fly. The input should be a series of typed
356 : * chunks (i.e. not ASTREAMER_UNKNOWN). See also the comments for
357 : * astreamer_tar_parser_content.
358 : */
359 : astreamer *
360 0 : astreamer_tar_archiver_new(astreamer *next)
361 : {
362 : astreamer_tar_archiver *streamer;
363 :
364 0 : streamer = palloc0_object(astreamer_tar_archiver);
365 0 : *((const astreamer_ops **) &streamer->base.bbs_ops) =
366 : &astreamer_tar_archiver_ops;
367 0 : streamer->base.bbs_next = next;
368 :
369 0 : return &streamer->base;
370 : }
371 :
372 : /*
373 : * Fix up the stream of input chunks to create a valid tar file.
374 : *
375 : * If a ASTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
376 : * newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
377 : * passed through without change. Any other size is a fatal error (and
378 : * indicates a bug).
379 : *
380 : * Whenever a new ASTREAMER_MEMBER_HEADER chunk is constructed, the
381 : * corresponding ASTREAMER_MEMBER_TRAILER chunk is also constructed from
382 : * scratch. Specifically, we construct a block of zero bytes sufficient to
383 : * pad out to a block boundary, as required by the tar format. Other
384 : * ASTREAMER_MEMBER_TRAILER chunks are passed through without change.
385 : *
386 : * Any ASTREAMER_MEMBER_CONTENTS chunks are passed through without change.
387 : *
388 : * The ASTREAMER_ARCHIVE_TRAILER chunk is replaced with two
389 : * blocks of zero bytes. Not all tar programs require this, but apparently
390 : * some do. The server does not supply this trailer. If no archive trailer is
391 : * present, one will be added by astreamer_tar_parser_finalize.
392 : */
393 : static void
394 0 : astreamer_tar_archiver_content(astreamer *streamer,
395 : astreamer_member *member,
396 : const char *data, int len,
397 : astreamer_archive_context context)
398 : {
399 0 : astreamer_tar_archiver *mystreamer = (astreamer_tar_archiver *) streamer;
400 : char buffer[2 * TAR_BLOCK_SIZE];
401 :
402 : Assert(context != ASTREAMER_UNKNOWN);
403 :
404 0 : if (context == ASTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE)
405 : {
406 : Assert(len == 0);
407 :
408 : /* Replace zero-length tar header with a newly constructed one. */
409 0 : tarCreateHeader(buffer, member->pathname, NULL,
410 : member->size, member->mode, member->uid, member->gid,
411 : time(NULL));
412 0 : data = buffer;
413 0 : len = TAR_BLOCK_SIZE;
414 :
415 : /* Also make a note to replace padding, in case size changed. */
416 0 : mystreamer->rearchive_member = true;
417 : }
418 0 : else if (context == ASTREAMER_MEMBER_TRAILER &&
419 0 : mystreamer->rearchive_member)
420 0 : {
421 0 : int pad_bytes = tarPaddingBytesRequired(member->size);
422 :
423 : /* Also replace padding, if we regenerated the header. */
424 0 : memset(buffer, 0, pad_bytes);
425 0 : data = buffer;
426 0 : len = pad_bytes;
427 :
428 : /* Don't do this again unless we replace another header. */
429 0 : mystreamer->rearchive_member = false;
430 : }
431 0 : else if (context == ASTREAMER_ARCHIVE_TRAILER)
432 : {
433 : /* Trailer should always be two blocks of zero bytes. */
434 0 : memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
435 0 : data = buffer;
436 0 : len = 2 * TAR_BLOCK_SIZE;
437 : }
438 :
439 0 : astreamer_content(streamer->bbs_next, member, data, len, context);
440 0 : }
441 :
442 : /*
443 : * End-of-stream processing for a tar archiver.
444 : */
445 : static void
446 0 : astreamer_tar_archiver_finalize(astreamer *streamer)
447 : {
448 0 : astreamer_finalize(streamer->bbs_next);
449 0 : }
450 :
451 : /*
452 : * Free memory associated with a tar archiver.
453 : */
454 : static void
455 0 : astreamer_tar_archiver_free(astreamer *streamer)
456 : {
457 0 : astreamer_free(streamer->bbs_next);
458 0 : pfree(streamer);
459 0 : }
460 :
461 : /*
462 : * Create a astreamer that blindly adds two blocks of NUL bytes to the
463 : * end of an incomplete tarfile that the server might send us.
464 : */
465 : astreamer *
466 0 : astreamer_tar_terminator_new(astreamer *next)
467 : {
468 : astreamer *streamer;
469 :
470 0 : streamer = palloc0_object(astreamer);
471 0 : *((const astreamer_ops **) &streamer->bbs_ops) =
472 : &astreamer_tar_terminator_ops;
473 0 : streamer->bbs_next = next;
474 :
475 0 : return streamer;
476 : }
477 :
478 : /*
479 : * Pass all the content through without change.
480 : */
481 : static void
482 0 : astreamer_tar_terminator_content(astreamer *streamer,
483 : astreamer_member *member,
484 : const char *data, int len,
485 : astreamer_archive_context context)
486 : {
487 : /* Expect unparsed input. */
488 : Assert(member == NULL);
489 : Assert(context == ASTREAMER_UNKNOWN);
490 :
491 : /* Just forward it. */
492 0 : astreamer_content(streamer->bbs_next, member, data, len, context);
493 0 : }
494 :
495 : /*
496 : * At the end, blindly add the two blocks of NUL bytes which the server fails
497 : * to supply.
498 : */
499 : static void
500 0 : astreamer_tar_terminator_finalize(astreamer *streamer)
501 : {
502 : char buffer[2 * TAR_BLOCK_SIZE];
503 :
504 0 : memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
505 0 : astreamer_content(streamer->bbs_next, NULL, buffer,
506 : 2 * TAR_BLOCK_SIZE, ASTREAMER_UNKNOWN);
507 0 : astreamer_finalize(streamer->bbs_next);
508 0 : }
509 :
510 : /*
511 : * Free memory associated with a tar terminator.
512 : */
513 : static void
514 0 : astreamer_tar_terminator_free(astreamer *streamer)
515 : {
516 0 : astreamer_free(streamer->bbs_next);
517 0 : pfree(streamer);
518 0 : }
|