LCOV - code coverage report
Current view: top level - src/fe_utils - astreamer_tar.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 58.7 % 143 84
Test Date: 2026-03-01 16:14:42 Functions: 38.5 % 13 5
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * astreamer_tar.c
       4              :  *
       5              :  * This module implements three types of tar processing. A tar parser
       6              :  * expects unlabelled chunks of data (e.g. ASTREAMER_UNKNOWN) and splits
       7              :  * it into labelled chunks (any other value of astreamer_archive_context).
       8              :  * A tar archiver does the reverse: it takes a bunch of labelled chunks
       9              :  * and produces a tarfile, optionally replacing member headers and trailers
      10              :  * so that upstream astreamer objects can perform surgery on the tarfile
      11              :  * contents without knowing the details of the tar format. A tar terminator
      12              :  * just adds two blocks of NUL bytes to the end of the file, since older
      13              :  * server versions produce files with this terminator omitted.
      14              :  *
      15              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      16              :  *
      17              :  * IDENTIFICATION
      18              :  *        src/fe_utils/astreamer_tar.c
      19              :  *-------------------------------------------------------------------------
      20              :  */
      21              : 
      22              : #include "postgres_fe.h"
      23              : 
      24              : #include <time.h>
      25              : 
      26              : #include "common/logging.h"
      27              : #include "fe_utils/astreamer.h"
      28              : #include "pgtar.h"
      29              : 
/*
 * Private state of a tar parser.
 *
 * 'base' is the first member so that the astreamer pointer handed to the
 * callbacks in this file can be cast to astreamer_tar_parser.
 */
typedef struct astreamer_tar_parser
{
    astreamer   base;
    /* Kind of chunk the parser expects to see next in the input. */
    astreamer_archive_context next_context;
    /* Metadata parsed from the most recent member header. */
    astreamer_member member;
    /* Number of content bytes of the current member forwarded so far. */
    size_t      file_bytes_sent;
    /* Padding bytes that follow the current member's contents. */
    size_t      pad_bytes_expected;
} astreamer_tar_parser;
      38              : 
/*
 * Private state of a tar archiver.
 *
 * 'base' is the first member so that the astreamer pointer handed to the
 * callbacks in this file can be cast to astreamer_tar_archiver.
 */
typedef struct astreamer_tar_archiver
{
    astreamer   base;
    /*
     * Set when a member header was regenerated; tells the archiver to
     * regenerate that member's trailer (padding) as well.
     */
    bool        rearchive_member;
} astreamer_tar_archiver;
      44              : 
/* Callbacks implementing the tar parser. */
static void astreamer_tar_parser_content(astreamer *streamer,
                                         astreamer_member *member,
                                         const char *data, int len,
                                         astreamer_archive_context context);
static void astreamer_tar_parser_finalize(astreamer *streamer);
static void astreamer_tar_parser_free(astreamer *streamer);
/* Helper that interprets one buffered tar header block. */
static bool astreamer_tar_header(astreamer_tar_parser *mystreamer);

/* Operations table installed by astreamer_tar_parser_new(). */
static const astreamer_ops astreamer_tar_parser_ops = {
    .content = astreamer_tar_parser_content,
    .finalize = astreamer_tar_parser_finalize,
    .free = astreamer_tar_parser_free
};
      58              : 
/* Callbacks implementing the tar archiver. */
static void astreamer_tar_archiver_content(astreamer *streamer,
                                           astreamer_member *member,
                                           const char *data, int len,
                                           astreamer_archive_context context);
static void astreamer_tar_archiver_finalize(astreamer *streamer);
static void astreamer_tar_archiver_free(astreamer *streamer);

/* Operations table installed by astreamer_tar_archiver_new(). */
static const astreamer_ops astreamer_tar_archiver_ops = {
    .content = astreamer_tar_archiver_content,
    .finalize = astreamer_tar_archiver_finalize,
    .free = astreamer_tar_archiver_free
};
      71              : 
/* Callbacks implementing the tar terminator. */
static void astreamer_tar_terminator_content(astreamer *streamer,
                                             astreamer_member *member,
                                             const char *data, int len,
                                             astreamer_archive_context context);
static void astreamer_tar_terminator_finalize(astreamer *streamer);
static void astreamer_tar_terminator_free(astreamer *streamer);

/* Operations table installed by astreamer_tar_terminator_new(). */
static const astreamer_ops astreamer_tar_terminator_ops = {
    .content = astreamer_tar_terminator_content,
    .finalize = astreamer_tar_terminator_finalize,
    .free = astreamer_tar_terminator_free
};
      84              : 
      85              : /*
      86              :  * Create a astreamer that can parse a stream of content as tar data.
      87              :  *
      88              :  * The input should be a series of ASTREAMER_UNKNOWN chunks; the astreamer
      89              :  * specified by 'next' will receive a series of typed chunks, as per the
      90              :  * conventions described in astreamer.h.
      91              :  */
      92              : astreamer *
      93          203 : astreamer_tar_parser_new(astreamer *next)
      94              : {
      95              :     astreamer_tar_parser *streamer;
      96              : 
      97          203 :     streamer = palloc0_object(astreamer_tar_parser);
      98          203 :     *((const astreamer_ops **) &streamer->base.bbs_ops) =
      99              :         &astreamer_tar_parser_ops;
     100          203 :     streamer->base.bbs_next = next;
     101          203 :     initStringInfo(&streamer->base.bbs_buffer);
     102          203 :     streamer->next_context = ASTREAMER_MEMBER_HEADER;
     103              : 
     104          203 :     return &streamer->base;
     105              : }
     106              : 
     107              : /*
     108              :  * Parse unknown content as tar data.
     109              :  */
     110              : static void
     111       646048 : astreamer_tar_parser_content(astreamer *streamer, astreamer_member *member,
     112              :                              const char *data, int len,
     113              :                              astreamer_archive_context context)
     114              : {
     115       646048 :     astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
     116              :     size_t      nbytes;
     117              : 
     118              :     /* Expect unparsed input. */
     119              :     Assert(member == NULL);
     120              :     Assert(context == ASTREAMER_UNKNOWN);
     121              : 
     122      1326541 :     while (len > 0)
     123              :     {
     124       680699 :         switch (mystreamer->next_context)
     125              :         {
     126       165288 :             case ASTREAMER_MEMBER_HEADER:
     127              : 
     128              :                 /*
     129              :                  * If we're expecting an archive member header, accumulate a
     130              :                  * full block of data before doing anything further.
     131              :                  */
     132       165288 :                 if (!astreamer_buffer_until(streamer, &data, &len,
     133              :                                             TAR_BLOCK_SIZE))
     134            0 :                     return;
     135              : 
     136              :                 /*
     137              :                  * Now we can process the header and get ready to process the
     138              :                  * file contents; however, we might find out that what we
     139              :                  * thought was the next file header is actually the start of
     140              :                  * the archive trailer. Switch modes accordingly.
     141              :                  */
     142       165288 :                 if (astreamer_tar_header(mystreamer))
     143              :                 {
     144       165088 :                     if (mystreamer->member.size == 0)
     145              :                     {
     146              :                         /* No content; trailer is zero-length. */
     147        32416 :                         astreamer_content(mystreamer->base.bbs_next,
     148              :                                           &mystreamer->member,
     149              :                                           NULL, 0,
     150              :                                           ASTREAMER_MEMBER_TRAILER);
     151              : 
     152              :                         /* Expect next header. */
     153        32416 :                         mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
     154              :                     }
     155              :                     else
     156              :                     {
     157              :                         /* Expect contents. */
     158       132672 :                         mystreamer->next_context = ASTREAMER_MEMBER_CONTENTS;
     159              :                     }
     160       165088 :                     mystreamer->base.bbs_buffer.len = 0;
     161       165088 :                     mystreamer->file_bytes_sent = 0;
     162              :                 }
     163              :                 else
     164          200 :                     mystreamer->next_context = ASTREAMER_ARCHIVE_TRAILER;
     165       165288 :                 break;
     166              : 
     167       505556 :             case ASTREAMER_MEMBER_CONTENTS:
     168              : 
     169              :                 /*
     170              :                  * Send as much content as we have, but not more than the
     171              :                  * remaining file length.
     172              :                  */
     173              :                 Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
     174       505556 :                 nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
     175       505556 :                 nbytes = Min(nbytes, len);
     176              :                 Assert(nbytes > 0);
     177       505556 :                 astreamer_content(mystreamer->base.bbs_next,
     178              :                                   &mystreamer->member,
     179              :                                   data, nbytes,
     180              :                                   ASTREAMER_MEMBER_CONTENTS);
     181       505556 :                 mystreamer->file_bytes_sent += nbytes;
     182       505556 :                 data += nbytes;
     183       505556 :                 len -= nbytes;
     184              : 
     185              :                 /*
     186              :                  * If we've not yet sent the whole file, then there's more
     187              :                  * content to come; otherwise, it's time to expect the file
     188              :                  * trailer.
     189              :                  */
     190              :                 Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
     191       505556 :                 if (mystreamer->file_bytes_sent == mystreamer->member.size)
     192              :                 {
     193       132670 :                     if (mystreamer->pad_bytes_expected == 0)
     194              :                     {
     195              :                         /* Trailer is zero-length. */
     196       123020 :                         astreamer_content(mystreamer->base.bbs_next,
     197              :                                           &mystreamer->member,
     198              :                                           NULL, 0,
     199              :                                           ASTREAMER_MEMBER_TRAILER);
     200              : 
     201              :                         /* Expect next header. */
     202       123019 :                         mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
     203              :                     }
     204              :                     else
     205              :                     {
     206              :                         /* Trailer is not zero-length. */
     207         9650 :                         mystreamer->next_context = ASTREAMER_MEMBER_TRAILER;
     208              :                     }
     209       132669 :                     mystreamer->base.bbs_buffer.len = 0;
     210              :                 }
     211       505555 :                 break;
     212              : 
     213         9650 :             case ASTREAMER_MEMBER_TRAILER:
     214              : 
     215              :                 /*
     216              :                  * If we're expecting an archive member trailer, accumulate
     217              :                  * the expected number of padding bytes before sending
     218              :                  * anything onward.
     219              :                  */
     220         9650 :                 if (!astreamer_buffer_until(streamer, &data, &len,
     221         9650 :                                             mystreamer->pad_bytes_expected))
     222            0 :                     return;
     223              : 
     224              :                 /* OK, now we can send it. */
     225         9650 :                 astreamer_content(mystreamer->base.bbs_next,
     226              :                                   &mystreamer->member,
     227         9650 :                                   data, mystreamer->pad_bytes_expected,
     228              :                                   ASTREAMER_MEMBER_TRAILER);
     229              : 
     230              :                 /* Expect next file header. */
     231         9650 :                 mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
     232         9650 :                 mystreamer->base.bbs_buffer.len = 0;
     233         9650 :                 break;
     234              : 
     235          205 :             case ASTREAMER_ARCHIVE_TRAILER:
     236              : 
     237              :                 /*
     238              :                  * We've seen an end-of-archive indicator, so anything more is
     239              :                  * buffered and sent as part of the archive trailer. But we
     240              :                  * don't expect more than 2 blocks.
     241              :                  */
     242          205 :                 astreamer_buffer_bytes(streamer, &data, &len, len);
     243          205 :                 if (len > 2 * TAR_BLOCK_SIZE)
     244            0 :                     pg_fatal("tar file trailer exceeds 2 blocks");
     245          205 :                 return;
     246              : 
     247            0 :             default:
     248              :                 /* Shouldn't happen. */
     249            0 :                 pg_fatal("unexpected state while parsing tar archive");
     250              :         }
     251              :     }
     252              : }
     253              : 
     254              : /*
     255              :  * Parse a file header within a tar stream.
     256              :  *
     257              :  * The return value is true if we found a file header and passed it on to the
     258              :  * next astreamer; it is false if we have reached the archive trailer.
     259              :  */
     260              : static bool
     261       165288 : astreamer_tar_header(astreamer_tar_parser *mystreamer)
     262              : {
     263       165288 :     bool        has_nonzero_byte = false;
     264              :     int         i;
     265       165288 :     astreamer_member *member = &mystreamer->member;
     266       165288 :     char       *buffer = mystreamer->base.bbs_buffer.data;
     267              : 
     268              :     Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
     269              : 
     270              :     /* Check whether we've got a block of all zero bytes. */
     271       267688 :     for (i = 0; i < TAR_BLOCK_SIZE; ++i)
     272              :     {
     273       267488 :         if (buffer[i] != '\0')
     274              :         {
     275       165088 :             has_nonzero_byte = true;
     276       165088 :             break;
     277              :         }
     278              :     }
     279              : 
     280              :     /*
     281              :      * If the entire block was zeros, this is the end of the archive, not the
     282              :      * start of the next file.
     283              :      */
     284       165288 :     if (!has_nonzero_byte)
     285          200 :         return false;
     286              : 
     287              :     /*
     288              :      * Parse key fields out of the header.
     289              :      */
     290       165088 :     strlcpy(member->pathname, &buffer[TAR_OFFSET_NAME], MAXPGPATH);
     291       165088 :     if (member->pathname[0] == '\0')
     292            0 :         pg_fatal("tar member has empty name");
     293       165088 :     member->size = read_tar_number(&buffer[TAR_OFFSET_SIZE], 12);
     294       165088 :     member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
     295       165088 :     member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
     296       165088 :     member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
     297       165088 :     member->is_directory =
     298       165088 :         (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_DIRECTORY);
     299       165088 :     member->is_link =
     300       165088 :         (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_SYMLINK);
     301       165088 :     if (member->is_link)
     302           16 :         strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
     303              : 
     304              :     /* Compute number of padding bytes. */
     305       165088 :     mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
     306              : 
     307              :     /* Forward the entire header to the next astreamer. */
     308       165088 :     astreamer_content(mystreamer->base.bbs_next, member,
     309              :                       buffer, TAR_BLOCK_SIZE,
     310              :                       ASTREAMER_MEMBER_HEADER);
     311              : 
     312       165088 :     return true;
     313              : }
     314              : 
     315              : /*
     316              :  * End-of-stream processing for a tar parser.
     317              :  */
     318              : static void
     319          200 : astreamer_tar_parser_finalize(astreamer *streamer)
     320              : {
     321          200 :     astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
     322              : 
     323          200 :     if (mystreamer->next_context != ASTREAMER_ARCHIVE_TRAILER &&
     324            0 :         (mystreamer->next_context != ASTREAMER_MEMBER_HEADER ||
     325            0 :          mystreamer->base.bbs_buffer.len > 0))
     326            0 :         pg_fatal("COPY stream ended before last file was finished");
     327              : 
     328              :     /* Send the archive trailer, even if empty. */
     329          200 :     astreamer_content(streamer->bbs_next, NULL,
     330          200 :                       streamer->bbs_buffer.data, streamer->bbs_buffer.len,
     331              :                       ASTREAMER_ARCHIVE_TRAILER);
     332              : 
     333              :     /* Now finalize successor. */
     334          200 :     astreamer_finalize(streamer->bbs_next);
     335          200 : }
     336              : 
     337              : /*
     338              :  * Free memory associated with a tar parser.
     339              :  */
     340              : static void
     341          200 : astreamer_tar_parser_free(astreamer *streamer)
     342              : {
     343          200 :     pfree(streamer->bbs_buffer.data);
     344          200 :     astreamer_free(streamer->bbs_next);
     345          200 : }
     346              : 
     347              : /*
     348              :  * Create a astreamer that can generate a tar archive.
     349              :  *
     350              :  * This is intended to be usable either for generating a brand-new tar archive
     351              :  * or for modifying one on the fly. The input should be a series of typed
     352              :  * chunks (i.e. not ASTREAMER_UNKNOWN). See also the comments for
     353              :  * astreamer_tar_parser_content.
     354              :  */
     355              : astreamer *
     356            0 : astreamer_tar_archiver_new(astreamer *next)
     357              : {
     358              :     astreamer_tar_archiver *streamer;
     359              : 
     360            0 :     streamer = palloc0_object(astreamer_tar_archiver);
     361            0 :     *((const astreamer_ops **) &streamer->base.bbs_ops) =
     362              :         &astreamer_tar_archiver_ops;
     363            0 :     streamer->base.bbs_next = next;
     364              : 
     365            0 :     return &streamer->base;
     366              : }
     367              : 
     368              : /*
     369              :  * Fix up the stream of input chunks to create a valid tar file.
     370              :  *
     371              :  * If a ASTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
     372              :  * newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
     373              :  * passed through without change. Any other size is a fatal error (and
     374              :  * indicates a bug).
     375              :  *
     376              :  * Whenever a new ASTREAMER_MEMBER_HEADER chunk is constructed, the
     377              :  * corresponding ASTREAMER_MEMBER_TRAILER chunk is also constructed from
     378              :  * scratch. Specifically, we construct a block of zero bytes sufficient to
     379              :  * pad out to a block boundary, as required by the tar format. Other
     380              :  * ASTREAMER_MEMBER_TRAILER chunks are passed through without change.
     381              :  *
     382              :  * Any ASTREAMER_MEMBER_CONTENTS chunks are passed through without change.
     383              :  *
     384              :  * The ASTREAMER_ARCHIVE_TRAILER chunk is replaced with two
     385              :  * blocks of zero bytes. Not all tar programs require this, but apparently
     386              :  * some do. The server does not supply this trailer. If no archive trailer is
     387              :  * present, one will be added by astreamer_tar_parser_finalize.
     388              :  */
     389              : static void
     390            0 : astreamer_tar_archiver_content(astreamer *streamer,
     391              :                                astreamer_member *member,
     392              :                                const char *data, int len,
     393              :                                astreamer_archive_context context)
     394              : {
     395            0 :     astreamer_tar_archiver *mystreamer = (astreamer_tar_archiver *) streamer;
     396              :     char        buffer[2 * TAR_BLOCK_SIZE];
     397              : 
     398              :     Assert(context != ASTREAMER_UNKNOWN);
     399              : 
     400            0 :     if (context == ASTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE)
     401              :     {
     402              :         Assert(len == 0);
     403              : 
     404              :         /* Replace zero-length tar header with a newly constructed one. */
     405            0 :         tarCreateHeader(buffer, member->pathname, NULL,
     406              :                         member->size, member->mode, member->uid, member->gid,
     407              :                         time(NULL));
     408            0 :         data = buffer;
     409            0 :         len = TAR_BLOCK_SIZE;
     410              : 
     411              :         /* Also make a note to replace padding, in case size changed. */
     412            0 :         mystreamer->rearchive_member = true;
     413              :     }
     414            0 :     else if (context == ASTREAMER_MEMBER_TRAILER &&
     415            0 :              mystreamer->rearchive_member)
     416            0 :     {
     417            0 :         int         pad_bytes = tarPaddingBytesRequired(member->size);
     418              : 
     419              :         /* Also replace padding, if we regenerated the header. */
     420            0 :         memset(buffer, 0, pad_bytes);
     421            0 :         data = buffer;
     422            0 :         len = pad_bytes;
     423              : 
     424              :         /* Don't do this again unless we replace another header. */
     425            0 :         mystreamer->rearchive_member = false;
     426              :     }
     427            0 :     else if (context == ASTREAMER_ARCHIVE_TRAILER)
     428              :     {
     429              :         /* Trailer should always be two blocks of zero bytes. */
     430            0 :         memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
     431            0 :         data = buffer;
     432            0 :         len = 2 * TAR_BLOCK_SIZE;
     433              :     }
     434              : 
     435            0 :     astreamer_content(streamer->bbs_next, member, data, len, context);
     436            0 : }
     437              : 
     438              : /*
     439              :  * End-of-stream processing for a tar archiver.
     440              :  */
     441              : static void
     442            0 : astreamer_tar_archiver_finalize(astreamer *streamer)
     443              : {
     444            0 :     astreamer_finalize(streamer->bbs_next);
     445            0 : }
     446              : 
     447              : /*
     448              :  * Free memory associated with a tar archiver.
     449              :  */
     450              : static void
     451            0 : astreamer_tar_archiver_free(astreamer *streamer)
     452              : {
     453            0 :     astreamer_free(streamer->bbs_next);
     454            0 :     pfree(streamer);
     455            0 : }
     456              : 
     457              : /*
     458              :  * Create a astreamer that blindly adds two blocks of NUL bytes to the
     459              :  * end of an incomplete tarfile that the server might send us.
     460              :  */
     461              : astreamer *
     462            0 : astreamer_tar_terminator_new(astreamer *next)
     463              : {
     464              :     astreamer  *streamer;
     465              : 
     466            0 :     streamer = palloc0_object(astreamer);
     467            0 :     *((const astreamer_ops **) &streamer->bbs_ops) =
     468              :         &astreamer_tar_terminator_ops;
     469            0 :     streamer->bbs_next = next;
     470              : 
     471            0 :     return streamer;
     472              : }
     473              : 
     474              : /*
     475              :  * Pass all the content through without change.
     476              :  */
     477              : static void
     478            0 : astreamer_tar_terminator_content(astreamer *streamer,
     479              :                                  astreamer_member *member,
     480              :                                  const char *data, int len,
     481              :                                  astreamer_archive_context context)
     482              : {
     483              :     /* Expect unparsed input. */
     484              :     Assert(member == NULL);
     485              :     Assert(context == ASTREAMER_UNKNOWN);
     486              : 
     487              :     /* Just forward it. */
     488            0 :     astreamer_content(streamer->bbs_next, member, data, len, context);
     489            0 : }
     490              : 
     491              : /*
     492              :  * At the end, blindly add the two blocks of NUL bytes which the server fails
     493              :  * to supply.
     494              :  */
     495              : static void
     496            0 : astreamer_tar_terminator_finalize(astreamer *streamer)
     497              : {
     498              :     char        buffer[2 * TAR_BLOCK_SIZE];
     499              : 
     500            0 :     memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
     501            0 :     astreamer_content(streamer->bbs_next, NULL, buffer,
     502              :                       2 * TAR_BLOCK_SIZE, ASTREAMER_UNKNOWN);
     503            0 :     astreamer_finalize(streamer->bbs_next);
     504            0 : }
     505              : 
     506              : /*
     507              :  * Free memory associated with a tar terminator.
     508              :  */
     509              : static void
     510            0 : astreamer_tar_terminator_free(astreamer *streamer)
     511              : {
     512            0 :     astreamer_free(streamer->bbs_next);
     513            0 :     pfree(streamer);
     514            0 : }
        

Generated by: LCOV version 2.0-1