LCOV - code coverage report
Current view: top level - src/fe_utils - astreamer_tar.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 58.9 % 141 83
Test Date: 2026-03-27 22:16:19 Functions: 38.5 % 13 5
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * astreamer_tar.c
       4              :  *
       5              :  * This module implements three types of tar processing. A tar parser
       6              :  * expects unlabelled chunks of data (e.g. ASTREAMER_UNKNOWN) and splits
       7              :  * it into labelled chunks (any other value of astreamer_archive_context).
       8              :  * A tar archiver does the reverse: it takes a bunch of labelled chunks
       9              :  * and produces a tarfile, optionally replacing member headers and trailers
      10              :  * so that upstream astreamer objects can perform surgery on the tarfile
      11              :  * contents without knowing the details of the tar format. A tar terminator
      12              :  * just adds two blocks of NUL bytes to the end of the file, since older
      13              :  * server versions produce files with this terminator omitted.
      14              :  *
      15              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      16              :  *
      17              :  * IDENTIFICATION
      18              :  *        src/fe_utils/astreamer_tar.c
      19              :  *-------------------------------------------------------------------------
      20              :  */
      21              : 
      22              : #include "postgres_fe.h"
      23              : 
      24              : #include <time.h>
      25              : 
      26              : #include "common/logging.h"
      27              : #include "fe_utils/astreamer.h"
      28              : #include "pgtar.h"
      29              : 
/*
 * Private state of a tar parser.
 *
 * 'base' has to stay the first member: the callbacks in this file receive a
 * generic astreamer pointer and cast it to astreamer_tar_parser.
 */
typedef struct astreamer_tar_parser
{
    astreamer   base;
    /* Kind of chunk the parser expects to find next in its input. */
    astreamer_archive_context next_context;
    /* Metadata of the current member, filled in by astreamer_tar_header. */
    astreamer_member member;
    /* Number of content bytes of the current member already forwarded. */
    size_t      file_bytes_sent;
    /* Number of padding bytes that follow the current member's contents. */
    size_t      pad_bytes_expected;
} astreamer_tar_parser;
      38              : 
/*
 * Private state of a tar archiver.
 *
 * 'base' has to stay the first member: the callbacks in this file receive a
 * generic astreamer pointer and cast it to astreamer_tar_archiver.
 */
typedef struct astreamer_tar_archiver
{
    astreamer   base;
    /*
     * Set when the current member's header was regenerated, so that its
     * trailer (padding) gets regenerated as well; cleared once that is done.
     */
    bool        rearchive_member;
} astreamer_tar_archiver;
      44              : 
/* Callbacks of the tar parser, plus the header-parsing helper it uses. */
static void astreamer_tar_parser_content(astreamer *streamer,
                                         astreamer_member *member,
                                         const char *data, int len,
                                         astreamer_archive_context context);
static void astreamer_tar_parser_finalize(astreamer *streamer);
static void astreamer_tar_parser_free(astreamer *streamer);
static bool astreamer_tar_header(astreamer_tar_parser *mystreamer);

/* Callback table installed by astreamer_tar_parser_new. */
static const astreamer_ops astreamer_tar_parser_ops = {
    .content = astreamer_tar_parser_content,
    .finalize = astreamer_tar_parser_finalize,
    .free = astreamer_tar_parser_free
};
      58              : 
/* Callbacks of the tar archiver. */
static void astreamer_tar_archiver_content(astreamer *streamer,
                                           astreamer_member *member,
                                           const char *data, int len,
                                           astreamer_archive_context context);
static void astreamer_tar_archiver_finalize(astreamer *streamer);
static void astreamer_tar_archiver_free(astreamer *streamer);

/* Callback table installed by astreamer_tar_archiver_new. */
static const astreamer_ops astreamer_tar_archiver_ops = {
    .content = astreamer_tar_archiver_content,
    .finalize = astreamer_tar_archiver_finalize,
    .free = astreamer_tar_archiver_free
};
      71              : 
/* Callbacks of the tar terminator. */
static void astreamer_tar_terminator_content(astreamer *streamer,
                                             astreamer_member *member,
                                             const char *data, int len,
                                             astreamer_archive_context context);
static void astreamer_tar_terminator_finalize(astreamer *streamer);
static void astreamer_tar_terminator_free(astreamer *streamer);

/* Callback table installed by astreamer_tar_terminator_new. */
static const astreamer_ops astreamer_tar_terminator_ops = {
    .content = astreamer_tar_terminator_content,
    .finalize = astreamer_tar_terminator_finalize,
    .free = astreamer_tar_terminator_free
};
      84              : 
      85              : /*
      86              :  * Create a astreamer that can parse a stream of content as tar data.
      87              :  *
      88              :  * The input should be a series of ASTREAMER_UNKNOWN chunks; the astreamer
      89              :  * specified by 'next' will receive a series of typed chunks, as per the
      90              :  * conventions described in astreamer.h.
      91              :  */
      92              : astreamer *
      93          256 : astreamer_tar_parser_new(astreamer *next)
      94              : {
      95              :     astreamer_tar_parser *streamer;
      96              : 
      97          256 :     streamer = palloc0_object(astreamer_tar_parser);
      98          256 :     *((const astreamer_ops **) &streamer->base.bbs_ops) =
      99              :         &astreamer_tar_parser_ops;
     100          256 :     streamer->base.bbs_next = next;
     101          256 :     initStringInfo(&streamer->base.bbs_buffer);
     102          256 :     streamer->next_context = ASTREAMER_MEMBER_HEADER;
     103              : 
     104          256 :     return &streamer->base;
     105              : }
     106              : 
     107              : /*
     108              :  * Parse unknown content as tar data.
     109              :  */
     110              : static void
     111       346575 : astreamer_tar_parser_content(astreamer *streamer, astreamer_member *member,
     112              :                              const char *data, int len,
     113              :                              astreamer_archive_context context)
     114              : {
     115       346575 :     astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
     116              :     size_t      nbytes;
     117              : 
     118              :     /* Expect unparsed input. */
     119              :     Assert(member == NULL);
     120              :     Assert(context == ASTREAMER_UNKNOWN);
     121              : 
     122       754627 :     while (len > 0)
     123              :     {
     124       408268 :         switch (mystreamer->next_context)
     125              :         {
     126       185127 :             case ASTREAMER_MEMBER_HEADER:
     127              : 
     128              :                 /*
     129              :                  * If we're expecting an archive member header, accumulate a
     130              :                  * full block of data before doing anything further.
     131              :                  */
     132       185127 :                 if (!astreamer_buffer_until(streamer, &data, &len,
     133              :                                             TAR_BLOCK_SIZE))
     134            0 :                     return;
     135              : 
     136              :                 /*
     137              :                  * Now we can process the header and get ready to process the
     138              :                  * file contents; however, we might find out that what we
     139              :                  * thought was the next file header is actually the start of
     140              :                  * the archive trailer. Switch modes accordingly.
     141              :                  */
     142       185127 :                 if (astreamer_tar_header(mystreamer))
     143              :                 {
     144       184912 :                     if (mystreamer->member.size == 0)
     145              :                     {
     146              :                         /* No content; trailer is zero-length. */
     147        37968 :                         astreamer_content(mystreamer->base.bbs_next,
     148              :                                           &mystreamer->member,
     149              :                                           NULL, 0,
     150              :                                           ASTREAMER_MEMBER_TRAILER);
     151              : 
     152              :                         /* Expect next header. */
     153        37968 :                         mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
     154              :                     }
     155              :                     else
     156              :                     {
     157              :                         /* Expect contents. */
     158       146944 :                         mystreamer->next_context = ASTREAMER_MEMBER_CONTENTS;
     159              :                     }
     160       184912 :                     mystreamer->base.bbs_buffer.len = 0;
     161       184912 :                     mystreamer->file_bytes_sent = 0;
     162              :                 }
     163              :                 else
     164          215 :                     mystreamer->next_context = ASTREAMER_ARCHIVE_TRAILER;
     165       185127 :                 break;
     166              : 
     167       212549 :             case ASTREAMER_MEMBER_CONTENTS:
     168              : 
     169              :                 /*
     170              :                  * Send as much content as we have, but not more than the
     171              :                  * remaining file length.
     172              :                  */
     173              :                 Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
     174       212549 :                 nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
     175       212549 :                 nbytes = Min(nbytes, len);
     176              :                 Assert(nbytes > 0);
     177       212549 :                 astreamer_content(mystreamer->base.bbs_next,
     178              :                                   &mystreamer->member,
     179              :                                   data, nbytes,
     180              :                                   ASTREAMER_MEMBER_CONTENTS);
     181       212549 :                 mystreamer->file_bytes_sent += nbytes;
     182       212549 :                 data += nbytes;
     183       212549 :                 len -= nbytes;
     184              : 
     185              :                 /*
     186              :                  * If we've not yet sent the whole file, then there's more
     187              :                  * content to come; otherwise, it's time to expect the file
     188              :                  * trailer.
     189              :                  */
     190              :                 Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
     191       212549 :                 if (mystreamer->file_bytes_sent == mystreamer->member.size)
     192              :                 {
     193       146905 :                     if (mystreamer->pad_bytes_expected == 0)
     194              :                     {
     195              :                         /* Trailer is zero-length. */
     196       136528 :                         astreamer_content(mystreamer->base.bbs_next,
     197              :                                           &mystreamer->member,
     198              :                                           NULL, 0,
     199              :                                           ASTREAMER_MEMBER_TRAILER);
     200              : 
     201              :                         /* Expect next header. */
     202       136527 :                         mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
     203              :                     }
     204              :                     else
     205              :                     {
     206              :                         /* Trailer is not zero-length. */
     207        10377 :                         mystreamer->next_context = ASTREAMER_MEMBER_TRAILER;
     208              :                     }
     209       146904 :                     mystreamer->base.bbs_buffer.len = 0;
     210              :                 }
     211       212548 :                 break;
     212              : 
     213        10377 :             case ASTREAMER_MEMBER_TRAILER:
     214              : 
     215              :                 /*
     216              :                  * If we're expecting an archive member trailer, accumulate
     217              :                  * the expected number of padding bytes before sending
     218              :                  * anything onward.
     219              :                  */
     220        10377 :                 if (!astreamer_buffer_until(streamer, &data, &len,
     221        10377 :                                             mystreamer->pad_bytes_expected))
     222            0 :                     return;
     223              : 
     224              :                 /* OK, now we can send it. */
     225        10377 :                 astreamer_content(mystreamer->base.bbs_next,
     226              :                                   &mystreamer->member,
     227        10377 :                                   data, mystreamer->pad_bytes_expected,
     228              :                                   ASTREAMER_MEMBER_TRAILER);
     229              : 
     230              :                 /* Expect next file header. */
     231        10377 :                 mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
     232        10377 :                 mystreamer->base.bbs_buffer.len = 0;
     233        10377 :                 break;
     234              : 
     235          215 :             case ASTREAMER_ARCHIVE_TRAILER:
     236              : 
     237              :                 /*
     238              :                  * We've seen an end-of-archive indicator, so anything more is
     239              :                  * buffered and sent as part of the archive trailer.
     240              :                  *
     241              :                  * Per POSIX, the last physical block of a tar archive is
     242              :                  * always full-sized, so there may be undefined data after the
     243              :                  * two zero blocks that mark end-of-archive.  GNU tar, for
     244              :                  * example, zero-pads to a 10kB boundary by default.  We just
     245              :                  * buffer whatever we receive and pass it along at finalize
     246              :                  * time.
     247              :                  */
     248          215 :                 astreamer_buffer_bytes(streamer, &data, &len, len);
     249          215 :                 return;
     250              : 
     251            0 :             default:
     252              :                 /* Shouldn't happen. */
     253            0 :                 pg_fatal("unexpected state while parsing tar archive");
     254              :         }
     255              :     }
     256              : }
     257              : 
     258              : /*
     259              :  * Parse a file header within a tar stream.
     260              :  *
     261              :  * The return value is true if we found a file header and passed it on to the
     262              :  * next astreamer; it is false if we have reached the archive trailer.
     263              :  */
     264              : static bool
     265       185127 : astreamer_tar_header(astreamer_tar_parser *mystreamer)
     266              : {
     267       185127 :     bool        has_nonzero_byte = false;
     268              :     int         i;
     269       185127 :     astreamer_member *member = &mystreamer->member;
     270       185127 :     char       *buffer = mystreamer->base.bbs_buffer.data;
     271              : 
     272              :     Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
     273              : 
     274              :     /* Check whether we've got a block of all zero bytes. */
     275       295207 :     for (i = 0; i < TAR_BLOCK_SIZE; ++i)
     276              :     {
     277       294992 :         if (buffer[i] != '\0')
     278              :         {
     279       184912 :             has_nonzero_byte = true;
     280       184912 :             break;
     281              :         }
     282              :     }
     283              : 
     284              :     /*
     285              :      * If the entire block was zeros, this is the end of the archive, not the
     286              :      * start of the next file.
     287              :      */
     288       185127 :     if (!has_nonzero_byte)
     289          215 :         return false;
     290              : 
     291              :     /*
     292              :      * Parse key fields out of the header.
     293              :      */
     294       184912 :     strlcpy(member->pathname, &buffer[TAR_OFFSET_NAME], MAXPGPATH);
     295       184912 :     if (member->pathname[0] == '\0')
     296            0 :         pg_fatal("tar member has empty name");
     297       184912 :     member->size = read_tar_number(&buffer[TAR_OFFSET_SIZE], 12);
     298       184912 :     member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
     299       184912 :     member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
     300       184912 :     member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
     301       184912 :     member->is_directory =
     302       184912 :         (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_DIRECTORY);
     303       184912 :     member->is_link =
     304       184912 :         (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_SYMLINK);
     305       184912 :     if (member->is_link)
     306           16 :         strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
     307              : 
     308              :     /* Compute number of padding bytes. */
     309       184912 :     mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
     310              : 
     311              :     /* Forward the entire header to the next astreamer. */
     312       184912 :     astreamer_content(mystreamer->base.bbs_next, member,
     313              :                       buffer, TAR_BLOCK_SIZE,
     314              :                       ASTREAMER_MEMBER_HEADER);
     315              : 
     316       184912 :     return true;
     317              : }
     318              : 
     319              : /*
     320              :  * End-of-stream processing for a tar parser.
     321              :  */
     322              : static void
     323          215 : astreamer_tar_parser_finalize(astreamer *streamer)
     324              : {
     325          215 :     astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
     326              : 
     327          215 :     if (mystreamer->next_context != ASTREAMER_ARCHIVE_TRAILER &&
     328            0 :         (mystreamer->next_context != ASTREAMER_MEMBER_HEADER ||
     329            0 :          mystreamer->base.bbs_buffer.len > 0))
     330            0 :         pg_fatal("COPY stream ended before last file was finished");
     331              : 
     332              :     /* Send the archive trailer, even if empty. */
     333          215 :     astreamer_content(streamer->bbs_next, NULL,
     334          215 :                       streamer->bbs_buffer.data, streamer->bbs_buffer.len,
     335              :                       ASTREAMER_ARCHIVE_TRAILER);
     336              : 
     337              :     /* Now finalize successor. */
     338          215 :     astreamer_finalize(streamer->bbs_next);
     339          215 : }
     340              : 
     341              : /*
     342              :  * Free memory associated with a tar parser.
     343              :  */
     344              : static void
     345          249 : astreamer_tar_parser_free(astreamer *streamer)
     346              : {
     347          249 :     pfree(streamer->bbs_buffer.data);
     348          249 :     astreamer_free(streamer->bbs_next);
     349          249 : }
     350              : 
     351              : /*
     352              :  * Create a astreamer that can generate a tar archive.
     353              :  *
     354              :  * This is intended to be usable either for generating a brand-new tar archive
     355              :  * or for modifying one on the fly. The input should be a series of typed
     356              :  * chunks (i.e. not ASTREAMER_UNKNOWN). See also the comments for
     357              :  * astreamer_tar_parser_content.
     358              :  */
     359              : astreamer *
     360            0 : astreamer_tar_archiver_new(astreamer *next)
     361              : {
     362              :     astreamer_tar_archiver *streamer;
     363              : 
     364            0 :     streamer = palloc0_object(astreamer_tar_archiver);
     365            0 :     *((const astreamer_ops **) &streamer->base.bbs_ops) =
     366              :         &astreamer_tar_archiver_ops;
     367            0 :     streamer->base.bbs_next = next;
     368              : 
     369            0 :     return &streamer->base;
     370              : }
     371              : 
     372              : /*
     373              :  * Fix up the stream of input chunks to create a valid tar file.
     374              :  *
     375              :  * If a ASTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
     376              :  * newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
     377              :  * passed through without change. Any other size is a fatal error (and
     378              :  * indicates a bug).
     379              :  *
     380              :  * Whenever a new ASTREAMER_MEMBER_HEADER chunk is constructed, the
     381              :  * corresponding ASTREAMER_MEMBER_TRAILER chunk is also constructed from
     382              :  * scratch. Specifically, we construct a block of zero bytes sufficient to
     383              :  * pad out to a block boundary, as required by the tar format. Other
     384              :  * ASTREAMER_MEMBER_TRAILER chunks are passed through without change.
     385              :  *
     386              :  * Any ASTREAMER_MEMBER_CONTENTS chunks are passed through without change.
     387              :  *
     388              :  * The ASTREAMER_ARCHIVE_TRAILER chunk is replaced with two
     389              :  * blocks of zero bytes. Not all tar programs require this, but apparently
     390              :  * some do. The server does not supply this trailer. If no archive trailer is
     391              :  * present, one will be added by astreamer_tar_parser_finalize.
     392              :  */
     393              : static void
     394            0 : astreamer_tar_archiver_content(astreamer *streamer,
     395              :                                astreamer_member *member,
     396              :                                const char *data, int len,
     397              :                                astreamer_archive_context context)
     398              : {
     399            0 :     astreamer_tar_archiver *mystreamer = (astreamer_tar_archiver *) streamer;
     400              :     char        buffer[2 * TAR_BLOCK_SIZE];
     401              : 
     402              :     Assert(context != ASTREAMER_UNKNOWN);
     403              : 
     404            0 :     if (context == ASTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE)
     405              :     {
     406              :         Assert(len == 0);
     407              : 
     408              :         /* Replace zero-length tar header with a newly constructed one. */
     409            0 :         tarCreateHeader(buffer, member->pathname, NULL,
     410              :                         member->size, member->mode, member->uid, member->gid,
     411              :                         time(NULL));
     412            0 :         data = buffer;
     413            0 :         len = TAR_BLOCK_SIZE;
     414              : 
     415              :         /* Also make a note to replace padding, in case size changed. */
     416            0 :         mystreamer->rearchive_member = true;
     417              :     }
     418            0 :     else if (context == ASTREAMER_MEMBER_TRAILER &&
     419            0 :              mystreamer->rearchive_member)
     420            0 :     {
     421            0 :         int         pad_bytes = tarPaddingBytesRequired(member->size);
     422              : 
     423              :         /* Also replace padding, if we regenerated the header. */
     424            0 :         memset(buffer, 0, pad_bytes);
     425            0 :         data = buffer;
     426            0 :         len = pad_bytes;
     427              : 
     428              :         /* Don't do this again unless we replace another header. */
     429            0 :         mystreamer->rearchive_member = false;
     430              :     }
     431            0 :     else if (context == ASTREAMER_ARCHIVE_TRAILER)
     432              :     {
     433              :         /* Trailer should always be two blocks of zero bytes. */
     434            0 :         memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
     435            0 :         data = buffer;
     436            0 :         len = 2 * TAR_BLOCK_SIZE;
     437              :     }
     438              : 
     439            0 :     astreamer_content(streamer->bbs_next, member, data, len, context);
     440            0 : }
     441              : 
     442              : /*
     443              :  * End-of-stream processing for a tar archiver.
     444              :  */
     445              : static void
     446            0 : astreamer_tar_archiver_finalize(astreamer *streamer)
     447              : {
     448            0 :     astreamer_finalize(streamer->bbs_next);
     449            0 : }
     450              : 
     451              : /*
     452              :  * Free memory associated with a tar archiver.
     453              :  */
     454              : static void
     455            0 : astreamer_tar_archiver_free(astreamer *streamer)
     456              : {
     457            0 :     astreamer_free(streamer->bbs_next);
     458            0 :     pfree(streamer);
     459            0 : }
     460              : 
     461              : /*
     462              :  * Create a astreamer that blindly adds two blocks of NUL bytes to the
     463              :  * end of an incomplete tarfile that the server might send us.
     464              :  */
     465              : astreamer *
     466            0 : astreamer_tar_terminator_new(astreamer *next)
     467              : {
     468              :     astreamer  *streamer;
     469              : 
     470            0 :     streamer = palloc0_object(astreamer);
     471            0 :     *((const astreamer_ops **) &streamer->bbs_ops) =
     472              :         &astreamer_tar_terminator_ops;
     473            0 :     streamer->bbs_next = next;
     474              : 
     475            0 :     return streamer;
     476              : }
     477              : 
     478              : /*
     479              :  * Pass all the content through without change.
     480              :  */
     481              : static void
     482            0 : astreamer_tar_terminator_content(astreamer *streamer,
     483              :                                  astreamer_member *member,
     484              :                                  const char *data, int len,
     485              :                                  astreamer_archive_context context)
     486              : {
     487              :     /* Expect unparsed input. */
     488              :     Assert(member == NULL);
     489              :     Assert(context == ASTREAMER_UNKNOWN);
     490              : 
     491              :     /* Just forward it. */
     492            0 :     astreamer_content(streamer->bbs_next, member, data, len, context);
     493            0 : }
     494              : 
     495              : /*
     496              :  * At the end, blindly add the two blocks of NUL bytes which the server fails
     497              :  * to supply.
     498              :  */
     499              : static void
     500            0 : astreamer_tar_terminator_finalize(astreamer *streamer)
     501              : {
     502              :     char        buffer[2 * TAR_BLOCK_SIZE];
     503              : 
     504            0 :     memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
     505            0 :     astreamer_content(streamer->bbs_next, NULL, buffer,
     506              :                       2 * TAR_BLOCK_SIZE, ASTREAMER_UNKNOWN);
     507            0 :     astreamer_finalize(streamer->bbs_next);
     508            0 : }
     509              : 
     510              : /*
     511              :  * Free memory associated with a tar terminator.
     512              :  */
     513              : static void
     514            0 : astreamer_tar_terminator_free(astreamer *streamer)
     515              : {
     516            0 :     astreamer_free(streamer->bbs_next);
     517            0 :     pfree(streamer);
     518            0 : }
        

Generated by: LCOV version 2.0-1