LCOV - code coverage report
Current view: top level - src/fe_utils - astreamer_tar.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 84 143 58.7 %
Date: 2025-01-18 03:14:54 Functions: 5 13 38.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * astreamer_tar.c
       4             :  *
       5             :  * This module implements three types of tar processing. A tar parser
       6             :  * expects unlabelled chunks of data (i.e. ASTREAMER_UNKNOWN) and splits
       7             :  * it into labelled chunks (any other value of astreamer_archive_context).
       8             :  * A tar archiver does the reverse: it takes a bunch of labelled chunks
       9             :  * and produces a tarfile, optionally replacing member headers and trailers
      10             :  * so that upstream astreamer objects can perform surgery on the tarfile
      11             :  * contents without knowing the details of the tar format. A tar terminator
      12             :  * just adds two blocks of NUL bytes to the end of the file, since older
      13             :  * server versions produce files with this terminator omitted.
      14             :  *
      15             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      16             :  *
      17             :  * IDENTIFICATION
      18             :  *        src/fe_utils/astreamer_tar.c
      19             :  *-------------------------------------------------------------------------
      20             :  */
      21             : 
      22             : #include "postgres_fe.h"
      23             : 
      24             : #include <time.h>
      25             : 
      26             : #include "common/logging.h"
      27             : #include "fe_utils/astreamer.h"
      28             : #include "pgtar.h"
      29             : 
      30             : typedef struct astreamer_tar_parser
      31             : {
      32             :     astreamer   base;
      33             :     astreamer_archive_context next_context;
      34             :     astreamer_member member;
      35             :     size_t      file_bytes_sent;
      36             :     size_t      pad_bytes_expected;
      37             : } astreamer_tar_parser;
      38             : 
      39             : typedef struct astreamer_tar_archiver
      40             : {
      41             :     astreamer   base;
      42             :     bool        rearchive_member;
      43             : } astreamer_tar_archiver;
      44             : 
      45             : static void astreamer_tar_parser_content(astreamer *streamer,
      46             :                                          astreamer_member *member,
      47             :                                          const char *data, int len,
      48             :                                          astreamer_archive_context context);
      49             : static void astreamer_tar_parser_finalize(astreamer *streamer);
      50             : static void astreamer_tar_parser_free(astreamer *streamer);
      51             : static bool astreamer_tar_header(astreamer_tar_parser *mystreamer);
      52             : 
      53             : static const astreamer_ops astreamer_tar_parser_ops = {
      54             :     .content = astreamer_tar_parser_content,
      55             :     .finalize = astreamer_tar_parser_finalize,
      56             :     .free = astreamer_tar_parser_free
      57             : };
      58             : 
      59             : static void astreamer_tar_archiver_content(astreamer *streamer,
      60             :                                            astreamer_member *member,
      61             :                                            const char *data, int len,
      62             :                                            astreamer_archive_context context);
      63             : static void astreamer_tar_archiver_finalize(astreamer *streamer);
      64             : static void astreamer_tar_archiver_free(astreamer *streamer);
      65             : 
      66             : static const astreamer_ops astreamer_tar_archiver_ops = {
      67             :     .content = astreamer_tar_archiver_content,
      68             :     .finalize = astreamer_tar_archiver_finalize,
      69             :     .free = astreamer_tar_archiver_free
      70             : };
      71             : 
      72             : static void astreamer_tar_terminator_content(astreamer *streamer,
      73             :                                              astreamer_member *member,
      74             :                                              const char *data, int len,
      75             :                                              astreamer_archive_context context);
      76             : static void astreamer_tar_terminator_finalize(astreamer *streamer);
      77             : static void astreamer_tar_terminator_free(astreamer *streamer);
      78             : 
      79             : static const astreamer_ops astreamer_tar_terminator_ops = {
      80             :     .content = astreamer_tar_terminator_content,
      81             :     .finalize = astreamer_tar_terminator_finalize,
      82             :     .free = astreamer_tar_terminator_free
      83             : };
      84             : 
      85             : /*
      86             :  * Create an astreamer that can parse a stream of content as tar data.
      87             :  *
      88             :  * The input should be a series of ASTREAMER_UNKNOWN chunks; the astreamer
      89             :  * specified by 'next' will receive a series of typed chunks, as per the
      90             :  * conventions described in astreamer.h.
      91             :  */
      92             : astreamer *
      93         376 : astreamer_tar_parser_new(astreamer *next)
      94             : {
      95             :     astreamer_tar_parser *streamer;
      96             : 
      97         376 :     streamer = palloc0(sizeof(astreamer_tar_parser));
      98         376 :     *((const astreamer_ops **) &streamer->base.bbs_ops) =
      99             :         &astreamer_tar_parser_ops;
     100         376 :     streamer->base.bbs_next = next;
     101         376 :     initStringInfo(&streamer->base.bbs_buffer);
     102         376 :     streamer->next_context = ASTREAMER_MEMBER_HEADER;
     103             : 
     104         376 :     return &streamer->base;
     105             : }
     106             : 
     107             : /*
     108             :  * Parse unknown content as tar data.
     109             :  */
     110             : static void
     111     1059756 : astreamer_tar_parser_content(astreamer *streamer, astreamer_member *member,
     112             :                              const char *data, int len,
     113             :                              astreamer_archive_context context)
     114             : {
     115     1059756 :     astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
     116             :     size_t      nbytes;
     117             : 
     118             :     /* Expect unparsed input. */
     119             :     Assert(member == NULL);
     120             :     Assert(context == ASTREAMER_UNKNOWN);
     121             : 
     122     2184846 :     while (len > 0)
     123             :     {
     124     1125464 :         switch (mystreamer->next_context)
     125             :         {
     126      300494 :             case ASTREAMER_MEMBER_HEADER:
     127             : 
     128             :                 /*
     129             :                  * If we're expecting an archive member header, accumulate a
     130             :                  * full block of data before doing anything further.
     131             :                  */
     132      300494 :                 if (!astreamer_buffer_until(streamer, &data, &len,
     133             :                                             TAR_BLOCK_SIZE))
     134           0 :                     return;
     135             : 
     136             :                 /*
     137             :                  * Now we can process the header and get ready to process the
     138             :                  * file contents; however, we might find out that what we
     139             :                  * thought was the next file header is actually the start of
     140             :                  * the archive trailer. Switch modes accordingly.
     141             :                  */
     142      300494 :                 if (astreamer_tar_header(mystreamer))
     143             :                 {
     144      300126 :                     if (mystreamer->member.size == 0)
     145             :                     {
     146             :                         /* No content; trailer is zero-length. */
     147       59170 :                         astreamer_content(mystreamer->base.bbs_next,
     148             :                                           &mystreamer->member,
     149             :                                           NULL, 0,
     150             :                                           ASTREAMER_MEMBER_TRAILER);
     151             : 
     152             :                         /* Expect next header. */
     153       59170 :                         mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
     154             :                     }
     155             :                     else
     156             :                     {
     157             :                         /* Expect contents. */
     158      240956 :                         mystreamer->next_context = ASTREAMER_MEMBER_CONTENTS;
     159             :                     }
     160      300126 :                     mystreamer->base.bbs_buffer.len = 0;
     161      300126 :                     mystreamer->file_bytes_sent = 0;
     162             :                 }
     163             :                 else
     164         368 :                     mystreamer->next_context = ASTREAMER_ARCHIVE_TRAILER;
     165      300494 :                 break;
     166             : 
     167      808372 :             case ASTREAMER_MEMBER_CONTENTS:
     168             : 
     169             :                 /*
     170             :                  * Send as much content as we have, but not more than the
     171             :                  * remaining file length.
     172             :                  */
     173             :                 Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
     174      808372 :                 nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
     175      808372 :                 nbytes = Min(nbytes, len);
     176             :                 Assert(nbytes > 0);
     177      808372 :                 astreamer_content(mystreamer->base.bbs_next,
     178             :                                   &mystreamer->member,
     179             :                                   data, nbytes,
     180             :                                   ASTREAMER_MEMBER_CONTENTS);
     181      808372 :                 mystreamer->file_bytes_sent += nbytes;
     182      808372 :                 data += nbytes;
     183      808372 :                 len -= nbytes;
     184             : 
     185             :                 /*
     186             :                  * If we've not yet sent the whole file, then there's more
     187             :                  * content to come; otherwise, it's time to expect the file
     188             :                  * trailer.
     189             :                  */
     190             :                 Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
     191      808372 :                 if (mystreamer->file_bytes_sent == mystreamer->member.size)
     192             :                 {
     193      240952 :                     if (mystreamer->pad_bytes_expected == 0)
     194             :                     {
     195             :                         /* Trailer is zero-length. */
     196      224726 :                         astreamer_content(mystreamer->base.bbs_next,
     197             :                                           &mystreamer->member,
     198             :                                           NULL, 0,
     199             :                                           ASTREAMER_MEMBER_TRAILER);
     200             : 
     201             :                         /* Expect next header. */
     202      224724 :                         mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
     203             :                     }
     204             :                     else
     205             :                     {
     206             :                         /* Trailer is not zero-length. */
     207       16226 :                         mystreamer->next_context = ASTREAMER_MEMBER_TRAILER;
     208             :                     }
     209      240950 :                     mystreamer->base.bbs_buffer.len = 0;
     210             :                 }
     211      808370 :                 break;
     212             : 
     213       16226 :             case ASTREAMER_MEMBER_TRAILER:
     214             : 
     215             :                 /*
     216             :                  * If we're expecting an archive member trailer, accumulate
     217             :                  * the expected number of padding bytes before sending
     218             :                  * anything onward.
     219             :                  */
     220       16226 :                 if (!astreamer_buffer_until(streamer, &data, &len,
     221       16226 :                                             mystreamer->pad_bytes_expected))
     222           0 :                     return;
     223             : 
     224             :                 /* OK, now we can send it. */
     225       16226 :                 astreamer_content(mystreamer->base.bbs_next,
     226             :                                   &mystreamer->member,
     227       16226 :                                   data, mystreamer->pad_bytes_expected,
     228             :                                   ASTREAMER_MEMBER_TRAILER);
     229             : 
     230             :                 /* Expect next file header. */
     231       16226 :                 mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
     232       16226 :                 mystreamer->base.bbs_buffer.len = 0;
     233       16226 :                 break;
     234             : 
     235         372 :             case ASTREAMER_ARCHIVE_TRAILER:
     236             : 
     237             :                 /*
     238             :                  * We've seen an end-of-archive indicator, so anything more is
     239             :                  * buffered and sent as part of the archive trailer. But we
     240             :                  * don't expect more than 2 blocks.
     241             :                  */
     242         372 :                 astreamer_buffer_bytes(streamer, &data, &len, len);
     243         372 :                 if (len > 2 * TAR_BLOCK_SIZE)
     244           0 :                     pg_fatal("tar file trailer exceeds 2 blocks");
     245         372 :                 return;
     246             : 
     247           0 :             default:
     248             :                 /* Shouldn't happen. */
     249           0 :                 pg_fatal("unexpected state while parsing tar archive");
     250             :         }
     251             :     }
     252             : }
     253             : 
     254             : /*
     255             :  * Parse a file header within a tar stream.
     256             :  *
     257             :  * The return value is true if we found a file header and passed it on to the
     258             :  * next astreamer; it is false if we have reached the archive trailer.
     259             :  */
     260             : static bool
     261      300494 : astreamer_tar_header(astreamer_tar_parser *mystreamer)
     262             : {
     263      300494 :     bool        has_nonzero_byte = false;
     264             :     int         i;
     265      300494 :     astreamer_member *member = &mystreamer->member;
     266      300494 :     char       *buffer = mystreamer->base.bbs_buffer.data;
     267             : 
     268             :     Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
     269             : 
     270             :     /* Check whether we've got a block of all zero bytes. */
     271      488910 :     for (i = 0; i < TAR_BLOCK_SIZE; ++i)
     272             :     {
     273      488542 :         if (buffer[i] != '\0')
     274             :         {
     275      300126 :             has_nonzero_byte = true;
     276      300126 :             break;
     277             :         }
     278             :     }
     279             : 
     280             :     /*
     281             :      * If the entire block was zeros, this is the end of the archive, not the
     282             :      * start of the next file.
     283             :      */
     284      300494 :     if (!has_nonzero_byte)
     285         368 :         return false;
     286             : 
     287             :     /*
     288             :      * Parse key fields out of the header.
     289             :      */
     290      300126 :     strlcpy(member->pathname, &buffer[TAR_OFFSET_NAME], MAXPGPATH);
     291      300126 :     if (member->pathname[0] == '\0')
     292           0 :         pg_fatal("tar member has empty name");
     293      300126 :     member->size = read_tar_number(&buffer[TAR_OFFSET_SIZE], 12);
     294      300126 :     member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
     295      300126 :     member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
     296      300126 :     member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
     297      300126 :     member->is_directory =
     298      300126 :         (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_DIRECTORY);
     299      300126 :     member->is_link =
     300      300126 :         (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_SYMLINK);
     301      300126 :     if (member->is_link)
     302          32 :         strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
     303             : 
     304             :     /* Compute number of padding bytes. */
     305      300126 :     mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
     306             : 
     307             :     /* Forward the entire header to the next astreamer. */
     308      300126 :     astreamer_content(mystreamer->base.bbs_next, member,
     309             :                       buffer, TAR_BLOCK_SIZE,
     310             :                       ASTREAMER_MEMBER_HEADER);
     311             : 
     312      300126 :     return true;
     313             : }
     314             : 
     315             : /*
     316             :  * End-of-stream processing for a tar parser.
     317             :  */
     318             : static void
     319         368 : astreamer_tar_parser_finalize(astreamer *streamer)
     320             : {
     321         368 :     astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
     322             : 
     323         368 :     if (mystreamer->next_context != ASTREAMER_ARCHIVE_TRAILER &&
     324           0 :         (mystreamer->next_context != ASTREAMER_MEMBER_HEADER ||
     325           0 :          mystreamer->base.bbs_buffer.len > 0))
     326           0 :         pg_fatal("COPY stream ended before last file was finished");
     327             : 
     328             :     /* Send the archive trailer, even if empty. */
     329         368 :     astreamer_content(streamer->bbs_next, NULL,
     330         368 :                       streamer->bbs_buffer.data, streamer->bbs_buffer.len,
     331             :                       ASTREAMER_ARCHIVE_TRAILER);
     332             : 
     333             :     /* Now finalize successor. */
     334         368 :     astreamer_finalize(streamer->bbs_next);
     335         368 : }
     336             : 
     337             : /*
     338             :  * Free memory associated with a tar parser.
     339             :  */
     340             : static void
     341         368 : astreamer_tar_parser_free(astreamer *streamer)
     342             : {
     343         368 :     pfree(streamer->bbs_buffer.data);
     344         368 :     astreamer_free(streamer->bbs_next);
     345         368 : }
     346             : 
     347             : /*
     348             :  * Create an astreamer that can generate a tar archive.
     349             :  *
     350             :  * This is intended to be usable either for generating a brand-new tar archive
     351             :  * or for modifying one on the fly. The input should be a series of typed
     352             :  * chunks (i.e. not ASTREAMER_UNKNOWN). See also the comments for
     353             :  * astreamer_tar_archiver_content.
     354             :  */
     355             : astreamer *
     356           0 : astreamer_tar_archiver_new(astreamer *next)
     357             : {
     358             :     astreamer_tar_archiver *streamer;
     359             : 
     360           0 :     streamer = palloc0(sizeof(astreamer_tar_archiver));
     361           0 :     *((const astreamer_ops **) &streamer->base.bbs_ops) =
     362             :         &astreamer_tar_archiver_ops;
     363           0 :     streamer->base.bbs_next = next;
     364             : 
     365           0 :     return &streamer->base;
     366             : }
     367             : 
     368             : /*
     369             :  * Fix up the stream of input chunks to create a valid tar file.
     370             :  *
     371             :  * If an ASTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
     372             :  * newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
     373             :  * passed through without change. Any other size is a fatal error (and
     374             :  * indicates a bug).
     375             :  *
     376             :  * Whenever a new ASTREAMER_MEMBER_HEADER chunk is constructed, the
     377             :  * corresponding ASTREAMER_MEMBER_TRAILER chunk is also constructed from
     378             :  * scratch. Specifically, we construct a block of zero bytes sufficient to
     379             :  * pad out to a block boundary, as required by the tar format. Other
     380             :  * ASTREAMER_MEMBER_TRAILER chunks are passed through without change.
     381             :  *
     382             :  * Any ASTREAMER_MEMBER_CONTENTS chunks are passed through without change.
     383             :  *
     384             :  * The ASTREAMER_ARCHIVE_TRAILER chunk is replaced with two
     385             :  * blocks of zero bytes. Not all tar programs require this, but apparently
     386             :  * some do. The server does not supply this trailer. Even if the input has
     387             :  * none, astreamer_tar_parser_finalize sends an empty chunk of this type.
     388             :  */
     389             : static void
     390           0 : astreamer_tar_archiver_content(astreamer *streamer,
     391             :                                astreamer_member *member,
     392             :                                const char *data, int len,
     393             :                                astreamer_archive_context context)
     394             : {
     395           0 :     astreamer_tar_archiver *mystreamer = (astreamer_tar_archiver *) streamer;
     396             :     char        buffer[2 * TAR_BLOCK_SIZE];
     397             : 
     398             :     Assert(context != ASTREAMER_UNKNOWN);
     399             : 
     400           0 :     if (context == ASTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE)
     401             :     {
     402             :         Assert(len == 0);
     403             : 
     404             :         /* Replace zero-length tar header with a newly constructed one. */
     405           0 :         tarCreateHeader(buffer, member->pathname, NULL,
     406             :                         member->size, member->mode, member->uid, member->gid,
     407             :                         time(NULL));
     408           0 :         data = buffer;
     409           0 :         len = TAR_BLOCK_SIZE;
     410             : 
     411             :         /* Also make a note to replace padding, in case size changed. */
     412           0 :         mystreamer->rearchive_member = true;
     413             :     }
     414           0 :     else if (context == ASTREAMER_MEMBER_TRAILER &&
     415           0 :              mystreamer->rearchive_member)
     416           0 :     {
     417           0 :         int         pad_bytes = tarPaddingBytesRequired(member->size);
     418             : 
     419             :         /* Also replace padding, if we regenerated the header. */
     420           0 :         memset(buffer, 0, pad_bytes);
     421           0 :         data = buffer;
     422           0 :         len = pad_bytes;
     423             : 
     424             :         /* Don't do this again unless we replace another header. */
     425           0 :         mystreamer->rearchive_member = false;
     426             :     }
     427           0 :     else if (context == ASTREAMER_ARCHIVE_TRAILER)
     428             :     {
     429             :         /* Trailer should always be two blocks of zero bytes. */
     430           0 :         memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
     431           0 :         data = buffer;
     432           0 :         len = 2 * TAR_BLOCK_SIZE;
     433             :     }
     434             : 
     435           0 :     astreamer_content(streamer->bbs_next, member, data, len, context);
     436           0 : }
     437             : 
     438             : /*
     439             :  * End-of-stream processing for a tar archiver.
     440             :  */
     441             : static void
     442           0 : astreamer_tar_archiver_finalize(astreamer *streamer)
     443             : {
     444           0 :     astreamer_finalize(streamer->bbs_next);
     445           0 : }
     446             : 
     447             : /*
     448             :  * Free memory associated with a tar archiver.
     449             :  */
     450             : static void
     451           0 : astreamer_tar_archiver_free(astreamer *streamer)
     452             : {
     453           0 :     astreamer_free(streamer->bbs_next);
     454           0 :     pfree(streamer);
     455           0 : }
     456             : 
     457             : /*
     458             :  * Create an astreamer that blindly adds two blocks of NUL bytes to the
     459             :  * end of an incomplete tarfile that the server might send us.
     460             :  */
     461             : astreamer *
     462           0 : astreamer_tar_terminator_new(astreamer *next)
     463             : {
     464             :     astreamer  *streamer;
     465             : 
     466           0 :     streamer = palloc0(sizeof(astreamer));
     467           0 :     *((const astreamer_ops **) &streamer->bbs_ops) =
     468             :         &astreamer_tar_terminator_ops;
     469           0 :     streamer->bbs_next = next;
     470             : 
     471           0 :     return streamer;
     472             : }
     473             : 
     474             : /*
     475             :  * Pass all the content through without change.
     476             :  */
     477             : static void
     478           0 : astreamer_tar_terminator_content(astreamer *streamer,
     479             :                                  astreamer_member *member,
     480             :                                  const char *data, int len,
     481             :                                  astreamer_archive_context context)
     482             : {
     483             :     /* Expect unparsed input. */
     484             :     Assert(member == NULL);
     485             :     Assert(context == ASTREAMER_UNKNOWN);
     486             : 
     487             :     /* Just forward it. */
     488           0 :     astreamer_content(streamer->bbs_next, member, data, len, context);
     489           0 : }
     490             : 
     491             : /*
     492             :  * At the end, blindly add the two blocks of NUL bytes which the server fails
     493             :  * to supply.
     494             :  */
     495             : static void
     496           0 : astreamer_tar_terminator_finalize(astreamer *streamer)
     497             : {
     498             :     char        buffer[2 * TAR_BLOCK_SIZE];
     499             : 
     500           0 :     memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
     501           0 :     astreamer_content(streamer->bbs_next, NULL, buffer,
     502             :                       2 * TAR_BLOCK_SIZE, ASTREAMER_UNKNOWN);
     503           0 :     astreamer_finalize(streamer->bbs_next);
     504           0 : }
     505             : 
     506             : /*
     507             :  * Free memory associated with a tar terminator.
     508             :  */
     509             : static void
     510           0 : astreamer_tar_terminator_free(astreamer *streamer)
     511             : {
     512           0 :     astreamer_free(streamer->bbs_next);
     513           0 :     pfree(streamer);
     514           0 : }

Generated by: LCOV version 1.14