LCOV - code coverage report
Current view: top level - src/bin/pg_basebackup - bbstreamer_tar.c (source / functions) Hit Total Coverage
Test: PostgreSQL 16beta1 Lines: 82 141 58.2 %
Date: 2023-05-31 01:12:27 Functions: 5 13 38.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * bbstreamer_tar.c
       4             :  *
       5             :  * This module implements three types of tar processing. A tar parser
       6             :  * expects unlabelled chunks of data (e.g. BBSTREAMER_UNKNOWN) and splits
       7             :  * it into labelled chunks (any other value of bbstreamer_archive_context).
       8             :  * A tar archiver does the reverse: it takes a bunch of labelled chunks
       9             :  * and produces a tarfile, optionally replacing member headers and trailers
      10             :  * so that upstream bbstreamer objects can perform surgery on the tarfile
      11             :  * contents without knowing the details of the tar format. A tar terminator
      12             :  * just adds two blocks of NUL bytes to the end of the file, since older
      13             :  * server versions produce files with this terminator omitted.
      14             :  *
      15             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      16             :  *
      17             :  * IDENTIFICATION
      18             :  *        src/bin/pg_basebackup/bbstreamer_tar.c
      19             :  *-------------------------------------------------------------------------
      20             :  */
      21             : 
      22             : #include "postgres_fe.h"
      23             : 
      24             : #include <time.h>
      25             : 
      26             : #include "bbstreamer.h"
      27             : #include "common/logging.h"
      28             : #include "pgtar.h"
      29             : 
      30             : typedef struct bbstreamer_tar_parser
      31             : {
      32             :     bbstreamer  base;
      33             :     bbstreamer_archive_context next_context;
      34             :     bbstreamer_member member;
      35             :     size_t      file_bytes_sent;
      36             :     size_t      pad_bytes_expected;
      37             : } bbstreamer_tar_parser;
      38             : 
      39             : typedef struct bbstreamer_tar_archiver
      40             : {
      41             :     bbstreamer  base;
      42             :     bool        rearchive_member;
      43             : } bbstreamer_tar_archiver;
      44             : 
      45             : static void bbstreamer_tar_parser_content(bbstreamer *streamer,
      46             :                                           bbstreamer_member *member,
      47             :                                           const char *data, int len,
      48             :                                           bbstreamer_archive_context context);
      49             : static void bbstreamer_tar_parser_finalize(bbstreamer *streamer);
      50             : static void bbstreamer_tar_parser_free(bbstreamer *streamer);
      51             : static bool bbstreamer_tar_header(bbstreamer_tar_parser *mystreamer);
      52             : 
      53             : const bbstreamer_ops bbstreamer_tar_parser_ops = {
      54             :     .content = bbstreamer_tar_parser_content,
      55             :     .finalize = bbstreamer_tar_parser_finalize,
      56             :     .free = bbstreamer_tar_parser_free
      57             : };
      58             : 
      59             : static void bbstreamer_tar_archiver_content(bbstreamer *streamer,
      60             :                                             bbstreamer_member *member,
      61             :                                             const char *data, int len,
      62             :                                             bbstreamer_archive_context context);
      63             : static void bbstreamer_tar_archiver_finalize(bbstreamer *streamer);
      64             : static void bbstreamer_tar_archiver_free(bbstreamer *streamer);
      65             : 
      66             : const bbstreamer_ops bbstreamer_tar_archiver_ops = {
      67             :     .content = bbstreamer_tar_archiver_content,
      68             :     .finalize = bbstreamer_tar_archiver_finalize,
      69             :     .free = bbstreamer_tar_archiver_free
      70             : };
      71             : 
      72             : static void bbstreamer_tar_terminator_content(bbstreamer *streamer,
      73             :                                               bbstreamer_member *member,
      74             :                                               const char *data, int len,
      75             :                                               bbstreamer_archive_context context);
      76             : static void bbstreamer_tar_terminator_finalize(bbstreamer *streamer);
      77             : static void bbstreamer_tar_terminator_free(bbstreamer *streamer);
      78             : 
      79             : const bbstreamer_ops bbstreamer_tar_terminator_ops = {
      80             :     .content = bbstreamer_tar_terminator_content,
      81             :     .finalize = bbstreamer_tar_terminator_finalize,
      82             :     .free = bbstreamer_tar_terminator_free
      83             : };
      84             : 
      85             : /*
      86             :  * Create a bbstreamer that can parse a stream of content as tar data.
      87             :  *
      88             :  * The input should be a series of BBSTREAMER_UNKNOWN chunks; the bbstreamer
      89             :  * specified by 'next' will receive a series of typed chunks, as per the
      90             :  * conventions described in bbstreamer.h.
      91             :  */
      92             : extern bbstreamer *
      93         244 : bbstreamer_tar_parser_new(bbstreamer *next)
      94             : {
      95             :     bbstreamer_tar_parser *streamer;
      96             : 
      97         244 :     streamer = palloc0(sizeof(bbstreamer_tar_parser));
      98         244 :     *((const bbstreamer_ops **) &streamer->base.bbs_ops) =
      99             :         &bbstreamer_tar_parser_ops;
     100         244 :     streamer->base.bbs_next = next;
     101         244 :     initStringInfo(&streamer->base.bbs_buffer);
     102         244 :     streamer->next_context = BBSTREAMER_MEMBER_HEADER;
     103             : 
     104         244 :     return &streamer->base;
     105             : }
     106             : 
     107             : /*
     108             :  * Parse unknown content as tar data.
     109             :  */
     110             : static void
     111      578168 : bbstreamer_tar_parser_content(bbstreamer *streamer, bbstreamer_member *member,
     112             :                               const char *data, int len,
     113             :                               bbstreamer_archive_context context)
     114             : {
     115      578168 :     bbstreamer_tar_parser *mystreamer = (bbstreamer_tar_parser *) streamer;
     116             :     size_t      nbytes;
     117             : 
     118             :     /* Expect unparsed input. */
     119             :     Assert(member == NULL);
     120             :     Assert(context == BBSTREAMER_UNKNOWN);
     121             : 
     122     1159952 :     while (len > 0)
     123             :     {
     124      582022 :         switch (mystreamer->next_context)
     125             :         {
     126      194468 :             case BBSTREAMER_MEMBER_HEADER:
     127             : 
     128             :                 /*
     129             :                  * If we're expecting an archive member header, accumulate a
     130             :                  * full block of data before doing anything further.
     131             :                  */
     132      194468 :                 if (!bbstreamer_buffer_until(streamer, &data, &len,
     133             :                                              TAR_BLOCK_SIZE))
     134           0 :                     return;
     135             : 
     136             :                 /*
     137             :                  * Now we can process the header and get ready to process the
     138             :                  * file contents; however, we might find out that what we
     139             :                  * thought was the next file header is actually the start of
     140             :                  * the archive trailer. Switch modes accordingly.
     141             :                  */
     142      194468 :                 if (bbstreamer_tar_header(mystreamer))
     143             :                 {
     144      194230 :                     if (mystreamer->member.size == 0)
     145             :                     {
     146             :                         /* No content; trailer is zero-length. */
     147       37930 :                         bbstreamer_content(mystreamer->base.bbs_next,
     148             :                                            &mystreamer->member,
     149             :                                            NULL, 0,
     150             :                                            BBSTREAMER_MEMBER_TRAILER);
     151             : 
     152             :                         /* Expect next header. */
     153       37930 :                         mystreamer->next_context = BBSTREAMER_MEMBER_HEADER;
     154             :                     }
     155             :                     else
     156             :                     {
     157             :                         /* Expect contents. */
     158      156300 :                         mystreamer->next_context = BBSTREAMER_MEMBER_CONTENTS;
     159             :                     }
     160      194230 :                     mystreamer->base.bbs_buffer.len = 0;
     161      194230 :                     mystreamer->file_bytes_sent = 0;
     162             :                 }
     163             :                 else
     164         238 :                     mystreamer->next_context = BBSTREAMER_ARCHIVE_TRAILER;
     165      194468 :                 break;
     166             : 
     167      384518 :             case BBSTREAMER_MEMBER_CONTENTS:
     168             : 
     169             :                 /*
     170             :                  * Send as much content as we have, but not more than the
     171             :                  * remaining file length.
     172             :                  */
     173             :                 Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
     174      384518 :                 nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
     175      384518 :                 nbytes = Min(nbytes, len);
     176             :                 Assert(nbytes > 0);
     177      384518 :                 bbstreamer_content(mystreamer->base.bbs_next,
     178             :                                    &mystreamer->member,
     179             :                                    data, nbytes,
     180             :                                    BBSTREAMER_MEMBER_CONTENTS);
     181      384518 :                 mystreamer->file_bytes_sent += nbytes;
     182      384518 :                 data += nbytes;
     183      384518 :                 len -= nbytes;
     184             : 
     185             :                 /*
     186             :                  * If we've not yet sent the whole file, then there's more
     187             :                  * content to come; otherwise, it's time to expect the file
     188             :                  * trailer.
     189             :                  */
     190             :                 Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
     191      384518 :                 if (mystreamer->file_bytes_sent == mystreamer->member.size)
     192             :                 {
     193      156296 :                     if (mystreamer->pad_bytes_expected == 0)
     194             :                     {
     195             :                         /* Trailer is zero-length. */
     196      153498 :                         bbstreamer_content(mystreamer->base.bbs_next,
     197             :                                            &mystreamer->member,
     198             :                                            NULL, 0,
     199             :                                            BBSTREAMER_MEMBER_TRAILER);
     200             : 
     201             :                         /* Expect next header. */
     202      153498 :                         mystreamer->next_context = BBSTREAMER_MEMBER_HEADER;
     203             :                     }
     204             :                     else
     205             :                     {
     206             :                         /* Trailer is not zero-length. */
     207        2798 :                         mystreamer->next_context = BBSTREAMER_MEMBER_TRAILER;
     208             :                     }
     209      156296 :                     mystreamer->base.bbs_buffer.len = 0;
     210             :                 }
     211      384518 :                 break;
     212             : 
     213        2798 :             case BBSTREAMER_MEMBER_TRAILER:
     214             : 
     215             :                 /*
     216             :                  * If we're expecting an archive member trailer, accumulate
     217             :                  * the expected number of padding bytes before sending
     218             :                  * anything onward.
     219             :                  */
     220        2798 :                 if (!bbstreamer_buffer_until(streamer, &data, &len,
     221        2798 :                                              mystreamer->pad_bytes_expected))
     222           0 :                     return;
     223             : 
     224             :                 /* OK, now we can send it. */
     225        2798 :                 bbstreamer_content(mystreamer->base.bbs_next,
     226             :                                    &mystreamer->member,
     227        2798 :                                    data, mystreamer->pad_bytes_expected,
     228             :                                    BBSTREAMER_MEMBER_TRAILER);
     229             : 
     230             :                 /* Expect next file header. */
     231        2798 :                 mystreamer->next_context = BBSTREAMER_MEMBER_HEADER;
     232        2798 :                 mystreamer->base.bbs_buffer.len = 0;
     233        2798 :                 break;
     234             : 
     235         238 :             case BBSTREAMER_ARCHIVE_TRAILER:
     236             : 
     237             :                 /*
     238             :                  * We've seen an end-of-archive indicator, so anything more is
     239             :                  * buffered and sent as part of the archive trailer. But we
     240             :                  * don't expect more than 2 blocks.
     241             :                  */
     242         238 :                 bbstreamer_buffer_bytes(streamer, &data, &len, len);
     243         238 :                 if (len > 2 * TAR_BLOCK_SIZE)
     244           0 :                     pg_fatal("tar file trailer exceeds 2 blocks");
     245         238 :                 return;
     246             : 
     247           0 :             default:
     248             :                 /* Shouldn't happen. */
     249           0 :                 pg_fatal("unexpected state while parsing tar archive");
     250             :         }
     251             :     }
     252             : }
     253             : 
     254             : /*
     255             :  * Parse a file header within a tar stream.
     256             :  *
     257             :  * The return value is true if we found a file header and passed it on to the
     258             :  * next bbstreamer; it is false if we have reached the archive trailer.
     259             :  */
     260             : static bool
     261      194468 : bbstreamer_tar_header(bbstreamer_tar_parser *mystreamer)
     262             : {
     263      194468 :     bool        has_nonzero_byte = false;
     264             :     int         i;
     265      194468 :     bbstreamer_member *member = &mystreamer->member;
     266      194468 :     char       *buffer = mystreamer->base.bbs_buffer.data;
     267             : 
     268             :     Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
     269             : 
     270             :     /* Check whether we've got a block of all zero bytes. */
     271      316324 :     for (i = 0; i < TAR_BLOCK_SIZE; ++i)
     272             :     {
     273      316086 :         if (buffer[i] != '\0')
     274             :         {
     275      194230 :             has_nonzero_byte = true;
     276      194230 :             break;
     277             :         }
     278             :     }
     279             : 
     280             :     /*
     281             :      * If the entire block was zeros, this is the end of the archive, not the
     282             :      * start of the next file.
     283             :      */
     284      194468 :     if (!has_nonzero_byte)
     285         238 :         return false;
     286             : 
     287             :     /*
     288             :      * Parse key fields out of the header.
     289             :      *
     290             :      * FIXME: It's terrible that we use hard-coded values here instead of some
     291             :      * more principled approach. It's been like this for a long time, but we
     292             :      * ought to do better.
     293             :      */
     294      194230 :     strlcpy(member->pathname, &buffer[0], MAXPGPATH);
     295      194230 :     if (member->pathname[0] == '\0')
     296           0 :         pg_fatal("tar member has empty name");
     297      194230 :     member->size = read_tar_number(&buffer[124], 12);
     298      194230 :     member->mode = read_tar_number(&buffer[100], 8);
     299      194230 :     member->uid = read_tar_number(&buffer[108], 8);
     300      194230 :     member->gid = read_tar_number(&buffer[116], 8);
     301      194230 :     member->is_directory = (buffer[156] == '5');
     302      194230 :     member->is_link = (buffer[156] == '2');
     303      194230 :     if (member->is_link)
     304          26 :         strlcpy(member->linktarget, &buffer[157], 100);
     305             : 
     306             :     /* Compute number of padding bytes. */
     307      194230 :     mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
     308             : 
     309             :     /* Forward the entire header to the next bbstreamer. */
     310      194230 :     bbstreamer_content(mystreamer->base.bbs_next, member,
     311             :                        buffer, TAR_BLOCK_SIZE,
     312             :                        BBSTREAMER_MEMBER_HEADER);
     313             : 
     314      194230 :     return true;
     315             : }
     316             : 
     317             : /*
     318             :  * End-of-stream processing for a tar parser.
     319             :  */
     320             : static void
     321         238 : bbstreamer_tar_parser_finalize(bbstreamer *streamer)
     322             : {
     323         238 :     bbstreamer_tar_parser *mystreamer = (bbstreamer_tar_parser *) streamer;
     324             : 
     325         238 :     if (mystreamer->next_context != BBSTREAMER_ARCHIVE_TRAILER &&
     326           0 :         (mystreamer->next_context != BBSTREAMER_MEMBER_HEADER ||
     327           0 :          mystreamer->base.bbs_buffer.len > 0))
     328           0 :         pg_fatal("COPY stream ended before last file was finished");
     329             : 
     330             :     /* Send the archive trailer, even if empty. */
     331         238 :     bbstreamer_content(streamer->bbs_next, NULL,
     332         238 :                        streamer->bbs_buffer.data, streamer->bbs_buffer.len,
     333             :                        BBSTREAMER_ARCHIVE_TRAILER);
     334             : 
     335             :     /* Now finalize successor. */
     336         238 :     bbstreamer_finalize(streamer->bbs_next);
     337         238 : }
     338             : 
     339             : /*
     340             :  * Free memory associated with a tar parser.
     341             :  */
     342             : static void
     343         238 : bbstreamer_tar_parser_free(bbstreamer *streamer)
     344             : {
     345         238 :     pfree(streamer->bbs_buffer.data);
     346         238 :     bbstreamer_free(streamer->bbs_next);
     347         238 : }
     348             : 
     349             : /*
     350             :  * Create an bbstreamer that can generate a tar archive.
     351             :  *
     352             :  * This is intended to be usable either for generating a brand-new tar archive
     353             :  * or for modifying one on the fly. The input should be a series of typed
     354             :  * chunks (i.e. not BBSTREAMER_UNKNOWN). See also the comments for
     355             :  * bbstreamer_tar_parser_content.
     356             :  */
     357             : extern bbstreamer *
     358           0 : bbstreamer_tar_archiver_new(bbstreamer *next)
     359             : {
     360             :     bbstreamer_tar_archiver *streamer;
     361             : 
     362           0 :     streamer = palloc0(sizeof(bbstreamer_tar_archiver));
     363           0 :     *((const bbstreamer_ops **) &streamer->base.bbs_ops) =
     364             :         &bbstreamer_tar_archiver_ops;
     365           0 :     streamer->base.bbs_next = next;
     366             : 
     367           0 :     return &streamer->base;
     368             : }
     369             : 
     370             : /*
     371             :  * Fix up the stream of input chunks to create a valid tar file.
     372             :  *
     373             :  * If a BBSTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
     374             :  * newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
     375             :  * passed through without change. Any other size is a fatal error (and
     376             :  * indicates a bug).
     377             :  *
     378             :  * Whenever a new BBSTREAMER_MEMBER_HEADER chunk is constructed, the
     379             :  * corresponding BBSTREAMER_MEMBER_TRAILER chunk is also constructed from
     380             :  * scratch. Specifically, we construct a block of zero bytes sufficient to
     381             :  * pad out to a block boundary, as required by the tar format. Other
     382             :  * BBSTREAMER_MEMBER_TRAILER chunks are passed through without change.
     383             :  *
     384             :  * Any BBSTREAMER_MEMBER_CONTENTS chunks are passed through without change.
     385             :  *
     386             :  * The BBSTREAMER_ARCHIVE_TRAILER chunk is replaced with two
     387             :  * blocks of zero bytes. Not all tar programs require this, but apparently
     388             :  * some do. The server does not supply this trailer. If no archive trailer is
     389             :  * present, one will be added by bbstreamer_tar_parser_finalize.
     390             :  */
     391             : static void
     392           0 : bbstreamer_tar_archiver_content(bbstreamer *streamer,
     393             :                                 bbstreamer_member *member,
     394             :                                 const char *data, int len,
     395             :                                 bbstreamer_archive_context context)
     396             : {
     397           0 :     bbstreamer_tar_archiver *mystreamer = (bbstreamer_tar_archiver *) streamer;
     398             :     char        buffer[2 * TAR_BLOCK_SIZE];
     399             : 
     400             :     Assert(context != BBSTREAMER_UNKNOWN);
     401             : 
     402           0 :     if (context == BBSTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE)
     403             :     {
     404             :         Assert(len == 0);
     405             : 
     406             :         /* Replace zero-length tar header with a newly constructed one. */
     407           0 :         tarCreateHeader(buffer, member->pathname, NULL,
     408             :                         member->size, member->mode, member->uid, member->gid,
     409             :                         time(NULL));
     410           0 :         data = buffer;
     411           0 :         len = TAR_BLOCK_SIZE;
     412             : 
     413             :         /* Also make a note to replace padding, in case size changed. */
     414           0 :         mystreamer->rearchive_member = true;
     415             :     }
     416           0 :     else if (context == BBSTREAMER_MEMBER_TRAILER &&
     417           0 :              mystreamer->rearchive_member)
     418           0 :     {
     419           0 :         int         pad_bytes = tarPaddingBytesRequired(member->size);
     420             : 
     421             :         /* Also replace padding, if we regenerated the header. */
     422           0 :         memset(buffer, 0, pad_bytes);
     423           0 :         data = buffer;
     424           0 :         len = pad_bytes;
     425             : 
     426             :         /* Don't do this again unless we replace another header. */
     427           0 :         mystreamer->rearchive_member = false;
     428             :     }
     429           0 :     else if (context == BBSTREAMER_ARCHIVE_TRAILER)
     430             :     {
     431             :         /* Trailer should always be two blocks of zero bytes. */
     432           0 :         memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
     433           0 :         data = buffer;
     434           0 :         len = 2 * TAR_BLOCK_SIZE;
     435             :     }
     436             : 
     437           0 :     bbstreamer_content(streamer->bbs_next, member, data, len, context);
     438           0 : }
     439             : 
     440             : /*
     441             :  * End-of-stream processing for a tar archiver.
     442             :  */
     443             : static void
     444           0 : bbstreamer_tar_archiver_finalize(bbstreamer *streamer)
     445             : {
     446           0 :     bbstreamer_finalize(streamer->bbs_next);
     447           0 : }
     448             : 
     449             : /*
     450             :  * Free memory associated with a tar archiver.
     451             :  */
     452             : static void
     453           0 : bbstreamer_tar_archiver_free(bbstreamer *streamer)
     454             : {
     455           0 :     bbstreamer_free(streamer->bbs_next);
     456           0 :     pfree(streamer);
     457           0 : }
     458             : 
     459             : /*
     460             :  * Create a bbstreamer that blindly adds two blocks of NUL bytes to the
     461             :  * end of an incomplete tarfile that the server might send us.
     462             :  */
     463             : bbstreamer *
     464           0 : bbstreamer_tar_terminator_new(bbstreamer *next)
     465             : {
     466             :     bbstreamer *streamer;
     467             : 
     468           0 :     streamer = palloc0(sizeof(bbstreamer));
     469           0 :     *((const bbstreamer_ops **) &streamer->bbs_ops) =
     470             :         &bbstreamer_tar_terminator_ops;
     471           0 :     streamer->bbs_next = next;
     472             : 
     473           0 :     return streamer;
     474             : }
     475             : 
     476             : /*
     477             :  * Pass all the content through without change.
     478             :  */
     479             : static void
     480           0 : bbstreamer_tar_terminator_content(bbstreamer *streamer,
     481             :                                   bbstreamer_member *member,
     482             :                                   const char *data, int len,
     483             :                                   bbstreamer_archive_context context)
     484             : {
     485             :     /* Expect unparsed input. */
     486             :     Assert(member == NULL);
     487             :     Assert(context == BBSTREAMER_UNKNOWN);
     488             : 
     489             :     /* Just forward it. */
     490           0 :     bbstreamer_content(streamer->bbs_next, member, data, len, context);
     491           0 : }
     492             : 
     493             : /*
     494             :  * At the end, blindly add the two blocks of NUL bytes which the server fails
     495             :  * to supply.
     496             :  */
     497             : static void
     498           0 : bbstreamer_tar_terminator_finalize(bbstreamer *streamer)
     499             : {
     500             :     char        buffer[2 * TAR_BLOCK_SIZE];
     501             : 
     502           0 :     memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
     503           0 :     bbstreamer_content(streamer->bbs_next, NULL, buffer,
     504             :                        2 * TAR_BLOCK_SIZE, BBSTREAMER_UNKNOWN);
     505           0 :     bbstreamer_finalize(streamer->bbs_next);
     506           0 : }
     507             : 
     508             : /*
     509             :  * Free memory associated with a tar terminator.
     510             :  */
     511             : static void
     512           0 : bbstreamer_tar_terminator_free(bbstreamer *streamer)
     513             : {
     514           0 :     bbstreamer_free(streamer->bbs_next);
     515           0 :     pfree(streamer);
     516           0 : }

Generated by: LCOV version 1.14