Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * astreamer.h
4 : *
5 : * The "archive streamer" interface is intended to allow frontend code
6 : * to stream from possibly-compressed archive files from any source and
7 : * perform arbitrary actions based on the contents of those archives.
8 : * Archive streamers are intended to be composable, and most tasks will
9 : * require two or more archive streamers to complete. For instance,
10 : * if the input is an uncompressed tar stream, a tar parser astreamer
11 : * could be used to interpret it, and then an extractor astreamer could
12 : * be used to write each archive member out to a file.
13 : *
14 : * In general, each archive streamer is relatively free to take whatever
15 : * action it desires in the stream of chunks provided by the caller. It
16 : * may do something simple, like write the archive to a file, perhaps after
17 : * compressing it, but it can also do more complicated things, like
18 : * annotating the byte stream to indicate which parts of the data
19 : * correspond to tar headers or trailing padding, vs. which parts are
20 : * payload data. A subsequent astreamer may use this information to
21 : * make further decisions about how to process the data; for example,
22 : * it might choose to modify the archive contents.
23 : *
24 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
25 : *
26 : * IDENTIFICATION
27 : * src/include/fe_utils/astreamer.h
28 : *-------------------------------------------------------------------------
29 : */
30 :
31 : #ifndef ASTREAMER_H
32 : #define ASTREAMER_H
33 :
34 : #include "common/compression.h"
35 : #include "lib/stringinfo.h"
36 : #include "pqexpbuffer.h"
37 :
/*
 * Forward declarations.  Both types are defined in full later in this
 * header; introducing the typedef names up front lets each definition
 * refer to the other.
 */
typedef struct astreamer astreamer;
typedef struct astreamer_ops astreamer_ops;
42 :
/*
 * Classification of each chunk of archive data passed to an astreamer.
 *
 * When data is first handed to an archive streamer it has not been parsed:
 * every chunk is labelled ASTREAMER_UNKNOWN, and chunks may be of whatever
 * size the caller finds convenient.
 *
 * Once the archive has been parsed (e.g. see astreamer_tar_parser_new()),
 * every chunk should carry one of the other labels.  Each archive member
 * gets exactly one ASTREAMER_MEMBER_HEADER chunk and exactly one
 * ASTREAMER_MEMBER_TRAILER chunk, even if that requires a zero-length call,
 * with any number of ASTREAMER_MEMBER_CONTENTS chunks between the two.
 * There should be exactly one ASTREAMER_ARCHIVE_TRAILER chunk, and it should
 * follow the last ASTREAMER_MEMBER_TRAILER chunk.
 *
 * In theory other classifications could be needed, such as a way of
 * indicating an archive header, but the "tar" format doesn't need anything
 * else, so for the time being there's no point in adding them.
 */
typedef enum
{
	ASTREAMER_UNKNOWN = 0,			/* unparsed data, arbitrary chunking */
	ASTREAMER_MEMBER_HEADER = 1,	/* exactly one per archive member */
	ASTREAMER_MEMBER_CONTENTS = 2,	/* any number per archive member */
	ASTREAMER_MEMBER_TRAILER = 3,	/* exactly one per archive member */
	ASTREAMER_ARCHIVE_TRAILER = 4,	/* follows the last member trailer */
} astreamer_archive_context;
70 :
71 : /*
72 : * Each chunk of data that is classified as ASTREAMER_MEMBER_HEADER,
73 : * ASTREAMER_MEMBER_CONTENTS, or ASTREAMER_MEMBER_TRAILER should also
74 : * pass a pointer to an instance of this struct. The details are expected
75 : * to be present in the archive header and used to fill the struct, after
76 : * which all subsequent calls for the same archive member are expected to
77 : * pass the same details.
78 : */
79 : typedef struct
80 : {
81 : char pathname[MAXPGPATH];
82 : pgoff_t size;
83 : mode_t mode;
84 : uid_t uid;
85 : gid_t gid;
86 : bool is_directory;
87 : bool is_link;
88 : char linktarget[MAXPGPATH];
89 : } astreamer_member;
90 :
91 : /*
92 : * Generally, each type of astreamer will define its own struct, but the
93 : * first element should be 'astreamer base'. A astreamer that does not
94 : * require any additional private data could use this structure directly.
95 : *
96 : * bbs_ops is a pointer to the astreamer_ops object which contains the
97 : * function pointers appropriate to this type of astreamer.
98 : *
99 : * bbs_next is a pointer to the successor astreamer, for those types of
100 : * astreamer which forward data to a successor. It need not be used and
101 : * should be set to NULL when not relevant.
102 : *
103 : * bbs_buffer is a buffer for accumulating data for temporary storage. Each
104 : * type of astreamer makes its own decisions about whether and how to use
105 : * this buffer.
106 : */
107 : struct astreamer
108 : {
109 : const astreamer_ops *bbs_ops;
110 : astreamer *bbs_next;
111 : StringInfoData bbs_buffer;
112 : };
113 :
114 : /*
115 : * There are three callbacks for a astreamer. The 'content' callback is
116 : * called repeatedly, as described in the astreamer_archive_context comments.
117 : * Then, the 'finalize' callback is called once at the end, to give the
118 : * astreamer a chance to perform cleanup such as closing files. Finally,
119 : * because this code is running in a frontend environment where, as of this
120 : * writing, there are no memory contexts, the 'free' callback is called to
121 : * release memory. These callbacks should always be invoked using the static
122 : * inline functions defined below.
123 : */
124 : struct astreamer_ops
125 : {
126 : void (*content) (astreamer *streamer, astreamer_member *member,
127 : const char *data, int len,
128 : astreamer_archive_context context);
129 : void (*finalize) (astreamer *streamer);
130 : void (*free) (astreamer *streamer);
131 : };
132 :
133 : /* Send some content to a astreamer. */
134 : static inline void
135 2566164 : astreamer_content(astreamer *streamer, astreamer_member *member,
136 : const char *data, int len,
137 : astreamer_archive_context context)
138 : {
139 : Assert(streamer != NULL);
140 2566164 : streamer->bbs_ops->content(streamer, member, data, len, context);
141 2566160 : }
142 :
143 : /* Finalize a astreamer. */
144 : static inline void
145 802 : astreamer_finalize(astreamer *streamer)
146 : {
147 : Assert(streamer != NULL);
148 802 : streamer->bbs_ops->finalize(streamer);
149 802 : }
150 :
151 : /* Free a astreamer. */
152 : static inline void
153 802 : astreamer_free(astreamer *streamer)
154 : {
155 : Assert(streamer != NULL);
156 802 : streamer->bbs_ops->free(streamer);
157 802 : }
158 :
159 : /*
160 : * This is a convenience method for use when implementing a astreamer; it is
161 : * not for use by outside callers. It adds the amount of data specified by
162 : * 'nbytes' to the astreamer's buffer and adjusts '*len' and '*data'
163 : * accordingly.
164 : */
165 : static inline void
166 317110 : astreamer_buffer_bytes(astreamer *streamer, const char **data, int *len,
167 : int nbytes)
168 : {
169 : Assert(nbytes <= *len);
170 :
171 317110 : appendBinaryStringInfo(&streamer->bbs_buffer, *data, nbytes);
172 317110 : *len -= nbytes;
173 317110 : *data += nbytes;
174 317110 : }
175 :
176 : /*
177 : * This is a convenience method for use when implementing a astreamer; it is
178 : * not for use by outsider callers. It attempts to add enough data to the
179 : * astreamer's buffer to reach a length of target_bytes and adjusts '*len'
180 : * and '*data' accordingly. It returns true if the target length has been
181 : * reached and false otherwise.
182 : */
183 : static inline bool
184 316738 : astreamer_buffer_until(astreamer *streamer, const char **data, int *len,
185 : int target_bytes)
186 : {
187 316738 : int buflen = streamer->bbs_buffer.len;
188 :
189 316738 : if (buflen >= target_bytes)
190 : {
191 : /* Target length already reached; nothing to do. */
192 0 : return true;
193 : }
194 :
195 316738 : if (buflen + *len < target_bytes)
196 : {
197 : /* Not enough data to reach target length; buffer all of it. */
198 0 : astreamer_buffer_bytes(streamer, data, len, *len);
199 0 : return false;
200 : }
201 :
202 : /* Buffer just enough to reach the target length. */
203 316738 : astreamer_buffer_bytes(streamer, data, len, target_bytes - buflen);
204 316738 : return true;
205 : }
206 :
207 : /*
208 : * Functions for creating astreamer objects of various types. See the header
209 : * comments for each of these functions for details.
210 : */
211 : extern astreamer *astreamer_plain_writer_new(char *pathname, FILE *file);
212 : extern astreamer *astreamer_gzip_writer_new(char *pathname, FILE *file,
213 : pg_compress_specification *compress);
214 : extern astreamer *astreamer_extractor_new(const char *basepath,
215 : const char *(*link_map) (const char *),
216 : void (*report_output_file) (const char *));
217 :
218 : extern astreamer *astreamer_gzip_decompressor_new(astreamer *next);
219 : extern astreamer *astreamer_lz4_compressor_new(astreamer *next,
220 : pg_compress_specification *compress);
221 : extern astreamer *astreamer_lz4_decompressor_new(astreamer *next);
222 : extern astreamer *astreamer_zstd_compressor_new(astreamer *next,
223 : pg_compress_specification *compress);
224 : extern astreamer *astreamer_zstd_decompressor_new(astreamer *next);
225 : extern astreamer *astreamer_tar_parser_new(astreamer *next);
226 : extern astreamer *astreamer_tar_terminator_new(astreamer *next);
227 : extern astreamer *astreamer_tar_archiver_new(astreamer *next);
228 :
229 : #endif
|