Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * astreamer_file.c
4 : *
5 : * Archive streamers that write to files. astreamer_plain_writer writes
6 : * the whole archive to a single file, and astreamer_extractor writes
7 : * each archive member to a separate file in a given directory.
8 : *
9 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
10 : *
11 : * IDENTIFICATION
12 : * src/fe_utils/astreamer_file.c
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres_fe.h"
17 :
18 : #include <unistd.h>
19 :
20 : #include "common/file_perm.h"
21 : #include "common/logging.h"
22 : #include "fe_utils/astreamer.h"
23 :
/*
 * Private state for the astreamer that writes everything it receives to a
 * single file.
 */
24 : typedef struct astreamer_plain_writer
25 : {
26 : astreamer base; /* generic astreamer part; callbacks cast astreamer * to this struct */
27 : char *pathname; /* pstrdup'd copy of the path, used in error messages */
28 : FILE *file; /* output stream: supplied by the caller or opened by us */
29 : bool should_close_file; /* set to true only when we opened 'file' ourselves */
30 : } astreamer_plain_writer;
31 :
/*
 * Private state for the astreamer that extracts each archive member to its
 * own file below a base directory.
 */
32 : typedef struct astreamer_extractor
33 : {
34 : astreamer base; /* generic astreamer part; callbacks cast astreamer * to this struct */
35 : char *basepath; /* pstrdup'd directory prepended to every member pathname */
36 : const char *(*link_map) (const char *); /* optional rewrite of symlink targets; may be NULL */
37 : void (*report_output_file) (const char *); /* optional callback run after each member header; may be NULL */
38 : char filename[MAXPGPATH]; /* "basepath/member" path of the member being processed */
39 : FILE *file; /* open handle while a regular file is being written, else NULL */
40 : } astreamer_extractor;
41 :
/*
 * Callbacks for the plain-writer astreamer, followed by the ops table that
 * astreamer_plain_writer_new() installs into each new instance.
 */
42 : static void astreamer_plain_writer_content(astreamer *streamer,
43 : astreamer_member *member,
44 : const char *data, int len,
45 : astreamer_archive_context context);
46 : static void astreamer_plain_writer_finalize(astreamer *streamer);
47 : static void astreamer_plain_writer_free(astreamer *streamer);
48 :
49 : static const astreamer_ops astreamer_plain_writer_ops = {
50 : .content = astreamer_plain_writer_content,
51 : .finalize = astreamer_plain_writer_finalize,
52 : .free = astreamer_plain_writer_free
53 : };
54 :
/*
 * Callbacks and filesystem helpers for the extractor astreamer, followed by
 * the ops table that astreamer_extractor_new() installs into each new
 * instance.
 */
55 : static void astreamer_extractor_content(astreamer *streamer,
56 : astreamer_member *member,
57 : const char *data, int len,
58 : astreamer_archive_context context);
59 : static void astreamer_extractor_finalize(astreamer *streamer);
60 : static void astreamer_extractor_free(astreamer *streamer);
61 : static void extract_directory(const char *filename, mode_t mode);
62 : static void extract_link(const char *filename, const char *linktarget);
63 : static FILE *create_file_for_extract(const char *filename, mode_t mode);
64 :
65 : static const astreamer_ops astreamer_extractor_ops = {
66 : .content = astreamer_extractor_content,
67 : .finalize = astreamer_extractor_finalize,
68 : .free = astreamer_extractor_free
69 : };
70 :
71 : /*
72 : * Create a astreamer that just writes data to a file.
73 : *
74 : * The caller must specify a pathname and may specify a file. The pathname is
75 : * used for error-reporting purposes either way. If file is NULL, the pathname
76 : * also identifies the file to which the data should be written: it is opened
77 : * for writing and closed when done. If file is not NULL, the data is written
78 : * there.
79 : */
80 : astreamer *
81 19 : astreamer_plain_writer_new(char *pathname, FILE *file)
82 : {
83 : astreamer_plain_writer *streamer;
84 :
85 19 : streamer = palloc0_object(astreamer_plain_writer);
86 19 : *((const astreamer_ops **) &streamer->base.bbs_ops) =
87 : &astreamer_plain_writer_ops;
88 :
89 19 : streamer->pathname = pstrdup(pathname);
90 19 : streamer->file = file;
91 :
92 19 : if (file == NULL)
93 : {
94 19 : streamer->file = fopen(pathname, "wb");
95 19 : if (streamer->file == NULL)
96 0 : pg_fatal("could not create file \"%s\": %m", pathname);
97 19 : streamer->should_close_file = true;
98 : }
99 :
100 19 : return &streamer->base;
101 : }
102 :
103 : /*
104 : * Write archive content to file.
105 : */
106 : static void
107 30558 : astreamer_plain_writer_content(astreamer *streamer,
108 : astreamer_member *member, const char *data,
109 : int len, astreamer_archive_context context)
110 : {
111 : astreamer_plain_writer *mystreamer;
112 :
113 30558 : mystreamer = (astreamer_plain_writer *) streamer;
114 :
115 30558 : if (len == 0)
116 0 : return;
117 :
118 30558 : errno = 0;
119 30558 : if (fwrite(data, len, 1, mystreamer->file) != 1)
120 : {
121 : /* if write didn't set errno, assume problem is no disk space */
122 0 : if (errno == 0)
123 0 : errno = ENOSPC;
124 0 : pg_fatal("could not write to file \"%s\": %m",
125 : mystreamer->pathname);
126 : }
127 : }
128 :
129 : /*
130 : * End-of-archive processing when writing to a plain file consists of closing
131 : * the file if we opened it, but not if the caller provided it.
132 : */
133 : static void
134 19 : astreamer_plain_writer_finalize(astreamer *streamer)
135 : {
136 : astreamer_plain_writer *mystreamer;
137 :
138 19 : mystreamer = (astreamer_plain_writer *) streamer;
139 :
140 19 : if (mystreamer->should_close_file && fclose(mystreamer->file) != 0)
141 0 : pg_fatal("could not close file \"%s\": %m",
142 : mystreamer->pathname);
143 :
144 19 : mystreamer->file = NULL;
145 19 : mystreamer->should_close_file = false;
146 19 : }
147 :
148 : /*
149 : * Free memory associated with this astreamer.
150 : */
151 : static void
152 19 : astreamer_plain_writer_free(astreamer *streamer)
153 : {
154 : astreamer_plain_writer *mystreamer;
155 :
156 19 : mystreamer = (astreamer_plain_writer *) streamer;
157 :
158 : Assert(!mystreamer->should_close_file);
159 : Assert(mystreamer->base.bbs_next == NULL);
160 :
161 19 : pfree(mystreamer->pathname);
162 19 : pfree(mystreamer);
163 19 : }
164 :
165 : /*
166 : * Create a astreamer that extracts an archive.
167 : *
168 : * All pathnames in the archive are interpreted relative to basepath.
169 : *
170 : * Unlike e.g. astreamer_plain_writer_new() we can't do anything useful here
171 : * with untyped chunks; we need typed chunks which follow the rules described
172 : * in astreamer.h. Assuming we have that, we don't need to worry about the
173 : * original archive format; it's enough to just look at the member information
174 : * provided and write to the corresponding file.
175 : *
176 : * 'link_map' is a function that will be applied to the target of any
177 : * symbolic link, and which should return a replacement pathname to be used
178 : * in its place. If NULL, the symbolic link target is used without
179 : * modification.
180 : *
181 : * 'report_output_file' is a function that will be called each time we open a
182 : * new output file. The pathname to that file is passed as an argument. If
183 : * NULL, the call is skipped.
184 : */
185 : astreamer *
186 171 : astreamer_extractor_new(const char *basepath,
187 : const char *(*link_map) (const char *),
188 : void (*report_output_file) (const char *))
189 : {
190 : astreamer_extractor *streamer;
191 :
192 171 : streamer = palloc0_object(astreamer_extractor);
193 171 : *((const astreamer_ops **) &streamer->base.bbs_ops) =
194 : &astreamer_extractor_ops;
195 171 : streamer->basepath = pstrdup(basepath);
196 171 : streamer->link_map = link_map;
197 171 : streamer->report_output_file = report_output_file;
198 :
199 171 : return &streamer->base;
200 : }
201 :
202 : /*
203 : * Extract archive contents to the filesystem.
204 : */
205 : static void
206 481203 : astreamer_extractor_content(astreamer *streamer, astreamer_member *member,
207 : const char *data, int len,
208 : astreamer_archive_context context)
209 : {
210 481203 : astreamer_extractor *mystreamer = (astreamer_extractor *) streamer;
211 : int fnamelen;
212 :
213 : Assert(member != NULL || context == ASTREAMER_ARCHIVE_TRAILER);
214 : Assert(context != ASTREAMER_UNKNOWN);
215 :
216 481203 : switch (context)
217 : {
218 152909 : case ASTREAMER_MEMBER_HEADER:
219 : Assert(mystreamer->file == NULL);
220 :
221 : /* Prepend basepath. */
222 152909 : snprintf(mystreamer->filename, sizeof(mystreamer->filename),
223 152909 : "%s/%s", mystreamer->basepath, member->pathname);
224 :
225 : /* Remove any trailing slash. */
226 152909 : fnamelen = strlen(mystreamer->filename);
227 152909 : if (mystreamer->filename[fnamelen - 1] == '/')
228 3733 : mystreamer->filename[fnamelen - 1] = '\0';
229 :
230 : /* Dispatch based on file type. */
231 152909 : if (member->is_regular)
232 149176 : mystreamer->file =
233 149176 : create_file_for_extract(mystreamer->filename,
234 : member->mode);
235 3733 : else if (member->is_directory)
236 3717 : extract_directory(mystreamer->filename, member->mode);
237 16 : else if (member->is_symlink)
238 : {
239 16 : const char *linktarget = member->linktarget;
240 :
241 16 : if (mystreamer->link_map)
242 16 : linktarget = mystreamer->link_map(linktarget);
243 16 : extract_link(mystreamer->filename, linktarget);
244 : }
245 :
246 : /* Report output file change. */
247 152909 : if (mystreamer->report_output_file)
248 152909 : mystreamer->report_output_file(mystreamer->filename);
249 152909 : break;
250 :
251 175218 : case ASTREAMER_MEMBER_CONTENTS:
252 175218 : if (mystreamer->file == NULL)
253 0 : break;
254 :
255 175218 : errno = 0;
256 175218 : if (len > 0 && fwrite(data, len, 1, mystreamer->file) != 1)
257 : {
258 : /* if write didn't set errno, assume problem is no disk space */
259 0 : if (errno == 0)
260 0 : errno = ENOSPC;
261 0 : pg_fatal("could not write to file \"%s\": %m",
262 : mystreamer->filename);
263 : }
264 175218 : break;
265 :
266 152907 : case ASTREAMER_MEMBER_TRAILER:
267 152907 : if (mystreamer->file == NULL)
268 3733 : break;
269 149174 : if (fclose(mystreamer->file) != 0)
270 0 : pg_fatal("could not close file \"%s\": %m",
271 : mystreamer->filename);
272 149174 : mystreamer->file = NULL;
273 149174 : break;
274 :
275 169 : case ASTREAMER_ARCHIVE_TRAILER:
276 169 : break;
277 :
278 0 : default:
279 : /* Shouldn't happen. */
280 0 : pg_fatal("unexpected state while extracting archive");
281 : }
282 481203 : }
283 :
/*
 * Decide whether finding 'pathname' already present as a directory is OK.
 *
 * When streaming WAL, pg_wal (or pg_xlog for pre-9.6 clusters) will already
 * have been created by the wal receiver process.  Likewise, when a WAL
 * directory location was specified, pg_wal (or pg_xlog) was created as a
 * symbolic link before the backup proper began.  Creation failures on those
 * and related directories are therefore ignored.
 *
 * With in-place tablespaces, pg_tblspc and its subdirectories may exist
 * before we get here, so pg_tblspc itself and all-digit names located
 * directly under the first "/pg_tblspc/" in the path are tolerated as well.
 */
static bool
should_allow_existing_directory(const char *pathname)
{
	static const char *const tolerated_names[] = {
		"pg_wal",
		"pg_xlog",
		"archive_status",
		"summaries",
		"pg_tblspc"
	};
	static const char tblspc_marker[] = "/pg_tblspc/";
	const char *leaf = last_dir_separator(pathname) + 1;
	const char *marker_pos;
	size_t		i;

	/* Well-known directory names are always acceptable. */
	for (i = 0; i < sizeof(tolerated_names) / sizeof(tolerated_names[0]); i++)
	{
		if (strcmp(leaf, tolerated_names[i]) == 0)
			return true;
	}

	/* Anything else must consist solely of digits ... */
	if (strspn(leaf, "0123456789") != strlen(leaf))
		return false;

	/* ... and sit immediately below pg_tblspc. */
	marker_pos = strstr(pathname, tblspc_marker);
	return marker_pos != NULL && marker_pos + strlen(tblspc_marker) == leaf;
}
317 :
318 : /*
319 : * Create a directory.
320 : */
321 : static void
322 3717 : extract_directory(const char *filename, mode_t mode)
323 : {
324 3717 : if (mkdir(filename, pg_dir_create_mode) != 0 &&
325 409 : (errno != EEXIST || !should_allow_existing_directory(filename)))
326 0 : pg_fatal("could not create directory \"%s\": %m",
327 : filename);
328 :
329 : #ifndef WIN32
330 3717 : if (chmod(filename, mode))
331 0 : pg_fatal("could not set permissions on directory \"%s\": %m",
332 : filename);
333 : #endif
334 3717 : }
335 :
/*
 * Create the symbolic link 'filename' pointing at 'linktarget'.
 *
 * Most likely this is a link inside pg_tblspc that points to a tablespace
 * location, with any --tablespace-mapping from the command line already
 * applied by the caller.  (The mapping is applied blindly, without checking
 * that the link really lives in pg_tblspc.  Other symlinks are not expected
 * in a data directory, but if there are some, being able to map them too can
 * be regarded as an undocumented feature.)
 */
static void
extract_link(const char *filename, const char *linktarget)
{
	int			rc = symlink(linktarget, filename);

	if (rc != 0)
		pg_fatal("could not create symbolic link from \"%s\" to \"%s\": %m",
				 filename, linktarget);
}
353 :
/*
 * Create (or truncate) the regular file 'filename' and set its permissions
 * to 'mode'.
 *
 * The open stream is handed back so that the caller can write the member's
 * content into it.  Failure to create the file or to set its permissions is
 * fatal.  Permissions are not touched on Windows.
 */
static FILE *
create_file_for_extract(const char *filename, mode_t mode)
{
	FILE	   *fp = fopen(filename, "wb");

	if (fp == NULL)
		pg_fatal("could not create file \"%s\": %m", filename);

#ifndef WIN32
	if (chmod(filename, mode) != 0)
		pg_fatal("could not set permissions on file \"%s\": %m",
				 filename);
#endif

	return fp;
}
376 :
377 : /*
378 : * End-of-stream processing for extracting an archive.
379 : *
380 : * There's nothing to do here but sanity checking.
381 : */
382 : static void
383 169 : astreamer_extractor_finalize(astreamer *streamer)
384 : {
385 169 : astreamer_extractor *mystreamer PG_USED_FOR_ASSERTS_ONLY
386 : = (astreamer_extractor *) streamer;
387 :
388 : Assert(mystreamer->file == NULL);
389 169 : }
390 :
391 : /*
392 : * Free memory.
393 : */
394 : static void
395 169 : astreamer_extractor_free(astreamer *streamer)
396 : {
397 169 : astreamer_extractor *mystreamer = (astreamer_extractor *) streamer;
398 :
399 169 : pfree(mystreamer->basepath);
400 169 : pfree(mystreamer);
401 169 : }
|