Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * basic_archive.c
4 : *
5 : * This file demonstrates a basic archive library implementation that is
6 : * roughly equivalent to the following shell command:
7 : *
8 : * test ! -f /path/to/dest && cp /path/to/src /path/to/dest
9 : *
10 : * One notable difference between this module and the shell command above
11 : * is that this module first copies the file to a temporary destination,
12 : * syncs it to disk, and then durably moves it to the final destination.
13 : *
14 : * Another notable difference is that if /path/to/dest already exists
15 : * but has contents identical to /path/to/src, archiving will succeed,
16 : * whereas the command shown above would fail. This prevents problems if
17 : * a file is successfully archived and then the system crashes before
18 : * a durable record of the success has been made.
19 : *
20 : * Copyright (c) 2022-2025, PostgreSQL Global Development Group
21 : *
22 : * IDENTIFICATION
23 : * contrib/basic_archive/basic_archive.c
24 : *
25 : *-------------------------------------------------------------------------
26 : */
27 : #include "postgres.h"
28 :
29 : #include <sys/stat.h>
30 : #include <sys/time.h>
31 : #include <unistd.h>
32 :
33 : #include "archive/archive_module.h"
34 : #include "common/int.h"
35 : #include "miscadmin.h"
36 : #include "storage/copydir.h"
37 : #include "storage/fd.h"
38 : #include "utils/guc.h"
39 :
40 2 : PG_MODULE_MAGIC_EXT(
41 : .name = "basic_archive",
42 : .version = PG_VERSION
43 : );
44 :
45 : static char *archive_directory = NULL;
46 :
47 : static bool basic_archive_configured(ArchiveModuleState *state);
48 : static bool basic_archive_file(ArchiveModuleState *state, const char *file, const char *path);
49 : static bool check_archive_directory(char **newval, void **extra, GucSource source);
50 : static bool compare_files(const char *file1, const char *file2);
51 :
52 : static const ArchiveModuleCallbacks basic_archive_callbacks = {
53 : .startup_cb = NULL,
54 : .check_configured_cb = basic_archive_configured,
55 : .archive_file_cb = basic_archive_file,
56 : .shutdown_cb = NULL
57 : };
58 :
59 : /*
60 : * _PG_init
61 : *
62 : * Defines the module's GUC.
63 : */
64 : void
65 2 : _PG_init(void)
66 : {
67 2 : DefineCustomStringVariable("basic_archive.archive_directory",
68 : gettext_noop("Archive file destination directory."),
69 : NULL,
70 : &archive_directory,
71 : "",
72 : PGC_SIGHUP,
73 : 0,
74 : check_archive_directory, NULL, NULL);
75 :
76 2 : MarkGUCPrefixReserved("basic_archive");
77 2 : }
78 :
79 : /*
80 : * _PG_archive_module_init
81 : *
82 : * Returns the module's archiving callbacks.
83 : */
84 : const ArchiveModuleCallbacks *
85 2 : _PG_archive_module_init(void)
86 : {
87 2 : return &basic_archive_callbacks;
88 : }
89 :
90 : /*
91 : * check_archive_directory
92 : *
93 : * Checks that the provided archive directory exists.
94 : */
95 : static bool
96 4 : check_archive_directory(char **newval, void **extra, GucSource source)
97 : {
98 : struct stat st;
99 :
100 : /*
101 : * The default value is an empty string, so we have to accept that value.
102 : * Our check_configured callback also checks for this and prevents
103 : * archiving from proceeding if it is still empty.
104 : */
105 4 : if (*newval == NULL || *newval[0] == '\0')
106 2 : return true;
107 :
108 : /*
109 : * Make sure the file paths won't be too long. The docs indicate that the
110 : * file names to be archived can be up to 64 characters long.
111 : */
112 2 : if (strlen(*newval) + 64 + 2 >= MAXPGPATH)
113 : {
114 0 : GUC_check_errdetail("Archive directory too long.");
115 0 : return false;
116 : }
117 :
118 : /*
119 : * Do a basic sanity check that the specified archive directory exists. It
120 : * could be removed at some point in the future, so we still need to be
121 : * prepared for it not to exist in the actual archiving logic.
122 : */
123 2 : if (stat(*newval, &st) != 0 || !S_ISDIR(st.st_mode))
124 : {
125 0 : GUC_check_errdetail("Specified archive directory does not exist.");
126 0 : return false;
127 : }
128 :
129 2 : return true;
130 : }
131 :
132 : /*
133 : * basic_archive_configured
134 : *
135 : * Checks that archive_directory is not blank.
136 : */
137 : static bool
138 4 : basic_archive_configured(ArchiveModuleState *state)
139 : {
140 4 : if (archive_directory != NULL && archive_directory[0] != '\0')
141 4 : return true;
142 :
143 0 : arch_module_check_errdetail("%s is not set.",
144 : "basic_archive.archive_directory");
145 0 : return false;
146 : }
147 :
148 : /*
149 : * basic_archive_file
150 : *
151 : * Archives one file.
152 : */
153 : static bool
154 4 : basic_archive_file(ArchiveModuleState *state, const char *file, const char *path)
155 : {
156 : char destination[MAXPGPATH];
157 : char temp[MAXPGPATH + 256];
158 : struct stat st;
159 : struct timeval tv;
160 : uint64 epoch; /* milliseconds */
161 :
162 4 : ereport(DEBUG3,
163 : (errmsg("archiving \"%s\" via basic_archive", file)));
164 :
165 4 : snprintf(destination, MAXPGPATH, "%s/%s", archive_directory, file);
166 :
167 : /*
168 : * First, check if the file has already been archived. If it already
169 : * exists and has the same contents as the file we're trying to archive,
170 : * we can return success (after ensuring the file is persisted to disk).
171 : * This scenario is possible if the server crashed after archiving the
172 : * file but before renaming its .ready file to .done.
173 : *
174 : * If the archive file already exists but has different contents,
175 : * something might be wrong, so we just fail.
176 : */
177 4 : if (stat(destination, &st) == 0)
178 : {
179 0 : if (compare_files(path, destination))
180 : {
181 0 : ereport(DEBUG3,
182 : (errmsg("archive file \"%s\" already exists with identical contents",
183 : destination)));
184 :
185 0 : fsync_fname(destination, false);
186 0 : fsync_fname(archive_directory, true);
187 :
188 0 : return true;
189 : }
190 :
191 0 : ereport(ERROR,
192 : (errmsg("archive file \"%s\" already exists", destination)));
193 : }
194 4 : else if (errno != ENOENT)
195 0 : ereport(ERROR,
196 : (errcode_for_file_access(),
197 : errmsg("could not stat file \"%s\": %m", destination)));
198 :
199 : /*
200 : * Pick a sufficiently unique name for the temporary file so that a
201 : * collision is unlikely. This helps avoid problems in case a temporary
202 : * file was left around after a crash or another server happens to be
203 : * archiving to the same directory.
204 : */
205 4 : gettimeofday(&tv, NULL);
206 8 : if (pg_mul_u64_overflow((uint64) 1000, (uint64) tv.tv_sec, &epoch) ||
207 4 : pg_add_u64_overflow(epoch, (uint64) (tv.tv_usec / 1000), &epoch))
208 0 : elog(ERROR, "could not generate temporary file name for archiving");
209 :
210 4 : snprintf(temp, sizeof(temp), "%s/%s.%s.%d." UINT64_FORMAT,
211 : archive_directory, "archtemp", file, MyProcPid, epoch);
212 :
213 : /*
214 : * Copy the file to its temporary destination. Note that this will fail
215 : * if temp already exists.
216 : */
217 4 : copy_file(path, temp);
218 :
219 : /*
220 : * Sync the temporary file to disk and move it to its final destination.
221 : * Note that this will overwrite any existing file, but this is only
222 : * possible if someone else created the file since the stat() above.
223 : */
224 4 : (void) durable_rename(temp, destination, ERROR);
225 :
226 4 : ereport(DEBUG1,
227 : (errmsg("archived \"%s\" via basic_archive", file)));
228 :
229 4 : return true;
230 : }
231 :
232 : /*
233 : * compare_files
234 : *
235 : * Returns whether the contents of the files are the same.
236 : */
237 : static bool
238 0 : compare_files(const char *file1, const char *file2)
239 : {
240 : #define CMP_BUF_SIZE (4096)
241 : char buf1[CMP_BUF_SIZE];
242 : char buf2[CMP_BUF_SIZE];
243 : int fd1;
244 : int fd2;
245 0 : bool ret = true;
246 :
247 0 : fd1 = OpenTransientFile(file1, O_RDONLY | PG_BINARY);
248 0 : if (fd1 < 0)
249 0 : ereport(ERROR,
250 : (errcode_for_file_access(),
251 : errmsg("could not open file \"%s\": %m", file1)));
252 :
253 0 : fd2 = OpenTransientFile(file2, O_RDONLY | PG_BINARY);
254 0 : if (fd2 < 0)
255 0 : ereport(ERROR,
256 : (errcode_for_file_access(),
257 : errmsg("could not open file \"%s\": %m", file2)));
258 :
259 : for (;;)
260 0 : {
261 0 : int nbytes = 0;
262 0 : int buf1_len = 0;
263 0 : int buf2_len = 0;
264 :
265 0 : while (buf1_len < CMP_BUF_SIZE)
266 : {
267 0 : nbytes = read(fd1, buf1 + buf1_len, CMP_BUF_SIZE - buf1_len);
268 0 : if (nbytes < 0)
269 0 : ereport(ERROR,
270 : (errcode_for_file_access(),
271 : errmsg("could not read file \"%s\": %m", file1)));
272 0 : else if (nbytes == 0)
273 0 : break;
274 :
275 0 : buf1_len += nbytes;
276 : }
277 :
278 0 : while (buf2_len < CMP_BUF_SIZE)
279 : {
280 0 : nbytes = read(fd2, buf2 + buf2_len, CMP_BUF_SIZE - buf2_len);
281 0 : if (nbytes < 0)
282 0 : ereport(ERROR,
283 : (errcode_for_file_access(),
284 : errmsg("could not read file \"%s\": %m", file2)));
285 0 : else if (nbytes == 0)
286 0 : break;
287 :
288 0 : buf2_len += nbytes;
289 : }
290 :
291 0 : if (buf1_len != buf2_len || memcmp(buf1, buf2, buf1_len) != 0)
292 : {
293 0 : ret = false;
294 0 : break;
295 : }
296 0 : else if (buf1_len == 0)
297 0 : break;
298 : }
299 :
300 0 : if (CloseTransientFile(fd1) != 0)
301 0 : ereport(ERROR,
302 : (errcode_for_file_access(),
303 : errmsg("could not close file \"%s\": %m", file1)));
304 :
305 0 : if (CloseTransientFile(fd2) != 0)
306 0 : ereport(ERROR,
307 : (errcode_for_file_access(),
308 : errmsg("could not close file \"%s\": %m", file2)));
309 :
310 0 : return ret;
311 : }
|