Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * basic_archive.c
4 : *
5 : * This file demonstrates a basic archive library implementation that is
6 : * roughly equivalent to the following shell command:
7 : *
8 : * test ! -f /path/to/dest && cp /path/to/src /path/to/dest
9 : *
10 : * One notable difference between this module and the shell command above
11 : * is that this module first copies the file to a temporary destination,
12 : * syncs it to disk, and then durably moves it to the final destination.
13 : *
14 : * Another notable difference is that if /path/to/dest already exists
15 : * but has contents identical to /path/to/src, archiving will succeed,
16 : * whereas the command shown above would fail. This prevents problems if
17 : * a file is successfully archived and then the system crashes before
18 : * a durable record of the success has been made.
19 : *
20 : * Copyright (c) 2022-2025, PostgreSQL Global Development Group
21 : *
22 : * IDENTIFICATION
23 : * contrib/basic_archive/basic_archive.c
24 : *
25 : *-------------------------------------------------------------------------
26 : */
27 : #include "postgres.h"
28 :
29 : #include <sys/stat.h>
30 : #include <sys/time.h>
31 : #include <unistd.h>
32 :
33 : #include "archive/archive_module.h"
34 : #include "common/int.h"
35 : #include "miscadmin.h"
36 : #include "storage/copydir.h"
37 : #include "storage/fd.h"
38 : #include "utils/guc.h"
39 :
40 2 : PG_MODULE_MAGIC;
41 :
42 : static char *archive_directory = NULL;
43 :
44 : static bool basic_archive_configured(ArchiveModuleState *state);
45 : static bool basic_archive_file(ArchiveModuleState *state, const char *file, const char *path);
46 : static bool check_archive_directory(char **newval, void **extra, GucSource source);
47 : static bool compare_files(const char *file1, const char *file2);
48 :
49 : static const ArchiveModuleCallbacks basic_archive_callbacks = {
50 : .startup_cb = NULL,
51 : .check_configured_cb = basic_archive_configured,
52 : .archive_file_cb = basic_archive_file,
53 : .shutdown_cb = NULL
54 : };
55 :
56 : /*
57 : * _PG_init
58 : *
59 : * Defines the module's GUC.
60 : */
61 : void
62 2 : _PG_init(void)
63 : {
64 2 : DefineCustomStringVariable("basic_archive.archive_directory",
65 : gettext_noop("Archive file destination directory."),
66 : NULL,
67 : &archive_directory,
68 : "",
69 : PGC_SIGHUP,
70 : 0,
71 : check_archive_directory, NULL, NULL);
72 :
73 2 : MarkGUCPrefixReserved("basic_archive");
74 2 : }
75 :
76 : /*
77 : * _PG_archive_module_init
78 : *
79 : * Returns the module's archiving callbacks.
80 : */
81 : const ArchiveModuleCallbacks *
82 2 : _PG_archive_module_init(void)
83 : {
84 2 : return &basic_archive_callbacks;
85 : }
86 :
87 : /*
88 : * check_archive_directory
89 : *
90 : * Checks that the provided archive directory exists.
91 : */
92 : static bool
93 4 : check_archive_directory(char **newval, void **extra, GucSource source)
94 : {
95 : struct stat st;
96 :
97 : /*
98 : * The default value is an empty string, so we have to accept that value.
99 : * Our check_configured callback also checks for this and prevents
100 : * archiving from proceeding if it is still empty.
101 : */
102 4 : if (*newval == NULL || *newval[0] == '\0')
103 2 : return true;
104 :
105 : /*
106 : * Make sure the file paths won't be too long. The docs indicate that the
107 : * file names to be archived can be up to 64 characters long.
108 : */
109 2 : if (strlen(*newval) + 64 + 2 >= MAXPGPATH)
110 : {
111 0 : GUC_check_errdetail("Archive directory too long.");
112 0 : return false;
113 : }
114 :
115 : /*
116 : * Do a basic sanity check that the specified archive directory exists. It
117 : * could be removed at some point in the future, so we still need to be
118 : * prepared for it not to exist in the actual archiving logic.
119 : */
120 2 : if (stat(*newval, &st) != 0 || !S_ISDIR(st.st_mode))
121 : {
122 0 : GUC_check_errdetail("Specified archive directory does not exist.");
123 0 : return false;
124 : }
125 :
126 2 : return true;
127 : }
128 :
129 : /*
130 : * basic_archive_configured
131 : *
132 : * Checks that archive_directory is not blank.
133 : */
134 : static bool
135 4 : basic_archive_configured(ArchiveModuleState *state)
136 : {
137 4 : if (archive_directory != NULL && archive_directory[0] != '\0')
138 4 : return true;
139 :
140 0 : arch_module_check_errdetail("%s is not set.",
141 : "basic_archive.archive_directory");
142 0 : return false;
143 : }
144 :
145 : /*
146 : * basic_archive_file
147 : *
148 : * Archives one file.
149 : */
150 : static bool
151 4 : basic_archive_file(ArchiveModuleState *state, const char *file, const char *path)
152 : {
153 : char destination[MAXPGPATH];
154 : char temp[MAXPGPATH + 256];
155 : struct stat st;
156 : struct timeval tv;
157 : uint64 epoch; /* milliseconds */
158 :
159 4 : ereport(DEBUG3,
160 : (errmsg("archiving \"%s\" via basic_archive", file)));
161 :
162 4 : snprintf(destination, MAXPGPATH, "%s/%s", archive_directory, file);
163 :
164 : /*
165 : * First, check if the file has already been archived. If it already
166 : * exists and has the same contents as the file we're trying to archive,
167 : * we can return success (after ensuring the file is persisted to disk).
168 : * This scenario is possible if the server crashed after archiving the
169 : * file but before renaming its .ready file to .done.
170 : *
171 : * If the archive file already exists but has different contents,
172 : * something might be wrong, so we just fail.
173 : */
174 4 : if (stat(destination, &st) == 0)
175 : {
176 0 : if (compare_files(path, destination))
177 : {
178 0 : ereport(DEBUG3,
179 : (errmsg("archive file \"%s\" already exists with identical contents",
180 : destination)));
181 :
182 0 : fsync_fname(destination, false);
183 0 : fsync_fname(archive_directory, true);
184 :
185 0 : return true;
186 : }
187 :
188 0 : ereport(ERROR,
189 : (errmsg("archive file \"%s\" already exists", destination)));
190 : }
191 4 : else if (errno != ENOENT)
192 0 : ereport(ERROR,
193 : (errcode_for_file_access(),
194 : errmsg("could not stat file \"%s\": %m", destination)));
195 :
196 : /*
197 : * Pick a sufficiently unique name for the temporary file so that a
198 : * collision is unlikely. This helps avoid problems in case a temporary
199 : * file was left around after a crash or another server happens to be
200 : * archiving to the same directory.
201 : */
202 4 : gettimeofday(&tv, NULL);
203 8 : if (pg_mul_u64_overflow((uint64) 1000, (uint64) tv.tv_sec, &epoch) ||
204 4 : pg_add_u64_overflow(epoch, (uint64) (tv.tv_usec / 1000), &epoch))
205 0 : elog(ERROR, "could not generate temporary file name for archiving");
206 :
207 4 : snprintf(temp, sizeof(temp), "%s/%s.%s.%d." UINT64_FORMAT,
208 : archive_directory, "archtemp", file, MyProcPid, epoch);
209 :
210 : /*
211 : * Copy the file to its temporary destination. Note that this will fail
212 : * if temp already exists.
213 : */
214 4 : copy_file(path, temp);
215 :
216 : /*
217 : * Sync the temporary file to disk and move it to its final destination.
218 : * Note that this will overwrite any existing file, but this is only
219 : * possible if someone else created the file since the stat() above.
220 : */
221 4 : (void) durable_rename(temp, destination, ERROR);
222 :
223 4 : ereport(DEBUG1,
224 : (errmsg("archived \"%s\" via basic_archive", file)));
225 :
226 4 : return true;
227 : }
228 :
229 : /*
230 : * compare_files
231 : *
232 : * Returns whether the contents of the files are the same.
233 : */
234 : static bool
235 0 : compare_files(const char *file1, const char *file2)
236 : {
237 : #define CMP_BUF_SIZE (4096)
238 : char buf1[CMP_BUF_SIZE];
239 : char buf2[CMP_BUF_SIZE];
240 : int fd1;
241 : int fd2;
242 0 : bool ret = true;
243 :
244 0 : fd1 = OpenTransientFile(file1, O_RDONLY | PG_BINARY);
245 0 : if (fd1 < 0)
246 0 : ereport(ERROR,
247 : (errcode_for_file_access(),
248 : errmsg("could not open file \"%s\": %m", file1)));
249 :
250 0 : fd2 = OpenTransientFile(file2, O_RDONLY | PG_BINARY);
251 0 : if (fd2 < 0)
252 0 : ereport(ERROR,
253 : (errcode_for_file_access(),
254 : errmsg("could not open file \"%s\": %m", file2)));
255 :
256 : for (;;)
257 0 : {
258 0 : int nbytes = 0;
259 0 : int buf1_len = 0;
260 0 : int buf2_len = 0;
261 :
262 0 : while (buf1_len < CMP_BUF_SIZE)
263 : {
264 0 : nbytes = read(fd1, buf1 + buf1_len, CMP_BUF_SIZE - buf1_len);
265 0 : if (nbytes < 0)
266 0 : ereport(ERROR,
267 : (errcode_for_file_access(),
268 : errmsg("could not read file \"%s\": %m", file1)));
269 0 : else if (nbytes == 0)
270 0 : break;
271 :
272 0 : buf1_len += nbytes;
273 : }
274 :
275 0 : while (buf2_len < CMP_BUF_SIZE)
276 : {
277 0 : nbytes = read(fd2, buf2 + buf2_len, CMP_BUF_SIZE - buf2_len);
278 0 : if (nbytes < 0)
279 0 : ereport(ERROR,
280 : (errcode_for_file_access(),
281 : errmsg("could not read file \"%s\": %m", file2)));
282 0 : else if (nbytes == 0)
283 0 : break;
284 :
285 0 : buf2_len += nbytes;
286 : }
287 :
288 0 : if (buf1_len != buf2_len || memcmp(buf1, buf2, buf1_len) != 0)
289 : {
290 0 : ret = false;
291 0 : break;
292 : }
293 0 : else if (buf1_len == 0)
294 0 : break;
295 : }
296 :
297 0 : if (CloseTransientFile(fd1) != 0)
298 0 : ereport(ERROR,
299 : (errcode_for_file_access(),
300 : errmsg("could not close file \"%s\": %m", file1)));
301 :
302 0 : if (CloseTransientFile(fd2) != 0)
303 0 : ereport(ERROR,
304 : (errcode_for_file_access(),
305 : errmsg("could not close file \"%s\": %m", file2)));
306 :
307 0 : return ret;
308 : }
|