Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * basic_archive.c
4 : *
5 : * This file demonstrates a basic archive library implementation that is
6 : * roughly equivalent to the following shell command:
7 : *
8 : * test ! -f /path/to/dest && cp /path/to/src /path/to/dest
9 : *
10 : * One notable difference between this module and the shell command above
11 : * is that this module first copies the file to a temporary destination,
12 : * syncs it to disk, and then durably moves it to the final destination.
13 : *
14 : * Another notable difference is that if /path/to/dest already exists
15 : * but has contents identical to /path/to/src, archiving will succeed,
16 : * whereas the command shown above would fail. This prevents problems if
17 : * a file is successfully archived and then the system crashes before
18 : * a durable record of the success has been made.
19 : *
20 : * Copyright (c) 2022-2024, PostgreSQL Global Development Group
21 : *
22 : * IDENTIFICATION
23 : * contrib/basic_archive/basic_archive.c
24 : *
25 : *-------------------------------------------------------------------------
26 : */
27 : #include "postgres.h"
28 :
29 : #include <sys/stat.h>
30 : #include <sys/time.h>
31 : #include <unistd.h>
32 :
33 : #include "archive/archive_module.h"
34 : #include "common/int.h"
35 : #include "miscadmin.h"
36 : #include "storage/copydir.h"
37 : #include "storage/fd.h"
38 : #include "utils/guc.h"
39 : #include "utils/memutils.h"
40 :
41 2 : PG_MODULE_MAGIC;
42 :
43 : static char *archive_directory = NULL;
44 :
45 : static bool basic_archive_configured(ArchiveModuleState *state);
46 : static bool basic_archive_file(ArchiveModuleState *state, const char *file, const char *path);
47 : static bool check_archive_directory(char **newval, void **extra, GucSource source);
48 : static bool compare_files(const char *file1, const char *file2);
49 :
50 : static const ArchiveModuleCallbacks basic_archive_callbacks = {
51 : .startup_cb = NULL,
52 : .check_configured_cb = basic_archive_configured,
53 : .archive_file_cb = basic_archive_file,
54 : .shutdown_cb = NULL
55 : };
56 :
57 : /*
58 : * _PG_init
59 : *
60 : * Defines the module's GUC.
61 : */
62 : void
63 2 : _PG_init(void)
64 : {
65 2 : DefineCustomStringVariable("basic_archive.archive_directory",
66 : gettext_noop("Archive file destination directory."),
67 : NULL,
68 : &archive_directory,
69 : "",
70 : PGC_SIGHUP,
71 : 0,
72 : check_archive_directory, NULL, NULL);
73 :
74 2 : MarkGUCPrefixReserved("basic_archive");
75 2 : }
76 :
77 : /*
78 : * _PG_archive_module_init
79 : *
80 : * Returns the module's archiving callbacks.
81 : */
82 : const ArchiveModuleCallbacks *
83 2 : _PG_archive_module_init(void)
84 : {
85 2 : return &basic_archive_callbacks;
86 : }
87 :
88 : /*
89 : * check_archive_directory
90 : *
91 : * Checks that the provided archive directory exists.
92 : */
93 : static bool
94 4 : check_archive_directory(char **newval, void **extra, GucSource source)
95 : {
96 : struct stat st;
97 :
98 : /*
99 : * The default value is an empty string, so we have to accept that value.
100 : * Our check_configured callback also checks for this and prevents
101 : * archiving from proceeding if it is still empty.
102 : */
103 4 : if (*newval == NULL || *newval[0] == '\0')
104 2 : return true;
105 :
106 : /*
107 : * Make sure the file paths won't be too long. The docs indicate that the
108 : * file names to be archived can be up to 64 characters long.
109 : */
110 2 : if (strlen(*newval) + 64 + 2 >= MAXPGPATH)
111 : {
112 0 : GUC_check_errdetail("Archive directory too long.");
113 0 : return false;
114 : }
115 :
116 : /*
117 : * Do a basic sanity check that the specified archive directory exists. It
118 : * could be removed at some point in the future, so we still need to be
119 : * prepared for it not to exist in the actual archiving logic.
120 : */
121 2 : if (stat(*newval, &st) != 0 || !S_ISDIR(st.st_mode))
122 : {
123 0 : GUC_check_errdetail("Specified archive directory does not exist.");
124 0 : return false;
125 : }
126 :
127 2 : return true;
128 : }
129 :
130 : /*
131 : * basic_archive_configured
132 : *
133 : * Checks that archive_directory is not blank.
134 : */
135 : static bool
136 4 : basic_archive_configured(ArchiveModuleState *state)
137 : {
138 4 : if (archive_directory != NULL && archive_directory[0] != '\0')
139 4 : return true;
140 :
141 0 : arch_module_check_errdetail("%s is not set.",
142 : "basic_archive.archive_directory");
143 0 : return false;
144 : }
145 :
146 : /*
147 : * basic_archive_file
148 : *
149 : * Archives one file.
150 : */
151 : static bool
152 4 : basic_archive_file(ArchiveModuleState *state, const char *file, const char *path)
153 : {
154 : char destination[MAXPGPATH];
155 : char temp[MAXPGPATH + 256];
156 : struct stat st;
157 : struct timeval tv;
158 : uint64 epoch; /* milliseconds */
159 :
160 4 : ereport(DEBUG3,
161 : (errmsg("archiving \"%s\" via basic_archive", file)));
162 :
163 4 : snprintf(destination, MAXPGPATH, "%s/%s", archive_directory, file);
164 :
165 : /*
166 : * First, check if the file has already been archived. If it already
167 : * exists and has the same contents as the file we're trying to archive,
168 : * we can return success (after ensuring the file is persisted to disk).
169 : * This scenario is possible if the server crashed after archiving the
170 : * file but before renaming its .ready file to .done.
171 : *
172 : * If the archive file already exists but has different contents,
173 : * something might be wrong, so we just fail.
174 : */
175 4 : if (stat(destination, &st) == 0)
176 : {
177 0 : if (compare_files(path, destination))
178 : {
179 0 : ereport(DEBUG3,
180 : (errmsg("archive file \"%s\" already exists with identical contents",
181 : destination)));
182 :
183 0 : fsync_fname(destination, false);
184 0 : fsync_fname(archive_directory, true);
185 :
186 0 : return true;
187 : }
188 :
189 0 : ereport(ERROR,
190 : (errmsg("archive file \"%s\" already exists", destination)));
191 : }
192 4 : else if (errno != ENOENT)
193 0 : ereport(ERROR,
194 : (errcode_for_file_access(),
195 : errmsg("could not stat file \"%s\": %m", destination)));
196 :
197 : /*
198 : * Pick a sufficiently unique name for the temporary file so that a
199 : * collision is unlikely. This helps avoid problems in case a temporary
200 : * file was left around after a crash or another server happens to be
201 : * archiving to the same directory.
202 : */
203 4 : gettimeofday(&tv, NULL);
204 8 : if (pg_mul_u64_overflow((uint64) 1000, (uint64) tv.tv_sec, &epoch) ||
205 4 : pg_add_u64_overflow(epoch, (uint64) (tv.tv_usec / 1000), &epoch))
206 0 : elog(ERROR, "could not generate temporary file name for archiving");
207 :
208 4 : snprintf(temp, sizeof(temp), "%s/%s.%s.%d." UINT64_FORMAT,
209 : archive_directory, "archtemp", file, MyProcPid, epoch);
210 :
211 : /*
212 : * Copy the file to its temporary destination. Note that this will fail
213 : * if temp already exists.
214 : */
215 4 : copy_file(path, temp);
216 :
217 : /*
218 : * Sync the temporary file to disk and move it to its final destination.
219 : * Note that this will overwrite any existing file, but this is only
220 : * possible if someone else created the file since the stat() above.
221 : */
222 4 : (void) durable_rename(temp, destination, ERROR);
223 :
224 4 : ereport(DEBUG1,
225 : (errmsg("archived \"%s\" via basic_archive", file)));
226 :
227 4 : return true;
228 : }
229 :
230 : /*
231 : * compare_files
232 : *
233 : * Returns whether the contents of the files are the same.
234 : */
235 : static bool
236 0 : compare_files(const char *file1, const char *file2)
237 : {
238 : #define CMP_BUF_SIZE (4096)
239 : char buf1[CMP_BUF_SIZE];
240 : char buf2[CMP_BUF_SIZE];
241 : int fd1;
242 : int fd2;
243 0 : bool ret = true;
244 :
245 0 : fd1 = OpenTransientFile(file1, O_RDONLY | PG_BINARY);
246 0 : if (fd1 < 0)
247 0 : ereport(ERROR,
248 : (errcode_for_file_access(),
249 : errmsg("could not open file \"%s\": %m", file1)));
250 :
251 0 : fd2 = OpenTransientFile(file2, O_RDONLY | PG_BINARY);
252 0 : if (fd2 < 0)
253 0 : ereport(ERROR,
254 : (errcode_for_file_access(),
255 : errmsg("could not open file \"%s\": %m", file2)));
256 :
257 : for (;;)
258 0 : {
259 0 : int nbytes = 0;
260 0 : int buf1_len = 0;
261 0 : int buf2_len = 0;
262 :
263 0 : while (buf1_len < CMP_BUF_SIZE)
264 : {
265 0 : nbytes = read(fd1, buf1 + buf1_len, CMP_BUF_SIZE - buf1_len);
266 0 : if (nbytes < 0)
267 0 : ereport(ERROR,
268 : (errcode_for_file_access(),
269 : errmsg("could not read file \"%s\": %m", file1)));
270 0 : else if (nbytes == 0)
271 0 : break;
272 :
273 0 : buf1_len += nbytes;
274 : }
275 :
276 0 : while (buf2_len < CMP_BUF_SIZE)
277 : {
278 0 : nbytes = read(fd2, buf2 + buf2_len, CMP_BUF_SIZE - buf2_len);
279 0 : if (nbytes < 0)
280 0 : ereport(ERROR,
281 : (errcode_for_file_access(),
282 : errmsg("could not read file \"%s\": %m", file2)));
283 0 : else if (nbytes == 0)
284 0 : break;
285 :
286 0 : buf2_len += nbytes;
287 : }
288 :
289 0 : if (buf1_len != buf2_len || memcmp(buf1, buf2, buf1_len) != 0)
290 : {
291 0 : ret = false;
292 0 : break;
293 : }
294 0 : else if (buf1_len == 0)
295 0 : break;
296 : }
297 :
298 0 : if (CloseTransientFile(fd1) != 0)
299 0 : ereport(ERROR,
300 : (errcode_for_file_access(),
301 : errmsg("could not close file \"%s\": %m", file1)));
302 :
303 0 : if (CloseTransientFile(fd2) != 0)
304 0 : ereport(ERROR,
305 : (errcode_for_file_access(),
306 : errmsg("could not close file \"%s\": %m", file2)));
307 :
308 0 : return ret;
309 : }
|