Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * basic_archive.c
4 : *
5 : * This file demonstrates a basic archive library implementation that is
6 : * roughly equivalent to the following shell command:
7 : *
8 : * test ! -f /path/to/dest && cp /path/to/src /path/to/dest
9 : *
10 : * One notable difference between this module and the shell command above
11 : * is that this module first copies the file to a temporary destination,
12 : * syncs it to disk, and then durably moves it to the final destination.
13 : *
14 : * Another notable difference is that if /path/to/dest already exists
15 : * but has contents identical to /path/to/src, archiving will succeed,
16 : * whereas the command shown above would fail. This prevents problems if
17 : * a file is successfully archived and then the system crashes before
18 : * a durable record of the success has been made.
19 : *
20 : * Copyright (c) 2022-2026, PostgreSQL Global Development Group
21 : *
22 : * IDENTIFICATION
23 : * contrib/basic_archive/basic_archive.c
24 : *
25 : *-------------------------------------------------------------------------
26 : */
27 : #include "postgres.h"
28 :
29 : #include <sys/stat.h>
30 : #include <sys/time.h>
31 : #include <unistd.h>
32 :
33 : #include "archive/archive_module.h"
34 : #include "common/int.h"
35 : #include "miscadmin.h"
36 : #include "storage/copydir.h"
37 : #include "storage/fd.h"
38 : #include "utils/guc.h"
39 :
40 1 : PG_MODULE_MAGIC_EXT(
41 : .name = "basic_archive",
42 : .version = PG_VERSION
43 : );
44 :
45 : static char *archive_directory = NULL;
46 :
47 : static bool basic_archive_configured(ArchiveModuleState *state);
48 : static bool basic_archive_file(ArchiveModuleState *state, const char *file, const char *path);
49 : static bool check_archive_directory(char **newval, void **extra, GucSource source);
50 : static bool compare_files(const char *file1, const char *file2);
51 :
52 : static const ArchiveModuleCallbacks basic_archive_callbacks = {
53 : .startup_cb = NULL,
54 : .check_configured_cb = basic_archive_configured,
55 : .archive_file_cb = basic_archive_file,
56 : .shutdown_cb = NULL
57 : };
58 :
59 : /*
60 : * _PG_init
61 : *
62 : * Defines the module's GUC.
63 : */
64 : void
65 1 : _PG_init(void)
66 : {
67 1 : DefineCustomStringVariable("basic_archive.archive_directory",
68 : "Archive file destination directory.",
69 : NULL,
70 : &archive_directory,
71 : "",
72 : PGC_SIGHUP,
73 : 0,
74 : check_archive_directory, NULL, NULL);
75 :
76 1 : MarkGUCPrefixReserved("basic_archive");
77 1 : }
78 :
79 : /*
80 : * _PG_archive_module_init
81 : *
82 : * Returns the module's archiving callbacks.
83 : */
84 : const ArchiveModuleCallbacks *
85 1 : _PG_archive_module_init(void)
86 : {
87 1 : return &basic_archive_callbacks;
88 : }
89 :
90 : /*
91 : * check_archive_directory
92 : *
93 : * Checks that the provided archive directory path isn't too long.
94 : */
95 : static bool
96 2 : check_archive_directory(char **newval, void **extra, GucSource source)
97 : {
98 : /*
99 : * The default value is an empty string, so we have to accept that value.
100 : * Our check_configured callback also checks for this and prevents
101 : * archiving from proceeding if it is still empty.
102 : */
103 2 : if (*newval == NULL || *newval[0] == '\0')
104 1 : return true;
105 :
106 : /*
107 : * Make sure the file paths won't be too long. The docs indicate that the
108 : * file names to be archived can be up to 64 characters long.
109 : */
110 1 : if (strlen(*newval) + 64 + 2 >= MAXPGPATH)
111 : {
112 0 : GUC_check_errdetail("Archive directory too long.");
113 0 : return false;
114 : }
115 :
116 1 : return true;
117 : }
118 :
119 : /*
120 : * basic_archive_configured
121 : *
122 : * Checks that archive_directory is not blank.
123 : */
124 : static bool
125 2 : basic_archive_configured(ArchiveModuleState *state)
126 : {
127 2 : if (archive_directory != NULL && archive_directory[0] != '\0')
128 2 : return true;
129 :
130 0 : arch_module_check_errdetail("%s is not set.",
131 : "basic_archive.archive_directory");
132 0 : return false;
133 : }
134 :
135 : /*
136 : * basic_archive_file
137 : *
138 : * Archives one file.
139 : */
140 : static bool
141 2 : basic_archive_file(ArchiveModuleState *state, const char *file, const char *path)
142 : {
143 : char destination[MAXPGPATH];
144 : char temp[MAXPGPATH + 256];
145 : struct stat st;
146 : struct timeval tv;
147 : uint64 epoch; /* milliseconds */
148 :
149 2 : ereport(DEBUG3,
150 : (errmsg("archiving \"%s\" via basic_archive", file)));
151 :
152 2 : snprintf(destination, MAXPGPATH, "%s/%s", archive_directory, file);
153 :
154 : /*
155 : * First, check if the file has already been archived. If it already
156 : * exists and has the same contents as the file we're trying to archive,
157 : * we can return success (after ensuring the file is persisted to disk).
158 : * This scenario is possible if the server crashed after archiving the
159 : * file but before renaming its .ready file to .done.
160 : *
161 : * If the archive file already exists but has different contents,
162 : * something might be wrong, so we just fail.
163 : */
164 2 : if (stat(destination, &st) == 0)
165 : {
166 0 : if (compare_files(path, destination))
167 : {
168 0 : ereport(DEBUG3,
169 : (errmsg("archive file \"%s\" already exists with identical contents",
170 : destination)));
171 :
172 0 : fsync_fname(destination, false);
173 0 : fsync_fname(archive_directory, true);
174 :
175 0 : return true;
176 : }
177 :
178 0 : ereport(ERROR,
179 : (errmsg("archive file \"%s\" already exists", destination)));
180 : }
181 2 : else if (errno != ENOENT)
182 0 : ereport(ERROR,
183 : (errcode_for_file_access(),
184 : errmsg("could not stat file \"%s\": %m", destination)));
185 :
186 : /*
187 : * Pick a sufficiently unique name for the temporary file so that a
188 : * collision is unlikely. This helps avoid problems in case a temporary
189 : * file was left around after a crash or another server happens to be
190 : * archiving to the same directory.
191 : */
192 2 : gettimeofday(&tv, NULL);
193 4 : if (pg_mul_u64_overflow((uint64) 1000, (uint64) tv.tv_sec, &epoch) ||
194 2 : pg_add_u64_overflow(epoch, (uint64) (tv.tv_usec / 1000), &epoch))
195 0 : elog(ERROR, "could not generate temporary file name for archiving");
196 :
197 2 : snprintf(temp, sizeof(temp), "%s/%s.%s.%d." UINT64_FORMAT,
198 : archive_directory, "archtemp", file, MyProcPid, epoch);
199 :
200 : /*
201 : * Copy the file to its temporary destination. Note that this will fail
202 : * if temp already exists.
203 : */
204 2 : copy_file(path, temp);
205 :
206 : /*
207 : * Sync the temporary file to disk and move it to its final destination.
208 : * Note that this will overwrite any existing file, but this is only
209 : * possible if someone else created the file since the stat() above.
210 : */
211 2 : (void) durable_rename(temp, destination, ERROR);
212 :
213 2 : ereport(DEBUG1,
214 : (errmsg("archived \"%s\" via basic_archive", file)));
215 :
216 2 : return true;
217 : }
218 :
219 : /*
220 : * compare_files
221 : *
222 : * Returns whether the contents of the files are the same.
223 : */
224 : static bool
225 0 : compare_files(const char *file1, const char *file2)
226 : {
227 : #define CMP_BUF_SIZE (4096)
228 : char buf1[CMP_BUF_SIZE];
229 : char buf2[CMP_BUF_SIZE];
230 : int fd1;
231 : int fd2;
232 0 : bool ret = true;
233 :
234 0 : fd1 = OpenTransientFile(file1, O_RDONLY | PG_BINARY);
235 0 : if (fd1 < 0)
236 0 : ereport(ERROR,
237 : (errcode_for_file_access(),
238 : errmsg("could not open file \"%s\": %m", file1)));
239 :
240 0 : fd2 = OpenTransientFile(file2, O_RDONLY | PG_BINARY);
241 0 : if (fd2 < 0)
242 0 : ereport(ERROR,
243 : (errcode_for_file_access(),
244 : errmsg("could not open file \"%s\": %m", file2)));
245 :
246 : for (;;)
247 0 : {
248 0 : int nbytes = 0;
249 0 : int buf1_len = 0;
250 0 : int buf2_len = 0;
251 :
252 0 : while (buf1_len < CMP_BUF_SIZE)
253 : {
254 0 : nbytes = read(fd1, buf1 + buf1_len, CMP_BUF_SIZE - buf1_len);
255 0 : if (nbytes < 0)
256 0 : ereport(ERROR,
257 : (errcode_for_file_access(),
258 : errmsg("could not read file \"%s\": %m", file1)));
259 0 : else if (nbytes == 0)
260 0 : break;
261 :
262 0 : buf1_len += nbytes;
263 : }
264 :
265 0 : while (buf2_len < CMP_BUF_SIZE)
266 : {
267 0 : nbytes = read(fd2, buf2 + buf2_len, CMP_BUF_SIZE - buf2_len);
268 0 : if (nbytes < 0)
269 0 : ereport(ERROR,
270 : (errcode_for_file_access(),
271 : errmsg("could not read file \"%s\": %m", file2)));
272 0 : else if (nbytes == 0)
273 0 : break;
274 :
275 0 : buf2_len += nbytes;
276 : }
277 :
278 0 : if (buf1_len != buf2_len || memcmp(buf1, buf2, buf1_len) != 0)
279 : {
280 0 : ret = false;
281 0 : break;
282 : }
283 0 : else if (buf1_len == 0)
284 0 : break;
285 : }
286 :
287 0 : if (CloseTransientFile(fd1) != 0)
288 0 : ereport(ERROR,
289 : (errcode_for_file_access(),
290 : errmsg("could not close file \"%s\": %m", file1)));
291 :
292 0 : if (CloseTransientFile(fd2) != 0)
293 0 : ereport(ERROR,
294 : (errcode_for_file_access(),
295 : errmsg("could not close file \"%s\": %m", file2)));
296 :
297 0 : return ret;
298 : }
|