Line data Source code
1 : /*
2 : * file.c
3 : *
4 : * file system operations
5 : *
6 : * Copyright (c) 2010-2025, PostgreSQL Global Development Group
7 : * src/bin/pg_upgrade/file.c
8 : */
9 :
10 : #include "postgres_fe.h"
11 :
12 : #include <sys/stat.h>
13 : #include <limits.h>
14 : #include <fcntl.h>
15 : #ifdef HAVE_COPYFILE_H
16 : #include <copyfile.h>
17 : #endif
18 : #ifdef __linux__
19 : #include <sys/ioctl.h>
20 : #include <linux/fs.h>
21 : #endif
22 :
23 : #include "access/visibilitymapdefs.h"
24 : #include "common/file_perm.h"
25 : #include "pg_upgrade.h"
26 : #include "storage/bufpage.h"
27 : #include "storage/checksum.h"
28 : #include "storage/checksum_impl.h"
29 :
30 :
/*
 * cloneFile()
 *
 * Create dst as a clone (reflink) of the relation file src.
 *
 * The implementation is chosen at compile time: copyfile() with
 * COPYFILE_CLONE_FORCE where that is available, otherwise the Linux FICLONE
 * ioctl.  If neither is available the function body is empty.
 *
 * schemaName/relName are the relation's SQL name; they are used only to
 * build error messages.  All failures are reported through pg_fatal().
 */
void
cloneFile(const char *src, const char *dst,
		  const char *schemaName, const char *relName)
{
#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
	if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0)
		pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %m",
				 schemaName, relName, src, dst);
#elif defined(__linux__) && defined(FICLONE)
	int			from_fd;
	int			to_fd;

	from_fd = open(src, O_RDONLY | PG_BINARY, 0);
	if (from_fd < 0)
		pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %m",
				 schemaName, relName, src);

	/* O_EXCL: the destination must not exist yet */
	to_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
				 pg_file_create_mode);
	if (to_fd < 0)
		pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %m",
				 schemaName, relName, dst);

	if (ioctl(to_fd, FICLONE, from_fd) < 0)
	{
		/* remember the ioctl's errno; unlink() below may overwrite it */
		int			clone_errno = errno;

		/* remove the empty destination file created above */
		unlink(dst);

		pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s",
				 schemaName, relName, src, dst, strerror(clone_errno));
	}

	close(from_fd);
	close(to_fd);
#endif
}
73 :
74 :
75 : /*
76 : * copyFile()
77 : *
78 : * Copies a relation file from src to dst.
79 : * schemaName/relName are relation's SQL name (used for error messages only).
80 : */
81 : void
82 3344 : copyFile(const char *src, const char *dst,
83 : const char *schemaName, const char *relName)
84 : {
85 : #ifndef WIN32
86 : int src_fd;
87 : int dest_fd;
88 : char *buffer;
89 :
90 3344 : if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
91 0 : pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %m",
92 : schemaName, relName, src);
93 :
94 3344 : if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
95 : pg_file_create_mode)) < 0)
96 0 : pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %m",
97 : schemaName, relName, dst);
98 :
99 : /* copy in fairly large chunks for best efficiency */
100 : #define COPY_BUF_SIZE (50 * BLCKSZ)
101 :
102 3344 : buffer = (char *) pg_malloc(COPY_BUF_SIZE);
103 :
104 : /* perform data copying i.e read src source, write to destination */
105 : while (true)
106 2704 : {
107 6048 : ssize_t nbytes = read(src_fd, buffer, COPY_BUF_SIZE);
108 :
109 6048 : if (nbytes < 0)
110 0 : pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %m",
111 : schemaName, relName, src);
112 :
113 6048 : if (nbytes == 0)
114 3344 : break;
115 :
116 2704 : errno = 0;
117 2704 : if (write(dest_fd, buffer, nbytes) != nbytes)
118 : {
119 : /* if write didn't set errno, assume problem is no disk space */
120 0 : if (errno == 0)
121 0 : errno = ENOSPC;
122 0 : pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %m",
123 : schemaName, relName, dst);
124 : }
125 : }
126 :
127 3344 : pg_free(buffer);
128 3344 : close(src_fd);
129 3344 : close(dest_fd);
130 :
131 : #else /* WIN32 */
132 :
133 : if (CopyFile(src, dst, true) == 0)
134 : {
135 : _dosmaperr(GetLastError());
136 : pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %m",
137 : schemaName, relName, src, dst);
138 : }
139 :
140 : #endif /* WIN32 */
141 3344 : }
142 :
143 :
/*
 * copyFileByRange()
 *
 * Copy the relation file src to a newly created file dst using
 * copy_file_range().  When HAVE_COPY_FILE_RANGE is not defined the function
 * body is empty.
 *
 * schemaName/relName are the relation's SQL name; they are used only to
 * build error messages.  All failures are reported through pg_fatal().
 */
void
copyFileByRange(const char *src, const char *dst,
				const char *schemaName, const char *relName)
{
#ifdef HAVE_COPY_FILE_RANGE
	int			in_fd;
	int			out_fd;

	in_fd = open(src, O_RDONLY | PG_BINARY, 0);
	if (in_fd < 0)
		pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %m",
				 schemaName, relName, src);

	/* O_EXCL: the destination must not exist yet */
	out_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
				  pg_file_create_mode);
	if (out_fd < 0)
		pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %m",
				 schemaName, relName, dst);

	/*
	 * Ask for as much as possible on every call and keep going until the
	 * call reports that zero bytes were copied.  NULL offsets mean both
	 * descriptors' own file positions are used.
	 */
	for (;;)
	{
		ssize_t		copied;

		copied = copy_file_range(in_fd, NULL, out_fd, NULL, SSIZE_MAX, 0);
		if (copied < 0)
			pg_fatal("error while copying relation \"%s.%s\": could not copy file range from \"%s\" to \"%s\": %m",
					 schemaName, relName, src, dst);

		if (copied <= 0)
			break;
	}

	close(in_fd);
	close(out_fd);
#endif
}
181 :
182 :
/*
 * linkFile()
 *
 * Make dst a hard link to the relation file src.
 *
 * schemaName/relName are the relation's SQL name; they are used only to
 * build the error message.  Failure is reported through pg_fatal().
 */
void
linkFile(const char *src, const char *dst,
		 const char *schemaName, const char *relName)
{
	/* nothing more to do when the link was created */
	if (link(src, dst) >= 0)
		return;

	pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %m",
			 schemaName, relName, src, dst);
}
197 :
198 :
199 : /*
200 : * rewriteVisibilityMap()
201 : *
202 : * Transform a visibility map file, copying from src to dst.
203 : * schemaName/relName are relation's SQL name (used for error messages only).
204 : *
205 : * In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's
206 : * visibility map included one bit per heap page; it now includes two.
207 : * When upgrading a cluster from before that time to a current PostgreSQL
208 : * version, we could refuse to copy visibility maps from the old cluster
209 : * to the new cluster; the next VACUUM would recreate them, but at the
210 : * price of scanning the entire table. So, instead, we rewrite the old
211 : * visibility maps in the new format. That way, the all-visible bits
212 : * remain set for the pages for which they were set previously. The
213 : * all-frozen bits are never set by this conversion; we leave that to VACUUM.
214 : */
215 : void
216 0 : rewriteVisibilityMap(const char *fromfile, const char *tofile,
217 : const char *schemaName, const char *relName)
218 : {
219 : int src_fd;
220 : int dst_fd;
221 : PGIOAlignedBlock buffer;
222 : PGIOAlignedBlock new_vmbuf;
223 0 : ssize_t totalBytesRead = 0;
224 : ssize_t src_filesize;
225 : int rewriteVmBytesPerPage;
226 0 : BlockNumber new_blkno = 0;
227 : struct stat statbuf;
228 :
229 : /* Compute number of old-format bytes per new page */
230 0 : rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2;
231 :
232 0 : if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0)
233 0 : pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %m",
234 : schemaName, relName, fromfile);
235 :
236 0 : if (fstat(src_fd, &statbuf) != 0)
237 0 : pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %m",
238 : schemaName, relName, fromfile);
239 :
240 0 : if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
241 : pg_file_create_mode)) < 0)
242 0 : pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %m",
243 : schemaName, relName, tofile);
244 :
245 : /* Save old file size */
246 0 : src_filesize = statbuf.st_size;
247 :
248 : /*
249 : * Turn each visibility map page into 2 pages one by one. Each new page
250 : * has the same page header as the old one. If the last section of the
251 : * last page is empty, we skip it, mostly to avoid turning one-page
252 : * visibility maps for small relations into two pages needlessly.
253 : */
254 0 : while (totalBytesRead < src_filesize)
255 : {
256 : ssize_t bytesRead;
257 : char *old_cur;
258 : char *old_break;
259 : char *old_blkend;
260 : PageHeaderData pageheader;
261 : bool old_lastblk;
262 :
263 0 : if ((bytesRead = read(src_fd, buffer.data, BLCKSZ)) != BLCKSZ)
264 : {
265 0 : if (bytesRead < 0)
266 0 : pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %m",
267 : schemaName, relName, fromfile);
268 : else
269 0 : pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"",
270 : schemaName, relName, fromfile);
271 : }
272 :
273 0 : totalBytesRead += BLCKSZ;
274 0 : old_lastblk = (totalBytesRead == src_filesize);
275 :
276 : /* Save the page header data */
277 0 : memcpy(&pageheader, buffer.data, SizeOfPageHeaderData);
278 :
279 : /*
280 : * These old_* variables point to old visibility map page. old_cur
281 : * points to current position on old page. old_blkend points to end of
282 : * old block. old_break is the end+1 position on the old page for the
283 : * data that will be transferred to the current new page.
284 : */
285 0 : old_cur = buffer.data + SizeOfPageHeaderData;
286 0 : old_blkend = buffer.data + bytesRead;
287 0 : old_break = old_cur + rewriteVmBytesPerPage;
288 :
289 0 : while (old_break <= old_blkend)
290 : {
291 : char *new_cur;
292 0 : bool empty = true;
293 : bool old_lastpart;
294 :
295 : /* First, copy old page header to new page */
296 0 : memcpy(new_vmbuf.data, &pageheader, SizeOfPageHeaderData);
297 :
298 : /* Rewriting the last part of the last old page? */
299 0 : old_lastpart = old_lastblk && (old_break == old_blkend);
300 :
301 0 : new_cur = new_vmbuf.data + SizeOfPageHeaderData;
302 :
303 : /* Process old page bytes one by one, and turn it into new page. */
304 0 : while (old_cur < old_break)
305 : {
306 0 : uint8 byte = *(uint8 *) old_cur;
307 0 : uint16 new_vmbits = 0;
308 : int i;
309 :
310 : /* Generate new format bits while keeping old information */
311 0 : for (i = 0; i < BITS_PER_BYTE; i++)
312 : {
313 0 : if (byte & (1 << i))
314 : {
315 0 : empty = false;
316 0 : new_vmbits |=
317 0 : VISIBILITYMAP_ALL_VISIBLE << (BITS_PER_HEAPBLOCK * i);
318 : }
319 : }
320 :
321 : /* Copy new visibility map bytes to new-format page */
322 0 : new_cur[0] = (char) (new_vmbits & 0xFF);
323 0 : new_cur[1] = (char) (new_vmbits >> 8);
324 :
325 0 : old_cur++;
326 0 : new_cur += BITS_PER_HEAPBLOCK;
327 : }
328 :
329 : /* If the last part of the last page is empty, skip writing it */
330 0 : if (old_lastpart && empty)
331 0 : break;
332 :
333 : /* Set new checksum for visibility map page, if enabled */
334 0 : if (new_cluster.controldata.data_checksum_version != 0)
335 0 : ((PageHeader) new_vmbuf.data)->pd_checksum =
336 0 : pg_checksum_page(new_vmbuf.data, new_blkno);
337 :
338 0 : errno = 0;
339 0 : if (write(dst_fd, new_vmbuf.data, BLCKSZ) != BLCKSZ)
340 : {
341 : /* if write didn't set errno, assume problem is no disk space */
342 0 : if (errno == 0)
343 0 : errno = ENOSPC;
344 0 : pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %m",
345 : schemaName, relName, tofile);
346 : }
347 :
348 : /* Advance for next new page */
349 0 : old_break += rewriteVmBytesPerPage;
350 0 : new_blkno++;
351 : }
352 : }
353 :
354 : /* Clean up */
355 0 : close(dst_fd);
356 0 : close(src_fd);
357 0 : }
358 :
359 : void
360 0 : check_file_clone(void)
361 : {
362 : char existing_file[MAXPGPATH];
363 : char new_link_file[MAXPGPATH];
364 :
365 0 : snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
366 0 : snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.clonetest", new_cluster.pgdata);
367 0 : unlink(new_link_file); /* might fail */
368 :
369 : #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
370 : if (copyfile(existing_file, new_link_file, NULL, COPYFILE_CLONE_FORCE) < 0)
371 : pg_fatal("could not clone file between old and new data directories: %m");
372 : #elif defined(__linux__) && defined(FICLONE)
373 : {
374 : int src_fd;
375 : int dest_fd;
376 :
377 0 : if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0)
378 0 : pg_fatal("could not open file \"%s\": %m",
379 : existing_file);
380 :
381 0 : if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
382 : pg_file_create_mode)) < 0)
383 0 : pg_fatal("could not create file \"%s\": %m",
384 : new_link_file);
385 :
386 0 : if (ioctl(dest_fd, FICLONE, src_fd) < 0)
387 0 : pg_fatal("could not clone file between old and new data directories: %m");
388 :
389 0 : close(src_fd);
390 0 : close(dest_fd);
391 : }
392 : #else
393 : pg_fatal("file cloning not supported on this platform");
394 : #endif
395 :
396 0 : unlink(new_link_file);
397 0 : }
398 :
399 : void
400 0 : check_copy_file_range(void)
401 : {
402 : char existing_file[MAXPGPATH];
403 : char new_link_file[MAXPGPATH];
404 :
405 0 : snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
406 0 : snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.copy_file_range_test", new_cluster.pgdata);
407 0 : unlink(new_link_file); /* might fail */
408 :
409 : #if defined(HAVE_COPY_FILE_RANGE)
410 : {
411 : int src_fd;
412 : int dest_fd;
413 :
414 0 : if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0)
415 0 : pg_fatal("could not open file \"%s\": %m",
416 : existing_file);
417 :
418 0 : if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
419 : pg_file_create_mode)) < 0)
420 0 : pg_fatal("could not create file \"%s\": %m",
421 : new_link_file);
422 :
423 0 : if (copy_file_range(src_fd, NULL, dest_fd, NULL, SSIZE_MAX, 0) < 0)
424 0 : pg_fatal("could not copy file range between old and new data directories: %m");
425 :
426 0 : close(src_fd);
427 0 : close(dest_fd);
428 : }
429 : #else
430 : pg_fatal("copy_file_range not supported on this platform");
431 : #endif
432 :
433 0 : unlink(new_link_file);
434 0 : }
435 :
436 : void
437 0 : check_hard_link(void)
438 : {
439 : char existing_file[MAXPGPATH];
440 : char new_link_file[MAXPGPATH];
441 :
442 0 : snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
443 0 : snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.linktest", new_cluster.pgdata);
444 0 : unlink(new_link_file); /* might fail */
445 :
446 0 : if (link(existing_file, new_link_file) < 0)
447 0 : pg_fatal("could not create hard link between old and new data directories: %m\n"
448 : "In link mode the old and new data directories must be on the same file system.");
449 :
450 0 : unlink(new_link_file);
451 0 : }
|