Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * File-processing utility routines.
4 : *
5 : * Assorted utility functions to work on files.
6 : *
7 : *
8 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
9 : * Portions Copyright (c) 1994, Regents of the University of California
10 : *
11 : * src/common/file_utils.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #ifndef FRONTEND
17 : #include "postgres.h"
18 : #else
19 : #include "postgres_fe.h"
20 : #endif
21 :
22 : #include <dirent.h>
23 : #include <fcntl.h>
24 : #include <sys/stat.h>
25 : #include <unistd.h>
26 :
27 : #include "common/file_utils.h"
28 : #ifdef FRONTEND
29 : #include "common/logging.h"
30 : #endif
31 : #include "common/relpath.h"
32 : #include "port/pg_iovec.h"
33 :
34 : #ifdef FRONTEND
35 :
36 : /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
37 : #if defined(HAVE_SYNC_FILE_RANGE)
38 : #define PG_FLUSH_DATA_WORKS 1
39 : #elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
40 : #define PG_FLUSH_DATA_WORKS 1
41 : #endif
42 :
43 : /*
44 : * pg_xlog has been renamed to pg_wal in version 10.
45 : */
46 : #define MINIMUM_VERSION_FOR_PG_WAL 100000
47 :
48 : #ifdef PG_FLUSH_DATA_WORKS
49 : static int pre_sync_fname(const char *fname, bool isdir);
50 : #endif
51 : static void walkdir(const char *path,
52 : int (*action) (const char *fname, bool isdir),
53 : bool process_symlinks);
54 :
55 : #ifdef HAVE_SYNCFS
56 :
/*
 * do_syncfs -- flush the file system that contains "path" using syncfs()
 *
 * A failure to open "path" is logged and the function returns without
 * syncing anything.  A failure of syncfs() itself is logged and terminates
 * the program.
 */
static void
do_syncfs(const char *path)
{
	int			fd = open(path, O_RDONLY, 0);
	int			rc;

	if (fd < 0)
	{
		pg_log_error("could not open file \"%s\": %m", path);
		return;
	}

	rc = syncfs(fd);
	if (rc < 0)
	{
		/* Log first, so that %m still sees the errno left by syncfs(). */
		pg_log_error("could not synchronize file system for file \"%s\": %m", path);
		(void) close(fd);
		exit(EXIT_FAILURE);
	}

	(void) close(fd);
}
84 :
85 : #endif /* HAVE_SYNCFS */
86 :
87 : /*
88 : * Synchronize PGDATA and all its contents.
89 : *
90 : * We sync regular files and directories wherever they are, but we follow
91 : * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc.
92 : * Other symlinks are presumed to point at files we're not responsible for
93 : * syncing, and might not have privileges to write at all.
94 : *
95 : * serverVersion indicates the version of the server to be sync'd.
96 : */
97 : void
98 30 : sync_pgdata(const char *pg_data,
99 : int serverVersion,
100 : DataDirSyncMethod sync_method)
101 : {
102 : bool xlog_is_symlink;
103 : char pg_wal[MAXPGPATH];
104 : char pg_tblspc[MAXPGPATH];
105 :
106 : /* handle renaming of pg_xlog to pg_wal in post-10 clusters */
107 30 : snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data,
108 : serverVersion < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal");
109 30 : snprintf(pg_tblspc, MAXPGPATH, "%s/%s", pg_data, PG_TBLSPC_DIR);
110 :
111 : /*
112 : * If pg_wal is a symlink, we'll need to recurse into it separately,
113 : * because the first walkdir below will ignore it.
114 : */
115 30 : xlog_is_symlink = false;
116 :
117 : {
118 : struct stat st;
119 :
120 30 : if (lstat(pg_wal, &st) < 0)
121 0 : pg_log_error("could not stat file \"%s\": %m", pg_wal);
122 30 : else if (S_ISLNK(st.st_mode))
123 4 : xlog_is_symlink = true;
124 : }
125 :
126 30 : switch (sync_method)
127 : {
128 2 : case DATA_DIR_SYNC_METHOD_SYNCFS:
129 : {
130 : #ifndef HAVE_SYNCFS
131 : pg_log_error("this build does not support sync method \"%s\"",
132 : "syncfs");
133 : exit(EXIT_FAILURE);
134 : #else
135 : DIR *dir;
136 : struct dirent *de;
137 :
138 : /*
139 : * On Linux, we don't have to open every single file one by
140 : * one. We can use syncfs() to sync whole filesystems. We
141 : * only expect filesystem boundaries to exist where we
142 : * tolerate symlinks, namely pg_wal and the tablespaces, so we
143 : * call syncfs() for each of those directories.
144 : */
145 :
146 : /* Sync the top level pgdata directory. */
147 2 : do_syncfs(pg_data);
148 :
149 : /* If any tablespaces are configured, sync each of those. */
150 2 : dir = opendir(pg_tblspc);
151 2 : if (dir == NULL)
152 0 : pg_log_error("could not open directory \"%s\": %m",
153 : pg_tblspc);
154 : else
155 : {
156 6 : while (errno = 0, (de = readdir(dir)) != NULL)
157 : {
158 : char subpath[MAXPGPATH * 2];
159 :
160 4 : if (strcmp(de->d_name, ".") == 0 ||
161 2 : strcmp(de->d_name, "..") == 0)
162 4 : continue;
163 :
164 0 : snprintf(subpath, sizeof(subpath), "%s/%s",
165 0 : pg_tblspc, de->d_name);
166 0 : do_syncfs(subpath);
167 : }
168 :
169 2 : if (errno)
170 0 : pg_log_error("could not read directory \"%s\": %m",
171 : pg_tblspc);
172 :
173 2 : (void) closedir(dir);
174 : }
175 :
176 : /* If pg_wal is a symlink, process that too. */
177 2 : if (xlog_is_symlink)
178 2 : do_syncfs(pg_wal);
179 : #endif /* HAVE_SYNCFS */
180 : }
181 2 : break;
182 :
183 28 : case DATA_DIR_SYNC_METHOD_FSYNC:
184 : {
185 : /*
186 : * If possible, hint to the kernel that we're soon going to
187 : * fsync the data directory and its contents.
188 : */
189 : #ifdef PG_FLUSH_DATA_WORKS
190 28 : walkdir(pg_data, pre_sync_fname, false);
191 28 : if (xlog_is_symlink)
192 2 : walkdir(pg_wal, pre_sync_fname, false);
193 28 : walkdir(pg_tblspc, pre_sync_fname, true);
194 : #endif
195 :
196 : /*
197 : * Now we do the fsync()s in the same order.
198 : *
199 : * The main call ignores symlinks, so in addition to specially
200 : * processing pg_wal if it's a symlink, pg_tblspc has to be
201 : * visited separately with process_symlinks = true. Note that
202 : * if there are any plain directories in pg_tblspc, they'll
203 : * get fsync'd twice. That's not an expected case so we don't
204 : * worry about optimizing it.
205 : */
206 28 : walkdir(pg_data, fsync_fname, false);
207 28 : if (xlog_is_symlink)
208 2 : walkdir(pg_wal, fsync_fname, false);
209 28 : walkdir(pg_tblspc, fsync_fname, true);
210 : }
211 28 : break;
212 : }
213 30 : }
214 :
215 : /*
216 : * Synchronize the given directory and all its contents.
217 : *
218 : * This is a convenient wrapper on top of walkdir() and do_syncfs().
219 : */
220 : void
221 8 : sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
222 : {
223 8 : switch (sync_method)
224 : {
225 0 : case DATA_DIR_SYNC_METHOD_SYNCFS:
226 : {
227 : #ifndef HAVE_SYNCFS
228 : pg_log_error("this build does not support sync method \"%s\"",
229 : "syncfs");
230 : exit(EXIT_FAILURE);
231 : #else
232 : /*
233 : * On Linux, we don't have to open every single file one by
234 : * one. We can use syncfs() to sync the whole filesystem.
235 : */
236 0 : do_syncfs(dir);
237 : #endif /* HAVE_SYNCFS */
238 : }
239 0 : break;
240 :
241 8 : case DATA_DIR_SYNC_METHOD_FSYNC:
242 : {
243 : /*
244 : * If possible, hint to the kernel that we're soon going to
245 : * fsync the data directory and its contents.
246 : */
247 : #ifdef PG_FLUSH_DATA_WORKS
248 8 : walkdir(dir, pre_sync_fname, false);
249 : #endif
250 :
251 8 : walkdir(dir, fsync_fname, false);
252 : }
253 8 : break;
254 : }
255 8 : }
256 :
257 : /*
258 : * walkdir: recursively walk a directory, applying the action to each
259 : * regular file and directory (including the named directory itself).
260 : *
261 : * If process_symlinks is true, the action and recursion are also applied
262 : * to regular files and directories that are pointed to by symlinks in the
263 : * given directory; otherwise symlinks are ignored. Symlinks are always
264 : * ignored in subdirectories, ie we intentionally don't pass down the
265 : * process_symlinks flag to recursive calls.
266 : *
267 : * Errors are reported but not considered fatal.
268 : *
269 : * See also walkdir in fd.c, which is a backend version of this logic.
270 : */
271 : static void
272 1608 : walkdir(const char *path,
273 : int (*action) (const char *fname, bool isdir),
274 : bool process_symlinks)
275 : {
276 : DIR *dir;
277 : struct dirent *de;
278 :
279 1608 : dir = opendir(path);
280 1608 : if (dir == NULL)
281 : {
282 0 : pg_log_error("could not open directory \"%s\": %m", path);
283 0 : return;
284 : }
285 :
286 63992 : while (errno = 0, (de = readdir(dir)) != NULL)
287 : {
288 : char subpath[MAXPGPATH * 2];
289 :
290 62384 : if (strcmp(de->d_name, ".") == 0 ||
291 60776 : strcmp(de->d_name, "..") == 0)
292 3216 : continue;
293 :
294 59168 : snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
295 :
296 59168 : switch (get_dirent_type(subpath, de, process_symlinks, PG_LOG_ERROR))
297 : {
298 57684 : case PGFILETYPE_REG:
299 57684 : (*action) (subpath, false);
300 57684 : break;
301 1476 : case PGFILETYPE_DIR:
302 1476 : walkdir(subpath, action, false);
303 1476 : break;
304 8 : default:
305 :
306 : /*
307 : * Errors are already reported directly by get_dirent_type(),
308 : * and any remaining symlinks and unknown file types are
309 : * ignored.
310 : */
311 8 : break;
312 : }
313 : }
314 :
315 1608 : if (errno)
316 0 : pg_log_error("could not read directory \"%s\": %m", path);
317 :
318 1608 : (void) closedir(dir);
319 :
320 : /*
321 : * It's important to fsync the destination directory itself as individual
322 : * file fsyncs don't guarantee that the directory entry for the file is
323 : * synced. Recent versions of ext4 have made the window much wider but
324 : * it's been an issue for ext3 and other filesystems in the past.
325 : */
326 1608 : (*action) (path, true);
327 : }
328 :
329 : /*
330 : * Hint to the OS that it should get ready to fsync() this file.
331 : *
332 : * Ignores errors trying to open unreadable files, and reports other errors
333 : * non-fatally.
334 : */
335 : #ifdef PG_FLUSH_DATA_WORKS
336 :
337 : static int
338 29646 : pre_sync_fname(const char *fname, bool isdir)
339 : {
340 : int fd;
341 :
342 29646 : fd = open(fname, O_RDONLY | PG_BINARY, 0);
343 :
344 29646 : if (fd < 0)
345 : {
346 0 : if (errno == EACCES || (isdir && errno == EISDIR))
347 0 : return 0;
348 0 : pg_log_error("could not open file \"%s\": %m", fname);
349 0 : return -1;
350 : }
351 :
352 : /*
353 : * We do what pg_flush_data() would do in the backend: prefer to use
354 : * sync_file_range, but fall back to posix_fadvise. We ignore errors
355 : * because this is only a hint.
356 : */
357 : #if defined(HAVE_SYNC_FILE_RANGE)
358 29646 : (void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
359 : #elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
360 : (void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
361 : #else
362 : #error PG_FLUSH_DATA_WORKS should not have been defined
363 : #endif
364 :
365 29646 : (void) close(fd);
366 29646 : return 0;
367 : }
368 :
369 : #endif /* PG_FLUSH_DATA_WORKS */
370 :
371 : /*
372 : * fsync_fname -- Try to fsync a file or directory
373 : *
374 : * Ignores errors trying to open unreadable files, or trying to fsync
375 : * directories on systems where that isn't allowed/required. All other errors
376 : * are fatal.
377 : */
378 : int
379 29732 : fsync_fname(const char *fname, bool isdir)
380 : {
381 : int fd;
382 : int flags;
383 : int returncode;
384 :
385 : /*
386 : * Some OSs require directories to be opened read-only whereas other
387 : * systems don't allow us to fsync files opened read-only; so we need both
388 : * cases here. Using O_RDWR will cause us to fail to fsync files that are
389 : * not writable by our userid, but we assume that's OK.
390 : */
391 29732 : flags = PG_BINARY;
392 29732 : if (!isdir)
393 28892 : flags |= O_RDWR;
394 : else
395 840 : flags |= O_RDONLY;
396 :
397 : /*
398 : * Open the file, silently ignoring errors about unreadable files (or
399 : * unsupported operations, e.g. opening a directory under Windows), and
400 : * logging others.
401 : */
402 29732 : fd = open(fname, flags, 0);
403 29732 : if (fd < 0)
404 : {
405 0 : if (errno == EACCES || (isdir && errno == EISDIR))
406 0 : return 0;
407 0 : pg_log_error("could not open file \"%s\": %m", fname);
408 0 : return -1;
409 : }
410 :
411 29732 : returncode = fsync(fd);
412 :
413 : /*
414 : * Some OSes don't allow us to fsync directories at all, so we can ignore
415 : * those errors. Anything else needs to be reported.
416 : */
417 29732 : if (returncode != 0 && !(isdir && (errno == EBADF || errno == EINVAL)))
418 : {
419 0 : pg_log_error("could not fsync file \"%s\": %m", fname);
420 0 : (void) close(fd);
421 0 : exit(EXIT_FAILURE);
422 : }
423 :
424 29732 : (void) close(fd);
425 29732 : return 0;
426 : }
427 :
428 : /*
429 : * fsync_parent_path -- fsync the parent path of a file or directory
430 : *
431 : * This is aimed at making file operations persistent on disk in case of
432 : * an OS crash or power failure.
433 : */
434 : int
435 28 : fsync_parent_path(const char *fname)
436 : {
437 : char parentpath[MAXPGPATH];
438 :
439 28 : strlcpy(parentpath, fname, MAXPGPATH);
440 28 : get_parent_directory(parentpath);
441 :
442 : /*
443 : * get_parent_directory() returns an empty string if the input argument is
444 : * just a file name (see comments in path.c), so handle that as being the
445 : * current directory.
446 : */
447 28 : if (strlen(parentpath) == 0)
448 0 : strlcpy(parentpath, ".", MAXPGPATH);
449 :
450 28 : if (fsync_fname(parentpath, true) != 0)
451 0 : return -1;
452 :
453 28 : return 0;
454 : }
455 :
456 : /*
457 : * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability
458 : *
459 : * Wrapper around rename, similar to the backend version.
460 : */
461 : int
462 6 : durable_rename(const char *oldfile, const char *newfile)
463 : {
464 : int fd;
465 :
466 : /*
467 : * First fsync the old and target path (if it exists), to ensure that they
468 : * are properly persistent on disk. Syncing the target file is not
469 : * strictly necessary, but it makes it easier to reason about crashes;
470 : * because it's then guaranteed that either source or target file exists
471 : * after a crash.
472 : */
473 6 : if (fsync_fname(oldfile, false) != 0)
474 0 : return -1;
475 :
476 6 : fd = open(newfile, PG_BINARY | O_RDWR, 0);
477 6 : if (fd < 0)
478 : {
479 6 : if (errno != ENOENT)
480 : {
481 0 : pg_log_error("could not open file \"%s\": %m", newfile);
482 0 : return -1;
483 : }
484 : }
485 : else
486 : {
487 0 : if (fsync(fd) != 0)
488 : {
489 0 : pg_log_error("could not fsync file \"%s\": %m", newfile);
490 0 : close(fd);
491 0 : exit(EXIT_FAILURE);
492 : }
493 0 : close(fd);
494 : }
495 :
496 : /* Time to do the real deal... */
497 6 : if (rename(oldfile, newfile) != 0)
498 : {
499 0 : pg_log_error("could not rename file \"%s\" to \"%s\": %m",
500 : oldfile, newfile);
501 0 : return -1;
502 : }
503 :
504 : /*
505 : * To guarantee renaming the file is persistent, fsync the file with its
506 : * new name, and its containing directory.
507 : */
508 6 : if (fsync_fname(newfile, false) != 0)
509 0 : return -1;
510 :
511 6 : if (fsync_parent_path(newfile) != 0)
512 0 : return -1;
513 :
514 6 : return 0;
515 : }
516 :
517 : #endif /* FRONTEND */
518 :
519 : /*
520 : * Return the type of a directory entry.
521 : *
522 : * In frontend code, elevel should be a level from logging.h; in backend code
523 : * it should be a level from elog.h.
524 : */
525 : PGFileType
526 445408 : get_dirent_type(const char *path,
527 : const struct dirent *de,
528 : bool look_through_symlinks,
529 : int elevel)
530 : {
531 : PGFileType result;
532 :
533 : /*
534 : * Some systems tell us the type directly in the dirent struct, but that's
535 : * a BSD and Linux extension not required by POSIX. Even when the
536 : * interface is present, sometimes the type is unknown, depending on the
537 : * filesystem.
538 : */
539 : #if defined(DT_REG) && defined(DT_DIR) && defined(DT_LNK)
540 445408 : if (de->d_type == DT_REG)
541 437488 : result = PGFILETYPE_REG;
542 7920 : else if (de->d_type == DT_DIR)
543 7854 : result = PGFILETYPE_DIR;
544 66 : else if (de->d_type == DT_LNK && !look_through_symlinks)
545 62 : result = PGFILETYPE_LNK;
546 : else
547 4 : result = PGFILETYPE_UNKNOWN;
548 : #else
549 : result = PGFILETYPE_UNKNOWN;
550 : #endif
551 :
552 445408 : if (result == PGFILETYPE_UNKNOWN)
553 : {
554 : struct stat fst;
555 : int sret;
556 :
557 :
558 4 : if (look_through_symlinks)
559 4 : sret = stat(path, &fst);
560 : else
561 0 : sret = lstat(path, &fst);
562 :
563 4 : if (sret < 0)
564 : {
565 0 : result = PGFILETYPE_ERROR;
566 : #ifdef FRONTEND
567 0 : pg_log_generic(elevel, PG_LOG_PRIMARY, "could not stat file \"%s\": %m", path);
568 : #else
569 0 : ereport(elevel,
570 : (errcode_for_file_access(),
571 : errmsg("could not stat file \"%s\": %m", path)));
572 : #endif
573 : }
574 4 : else if (S_ISREG(fst.st_mode))
575 0 : result = PGFILETYPE_REG;
576 4 : else if (S_ISDIR(fst.st_mode))
577 4 : result = PGFILETYPE_DIR;
578 0 : else if (S_ISLNK(fst.st_mode))
579 0 : result = PGFILETYPE_LNK;
580 : }
581 :
582 445408 : return result;
583 : }
584 :
/*
 * compute_remaining_iovec -- work out what is left after a possibly partial
 * vectored read or write
 *
 * The part of 'source' that lies beyond the first 'transferred' bytes is
 * copied to 'destination', and the number of iovecs it occupies is returned.
 * 'source' and 'destination' may point to the same array, for in-place
 * adjustment.  A return value of zero indicates completion (for callers
 * without a cheaper way to know that).
 */
int
compute_remaining_iovec(struct iovec *destination,
						const struct iovec *source,
						int iovcnt,
						size_t transferred)
{
	const struct iovec *cursor = source;
	int			left = iovcnt;
	size_t		skip = transferred;

	Assert(iovcnt > 0);

	/* Step over every iovec that was transferred completely. */
	while (cursor->iov_len <= skip)
	{
		skip -= cursor->iov_len;
		cursor++;

		if (--left == 0)
		{
			/*
			 * Everything was transferred.  The kernel is not expected to
			 * transfer more than it was asked to; otherwise something is
			 * out of sync.
			 */
			Assert(skip == 0);
			return 0;
		}
	}

	/* Move what remains to the front of the destination array. */
	if (cursor != destination)
		memmove(destination, cursor, sizeof(*cursor) * left);

	/* The leading iovec may have been transferred partially; trim it. */
	Assert(destination->iov_len > skip);
	destination->iov_base = (char *) destination->iov_base + skip;
	destination->iov_len -= skip;

	return left;
}
630 :
631 : /*
632 : * pg_pwritev_with_retry
633 : *
634 : * Convenience wrapper for pg_pwritev() that retries on partial write. If an
635 : * error is returned, it is unspecified how much has been written.
636 : */
637 : ssize_t
638 558608 : pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
639 : {
640 : struct iovec iov_copy[PG_IOV_MAX];
641 558608 : ssize_t sum = 0;
642 : ssize_t part;
643 :
644 : /* We'd better have space to make a copy, in case we need to retry. */
645 558608 : if (iovcnt > PG_IOV_MAX)
646 : {
647 0 : errno = EINVAL;
648 0 : return -1;
649 : }
650 :
651 : do
652 : {
653 : /* Write as much as we can. */
654 558608 : part = pg_pwritev(fd, iov, iovcnt, offset);
655 558608 : if (part < 0)
656 0 : return -1;
657 :
658 : #ifdef SIMULATE_SHORT_WRITE
659 : part = Min(part, 4096);
660 : #endif
661 :
662 : /* Count our progress. */
663 558608 : sum += part;
664 558608 : offset += part;
665 :
666 : /*
667 : * See what is left. On the first loop we used the caller's array,
668 : * but in later loops we'll use our local copy that we are allowed to
669 : * mutate.
670 : */
671 558608 : iovcnt = compute_remaining_iovec(iov_copy, iov, iovcnt, part);
672 558608 : iov = iov_copy;
673 558608 : } while (iovcnt > 0);
674 :
675 558608 : return sum;
676 : }
677 :
678 : /*
679 : * pg_pwrite_zeros
680 : *
681 : * Writes zeros to file worth "size" bytes at "offset" (from the start of the
682 : * file), using vectored I/O.
683 : *
684 : * Returns the total amount of data written. On failure, a negative value
685 : * is returned with errno set.
686 : */
687 : ssize_t
688 394922 : pg_pwrite_zeros(int fd, size_t size, off_t offset)
689 : {
690 : static const PGIOAlignedBlock zbuffer = {{0}}; /* worth BLCKSZ */
691 394922 : void *zerobuf_addr = unconstify(PGIOAlignedBlock *, &zbuffer)->data;
692 : struct iovec iov[PG_IOV_MAX];
693 394922 : size_t remaining_size = size;
694 394922 : ssize_t total_written = 0;
695 :
696 : /* Loop, writing as many blocks as we can for each system call. */
697 953530 : while (remaining_size > 0)
698 : {
699 558608 : int iovcnt = 0;
700 : ssize_t written;
701 :
702 6298100 : for (; iovcnt < PG_IOV_MAX && remaining_size > 0; iovcnt++)
703 : {
704 : size_t this_iov_size;
705 :
706 5739492 : iov[iovcnt].iov_base = zerobuf_addr;
707 :
708 5739492 : if (remaining_size < BLCKSZ)
709 0 : this_iov_size = remaining_size;
710 : else
711 5739492 : this_iov_size = BLCKSZ;
712 :
713 5739492 : iov[iovcnt].iov_len = this_iov_size;
714 5739492 : remaining_size -= this_iov_size;
715 : }
716 :
717 558608 : written = pg_pwritev_with_retry(fd, iov, iovcnt, offset);
718 :
719 558608 : if (written < 0)
720 0 : return written;
721 :
722 558608 : offset += written;
723 558608 : total_written += written;
724 : }
725 :
726 : Assert(total_written == size);
727 :
728 394922 : return total_written;
729 : }
|