Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_test_fsync --- tests all supported fsync() methods
4 : *
5 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
6 : *
7 : * src/bin/pg_test_fsync/pg_test_fsync.c
8 : *
9 : *-------------------------------------------------------------------------
10 : */
11 :
12 : #include "postgres_fe.h"
13 :
14 : #include <limits.h>
15 : #include <sys/stat.h>
16 : #include <sys/time.h>
17 : #include <fcntl.h>
18 : #include <time.h>
19 : #include <unistd.h>
20 : #include <signal.h>
21 :
22 : #include "common/logging.h"
23 : #include "common/pg_prng.h"
24 : #include "getopt_long.h"
25 :
/*
 * Default location of the scratch file: the current working directory.
 * handle_args() replaces it when the user passes -f/--filename.
 */
#define FSYNC_FILENAME	"./pg_test_fsync.out"

/* WAL block size in kilobytes, used when printing section headings */
#define XLOG_BLCKSZ_K	(XLOG_BLCKSZ / 1024)

/* printf formats for the label column and the "not available" result */
#define LABEL_FORMAT	" %-30s"
#define NA_FORMAT		"%21s\n"

/* Microseconds per second, for converting seconds/op to usecs/op */
#define USECS_SEC		1000000

/* translator: maintain alignment with NA_FORMAT */
#define OPS_FORMAT		gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
39 :
/*
 * Timing helpers.  These are macros rather than functions so that function
 * call overhead is not included in the measured interval.
 *
 * START_TIMER clears alarm_triggered, arranges for it to be set again once
 * secs_per_test seconds have passed, and stores the start time in start_t.
 *
 * STOP_TIMER stores the stop time in stop_t and hands both timestamps to
 * print_elapse(); it requires an integer variable named "ops" in scope.
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 *
 * CreateThread() signals failure by returning NULL, not
 * INVALID_HANDLE_VALUE, so that is what must be tested.  The handle is
 * closed immediately because nothing ever waits on the thread; closing the
 * handle does not terminate the thread, it only avoids leaking one handle
 * per timed test.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread; \
	alarm_triggered = false; \
	alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread == NULL) \
		pg_fatal("could not create thread for alarm"); \
	CloseHandle(alarm_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif

#define STOP_TIMER	\
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)

65 :
66 :
67 : static const char *progname;
68 :
69 : static unsigned int secs_per_test = 5;
70 : static int needs_unlink = 0;
71 : static char full_buf[DEFAULT_XLOG_SEG_SIZE],
72 : *buf,
73 : *filename = FSYNC_FILENAME;
74 : static struct timeval start_t,
75 : stop_t;
76 : static sig_atomic_t alarm_triggered = false;
77 :
78 :
/* Command-line processing and buffer setup */
static void handle_args(int argc, char *argv[]);
static void prepare_buf(void);

/* The individual benchmarks */
static void test_open(void);
static void test_sync(int writes_per_op);
static void test_open_syncs(void);
static void test_open_sync(const char *msg, int writes_size);
static void test_file_descriptor_sync(void);
static void test_non_sync(void);

/* Timer expiry: a signal handler on Unix, a thread entry point on Windows */
#ifdef WIN32
static DWORD WINAPI process_alarm(LPVOID param);
#else
static void process_alarm(SIGNAL_ARGS);
#endif

/* Handler installed by main() for SIGINT, SIGTERM and (if defined) SIGHUP */
static void signal_cleanup(SIGNAL_ARGS);

#ifdef HAVE_FSYNC_WRITETHROUGH
static int	pg_fsync_writethrough(int fd);
#endif

/* Result reporting, invoked through STOP_TIMER */
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);

/* Pass the translated message, with ": %m" appended, to pg_fatal() */
#define die(msg) pg_fatal("%s: %m", _(msg))
101 :
102 :
103 : int
104 10 : main(int argc, char *argv[])
105 : {
106 10 : pg_logging_init(argv[0]);
107 10 : set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
108 10 : progname = get_progname(argv[0]);
109 :
110 10 : handle_args(argc, argv);
111 :
112 : /* Prevent leaving behind the test file */
113 0 : pqsignal(SIGINT, signal_cleanup);
114 0 : pqsignal(SIGTERM, signal_cleanup);
115 : #ifndef WIN32
116 0 : pqsignal(SIGALRM, process_alarm);
117 : #endif
118 : #ifdef SIGHUP
119 : /* Not defined on win32 */
120 0 : pqsignal(SIGHUP, signal_cleanup);
121 : #endif
122 :
123 0 : pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));
124 :
125 0 : prepare_buf();
126 :
127 0 : test_open();
128 :
129 : /* Test using 1 XLOG_BLCKSZ write */
130 0 : test_sync(1);
131 :
132 : /* Test using 2 XLOG_BLCKSZ writes */
133 0 : test_sync(2);
134 :
135 0 : test_open_syncs();
136 :
137 0 : test_file_descriptor_sync();
138 :
139 0 : test_non_sync();
140 :
141 0 : unlink(filename);
142 :
143 0 : return 0;
144 : }
145 :
146 : static void
147 10 : handle_args(int argc, char *argv[])
148 : {
149 : static struct option long_options[] = {
150 : {"filename", required_argument, NULL, 'f'},
151 : {"secs-per-test", required_argument, NULL, 's'},
152 : {NULL, 0, NULL, 0}
153 : };
154 :
155 : int option; /* Command line option */
156 10 : int optindex = 0; /* used by getopt_long */
157 : unsigned long optval; /* used for option parsing */
158 : char *endptr;
159 :
160 10 : if (argc > 1)
161 : {
162 10 : if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
163 : {
164 2 : printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
165 2 : exit(0);
166 : }
167 8 : if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
168 : {
169 2 : puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
170 2 : exit(0);
171 : }
172 : }
173 :
174 6 : while ((option = getopt_long(argc, argv, "f:s:",
175 : long_options, &optindex)) != -1)
176 : {
177 6 : switch (option)
178 : {
179 0 : case 'f':
180 0 : filename = pg_strdup(optarg);
181 0 : break;
182 :
183 4 : case 's':
184 4 : errno = 0;
185 4 : optval = strtoul(optarg, &endptr, 10);
186 :
187 4 : if (endptr == optarg || *endptr != '\0' ||
188 2 : errno != 0 || optval != (unsigned int) optval)
189 : {
190 2 : pg_log_error("invalid argument for option %s", "--secs-per-test");
191 2 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
192 2 : exit(1);
193 : }
194 :
195 2 : secs_per_test = (unsigned int) optval;
196 2 : if (secs_per_test == 0)
197 2 : pg_fatal("%s must be in range %u..%u",
198 : "--secs-per-test", 1, UINT_MAX);
199 0 : break;
200 :
201 2 : default:
202 : /* getopt_long already emitted a complaint */
203 2 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
204 2 : exit(1);
205 : }
206 : }
207 :
208 0 : if (argc > optind)
209 : {
210 0 : pg_log_error("too many command-line arguments (first is \"%s\")",
211 : argv[optind]);
212 0 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
213 0 : exit(1);
214 : }
215 :
216 0 : printf(ngettext("%u second per test\n",
217 : "%u seconds per test\n",
218 : secs_per_test),
219 : secs_per_test);
220 : #if defined(O_DIRECT)
221 0 : printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
222 : #elif defined(F_NOCACHE)
223 : printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n"));
224 : #else
225 : printf(_("Direct I/O is not supported on this platform.\n"));
226 : #endif
227 0 : }
228 :
229 : static void
230 0 : prepare_buf(void)
231 : {
232 : int ops;
233 :
234 : /* write random data into buffer */
235 0 : for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
236 0 : full_buf[ops] = (char) pg_prng_int32(&pg_global_prng_state);
237 :
238 0 : buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
239 0 : }
240 :
241 : static void
242 0 : test_open(void)
243 : {
244 : int tmpfile;
245 :
246 : /*
247 : * test if we can open the target file
248 : */
249 0 : if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1)
250 0 : die("could not open output file");
251 0 : needs_unlink = 1;
252 0 : if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
253 : DEFAULT_XLOG_SEG_SIZE)
254 0 : die("write failed");
255 :
256 : /* fsync now so that dirty buffers don't skew later tests */
257 0 : if (fsync(tmpfile) != 0)
258 0 : die("fsync failed");
259 :
260 0 : close(tmpfile);
261 0 : }
262 :
/*
 * Open "path" like open(2), additionally requesting direct I/O where the
 * platform offers a way to do so: O_DIRECT is added to the flags if it is
 * defined; otherwise, if F_NOCACHE is defined, it is enabled with fcntl()
 * after a successful open.
 *
 * Returns the file descriptor, or the failing open() result.  If the
 * F_NOCACHE fcntl() fails, the descriptor is closed and -1 is returned with
 * errno preserved from the fcntl() call.
 */
static int
open_direct(const char *path, int flags, mode_t mode)
{
#if defined(O_DIRECT)
	return open(path, flags | O_DIRECT, mode);
#elif defined(F_NOCACHE)
	int			fd = open(path, flags, mode);

	if (fd < 0)
		return fd;

	if (fcntl(fd, F_NOCACHE, 1) < 0)
	{
		/* close() may clobber errno; report the fcntl() failure */
		int			save_errno = errno;

		close(fd);
		errno = save_errno;
		return -1;
	}

	return fd;
#else
	return open(path, flags, mode);
#endif
}
287 :
288 : static void
289 0 : test_sync(int writes_per_op)
290 : {
291 : int tmpfile,
292 : ops,
293 : writes;
294 0 : bool fs_warning = false;
295 :
296 0 : if (writes_per_op == 1)
297 0 : printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
298 : else
299 0 : printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
300 0 : printf(_("(in \"wal_sync_method\" preference order, except fdatasync is Linux's default)\n"));
301 :
302 : /*
303 : * Test open_datasync if available
304 : */
305 0 : printf(LABEL_FORMAT, "open_datasync");
306 0 : fflush(stdout);
307 :
308 : #ifdef O_DSYNC
309 0 : if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
310 : {
311 0 : printf(NA_FORMAT, _("n/a*"));
312 0 : fs_warning = true;
313 : }
314 : else
315 : {
316 0 : START_TIMER;
317 0 : for (ops = 0; alarm_triggered == false; ops++)
318 : {
319 0 : for (writes = 0; writes < writes_per_op; writes++)
320 0 : if (pg_pwrite(tmpfile,
321 : buf,
322 : XLOG_BLCKSZ,
323 0 : writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
324 0 : die("write failed");
325 : }
326 0 : STOP_TIMER;
327 0 : close(tmpfile);
328 : }
329 : #else
330 : printf(NA_FORMAT, _("n/a"));
331 : #endif
332 :
333 : /*
334 : * Test fdatasync if available
335 : */
336 0 : printf(LABEL_FORMAT, "fdatasync");
337 0 : fflush(stdout);
338 :
339 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
340 0 : die("could not open output file");
341 0 : START_TIMER;
342 0 : for (ops = 0; alarm_triggered == false; ops++)
343 : {
344 0 : for (writes = 0; writes < writes_per_op; writes++)
345 0 : if (pg_pwrite(tmpfile,
346 : buf,
347 : XLOG_BLCKSZ,
348 0 : writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
349 0 : die("write failed");
350 0 : fdatasync(tmpfile);
351 : }
352 0 : STOP_TIMER;
353 0 : close(tmpfile);
354 :
355 : /*
356 : * Test fsync
357 : */
358 0 : printf(LABEL_FORMAT, "fsync");
359 0 : fflush(stdout);
360 :
361 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
362 0 : die("could not open output file");
363 0 : START_TIMER;
364 0 : for (ops = 0; alarm_triggered == false; ops++)
365 : {
366 0 : for (writes = 0; writes < writes_per_op; writes++)
367 0 : if (pg_pwrite(tmpfile,
368 : buf,
369 : XLOG_BLCKSZ,
370 0 : writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
371 0 : die("write failed");
372 0 : if (fsync(tmpfile) != 0)
373 0 : die("fsync failed");
374 : }
375 0 : STOP_TIMER;
376 0 : close(tmpfile);
377 :
378 : /*
379 : * If fsync_writethrough is available, test as well
380 : */
381 0 : printf(LABEL_FORMAT, "fsync_writethrough");
382 0 : fflush(stdout);
383 :
384 : #ifdef HAVE_FSYNC_WRITETHROUGH
385 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
386 : die("could not open output file");
387 : START_TIMER;
388 : for (ops = 0; alarm_triggered == false; ops++)
389 : {
390 : for (writes = 0; writes < writes_per_op; writes++)
391 : if (pg_pwrite(tmpfile,
392 : buf,
393 : XLOG_BLCKSZ,
394 : writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
395 : die("write failed");
396 : if (pg_fsync_writethrough(tmpfile) != 0)
397 : die("fsync failed");
398 : }
399 : STOP_TIMER;
400 : close(tmpfile);
401 : #else
402 0 : printf(NA_FORMAT, _("n/a"));
403 : #endif
404 :
405 : /*
406 : * Test open_sync if available
407 : */
408 0 : printf(LABEL_FORMAT, "open_sync");
409 0 : fflush(stdout);
410 :
411 : #ifdef O_SYNC
412 0 : if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
413 : {
414 0 : printf(NA_FORMAT, _("n/a*"));
415 0 : fs_warning = true;
416 : }
417 : else
418 : {
419 0 : START_TIMER;
420 0 : for (ops = 0; alarm_triggered == false; ops++)
421 : {
422 0 : for (writes = 0; writes < writes_per_op; writes++)
423 0 : if (pg_pwrite(tmpfile,
424 : buf,
425 : XLOG_BLCKSZ,
426 0 : writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
427 :
428 : /*
429 : * This can generate write failures if the filesystem has
430 : * a large block size, e.g. 4k, and there is no support
431 : * for O_DIRECT writes smaller than the file system block
432 : * size, e.g. XFS.
433 : */
434 0 : die("write failed");
435 : }
436 0 : STOP_TIMER;
437 0 : close(tmpfile);
438 : }
439 : #else
440 : printf(NA_FORMAT, _("n/a"));
441 : #endif
442 :
443 0 : if (fs_warning)
444 : {
445 0 : printf(_("* This file system and its mount options do not support direct\n"
446 : " I/O, e.g. ext4 in journaled mode.\n"));
447 : }
448 0 : }
449 :
450 : static void
451 0 : test_open_syncs(void)
452 : {
453 0 : printf(_("\nCompare open_sync with different write sizes:\n"));
454 0 : printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
455 : "open_sync sizes.)\n"));
456 :
457 0 : test_open_sync(_(" 1 * 16kB open_sync write"), 16);
458 0 : test_open_sync(_(" 2 * 8kB open_sync writes"), 8);
459 0 : test_open_sync(_(" 4 * 4kB open_sync writes"), 4);
460 0 : test_open_sync(_(" 8 * 2kB open_sync writes"), 2);
461 0 : test_open_sync(_("16 * 1kB open_sync writes"), 1);
462 0 : }
463 :
464 : /*
465 : * Test open_sync with different size files
466 : */
467 : static void
468 0 : test_open_sync(const char *msg, int writes_size)
469 : {
470 : #ifdef O_SYNC
471 : int tmpfile,
472 : ops,
473 : writes;
474 : #endif
475 :
476 0 : printf(LABEL_FORMAT, msg);
477 0 : fflush(stdout);
478 :
479 : #ifdef O_SYNC
480 0 : if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
481 0 : printf(NA_FORMAT, _("n/a*"));
482 : else
483 : {
484 0 : START_TIMER;
485 0 : for (ops = 0; alarm_triggered == false; ops++)
486 : {
487 0 : for (writes = 0; writes < 16 / writes_size; writes++)
488 0 : if (pg_pwrite(tmpfile,
489 : buf,
490 0 : writes_size * 1024,
491 0 : writes * writes_size * 1024) !=
492 0 : writes_size * 1024)
493 0 : die("write failed");
494 : }
495 0 : STOP_TIMER;
496 0 : close(tmpfile);
497 : }
498 : #else
499 : printf(NA_FORMAT, _("n/a"));
500 : #endif
501 0 : }
502 :
503 : static void
504 0 : test_file_descriptor_sync(void)
505 : {
506 : int tmpfile,
507 : ops;
508 :
509 : /*
510 : * Test whether fsync can sync data written on a different descriptor for
511 : * the same file. This checks the efficiency of multi-process fsyncs
512 : * against the same file. Possibly this should be done with writethrough
513 : * on platforms which support it.
514 : */
515 0 : printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
516 0 : printf(_("(If the times are similar, fsync() can sync data written on a different\n"
517 : "descriptor.)\n"));
518 :
519 : /*
520 : * first write, fsync and close, which is the normal behavior without
521 : * multiple descriptors
522 : */
523 0 : printf(LABEL_FORMAT, "write, fsync, close");
524 0 : fflush(stdout);
525 :
526 0 : START_TIMER;
527 0 : for (ops = 0; alarm_triggered == false; ops++)
528 : {
529 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
530 0 : die("could not open output file");
531 0 : if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
532 0 : die("write failed");
533 0 : if (fsync(tmpfile) != 0)
534 0 : die("fsync failed");
535 0 : close(tmpfile);
536 :
537 : /*
538 : * open and close the file again to be consistent with the following
539 : * test
540 : */
541 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
542 0 : die("could not open output file");
543 0 : close(tmpfile);
544 : }
545 0 : STOP_TIMER;
546 :
547 : /*
548 : * Now open, write, close, open again and fsync This simulates processes
549 : * fsyncing each other's writes.
550 : */
551 0 : printf(LABEL_FORMAT, "write, close, fsync");
552 0 : fflush(stdout);
553 :
554 0 : START_TIMER;
555 0 : for (ops = 0; alarm_triggered == false; ops++)
556 : {
557 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
558 0 : die("could not open output file");
559 0 : if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
560 0 : die("write failed");
561 0 : close(tmpfile);
562 : /* reopen file */
563 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
564 0 : die("could not open output file");
565 0 : if (fsync(tmpfile) != 0)
566 0 : die("fsync failed");
567 0 : close(tmpfile);
568 : }
569 0 : STOP_TIMER;
570 0 : }
571 :
572 : static void
573 0 : test_non_sync(void)
574 : {
575 : int tmpfile,
576 : ops;
577 :
578 : /*
579 : * Test a simple write without fsync
580 : */
581 0 : printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
582 0 : printf(LABEL_FORMAT, "write");
583 0 : fflush(stdout);
584 :
585 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
586 0 : die("could not open output file");
587 0 : START_TIMER;
588 0 : for (ops = 0; alarm_triggered == false; ops++)
589 : {
590 0 : if (pg_pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ)
591 0 : die("write failed");
592 : }
593 0 : STOP_TIMER;
594 0 : close(tmpfile);
595 0 : }
596 :
597 : static void
598 0 : signal_cleanup(SIGNAL_ARGS)
599 : {
600 : int rc;
601 :
602 : /* Delete the file if it exists. Ignore errors */
603 0 : if (needs_unlink)
604 0 : unlink(filename);
605 : /* Finish incomplete line on stdout */
606 0 : rc = write(STDOUT_FILENO, "\n", 1);
607 : (void) rc; /* silence compiler warnings */
608 0 : _exit(1);
609 : }
610 :
#ifdef HAVE_FSYNC_WRITETHROUGH

/*
 * Sync fd with fcntl(F_FULLFSYNC) where that is defined.
 *
 * Returns 0 on success and -1 on failure.  Where F_FULLFSYNC is not
 * defined, always fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#if defined(F_FULLFSYNC)
	if (fcntl(fd, F_FULLFSYNC, 0) == -1)
		return -1;
	return 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
624 :
625 : /*
626 : * print out the writes per second for tests
627 : */
628 : static void
629 0 : print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
630 : {
631 0 : double total_time = (stop_t.tv_sec - start_t.tv_sec) +
632 0 : (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
633 0 : double per_second = ops / total_time;
634 0 : double avg_op_time_us = (total_time / ops) * USECS_SEC;
635 :
636 0 : printf(_(OPS_FORMAT), per_second, avg_op_time_us);
637 0 : }
638 :
639 : #ifndef WIN32
640 : static void
641 0 : process_alarm(SIGNAL_ARGS)
642 : {
643 0 : alarm_triggered = true;
644 0 : }
645 : #else
646 : static DWORD WINAPI
647 : process_alarm(LPVOID param)
648 : {
649 : /* WIN32 doesn't support alarm, so we create a thread and sleep here */
650 : Sleep(secs_per_test * 1000);
651 : alarm_triggered = true;
652 : ExitThread(0);
653 : }
654 : #endif
|