Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_test_fsync --- tests all supported fsync() methods
4 : *
5 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
6 : *
7 : * src/bin/pg_test_fsync/pg_test_fsync.c
8 : *
9 : *-------------------------------------------------------------------------
10 : */
11 :
12 : #include "postgres_fe.h"
13 :
14 : #include <limits.h>
15 : #include <sys/stat.h>
16 : #include <sys/time.h>
17 : #include <fcntl.h>
18 : #include <time.h>
19 : #include <unistd.h>
20 : #include <signal.h>
21 :
22 : #include "access/xlogdefs.h"
23 : #include "common/logging.h"
24 : #include "common/pg_prng.h"
25 : #include "getopt_long.h"
26 :
/*
 * put the temp files in the local directory
 * unless the user specifies otherwise
 */
#define FSYNC_FILENAME "./pg_test_fsync.out"

/* XLOG_BLCKSZ expressed in kB; used when printing test headings */
#define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)

/*
 * printf formats for one result line: LABEL_FORMAT prints the method name,
 * then either NA_FORMAT (method unavailable) or OPS_FORMAT (measured rate
 * and per-operation time) completes the line.
 */
#define LABEL_FORMAT " %-30s"
#define NA_FORMAT "%21s\n"
/* translator: maintain alignment with NA_FORMAT */
#define OPS_FORMAT gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
/* microseconds per second, for converting seconds/op to usecs/op */
#define USECS_SEC 1000000
40 :
/*
 * Timing macros.  These are macros rather than functions to avoid timing
 * the function call overhead.
 *
 * START_TIMER clears alarm_triggered, arranges for it to be set again once
 * secs_per_test seconds have passed, and records the start time in start_t.
 * STOP_TIMER records the stop time in stop_t and prints the result for the
 * caller's local variable "ops".
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 *
 * CreateThread() reports failure by returning NULL, not
 * INVALID_HANDLE_VALUE.  The handle is not used afterwards, so it is closed
 * immediately to avoid leaking one handle per test; closing the handle does
 * not terminate the thread.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread; \
	alarm_triggered = false; \
	alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread == NULL) \
		pg_fatal("could not create thread for alarm"); \
	CloseHandle(alarm_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif

#define STOP_TIMER \
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)
66 :
67 :
68 : static const char *progname;
69 :
70 : static unsigned int secs_per_test = 5;
71 : static int needs_unlink = 0;
72 : static char full_buf[DEFAULT_XLOG_SEG_SIZE],
73 : *buf,
74 : *filename = FSYNC_FILENAME;
75 : static struct timeval start_t,
76 : stop_t;
77 : static sig_atomic_t alarm_triggered = false;
78 :
79 :
/* option parsing and buffer setup */
static void handle_args(int argc, char *argv[]);
static void prepare_buf(void);

/* the individual test groups, called from main() */
static void test_open(void);
static void test_non_sync(void);
static void test_sync(int writes_per_op);
static void test_open_syncs(void);
static void test_open_sync(const char *msg, int writes_size);
static void test_file_descriptor_sync(void);

/* timer expiry: a signal handler on Unix, a thread start routine on WIN32 */
#ifndef WIN32
static void process_alarm(SIGNAL_ARGS);
#else
static DWORD WINAPI process_alarm(LPVOID param);
#endif
static void signal_cleanup(SIGNAL_ARGS);

#ifdef HAVE_FSYNC_WRITETHROUGH
static int pg_fsync_writethrough(int fd);
#endif
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);

/* report a fatal error: the translated message followed by the %m error text */
#define die(msg) pg_fatal("%s: %m", _(msg))
102 :
103 :
104 : int
105 10 : main(int argc, char *argv[])
106 : {
107 10 : pg_logging_init(argv[0]);
108 10 : set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
109 10 : progname = get_progname(argv[0]);
110 :
111 10 : handle_args(argc, argv);
112 :
113 : /* Prevent leaving behind the test file */
114 0 : pqsignal(SIGINT, signal_cleanup);
115 0 : pqsignal(SIGTERM, signal_cleanup);
116 : #ifndef WIN32
117 0 : pqsignal(SIGALRM, process_alarm);
118 : #endif
119 : #ifdef SIGHUP
120 : /* Not defined on win32 */
121 0 : pqsignal(SIGHUP, signal_cleanup);
122 : #endif
123 :
124 0 : pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));
125 :
126 0 : prepare_buf();
127 :
128 0 : test_open();
129 :
130 : /* Test using 1 XLOG_BLCKSZ write */
131 0 : test_sync(1);
132 :
133 : /* Test using 2 XLOG_BLCKSZ writes */
134 0 : test_sync(2);
135 :
136 0 : test_open_syncs();
137 :
138 0 : test_file_descriptor_sync();
139 :
140 0 : test_non_sync();
141 :
142 0 : unlink(filename);
143 :
144 0 : return 0;
145 : }
146 :
147 : static void
148 10 : handle_args(int argc, char *argv[])
149 : {
150 : static struct option long_options[] = {
151 : {"filename", required_argument, NULL, 'f'},
152 : {"secs-per-test", required_argument, NULL, 's'},
153 : {NULL, 0, NULL, 0}
154 : };
155 :
156 : int option; /* Command line option */
157 10 : int optindex = 0; /* used by getopt_long */
158 : unsigned long optval; /* used for option parsing */
159 : char *endptr;
160 :
161 10 : if (argc > 1)
162 : {
163 10 : if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
164 : {
165 2 : printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
166 2 : exit(0);
167 : }
168 8 : if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
169 : {
170 2 : puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
171 2 : exit(0);
172 : }
173 : }
174 :
175 6 : while ((option = getopt_long(argc, argv, "f:s:",
176 : long_options, &optindex)) != -1)
177 : {
178 6 : switch (option)
179 : {
180 0 : case 'f':
181 0 : filename = pg_strdup(optarg);
182 0 : break;
183 :
184 4 : case 's':
185 4 : errno = 0;
186 4 : optval = strtoul(optarg, &endptr, 10);
187 :
188 4 : if (endptr == optarg || *endptr != '\0' ||
189 2 : errno != 0 || optval != (unsigned int) optval)
190 : {
191 2 : pg_log_error("invalid argument for option %s", "--secs-per-test");
192 2 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
193 2 : exit(1);
194 : }
195 :
196 2 : secs_per_test = (unsigned int) optval;
197 2 : if (secs_per_test == 0)
198 2 : pg_fatal("%s must be in range %u..%u",
199 : "--secs-per-test", 1, UINT_MAX);
200 0 : break;
201 :
202 2 : default:
203 : /* getopt_long already emitted a complaint */
204 2 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
205 2 : exit(1);
206 : }
207 : }
208 :
209 0 : if (argc > optind)
210 : {
211 0 : pg_log_error("too many command-line arguments (first is \"%s\")",
212 : argv[optind]);
213 0 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
214 0 : exit(1);
215 : }
216 :
217 0 : printf(ngettext("%u second per test\n",
218 : "%u seconds per test\n",
219 : secs_per_test),
220 : secs_per_test);
221 : #if defined(O_DIRECT)
222 0 : printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
223 : #elif defined(F_NOCACHE)
224 : printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n"));
225 : #else
226 : printf(_("Direct I/O is not supported on this platform.\n"));
227 : #endif
228 0 : }
229 :
230 : static void
231 0 : prepare_buf(void)
232 : {
233 : int ops;
234 :
235 : /* write random data into buffer */
236 0 : for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
237 0 : full_buf[ops] = (char) pg_prng_int32(&pg_global_prng_state);
238 :
239 0 : buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
240 0 : }
241 :
242 : static void
243 0 : test_open(void)
244 : {
245 : int tmpfile;
246 :
247 : /*
248 : * test if we can open the target file
249 : */
250 0 : if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1)
251 0 : die("could not open output file");
252 0 : needs_unlink = 1;
253 0 : if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
254 : DEFAULT_XLOG_SEG_SIZE)
255 0 : die("write failed");
256 :
257 : /* fsync now so that dirty buffers don't skew later tests */
258 0 : if (fsync(tmpfile) != 0)
259 0 : die("fsync failed");
260 :
261 0 : close(tmpfile);
262 0 : }
263 :
/*
 * open() wrapper that requests direct I/O where the platform offers it.
 *
 * If O_DIRECT is defined it is added to the open flags.  Otherwise, if
 * F_NOCACHE is defined, it is applied with fcntl() after a successful open;
 * should that fail, the descriptor is closed and -1 is returned with errno
 * from the fcntl() call preserved.
 *
 * Returns the new file descriptor, or -1 on failure.
 */
static int
open_direct(const char *path, int flags, mode_t mode)
{
	int			open_flags = flags;
	int			fd;

#ifdef O_DIRECT
	open_flags |= O_DIRECT;
#endif

	fd = open(path, open_flags, mode);
	if (fd < 0)
		return fd;

#if !defined(O_DIRECT) && defined(F_NOCACHE)
	if (fcntl(fd, F_NOCACHE, 1) < 0)
	{
		/* close() may overwrite errno; keep the fcntl() failure reason */
		int			fcntl_errno = errno;

		close(fd);
		errno = fcntl_errno;
		return -1;
	}
#endif

	return fd;
}
288 :
289 : static void
290 0 : test_sync(int writes_per_op)
291 : {
292 : int tmpfile,
293 : ops,
294 : writes;
295 0 : bool fs_warning = false;
296 :
297 0 : if (writes_per_op == 1)
298 0 : printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
299 : else
300 0 : printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
301 0 : printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n"));
302 :
303 : /*
304 : * Test open_datasync if available
305 : */
306 0 : printf(LABEL_FORMAT, "open_datasync");
307 0 : fflush(stdout);
308 :
309 : #ifdef O_DSYNC
310 0 : if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
311 : {
312 0 : printf(NA_FORMAT, _("n/a*"));
313 0 : fs_warning = true;
314 : }
315 : else
316 : {
317 0 : START_TIMER;
318 0 : for (ops = 0; alarm_triggered == false; ops++)
319 : {
320 0 : for (writes = 0; writes < writes_per_op; writes++)
321 0 : if (pg_pwrite(tmpfile,
322 : buf,
323 : XLOG_BLCKSZ,
324 0 : writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
325 0 : die("write failed");
326 : }
327 0 : STOP_TIMER;
328 0 : close(tmpfile);
329 : }
330 : #else
331 : printf(NA_FORMAT, _("n/a"));
332 : #endif
333 :
334 : /*
335 : * Test fdatasync if available
336 : */
337 0 : printf(LABEL_FORMAT, "fdatasync");
338 0 : fflush(stdout);
339 :
340 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
341 0 : die("could not open output file");
342 0 : START_TIMER;
343 0 : for (ops = 0; alarm_triggered == false; ops++)
344 : {
345 0 : for (writes = 0; writes < writes_per_op; writes++)
346 0 : if (pg_pwrite(tmpfile,
347 : buf,
348 : XLOG_BLCKSZ,
349 0 : writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
350 0 : die("write failed");
351 0 : fdatasync(tmpfile);
352 : }
353 0 : STOP_TIMER;
354 0 : close(tmpfile);
355 :
356 : /*
357 : * Test fsync
358 : */
359 0 : printf(LABEL_FORMAT, "fsync");
360 0 : fflush(stdout);
361 :
362 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
363 0 : die("could not open output file");
364 0 : START_TIMER;
365 0 : for (ops = 0; alarm_triggered == false; ops++)
366 : {
367 0 : for (writes = 0; writes < writes_per_op; writes++)
368 0 : if (pg_pwrite(tmpfile,
369 : buf,
370 : XLOG_BLCKSZ,
371 0 : writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
372 0 : die("write failed");
373 0 : if (fsync(tmpfile) != 0)
374 0 : die("fsync failed");
375 : }
376 0 : STOP_TIMER;
377 0 : close(tmpfile);
378 :
379 : /*
380 : * If fsync_writethrough is available, test as well
381 : */
382 0 : printf(LABEL_FORMAT, "fsync_writethrough");
383 0 : fflush(stdout);
384 :
385 : #ifdef HAVE_FSYNC_WRITETHROUGH
386 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
387 : die("could not open output file");
388 : START_TIMER;
389 : for (ops = 0; alarm_triggered == false; ops++)
390 : {
391 : for (writes = 0; writes < writes_per_op; writes++)
392 : if (pg_pwrite(tmpfile,
393 : buf,
394 : XLOG_BLCKSZ,
395 : writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
396 : die("write failed");
397 : if (pg_fsync_writethrough(tmpfile) != 0)
398 : die("fsync failed");
399 : }
400 : STOP_TIMER;
401 : close(tmpfile);
402 : #else
403 0 : printf(NA_FORMAT, _("n/a"));
404 : #endif
405 :
406 : /*
407 : * Test open_sync if available
408 : */
409 0 : printf(LABEL_FORMAT, "open_sync");
410 0 : fflush(stdout);
411 :
412 : #ifdef O_SYNC
413 0 : if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
414 : {
415 0 : printf(NA_FORMAT, _("n/a*"));
416 0 : fs_warning = true;
417 : }
418 : else
419 : {
420 0 : START_TIMER;
421 0 : for (ops = 0; alarm_triggered == false; ops++)
422 : {
423 0 : for (writes = 0; writes < writes_per_op; writes++)
424 0 : if (pg_pwrite(tmpfile,
425 : buf,
426 : XLOG_BLCKSZ,
427 0 : writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
428 :
429 : /*
430 : * This can generate write failures if the filesystem has
431 : * a large block size, e.g. 4k, and there is no support
432 : * for O_DIRECT writes smaller than the file system block
433 : * size, e.g. XFS.
434 : */
435 0 : die("write failed");
436 : }
437 0 : STOP_TIMER;
438 0 : close(tmpfile);
439 : }
440 : #else
441 : printf(NA_FORMAT, _("n/a"));
442 : #endif
443 :
444 0 : if (fs_warning)
445 : {
446 0 : printf(_("* This file system and its mount options do not support direct\n"
447 : " I/O, e.g. ext4 in journaled mode.\n"));
448 : }
449 0 : }
450 :
451 : static void
452 0 : test_open_syncs(void)
453 : {
454 0 : printf(_("\nCompare open_sync with different write sizes:\n"));
455 0 : printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
456 : "open_sync sizes.)\n"));
457 :
458 0 : test_open_sync(_(" 1 * 16kB open_sync write"), 16);
459 0 : test_open_sync(_(" 2 * 8kB open_sync writes"), 8);
460 0 : test_open_sync(_(" 4 * 4kB open_sync writes"), 4);
461 0 : test_open_sync(_(" 8 * 2kB open_sync writes"), 2);
462 0 : test_open_sync(_("16 * 1kB open_sync writes"), 1);
463 0 : }
464 :
465 : /*
466 : * Test open_sync with different size files
467 : */
468 : static void
469 0 : test_open_sync(const char *msg, int writes_size)
470 : {
471 : #ifdef O_SYNC
472 : int tmpfile,
473 : ops,
474 : writes;
475 : #endif
476 :
477 0 : printf(LABEL_FORMAT, msg);
478 0 : fflush(stdout);
479 :
480 : #ifdef O_SYNC
481 0 : if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
482 0 : printf(NA_FORMAT, _("n/a*"));
483 : else
484 : {
485 0 : START_TIMER;
486 0 : for (ops = 0; alarm_triggered == false; ops++)
487 : {
488 0 : for (writes = 0; writes < 16 / writes_size; writes++)
489 0 : if (pg_pwrite(tmpfile,
490 : buf,
491 0 : writes_size * 1024,
492 0 : writes * writes_size * 1024) !=
493 0 : writes_size * 1024)
494 0 : die("write failed");
495 : }
496 0 : STOP_TIMER;
497 0 : close(tmpfile);
498 : }
499 : #else
500 : printf(NA_FORMAT, _("n/a"));
501 : #endif
502 0 : }
503 :
504 : static void
505 0 : test_file_descriptor_sync(void)
506 : {
507 : int tmpfile,
508 : ops;
509 :
510 : /*
511 : * Test whether fsync can sync data written on a different descriptor for
512 : * the same file. This checks the efficiency of multi-process fsyncs
513 : * against the same file. Possibly this should be done with writethrough
514 : * on platforms which support it.
515 : */
516 0 : printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
517 0 : printf(_("(If the times are similar, fsync() can sync data written on a different\n"
518 : "descriptor.)\n"));
519 :
520 : /*
521 : * first write, fsync and close, which is the normal behavior without
522 : * multiple descriptors
523 : */
524 0 : printf(LABEL_FORMAT, "write, fsync, close");
525 0 : fflush(stdout);
526 :
527 0 : START_TIMER;
528 0 : for (ops = 0; alarm_triggered == false; ops++)
529 : {
530 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
531 0 : die("could not open output file");
532 0 : if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
533 0 : die("write failed");
534 0 : if (fsync(tmpfile) != 0)
535 0 : die("fsync failed");
536 0 : close(tmpfile);
537 :
538 : /*
539 : * open and close the file again to be consistent with the following
540 : * test
541 : */
542 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
543 0 : die("could not open output file");
544 0 : close(tmpfile);
545 : }
546 0 : STOP_TIMER;
547 :
548 : /*
549 : * Now open, write, close, open again and fsync This simulates processes
550 : * fsyncing each other's writes.
551 : */
552 0 : printf(LABEL_FORMAT, "write, close, fsync");
553 0 : fflush(stdout);
554 :
555 0 : START_TIMER;
556 0 : for (ops = 0; alarm_triggered == false; ops++)
557 : {
558 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
559 0 : die("could not open output file");
560 0 : if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
561 0 : die("write failed");
562 0 : close(tmpfile);
563 : /* reopen file */
564 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
565 0 : die("could not open output file");
566 0 : if (fsync(tmpfile) != 0)
567 0 : die("fsync failed");
568 0 : close(tmpfile);
569 : }
570 0 : STOP_TIMER;
571 0 : }
572 :
573 : static void
574 0 : test_non_sync(void)
575 : {
576 : int tmpfile,
577 : ops;
578 :
579 : /*
580 : * Test a simple write without fsync
581 : */
582 0 : printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
583 0 : printf(LABEL_FORMAT, "write");
584 0 : fflush(stdout);
585 :
586 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
587 0 : die("could not open output file");
588 0 : START_TIMER;
589 0 : for (ops = 0; alarm_triggered == false; ops++)
590 : {
591 0 : if (pg_pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ)
592 0 : die("write failed");
593 : }
594 0 : STOP_TIMER;
595 0 : close(tmpfile);
596 0 : }
597 :
598 : static void
599 0 : signal_cleanup(SIGNAL_ARGS)
600 : {
601 : int rc;
602 :
603 : /* Delete the file if it exists. Ignore errors */
604 0 : if (needs_unlink)
605 0 : unlink(filename);
606 : /* Finish incomplete line on stdout */
607 0 : rc = write(STDOUT_FILENO, "\n", 1);
608 : (void) rc; /* silence compiler warnings */
609 0 : _exit(1);
610 : }
611 :
#ifdef HAVE_FSYNC_WRITETHROUGH

/*
 * Flush fd using F_FULLFSYNC where that fcntl command is defined.
 *
 * Returns 0 on success and -1 on failure.  Where F_FULLFSYNC is not
 * defined, always fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#if defined(F_FULLFSYNC)
	if (fcntl(fd, F_FULLFSYNC, 0) == -1)
		return -1;
	return 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
625 :
626 : /*
627 : * print out the writes per second for tests
628 : */
629 : static void
630 0 : print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
631 : {
632 0 : double total_time = (stop_t.tv_sec - start_t.tv_sec) +
633 0 : (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
634 0 : double per_second = ops / total_time;
635 0 : double avg_op_time_us = (total_time / ops) * USECS_SEC;
636 :
637 0 : printf(_(OPS_FORMAT), per_second, avg_op_time_us);
638 0 : }
639 :
640 : #ifndef WIN32
641 : static void
642 0 : process_alarm(SIGNAL_ARGS)
643 : {
644 0 : alarm_triggered = true;
645 0 : }
646 : #else
647 : static DWORD WINAPI
648 : process_alarm(LPVOID param)
649 : {
650 : /* WIN32 doesn't support alarm, so we create a thread and sleep here */
651 : Sleep(secs_per_test * 1000);
652 : alarm_triggered = true;
653 : ExitThread(0);
654 : }
655 : #endif
|