Line data Source code
1 : /*
2 : * pg_test_fsync.c
3 : * tests all supported fsync() methods
4 : */
5 :
6 : #include "postgres_fe.h"
7 :
8 : #include <limits.h>
9 : #include <sys/stat.h>
10 : #include <sys/time.h>
11 : #include <fcntl.h>
12 : #include <time.h>
13 : #include <unistd.h>
14 : #include <signal.h>
15 :
16 : #include "access/xlogdefs.h"
17 : #include "common/logging.h"
18 : #include "common/pg_prng.h"
19 : #include "getopt_long.h"
20 :
21 : /*
22 : * put the temp files in the local directory
23 : * unless the user specifies otherwise
24 : */
25 : #define FSYNC_FILENAME "./pg_test_fsync.out"
26 :
27 : #define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)
28 :
29 : #define LABEL_FORMAT " %-30s"
30 : #define NA_FORMAT "%21s\n"
31 : /* translator: maintain alignment with NA_FORMAT */
32 : #define OPS_FORMAT gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
33 : #define USECS_SEC 1000000
34 :
/*
 * Timing helpers.  These are macros rather than functions to avoid timing
 * the function call overhead.
 *
 * START_TIMER clears alarm_triggered, arranges for it to be set again after
 * secs_per_test seconds, and stores the current time in start_t.
 * STOP_TIMER stores the current time in stop_t and prints the result; it
 * expects an int variable named "ops" (the operation count) to be in scope.
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 * CreateThread() reports failure by returning NULL (not
 * INVALID_HANDLE_VALUE).  Nobody waits on the thread, so its handle is
 * closed immediately; closing the handle does not terminate the thread.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread; \
	alarm_triggered = false; \
	alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread == NULL) \
		pg_fatal("could not create thread for alarm"); \
	CloseHandle(alarm_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif

#define STOP_TIMER \
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)
60 :
61 :
62 : static const char *progname;
63 :
64 : static unsigned int secs_per_test = 5;
65 : static int needs_unlink = 0;
66 : static char full_buf[DEFAULT_XLOG_SEG_SIZE],
67 : *buf,
68 : *filename = FSYNC_FILENAME;
69 : static struct timeval start_t,
70 : stop_t;
71 : static sig_atomic_t alarm_triggered = false;
72 :
73 :
/* Report a translated message followed by the current errno text, then exit */
#define die(msg) pg_fatal("%s: %m", _(msg))

/* setup */
static void handle_args(int argc, char *argv[]);
static void prepare_buf(void);

/* the individual test groups */
static void test_open(void);
static void test_sync(int writes_per_op);
static void test_open_syncs(void);
static void test_open_sync(const char *msg, int writes_size);
static void test_file_descriptor_sync(void);
static void test_non_sync(void);

/* timer expiry and cleanup */
#ifdef WIN32
static DWORD WINAPI process_alarm(LPVOID param);
#else
static void process_alarm(SIGNAL_ARGS);
#endif
static void signal_cleanup(SIGNAL_ARGS);

/* helpers */
#ifdef HAVE_FSYNC_WRITETHROUGH
static int	pg_fsync_writethrough(int fd);
#endif
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
96 :
97 :
98 : int
99 10 : main(int argc, char *argv[])
100 : {
101 10 : pg_logging_init(argv[0]);
102 10 : set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
103 10 : progname = get_progname(argv[0]);
104 :
105 10 : handle_args(argc, argv);
106 :
107 : /* Prevent leaving behind the test file */
108 0 : pqsignal(SIGINT, signal_cleanup);
109 0 : pqsignal(SIGTERM, signal_cleanup);
110 : #ifndef WIN32
111 0 : pqsignal(SIGALRM, process_alarm);
112 : #endif
113 : #ifdef SIGHUP
114 : /* Not defined on win32 */
115 0 : pqsignal(SIGHUP, signal_cleanup);
116 : #endif
117 :
118 0 : pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));
119 :
120 0 : prepare_buf();
121 :
122 0 : test_open();
123 :
124 : /* Test using 1 XLOG_BLCKSZ write */
125 0 : test_sync(1);
126 :
127 : /* Test using 2 XLOG_BLCKSZ writes */
128 0 : test_sync(2);
129 :
130 0 : test_open_syncs();
131 :
132 0 : test_file_descriptor_sync();
133 :
134 0 : test_non_sync();
135 :
136 0 : unlink(filename);
137 :
138 0 : return 0;
139 : }
140 :
141 : static void
142 10 : handle_args(int argc, char *argv[])
143 : {
144 : static struct option long_options[] = {
145 : {"filename", required_argument, NULL, 'f'},
146 : {"secs-per-test", required_argument, NULL, 's'},
147 : {NULL, 0, NULL, 0}
148 : };
149 :
150 : int option; /* Command line option */
151 10 : int optindex = 0; /* used by getopt_long */
152 : unsigned long optval; /* used for option parsing */
153 : char *endptr;
154 :
155 10 : if (argc > 1)
156 : {
157 10 : if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
158 : {
159 2 : printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
160 2 : exit(0);
161 : }
162 8 : if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
163 : {
164 2 : puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
165 2 : exit(0);
166 : }
167 : }
168 :
169 6 : while ((option = getopt_long(argc, argv, "f:s:",
170 : long_options, &optindex)) != -1)
171 : {
172 6 : switch (option)
173 : {
174 0 : case 'f':
175 0 : filename = pg_strdup(optarg);
176 0 : break;
177 :
178 4 : case 's':
179 4 : errno = 0;
180 4 : optval = strtoul(optarg, &endptr, 10);
181 :
182 4 : if (endptr == optarg || *endptr != '\0' ||
183 2 : errno != 0 || optval != (unsigned int) optval)
184 : {
185 2 : pg_log_error("invalid argument for option %s", "--secs-per-test");
186 2 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
187 2 : exit(1);
188 : }
189 :
190 2 : secs_per_test = (unsigned int) optval;
191 2 : if (secs_per_test == 0)
192 2 : pg_fatal("%s must be in range %u..%u",
193 : "--secs-per-test", 1, UINT_MAX);
194 0 : break;
195 :
196 2 : default:
197 : /* getopt_long already emitted a complaint */
198 2 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
199 2 : exit(1);
200 : }
201 : }
202 :
203 0 : if (argc > optind)
204 : {
205 0 : pg_log_error("too many command-line arguments (first is \"%s\")",
206 : argv[optind]);
207 0 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
208 0 : exit(1);
209 : }
210 :
211 0 : printf(ngettext("%u second per test\n",
212 : "%u seconds per test\n",
213 : secs_per_test),
214 : secs_per_test);
215 : #if defined(O_DIRECT)
216 0 : printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
217 : #elif defined(F_NOCACHE)
218 : printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n"));
219 : #else
220 : printf(_("Direct I/O is not supported on this platform.\n"));
221 : #endif
222 0 : }
223 :
224 : static void
225 0 : prepare_buf(void)
226 : {
227 : int ops;
228 :
229 : /* write random data into buffer */
230 0 : for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
231 0 : full_buf[ops] = (char) pg_prng_int32(&pg_global_prng_state);
232 :
233 0 : buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
234 0 : }
235 :
236 : static void
237 0 : test_open(void)
238 : {
239 : int tmpfile;
240 :
241 : /*
242 : * test if we can open the target file
243 : */
244 0 : if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1)
245 0 : die("could not open output file");
246 0 : needs_unlink = 1;
247 0 : if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
248 : DEFAULT_XLOG_SEG_SIZE)
249 0 : die("write failed");
250 :
251 : /* fsync now so that dirty buffers don't skew later tests */
252 0 : if (fsync(tmpfile) != 0)
253 0 : die("fsync failed");
254 :
255 0 : close(tmpfile);
256 0 : }
257 :
/*
 * Open "path" requesting direct I/O where the platform offers it.
 *
 * If O_DIRECT is defined it is added to the open flags.  Otherwise, if
 * F_NOCACHE is defined, it is enabled with fcntl() after the open; should
 * that fcntl() fail, the descriptor is closed and -1 is returned with errno
 * describing the fcntl() failure.  With neither available this is a plain
 * open().  Returns the file descriptor, or -1 on error.
 */
static int
open_direct(const char *path, int flags, mode_t mode)
{
#ifdef O_DIRECT
	return open(path, flags | O_DIRECT, mode);
#else
	int			fd = open(path, flags, mode);

#ifdef F_NOCACHE
	if (fd >= 0 && fcntl(fd, F_NOCACHE, 1) < 0)
	{
		/* keep fcntl's errno across close() */
		int			fcntl_errno = errno;

		close(fd);
		errno = fcntl_errno;
		fd = -1;
	}
#endif

	return fd;
#endif
}
282 :
283 : static void
284 0 : test_sync(int writes_per_op)
285 : {
286 : int tmpfile,
287 : ops,
288 : writes;
289 0 : bool fs_warning = false;
290 :
291 0 : if (writes_per_op == 1)
292 0 : printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
293 : else
294 0 : printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
295 0 : printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n"));
296 :
297 : /*
298 : * Test open_datasync if available
299 : */
300 0 : printf(LABEL_FORMAT, "open_datasync");
301 0 : fflush(stdout);
302 :
303 : #ifdef O_DSYNC
304 0 : if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
305 : {
306 0 : printf(NA_FORMAT, _("n/a*"));
307 0 : fs_warning = true;
308 : }
309 : else
310 : {
311 0 : START_TIMER;
312 0 : for (ops = 0; alarm_triggered == false; ops++)
313 : {
314 0 : for (writes = 0; writes < writes_per_op; writes++)
315 0 : if (pg_pwrite(tmpfile,
316 : buf,
317 : XLOG_BLCKSZ,
318 0 : writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
319 0 : die("write failed");
320 : }
321 0 : STOP_TIMER;
322 0 : close(tmpfile);
323 : }
324 : #else
325 : printf(NA_FORMAT, _("n/a"));
326 : #endif
327 :
328 : /*
329 : * Test fdatasync if available
330 : */
331 0 : printf(LABEL_FORMAT, "fdatasync");
332 0 : fflush(stdout);
333 :
334 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
335 0 : die("could not open output file");
336 0 : START_TIMER;
337 0 : for (ops = 0; alarm_triggered == false; ops++)
338 : {
339 0 : for (writes = 0; writes < writes_per_op; writes++)
340 0 : if (pg_pwrite(tmpfile,
341 : buf,
342 : XLOG_BLCKSZ,
343 0 : writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
344 0 : die("write failed");
345 0 : fdatasync(tmpfile);
346 : }
347 0 : STOP_TIMER;
348 0 : close(tmpfile);
349 :
350 : /*
351 : * Test fsync
352 : */
353 0 : printf(LABEL_FORMAT, "fsync");
354 0 : fflush(stdout);
355 :
356 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
357 0 : die("could not open output file");
358 0 : START_TIMER;
359 0 : for (ops = 0; alarm_triggered == false; ops++)
360 : {
361 0 : for (writes = 0; writes < writes_per_op; writes++)
362 0 : if (pg_pwrite(tmpfile,
363 : buf,
364 : XLOG_BLCKSZ,
365 0 : writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
366 0 : die("write failed");
367 0 : if (fsync(tmpfile) != 0)
368 0 : die("fsync failed");
369 : }
370 0 : STOP_TIMER;
371 0 : close(tmpfile);
372 :
373 : /*
374 : * If fsync_writethrough is available, test as well
375 : */
376 0 : printf(LABEL_FORMAT, "fsync_writethrough");
377 0 : fflush(stdout);
378 :
379 : #ifdef HAVE_FSYNC_WRITETHROUGH
380 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
381 : die("could not open output file");
382 : START_TIMER;
383 : for (ops = 0; alarm_triggered == false; ops++)
384 : {
385 : for (writes = 0; writes < writes_per_op; writes++)
386 : if (pg_pwrite(tmpfile,
387 : buf,
388 : XLOG_BLCKSZ,
389 : writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
390 : die("write failed");
391 : if (pg_fsync_writethrough(tmpfile) != 0)
392 : die("fsync failed");
393 : }
394 : STOP_TIMER;
395 : close(tmpfile);
396 : #else
397 0 : printf(NA_FORMAT, _("n/a"));
398 : #endif
399 :
400 : /*
401 : * Test open_sync if available
402 : */
403 0 : printf(LABEL_FORMAT, "open_sync");
404 0 : fflush(stdout);
405 :
406 : #ifdef O_SYNC
407 0 : if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
408 : {
409 0 : printf(NA_FORMAT, _("n/a*"));
410 0 : fs_warning = true;
411 : }
412 : else
413 : {
414 0 : START_TIMER;
415 0 : for (ops = 0; alarm_triggered == false; ops++)
416 : {
417 0 : for (writes = 0; writes < writes_per_op; writes++)
418 0 : if (pg_pwrite(tmpfile,
419 : buf,
420 : XLOG_BLCKSZ,
421 0 : writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
422 :
423 : /*
424 : * This can generate write failures if the filesystem has
425 : * a large block size, e.g. 4k, and there is no support
426 : * for O_DIRECT writes smaller than the file system block
427 : * size, e.g. XFS.
428 : */
429 0 : die("write failed");
430 : }
431 0 : STOP_TIMER;
432 0 : close(tmpfile);
433 : }
434 : #else
435 : printf(NA_FORMAT, _("n/a"));
436 : #endif
437 :
438 0 : if (fs_warning)
439 : {
440 0 : printf(_("* This file system and its mount options do not support direct\n"
441 : " I/O, e.g. ext4 in journaled mode.\n"));
442 : }
443 0 : }
444 :
/*
 * Run the open_sync write-size comparison: the same 16kB total is written
 * as 1, 2, 4, 8 and 16 separate writes, one result line each.
 */
static void
test_open_syncs(void)
{
	/* section heading */
	printf(_("\nCompare open_sync with different write sizes:\n"));
	printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
			 "open_sync sizes.)\n"));

	/* second argument is the size of each individual write, in kB */
	test_open_sync(_(" 1 * 16kB open_sync write"), 16);
	test_open_sync(_(" 2 *  8kB open_sync writes"), 8);
	test_open_sync(_(" 4 *  4kB open_sync writes"), 4);
	test_open_sync(_(" 8 *  2kB open_sync writes"), 2);
	test_open_sync(_("16 *  1kB open_sync writes"), 1);
}
458 :
459 : /*
460 : * Test open_sync with different size files
461 : */
462 : static void
463 0 : test_open_sync(const char *msg, int writes_size)
464 : {
465 : #ifdef O_SYNC
466 : int tmpfile,
467 : ops,
468 : writes;
469 : #endif
470 :
471 0 : printf(LABEL_FORMAT, msg);
472 0 : fflush(stdout);
473 :
474 : #ifdef O_SYNC
475 0 : if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
476 0 : printf(NA_FORMAT, _("n/a*"));
477 : else
478 : {
479 0 : START_TIMER;
480 0 : for (ops = 0; alarm_triggered == false; ops++)
481 : {
482 0 : for (writes = 0; writes < 16 / writes_size; writes++)
483 0 : if (pg_pwrite(tmpfile,
484 : buf,
485 0 : writes_size * 1024,
486 0 : writes * writes_size * 1024) !=
487 0 : writes_size * 1024)
488 0 : die("write failed");
489 : }
490 0 : STOP_TIMER;
491 0 : close(tmpfile);
492 : }
493 : #else
494 : printf(NA_FORMAT, _("n/a"));
495 : #endif
496 0 : }
497 :
498 : static void
499 0 : test_file_descriptor_sync(void)
500 : {
501 : int tmpfile,
502 : ops;
503 :
504 : /*
505 : * Test whether fsync can sync data written on a different descriptor for
506 : * the same file. This checks the efficiency of multi-process fsyncs
507 : * against the same file. Possibly this should be done with writethrough
508 : * on platforms which support it.
509 : */
510 0 : printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
511 0 : printf(_("(If the times are similar, fsync() can sync data written on a different\n"
512 : "descriptor.)\n"));
513 :
514 : /*
515 : * first write, fsync and close, which is the normal behavior without
516 : * multiple descriptors
517 : */
518 0 : printf(LABEL_FORMAT, "write, fsync, close");
519 0 : fflush(stdout);
520 :
521 0 : START_TIMER;
522 0 : for (ops = 0; alarm_triggered == false; ops++)
523 : {
524 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
525 0 : die("could not open output file");
526 0 : if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
527 0 : die("write failed");
528 0 : if (fsync(tmpfile) != 0)
529 0 : die("fsync failed");
530 0 : close(tmpfile);
531 :
532 : /*
533 : * open and close the file again to be consistent with the following
534 : * test
535 : */
536 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
537 0 : die("could not open output file");
538 0 : close(tmpfile);
539 : }
540 0 : STOP_TIMER;
541 :
542 : /*
543 : * Now open, write, close, open again and fsync This simulates processes
544 : * fsyncing each other's writes.
545 : */
546 0 : printf(LABEL_FORMAT, "write, close, fsync");
547 0 : fflush(stdout);
548 :
549 0 : START_TIMER;
550 0 : for (ops = 0; alarm_triggered == false; ops++)
551 : {
552 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
553 0 : die("could not open output file");
554 0 : if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
555 0 : die("write failed");
556 0 : close(tmpfile);
557 : /* reopen file */
558 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
559 0 : die("could not open output file");
560 0 : if (fsync(tmpfile) != 0)
561 0 : die("fsync failed");
562 0 : close(tmpfile);
563 : }
564 0 : STOP_TIMER;
565 0 : }
566 :
567 : static void
568 0 : test_non_sync(void)
569 : {
570 : int tmpfile,
571 : ops;
572 :
573 : /*
574 : * Test a simple write without fsync
575 : */
576 0 : printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
577 0 : printf(LABEL_FORMAT, "write");
578 0 : fflush(stdout);
579 :
580 0 : if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
581 0 : die("could not open output file");
582 0 : START_TIMER;
583 0 : for (ops = 0; alarm_triggered == false; ops++)
584 : {
585 0 : if (pg_pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ)
586 0 : die("write failed");
587 : }
588 0 : STOP_TIMER;
589 0 : close(tmpfile);
590 0 : }
591 :
592 : static void
593 0 : signal_cleanup(SIGNAL_ARGS)
594 : {
595 : /* Delete the file if it exists. Ignore errors */
596 0 : if (needs_unlink)
597 0 : unlink(filename);
598 : /* Finish incomplete line on stdout */
599 0 : puts("");
600 0 : exit(1);
601 : }
602 :
603 : #ifdef HAVE_FSYNC_WRITETHROUGH
604 :
/*
 * Flush fd using the platform's write-through primitive: _commit() on
 * WIN32, fcntl(F_FULLFSYNC) where that is defined.  Returns 0 on success
 * and -1 on failure; where neither primitive exists it fails with errno
 * set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#if defined(WIN32)
	return _commit(fd);
#elif defined(F_FULLFSYNC)
	if (fcntl(fd, F_FULLFSYNC, 0) == -1)
		return -1;
	return 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
617 : #endif
618 :
619 : /*
620 : * print out the writes per second for tests
621 : */
622 : static void
623 0 : print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
624 : {
625 0 : double total_time = (stop_t.tv_sec - start_t.tv_sec) +
626 0 : (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
627 0 : double per_second = ops / total_time;
628 0 : double avg_op_time_us = (total_time / ops) * USECS_SEC;
629 :
630 0 : printf(_(OPS_FORMAT), per_second, avg_op_time_us);
631 0 : }
632 :
633 : #ifndef WIN32
634 : static void
635 0 : process_alarm(SIGNAL_ARGS)
636 : {
637 0 : alarm_triggered = true;
638 0 : }
639 : #else
640 : static DWORD WINAPI
641 : process_alarm(LPVOID param)
642 : {
643 : /* WIN32 doesn't support alarm, so we create a thread and sleep here */
644 : Sleep(secs_per_test * 1000);
645 : alarm_triggered = true;
646 : ExitThread(0);
647 : }
648 : #endif
|