Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_checksums.c
4 : * Checks, enables or disables page level checksums for an offline
5 : * cluster
6 : *
7 : * Copyright (c) 2010-2024, PostgreSQL Global Development Group
8 : *
9 : * IDENTIFICATION
10 : * src/bin/pg_checksums/pg_checksums.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres_fe.h"
16 :
#include <dirent.h>
#include <errno.h>
#include <limits.h>
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>
22 :
23 : #include "common/controldata_utils.h"
24 : #include "common/file_perm.h"
25 : #include "common/file_utils.h"
26 : #include "common/logging.h"
27 : #include "common/relpath.h"
28 : #include "fe_utils/option_utils.h"
29 : #include "getopt_long.h"
30 : #include "pg_getopt.h"
31 : #include "storage/bufpage.h"
32 : #include "storage/checksum.h"
33 : #include "storage/checksum_impl.h"
34 :
35 :
36 : static int64 files_scanned = 0;
37 : static int64 files_written = 0;
38 : static int64 blocks_scanned = 0;
39 : static int64 blocks_written = 0;
40 : static int64 badblocks = 0;
41 : static ControlFileData *ControlFile;
42 :
43 : static char *only_filenode = NULL;
44 : static bool do_sync = true;
45 : static bool verbose = false;
46 : static bool showprogress = false;
47 : static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
48 :
/*
 * Operation requested on the command line.
 */
typedef enum
{
	PG_MODE_CHECK,				/* -c/--check: verify checksums (default) */
	PG_MODE_DISABLE,			/* -d/--disable: turn checksums off */
	PG_MODE_ENABLE,				/* -e/--enable: compute and store checksums */
} PgChecksumMode;

/* Selected operation; stays at "check" unless -d or -e is given */
static PgChecksumMode mode = PG_MODE_CHECK;
57 :
58 : static const char *progname;
59 :
60 : /*
61 : * Progress status information.
62 : */
63 : static int64 total_size = 0;
64 : static int64 current_size = 0;
65 : static pg_time_t last_progress_report = 0;
66 :
67 : static void
68 2 : usage(void)
69 : {
70 2 : printf(_("%s enables, disables, or verifies data checksums in a PostgreSQL database cluster.\n\n"), progname);
71 2 : printf(_("Usage:\n"));
72 2 : printf(_(" %s [OPTION]... [DATADIR]\n"), progname);
73 2 : printf(_("\nOptions:\n"));
74 2 : printf(_(" [-D, --pgdata=]DATADIR data directory\n"));
75 2 : printf(_(" -c, --check check data checksums (default)\n"));
76 2 : printf(_(" -d, --disable disable data checksums\n"));
77 2 : printf(_(" -e, --enable enable data checksums\n"));
78 2 : printf(_(" -f, --filenode=FILENODE check only relation with specified filenode\n"));
79 2 : printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n"));
80 2 : printf(_(" -P, --progress show progress information\n"));
81 2 : printf(_(" --sync-method=METHOD set method for syncing files to disk\n"));
82 2 : printf(_(" -v, --verbose output verbose messages\n"));
83 2 : printf(_(" -V, --version output version information, then exit\n"));
84 2 : printf(_(" -?, --help show this help, then exit\n"));
85 2 : printf(_("\nIf no data directory (DATADIR) is specified, "
86 : "the environment variable PGDATA\nis used.\n\n"));
87 2 : printf(_("Report bugs to <%s>.\n"), PACKAGE_BUGREPORT);
88 2 : printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
89 2 : }
90 :
/*
 * One entry of the exclusion list consulted when deciding which files to
 * leave out of checksum processing.  "name" is the file name to compare
 * against.  When "match_prefix" is true, every file whose name begins with
 * "name" is excluded; otherwise only an exact match is.
 */
struct exclude_list_item
{
	const char *name;
	bool		match_prefix;
};

/*
 * Files that are never checksummed.  The list ends with a NULL name.
 *
 * Note: this list should be kept in sync with what basebackup.c includes.
 */
static const struct exclude_list_item skip[] = {
	{.name = "pg_control", .match_prefix = false},
	{.name = "pg_filenode.map", .match_prefix = false},
	{.name = "pg_internal.init", .match_prefix = true},
	{.name = "PG_VERSION", .match_prefix = false},
#ifdef EXEC_BACKEND
	{.name = "config_exec_params", .match_prefix = true},
#endif
	{.name = NULL, .match_prefix = false}
};
118 :
119 : /*
120 : * Report current progress status. Parts borrowed from
121 : * src/bin/pg_basebackup/pg_basebackup.c.
122 : */
123 : static void
124 0 : progress_report(bool finished)
125 : {
126 : int percent;
127 : pg_time_t now;
128 :
129 : Assert(showprogress);
130 :
131 0 : now = time(NULL);
132 0 : if (now == last_progress_report && !finished)
133 0 : return; /* Max once per second */
134 :
135 : /* Save current time */
136 0 : last_progress_report = now;
137 :
138 : /* Adjust total size if current_size is larger */
139 0 : if (current_size > total_size)
140 0 : total_size = current_size;
141 :
142 : /* Calculate current percentage of size done */
143 0 : percent = total_size ? (int) ((current_size) * 100 / total_size) : 0;
144 :
145 0 : fprintf(stderr, _("%lld/%lld MB (%d%%) computed"),
146 0 : (long long) (current_size / (1024 * 1024)),
147 0 : (long long) (total_size / (1024 * 1024)),
148 : percent);
149 :
150 : /*
151 : * Stay on the same line if reporting to a terminal and we're not done
152 : * yet.
153 : */
154 0 : fputc((!finished && isatty(fileno(stderr))) ? '\r' : '\n', stderr);
155 : }
156 :
157 : static bool
158 25766 : skipfile(const char *fn)
159 : {
160 : int excludeIdx;
161 :
162 128152 : for (excludeIdx = 0; skip[excludeIdx].name != NULL; excludeIdx++)
163 : {
164 102678 : int cmplen = strlen(skip[excludeIdx].name);
165 :
166 102678 : if (!skip[excludeIdx].match_prefix)
167 77052 : cmplen++;
168 102678 : if (strncmp(skip[excludeIdx].name, fn, cmplen) == 0)
169 292 : return true;
170 : }
171 :
172 25474 : return false;
173 : }
174 :
175 : static void
176 17794 : scan_file(const char *fn, int segmentno)
177 : {
178 : PGIOAlignedBlock buf;
179 17794 : PageHeader header = (PageHeader) buf.data;
180 : int f;
181 : BlockNumber blockno;
182 : int flags;
183 17794 : int64 blocks_written_in_file = 0;
184 :
185 : Assert(mode == PG_MODE_ENABLE ||
186 : mode == PG_MODE_CHECK);
187 :
188 17794 : flags = (mode == PG_MODE_ENABLE) ? O_RDWR : O_RDONLY;
189 17794 : f = open(fn, PG_BINARY | flags, 0);
190 :
191 17794 : if (f < 0)
192 0 : pg_fatal("could not open file \"%s\": %m", fn);
193 :
194 17794 : files_scanned++;
195 :
196 17794 : for (blockno = 0;; blockno++)
197 51988 : {
198 : uint16 csum;
199 69782 : int r = read(f, buf.data, BLCKSZ);
200 :
201 69782 : if (r == 0)
202 17778 : break;
203 52004 : if (r != BLCKSZ)
204 : {
205 16 : if (r < 0)
206 0 : pg_fatal("could not read block %u in file \"%s\": %m",
207 : blockno, fn);
208 : else
209 16 : pg_fatal("could not read block %u in file \"%s\": read %d of %d",
210 : blockno, fn, r, BLCKSZ);
211 : }
212 51988 : blocks_scanned++;
213 :
214 : /*
215 : * Since the file size is counted as total_size for progress status
216 : * information, the sizes of all pages including new ones in the file
217 : * should be counted as current_size. Otherwise the progress reporting
218 : * calculated using those counters may not reach 100%.
219 : */
220 51988 : current_size += r;
221 :
222 : /* New pages have no checksum yet */
223 51988 : if (PageIsNew(buf.data))
224 228 : continue;
225 :
226 51760 : csum = pg_checksum_page(buf.data, blockno + segmentno * RELSEG_SIZE);
227 51760 : if (mode == PG_MODE_CHECK)
228 : {
229 40484 : if (csum != header->pd_checksum)
230 : {
231 8 : if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION)
232 8 : pg_log_error("checksum verification failed in file \"%s\", block %u: calculated checksum %X but block contains %X",
233 : fn, blockno, csum, header->pd_checksum);
234 8 : badblocks++;
235 : }
236 : }
237 11276 : else if (mode == PG_MODE_ENABLE)
238 : {
239 : int w;
240 :
241 : /*
242 : * Do not rewrite if the checksum is already set to the expected
243 : * value.
244 : */
245 11276 : if (header->pd_checksum == csum)
246 5638 : continue;
247 :
248 5638 : blocks_written_in_file++;
249 :
250 : /* Set checksum in page header */
251 5638 : header->pd_checksum = csum;
252 :
253 : /* Seek back to beginning of block */
254 5638 : if (lseek(f, -BLCKSZ, SEEK_CUR) < 0)
255 0 : pg_fatal("seek failed for block %u in file \"%s\": %m", blockno, fn);
256 :
257 : /* Write block with checksum */
258 5638 : w = write(f, buf.data, BLCKSZ);
259 5638 : if (w != BLCKSZ)
260 : {
261 0 : if (w < 0)
262 0 : pg_fatal("could not write block %u in file \"%s\": %m",
263 : blockno, fn);
264 : else
265 0 : pg_fatal("could not write block %u in file \"%s\": wrote %d of %d",
266 : blockno, fn, w, BLCKSZ);
267 : }
268 : }
269 :
270 46122 : if (showprogress)
271 0 : progress_report(false);
272 : }
273 :
274 17778 : if (verbose)
275 : {
276 0 : if (mode == PG_MODE_CHECK)
277 0 : pg_log_info("checksums verified in file \"%s\"", fn);
278 0 : if (mode == PG_MODE_ENABLE)
279 0 : pg_log_info("checksums enabled in file \"%s\"", fn);
280 : }
281 :
282 : /* Update write counters if any write activity has happened */
283 17778 : if (blocks_written_in_file > 0)
284 : {
285 1564 : files_written++;
286 1564 : blocks_written += blocks_written_in_file;
287 : }
288 :
289 17778 : close(f);
290 17778 : }
291 :
292 : /*
293 : * Scan the given directory for items which can be checksummed and
294 : * operate on each one of them. If "sizeonly" is true, the size of
295 : * all the items which have checksums is computed and returned back
296 : * to the caller without operating on the files. This is used to compile
297 : * the total size of the data directory for progress reports.
298 : */
299 : static int64
300 192 : scan_directory(const char *basedir, const char *subdir, bool sizeonly)
301 : {
302 192 : int64 dirsize = 0;
303 : char path[MAXPGPATH];
304 : DIR *dir;
305 : struct dirent *de;
306 :
307 192 : snprintf(path, sizeof(path), "%s/%s", basedir, subdir);
308 192 : dir = opendir(path);
309 192 : if (!dir)
310 0 : pg_fatal("could not open directory \"%s\": %m", path);
311 26504 : while ((de = readdir(dir)) != NULL)
312 : {
313 : char fn[MAXPGPATH];
314 : struct stat st;
315 :
316 26328 : if (strcmp(de->d_name, ".") == 0 ||
317 26146 : strcmp(de->d_name, "..") == 0)
318 8436 : continue;
319 :
320 : /* Skip temporary files */
321 25964 : if (strncmp(de->d_name,
322 : PG_TEMP_FILE_PREFIX,
323 : strlen(PG_TEMP_FILE_PREFIX)) == 0)
324 62 : continue;
325 :
326 : /* Skip temporary folders */
327 25902 : if (strncmp(de->d_name,
328 : PG_TEMP_FILES_DIR,
329 : strlen(PG_TEMP_FILES_DIR)) == 0)
330 0 : continue;
331 :
332 : /* Skip macOS system files */
333 25902 : if (strcmp(de->d_name, ".DS_Store") == 0)
334 38 : continue;
335 :
336 25864 : snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
337 25864 : if (lstat(fn, &st) < 0)
338 0 : pg_fatal("could not stat file \"%s\": %m", fn);
339 25864 : if (S_ISREG(st.st_mode))
340 : {
341 : char fnonly[MAXPGPATH];
342 : char *forkpath,
343 : *segmentpath;
344 25766 : int segmentno = 0;
345 :
346 25766 : if (skipfile(de->d_name))
347 7972 : continue;
348 :
349 : /*
350 : * Cut off at the segment boundary (".") to get the segment number
351 : * in order to mix it into the checksum. Then also cut off at the
352 : * fork boundary, to get the filenode the file belongs to for
353 : * filtering.
354 : */
355 25474 : strlcpy(fnonly, de->d_name, sizeof(fnonly));
356 25474 : segmentpath = strchr(fnonly, '.');
357 25474 : if (segmentpath != NULL)
358 : {
359 142 : *segmentpath++ = '\0';
360 142 : segmentno = atoi(segmentpath);
361 142 : if (segmentno == 0)
362 0 : pg_fatal("invalid segment number %d in file name \"%s\"",
363 : segmentno, fn);
364 : }
365 :
366 25474 : forkpath = strchr(fnonly, '_');
367 25474 : if (forkpath != NULL)
368 6302 : *forkpath++ = '\0';
369 :
370 25474 : if (only_filenode && strcmp(only_filenode, fnonly) != 0)
371 : /* filenode not to be included */
372 7680 : continue;
373 :
374 17794 : dirsize += st.st_size;
375 :
376 : /*
377 : * No need to work on the file when calculating only the size of
378 : * the items in the data folder.
379 : */
380 17794 : if (!sizeonly)
381 17794 : scan_file(fn, segmentno);
382 : }
383 98 : else if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
384 : {
385 : /*
386 : * If going through the entries of pg_tblspc, we assume to operate
387 : * on tablespace locations where only TABLESPACE_VERSION_DIRECTORY
388 : * is valid, resolving the linked locations and dive into them
389 : * directly.
390 : */
391 98 : if (strncmp(PG_TBLSPC_DIR, subdir, strlen(PG_TBLSPC_DIR)) == 0)
392 : {
393 : char tblspc_path[MAXPGPATH];
394 : struct stat tblspc_st;
395 :
396 : /*
397 : * Resolve tablespace location path and check whether
398 : * TABLESPACE_VERSION_DIRECTORY exists. Not finding a valid
399 : * location is unexpected, since there should be no orphaned
400 : * links and no links pointing to something else than a
401 : * directory.
402 : */
403 10 : snprintf(tblspc_path, sizeof(tblspc_path), "%s/%s/%s",
404 10 : path, de->d_name, TABLESPACE_VERSION_DIRECTORY);
405 :
406 10 : if (lstat(tblspc_path, &tblspc_st) < 0)
407 0 : pg_fatal("could not stat file \"%s\": %m",
408 : tblspc_path);
409 :
410 : /*
411 : * Move backwards once as the scan needs to happen for the
412 : * contents of TABLESPACE_VERSION_DIRECTORY.
413 : */
414 10 : snprintf(tblspc_path, sizeof(tblspc_path), "%s/%s",
415 10 : path, de->d_name);
416 :
417 : /* Looks like a valid tablespace location */
418 10 : dirsize += scan_directory(tblspc_path,
419 : TABLESPACE_VERSION_DIRECTORY,
420 : sizeonly);
421 : }
422 : else
423 : {
424 88 : dirsize += scan_directory(path, de->d_name, sizeonly);
425 : }
426 : }
427 : }
428 176 : closedir(dir);
429 176 : return dirsize;
430 : }
431 :
432 : int
433 62 : main(int argc, char *argv[])
434 : {
435 : static struct option long_options[] = {
436 : {"check", no_argument, NULL, 'c'},
437 : {"pgdata", required_argument, NULL, 'D'},
438 : {"disable", no_argument, NULL, 'd'},
439 : {"enable", no_argument, NULL, 'e'},
440 : {"filenode", required_argument, NULL, 'f'},
441 : {"no-sync", no_argument, NULL, 'N'},
442 : {"progress", no_argument, NULL, 'P'},
443 : {"verbose", no_argument, NULL, 'v'},
444 : {"sync-method", required_argument, NULL, 1},
445 : {NULL, 0, NULL, 0}
446 : };
447 :
448 62 : char *DataDir = NULL;
449 : int c;
450 : int option_index;
451 : bool crc_ok;
452 :
453 62 : pg_logging_init(argv[0]);
454 62 : set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_checksums"));
455 62 : progname = get_progname(argv[0]);
456 :
457 62 : if (argc > 1)
458 : {
459 62 : if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
460 : {
461 2 : usage();
462 2 : exit(0);
463 : }
464 60 : if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
465 : {
466 2 : puts("pg_checksums (PostgreSQL) " PG_VERSION);
467 2 : exit(0);
468 : }
469 : }
470 :
471 186 : while ((c = getopt_long(argc, argv, "cdD:ef:NPv", long_options, &option_index)) != -1)
472 : {
473 130 : switch (c)
474 : {
475 38 : case 'c':
476 38 : mode = PG_MODE_CHECK;
477 38 : break;
478 6 : case 'd':
479 6 : mode = PG_MODE_DISABLE;
480 6 : break;
481 56 : case 'D':
482 56 : DataDir = optarg;
483 56 : break;
484 8 : case 'e':
485 8 : mode = PG_MODE_ENABLE;
486 8 : break;
487 12 : case 'f':
488 12 : if (!option_parse_int(optarg, "-f/--filenode", 0,
489 : INT_MAX,
490 : NULL))
491 0 : exit(1);
492 12 : only_filenode = pstrdup(optarg);
493 12 : break;
494 8 : case 'N':
495 8 : do_sync = false;
496 8 : break;
497 0 : case 'P':
498 0 : showprogress = true;
499 0 : break;
500 0 : case 'v':
501 0 : verbose = true;
502 0 : break;
503 0 : case 1:
504 0 : if (!parse_sync_method(optarg, &sync_method))
505 0 : exit(1);
506 0 : break;
507 2 : default:
508 : /* getopt_long already emitted a complaint */
509 2 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
510 2 : exit(1);
511 : }
512 : }
513 :
514 56 : if (DataDir == NULL)
515 : {
516 0 : if (optind < argc)
517 0 : DataDir = argv[optind++];
518 : else
519 0 : DataDir = getenv("PGDATA");
520 :
521 : /* If no DataDir was specified, and none could be found, error out */
522 0 : if (DataDir == NULL)
523 : {
524 0 : pg_log_error("no data directory specified");
525 0 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
526 0 : exit(1);
527 : }
528 : }
529 :
530 : /* Complain if any arguments remain */
531 56 : if (optind < argc)
532 : {
533 0 : pg_log_error("too many command-line arguments (first is \"%s\")",
534 : argv[optind]);
535 0 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
536 0 : exit(1);
537 : }
538 :
539 : /* filenode checking only works in --check mode */
540 56 : if (mode != PG_MODE_CHECK && only_filenode)
541 : {
542 4 : pg_log_error("option -f/--filenode can only be used with --check");
543 4 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
544 4 : exit(1);
545 : }
546 :
547 : /* Read the control file and check compatibility */
548 52 : ControlFile = get_controlfile(DataDir, &crc_ok);
549 52 : if (!crc_ok)
550 0 : pg_fatal("pg_control CRC value is incorrect");
551 :
552 52 : if (ControlFile->pg_control_version != PG_CONTROL_VERSION)
553 0 : pg_fatal("cluster is not compatible with this version of pg_checksums");
554 :
555 52 : if (ControlFile->blcksz != BLCKSZ)
556 : {
557 0 : pg_log_error("database cluster is not compatible");
558 0 : pg_log_error_detail("The database cluster was initialized with block size %u, but pg_checksums was compiled with block size %u.",
559 : ControlFile->blcksz, BLCKSZ);
560 0 : exit(1);
561 : }
562 :
563 : /*
564 : * Check if cluster is running. A clean shutdown is required to avoid
565 : * random checksum failures caused by torn pages. Note that this doesn't
566 : * guard against someone starting the cluster concurrently.
567 : */
568 52 : if (ControlFile->state != DB_SHUTDOWNED &&
569 2 : ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
570 2 : pg_fatal("cluster must be shut down");
571 :
572 50 : if (ControlFile->data_checksum_version == 0 &&
573 8 : mode == PG_MODE_CHECK)
574 2 : pg_fatal("data checksums are not enabled in cluster");
575 :
576 48 : if (ControlFile->data_checksum_version == 0 &&
577 6 : mode == PG_MODE_DISABLE)
578 2 : pg_fatal("data checksums are already disabled in cluster");
579 :
580 46 : if (ControlFile->data_checksum_version > 0 &&
581 42 : mode == PG_MODE_ENABLE)
582 2 : pg_fatal("data checksums are already enabled in cluster");
583 :
584 : /* Operate on all files if checking or enabling checksums */
585 44 : if (mode == PG_MODE_CHECK || mode == PG_MODE_ENABLE)
586 : {
587 : /*
588 : * If progress status information is requested, we need to scan the
589 : * directory tree twice: once to know how much total data needs to be
590 : * processed and once to do the real work.
591 : */
592 42 : if (showprogress)
593 : {
594 0 : total_size = scan_directory(DataDir, "global", true);
595 0 : total_size += scan_directory(DataDir, "base", true);
596 0 : total_size += scan_directory(DataDir, PG_TBLSPC_DIR, true);
597 : }
598 :
599 42 : (void) scan_directory(DataDir, "global", false);
600 26 : (void) scan_directory(DataDir, "base", false);
601 26 : (void) scan_directory(DataDir, PG_TBLSPC_DIR, false);
602 :
603 26 : if (showprogress)
604 0 : progress_report(true);
605 :
606 26 : printf(_("Checksum operation completed\n"));
607 26 : printf(_("Files scanned: %lld\n"), (long long) files_scanned);
608 26 : printf(_("Blocks scanned: %lld\n"), (long long) blocks_scanned);
609 26 : if (mode == PG_MODE_CHECK)
610 : {
611 22 : printf(_("Bad checksums: %lld\n"), (long long) badblocks);
612 22 : printf(_("Data checksum version: %u\n"), ControlFile->data_checksum_version);
613 :
614 22 : if (badblocks > 0)
615 8 : exit(1);
616 : }
617 4 : else if (mode == PG_MODE_ENABLE)
618 : {
619 4 : printf(_("Files written: %lld\n"), (long long) files_written);
620 4 : printf(_("Blocks written: %lld\n"), (long long) blocks_written);
621 : }
622 : }
623 :
624 : /*
625 : * Finally make the data durable on disk if enabling or disabling
626 : * checksums. Flush first the data directory for safety, and then update
627 : * the control file to keep the switch consistent.
628 : */
629 20 : if (mode == PG_MODE_ENABLE || mode == PG_MODE_DISABLE)
630 : {
631 6 : ControlFile->data_checksum_version =
632 6 : (mode == PG_MODE_ENABLE) ? PG_DATA_CHECKSUM_VERSION : 0;
633 :
634 6 : if (do_sync)
635 : {
636 2 : pg_log_info("syncing data directory");
637 2 : sync_pgdata(DataDir, PG_VERSION_NUM, sync_method);
638 : }
639 :
640 6 : pg_log_info("updating control file");
641 6 : update_controlfile(DataDir, ControlFile, do_sync);
642 :
643 6 : if (verbose)
644 0 : printf(_("Data checksum version: %u\n"), ControlFile->data_checksum_version);
645 6 : if (mode == PG_MODE_ENABLE)
646 4 : printf(_("Checksums enabled in cluster\n"));
647 : else
648 2 : printf(_("Checksums disabled in cluster\n"));
649 : }
650 :
651 20 : return 0;
652 : }
|