Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * datachecksum_state.c
4 : * Background worker for enabling or disabling data checksums online as
5 : * well as functionality for manipulating data checksum state
6 : *
7 : * When enabling data checksums on a cluster at initdb time or when shut down
8 : * with pg_checksums, no extra process is required as each page is checksummed,
9 : * and verified, when accessed. When enabling checksums on an already running
10 : * cluster, this worker will ensure that all pages are checksummed before
11 : * verification of the checksums is turned on. In the case of disabling
12 : * checksums, the state transition is performed only in the control file, no
13 : * changes are performed on the data pages.
14 : *
15 : * Checksums can be either enabled or disabled cluster-wide, with on/off being
16 : * the end state for data_checksums.
17 : *
18 : * 1. Enabling checksums
19 : * ---------------------
20 : * When enabling checksums in an online cluster, data_checksums will be set to
21 : * "inprogress-on" which signals that write operations MUST compute and write
22 : * the checksum on the data page, but during reading the checksum SHALL NOT be
23 : * verified. This ensures that all objects created while checksums are
24 : * being enabled will have checksums set, but reads won't fail due to missing or
25 : * invalid checksums. Invalid checksums can be present in case the cluster had
26 : * checksums enabled, then disabled them and updated the page while they were
27 : * disabled.
28 : *
29 : * The DataChecksumsWorker will compile a list of all databases at the start,
30 : * any databases created concurrently will see the in-progress state and will
31 : * be checksummed automatically. All databases from the original list MUST BE
32 : * successfully processed in order for data checksums to be enabled, the only
33 : * exceptions are databases which are dropped before having been processed.
34 : *
35 : * For each database, all relations which have storage are read and every data
36 : * page is marked dirty to force a write with the checksum. This will generate
37 : * a lot of WAL as the entire database is read and written.
38 : *
39 : * If the processing is interrupted by a cluster crash or restart, it needs to
40 : * be restarted from the beginning again as state isn't persisted.
41 : *
42 : * 2. Disabling checksums
43 : * ----------------------
44 : * When disabling checksums, data_checksums will be set to "inprogress-off"
45 : * which signals that checksums are written but no longer need to be verified.
46 : * This ensures that backends which have not yet transitioned to the
47 : * "inprogress-off" state will still see valid checksums on pages.
48 : *
49 : * 3. Synchronization and Correctness
50 : * ----------------------------------
51 : * The processes involved in enabling or disabling data checksums in an
52 : * online cluster must be properly synchronized with the normal backends
53 : * serving concurrent queries to ensure correctness. Correctness is defined
54 : * as the following:
55 : *
56 : * - Backends SHALL NOT violate the data_checksums state they have agreed to
57 : * by acknowledging the procsignalbarrier: This means that all backends
58 : * MUST calculate and write data checksums during all states except off;
59 : * MUST validate checksums only in the 'on' state.
60 : * - Data checksums SHALL NOT be considered enabled cluster-wide until all
61 : * currently connected backends have state "on": This means that all
62 : * backends must wait on the procsignalbarrier to be acknowledged by all
63 : * before proceeding to validate data checksums.
64 : *
65 : * There are two steps of synchronization required for changing data_checksums
66 : * in an online cluster: (i) changing state in the active backends ("on",
67 : * "off", "inprogress-on" and "inprogress-off"), and (ii) ensuring no
68 : * incompatible objects and processes are left in a database when workers end.
69 : * The former deals with cluster-wide agreement on data checksum state and the
70 : * latter with ensuring that any concurrent activity cannot break the data
71 : * checksum contract during processing.
72 : *
73 : * Synchronizing the state change is done with procsignal barriers. Before
74 : * updating the data_checksums state in the control file, all other backends must absorb the
75 : * barrier. Barrier absorption will happen during interrupt processing, which
76 : * means that connected backends will change state at different times. If
77 : * waiting for a barrier is done during startup, for example during replay, it
78 : * is important to realize that any locks held by the startup process might
79 : * cause deadlocks if backends end up waiting for those locks while startup
80 : * is waiting for a procsignalbarrier.
81 : *
82 : * 3.1 When Enabling Data Checksums
83 : * --------------------------------
84 : * A process which fails to observe data checksums being enabled can induce two
85 : * types of errors: failing to write the checksum when modifying the page and
86 : * failing to validate the data checksum on the page when reading it.
87 : *
88 : * When processing starts all backends belong to one of the below sets, with
89 : * one of Bd and Bi being empty:
90 : *
91 : * Bg: Backend updating the global state and emitting the procsignalbarrier
92 : * Bd: Backends in "off" state
93 : * Bi: Backends in "inprogress-on" state
94 : *
95 : * If processing is started in an online cluster then all backends are in Bd.
96 : * If processing was halted by the cluster shutting down (due to a crash or
97 : * intentional restart), the controlfile state "inprogress-on" will be observed
98 : * on system startup and all backends will be placed in Bd. The controlfile
99 : * state will also be set to "off".
100 : *
101 : * Backends transition Bd -> Bi via a procsignalbarrier which is emitted by the
102 : * DataChecksumsLauncher. When all backends have acknowledged the barrier then
103 : * Bd will be empty and the next phase can begin: calculating and writing data
104 : * checksums with DataChecksumsWorkers. When the DataChecksumsWorker processes
105 : * have finished writing checksums on all pages, data checksums are enabled
106 : * cluster-wide via another procsignalbarrier. There are four sets of backends
107 : * where Bd shall be an empty set:
108 : *
109 : * Bg: Backend updating the global state and emitting the procsignalbarrier
110 : * Bd: Backends in "off" state
111 : * Be: Backends in "on" state
112 : * Bi: Backends in "inprogress-on" state
113 : *
114 : * Backends in Bi and Be will write checksums when modifying a page, but only
115 : * backends in Be will verify the checksum during reading. The Bg backend is
116 : * blocked waiting for all backends in Bi to process interrupts and move to
117 : * Be. Any backend starting while Bg is waiting on the procsignalbarrier will
118 : * observe the global state being "on" and will thus automatically belong to
119 : * Be. Checksums are enabled cluster-wide when Bi is an empty set. Bi and Be
120 : * are compatible sets while still operating based on their local state as
121 : * both write data checksums.
122 : *
123 : * 3.2 When Disabling Data Checksums
124 : * ---------------------------------
125 : * A process which fails to observe that data checksums have been disabled
126 : * can induce two types of errors: writing the checksum when modifying the
127 : * page and validating a data checksum which is no longer correct due to
128 : * modifications to the page. The former is not an error per se as data
129 : * integrity is maintained, but it is wasteful. The latter will cause errors
130 : * in user operations. Assuming the following sets of backends:
131 : *
132 : * Bg: Backend updating the global state and emitting the procsignalbarrier
133 : * Bd: Backends in "off" state
134 : * Be: Backends in "on" state
135 : * Bo: Backends in "inprogress-off" state
136 : * Bi: Backends in "inprogress-on" state
137 : *
138 : * Backends transition from the Be state to Bd like so: Be -> Bo -> Bd. From
139 : * all other states, the transition can be straight to Bd.
140 : *
141 : * The goal is to transition all backends to Bd making the others empty sets.
142 : * Backends in Bo write data checksums, but don't validate them, such that
143 : * backends still in Be can continue to validate pages until the barrier has
144 : * been absorbed such that they are in Bo. Once all backends are in Bo, the
145 : * barrier to transition to "off" can be raised and all backends can safely
146 : * stop writing data checksums as no backend is enforcing data checksum
147 : * validation any longer.
148 : *
149 : * 4. Future opportunities for optimizations
150 : * -----------------------------------------
151 : * Below are some potential optimizations and improvements which were brought
152 : * up during reviews of this feature, but which weren't implemented in the
153 : * initial version. These are ideas listed without any validation on their
154 : * feasibility or potential payoff. More discussion on (most of) these can be
155 : * found on the -hackers threads linked to in the commit message of this
156 : * feature.
157 : *
158 : * * Launching datachecksumsworker for resuming operation from the startup
159 : * process: Currently users have to restart processing manually after a
160 : * restart since dynamic background worker cannot be started from the
161 : * postmaster. Changing the startup process could make restarting the
162 : * processing automatic on cluster restart.
163 : * * Avoid dirtying the page when checksums already match: Iff the checksum
164 : * on the page happens to already match we still dirty the page. It should
165 : * be enough to only do the log_newpage_buffer() call in that case.
166 : * * Teach pg_checksums to avoid checksummed pages when pg_checksums is used
167 : * to enable checksums on a cluster which is in inprogress-on state and
168 : * may have checksummed pages (make pg_checksums be able to resume an
169 : * online operation). This should only be attempted for wal_level minimal.
170 : * * Restartability (not necessarily with page granularity).
171 : * * Avoid processing databases which were created during inprogress-on.
172 : * Right now all databases are processed regardless to be safe.
173 : * * Teach CREATE DATABASE to calculate checksums for databases created
174 : * during inprogress-on with a template database which has yet to be
175 : * processed.
176 : *
177 : *
178 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
179 : * Portions Copyright (c) 1994, Regents of the University of California
180 : *
181 : *
182 : * IDENTIFICATION
183 : * src/backend/postmaster/datachecksum_state.c
184 : *
185 : *-------------------------------------------------------------------------
186 : */
187 : #include "postgres.h"
188 :
189 : #include "access/genam.h"
190 : #include "access/heapam.h"
191 : #include "access/htup_details.h"
192 : #include "access/xact.h"
193 : #include "access/xlog.h"
194 : #include "access/xloginsert.h"
195 : #include "catalog/indexing.h"
196 : #include "catalog/pg_class.h"
197 : #include "catalog/pg_database.h"
198 : #include "commands/progress.h"
199 : #include "commands/vacuum.h"
200 : #include "common/relpath.h"
201 : #include "miscadmin.h"
202 : #include "pgstat.h"
203 : #include "postmaster/bgworker.h"
204 : #include "postmaster/bgwriter.h"
205 : #include "postmaster/datachecksum_state.h"
206 : #include "storage/bufmgr.h"
207 : #include "storage/checksum.h"
208 : #include "storage/ipc.h"
209 : #include "storage/latch.h"
210 : #include "storage/lmgr.h"
211 : #include "storage/lwlock.h"
212 : #include "storage/procarray.h"
213 : #include "storage/smgr.h"
214 : #include "storage/subsystems.h"
215 : #include "tcop/tcopprot.h"
216 : #include "utils/builtins.h"
217 : #include "utils/fmgroids.h"
218 : #include "utils/injection_point.h"
219 : #include "utils/lsyscache.h"
220 : #include "utils/ps_status.h"
221 : #include "utils/syscache.h"
222 : #include "utils/wait_event.h"
223 :
224 : /*
225 : * Configuration of conditions which must match when absorbing a procsignal
226 : * barrier during data checksum enable/disable operations. A single function
227 : * is used for absorbing all barriers, and the current and target states must
228 : * be defined as a from/to tuple in the checksum_barriers struct.
229 : */
230 : typedef struct ChecksumBarrierCondition
231 : {
232 : /* Current state of data checksums */
233 : int from;
234 : /* Target state for data checksums */
235 : int to;
236 : } ChecksumBarrierCondition;
237 :
238 : static const ChecksumBarrierCondition checksum_barriers[6] =
239 : {
240 : /*
241 : * Disabling checksums: If checksums are currently enabled, disabling must
242 : * go through the 'inprogress-off' state.
243 : */
244 : {PG_DATA_CHECKSUM_VERSION, PG_DATA_CHECKSUM_INPROGRESS_OFF},
245 : {PG_DATA_CHECKSUM_INPROGRESS_OFF, PG_DATA_CHECKSUM_OFF},
246 :
247 : /*
248 : * If checksums are in the process of being enabled, but are not yet being
249 : * verified, we can abort by going back to 'off' state.
250 : */
251 : {PG_DATA_CHECKSUM_INPROGRESS_ON, PG_DATA_CHECKSUM_OFF},
252 :
253 : /*
254 : * Enabling checksums must normally go through the 'inprogress-on' state.
255 : */
256 : {PG_DATA_CHECKSUM_OFF, PG_DATA_CHECKSUM_INPROGRESS_ON},
257 : {PG_DATA_CHECKSUM_INPROGRESS_ON, PG_DATA_CHECKSUM_VERSION},
258 :
259 : /*
260 : * If checksums are being disabled but all backends are still computing
261 : * checksums, we can go straight back to 'on'
262 : */
263 : {PG_DATA_CHECKSUM_INPROGRESS_OFF, PG_DATA_CHECKSUM_VERSION},
264 : };
265 :
266 : /*
267 : * Signaling between backends calling pg_enable/disable_data_checksums, the
268 : * checksums launcher process, and the checksums worker process.
269 : *
270 : * This struct is protected by DataChecksumsWorkerLock
271 : */
272 : typedef struct DataChecksumsStateStruct
273 : {
274 : /*
275 : * These are set by pg_{enable|disable}_data_checksums, to tell the
276 : * launcher what the target state is.
277 : */
278 : DataChecksumsWorkerOperation launch_operation;
279 : int launch_cost_delay;
280 : int launch_cost_limit;
281 :
282 : /*
283 : * Is a launcher process currently running? This is set by the main
284 : * launcher process, after it has read the above launch_* parameters.
285 : */
286 : bool launcher_running;
287 :
288 : /*
289 : * Is a worker process currently running? This is set by the worker
290 : * launcher when it starts waiting for a worker process to finish.
291 : */
292 : int worker_pid;
293 :
294 : /*
295 : * These fields indicate the target state that the launcher is currently
296 : * working towards. They can be different from the corresponding launch_*
297 : * fields, if a new pg_enable/disable_data_checksums() call was made while
298 : * the launcher/worker was already running.
299 : *
300 : * The below members are set when the launcher starts, and are only
301 : * accessed read-only by the single worker. Thus, we can access these
302 : * without a lock. If multiple workers, or dynamic cost parameters, are
303 : * supported at some point then this would need to be revisited.
304 : */
305 : DataChecksumsWorkerOperation operation;
306 : int cost_delay;
307 : int cost_limit;
308 :
309 : /*
310 : * Signaling between the launcher and the worker process.
311 : *
312 : * As there is only a single worker, and the launcher won't read these
313 : * until the worker exits, they can be accessed without the need for a
314 : * lock. If multiple workers are supported then this will have to be
315 : * revisited.
316 : */
317 :
318 : /* result, set by worker before exiting */
319 : DataChecksumsWorkerResult success;
320 :
321 : /*
322 : * tells the worker process whether it should also process the shared
323 : * catalogs
324 : */
325 : bool process_shared_catalogs;
326 : } DataChecksumsStateStruct;
327 :
328 : /* Shared memory segment for datachecksumsworker */
329 : static DataChecksumsStateStruct *DataChecksumState;
330 :
331 : typedef struct DataChecksumsWorkerDatabase
332 : {
333 : Oid dboid;
334 : char *dbname;
335 : } DataChecksumsWorkerDatabase;
336 :
337 : /* Flag set by the interrupt handler */
338 : static volatile sig_atomic_t abort_requested = false;
339 :
340 : /*
341 : * Have we set the DataChecksumsStateStruct->launcher_running flag?
342 : * If we have, we need to clear it before exiting!
343 : */
344 : static volatile sig_atomic_t launcher_running = false;
345 :
346 : /* Are we enabling data checksums, or disabling them? */
347 : static DataChecksumsWorkerOperation operation;
348 :
349 : /* Prototypes */
350 : static void DataChecksumsShmemRequest(void *arg);
351 : static bool DatabaseExists(Oid dboid);
352 : static List *BuildDatabaseList(void);
353 : static List *BuildRelationList(bool temp_relations, bool include_shared);
354 : static void FreeDatabaseList(List *dblist);
355 : static DataChecksumsWorkerResult ProcessDatabase(DataChecksumsWorkerDatabase *db);
356 : static bool ProcessAllDatabases(void);
357 : static bool ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrategy strategy);
358 : static void launcher_cancel_handler(SIGNAL_ARGS);
359 : static void WaitForAllTransactionsToFinish(void);
360 :
361 : const ShmemCallbacks DataChecksumsShmemCallbacks = {
362 : .request_fn = DataChecksumsShmemRequest,
363 : };
364 :
365 : /*****************************************************************************
366 : * Functionality for manipulating the data checksum state in the cluster
367 : */
368 :
369 : void
370 10 : EmitAndWaitDataChecksumsBarrier(uint32 state)
371 : {
372 : uint64 barrier;
373 :
374 10 : switch (state)
375 : {
376 1 : case PG_DATA_CHECKSUM_INPROGRESS_ON:
377 1 : barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON);
378 1 : WaitForProcSignalBarrier(barrier);
379 1 : break;
380 :
381 3 : case PG_DATA_CHECKSUM_INPROGRESS_OFF:
382 3 : barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF);
383 3 : WaitForProcSignalBarrier(barrier);
384 3 : break;
385 :
386 3 : case PG_DATA_CHECKSUM_VERSION:
387 3 : barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_ON);
388 3 : WaitForProcSignalBarrier(barrier);
389 3 : break;
390 :
391 3 : case PG_DATA_CHECKSUM_OFF:
392 3 : barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_OFF);
393 3 : WaitForProcSignalBarrier(barrier);
394 3 : break;
395 :
396 10 : default:
397 : Assert(false);
398 : }
399 10 : }
400 :
401 : /*
402 : * AbsorbDataChecksumsBarrier
403 : * Generic function for absorbing data checksum state changes
404 : *
405 : * All procsignalbarriers regarding data checksum state changes are absorbed
406 : * with this function. The set of conditions required for the state change to
407 : * be accepted are listed in the checksum_barriers struct, target_state is
408 : * used to look up the relevant entry.
409 : */
410 : bool
411 288 : AbsorbDataChecksumsBarrier(ProcSignalBarrierType barrier)
412 : {
413 : uint32 target_state;
414 288 : int current = data_checksums;
415 288 : bool found = false;
416 :
417 : /*
418 : * Translate the barrier condition to the target state, doing it here
419 : * instead of in the procsignal code saves the latter from knowing about
420 : * checksum states.
421 : */
422 288 : switch (barrier)
423 : {
424 81 : case PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON:
425 81 : target_state = PG_DATA_CHECKSUM_INPROGRESS_ON;
426 81 : break;
427 76 : case PROCSIGNAL_BARRIER_CHECKSUM_ON:
428 76 : target_state = PG_DATA_CHECKSUM_VERSION;
429 76 : break;
430 58 : case PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF:
431 58 : target_state = PG_DATA_CHECKSUM_INPROGRESS_OFF;
432 58 : break;
433 73 : case PROCSIGNAL_BARRIER_CHECKSUM_OFF:
434 73 : target_state = PG_DATA_CHECKSUM_OFF;
435 73 : break;
436 0 : default:
437 0 : elog(ERROR, "incorrect barrier \"%i\" received", barrier);
438 : }
439 :
440 : /*
441 : * If the target state matches the current state then the barrier has been
442 : * repeated.
443 : */
444 288 : if (current == target_state)
445 42 : return true;
446 :
447 : /*
448 : * If the cluster is in recovery we skip the validation of current state
449 : * since the replay is trusted.
450 : */
451 246 : if (RecoveryInProgress())
452 : {
453 28 : SetLocalDataChecksumState(target_state);
454 28 : return true;
455 : }
456 :
457 : /*
458 : * Find the barrier condition definition for the target state. Not finding
459 : * a condition would be a grave programmer error as the states are a
460 : * discrete set.
461 : */
462 945 : for (int i = 0; i < lengthof(checksum_barriers) && !found; i++)
463 : {
464 727 : if (checksum_barriers[i].from == current && checksum_barriers[i].to == target_state)
465 218 : found = true;
466 : }
467 :
468 : /*
469 : * If the relevant state criteria aren't satisfied, throw an error which
470 : * will be caught by the procsignal machinery for a later retry.
471 : */
472 218 : if (!found)
473 0 : ereport(ERROR,
474 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
475 : errmsg("incorrect data checksum state %i for target state %i",
476 : current, target_state));
477 :
478 218 : SetLocalDataChecksumState(target_state);
479 218 : return true;
480 : }
481 :
482 :
483 : /*
484 : * Disables data checksums for the cluster, if applicable. Starts a background
485 : * worker which turns off the data checksums.
486 : */
487 : Datum
488 6 : disable_data_checksums(PG_FUNCTION_ARGS)
489 : {
490 6 : if (!superuser())
491 0 : ereport(ERROR,
492 : errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
493 : errmsg("must be superuser to change data checksum state"));
494 :
495 6 : StartDataChecksumsWorkerLauncher(DISABLE_DATACHECKSUMS, 0, 0);
496 6 : PG_RETURN_VOID();
497 : }
498 :
499 : /*
500 : * Enables data checksums for the cluster, if applicable. Supports vacuum-
501 : * like cost based throttling to limit system load. Starts a background worker
502 : * which updates data checksums on existing data.
503 : */
504 : Datum
505 9 : enable_data_checksums(PG_FUNCTION_ARGS)
506 : {
507 9 : int cost_delay = PG_GETARG_INT32(0);
508 9 : int cost_limit = PG_GETARG_INT32(1);
509 :
510 9 : if (!superuser())
511 0 : ereport(ERROR,
512 : errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
513 : errmsg("must be superuser to change data checksum state"));
514 :
515 9 : if (cost_delay < 0)
516 0 : ereport(ERROR,
517 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
518 : errmsg("cost delay cannot be a negative value"));
519 :
520 9 : if (cost_limit <= 0)
521 0 : ereport(ERROR,
522 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
523 : errmsg("cost limit must be greater than zero"));
524 :
525 9 : StartDataChecksumsWorkerLauncher(ENABLE_DATACHECKSUMS, cost_delay, cost_limit);
526 :
527 9 : PG_RETURN_VOID();
528 : }
529 :
530 :
531 : /*****************************************************************************
532 : * Functionality for running the datachecksumsworker and associated launcher
533 : */
534 :
535 : /*
536 : * StartDataChecksumsWorkerLauncher
537 : * Main entry point for datachecksumsworker launcher process
538 : *
539 : * The main entrypoint for starting data checksums processing for enabling as
540 : * well as disabling.
541 : */
542 : void
543 15 : StartDataChecksumsWorkerLauncher(DataChecksumsWorkerOperation op,
544 : int cost_delay,
545 : int cost_limit)
546 : {
547 : BackgroundWorker bgw;
548 : BackgroundWorkerHandle *bgw_handle;
549 : bool launcher_running;
550 : DataChecksumsWorkerOperation launcher_running_op;
551 :
552 : #ifdef USE_ASSERT_CHECKING
553 : /* The cost delay settings have no effect when disabling */
554 : if (op == DISABLE_DATACHECKSUMS)
555 : Assert(cost_delay == 0 && cost_limit == 0);
556 : #endif
557 :
558 15 : INJECTION_POINT("datachecksumsworker-startup-delay", NULL);
559 :
560 : /* Store the desired state in shared memory */
561 15 : LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE);
562 :
563 15 : DataChecksumState->launch_operation = op;
564 15 : DataChecksumState->launch_cost_delay = cost_delay;
565 15 : DataChecksumState->launch_cost_limit = cost_limit;
566 :
567 : /* Is the launcher already running? If so, what is it doing? */
568 15 : launcher_running = DataChecksumState->launcher_running;
569 15 : if (launcher_running)
570 0 : launcher_running_op = DataChecksumState->operation;
571 :
572 15 : LWLockRelease(DataChecksumsWorkerLock);
573 :
574 : /*
575 : * Launch a new launcher process, if it's not running already.
576 : *
577 : * If the launcher is currently busy enabling the checksums, and we want
578 : * them disabled (or vice versa), the launcher will notice that at latest
579 : * when it's about to exit, and will loop back process the new request. So
580 : * if the launcher is already running, we don't need to do anything more
581 : * here to abort it.
582 : *
583 : * If you call pg_enable/disable_data_checksums() twice in a row, before
584 : * the launcher has had a chance to start up, we still end up launching it
585 : * twice. That's OK, the second invocation will see that a launcher is
586 : * already running and exit quickly.
587 : *
588 : * TODO: We could optimize here and skip launching the launcher, if we are
589 : * already in the desired state, i.e. if the checksums are already enabled
590 : * and you call pg_enable_data_checksums().
591 : */
592 15 : if (!launcher_running)
593 : {
594 : /*
595 : * Prepare the BackgroundWorker and launch it.
596 : */
597 15 : memset(&bgw, 0, sizeof(bgw));
598 15 : bgw.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
599 15 : bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
600 15 : snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
601 15 : snprintf(bgw.bgw_function_name, BGW_MAXLEN, "DataChecksumsWorkerLauncherMain");
602 15 : snprintf(bgw.bgw_name, BGW_MAXLEN, "datachecksum launcher");
603 15 : snprintf(bgw.bgw_type, BGW_MAXLEN, "datachecksum launcher");
604 15 : bgw.bgw_restart_time = BGW_NEVER_RESTART;
605 15 : bgw.bgw_notify_pid = MyProcPid;
606 15 : bgw.bgw_main_arg = (Datum) 0;
607 :
608 15 : if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
609 0 : ereport(ERROR,
610 : errcode(ERRCODE_INSUFFICIENT_RESOURCES),
611 : errmsg("failed to start background worker to process data checksums"));
612 : }
613 : else
614 : {
615 0 : if (launcher_running_op == op)
616 0 : ereport(ERROR,
617 : errmsg("data checksum processing already running"));
618 : }
619 15 : }
620 :
621 : /*
622 : * ProcessSingleRelationFork
623 : * Enable data checksums in a single relation/fork.
624 : *
625 : * Returns true if successful, and false if *aborted*. On error, an actual
626 : * error is raised in the lower levels.
627 : */
628 : static bool
629 6820 : ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrategy strategy)
630 : {
631 6820 : BlockNumber numblocks = RelationGetNumberOfBlocksInFork(reln, forkNum);
632 : char activity[NAMEDATALEN * 2 + 128];
633 : char *relns;
634 :
635 6820 : relns = get_namespace_name(RelationGetNamespace(reln));
636 :
637 : /* Report the current relation to pgstat_activity */
638 6820 : snprintf(activity, sizeof(activity) - 1, "processing: %s.%s (%s, %u blocks)",
639 6820 : (relns ? relns : ""), RelationGetRelationName(reln), forkNames[forkNum], numblocks);
640 6820 : pgstat_report_activity(STATE_RUNNING, activity);
641 6820 : pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_BLOCKS_TOTAL, numblocks);
642 6820 : if (relns)
643 6820 : pfree(relns);
644 :
645 : /*
646 : * We are looping over the blocks which existed at the time of process
647 : * start, which is safe since new blocks are created with checksums set
648 : * already due to the state being "inprogress-on".
649 : */
650 44773 : for (BlockNumber blknum = 0; blknum < numblocks; blknum++)
651 : {
652 37953 : Buffer buf = ReadBufferExtended(reln, forkNum, blknum, RBM_NORMAL, strategy);
653 :
654 : /* Need to get an exclusive lock to mark the buffer as dirty */
655 37953 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
656 :
657 : /*
658 : * Mark the buffer as dirty and force a full page write. We have to
659 : * re-write the page to WAL even if the checksum hasn't changed,
660 : * because if there is a replica it might have a slightly different
661 : * version of the page with an invalid checksum, caused by unlogged
662 : * changes (e.g. hintbits) on the primary happening while checksums
663 : * were off. This can happen if there was a valid checksum on the page
664 : * at one point in the past, so only when checksums are first on, then
665 : * off, and then turned on again. TODO: investigate if this could be
666 : * avoided if the checksum is calculated to be correct and wal_level
667 : * is set to "minimal".
668 : */
669 37953 : START_CRIT_SECTION();
670 37953 : MarkBufferDirty(buf);
671 37953 : log_newpage_buffer(buf, false);
672 37953 : END_CRIT_SECTION();
673 :
674 37953 : UnlockReleaseBuffer(buf);
675 :
676 : /*
677 : * This is the only place where we check if we are asked to abort; the
678 : * abort request will bubble up from here.
679 : */
680 : Assert(operation == ENABLE_DATACHECKSUMS);
681 37953 : LWLockAcquire(DataChecksumsWorkerLock, LW_SHARED);
682 37953 : if (DataChecksumState->launch_operation == DISABLE_DATACHECKSUMS)
683 0 : abort_requested = true;
684 37953 : LWLockRelease(DataChecksumsWorkerLock);
685 :
686 37953 : if (abort_requested)
687 0 : return false;
688 :
689 : /* update the block counter */
690 37953 : pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_BLOCKS_DONE,
691 37953 : (blknum + 1));
692 :
693 : /*
694 : * Processing is re-using the vacuum cost delay for process
695 : * throttling, hence why we call vacuum APIs here.
696 : */
697 37953 : vacuum_delay_point(false);
698 : }
699 :
700 6820 : return true;
701 : }
702 :
703 : /*
704 : * ProcessSingleRelationByOid
705 : * Process a single relation based on oid.
706 : *
707 : * Returns true if successful, and false if *aborted*. On error, an actual
708 : * error is raised in the lower levels.
709 : */
710 : static bool
711 5255 : ProcessSingleRelationByOid(Oid relationId, BufferAccessStrategy strategy)
712 : {
713 : Relation rel;
714 5255 : bool aborted = false;
715 :
716 5255 : StartTransactionCommand();
717 :
718 5255 : rel = try_relation_open(relationId, AccessShareLock);
719 5255 : if (rel == NULL)
720 : {
721 : /*
722 : * Relation no longer exists. We don't consider this an error since
723 : * there are no pages in it that need data checksums, and thus return
724 : * true. The worker operates off a list of relations generated at the
725 : * start of processing, so relations being dropped in the meantime is
726 : * to be expected.
727 : */
728 0 : CommitTransactionCommand();
729 0 : pgstat_report_activity(STATE_IDLE, NULL);
730 0 : return true;
731 : }
732 5255 : RelationGetSmgr(rel);
733 :
734 26275 : for (ForkNumber fnum = 0; fnum <= MAX_FORKNUM; fnum++)
735 : {
736 21020 : if (smgrexists(rel->rd_smgr, fnum))
737 : {
738 6820 : if (!ProcessSingleRelationFork(rel, fnum, strategy))
739 : {
740 0 : aborted = true;
741 0 : break;
742 : }
743 : }
744 : }
745 5255 : relation_close(rel, AccessShareLock);
746 :
747 5255 : CommitTransactionCommand();
748 5255 : pgstat_report_activity(STATE_IDLE, NULL);
749 :
750 5255 : return !aborted;
751 : }
752 :
753 : /*
754 : * ProcessDatabase
755 : * Enable data checksums in a single database.
756 : *
757 : * We do this by launching a dynamic background worker into this database, and
758 : * waiting for it to finish. We have to do this in a separate worker, since
759 : * each process can only be connected to one database during its lifetime.
760 : */
static DataChecksumsWorkerResult
ProcessDatabase(DataChecksumsWorkerDatabase *db)
{
	BackgroundWorker bgw;
	BackgroundWorkerHandle *bgw_handle;
	BgwHandleStatus status;
	pid_t		pid;
	char		activity[NAMEDATALEN + 64];

	/*
	 * Assume failure until the worker reports success in shared memory; a
	 * worker that never gets to run is thereby counted as failed.
	 */
	DataChecksumState->success = DATACHECKSUMSWORKER_FAILED;

	/* Set up a dynamic bgworker that runs DataChecksumsWorkerMain */
	memset(&bgw, 0, sizeof(bgw));
	bgw.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
	bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
	snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
	snprintf(bgw.bgw_function_name, BGW_MAXLEN, "%s", "DataChecksumsWorkerMain");
	snprintf(bgw.bgw_name, BGW_MAXLEN, "datachecksum worker");
	snprintf(bgw.bgw_type, BGW_MAXLEN, "datachecksum worker");
	bgw.bgw_restart_time = BGW_NEVER_RESTART;
	/* Get notified via SIGUSR1 when the worker starts and stops */
	bgw.bgw_notify_pid = MyProcPid;
	/* The Oid of the database to process is the worker's only argument */
	bgw.bgw_main_arg = ObjectIdGetDatum(db->dboid);

	/*
	 * If there are no worker slots available, there is little we can do. If
	 * we retry in a bit it's still unlikely that the user has managed to
	 * reconfigure in the meantime and we'd be run through retries fast.
	 */
	if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
	{
		ereport(WARNING,
				errmsg("could not start background worker for enabling data checksums in database \"%s\"",
					   db->dbname),
				errhint("The \"%s\" setting might be too low.", "max_worker_processes"));
		return DATACHECKSUMSWORKER_FAILED;
	}

	status = WaitForBackgroundWorkerStartup(bgw_handle, &pid);
	if (status == BGWH_STOPPED)
	{
		/*
		 * If the worker managed to start, and stop, before we got to waiting
		 * for it we can see a STOPPED status here without it being a failure.
		 */
		if (DataChecksumState->success == DATACHECKSUMSWORKER_SUCCESSFUL)
		{
			pgstat_report_activity(STATE_IDLE, NULL);
			LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE);
			DataChecksumState->worker_pid = InvalidPid;
			LWLockRelease(DataChecksumsWorkerLock);
			return DataChecksumState->success;
		}

		ereport(WARNING,
				errmsg("could not start background worker for enabling data checksums in database \"%s\"",
					   db->dbname),
				errhint("More details on the error might be found in the server log."));

		/*
		 * Heuristic to see if the database was dropped, and if it was we can
		 * treat it as not an error, else treat as fatal and error out. TODO:
		 * this could probably be improved with a tighter check.
		 */
		if (DatabaseExists(db->dboid))
			return DATACHECKSUMSWORKER_FAILED;
		else
			return DATACHECKSUMSWORKER_DROPDB;
	}

	/*
	 * If the postmaster crashed we cannot end up with a processed database so
	 * we have no alternative other than exiting. When enabling checksums we
	 * won't at this time have changed the data checksums state in pg_control
	 * to enabled so when the cluster comes back up processing will have to be
	 * restarted.
	 */
	if (status == BGWH_POSTMASTER_DIED)
		ereport(FATAL,
				errcode(ERRCODE_ADMIN_SHUTDOWN),
				errmsg("cannot enable data checksums without the postmaster process"),
				errhint("Restart the database and restart data checksum processing by calling pg_enable_data_checksums()."));

	Assert(status == BGWH_STARTED);
	ereport(LOG,
			errmsg("initiating data checksum processing in database \"%s\"",
				   db->dbname));

	/* Save the pid of the worker so we can signal it later */
	LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE);
	DataChecksumState->worker_pid = pid;
	LWLockRelease(DataChecksumsWorkerLock);

	/* Advertise which worker we are blocked on in pg_stat_activity */
	snprintf(activity, sizeof(activity) - 1,
			 "Waiting for worker in database %s (pid %ld)", db->dbname, (long) pid);
	pgstat_report_activity(STATE_RUNNING, activity);

	/* Block until the per-database worker has finished (or died) */
	status = WaitForBackgroundWorkerShutdown(bgw_handle);
	if (status == BGWH_POSTMASTER_DIED)
		ereport(FATAL,
				errcode(ERRCODE_ADMIN_SHUTDOWN),
				errmsg("postmaster exited during data checksum processing in \"%s\"",
					   db->dbname),
				errhint("Restart the database and restart data checksum processing by calling pg_enable_data_checksums()."));

	if (DataChecksumState->success == DATACHECKSUMSWORKER_ABORTED)
		ereport(LOG,
				errmsg("data checksums processing was aborted in database \"%s\"",
					   db->dbname));

	/* The worker has exited; clear its pid so nobody signals a stale pid */
	pgstat_report_activity(STATE_IDLE, NULL);
	LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE);
	DataChecksumState->worker_pid = InvalidPid;
	LWLockRelease(DataChecksumsWorkerLock);

	/*
	 * NOTE(review): success is read here without holding
	 * DataChecksumsWorkerLock — presumably safe since the worker has already
	 * exited, but verify against the worker's shutdown sequence.
	 */
	return DataChecksumState->success;
}
876 :
877 : /*
878 : * launcher_exit
879 : *
 * Internal routine for cleaning up state when the launcher process exits.  We
 * need to reset the abort flag to ensure that processing can be started again
 * if it was previously aborted (note: started again, *not* restarted from
 * where it left off).
884 : */
885 : static void
886 15 : launcher_exit(int code, Datum arg)
887 : {
888 15 : abort_requested = false;
889 :
890 15 : if (launcher_running)
891 : {
892 2 : LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE);
893 2 : if (DataChecksumState->worker_pid != InvalidPid)
894 : {
895 1 : ereport(LOG,
896 : errmsg("data checksums launcher exiting while worker is still running, signalling worker"));
897 1 : kill(DataChecksumState->worker_pid, SIGTERM);
898 : }
899 2 : LWLockRelease(DataChecksumsWorkerLock);
900 : }
901 :
902 : /*
903 : * If the launcher is exiting before data checksums are enabled then set
904 : * the state to off since processing cannot be resumed.
905 : */
906 15 : if (DataChecksumsInProgressOn())
907 1 : SetDataChecksumsOff();
908 :
909 15 : LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE);
910 15 : launcher_running = false;
911 15 : DataChecksumState->launcher_running = false;
912 15 : LWLockRelease(DataChecksumsWorkerLock);
913 15 : }
914 :
915 : /*
916 : * launcher_cancel_handler
917 : *
918 : * Internal routine for reacting to SIGINT and flagging the worker to abort.
919 : * The worker won't be interrupted immediately but will check for abort flag
920 : * between each block in a relation.
921 : */
922 : static void
923 0 : launcher_cancel_handler(SIGNAL_ARGS)
924 : {
925 0 : int save_errno = errno;
926 :
927 0 : abort_requested = true;
928 :
929 : /*
930 : * There is no sleeping in the main loop, the flag will be checked
931 : * periodically in ProcessSingleRelationFork. The worker does however
932 : * sleep when waiting for concurrent transactions to end so we still need
933 : * to set the latch.
934 : */
935 0 : SetLatch(MyLatch);
936 :
937 0 : errno = save_errno;
938 0 : }
939 :
940 : /*
941 : * WaitForAllTransactionsToFinish
942 : * Blocks awaiting all current transactions to finish
943 : *
944 : * Returns when all transactions which are active at the call of the function
945 : * have ended, or if the postmaster dies while waiting. If the postmaster dies
946 : * the abort flag will be set to indicate that the caller of this shouldn't
947 : * proceed.
948 : *
949 : * NB: this will return early, if aborted by SIGINT or if the target state
950 : * is changed while we're running.
951 : */
952 : static void
953 8 : WaitForAllTransactionsToFinish(void)
954 : {
955 : TransactionId waitforxid;
956 :
957 8 : LWLockAcquire(XidGenLock, LW_SHARED);
958 8 : waitforxid = XidFromFullTransactionId(TransamVariables->nextXid);
959 8 : LWLockRelease(XidGenLock);
960 :
961 8 : while (TransactionIdPrecedes(GetOldestActiveTransactionId(false, true), waitforxid))
962 : {
963 : char activity[64];
964 : int rc;
965 :
966 : /* Oldest running xid is older than us, so wait */
967 0 : snprintf(activity,
968 : sizeof(activity),
969 : "Waiting for current transactions to finish (waiting for %u)",
970 : waitforxid);
971 0 : pgstat_report_activity(STATE_RUNNING, activity);
972 :
973 : /* Retry every 3 seconds */
974 0 : ResetLatch(MyLatch);
975 0 : rc = WaitLatch(MyLatch,
976 : WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
977 : 3000,
978 : WAIT_EVENT_CHECKSUM_ENABLE_STARTCONDITION);
979 :
980 : /*
981 : * If the postmaster died we won't be able to enable checksums
982 : * cluster-wide so abort and hope to continue when restarted.
983 : */
984 0 : if (rc & WL_POSTMASTER_DEATH)
985 0 : ereport(FATAL,
986 : errcode(ERRCODE_ADMIN_SHUTDOWN),
987 : errmsg("postmaster exited during data checksums processing"),
988 : errhint("Data checksums processing must be restarted manually after cluster restart."));
989 :
990 0 : CHECK_FOR_INTERRUPTS();
991 :
992 0 : LWLockAcquire(DataChecksumsWorkerLock, LW_SHARED);
993 0 : if (DataChecksumState->launch_operation != operation)
994 0 : abort_requested = true;
995 0 : LWLockRelease(DataChecksumsWorkerLock);
996 0 : if (abort_requested)
997 0 : break;
998 : }
999 :
1000 8 : pgstat_report_activity(STATE_IDLE, NULL);
1001 8 : return;
1002 : }
1003 :
1004 : /*
1005 : * DataChecksumsWorkerLauncherMain
1006 : *
1007 : * Main function for launching dynamic background workers for processing data
1008 : * checksums in databases. This function has the bgworker management, with
1009 : * ProcessAllDatabases being responsible for looping over the databases and
1010 : * initiating processing.
1011 : */
void
DataChecksumsWorkerLauncherMain(Datum arg)
{
	/* Ensure cleanup (and worker signalling) runs however we exit */
	on_shmem_exit(launcher_exit, 0);

	ereport(DEBUG1,
			errmsg("background worker \"datachecksums launcher\" started"));

	pqsignal(SIGTERM, die);
	pqsignal(SIGINT, launcher_cancel_handler);
	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
	pqsignal(SIGUSR2, SIG_IGN);

	BackgroundWorkerUnblockSignals();

	MyBackendType = B_DATACHECKSUMSWORKER_LAUNCHER;
	init_ps_display(NULL);

	INJECTION_POINT("datachecksumsworker-launcher-delay", NULL);

	LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE);

	/* Only one launcher may run at a time; defer to an existing one */
	if (DataChecksumState->launcher_running)
	{
		ereport(LOG,
				errmsg("background worker \"datachecksums launcher\" already running, exiting"));
		/* Launcher was already running, let it finish */
		LWLockRelease(DataChecksumsWorkerLock);
		return;
	}

	launcher_running = true;

	/*
	 * Initialize a connection to shared catalogs only.
	 *
	 * NOTE(review): DataChecksumsWorkerLock is still held across this call —
	 * presumably intentional to serialize launcher startup, but confirm that
	 * connection initialization cannot block on another lock holder.
	 */
	BackgroundWorkerInitializeConnectionByOid(InvalidOid, InvalidOid, 0);

	/* Copy the requested operation and cost settings into the active state */
	operation = DataChecksumState->launch_operation;
	DataChecksumState->launcher_running = true;
	DataChecksumState->operation = operation;
	DataChecksumState->cost_delay = DataChecksumState->launch_cost_delay;
	DataChecksumState->cost_limit = DataChecksumState->launch_cost_limit;
	LWLockRelease(DataChecksumsWorkerLock);

	/*
	 * The target state can change while we are busy enabling/disabling
	 * checksums, if the user calls pg_disable/enable_data_checksums() before
	 * we are finished with the previous request. In that case, we will loop
	 * back here, to process the new request.
	 */
again:

	pgstat_progress_start_command(PROGRESS_COMMAND_DATACHECKSUMS,
								  InvalidOid);

	if (operation == ENABLE_DATACHECKSUMS)
	{
		/*
		 * If we are asked to enable checksums in a cluster which already has
		 * checksums enabled, exit immediately as there is nothing more to do.
		 */
		if (DataChecksumsNeedVerify())
			goto done;

		ereport(LOG,
				errmsg("enabling data checksums requested, starting data checksum calculation"));

		/*
		 * Set the state to inprogress-on and wait on the procsignal barrier.
		 */
		pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE,
									 PROGRESS_DATACHECKSUMS_PHASE_ENABLING);
		SetDataChecksumsOnInProgress();

		/*
		 * All backends are now in inprogress-on state and are writing data
		 * checksums. Start processing all data at rest.
		 */
		if (!ProcessAllDatabases())
		{
			/*
			 * If the target state changed during processing then it's not a
			 * failure, so restart processing instead.
			 */
			LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE);
			if (DataChecksumState->launch_operation != operation)
			{
				LWLockRelease(DataChecksumsWorkerLock);
				goto done;
			}
			LWLockRelease(DataChecksumsWorkerLock);
			ereport(ERROR,
					errcode(ERRCODE_INSUFFICIENT_RESOURCES),
					errmsg("unable to enable data checksums in cluster"));
		}

		/*
		 * Data checksums have been set on all pages, set the state to on in
		 * order to instruct backends to validate checksums on reading.
		 */
		SetDataChecksumsOn();

		ereport(LOG,
				errmsg("data checksums are now enabled"));
	}
	else if (operation == DISABLE_DATACHECKSUMS)
	{
		/* Disabling only flips state; data pages are left untouched */
		ereport(LOG,
				errmsg("disabling data checksums requested"));

		pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE,
									 PROGRESS_DATACHECKSUMS_PHASE_DISABLING);
		SetDataChecksumsOff();
		ereport(LOG,
				errmsg("data checksums are now disabled"));
	}
	else
		Assert(false);

done:

	/*
	 * This state will only be displayed for a fleeting moment, but for the
	 * sake of correctness it is still added before ending the command.
	 */
	pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE,
								 PROGRESS_DATACHECKSUMS_PHASE_DONE);

	/*
	 * All done. But before we exit, check if the target state was changed
	 * while we were running. In that case we will have to start all over
	 * again.
	 */
	LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE);
	if (DataChecksumState->launch_operation != operation)
	{
		/* Adopt the new request and its cost settings, then restart */
		DataChecksumState->operation = DataChecksumState->launch_operation;
		operation = DataChecksumState->launch_operation;
		DataChecksumState->cost_delay = DataChecksumState->launch_cost_delay;
		DataChecksumState->cost_limit = DataChecksumState->launch_cost_limit;
		LWLockRelease(DataChecksumsWorkerLock);
		goto again;
	}

	/* Shut down progress reporting as we are done */
	pgstat_progress_end_command();

	launcher_running = false;
	DataChecksumState->launcher_running = false;
	LWLockRelease(DataChecksumsWorkerLock);
}
1162 :
1163 : /*
1164 : * ProcessAllDatabases
1165 : * Compute the list of all databases and process checksums in each
1166 : *
1167 : * This will generate a list of databases to process for enabling checksums.
1168 : * If a database encounters a failure then processing will end immediately and
1169 : * return an error.
1170 : */
static bool
ProcessAllDatabases(void)
{
	List	   *DatabaseList;
	int			cumulative_total = 0;

	/* Set up so first run processes shared catalogs, not once in every db */
	DataChecksumState->process_shared_catalogs = true;

	/*
	 * Get a list of all databases to process. Waiting for in-flight
	 * transactions first ensures no concurrent CREATE DATABASE escapes the
	 * list (see BuildDatabaseList).
	 */
	WaitForAllTransactionsToFinish();
	DatabaseList = BuildDatabaseList();

	/*
	 * Update progress reporting with the total number of databases we need to
	 * process. This number should not be changed during processing, the
	 * columns for processed databases is instead increased such that it can
	 * be compared against the total.
	 */
	{
		const int	index[] = {
			PROGRESS_DATACHECKSUMS_DBS_TOTAL,
			PROGRESS_DATACHECKSUMS_DBS_DONE,
			PROGRESS_DATACHECKSUMS_RELS_TOTAL,
			PROGRESS_DATACHECKSUMS_RELS_DONE,
			PROGRESS_DATACHECKSUMS_BLOCKS_TOTAL,
			PROGRESS_DATACHECKSUMS_BLOCKS_DONE,
		};

		int64		vals[6];

		vals[0] = list_length(DatabaseList);
		vals[1] = 0;
		/* translated to NULL */
		vals[2] = -1;
		vals[3] = -1;
		vals[4] = -1;
		vals[5] = -1;

		pgstat_progress_update_multi_param(6, index, vals);
	}

	foreach_ptr(DataChecksumsWorkerDatabase, db, DatabaseList)
	{
		DataChecksumsWorkerResult result;

		/* Launch a per-database worker and wait for it to finish */
		result = ProcessDatabase(db);

#ifdef USE_INJECTION_POINTS
		/* Allow a test process to alter the result of the operation */
		if (IS_INJECTION_POINT_ATTACHED("datachecksumsworker-fail-db-result"))
		{
			result = DATACHECKSUMSWORKER_FAILED;
			INJECTION_POINT_CACHED("datachecksumsworker-fail-db-result",
								   db->dbname);
		}
#endif

		pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_DBS_DONE,
									 ++cumulative_total);

		if (result == DATACHECKSUMSWORKER_FAILED)
		{
			/*
			 * Disable checksums on cluster, because we failed one of the
			 * databases and this is an all or nothing process.  The ERROR
			 * does not return; DatabaseList is reclaimed with the memory
			 * context during error cleanup.
			 */
			SetDataChecksumsOff();
			ereport(ERROR,
					errcode(ERRCODE_INSUFFICIENT_RESOURCES),
					errmsg("data checksums failed to get enabled in all databases, aborting"),
					errhint("The server log might have more information on the cause of the error."));
		}
		else if (result == DATACHECKSUMSWORKER_ABORTED || abort_requested)
		{
			/*
			 * Abort flag set, so exit the whole process.  NOTE(review): this
			 * path returns without FreeDatabaseList(); presumably the list is
			 * reclaimed by memory-context cleanup — confirm.
			 */
			return false;
		}

		/*
		 * When one database has completed, it will have done shared catalogs
		 * so we don't have to process them again.  (A DROPDB result also
		 * falls through to here.)
		 */
		DataChecksumState->process_shared_catalogs = false;
	}

	FreeDatabaseList(DatabaseList);

	pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE,
								 PROGRESS_DATACHECKSUMS_PHASE_WAITING_BARRIER);
	return true;
}
1263 :
1264 : /*
1265 : * DataChecksumShmemRequest
1266 : * Request datachecksumsworker-related shared memory
1267 : */
static void
DataChecksumsShmemRequest(void *arg)
{
	/*
	 * Reserve shared memory for the worker state struct; the allocated
	 * pointer is stored in DataChecksumState.
	 */
	ShmemRequestStruct(.name = "DataChecksumsWorker Data",
					   .size = sizeof(DataChecksumsStateStruct),
					   .ptr = (void **) &DataChecksumState,
					   );
}
1276 :
1277 : /*
1278 : * DatabaseExists
1279 : *
1280 : * Scans the system catalog to check if a database with the given Oid exist
1281 : * and returns true if it is found, else false.
1282 : */
1283 : static bool
1284 0 : DatabaseExists(Oid dboid)
1285 : {
1286 : Relation rel;
1287 : ScanKeyData skey;
1288 : SysScanDesc scan;
1289 : bool found;
1290 : HeapTuple tuple;
1291 :
1292 0 : StartTransactionCommand();
1293 :
1294 0 : rel = table_open(DatabaseRelationId, AccessShareLock);
1295 0 : ScanKeyInit(&skey,
1296 : Anum_pg_database_oid,
1297 : BTEqualStrategyNumber, F_OIDEQ,
1298 : dboid);
1299 0 : scan = systable_beginscan(rel, DatabaseOidIndexId, true, SnapshotSelf,
1300 : 1, &skey);
1301 0 : tuple = systable_getnext(scan);
1302 0 : found = HeapTupleIsValid(tuple);
1303 :
1304 0 : systable_endscan(scan);
1305 0 : table_close(rel, AccessShareLock);
1306 :
1307 0 : CommitTransactionCommand();
1308 :
1309 0 : return found;
1310 : }
1311 :
1312 : /*
1313 : * BuildDatabaseList
1314 : * Compile a list of all currently available databases in the cluster
1315 : *
1316 : * This creates the list of databases for the datachecksumsworker workers to
1317 : * add checksums to. If the caller wants to ensure that no concurrently
1318 : * running CREATE DATABASE calls exist, this needs to be preceded by a call
1319 : * to WaitForAllTransactionsToFinish().
1320 : */
1321 : static List *
1322 8 : BuildDatabaseList(void)
1323 : {
1324 8 : List *DatabaseList = NIL;
1325 : Relation rel;
1326 : TableScanDesc scan;
1327 : HeapTuple tup;
1328 8 : MemoryContext ctx = CurrentMemoryContext;
1329 : MemoryContext oldctx;
1330 :
1331 8 : StartTransactionCommand();
1332 :
1333 8 : rel = table_open(DatabaseRelationId, AccessShareLock);
1334 8 : scan = table_beginscan_catalog(rel, 0, NULL);
1335 :
1336 32 : while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
1337 : {
1338 24 : Form_pg_database pgdb = (Form_pg_database) GETSTRUCT(tup);
1339 : DataChecksumsWorkerDatabase *db;
1340 :
1341 24 : oldctx = MemoryContextSwitchTo(ctx);
1342 :
1343 24 : db = (DataChecksumsWorkerDatabase *) palloc0(sizeof(DataChecksumsWorkerDatabase));
1344 :
1345 24 : db->dboid = pgdb->oid;
1346 24 : db->dbname = pstrdup(NameStr(pgdb->datname));
1347 :
1348 24 : DatabaseList = lappend(DatabaseList, db);
1349 :
1350 24 : MemoryContextSwitchTo(oldctx);
1351 : }
1352 :
1353 8 : table_endscan(scan);
1354 8 : table_close(rel, AccessShareLock);
1355 :
1356 8 : CommitTransactionCommand();
1357 :
1358 8 : return DatabaseList;
1359 : }
1360 :
1361 : static void
1362 6 : FreeDatabaseList(List *dblist)
1363 : {
1364 6 : if (!dblist)
1365 0 : return;
1366 :
1367 30 : foreach_ptr(DataChecksumsWorkerDatabase, db, dblist)
1368 : {
1369 18 : if (db->dbname != NULL)
1370 18 : pfree(db->dbname);
1371 : }
1372 :
1373 6 : list_free_deep(dblist);
1374 : }
1375 :
1376 : /*
1377 : * BuildRelationList
1378 : * Compile a list of relations in the database
1379 : *
 * Returns a list of OIDs for the requested relation types. If temp_relations
1381 : * is True then only temporary relations are returned. If temp_relations is
1382 : * False then non-temporary relations which have data checksums are returned.
1383 : * If include_shared is True then shared relations are included as well in a
1384 : * non-temporary list. include_shared has no relevance when building a list of
1385 : * temporary relations.
1386 : */
1387 : static List *
1388 60 : BuildRelationList(bool temp_relations, bool include_shared)
1389 : {
1390 60 : List *RelationList = NIL;
1391 : Relation rel;
1392 : TableScanDesc scan;
1393 : HeapTuple tup;
1394 60 : MemoryContext ctx = CurrentMemoryContext;
1395 : MemoryContext oldctx;
1396 :
1397 60 : StartTransactionCommand();
1398 :
1399 60 : rel = table_open(RelationRelationId, AccessShareLock);
1400 60 : scan = table_beginscan_catalog(rel, 0, NULL);
1401 :
1402 27204 : while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
1403 : {
1404 27144 : Form_pg_class pgc = (Form_pg_class) GETSTRUCT(tup);
1405 :
1406 : /* Only include temporary relations when explicitly asked to */
1407 27144 : if (pgc->relpersistence == RELPERSISTENCE_TEMP)
1408 : {
1409 3 : if (!temp_relations)
1410 1 : continue;
1411 : }
1412 : else
1413 : {
1414 : /*
1415 : * If we are only interested in temp relations then continue
1416 : * immediately as the current relation isn't a temp relation.
1417 : */
1418 27141 : if (temp_relations)
1419 18094 : continue;
1420 :
1421 9047 : if (!RELKIND_HAS_STORAGE(pgc->relkind))
1422 3240 : continue;
1423 :
1424 5807 : if (pgc->relisshared && !include_shared)
1425 552 : continue;
1426 : }
1427 :
1428 5257 : oldctx = MemoryContextSwitchTo(ctx);
1429 5257 : RelationList = lappend_oid(RelationList, pgc->oid);
1430 5257 : MemoryContextSwitchTo(oldctx);
1431 : }
1432 :
1433 60 : table_endscan(scan);
1434 60 : table_close(rel, AccessShareLock);
1435 :
1436 60 : CommitTransactionCommand();
1437 :
1438 60 : return RelationList;
1439 : }
1440 :
1441 : /*
1442 : * DataChecksumsWorkerMain
1443 : *
1444 : * Main function for enabling checksums in a single database, This is the
1445 : * function set as the bgw_function_name in the dynamic background worker
1446 : * process initiated for each database by the worker launcher. After enabling
1447 : * data checksums in each applicable relation in the database, it will wait for
1448 : * all temporary relations that were present when the function started to
1449 : * disappear before returning. This is required since we cannot rewrite
1450 : * existing temporary relations with data checksums.
1451 : */
void
DataChecksumsWorkerMain(Datum arg)
{
	Oid			dboid = DatumGetObjectId(arg);
	List	   *RelationList = NIL;
	List	   *InitialTempTableList = NIL;
	BufferAccessStrategy strategy;
	bool		aborted = false;
	int64		rels_done;
#ifdef USE_INJECTION_POINTS
	bool		retried = false;
#endif

	/* This worker only ever runs when enabling data checksums */
	operation = ENABLE_DATACHECKSUMS;

	pqsignal(SIGTERM, die);
	pqsignal(SIGUSR1, procsignal_sigusr1_handler);

	BackgroundWorkerUnblockSignals();

	MyBackendType = B_DATACHECKSUMSWORKER_WORKER;
	init_ps_display(NULL);

	/* Connect to the target database, bypassing datallowconn restrictions */
	BackgroundWorkerInitializeConnectionByOid(dboid, InvalidOid,
											  BGWORKER_BYPASS_ALLOWCONN);

	/* worker will have a separate entry in pg_stat_progress_data_checksums */
	pgstat_progress_start_command(PROGRESS_COMMAND_DATACHECKSUMS,
								  InvalidOid);

	/*
	 * Get a list of all temp tables present as we start in this database. We
	 * need to wait until they are all gone until we are done, since we cannot
	 * access these relations and modify them.
	 */
	InitialTempTableList = BuildRelationList(true, false);

	/*
	 * Enable vacuum cost delay, if any. While this process isn't doing any
	 * vacuuming, we are re-using the infrastructure that vacuum cost delay
	 * provides rather than inventing something bespoke. This is an internal
	 * implementation detail and care should be taken to avoid it bleeding
	 * through to the user to avoid confusion.
	 */
	Assert(DataChecksumState->operation == ENABLE_DATACHECKSUMS);
	VacuumCostDelay = DataChecksumState->cost_delay;
	VacuumCostLimit = DataChecksumState->cost_limit;
	VacuumCostActive = (VacuumCostDelay > 0);
	VacuumCostBalance = 0;
	VacuumCostPageHit = 0;
	VacuumCostPageMiss = 0;
	VacuumCostPageDirty = 0;

	/*
	 * Create and set the vacuum strategy as our buffer strategy.
	 */
	strategy = GetAccessStrategy(BAS_VACUUM);

	/* Non-temp relations with storage; shared catalogs only on first DB */
	RelationList = BuildRelationList(false,
									 DataChecksumState->process_shared_catalogs);

	/* Update the total number of relations to be processed in this DB. */
	{
		const int	index[] = {
			PROGRESS_DATACHECKSUMS_RELS_TOTAL,
			PROGRESS_DATACHECKSUMS_RELS_DONE
		};

		int64		vals[2];

		vals[0] = list_length(RelationList);
		vals[1] = 0;

		pgstat_progress_update_multi_param(2, index, vals);
	}

	/* Process the relations */
	rels_done = 0;
	foreach_oid(reloid, RelationList)
	{
		CHECK_FOR_INTERRUPTS();

		if (!ProcessSingleRelationByOid(reloid, strategy))
		{
			aborted = true;
			break;
		}

		pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_RELS_DONE,
									 ++rels_done);
	}
	list_free(RelationList);

	if (aborted)
	{
		/*
		 * NOTE(review): success is written here without taking
		 * DataChecksumsWorkerLock (unlike the success path at the bottom),
		 * and pgstat_progress_end_command() is not called before returning —
		 * presumably both are handled at process exit; verify.
		 */
		DataChecksumState->success = DATACHECKSUMSWORKER_ABORTED;
		ereport(DEBUG1,
				errmsg("data checksum processing aborted in database OID %u",
					   dboid));
		return;
	}

	/* The worker is about to wait for temporary tables to go away. */
	pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE,
								 PROGRESS_DATACHECKSUMS_PHASE_WAITING_TEMPREL);

	/*
	 * Wait for all temp tables that existed when we started to go away. This
	 * is necessary since we cannot "reach" them to enable checksums. Any temp
	 * tables created after we started will already have checksums in them
	 * (due to the "inprogress-on" state), so no need to wait for those.
	 */
	for (;;)
	{
		List	   *CurrentTempTables;
		int			numleft;
		char		activity[64];

		/* Count how many of the initial temp tables still exist */
		CurrentTempTables = BuildRelationList(true, false);
		numleft = 0;
		foreach_oid(tmptbloid, InitialTempTableList)
		{
			if (list_member_oid(CurrentTempTables, tmptbloid))
				numleft++;
		}
		list_free(CurrentTempTables);

#ifdef USE_INJECTION_POINTS
		if (IS_INJECTION_POINT_ATTACHED("datachecksumsworker-fake-temptable-wait"))
		{
			/* Make sure to just cause one retry */
			if (!retried && numleft == 0)
			{
				numleft = 1;
				retried = true;

				INJECTION_POINT_CACHED("datachecksumsworker-fake-temptable-wait", NULL);
			}
		}
#endif

		if (numleft == 0)
			break;

		/*
		 * At least one temp table is left to wait for, indicate in pgstat
		 * activity and progress reporting.
		 */
		snprintf(activity,
				 sizeof(activity),
				 "Waiting for %d temp tables to be removed", numleft);
		pgstat_report_activity(STATE_RUNNING, activity);

		/* Retry every 3 seconds */
		ResetLatch(MyLatch);
		(void) WaitLatch(MyLatch,
						 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
						 3000,
						 WAIT_EVENT_CHECKSUM_ENABLE_TEMPTABLE_WAIT);

		/* Abort if the user requested the opposite operation meanwhile */
		LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE);
		aborted = DataChecksumState->launch_operation != operation;
		LWLockRelease(DataChecksumsWorkerLock);

		CHECK_FOR_INTERRUPTS();

		if (aborted || abort_requested)
		{
			/* NOTE(review): same unlocked write/early return as above */
			DataChecksumState->success = DATACHECKSUMSWORKER_ABORTED;
			ereport(LOG,
					errmsg("data checksum processing aborted in database OID %u",
						   dboid));
			return;
		}
	}

	list_free(InitialTempTableList);

	/* worker done */
	pgstat_progress_end_command();

	/* Report success to the launcher waiting in ProcessDatabase() */
	LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE);
	DataChecksumState->success = DATACHECKSUMSWORKER_SUCCESSFUL;
	LWLockRelease(DataChecksumsWorkerLock);
}
|