Line data Source code
1 : /*-------------------------------------------------------------------------
2 : * slotsync.c
3 : * Functionality for synchronizing slots to a standby server from the
4 : * primary server.
5 : *
6 : * Copyright (c) 2024-2026, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/replication/logical/slotsync.c
10 : *
11 : * This file contains the code for slot synchronization on a physical standby
12 : * to fetch logical failover slots information from the primary server, create
13 : * the slots on the standby and synchronize them periodically.
14 : *
15 : * Slot synchronization can be performed either automatically by enabling slot
16 : * sync worker or manually by calling SQL function pg_sync_replication_slots().
17 : *
18 : * If the WAL corresponding to the remote's restart_lsn is not available on the
19 : * physical standby or the remote's catalog_xmin precedes the oldest xid for
20 : * which it is guaranteed that rows wouldn't have been removed then we cannot
21 : * create the local standby slot because that would mean moving the local slot
22 : * backward and decoding won't be possible via such a slot. In this case, the
23 : * slot will be marked as RS_TEMPORARY. Once the primary server catches up,
24 : * the slot will be marked as RS_PERSISTENT (which means sync-ready) after
25 : * which slot sync worker can perform the sync periodically or user can call
26 : * pg_sync_replication_slots() periodically to perform the syncs.
27 : *
28 : * If synchronized slots fail to build a consistent snapshot from the
29 : * restart_lsn before reaching confirmed_flush_lsn, they would become
30 : * unreliable after promotion due to potential data loss from changes
31 : * before reaching a consistent point. This can happen because the slots can
32 : * be synced at some random time and we may not reach the consistent point
33 : * at the same WAL location as the primary. So, we mark such slots as
34 : * RS_TEMPORARY. Once the decoding from corresponding LSNs can reach a
35 : * consistent point, they will be marked as RS_PERSISTENT.
36 : *
37 : * If the WAL prior to the remote slot's confirmed_flush_lsn has not been
38 : * flushed on the standby, the slot is marked as RS_TEMPORARY. Once the standby
39 : * catches up and flushes that WAL, the slot will be marked as RS_PERSISTENT.
40 : *
41 : * The slot sync worker waits for some time before the next synchronization,
42 : * with the duration varying based on whether any slots were updated during
43 : * the last cycle. Refer to the comments above wait_for_slot_activity() for
44 : * more details.
45 : *
46 : * If the SQL function pg_sync_replication_slots() is used to sync the slots,
47 : * and if the slots are not ready to be synced and are marked as RS_TEMPORARY
48 : * because of any of the reasons mentioned above, then the SQL function also
49 : * waits and retries until the slots are marked as RS_PERSISTENT (which means
50 : * sync-ready). Refer to the comments in SyncReplicationSlots() for more
51 : * details.
52 : *
53 : * Any standby synchronized slots will be dropped if they no longer need
54 : * to be synchronized. See comment atop drop_local_obsolete_slots() for more
55 : * details.
56 : *---------------------------------------------------------------------------
57 : */
58 :
59 : #include "postgres.h"
60 :
61 : #include <time.h>
62 :
63 : #include "access/xlog_internal.h"
64 : #include "access/xlogrecovery.h"
65 : #include "catalog/pg_database.h"
66 : #include "libpq/pqsignal.h"
67 : #include "pgstat.h"
68 : #include "postmaster/interrupt.h"
69 : #include "replication/logical.h"
70 : #include "replication/slotsync.h"
71 : #include "replication/snapbuild.h"
72 : #include "storage/ipc.h"
73 : #include "storage/lmgr.h"
74 : #include "storage/proc.h"
75 : #include "storage/procarray.h"
76 : #include "storage/subsystems.h"
77 : #include "tcop/tcopprot.h"
78 : #include "utils/builtins.h"
79 : #include "utils/memutils.h"
80 : #include "utils/pg_lsn.h"
81 : #include "utils/ps_status.h"
82 : #include "utils/timeout.h"
83 : #include "utils/wait_event.h"
84 :
85 : /*
86 : * Struct for sharing information to control slot synchronization.
87 : *
88 : * The 'pid' is either the slot sync worker's pid or the backend's pid running
89 : * the SQL function pg_sync_replication_slots(). When the startup process sets
90 : * 'stopSignaled' during promotion, it uses this 'pid' to wake up the currently
91 : * synchronizing process so that the process can immediately stop its
92 : * synchronizing work on seeing 'stopSignaled' set.
93 : * Setting 'stopSignaled' is also used to handle the race condition when the
94 : * postmaster has not noticed the promotion yet and thus may end up restarting
95 : * the slot sync worker. If 'stopSignaled' is set, the worker will exit in such a
96 : * case. The SQL function pg_sync_replication_slots() will also error out if
97 : * this flag is set. Note that we don't need to reset this variable as after
98 : * promotion the slot sync worker won't be restarted because the pmState
99 : * changes to PM_RUN from PM_HOT_STANDBY and we don't support demoting
100 : * primary without restarting the server. See LaunchMissingBackgroundProcesses.
101 : *
102 : * The 'syncing' flag is needed to prevent concurrent slot syncs to avoid slot
103 : * overwrites.
104 : *
105 : * The 'last_start_time' is needed by postmaster to start the slot sync worker
106 : * once per SLOTSYNC_RESTART_INTERVAL_SEC. In cases where an immediate restart
107 : * is expected (e.g., slot sync GUCs change), slot sync worker will reset
108 : * last_start_time before exiting, so that postmaster can start the worker
109 : * without waiting for SLOTSYNC_RESTART_INTERVAL_SEC.
110 : */
typedef struct SlotSyncCtxStruct
{
	pid_t		pid;			/* PID of the process currently syncing slots
								 * (worker or SQL-function backend); used by
								 * the startup process to wake it at promotion */
	bool		stopSignaled;	/* set (and never reset) by the startup
								 * process during promotion */
	bool		syncing;		/* true while some process performs slot sync;
								 * prevents concurrent syncs */
	time_t		last_start_time;	/* last worker start time, used by
									 * postmaster for restart throttling */
	slock_t		mutex;			/* spinlock protecting the fields above */
} SlotSyncCtxStruct;
119 :
120 : static SlotSyncCtxStruct *SlotSyncCtx = NULL;
121 :
122 : static void SlotSyncShmemRequest(void *arg);
123 : static void SlotSyncShmemInit(void *arg);
124 :
125 : const ShmemCallbacks SlotSyncShmemCallbacks = {
126 : .request_fn = SlotSyncShmemRequest,
127 : .init_fn = SlotSyncShmemInit,
128 : };
129 :
130 : /* GUC variable */
131 : bool sync_replication_slots = false;
132 :
133 : /*
134 : * The sleep time (ms) between slot-sync cycles varies dynamically
135 : * (within a MIN/MAX range) according to slot activity. See
136 : * wait_for_slot_activity() for details.
137 : */
138 : #define MIN_SLOTSYNC_WORKER_NAPTIME_MS 200
139 : #define MAX_SLOTSYNC_WORKER_NAPTIME_MS 30000 /* 30s */
140 :
141 : static long sleep_ms = MIN_SLOTSYNC_WORKER_NAPTIME_MS;
142 :
143 : /* The restart interval for slot sync work used by postmaster */
144 : #define SLOTSYNC_RESTART_INTERVAL_SEC 10
145 :
146 : /*
147 : * Flag to tell if we are syncing replication slots. Unlike the 'syncing' flag
148 : * in SlotSyncCtxStruct, this flag is true only if the current process is
149 : * performing slot synchronization.
150 : */
151 : static bool syncing_slots = false;
152 :
153 : /*
154 : * Structure to hold information fetched from the primary server about a logical
155 : * replication slot.
156 : */
typedef struct RemoteSlot
{
	char	   *name;			/* slot name on the primary */
	char	   *plugin;			/* output plugin name */
	char	   *database;		/* name of the database the slot belongs to */
	bool		two_phase;		/* two-phase decoding enabled? */
	bool		failover;		/* slot enabled for failover? */
	XLogRecPtr	restart_lsn;	/* remote slot's restart_lsn */
	XLogRecPtr	confirmed_lsn;	/* remote slot's confirmed_flush LSN */
	XLogRecPtr	two_phase_at;	/* LSN from which two-phase decoding applies
								 * on the remote (see use in
								 * update_local_synced_slot) */
	TransactionId catalog_xmin; /* remote slot's catalog_xmin */

	/* RS_INVAL_NONE if valid, or the reason of invalidation */
	ReplicationSlotInvalidationCause invalidated;
} RemoteSlot;
172 :
173 : static void slotsync_failure_callback(int code, Datum arg);
174 : static void update_synced_slots_inactive_since(void);
175 :
176 : /*
177 : * Update slot sync skip stats. This function requires the caller to acquire
178 : * the slot.
179 : */
180 : static void
181 60 : update_slotsync_skip_stats(SlotSyncSkipReason skip_reason)
182 : {
183 : ReplicationSlot *slot;
184 :
185 : Assert(MyReplicationSlot);
186 :
187 60 : slot = MyReplicationSlot;
188 :
189 : /*
190 : * Update the slot sync related stats in pg_stat_replication_slots when a
191 : * slot sync is skipped
192 : */
193 60 : if (skip_reason != SS_SKIP_NONE)
194 7 : pgstat_report_replslotsync(slot);
195 :
196 : /* Update the slot sync skip reason */
197 60 : if (slot->slotsync_skip_reason != skip_reason)
198 : {
199 5 : SpinLockAcquire(&slot->mutex);
200 5 : slot->slotsync_skip_reason = skip_reason;
201 5 : SpinLockRelease(&slot->mutex);
202 : }
203 60 : }
204 :
205 : /*
206 : * If necessary, update the local synced slot's metadata based on the data
207 : * from the remote slot.
208 : *
209 : * If no update was needed (the data of the remote slot is the same as the
210 : * local slot) return false, otherwise true.
211 : */
static bool
update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
{
	ReplicationSlot *slot = MyReplicationSlot;
	bool		updated_xmin_or_lsn = false;
	bool		updated_config = false;
	SlotSyncSkipReason skip_reason = SS_SKIP_NONE;
	XLogRecPtr	latestFlushPtr = GetStandbyFlushRecPtr(NULL);

	/* Invalidated slots must never reach here; callers skip them. */
	Assert(slot->data.invalidated == RS_INVAL_NONE);

	/*
	 * Make sure that concerned WAL is received and flushed before syncing
	 * slot to target lsn received from the primary server.
	 */
	if (remote_slot->confirmed_lsn > latestFlushPtr)
	{
		update_slotsync_skip_stats(SS_SKIP_WAL_NOT_FLUSHED);

		/*
		 * Can get here only if GUC 'synchronized_standby_slots' on the
		 * primary server was not configured correctly.
		 */
		ereport(LOG,
				errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				errmsg("skipping slot synchronization because the received slot sync"
					   " LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
					   LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
					   remote_slot->name,
					   LSN_FORMAT_ARGS(latestFlushPtr)));

		return false;
	}

	/*
	 * Don't overwrite if we already have a newer catalog_xmin and
	 * restart_lsn.
	 */
	if (remote_slot->restart_lsn < slot->data.restart_lsn ||
		TransactionIdPrecedes(remote_slot->catalog_xmin,
							  slot->data.catalog_xmin))
	{
		/* Update slot sync skip stats */
		update_slotsync_skip_stats(SS_SKIP_WAL_OR_ROWS_REMOVED);

		/*
		 * This can happen in following situations:
		 *
		 * If the slot is temporary, it means either the initial WAL location
		 * reserved for the local slot is ahead of the remote slot's
		 * restart_lsn or the initial xmin_horizon computed for the local slot
		 * is ahead of the remote slot.
		 *
		 * If the slot is persistent, both restart_lsn and catalog_xmin of the
		 * synced slot could still be ahead of the remote slot. Since we use
		 * slot advance functionality to keep snapbuild/slot updated, it is
		 * possible that the restart_lsn and catalog_xmin are advanced to a
		 * later position than it has on the primary. This can happen when
		 * slot advancing machinery finds running xacts record after reaching
		 * the consistent state at a later point than the primary where it
		 * serializes the snapshot and updates the restart_lsn.
		 *
		 * We LOG the message if the slot is temporary as it can help the user
		 * to understand why the slot is not sync-ready. In the case of a
		 * persistent slot, it would be a more common case and won't directly
		 * impact the users, so we used DEBUG1 level to log the message.
		 */
		ereport(slot->data.persistency == RS_TEMPORARY ? LOG : DEBUG1,
				errmsg("could not synchronize replication slot \"%s\"",
					   remote_slot->name),
				errdetail("Synchronization could lead to data loss, because the remote slot needs WAL at LSN %X/%08X and catalog xmin %u, but the standby has LSN %X/%08X and catalog xmin %u.",
						  LSN_FORMAT_ARGS(remote_slot->restart_lsn),
						  remote_slot->catalog_xmin,
						  LSN_FORMAT_ARGS(slot->data.restart_lsn),
						  slot->data.catalog_xmin));

		/*
		 * Skip updating the configuration. This is required to avoid syncing
		 * two_phase_at without syncing confirmed_lsn. Otherwise, the prepared
		 * transaction between old confirmed_lsn and two_phase_at will
		 * unexpectedly get decoded and sent to the downstream after
		 * promotion. See comments in ReorderBufferFinishPrepared.
		 */
		return false;
	}

	/*
	 * Attempt to sync LSNs and xmins only if remote slot is ahead of local
	 * slot.
	 */
	if (remote_slot->confirmed_lsn > slot->data.confirmed_flush ||
		remote_slot->restart_lsn > slot->data.restart_lsn ||
		TransactionIdFollows(remote_slot->catalog_xmin,
							 slot->data.catalog_xmin))
	{
		/*
		 * We can't directly copy the remote slot's LSN or xmin unless there
		 * exists a consistent snapshot at that point. Otherwise, after
		 * promotion, the slots may not reach a consistent point before the
		 * confirmed_flush_lsn which can lead to a data loss. To avoid data
		 * loss, we let slot machinery advance the slot which ensures that
		 * snapbuilder/slot statuses are updated properly.
		 */
		if (SnapBuildSnapshotExists(remote_slot->restart_lsn))
		{
			/*
			 * Update the slot info directly if there is a serialized snapshot
			 * at the restart_lsn, as the slot can quickly reach consistency
			 * at restart_lsn by restoring the snapshot.
			 */
			SpinLockAcquire(&slot->mutex);
			slot->data.restart_lsn = remote_slot->restart_lsn;
			slot->data.confirmed_flush = remote_slot->confirmed_lsn;
			slot->data.catalog_xmin = remote_slot->catalog_xmin;
			SpinLockRelease(&slot->mutex);
		}
		else
		{
			bool		found_consistent_snapshot;

			/* No serialized snapshot: advance by actually decoding WAL. */
			LogicalSlotAdvanceAndCheckSnapState(remote_slot->confirmed_lsn,
												&found_consistent_snapshot);

			/* Sanity check */
			if (slot->data.confirmed_flush != remote_slot->confirmed_lsn)
				ereport(ERROR,
						errmsg_internal("synchronized confirmed_flush for slot \"%s\" differs from remote slot",
										remote_slot->name),
						errdetail_internal("Remote slot has LSN %X/%08X but local slot has LSN %X/%08X.",
										   LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
										   LSN_FORMAT_ARGS(slot->data.confirmed_flush)));

			/*
			 * If we can't reach a consistent snapshot, the slot won't be
			 * persisted. See update_and_persist_local_synced_slot().
			 */
			if (!found_consistent_snapshot)
			{
				Assert(MyReplicationSlot->data.persistency == RS_TEMPORARY);

				ereport(LOG,
						errmsg("could not synchronize replication slot \"%s\"",
							   remote_slot->name),
						errdetail("Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%08X.",
								  LSN_FORMAT_ARGS(slot->data.restart_lsn)));

				skip_reason = SS_SKIP_NO_CONSISTENT_SNAPSHOT;
			}
		}

		updated_xmin_or_lsn = true;
	}

	/*
	 * Update slot sync skip stats. Note that this also clears a previously
	 * recorded skip reason when skip_reason is SS_SKIP_NONE.
	 */
	update_slotsync_skip_stats(skip_reason);

	/* Sync the slot's configuration if any property diverged. */
	if (remote_dbid != slot->data.database ||
		remote_slot->two_phase != slot->data.two_phase ||
		remote_slot->failover != slot->data.failover ||
		strcmp(remote_slot->plugin, NameStr(slot->data.plugin)) != 0 ||
		remote_slot->two_phase_at != slot->data.two_phase_at)
	{
		NameData	plugin_name;

		/* Avoid expensive operations while holding a spinlock. */
		namestrcpy(&plugin_name, remote_slot->plugin);

		SpinLockAcquire(&slot->mutex);
		slot->data.plugin = plugin_name;
		slot->data.database = remote_dbid;
		slot->data.two_phase = remote_slot->two_phase;
		slot->data.two_phase_at = remote_slot->two_phase_at;
		slot->data.failover = remote_slot->failover;
		SpinLockRelease(&slot->mutex);

		updated_config = true;

		/*
		 * Ensure that there is no risk of sending prepared transactions
		 * unexpectedly after the promotion.
		 */
		Assert(slot->data.two_phase_at <= slot->data.confirmed_flush);
	}

	/*
	 * We have to write the changed xmin to disk *before* we change the
	 * in-memory value, otherwise after a crash we wouldn't know that some
	 * catalog tuples might have been removed already.
	 */
	if (updated_config || updated_xmin_or_lsn)
	{
		ReplicationSlotMarkDirty();
		ReplicationSlotSave();
	}

	/*
	 * Now the new xmin is safely on disk, we can let the global value
	 * advance. We do not take ProcArrayLock or similar since we only advance
	 * xmin here and there's not much harm done by a concurrent computation
	 * missing that.
	 */
	if (updated_xmin_or_lsn)
	{
		SpinLockAcquire(&slot->mutex);
		slot->effective_catalog_xmin = remote_slot->catalog_xmin;
		SpinLockRelease(&slot->mutex);

		ReplicationSlotsComputeRequiredXmin(false);
		ReplicationSlotsComputeRequiredLSN();
	}

	/* True iff anything about the local slot changed. */
	return updated_config || updated_xmin_or_lsn;
}
425 :
426 : /*
427 : * Get the list of local logical slots that are synchronized from the
428 : * primary server.
429 : */
430 : static List *
431 36 : get_local_synced_slots(void)
432 : {
433 36 : List *local_slots = NIL;
434 :
435 36 : LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
436 :
437 396 : for (int i = 0; i < max_replication_slots; i++)
438 : {
439 360 : ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
440 :
441 : /* Check if it is a synchronized slot */
442 360 : if (s->in_use && s->data.synced)
443 : {
444 : Assert(SlotIsLogical(s));
445 54 : local_slots = lappend(local_slots, s);
446 : }
447 : }
448 :
449 36 : LWLockRelease(ReplicationSlotControlLock);
450 :
451 36 : return local_slots;
452 : }
453 :
454 : /*
455 : * Helper function to check if local_slot is required to be retained.
456 : *
457 : * Return false either if local_slot does not exist in the remote_slots list
458 : * or is invalidated while the corresponding remote slot is still valid,
459 : * otherwise true.
460 : */
461 : static bool
462 54 : local_sync_slot_required(ReplicationSlot *local_slot, List *remote_slots)
463 : {
464 54 : bool remote_exists = false;
465 54 : bool locally_invalidated = false;
466 :
467 131 : foreach_ptr(RemoteSlot, remote_slot, remote_slots)
468 : {
469 76 : if (strcmp(remote_slot->name, NameStr(local_slot->data.name)) == 0)
470 : {
471 53 : remote_exists = true;
472 :
473 : /*
474 : * If remote slot is not invalidated but local slot is marked as
475 : * invalidated, then set locally_invalidated flag.
476 : */
477 53 : SpinLockAcquire(&local_slot->mutex);
478 53 : locally_invalidated =
479 106 : (remote_slot->invalidated == RS_INVAL_NONE) &&
480 53 : (local_slot->data.invalidated != RS_INVAL_NONE);
481 53 : SpinLockRelease(&local_slot->mutex);
482 :
483 53 : break;
484 : }
485 : }
486 :
487 54 : return (remote_exists && !locally_invalidated);
488 : }
489 :
490 : /*
491 : * Drop local obsolete slots.
492 : *
493 : * Drop the local slots that no longer need to be synced i.e. these either do
494 : * not exist on the primary or are no longer enabled for failover.
495 : *
496 : * Additionally, drop any slots that are valid on the primary but got
497 : * invalidated on the standby. This situation may occur due to the following
498 : * reasons:
499 : * - The 'max_slot_wal_keep_size' on the standby is insufficient to retain WAL
500 : * records from the restart_lsn of the slot.
501 : * - 'primary_slot_name' is temporarily reset to null and the physical slot is
502 : * removed.
503 : * These dropped slots will get recreated in next sync-cycle and it is okay to
504 : * drop and recreate such slots as long as these are not consumable on the
505 : * standby (which is the case currently).
506 : *
507 : * Note: Change of 'wal_level' on the primary server to a level lower than
508 : * logical may also result in slot invalidation and removal on the standby.
509 : * This is because such 'wal_level' change is only possible if the logical
510 : * slots are removed on the primary server, so it's expected to see the
511 : * slots being invalidated and removed on the standby too (and re-created
512 : * if they are re-created on the primary server).
513 : */
514 : static void
515 36 : drop_local_obsolete_slots(List *remote_slot_list)
516 : {
517 36 : List *local_slots = get_local_synced_slots();
518 :
519 126 : foreach_ptr(ReplicationSlot, local_slot, local_slots)
520 : {
521 : /* Drop the local slot if it is not required to be retained. */
522 54 : if (!local_sync_slot_required(local_slot, remote_slot_list))
523 : {
524 : bool synced_slot;
525 :
526 : /*
527 : * Use shared lock to prevent a conflict with
528 : * ReplicationSlotsDropDBSlots(), trying to drop the same slot
529 : * during a drop-database operation.
530 : */
531 2 : LockSharedObject(DatabaseRelationId, local_slot->data.database,
532 : 0, AccessShareLock);
533 :
534 : /*
535 : * In the small window between getting the slot to drop and
536 : * locking the database, there is a possibility of a parallel
537 : * database drop by the startup process and the creation of a new
538 : * slot by the user. This new user-created slot may end up using
539 : * the same shared memory as that of 'local_slot'. Thus check if
540 : * local_slot is still the synced one before performing actual
541 : * drop.
542 : */
543 2 : SpinLockAcquire(&local_slot->mutex);
544 2 : synced_slot = local_slot->in_use && local_slot->data.synced;
545 2 : SpinLockRelease(&local_slot->mutex);
546 :
547 2 : if (synced_slot)
548 : {
549 2 : ReplicationSlotAcquire(NameStr(local_slot->data.name), true, false);
550 2 : ReplicationSlotDropAcquired();
551 : }
552 :
553 2 : UnlockSharedObject(DatabaseRelationId, local_slot->data.database,
554 : 0, AccessShareLock);
555 :
556 2 : ereport(LOG,
557 : errmsg("dropped replication slot \"%s\" of database with OID %u",
558 : NameStr(local_slot->data.name),
559 : local_slot->data.database));
560 : }
561 : }
562 36 : }
563 :
564 : /*
565 : * Reserve WAL for the currently active local slot using the specified WAL
566 : * location (restart_lsn).
567 : *
568 : * If the given WAL location has been removed or is at risk of removal,
569 : * reserve WAL using the oldest segment that is non-removable.
570 : */
static void
reserve_wal_for_local_slot(XLogRecPtr restart_lsn)
{
	XLogRecPtr	slot_min_lsn;
	XLogRecPtr	min_safe_lsn;
	XLogSegNo	segno;
	ReplicationSlot *slot = MyReplicationSlot;

	/* Only a freshly created slot (no restart_lsn yet) may be reserved. */
	Assert(slot != NULL);
	Assert(!XLogRecPtrIsValid(slot->data.restart_lsn));

	/*
	 * Acquire an exclusive lock to prevent the checkpoint process from
	 * concurrently calculating the minimum slot LSN (see
	 * CheckPointReplicationSlots), ensuring that if WAL reservation occurs
	 * first, the checkpoint must wait for the restart_lsn update before
	 * calculating the minimum LSN.
	 *
	 * Note: Unlike ReplicationSlotReserveWal(), this lock does not protect a
	 * newly synced slot from being invalidated if a concurrent checkpoint has
	 * invoked CheckPointReplicationSlots() before the WAL reservation here.
	 * This can happen because the initial restart_lsn received from the
	 * remote server can precede the redo pointer. Therefore, when selecting
	 * the initial restart_lsn, we consider using the redo pointer or the
	 * minimum slot LSN (if those values are greater than the remote
	 * restart_lsn) instead of relying solely on the remote value.
	 */
	LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);

	/*
	 * Determine the minimum non-removable LSN by comparing the redo pointer
	 * with the minimum slot LSN.
	 *
	 * The minimum slot LSN is considered because the redo pointer advances at
	 * every checkpoint, even when replication slots are present on the
	 * standby. In such scenarios, the redo pointer can exceed the remote
	 * restart_lsn, while WALs preceding the remote restart_lsn remain
	 * protected by a local replication slot.
	 */
	min_safe_lsn = GetRedoRecPtr();
	slot_min_lsn = XLogGetReplicationSlotMinimumLSN();

	/* An invalid slot_min_lsn means no slot currently retains WAL. */
	if (XLogRecPtrIsValid(slot_min_lsn) && min_safe_lsn > slot_min_lsn)
		min_safe_lsn = slot_min_lsn;

	/*
	 * If the minimum safe LSN is greater than the given restart_lsn, use it
	 * as the initial restart_lsn for the newly synced slot. Otherwise, use
	 * the given remote restart_lsn.
	 */
	SpinLockAcquire(&slot->mutex);
	slot->data.restart_lsn = Max(restart_lsn, min_safe_lsn);
	SpinLockRelease(&slot->mutex);

	ReplicationSlotsComputeRequiredLSN();

	/*
	 * Cross-check that the chosen segment still exists; a concurrent
	 * removal would leave the slot unusable, so error out.
	 */
	XLByteToSeg(slot->data.restart_lsn, segno, wal_segment_size);
	if (XLogGetLastRemovedSegno() >= segno)
		elog(ERROR, "WAL required by replication slot %s has been removed concurrently",
			 NameStr(slot->data.name));

	LWLockRelease(ReplicationSlotAllocationLock);
}
634 :
635 : /*
636 : * If the remote restart_lsn and catalog_xmin have caught up with the
637 : * local ones, then update the LSNs and persist the local synced slot for
638 : * future synchronization; otherwise, do nothing.
639 : *
640 : * *slot_persistence_pending is set to true if any of the slots fail to
641 : * persist.
642 : *
643 : * Return true if the slot is marked as RS_PERSISTENT (sync-ready), otherwise
644 : * false.
645 : */
646 : static bool
647 14 : update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
648 : bool *slot_persistence_pending)
649 : {
650 14 : ReplicationSlot *slot = MyReplicationSlot;
651 :
652 : /* Slotsync skip stats are handled in function update_local_synced_slot() */
653 14 : (void) update_local_synced_slot(remote_slot, remote_dbid);
654 :
655 : /*
656 : * Check if the slot cannot be synchronized. Refer to the comment atop the
657 : * file for details on this check.
658 : */
659 14 : if (slot->slotsync_skip_reason != SS_SKIP_NONE)
660 : {
661 : /*
662 : * We reach this point when the remote slot didn't catch up to locally
663 : * reserved position, or it cannot reach the consistent point from the
664 : * restart_lsn, or the WAL prior to the remote confirmed flush LSN has
665 : * not been received and flushed.
666 : *
667 : * We do not drop the slot because the restart_lsn and confirmed_lsn
668 : * can be ahead of the current location when recreating the slot in
669 : * the next cycle. It may take more time to create such a slot or
670 : * reach the consistent point. Therefore, we keep this slot and
671 : * attempt the synchronization in the next cycle.
672 : *
673 : * We also update the slot_persistence_pending parameter, so the SQL
674 : * function can retry.
675 : */
676 7 : if (slot_persistence_pending)
677 3 : *slot_persistence_pending = true;
678 :
679 7 : return false;
680 : }
681 :
682 7 : ReplicationSlotPersist();
683 :
684 7 : ereport(LOG,
685 : errmsg("newly created replication slot \"%s\" is sync-ready now",
686 : remote_slot->name));
687 :
688 7 : return true;
689 : }
690 :
691 : /*
692 : * Synchronize a single slot to the given position.
693 : *
694 : * This creates a new slot if there is no existing one and updates the
695 : * metadata of the slot as per the data received from the primary server.
696 : *
697 : * The slot is created as a temporary slot and stays in the same state until the
698 : * remote_slot catches up with locally reserved position and local slot is
699 : * updated. The slot is then persisted and is considered as sync-ready for
700 : * periodic syncs.
701 : *
702 : * *slot_persistence_pending is set to true if any of the slots fail to
703 : * persist.
704 : *
705 : * Returns TRUE if the local slot is updated.
706 : */
static bool
synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid,
					 bool *slot_persistence_pending)
{
	ReplicationSlot *slot;
	bool		slot_updated = false;

	/* Search for the named slot */
	if ((slot = SearchNamedReplicationSlot(remote_slot->name, true)))
	{
		bool		synced;

		SpinLockAcquire(&slot->mutex);
		synced = slot->data.synced;
		SpinLockRelease(&slot->mutex);

		/* User-created slot with the same name exists, raise ERROR. */
		if (!synced)
			ereport(ERROR,
					errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					errmsg("exiting from slot synchronization because same"
						   " name slot \"%s\" already exists on the standby",
						   remote_slot->name));

		/*
		 * The slot has been synchronized before.
		 *
		 * It is important to acquire the slot here before checking
		 * invalidation. If we don't acquire the slot first, there could be a
		 * race condition that the local slot could be invalidated just after
		 * checking the 'invalidated' flag here and we could end up
		 * overwriting 'invalidated' flag to remote_slot's value. See
		 * InvalidatePossiblyObsoleteSlot() where it invalidates slot directly
		 * if the slot is not acquired by other processes.
		 *
		 * XXX: If it ever turns out that slot acquire/release is costly for
		 * cases when none of the slot properties is changed then we can do a
		 * pre-check to ensure that at least one of the slot properties is
		 * changed before acquiring the slot.
		 */
		ReplicationSlotAcquire(remote_slot->name, true, false);

		Assert(slot == MyReplicationSlot);

		/*
		 * Copy the invalidation cause from remote only if local slot is not
		 * invalidated locally, we don't want to overwrite existing one.
		 */
		if (slot->data.invalidated == RS_INVAL_NONE &&
			remote_slot->invalidated != RS_INVAL_NONE)
		{
			SpinLockAcquire(&slot->mutex);
			slot->data.invalidated = remote_slot->invalidated;
			SpinLockRelease(&slot->mutex);

			/* Make sure the invalidated state persists across server restart */
			ReplicationSlotMarkDirty();
			ReplicationSlotSave();

			slot_updated = true;
		}

		/* Skip the sync of an invalidated slot */
		if (slot->data.invalidated != RS_INVAL_NONE)
		{
			update_slotsync_skip_stats(SS_SKIP_INVALID);

			ReplicationSlotRelease();
			/* May still be true if we just copied the invalidation above. */
			return slot_updated;
		}

		/* Slot not ready yet, let's attempt to make it sync-ready now. */
		if (slot->data.persistency == RS_TEMPORARY)
		{
			slot_updated = update_and_persist_local_synced_slot(remote_slot,
																remote_dbid,
																slot_persistence_pending);
		}

		/* Slot ready for sync, so sync it. */
		else
		{
			/*
			 * Sanity check: As long as the invalidations are handled
			 * appropriately as above, this should never happen.
			 *
			 * We don't need to check restart_lsn here. See the comments in
			 * update_local_synced_slot() for details.
			 */
			if (remote_slot->confirmed_lsn < slot->data.confirmed_flush)
				ereport(ERROR,
						errmsg_internal("cannot synchronize local slot \"%s\"",
										remote_slot->name),
						errdetail_internal("Local slot's start streaming location LSN(%X/%08X) is ahead of remote slot's LSN(%X/%08X).",
										   LSN_FORMAT_ARGS(slot->data.confirmed_flush),
										   LSN_FORMAT_ARGS(remote_slot->confirmed_lsn)));

			slot_updated = update_local_synced_slot(remote_slot, remote_dbid);
		}
	}
	/* Otherwise create the slot first. */
	else
	{
		NameData	plugin_name;
		TransactionId xmin_horizon = InvalidTransactionId;

		/* Skip creating the local slot if remote_slot is invalidated already */
		if (remote_slot->invalidated != RS_INVAL_NONE)
			return false;

		/*
		 * We create temporary slots instead of ephemeral slots here because
		 * we want the slots to survive after releasing them. This is done to
		 * avoid dropping and re-creating the slots in each synchronization
		 * cycle if the restart_lsn or catalog_xmin of the remote slot has not
		 * caught up.
		 */
		ReplicationSlotCreate(remote_slot->name, true, RS_TEMPORARY,
							  remote_slot->two_phase,
							  remote_slot->failover,
							  true);

		/* For shorter lines. */
		slot = MyReplicationSlot;

		/* Avoid expensive operations while holding a spinlock. */
		namestrcpy(&plugin_name, remote_slot->plugin);

		SpinLockAcquire(&slot->mutex);
		slot->data.database = remote_dbid;
		slot->data.plugin = plugin_name;
		SpinLockRelease(&slot->mutex);

		reserve_wal_for_local_slot(remote_slot->restart_lsn);

		/*
		 * Compute and install a safe initial catalog_xmin while holding
		 * ProcArrayLock, so no catalog rows the slot needs can be vacuumed
		 * away between computing and publishing the horizon.
		 */
		LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
		xmin_horizon = GetOldestSafeDecodingTransactionId(true);
		SpinLockAcquire(&slot->mutex);
		slot->effective_catalog_xmin = xmin_horizon;
		slot->data.catalog_xmin = xmin_horizon;
		SpinLockRelease(&slot->mutex);
		ReplicationSlotsComputeRequiredXmin(true);
		LWLockRelease(ProcArrayLock);
		LWLockRelease(ReplicationSlotControlLock);

		update_and_persist_local_synced_slot(remote_slot, remote_dbid,
											 slot_persistence_pending);

		/* A freshly created slot always counts as an update. */
		slot_updated = true;
	}

	ReplicationSlotRelease();

	return slot_updated;
}
863 :
864 : /*
865 : * Fetch remote slots.
866 : *
867 : * If slot_names is NIL, fetches all failover logical slots from the
868 : * primary server, otherwise fetches only the ones with names in slot_names.
869 : *
870 : * Returns a list of remote slot information structures, or NIL if none
871 : * are found.
: *
: * The RemoteSlot structs (and the strings hanging off them) are palloc'd
: * in the current memory context; callers release them with
: * list_free_deep(). The caller must be inside a transaction, since the
: * syscache access in walrcv_exec() needs a transaction environment (see
: * callers).
872 : */
873 : static List *
874 38 : fetch_remote_slots(WalReceiverConn *wrconn, List *slot_names)
875 : {
876 : #define SLOTSYNC_COLUMN_COUNT 10
877 38 : Oid slotRow[SLOTSYNC_COLUMN_COUNT] = {TEXTOID, TEXTOID, LSNOID,
878 : LSNOID, XIDOID, BOOLOID, LSNOID, BOOLOID, TEXTOID, TEXTOID};
879 :
880 : WalRcvExecResult *res;
881 : TupleTableSlot *tupslot;
882 38 : List *remote_slot_list = NIL;
883 : StringInfoData query;
884 :
885 38 : initStringInfo(&query);
886 38 : appendStringInfoString(&query,
887 : "SELECT slot_name, plugin, confirmed_flush_lsn,"
888 : " restart_lsn, catalog_xmin, two_phase,"
889 : " two_phase_at, failover,"
890 : " database, invalidation_reason"
891 : " FROM pg_catalog.pg_replication_slots"
892 : " WHERE failover and NOT temporary");
893 :
894 38 : if (slot_names != NIL)
895 : {
896 3 : bool first_slot = true;
897 :
898 : /*
899 : * Construct the query to fetch only the specified slots
: * (names are individually quoted via quote_literal_cstr to
: * keep the IN-list safe).
900 : */
901 3 : appendStringInfoString(&query, " AND slot_name IN (");
902 :
903 9 : foreach_ptr(char, slot_name, slot_names)
904 : {
905 3 : if (!first_slot)
906 0 : appendStringInfoString(&query, ", ");
907 :
908 3 : appendStringInfo(&query, "%s", quote_literal_cstr(slot_name));
909 3 : first_slot = false;
910 : }
911 3 : appendStringInfoChar(&query, ')');
912 : }
913 :
914 : /* Execute the query */
915 38 : res = walrcv_exec(wrconn, query.data, SLOTSYNC_COLUMN_COUNT, slotRow);
916 38 : pfree(query.data);
917 38 : if (res->status != WALRCV_OK_TUPLES)
918 2 : ereport(ERROR,
919 : errmsg("could not fetch failover logical slots info from the primary server: %s",
920 : res->err));
921 :
922 36 : tupslot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple);
923 96 : while (tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
924 : {
925 : bool isnull;
926 60 : RemoteSlot *remote_slot = palloc0_object(RemoteSlot);
927 : Datum d;
928 60 : int col = 0;
929 :
930 60 : remote_slot->name = TextDatumGetCString(slot_getattr(tupslot, ++col,
931 : &isnull));
932 : Assert(!isnull);
933 :
934 60 : remote_slot->plugin = TextDatumGetCString(slot_getattr(tupslot, ++col,
935 : &isnull));
936 : Assert(!isnull);
937 :
938 : /*
939 : * It is possible to get null values for LSN and Xmin if slot is
940 : * invalidated on the primary server, so handle accordingly.
941 : */
942 60 : d = slot_getattr(tupslot, ++col, &isnull);
943 60 : remote_slot->confirmed_lsn = isnull ? InvalidXLogRecPtr :
944 60 : DatumGetLSN(d);
945 :
946 60 : d = slot_getattr(tupslot, ++col, &isnull);
947 60 : remote_slot->restart_lsn = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
948 :
949 60 : d = slot_getattr(tupslot, ++col, &isnull);
950 60 : remote_slot->catalog_xmin = isnull ? InvalidTransactionId :
951 60 : DatumGetTransactionId(d);
952 :
953 60 : remote_slot->two_phase = DatumGetBool(slot_getattr(tupslot, ++col,
954 : &isnull));
955 : Assert(!isnull);
956 :
957 60 : d = slot_getattr(tupslot, ++col, &isnull);
958 60 : remote_slot->two_phase_at = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
959 :
960 60 : remote_slot->failover = DatumGetBool(slot_getattr(tupslot, ++col,
961 : &isnull));
962 : Assert(!isnull);
963 :
964 60 : remote_slot->database = TextDatumGetCString(slot_getattr(tupslot,
965 : ++col, &isnull));
966 : Assert(!isnull);
967 :
968 60 : d = slot_getattr(tupslot, ++col, &isnull);
969 60 : remote_slot->invalidated = isnull ? RS_INVAL_NONE :
970 0 : GetSlotInvalidationCause(TextDatumGetCString(d));
971 :
972 : /* Sanity check */
973 : Assert(col == SLOTSYNC_COLUMN_COUNT);
974 :
975 : /*
976 : * If restart_lsn, confirmed_lsn or catalog_xmin is invalid but the
977 : * slot is valid, that means we have fetched the remote_slot in its
978 : * RS_EPHEMERAL state. In such a case, don't sync it; we can always
979 : * sync it in the next sync cycle when the remote_slot is persisted
980 : * and has valid lsn(s) and xmin values.
981 : *
982 : * XXX: In future, if we plan to expose 'slot->data.persistency' in
983 : * pg_replication_slots view, then we can avoid fetching RS_EPHEMERAL
984 : * slots in the first place.
: *
: * NOTE(review): pfree() below releases only the RemoteSlot struct;
: * the palloc'd name/plugin/database strings remain allocated until
: * the surrounding memory context is reset.
985 : */
986 60 : if ((!XLogRecPtrIsValid(remote_slot->restart_lsn) ||
987 60 : !XLogRecPtrIsValid(remote_slot->confirmed_lsn) ||
988 60 : !TransactionIdIsValid(remote_slot->catalog_xmin)) &&
989 0 : remote_slot->invalidated == RS_INVAL_NONE)
990 0 : pfree(remote_slot);
991 : else
992 : /* Create list of remote slots */
993 60 : remote_slot_list = lappend(remote_slot_list, remote_slot);
994 :
995 60 : ExecClearTuple(tupslot);
996 : }
997 :
998 36 : walrcv_clear_result(res);
999 :
1000 36 : return remote_slot_list;
1001 : }
1002 :
1003 : /*
1004 : * Synchronize slots.
1005 : *
1006 : * This function takes a list of remote slots and synchronizes them locally. It
1007 : * creates the slots if not present on the standby and updates existing ones.
1008 : *
1009 : * If slot_persistence_pending is not NULL, it will be set to true if one or
1010 : * more slots could not be persisted. This allows callers such as
1011 : * SyncReplicationSlots() to retry those slots.
1012 : *
1013 : * Returns TRUE if any of the slots gets updated in this sync-cycle.
1014 : */
1015 : static bool
1016 36 : synchronize_slots(WalReceiverConn *wrconn, List *remote_slot_list,
1017 : bool *slot_persistence_pending)
1018 : {
1019 36 : bool some_slot_updated = false;
1020 :
1021 : /* Drop local slots that no longer need to be synced. */
1022 36 : drop_local_obsolete_slots(remote_slot_list);
1023 :
1024 : /* Now sync the slots locally */
1025 132 : foreach_ptr(RemoteSlot, remote_slot, remote_slot_list)
1026 : {
: /*
: * NOTE(review): missing_ok is false here, so this presumably
: * raises an error if the remote slot's database does not exist
: * locally -- confirm against get_database_oid().
: */
1027 60 : Oid remote_dbid = get_database_oid(remote_slot->database, false);
1028 :
1029 : /*
1030 : * Use shared lock to prevent a conflict with
1031 : * ReplicationSlotsDropDBSlots(), trying to drop the same slot during
1032 : * a drop-database operation.
1033 : */
1034 60 : LockSharedObject(DatabaseRelationId, remote_dbid, 0, AccessShareLock);
1035 :
1036 60 : some_slot_updated |= synchronize_one_slot(remote_slot, remote_dbid,
1037 : slot_persistence_pending);
1038 :
1039 60 : UnlockSharedObject(DatabaseRelationId, remote_dbid, 0, AccessShareLock);
1040 : }
1041 :
1042 36 : return some_slot_updated;
1043 : }
1044 :
1045 : /*
1046 : * Checks the remote server info.
1047 : *
1048 : * We ensure that the 'primary_slot_name' exists on the remote server and the
1049 : * remote server is not a standby node.
: *
: * A transaction is started (and committed at the end) if the caller is not
: * already in one, since the syscache access in walrcv_exec() needs a
: * transaction environment.
: *
: * Raises an ERROR if the remote server is itself in recovery (cascading
: * standby) or if the 'primary_slot_name' slot does not exist there.
1050 : */
1051 : static void
1052 15 : validate_remote_info(WalReceiverConn *wrconn)
1053 : {
1054 : #define PRIMARY_INFO_OUTPUT_COL_COUNT 2
1055 : WalRcvExecResult *res;
1056 15 : Oid slotRow[PRIMARY_INFO_OUTPUT_COL_COUNT] = {BOOLOID, BOOLOID};
1057 : StringInfoData cmd;
1058 : bool isnull;
1059 : TupleTableSlot *tupslot;
1060 : bool remote_in_recovery;
1061 : bool primary_slot_valid;
1062 15 : bool started_tx = false;
1063 :
1064 15 : initStringInfo(&cmd);
1065 15 : appendStringInfo(&cmd,
1066 : "SELECT pg_is_in_recovery(), count(*) = 1"
1067 : " FROM pg_catalog.pg_replication_slots"
1068 : " WHERE slot_type='physical' AND slot_name=%s",
1069 : quote_literal_cstr(PrimarySlotName));
1070 :
1071 : /* The syscache access in walrcv_exec() needs a transaction env. */
1072 15 : if (!IsTransactionState())
1073 : {
1074 6 : StartTransactionCommand();
1075 6 : started_tx = true;
1076 : }
1077 :
1078 15 : res = walrcv_exec(wrconn, cmd.data, PRIMARY_INFO_OUTPUT_COL_COUNT, slotRow);
1079 15 : pfree(cmd.data);
1080 :
1081 15 : if (res->status != WALRCV_OK_TUPLES)
1082 0 : ereport(ERROR,
1083 : errmsg("could not fetch primary slot name \"%s\" info from the primary server: %s",
1084 : PrimarySlotName, res->err),
1085 : errhint("Check if \"primary_slot_name\" is configured correctly."));
1086 :
1087 15 : tupslot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple);
1088 15 : if (!tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
1089 0 : elog(ERROR,
1090 : "failed to fetch tuple for the primary server slot specified by \"primary_slot_name\"");
1091 :
1092 15 : remote_in_recovery = DatumGetBool(slot_getattr(tupslot, 1, &isnull));
1093 : Assert(!isnull);
1094 :
1095 : /*
1096 : * Slot sync is currently not supported on a cascading standby. This is
1097 : * because if we allow it, the primary server needs to wait for all the
1098 : * cascading standbys, otherwise, logical subscribers can still be ahead
1099 : * of one of the cascading standbys which we plan to promote. Thus, to
1100 : * avoid this additional complexity, we restrict it for the time being.
1101 : */
1102 15 : if (remote_in_recovery)
1103 1 : ereport(ERROR,
1104 : errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1105 : errmsg("cannot synchronize replication slots from a standby server"));
1106 :
1107 14 : primary_slot_valid = DatumGetBool(slot_getattr(tupslot, 2, &isnull));
1108 : Assert(!isnull);
1109 :
1110 14 : if (!primary_slot_valid)
1111 0 : ereport(ERROR,
1112 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1113 : /* translator: second %s is a GUC variable name */
1114 : errmsg("replication slot \"%s\" specified by \"%s\" does not exist on primary server",
1115 : PrimarySlotName, "primary_slot_name"));
1116 :
1117 14 : ExecClearTuple(tupslot);
1118 14 : walrcv_clear_result(res);
1119 :
1120 14 : if (started_tx)
1121 6 : CommitTransactionCommand();
1122 14 : }
1123 :
1124 : /*
1125 : * Checks if dbname is specified in 'primary_conninfo'.
1126 : *
1127 : * Error out if not specified otherwise return it.
: *
: * NOTE(review): the returned string is allocated by
: * walrcv_get_dbname_from_conninfo(); presumably in the current memory
: * context -- confirm ownership against that function.
1128 : */
1129 : char *
1130 16 : CheckAndGetDbnameFromConninfo(void)
1131 : {
1132 : char *dbname;
1133 :
1134 : /*
1135 : * The slot synchronization needs a database connection for walrcv_exec to
1136 : * work.
1137 : */
1138 16 : dbname = walrcv_get_dbname_from_conninfo(PrimaryConnInfo);
1139 16 : if (dbname == NULL)
1140 1 : ereport(ERROR,
1141 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1142 :
1143 : /*
1144 : * translator: first %s is a connection option; second %s is a GUC
1145 : * variable name
1146 : */
1147 : errmsg("replication slot synchronization requires \"%s\" to be specified in \"%s\"",
1148 : "dbname", "primary_conninfo"));
1149 15 : return dbname;
1150 : }
1151 :
1152 : /*
1153 : * Return true if all necessary GUCs for slot synchronization are set
1154 : * appropriately, otherwise, return false.
: *
: * Note that when elevel is ERROR, the ereport() calls below do not return,
: * so false can only be returned for softer error levels.
1155 : */
1156 : bool
1157 27 : ValidateSlotSyncParams(int elevel)
1158 : {
1159 : /*
1160 : * Logical slot sync/creation requires logical decoding to be enabled.
1161 : */
1162 27 : if (!IsLogicalDecodingEnabled())
1163 : {
1164 0 : ereport(elevel,
1165 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1166 : errmsg("replication slot synchronization requires \"effective_wal_level\" >= \"logical\" on the primary"),
1167 : errhint("To enable logical decoding on primary, set \"wal_level\" >= \"logical\" or create at least one logical slot when \"wal_level\" = \"replica\"."));
1168 :
1169 0 : return false;
1170 : }
1171 :
1172 : /*
1173 : * A physical replication slot(primary_slot_name) is required on the
1174 : * primary to ensure that the rows needed by the standby are not removed
1175 : * after restarting, so that the synchronized slot on the standby will not
1176 : * be invalidated.
1177 : */
1178 27 : if (PrimarySlotName == NULL || *PrimarySlotName == '\0')
1179 : {
1180 0 : ereport(elevel,
1181 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1182 : /* translator: %s is a GUC variable name */
1183 : errmsg("replication slot synchronization requires \"%s\" to be set", "primary_slot_name"));
1184 0 : return false;
1185 : }
1186 :
1187 : /*
1188 : * hot_standby_feedback must be enabled to cooperate with the physical
1189 : * replication slot, which allows informing the primary about the xmin and
1190 : * catalog_xmin values on the standby.
1191 : */
1192 27 : if (!hot_standby_feedback)
1193 : {
1194 1 : ereport(elevel,
1195 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1196 : /* translator: %s is a GUC variable name */
1197 : errmsg("replication slot synchronization requires \"%s\" to be enabled",
1198 : "hot_standby_feedback"));
1199 1 : return false;
1200 : }
1201 :
1202 : /*
1203 : * The primary_conninfo is required to make connection to primary for
1204 : * getting slots information.
1205 : */
1206 26 : if (PrimaryConnInfo == NULL || *PrimaryConnInfo == '\0')
1207 : {
1208 0 : ereport(elevel,
1209 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1210 : /* translator: %s is a GUC variable name */
1211 : errmsg("replication slot synchronization requires \"%s\" to be set",
1212 : "primary_conninfo"));
1213 0 : return false;
1214 : }
1215 :
1216 26 : return true;
1217 : }
1218 :
1219 : /*
1220 : * Re-read the config file for slot synchronization.
1221 : *
1222 : * Exit or throw error if relevant GUCs have changed depending on whether
1223 : * called from slot sync worker or from the SQL function pg_sync_replication_slots()
: *
: * Returns normally only when no slot-sync-relevant GUC was changed by the
: * reload; otherwise the worker exits via proc_exit() or the SQL function
: * backend raises an ERROR.
1224 : */
1225 : static void
1226 1 : slotsync_reread_config(void)
1227 : {
1228 1 : char *old_primary_conninfo = pstrdup(PrimaryConnInfo);
1229 1 : char *old_primary_slotname = pstrdup(PrimarySlotName);
1230 1 : bool old_sync_replication_slots = sync_replication_slots;
1231 1 : bool old_hot_standby_feedback = hot_standby_feedback;
1232 : bool conninfo_changed;
1233 : bool primary_slotname_changed;
1234 1 : bool is_slotsync_worker = AmLogicalSlotSyncWorkerProcess();
1235 1 : bool parameter_changed = false;
1236 :
1237 : if (is_slotsync_worker)
1238 : Assert(sync_replication_slots);
1239 :
1240 1 : ConfigReloadPending = false;
1241 1 : ProcessConfigFile(PGC_SIGHUP);
1242 :
1243 1 : conninfo_changed = strcmp(old_primary_conninfo, PrimaryConnInfo) != 0;
1244 1 : primary_slotname_changed = strcmp(old_primary_slotname, PrimarySlotName) != 0;
1245 1 : pfree(old_primary_conninfo);
1246 1 : pfree(old_primary_slotname);
1247 :
1248 1 : if (old_sync_replication_slots != sync_replication_slots)
1249 : {
1250 0 : if (is_slotsync_worker)
1251 : {
1252 0 : ereport(LOG,
1253 : /* translator: %s is a GUC variable name */
1254 : errmsg("replication slot synchronization worker will stop because \"%s\" is disabled",
1255 : "sync_replication_slots"));
1256 :
1257 0 : proc_exit(0);
1258 : }
1259 :
1260 0 : parameter_changed = true;
1261 : }
1262 : else
1263 : {
1264 1 : if (conninfo_changed ||
1265 1 : primary_slotname_changed ||
1266 1 : (old_hot_standby_feedback != hot_standby_feedback))
1267 : {
1268 :
1269 1 : if (is_slotsync_worker)
1270 : {
1271 1 : ereport(LOG,
1272 : errmsg("replication slot synchronization worker will restart because of a parameter change"));
1273 :
1274 : /*
1275 : * Reset the last-start time for this worker so that the
1276 : * postmaster can restart it without waiting for
1277 : * SLOTSYNC_RESTART_INTERVAL_SEC.
1278 : */
1279 1 : SlotSyncCtx->last_start_time = 0;
1280 :
1281 1 : proc_exit(0);
1282 : }
1283 :
1284 0 : parameter_changed = true;
1285 : }
1286 : }
1287 :
1288 : /*
1289 : * If we have reached here with a parameter change, we must be running in
1290 : * SQL function, emit error in such a case.
1291 : */
1292 0 : if (parameter_changed)
1293 : {
1294 : Assert(!is_slotsync_worker);
1295 0 : ereport(ERROR,
1296 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1297 : errmsg("replication slot synchronization will stop because of a parameter change"));
1298 : }
1299 :
1300 0 : }
1301 :
1302 : /*
1303 : * Interrupt handler for process performing slot synchronization.
: *
: * May not return: the worker exits via proc_exit() (or the SQL function
: * backend raises an ERROR) when promotion is signaled, and
: * slotsync_reread_config() may likewise exit or error on a relevant
: * parameter change.
1304 : */
1305 : static void
1306 51 : ProcessSlotSyncInterrupts(void)
1307 : {
1308 51 : CHECK_FOR_INTERRUPTS();
1309 :
1310 49 : if (SlotSyncCtx->stopSignaled)
1311 : {
1312 1 : if (AmLogicalSlotSyncWorkerProcess())
1313 : {
1314 1 : ereport(LOG,
1315 : errmsg("replication slot synchronization worker will stop because promotion is triggered"));
1316 :
1317 1 : proc_exit(0);
1318 : }
1319 : else
1320 : {
1321 : /*
1322 : * For the backend executing SQL function
1323 : * pg_sync_replication_slots().
1324 : */
1325 0 : ereport(ERROR,
1326 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1327 : errmsg("replication slot synchronization will stop because promotion is triggered"));
1328 : }
1329 : }
1330 :
1331 48 : if (ConfigReloadPending)
1332 1 : slotsync_reread_config();
1333 : }
1334 :
1335 : /*
1336 : * Connection cleanup function for slotsync worker.
1337 : *
1338 : * Called on slotsync worker exit.
: *
: * 'arg' is the WalReceiverConn registered via before_shmem_exit() in
: * ReplSlotSyncWorkerMain().
1339 : */
1340 : static void
1341 6 : slotsync_worker_disconnect(int code, Datum arg)
1342 : {
1343 6 : WalReceiverConn *wrconn = (WalReceiverConn *) DatumGetPointer(arg);
1344 :
1345 6 : walrcv_disconnect(wrconn);
1346 : }
1347 :
1348 : /*
1349 : * Cleanup function for slotsync worker.
1350 : *
1351 : * Called on slotsync worker exit.
1352 : */
1353 : static void
1354 6 : slotsync_worker_onexit(int code, Datum arg)
1355 : {
1356 : /*
1357 : * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1358 : *
1359 : * The startup process during promotion invokes ShutDownSlotSync() which
1360 : * waits for slot sync to finish and it does that by checking the
1361 : * 'syncing' flag. Thus the slot sync worker must be done with slots'
1362 : * release and cleanup to avoid any dangling temporary slots or active
1363 : * slots before it marks itself as finished syncing.
1364 : */
1365 :
1366 : /* Make sure active replication slots are released */
1367 6 : if (MyReplicationSlot != NULL)
1368 0 : ReplicationSlotRelease();
1369 :
1370 : /* Also cleanup the temporary slots. */
1371 6 : ReplicationSlotCleanup(false);
1372 :
: /*
: * NOTE(review): 'pid' and 'syncing' are cleared under the same mutex
: * that ShutDownSlotSync() reads them under, so the startup process
: * observes a consistent state.
: */
1373 6 : SpinLockAcquire(&SlotSyncCtx->mutex);
1374 :
1375 6 : SlotSyncCtx->pid = InvalidPid;
1376 :
1377 : /*
1378 : * If syncing_slots is true, it indicates that the process errored out
1379 : * without resetting the flag. So, we need to clean up shared memory and
1380 : * reset the flag here.
1381 : */
1382 6 : if (syncing_slots)
1383 : {
1384 6 : SlotSyncCtx->syncing = false;
1385 6 : syncing_slots = false;
1386 : }
1387 :
1388 6 : SpinLockRelease(&SlotSyncCtx->mutex);
1389 : }
1390 :
1391 : /*
1392 : * Sleep for long enough that we believe it's likely that the slots on primary
1393 : * get updated.
1394 : *
1395 : * If there is no slot activity the wait time between sync-cycles will double
1396 : * (to a maximum of 30s). If there is some slot activity the wait time between
1397 : * sync-cycles is reset to the minimum (200ms).
: *
: * The backoff state 'sleep_ms' lives outside this function, so it persists
: * across calls.
1398 : */
1399 : static void
1400 28 : wait_for_slot_activity(bool some_slot_updated)
1401 : {
1402 : int rc;
1403 :
1404 28 : if (!some_slot_updated)
1405 : {
1406 : /*
1407 : * No slots were updated, so double the sleep time, but not beyond the
1408 : * maximum allowable value.
1409 : */
1410 17 : sleep_ms = Min(sleep_ms * 2, MAX_SLOTSYNC_WORKER_NAPTIME_MS);
1411 : }
1412 : else
1413 : {
1414 : /*
1415 : * Some slots were updated since the last sleep, so reset the sleep
1416 : * time.
1417 : */
1418 11 : sleep_ms = MIN_SLOTSYNC_WORKER_NAPTIME_MS;
1419 : }
1420 :
1421 28 : rc = WaitLatch(MyLatch,
1422 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
1423 : sleep_ms,
1424 : WAIT_EVENT_REPLICATION_SLOTSYNC_MAIN);
1425 :
1426 28 : if (rc & WL_LATCH_SET)
1427 5 : ResetLatch(MyLatch);
1428 : }
1429 :
1430 : /*
1431 : * Emit an error if a concurrent sync call is in progress.
1432 : * Otherwise, advertise that a sync is in progress.
: *
: * Also records 'sync_process_pid' in shared memory so that the startup
: * process can signal it on promotion, and sets the process-local
: * 'syncing_slots' mirror flag.
1433 : */
1434 : static void
1435 15 : check_and_set_sync_info(pid_t sync_process_pid)
1436 : {
1437 15 : SpinLockAcquire(&SlotSyncCtx->mutex);
1438 :
1439 15 : if (SlotSyncCtx->syncing)
1440 : {
1441 0 : SpinLockRelease(&SlotSyncCtx->mutex);
1442 0 : ereport(ERROR,
1443 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1444 : errmsg("cannot synchronize replication slots concurrently"));
1445 : }
1446 :
1447 : /* The pid must not be already assigned in SlotSyncCtx */
1448 : Assert(SlotSyncCtx->pid == InvalidPid);
1449 :
1450 15 : SlotSyncCtx->syncing = true;
1451 :
1452 : /*
1453 : * Advertise the required PID so that the startup process can kill the
1454 : * slot sync process on promotion.
1455 : */
1456 15 : SlotSyncCtx->pid = sync_process_pid;
1457 :
1458 15 : SpinLockRelease(&SlotSyncCtx->mutex);
1459 :
1460 15 : syncing_slots = true;
1461 : }
1462 :
1463 : /*
1464 : * Reset syncing flag.
: *
: * Clears both the shared-memory state (under the mutex) and the
: * process-local 'syncing_slots' mirror.
1465 : */
1466 : static void
1467 9 : reset_syncing_flag(void)
1468 : {
1469 9 : SpinLockAcquire(&SlotSyncCtx->mutex);
1470 9 : SlotSyncCtx->syncing = false;
1471 9 : SlotSyncCtx->pid = InvalidPid;
1472 9 : SpinLockRelease(&SlotSyncCtx->mutex);
1473 :
1474 9 : syncing_slots = false;
1475 : }
1476 :
1477 : /*
1478 : * The main loop of our worker process.
1479 : *
1480 : * It connects to the primary server, fetches logical failover slots
1481 : * information periodically in order to create and sync the slots.
1482 : *
1483 : * Note: If any changes are made here, check if the corresponding SQL
1484 : * function logic in SyncReplicationSlots() also needs to be changed.
: *
: * On any ERROR the sigsetjmp() handler below reports it and calls
: * proc_exit(0); the postmaster can then relaunch the worker (rate-limited
: * by SlotSyncWorkerCanRestart()).
1485 : */
1486 : void
1487 6 : ReplSlotSyncWorkerMain(const void *startup_data, size_t startup_data_len)
1488 : {
1489 6 : WalReceiverConn *wrconn = NULL;
1490 : char *dbname;
1491 : char *err;
1492 : sigjmp_buf local_sigjmp_buf;
1493 : StringInfoData app_name;
1494 :
1495 : Assert(startup_data_len == 0);
1496 :
1497 6 : init_ps_display(NULL);
1498 :
1499 : Assert(GetProcessingMode() == InitProcessing);
1500 :
1501 : /*
1502 : * Create a per-backend PGPROC struct in shared memory. We must do this
1503 : * before we access any shared memory.
1504 : */
1505 6 : InitProcess();
1506 :
1507 : /*
1508 : * Early initialization.
1509 : */
1510 6 : BaseInit();
1511 :
1512 : Assert(SlotSyncCtx != NULL);
1513 :
1514 : /*
1515 : * If an exception is encountered, processing resumes here.
1516 : *
1517 : * We just need to clean up, report the error, and go away.
1518 : *
1519 : * If we do not have this handling here, then since this worker process
1520 : * operates at the bottom of the exception stack, ERRORs turn into FATALs.
1521 : * Therefore, we create our own exception handler to catch ERRORs.
1522 : */
1523 6 : if (sigsetjmp(local_sigjmp_buf, 1) != 0)
1524 : {
1525 : /* since not using PG_TRY, must reset error stack by hand */
1526 2 : error_context_stack = NULL;
1527 :
1528 : /* Prevents interrupts while cleaning up */
1529 2 : HOLD_INTERRUPTS();
1530 :
1531 : /* Report the error to the server log */
1532 2 : EmitErrorReport();
1533 :
1534 : /*
1535 : * We can now go away. Note that because we called InitProcess, a
1536 : * callback was registered to do ProcKill, which will clean up
1537 : * necessary state.
1538 : */
1539 2 : proc_exit(0);
1540 : }
1541 :
1542 : /* We can now handle ereport(ERROR) */
1543 6 : PG_exception_stack = &local_sigjmp_buf;
1544 :
1545 : /* Setup signal handling */
1546 6 : pqsignal(SIGHUP, SignalHandlerForConfigReload);
1547 6 : pqsignal(SIGINT, StatementCancelHandler);
1548 6 : pqsignal(SIGTERM, die);
1549 6 : pqsignal(SIGFPE, FloatExceptionHandler);
1550 6 : pqsignal(SIGUSR1, procsignal_sigusr1_handler);
1551 6 : pqsignal(SIGUSR2, SIG_IGN);
1552 6 : pqsignal(SIGPIPE, SIG_IGN);
1553 6 : pqsignal(SIGCHLD, SIG_DFL);
1554 :
1555 6 : check_and_set_sync_info(MyProcPid);
1556 :
1557 6 : ereport(LOG, errmsg("slot sync worker started"));
1558 :
1559 : /* Register it as soon as SlotSyncCtx->pid is initialized. */
1560 6 : before_shmem_exit(slotsync_worker_onexit, (Datum) 0);
1561 :
1562 : /*
1563 : * Establishes SIGALRM handler and initialize timeout module. It is needed
1564 : * by InitPostgres to register different timeouts.
1565 : */
1566 6 : InitializeTimeouts();
1567 :
1568 : /* Load the libpq-specific functions */
1569 6 : load_file("libpqwalreceiver", false);
1570 :
1571 : /*
1572 : * Unblock signals (they were blocked when the postmaster forked us)
1573 : */
1574 6 : sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
1575 :
1576 : /*
1577 : * Set always-secure search path, so malicious users can't redirect user
1578 : * code (e.g. operators).
1579 : *
1580 : * It's not strictly necessary since we won't be scanning or writing to
1581 : * any user table locally, but it's good to retain it here for added
1582 : * precaution.
1583 : */
1584 6 : SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
1585 :
1586 6 : dbname = CheckAndGetDbnameFromConninfo();
1587 :
1588 : /*
1589 : * Connect to the database specified by the user in primary_conninfo. We
1590 : * need a database connection for walrcv_exec to work which we use to
1591 : * fetch slot information from the remote node. See comments atop
1592 : * libpqrcv_exec.
1593 : *
1594 : * We do not specify a specific user here since the slot sync worker will
1595 : * operate as a superuser. This is safe because the slot sync worker does
1596 : * not interact with user tables, eliminating the risk of executing
1597 : * arbitrary code within triggers.
1598 : */
1599 6 : InitPostgres(dbname, InvalidOid, NULL, InvalidOid, 0, NULL);
1600 :
1601 6 : SetProcessingMode(NormalProcessing);
1602 :
1603 6 : initStringInfo(&app_name);
1604 6 : if (cluster_name[0])
1605 6 : appendStringInfo(&app_name, "%s_%s", cluster_name, "slotsync worker");
1606 : else
1607 0 : appendStringInfoString(&app_name, "slotsync worker");
1608 :
1609 : /*
1610 : * Establish the connection to the primary server for slot
1611 : * synchronization.
1612 : */
1613 6 : wrconn = walrcv_connect(PrimaryConnInfo, false, false, false,
1614 : app_name.data, &err);
1615 :
1616 6 : if (!wrconn)
1617 0 : ereport(ERROR,
1618 : errcode(ERRCODE_CONNECTION_FAILURE),
1619 : errmsg("synchronization worker \"%s\" could not connect to the primary server: %s",
1620 : app_name.data, err));
1621 :
1622 6 : pfree(app_name.data);
1623 :
1624 : /*
1625 : * Register the disconnection callback.
1626 : *
1627 : * XXX: This can be combined with previous cleanup registration of
1628 : * slotsync_worker_onexit() but that will need the connection to be made
1629 : * global and we want to avoid introducing global for this purpose.
1630 : */
1631 6 : before_shmem_exit(slotsync_worker_disconnect, PointerGetDatum(wrconn));
1632 :
1633 : /*
1634 : * Using the specified primary server connection, check that we are not a
1635 : * cascading standby and slot configured in 'primary_slot_name' exists on
1636 : * the primary server.
1637 : */
1638 6 : validate_remote_info(wrconn);
1639 :
1640 : /* Main loop to synchronize slots */
1641 : for (;;)
1642 25 : {
1643 31 : bool some_slot_updated = false;
1644 31 : bool started_tx = false;
1645 : List *remote_slots;
1646 :
1647 31 : ProcessSlotSyncInterrupts();
1648 :
1649 : /*
1650 : * The syscache access in fetch_remote_slots() needs a transaction
1651 : * env.
1652 : */
1653 27 : if (!IsTransactionState())
1654 : {
1655 27 : StartTransactionCommand();
1656 27 : started_tx = true;
1657 : }
1658 :
1659 27 : remote_slots = fetch_remote_slots(wrconn, NIL);
1660 25 : some_slot_updated = synchronize_slots(wrconn, remote_slots, NULL);
1661 25 : list_free_deep(remote_slots);
1662 :
1663 25 : if (started_tx)
1664 25 : CommitTransactionCommand();
1665 :
1666 25 : wait_for_slot_activity(some_slot_updated);
1667 : }
1668 :
1669 : /*
1670 : * The slot sync worker can't get here because it will only stop when it
1671 : * receives a stop request from the startup process, or when there is an
1672 : * error.
1673 : */
1674 : Assert(false);
1675 : }
1676 :
1677 : /*
1678 : * Update the inactive_since property for synced slots.
1679 : *
1680 : * Note that this function is currently called when we shutdown the slot
1681 : * sync machinery.
: *
: * This is a no-op unless StandbyMode is set (i.e., during promotion).
1682 : */
1683 : static void
1684 1005 : update_synced_slots_inactive_since(void)
1685 : {
1686 1005 : TimestampTz now = 0;
1687 :
1688 : /*
1689 : * We need to update inactive_since only when we are promoting standby to
1690 : * correctly interpret the inactive_since if the standby gets promoted
1691 : * without a restart. We don't want the slots to appear inactive for a
1692 : * long time after promotion if they haven't been synchronized recently.
1693 : * Whoever acquires the slot, i.e., makes the slot active, will reset it.
1694 : */
1695 1005 : if (!StandbyMode)
1696 953 : return;
1697 :
1698 : /* The slot sync worker or the SQL function mustn't be running by now */
1699 : Assert((SlotSyncCtx->pid == InvalidPid) && !SlotSyncCtx->syncing);
1700 :
1701 52 : LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1702 :
1703 560 : for (int i = 0; i < max_replication_slots; i++)
1704 : {
1705 508 : ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
1706 :
1707 : /* Check if it is a synchronized slot */
1708 508 : if (s->in_use && s->data.synced)
1709 : {
1710 : Assert(SlotIsLogical(s));
1711 :
1712 : /* The slot must not be acquired by any process */
1713 : Assert(s->active_proc == INVALID_PROC_NUMBER);
1714 :
1715 : /* Use the same inactive_since time for all the slots. */
1716 3 : if (now == 0)
1717 2 : now = GetCurrentTimestamp();
1718 :
1719 3 : ReplicationSlotSetInactiveSince(s, now, true);
1720 : }
1721 : }
1722 :
1723 52 : LWLockRelease(ReplicationSlotControlLock);
1724 : }
1725 :
1726 : /*
1727 : * Shut down slot synchronization.
1728 : *
1729 : * This function sets stopSignaled=true and wakes up the slot sync process
1730 : * (either worker or backend running the SQL function pg_sync_replication_slots())
1731 : * so that worker can exit or the SQL function pg_sync_replication_slots() can
1732 : * finish. It also waits till the slot sync worker has exited or
1733 : * pg_sync_replication_slots() has finished.
1734 : */
1735 : void
1736 1005 : ShutDownSlotSync(void)
1737 : {
1738 : pid_t sync_process_pid;
1739 :
1740 1005 : SpinLockAcquire(&SlotSyncCtx->mutex);
1741 :
1742 1005 : SlotSyncCtx->stopSignaled = true;
1743 :
1744 : /*
1745 : * Return if neither the slot sync worker is running nor the function
1746 : * pg_sync_replication_slots() is executing.
1747 : */
1748 1005 : if (!SlotSyncCtx->syncing)
1749 : {
1750 1004 : SpinLockRelease(&SlotSyncCtx->mutex);
1751 1004 : update_synced_slots_inactive_since();
1752 1004 : return;
1753 : }
1754 :
1755 1 : sync_process_pid = SlotSyncCtx->pid;
1756 :
1757 1 : SpinLockRelease(&SlotSyncCtx->mutex);
1758 :
1759 : /*
1760 : * Signal process doing slotsync, if any. The process will stop upon
1761 : * detecting that the stopSignaled flag is set to true.
1762 : */
1763 1 : if (sync_process_pid != InvalidPid)
1764 1 : kill(sync_process_pid, SIGUSR1);
1765 :
1766 : /* Wait for slot sync to end */
1767 : for (;;)
1768 0 : {
1769 : int rc;
1770 :
1771 : /* Wait a bit, we don't expect to have to wait long */
1772 1 : rc = WaitLatch(MyLatch,
1773 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
1774 : 10L, WAIT_EVENT_REPLICATION_SLOTSYNC_SHUTDOWN);
1775 :
1776 1 : if (rc & WL_LATCH_SET)
1777 : {
1778 0 : ResetLatch(MyLatch);
1779 0 : CHECK_FOR_INTERRUPTS();
1780 : }
1781 :
1782 1 : SpinLockAcquire(&SlotSyncCtx->mutex);
1783 :
1784 : /* Ensure that no process is syncing the slots. */
1785 1 : if (!SlotSyncCtx->syncing)
1786 1 : break;
1787 :
1788 0 : SpinLockRelease(&SlotSyncCtx->mutex);
1789 : }
1790 :
: /* We broke out of the loop with the mutex still held; release it now. */
1791 1 : SpinLockRelease(&SlotSyncCtx->mutex);
1792 :
1793 1 : update_synced_slots_inactive_since();
1794 : }
1795 :
1796 : /*
1797 : * SlotSyncWorkerCanRestart
1798 : *
1799 : * Return true, indicating worker is allowed to restart, if enough time has
1800 : * passed since it was last launched to reach SLOTSYNC_RESTART_INTERVAL_SEC.
1801 : * Otherwise return false.
1802 : *
1803 : * This is a safety valve to protect against continuous respawn attempts if the
1804 : * worker is dying immediately at launch. Note that since we will retry to
1805 : * launch the worker from the postmaster main loop, we will get another
1806 : * chance later.
1807 : */
1808 : bool
1809 16 : SlotSyncWorkerCanRestart(void)
1810 : {
1811 16 : time_t curtime = time(NULL);
1812 :
1813 : /*
1814 : * If first time through, or time somehow went backwards, always update
1815 : * last_start_time to match the current clock and allow worker start.
1816 : * Otherwise allow it only once enough time has elapsed.
1817 : */
1818 16 : if (SlotSyncCtx->last_start_time == 0 ||
1819 10 : curtime < SlotSyncCtx->last_start_time ||
1820 10 : curtime - SlotSyncCtx->last_start_time >= SLOTSYNC_RESTART_INTERVAL_SEC)
1821 : {
1822 6 : SlotSyncCtx->last_start_time = curtime;
1823 6 : return true;
1824 : }
1825 10 : return false;
1826 : }
1827 :
/*
 * Is current process syncing replication slots?
 *
 * Could be either backend executing SQL function or slot sync worker.
 *
 * Returns the process-local 'syncing_slots' flag; this reflects only the
 * current process, not whether any other process is syncing.
 */
bool
IsSyncingReplicationSlots(void)
{
	return syncing_slots;
}
1838 :
/*
 * Register shared memory space needed for slot synchronization.
 *
 * Requests a SlotSyncCtxStruct-sized region; the allocator stores the
 * resulting pointer into SlotSyncCtx.  Initialization of the region itself
 * happens later in SlotSyncShmemInit().
 */
static void
SlotSyncShmemRequest(void *arg)
{
	ShmemRequestStruct(.name = "Slot Sync Data",
					   .size = sizeof(SlotSyncCtxStruct),
					   .ptr = (void **) &SlotSyncCtx,
	);
}
1850 :
/*
 * Initialize shared memory for slot synchronization.
 *
 * Zeroes the whole struct first (so stopSignaled/syncing/last_start_time all
 * start cleared), then sets the fields that need non-zero initial values.
 */
static void
SlotSyncShmemInit(void *arg)
{
	memset(SlotSyncCtx, 0, sizeof(SlotSyncCtxStruct));
	/* InvalidPid means no slot sync worker is currently running. */
	SlotSyncCtx->pid = InvalidPid;
	SpinLockInit(&SlotSyncCtx->mutex);
}
1861 :
/*
 * Error cleanup callback for slot sync SQL function.
 *
 * 'arg' is the WalReceiverConn for the primary, passed through
 * PG_ENSURE_ERROR_CLEANUP by SyncReplicationSlots().
 */
static void
slotsync_failure_callback(int code, Datum arg)
{
	WalReceiverConn *wrconn = (WalReceiverConn *) DatumGetPointer(arg);

	/*
	 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
	 *
	 * The startup process during promotion invokes ShutDownSlotSync() which
	 * waits for slot sync to finish and it does that by checking the
	 * 'syncing' flag. Thus the SQL function must be done with slots' release
	 * and cleanup to avoid any dangling temporary slots or active slots
	 * before it marks itself as finished syncing.
	 */

	/* Make sure active replication slots are released */
	if (MyReplicationSlot != NULL)
		ReplicationSlotRelease();

	/* Also cleanup the synced temporary slots. */
	ReplicationSlotCleanup(true);

	/*
	 * The set syncing_slots indicates that the process errored out without
	 * resetting the flag. So, we need to clean up shared memory and reset the
	 * flag here.
	 */
	if (syncing_slots)
		reset_syncing_flag();

	/* Drop the connection to the primary last; it is always valid here. */
	walrcv_disconnect(wrconn);
}
1897 :
1898 : /*
1899 : * Helper function to extract slot names from a list of remote slots
1900 : */
1901 : static List *
1902 1 : extract_slot_names(List *remote_slots)
1903 : {
1904 1 : List *slot_names = NIL;
1905 :
1906 3 : foreach_ptr(RemoteSlot, remote_slot, remote_slots)
1907 : {
1908 : char *slot_name;
1909 :
1910 1 : slot_name = pstrdup(remote_slot->name);
1911 1 : slot_names = lappend(slot_names, slot_name);
1912 : }
1913 :
1914 1 : return slot_names;
1915 : }
1916 :
/*
 * Synchronize the failover enabled replication slots using the specified
 * primary server connection.
 *
 * Repeatedly fetches and updates replication slot information from the
 * primary until all slots are at least "sync ready".
 *
 * Exits early if promotion is triggered or certain critical
 * configuration parameters have changed.
 *
 * On error, slotsync_failure_callback() releases slots, drops synced
 * temporary slots, resets the syncing flag, and disconnects 'wrconn'.
 */
void
SyncReplicationSlots(WalReceiverConn *wrconn)
{
	PG_ENSURE_ERROR_CLEANUP(slotsync_failure_callback, PointerGetDatum(wrconn));
	{
		List	   *remote_slots = NIL;
		List	   *slot_names = NIL;	/* List of slot names to track */

		/* Mark this process as the one doing slot sync (shared + local). */
		check_and_set_sync_info(MyProcPid);

		/* Check for interrupts and config changes */
		ProcessSlotSyncInterrupts();

		validate_remote_info(wrconn);

		/* Retry until all the slots are sync-ready */
		for (;;)
		{
			bool		slot_persistence_pending = false;
			bool		some_slot_updated = false;

			/* Check for interrupts and config changes */
			ProcessSlotSyncInterrupts();

			/* We must be in a valid transaction state */
			Assert(IsTransactionState());

			/*
			 * Fetch remote slot info for the given slot_names. If slot_names
			 * is NIL, fetch all failover-enabled slots. Note that we reuse
			 * slot_names from the first iteration; re-fetching all failover
			 * slots each time could cause an endless loop. Instead of
			 * reprocessing only the pending slots in each iteration, it's
			 * better to process all the slots received in the first
			 * iteration. This ensures that by the time we're done, all slots
			 * reflect the latest values.
			 */
			remote_slots = fetch_remote_slots(wrconn, slot_names);

			/* Attempt to synchronize slots */
			some_slot_updated = synchronize_slots(wrconn, remote_slots,
												  &slot_persistence_pending);

			/*
			 * If slot_persistence_pending is true, extract slot names for
			 * future iterations (only needed if we haven't done it yet)
			 */
			if (slot_names == NIL && slot_persistence_pending)
				slot_names = extract_slot_names(remote_slots);

			/* Free the current remote_slots list */
			list_free_deep(remote_slots);

			/* Done if all slots are persisted i.e are sync-ready */
			if (!slot_persistence_pending)
				break;

			/* wait before retrying again */
			wait_for_slot_activity(some_slot_updated);
		}

		/* Only allocated when at least one slot stayed pending. */
		if (slot_names)
			list_free_deep(slot_names);

		/* Cleanup the synced temporary slots */
		ReplicationSlotCleanup(true);

		/* We are done with sync, so reset sync flag */
		reset_syncing_flag();
	}
	PG_END_ENSURE_ERROR_CLEANUP(slotsync_failure_callback, PointerGetDatum(wrconn));
}
|