Line data Source code
1 : /*-------------------------------------------------------------------------
2 : * slotsync.c
3 : * Functionality for synchronizing slots to a standby server from the
4 : * primary server.
5 : *
6 : * Copyright (c) 2024-2026, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/replication/logical/slotsync.c
10 : *
11 : * This file contains the code for slot synchronization on a physical standby
12 : * to fetch logical failover slots information from the primary server, create
13 : * the slots on the standby and synchronize them periodically.
14 : *
15 : * Slot synchronization can be performed either automatically by enabling slot
16 : * sync worker or manually by calling SQL function pg_sync_replication_slots().
17 : *
18 : * If the WAL corresponding to the remote's restart_lsn is not available on the
19 : * physical standby or the remote's catalog_xmin precedes the oldest xid for
20 : * which it is guaranteed that rows wouldn't have been removed then we cannot
21 : * create the local standby slot because that would mean moving the local slot
22 : * backward and decoding won't be possible via such a slot. In this case, the
23 : * slot will be marked as RS_TEMPORARY. Once the primary server catches up,
24 : * the slot will be marked as RS_PERSISTENT (which means sync-ready) after
25 : * which slot sync worker can perform the sync periodically or user can call
26 : * pg_sync_replication_slots() periodically to perform the syncs.
27 : *
28 : * If synchronized slots fail to build a consistent snapshot from the
29 : * restart_lsn before reaching confirmed_flush_lsn, they would become
30 : * unreliable after promotion due to potential data loss from changes
31 : * before reaching a consistent point. This can happen because the slots can
32 : * be synced at some random time and we may not reach the consistent point
33 : * at the same WAL location as the primary. So, we mark such slots as
34 : * RS_TEMPORARY. Once the decoding from corresponding LSNs can reach a
35 : * consistent point, they will be marked as RS_PERSISTENT.
36 : *
37 : * If the WAL prior to the remote slot's confirmed_flush_lsn has not been
38 : * flushed on the standby, the slot is marked as RS_TEMPORARY. Once the standby
39 : * catches up and flushes that WAL, the slot will be marked as RS_PERSISTENT.
40 : *
41 : * The slot sync worker waits for some time before the next synchronization,
42 : * with the duration varying based on whether any slots were updated during
43 : * the last cycle. Refer to the comments above wait_for_slot_activity() for
44 : * more details.
45 : *
46 : * If the SQL function pg_sync_replication_slots() is used to sync the slots,
47 : * and if the slots are not ready to be synced and are marked as RS_TEMPORARY
48 : * because of any of the reasons mentioned above, then the SQL function also
49 : * waits and retries until the slots are marked as RS_PERSISTENT (which means
50 : * sync-ready). Refer to the comments in SyncReplicationSlots() for more
51 : * details.
52 : *
53 : * Any standby synchronized slots will be dropped if they no longer need
54 : * to be synchronized. See comment atop drop_local_obsolete_slots() for more
55 : * details.
56 : *---------------------------------------------------------------------------
57 : */
58 :
59 : #include "postgres.h"
60 :
61 : #include <time.h>
62 :
63 : #include "access/xlog_internal.h"
64 : #include "access/xlogrecovery.h"
65 : #include "catalog/pg_database.h"
66 : #include "libpq/pqsignal.h"
67 : #include "pgstat.h"
68 : #include "postmaster/interrupt.h"
69 : #include "replication/logical.h"
70 : #include "replication/slotsync.h"
71 : #include "replication/snapbuild.h"
72 : #include "storage/ipc.h"
73 : #include "storage/lmgr.h"
74 : #include "storage/proc.h"
75 : #include "storage/procarray.h"
76 : #include "storage/subsystems.h"
77 : #include "tcop/tcopprot.h"
78 : #include "utils/builtins.h"
79 : #include "utils/memutils.h"
80 : #include "utils/pg_lsn.h"
81 : #include "utils/ps_status.h"
82 : #include "utils/timeout.h"
83 : #include "utils/wait_event.h"
84 :
85 : /*
86 : * Struct for sharing information to control slot synchronization.
87 : *
88 : * The 'pid' is either the slot sync worker's pid or the backend's pid running
89 : * the SQL function pg_sync_replication_slots(). On promotion, the startup
90 : * process sets 'stopSignaled' and uses this 'pid' to signal the synchronizing
91 : * process with PROCSIG_SLOTSYNC_MESSAGE and also to wake it up so that the
92 : * process can immediately stop its synchronizing work.
93 : * Setting 'stopSignaled' on the other hand is used to handle the race
94 : * condition when the postmaster has not noticed the promotion yet and thus may
95 : * end up restarting the slot sync worker. If 'stopSignaled' is set, the worker
96 : * will exit in such a case. The SQL function pg_sync_replication_slots() will
97 : * also error out if this flag is set. Note that we don't need to reset this
98 : * variable as after promotion the slot sync worker won't be restarted because
99 : * the pmState changes to PM_RUN from PM_HOT_STANDBY and we don't support
100 : * demoting primary without restarting the server.
101 : * See LaunchMissingBackgroundProcesses.
102 : *
103 : * The 'syncing' flag is needed to prevent concurrent slot syncs to avoid slot
104 : * overwrites.
105 : *
106 : * The 'last_start_time' is needed by postmaster to start the slot sync worker
107 : * once per SLOTSYNC_RESTART_INTERVAL_SEC. In cases where an immediate restart
108 : * is expected (e.g., slot sync GUCs change), slot sync worker will reset
109 : * last_start_time before exiting, so that postmaster can start the worker
110 : * without waiting for SLOTSYNC_RESTART_INTERVAL_SEC.
111 : */
112 : typedef struct SlotSyncCtxStruct
113 : {
114 : pid_t pid; /* slot sync worker, or backend running pg_sync_replication_slots() */
115 : bool stopSignaled; /* set by the startup process on promotion; never reset */
116 : bool syncing; /* prevents concurrent slot syncs */
117 : time_t last_start_time; /* lets postmaster rate-limit worker restarts */
118 : slock_t mutex; /* NOTE(review): presumably protects the fields above -- confirm */
119 : } SlotSyncCtxStruct;
120 :
121 : static SlotSyncCtxStruct *SlotSyncCtx = NULL; /* slot sync control block; starts out NULL */
122 :
123 : static void SlotSyncShmemRequest(void *arg); /* forward declaration, registered below */
124 : static void SlotSyncShmemInit(void *arg); /* forward declaration, registered below */
125 :
126 : const ShmemCallbacks SlotSyncShmemCallbacks = { /* non-static: visible to other files */
127 : .request_fn = SlotSyncShmemRequest,
128 : .init_fn = SlotSyncShmemInit,
129 : };
130 :
131 : /* GUC variable; defaults to off */
132 : bool sync_replication_slots = false;
133 :
134 : /*
135 : * The sleep time (ms) between slot-sync cycles varies dynamically
136 : * (within a MIN/MAX range) according to slot activity. See
137 : * wait_for_slot_activity() for details.
138 : */
139 : #define MIN_SLOTSYNC_WORKER_NAPTIME_MS 200 /* 200ms */
140 : #define MAX_SLOTSYNC_WORKER_NAPTIME_MS 30000 /* 30s */
141 :
142 : static long sleep_ms = MIN_SLOTSYNC_WORKER_NAPTIME_MS; /* current nap time; starts at the minimum */
143 :
144 : /* The restart interval (in seconds) for the slot sync worker, used by postmaster */
145 : #define SLOTSYNC_RESTART_INTERVAL_SEC 10
146 :
147 : /*
148 : * Flag to tell if we are syncing replication slots. Unlike the 'syncing' flag
149 : * in SlotSyncCtxStruct, this flag is true only if the current process is
150 : * performing slot synchronization.
151 : */
152 : static bool syncing_slots = false;
153 :
154 : /*
155 : * Interrupt flag set when PROCSIG_SLOTSYNC_MESSAGE is received, asking the
156 : * slotsync worker or pg_sync_replication_slots() to stop because
157 : * standby promotion has been triggered.
158 : */
159 : volatile sig_atomic_t SlotSyncShutdownPending = false;
160 :
161 : /*
162 : * Structure to hold information fetched from the primary server about a logical
163 : * replication slot.
164 : */
165 : typedef struct RemoteSlot
166 : {
167 : char *name; /* slot name; local slots are matched by this name */
168 : char *plugin; /* output plugin name; synced to the local slot's plugin */
169 : char *database; /* NOTE(review): presumably the slot's database name on the primary -- confirm */
170 : bool two_phase; /* synced to the local slot's two_phase */
171 : bool failover; /* synced to the local slot's failover */
172 : XLogRecPtr restart_lsn; /* remote restart_lsn */
173 : XLogRecPtr confirmed_lsn; /* remote value synced to the local confirmed_flush */
174 : XLogRecPtr two_phase_at; /* synced to the local slot's two_phase_at */
175 : TransactionId catalog_xmin; /* remote catalog_xmin */
176 :
177 : /* RS_INVAL_NONE if valid, or the reason of invalidation */
178 : ReplicationSlotInvalidationCause invalidated;
179 : } RemoteSlot;
180 :
181 : static void slotsync_failure_callback(int code, Datum arg); /* forward declaration */
182 : static void update_synced_slots_inactive_since(void); /* forward declaration */
183 :
184 : /*
185 : * Record the outcome of a sync attempt on MyReplicationSlot; the caller must have acquired the slot.
186 : * skip_reason is stored in the slot; any value other than SS_SKIP_NONE is also reported to pgstat.
187 : */
188 : static void
189 58 : update_slotsync_skip_stats(SlotSyncSkipReason skip_reason)
190 : {
191 : ReplicationSlot *slot;
192 :
193 : Assert(MyReplicationSlot); /* caller must own a slot */
194 :
195 58 : slot = MyReplicationSlot;
196 :
197 : /*
198 : * Update the slot sync related stats in pg_stat_replication_slots when a
199 : * slot sync is skipped
200 : */
201 58 : if (skip_reason != SS_SKIP_NONE)
202 5 : pgstat_report_replslotsync(slot);
203 :
204 : /* Update the slot sync skip reason; the spinlock is taken only when the value changes */
205 58 : if (slot->slotsync_skip_reason != skip_reason)
206 : {
207 3 : SpinLockAcquire(&slot->mutex);
208 3 : slot->slotsync_skip_reason = skip_reason;
209 3 : SpinLockRelease(&slot->mutex);
210 : }
211 58 : }
212 :
213 : /*
214 : * If necessary, update the local synced slot (MyReplicationSlot) from the data
215 : * of remote_slot; remote_dbid is the database OID to store in the local slot.
216 : *
217 : * Returns false if nothing was changed (the local slot already matches, or the
218 : * sync was skipped as unsafe), otherwise true. Every return path updates the skip stats.
219 : */
220 : static bool
221 58 : update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
222 : {
223 58 : ReplicationSlot *slot = MyReplicationSlot;
224 58 : bool updated_xmin_or_lsn = false;
225 58 : bool updated_config = false;
226 58 : SlotSyncSkipReason skip_reason = SS_SKIP_NONE;
227 58 : XLogRecPtr latestFlushPtr = GetStandbyFlushRecPtr(NULL); /* compared with the remote confirmed_lsn below */
228 :
229 : Assert(slot->data.invalidated == RS_INVAL_NONE);
230 :
231 : /*
232 : * Make sure that concerned WAL is received and flushed before syncing
233 : * slot to target lsn received from the primary server.
234 : */
235 58 : if (remote_slot->confirmed_lsn > latestFlushPtr)
236 : {
237 0 : update_slotsync_skip_stats(SS_SKIP_WAL_NOT_FLUSHED);
238 :
239 : /*
240 : * Can get here only if GUC 'synchronized_standby_slots' on the
241 : * primary server was not configured correctly.
242 : */
243 0 : ereport(LOG,
244 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
245 : errmsg("skipping slot synchronization because the received slot sync"
246 : " LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
247 : LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
248 : remote_slot->name,
249 : LSN_FORMAT_ARGS(latestFlushPtr)));
250 :
251 0 : return false; /* local slot left untouched */
252 : }
253 :
254 : /*
255 : * Don't overwrite if we already have a newer catalog_xmin and
256 : * restart_lsn.
257 : */
258 114 : if (remote_slot->restart_lsn < slot->data.restart_lsn ||
259 56 : TransactionIdPrecedes(remote_slot->catalog_xmin,
260 : slot->data.catalog_xmin))
261 : {
262 : /* Update slot sync skip stats */
263 5 : update_slotsync_skip_stats(SS_SKIP_WAL_OR_ROWS_REMOVED);
264 :
265 : /*
266 : * This can happen in the following situations:
267 : *
268 : * If the slot is temporary, it means either the initial WAL location
269 : * reserved for the local slot is ahead of the remote slot's
270 : * restart_lsn or the initial xmin_horizon computed for the local slot
271 : * is ahead of the remote slot.
272 : *
273 : * If the slot is persistent, both restart_lsn and catalog_xmin of the
274 : * synced slot could still be ahead of the remote slot. Since we use
275 : * slot advance functionality to keep snapbuild/slot updated, it is
276 : * possible that the restart_lsn and catalog_xmin are advanced to a
277 : * later position than it has on the primary. This can happen when
278 : * slot advancing machinery finds running xacts record after reaching
279 : * the consistent state at a later point than the primary where it
280 : * serializes the snapshot and updates the restart_lsn.
281 : *
282 : * We LOG the message if the slot is temporary as it can help the user
283 : * to understand why the slot is not sync-ready. In the case of a
284 : * persistent slot, it would be a more common case and won't directly
285 : * impact the users, so we used DEBUG1 level to log the message.
286 : */
287 5 : ereport(slot->data.persistency == RS_TEMPORARY ? LOG : DEBUG1,
288 : errmsg("could not synchronize replication slot \"%s\"",
289 : remote_slot->name),
290 : errdetail("Synchronization could lead to data loss, because the remote slot needs WAL at LSN %X/%08X and catalog xmin %u, but the standby has LSN %X/%08X and catalog xmin %u.",
291 : LSN_FORMAT_ARGS(remote_slot->restart_lsn),
292 : remote_slot->catalog_xmin,
293 : LSN_FORMAT_ARGS(slot->data.restart_lsn),
294 : slot->data.catalog_xmin));
295 :
296 : /*
297 : * Skip updating the configuration. This is required to avoid syncing
298 : * two_phase_at without syncing confirmed_lsn. Otherwise, the prepared
299 : * transaction between old confirmed_lsn and two_phase_at will
300 : * unexpectedly get decoded and sent to the downstream after
301 : * promotion. See comments in ReorderBufferFinishPrepared.
302 : */
303 5 : return false;
304 : }
305 :
306 : /*
307 : * Attempt to sync LSNs and xmins only if remote slot is ahead of local
308 : * slot.
309 : */
310 53 : if (remote_slot->confirmed_lsn > slot->data.confirmed_flush ||
311 77 : remote_slot->restart_lsn > slot->data.restart_lsn ||
312 38 : TransactionIdFollows(remote_slot->catalog_xmin,
313 : slot->data.catalog_xmin))
314 : {
315 : /*
316 : * We can't directly copy the remote slot's LSN or xmin unless there
317 : * exists a consistent snapshot at that point. Otherwise, after
318 : * promotion, the slots may not reach a consistent point before the
319 : * confirmed_flush_lsn which can lead to a data loss. To avoid data
320 : * loss, we let slot machinery advance the slot which ensures that
321 : * snapbuilder/slot statuses are updated properly.
322 : */
323 15 : if (SnapBuildSnapshotExists(remote_slot->restart_lsn))
324 : {
325 : /*
326 : * Update the slot info directly if there is a serialized snapshot
327 : * at the restart_lsn, as the slot can quickly reach consistency
328 : * at restart_lsn by restoring the snapshot.
329 : */
330 4 : SpinLockAcquire(&slot->mutex);
331 4 : slot->data.restart_lsn = remote_slot->restart_lsn;
332 4 : slot->data.confirmed_flush = remote_slot->confirmed_lsn;
333 4 : slot->data.catalog_xmin = remote_slot->catalog_xmin;
334 4 : SpinLockRelease(&slot->mutex);
335 :
336 4 : updated_xmin_or_lsn = true;
337 : }
338 : else
339 : {
340 : bool found_consistent_snapshot;
341 11 : XLogRecPtr old_confirmed_lsn = slot->data.confirmed_flush; /* pre-advance values, compared below */
342 11 : XLogRecPtr old_restart_lsn = slot->data.restart_lsn;
343 11 : TransactionId old_catalog_xmin = slot->data.catalog_xmin;
344 :
345 11 : LogicalSlotAdvanceAndCheckSnapState(remote_slot->confirmed_lsn,
346 : &found_consistent_snapshot);
347 :
348 : /* Sanity check */
349 11 : if (slot->data.confirmed_flush != remote_slot->confirmed_lsn)
350 0 : ereport(ERROR,
351 : errmsg_internal("synchronized confirmed_flush for slot \"%s\" differs from remote slot",
352 : remote_slot->name),
353 : errdetail_internal("Remote slot has LSN %X/%08X but local slot has LSN %X/%08X.",
354 : LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
355 : LSN_FORMAT_ARGS(slot->data.confirmed_flush)));
356 :
357 : /*
358 : * If we can't reach a consistent snapshot, the slot won't be
359 : * persisted. See update_and_persist_local_synced_slot().
360 : */
361 11 : if (!found_consistent_snapshot)
362 : {
363 : Assert(MyReplicationSlot->data.persistency == RS_TEMPORARY);
364 :
365 0 : ereport(LOG,
366 : errmsg("could not synchronize replication slot \"%s\"",
367 : remote_slot->name),
368 : errdetail("Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%08X.",
369 : LSN_FORMAT_ARGS(slot->data.restart_lsn)));
370 :
371 0 : skip_reason = SS_SKIP_NO_CONSISTENT_SNAPSHOT;
372 : }
373 :
374 : /*
375 : * It is possible that the slot's xmin or LSNs are not updated,
376 : * when the synced slot has reached consistent snapshot state or
377 : * cannot build one at all.
378 : */
379 11 : updated_xmin_or_lsn = (old_confirmed_lsn != slot->data.confirmed_flush ||
380 11 : old_restart_lsn != slot->data.restart_lsn ||
381 0 : old_catalog_xmin != slot->data.catalog_xmin);
382 : }
383 : }
384 :
385 : /* Update slot sync skip stats */
386 53 : update_slotsync_skip_stats(skip_reason);
387 :
388 53 : if (remote_dbid != slot->data.database ||
389 53 : remote_slot->two_phase != slot->data.two_phase ||
390 52 : remote_slot->failover != slot->data.failover ||
391 52 : strcmp(remote_slot->plugin, NameStr(slot->data.plugin)) != 0 ||
392 52 : remote_slot->two_phase_at != slot->data.two_phase_at)
393 : {
394 : NameData plugin_name;
395 :
396 : /* Avoid expensive operations while holding a spinlock. */
397 1 : namestrcpy(&plugin_name, remote_slot->plugin);
398 :
399 1 : SpinLockAcquire(&slot->mutex);
400 1 : slot->data.plugin = plugin_name;
401 1 : slot->data.database = remote_dbid;
402 1 : slot->data.two_phase = remote_slot->two_phase;
403 1 : slot->data.two_phase_at = remote_slot->two_phase_at;
404 1 : slot->data.failover = remote_slot->failover;
405 1 : SpinLockRelease(&slot->mutex);
406 :
407 1 : updated_config = true;
408 :
409 : /*
410 : * Ensure that there is no risk of sending prepared transactions
411 : * unexpectedly after the promotion.
412 : */
413 : Assert(slot->data.two_phase_at <= slot->data.confirmed_flush);
414 : }
415 :
416 : /*
417 : * We have to write the changed xmin to disk *before* we change the
418 : * in-memory value, otherwise after a crash we wouldn't know that some
419 : * catalog tuples might have been removed already.
420 : */
421 53 : if (updated_config || updated_xmin_or_lsn)
422 : {
423 16 : ReplicationSlotMarkDirty();
424 16 : ReplicationSlotSave();
425 : }
426 :
427 : /*
428 : * Now the new xmin is safely on disk, we can let the global value
429 : * advance. We do not take ProcArrayLock or similar since we only advance
430 : * xmin here and there's not much harm done by a concurrent computation
431 : * missing that.
432 : */
433 53 : if (updated_xmin_or_lsn)
434 : {
435 15 : SpinLockAcquire(&slot->mutex);
436 15 : slot->effective_catalog_xmin = remote_slot->catalog_xmin; /* NOTE(review): remote value, not slot->data.catalog_xmin -- confirm they agree after the advance path */
437 15 : SpinLockRelease(&slot->mutex);
438 :
439 15 : ReplicationSlotsComputeRequiredXmin(false);
440 15 : ReplicationSlotsComputeRequiredLSN();
441 : }
442 :
443 53 : return updated_config || updated_xmin_or_lsn;
444 : }
445 :
446 : /*
447 : * Return a List of pointers to the in-use local slots flagged as synced from the
448 : * primary server (NIL if none). The control lock is released before returning.
449 : */
450 : static List *
451 35 : get_local_synced_slots(void)
452 : {
453 35 : List *local_slots = NIL;
454 :
455 35 : LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
456 :
457 560 : for (int i = 0; i < max_replication_slots + max_repack_replication_slots; i++)
458 : {
459 525 : ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
460 :
461 : /* Check if it is a synchronized slot */
462 525 : if (s->in_use && s->data.synced)
463 : {
464 : Assert(SlotIsLogical(s));
465 52 : local_slots = lappend(local_slots, s); /* stores the array entry's address, not a copy */
466 : }
467 : }
468 :
469 35 : LWLockRelease(ReplicationSlotControlLock);
470 :
471 35 : return local_slots;
472 : }
473 :
474 : /*
475 : * Helper function to check if local_slot is required to be retained.
476 : * Slots are matched by name against the RemoteSlot entries in remote_slots.
477 : * Return false either if local_slot does not exist in the remote_slots list
478 : * or is invalidated while the corresponding remote slot is still valid,
479 : * otherwise true.
480 : */
481 : static bool
482 52 : local_sync_slot_required(ReplicationSlot *local_slot, List *remote_slots)
483 : {
484 52 : bool remote_exists = false;
485 52 : bool locally_invalidated = false;
486 :
487 126 : foreach_ptr(RemoteSlot, remote_slot, remote_slots)
488 : {
489 73 : if (strcmp(remote_slot->name, NameStr(local_slot->data.name)) == 0)
490 : {
491 51 : remote_exists = true;
492 :
493 : /*
494 : * If remote slot is not invalidated but local slot is marked as
495 : * invalidated, then set locally_invalidated flag.
496 : */
497 51 : SpinLockAcquire(&local_slot->mutex);
498 51 : locally_invalidated =
499 102 : (remote_slot->invalidated == RS_INVAL_NONE) &&
500 51 : (local_slot->data.invalidated != RS_INVAL_NONE);
501 51 : SpinLockRelease(&local_slot->mutex);
502 :
503 51 : break; /* stop at the first name match */
504 : }
505 : }
506 :
507 52 : return (remote_exists && !locally_invalidated);
508 : }
509 :
510 : /*
511 : * Drop local obsolete slots, judged against remote_slot_list (RemoteSlot entries).
512 : *
513 : * Drop the local slots that no longer need to be synced i.e. these either do
514 : * not exist on the primary or are no longer enabled for failover.
515 : *
516 : * Additionally, drop any slots that are valid on the primary but got
517 : * invalidated on the standby. This situation may occur due to the following
518 : * reasons:
519 : * - The 'max_slot_wal_keep_size' on the standby is insufficient to retain WAL
520 : * records from the restart_lsn of the slot.
521 : * - 'primary_slot_name' is temporarily reset to null and the physical slot is
522 : * removed.
523 : * These dropped slots will get recreated in next sync-cycle and it is okay to
524 : * drop and recreate such slots as long as these are not consumable on the
525 : * standby (which is the case currently).
526 : *
527 : * Note: Change of 'wal_level' on the primary server to a level lower than
528 : * logical may also result in slot invalidation and removal on the standby.
529 : * This is because such 'wal_level' change is only possible if the logical
530 : * slots are removed on the primary server, so it's expected to see the
531 : * slots being invalidated and removed on the standby too (and re-created
532 : * if they are re-created on the primary server).
533 : */
534 : static void
535 35 : drop_local_obsolete_slots(List *remote_slot_list)
536 : {
537 35 : List *local_slots = get_local_synced_slots();
538 :
539 122 : foreach_ptr(ReplicationSlot, local_slot, local_slots)
540 : {
541 : /* Drop the local slot if it is not required to be retained. */
542 52 : if (!local_sync_slot_required(local_slot, remote_slot_list))
543 : {
544 2 : Oid slot_database = local_slot->data.database; /* captured once; reused for lock, unlock and log */
545 : bool synced_slot;
546 :
547 : /*
548 : * Use shared lock to prevent a conflict with
549 : * ReplicationSlotsDropDBSlots(), trying to drop the same slot
550 : * during a drop-database operation.
551 : */
552 2 : LockSharedObject(DatabaseRelationId, slot_database, 0,
553 : AccessShareLock);
554 :
555 : /*
556 : * In the small window between getting the slot to drop and
557 : * locking the database, there is a possibility of a parallel
558 : * database drop by the startup process and the creation of a new
559 : * slot by the user. This new user-created slot may end up using
560 : * the same shared memory as that of 'local_slot'. Thus check if
561 : * local_slot is still the synced one before performing the actual
562 : * drop.
563 : */
564 2 : SpinLockAcquire(&local_slot->mutex);
565 2 : synced_slot = local_slot->in_use && local_slot->data.synced;
566 2 : SpinLockRelease(&local_slot->mutex);
567 :
568 2 : if (synced_slot)
569 : {
570 2 : NameData slot_name = local_slot->data.name; /* local copy, still needed for the log message after the drop */
571 :
572 : /*
573 : * Now acquire and drop the slot. Note we purposely don't
574 : * request logical decoding to be disabled here: since this is
575 : * a standby, which derives its logical decoding state from
576 : * the primary, it would be wrong to do so.
577 : */
578 2 : ReplicationSlotAcquire(NameStr(slot_name), true, false);
579 2 : ReplicationSlotDropAcquired(false);
580 :
581 2 : ereport(LOG,
582 : errmsg("dropped replication slot \"%s\" of database with OID %u",
583 : NameStr(slot_name),
584 : slot_database));
585 : }
586 :
587 2 : UnlockSharedObject(DatabaseRelationId, slot_database, 0, /* released whether or not the slot was dropped */
588 : AccessShareLock);
589 : }
590 : }
591 35 : }
592 :
593 : /*
594 : * Reserve WAL for the currently active local slot (MyReplicationSlot) using the
595 : * specified WAL location (restart_lsn). The slot must not have a restart_lsn yet.
596 : *
597 : * If the given WAL location has been removed or is at risk of removal,
598 : * reserve WAL using the oldest segment that is non-removable.
599 : */
600 : static void
601 8 : reserve_wal_for_local_slot(XLogRecPtr restart_lsn)
602 : {
603 : XLogRecPtr slot_min_lsn;
604 : XLogRecPtr min_safe_lsn;
605 : XLogSegNo segno;
606 8 : ReplicationSlot *slot = MyReplicationSlot;
607 :
608 : Assert(slot != NULL);
609 : Assert(!XLogRecPtrIsValid(slot->data.restart_lsn));
610 :
611 : /*
612 : * Acquire an exclusive lock to prevent the checkpoint process from
613 : * concurrently calculating the minimum slot LSN (see
614 : * CheckPointReplicationSlots), ensuring that if WAL reservation occurs
615 : * first, the checkpoint must wait for the restart_lsn update before
616 : * calculating the minimum LSN.
617 : *
618 : * Note: Unlike ReplicationSlotReserveWal(), this lock does not protect a
619 : * newly synced slot from being invalidated if a concurrent checkpoint has
620 : * invoked CheckPointReplicationSlots() before the WAL reservation here.
621 : * This can happen because the initial restart_lsn received from the
622 : * remote server can precede the redo pointer. Therefore, when selecting
623 : * the initial restart_lsn, we consider using the redo pointer or the
624 : * minimum slot LSN (if those values are greater than the remote
625 : * restart_lsn) instead of relying solely on the remote value.
626 : */
627 8 : LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
628 :
629 : /*
630 : * Determine the minimum non-removable LSN by comparing the redo pointer
631 : * with the minimum slot LSN.
632 : *
633 : * The minimum slot LSN is considered because the redo pointer advances at
634 : * every checkpoint, even when replication slots are present on the
635 : * standby. In such scenarios, the redo pointer can exceed the remote
636 : * restart_lsn, while WALs preceding the remote restart_lsn remain
637 : * protected by a local replication slot.
638 : */
639 8 : min_safe_lsn = GetRedoRecPtr();
640 8 : slot_min_lsn = XLogGetReplicationSlotMinimumLSN();
641 :
642 8 : if (XLogRecPtrIsValid(slot_min_lsn) && min_safe_lsn > slot_min_lsn) /* keep the older of the two */
643 0 : min_safe_lsn = slot_min_lsn;
644 :
645 : /*
646 : * If the minimum safe LSN is greater than the given restart_lsn, use it
647 : * as the initial restart_lsn for the newly synced slot. Otherwise, use
648 : * the given remote restart_lsn.
649 : */
650 8 : SpinLockAcquire(&slot->mutex);
651 8 : slot->data.restart_lsn = Max(restart_lsn, min_safe_lsn);
652 8 : SpinLockRelease(&slot->mutex);
653 :
654 8 : ReplicationSlotsComputeRequiredLSN();
655 :
656 8 : XLByteToSeg(slot->data.restart_lsn, segno, wal_segment_size);
657 8 : if (XLogGetLastRemovedSegno() >= segno) /* segment holding the chosen restart_lsn is already gone */
658 0 : elog(ERROR, "WAL required by replication slot %s has been removed concurrently",
659 : NameStr(slot->data.name));
660 :
661 8 : LWLockRelease(ReplicationSlotAllocationLock);
662 8 : }
663 :
664 : /*
665 : * If the remote restart_lsn and catalog_xmin have caught up with the
666 : * local ones, then update the LSNs and persist the local synced slot for
667 : * future synchronization; otherwise, do nothing.
668 : *
669 : * *slot_persistence_pending (the pointer may be NULL) is set to true if the
670 : * slot could not be persisted on this call; it is never set to false here.
671 : *
672 : * Return true if the slot is marked as RS_PERSISTENT (sync-ready), otherwise
673 : * false.
674 : */
675 : static bool
676 12 : update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
677 : bool *slot_persistence_pending)
678 : {
679 12 : ReplicationSlot *slot = MyReplicationSlot;
680 :
681 : /* Slotsync skip stats are handled in function update_local_synced_slot() */
682 12 : (void) update_local_synced_slot(remote_slot, remote_dbid);
683 :
684 : /*
685 : * Check if the slot cannot be synchronized. Refer to the comment atop the
686 : * file for details on this check.
687 : */
688 12 : if (slot->slotsync_skip_reason != SS_SKIP_NONE)
689 : {
690 : /*
691 : * We reach this point when the remote slot didn't catch up to locally
692 : * reserved position, or it cannot reach the consistent point from the
693 : * restart_lsn, or the WAL prior to the remote confirmed flush LSN has
694 : * not been received and flushed.
695 : *
696 : * We do not drop the slot because the restart_lsn and confirmed_lsn
697 : * can be ahead of the current location when recreating the slot in
698 : * the next cycle. It may take more time to create such a slot or
699 : * reach the consistent point. Therefore, we keep this slot and
700 : * attempt the synchronization in the next cycle.
701 : *
702 : * We also update the slot_persistence_pending parameter, so the SQL
703 : * function can retry.
704 : */
705 5 : if (slot_persistence_pending) /* optional out-parameter */
706 3 : *slot_persistence_pending = true;
707 :
708 5 : return false;
709 : }
710 :
711 7 : ReplicationSlotPersist();
712 :
713 7 : ereport(LOG,
714 : errmsg("newly created replication slot \"%s\" is sync-ready now",
715 : remote_slot->name));
716 :
717 7 : return true;
718 : }
719 :
720 : /*
721 : * Synchronize a single slot to the given position.
722 : *
723 : * This creates a new slot if there is no existing one and updates the
724 : * metadata of the slot as per the data received from the primary server.
725 : *
726 : * The slot is created as a temporary slot and stays in the same state until the
727 : * remote_slot catches up with locally reserved position and local slot is
728 : * updated. The slot is then persisted and is considered as sync-ready for
729 : * periodic syncs.
730 : *
731 : * *slot_persistence_pending is set to true if any of the slots fail to
732 : * persist.
733 : *
734 : * Returns TRUE if the local slot is updated.
735 : */
736 : static bool
737 58 : synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid,
738 : bool *slot_persistence_pending)
739 : {
740 : ReplicationSlot *slot;
741 58 : bool slot_updated = false;
742 :
743 : /* Search for the named slot */
744 58 : if ((slot = SearchNamedReplicationSlot(remote_slot->name, true)))
745 : {
746 : bool synced;
747 :
748 50 : SpinLockAcquire(&slot->mutex);
749 50 : synced = slot->data.synced;
750 50 : SpinLockRelease(&slot->mutex);
751 :
752 : /* User-created slot with the same name exists, raise ERROR. */
753 50 : if (!synced)
754 0 : ereport(ERROR,
755 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
756 : errmsg("exiting from slot synchronization because same"
757 : " name slot \"%s\" already exists on the standby",
758 : remote_slot->name));
759 :
760 : /*
761 : * The slot has been synchronized before.
762 : *
763 : * It is important to acquire the slot here before checking
764 : * invalidation. If we don't acquire the slot first, there could be a
765 : * race condition that the local slot could be invalidated just after
766 : * checking the 'invalidated' flag here and we could end up
767 : * overwriting 'invalidated' flag to remote_slot's value. See
768 : * InvalidatePossiblyObsoleteSlot() where it invalidates slot directly
769 : * if the slot is not acquired by other processes.
770 : *
771 : * XXX: If it ever turns out that slot acquire/release is costly for
772 : * cases when none of the slot properties is changed then we can do a
773 : * pre-check to ensure that at least one of the slot properties is
774 : * changed before acquiring the slot.
775 : */
776 50 : ReplicationSlotAcquire(remote_slot->name, true, false);
777 :
778 : Assert(slot == MyReplicationSlot);
779 :
780 : /*
781 : * Copy the invalidation cause from remote only if local slot is not
782 : * invalidated locally, we don't want to overwrite existing one.
783 : */
784 50 : if (slot->data.invalidated == RS_INVAL_NONE &&
785 50 : remote_slot->invalidated != RS_INVAL_NONE)
786 : {
787 0 : SpinLockAcquire(&slot->mutex);
788 0 : slot->data.invalidated = remote_slot->invalidated;
789 0 : SpinLockRelease(&slot->mutex);
790 :
791 : /* Make sure the invalidated state persists across server restart */
792 0 : ReplicationSlotMarkDirty();
793 0 : ReplicationSlotSave();
794 :
795 0 : slot_updated = true;
796 : }
797 :
798 : /* Skip the sync of an invalidated slot */
799 50 : if (slot->data.invalidated != RS_INVAL_NONE)
800 : {
801 0 : update_slotsync_skip_stats(SS_SKIP_INVALID);
802 :
803 0 : ReplicationSlotRelease();
804 0 : return slot_updated;
805 : }
806 :
807 : /* Slot not ready yet, let's attempt to make it sync-ready now. */
808 50 : if (slot->data.persistency == RS_TEMPORARY)
809 : {
810 4 : slot_updated = update_and_persist_local_synced_slot(remote_slot,
811 : remote_dbid,
812 : slot_persistence_pending);
813 : }
814 :
815 : /* Slot ready for sync, so sync it. */
816 : else
817 : {
818 : /*
819 : * Sanity check: As long as the invalidations are handled
820 : * appropriately as above, this should never happen.
821 : *
822 : * We don't need to check restart_lsn here. See the comments in
823 : * update_local_synced_slot() for details.
824 : */
825 46 : if (remote_slot->confirmed_lsn < slot->data.confirmed_flush)
826 0 : ereport(ERROR,
827 : errmsg_internal("cannot synchronize local slot \"%s\"",
828 : remote_slot->name),
829 : errdetail_internal("Local slot's start streaming location LSN(%X/%08X) is ahead of remote slot's LSN(%X/%08X).",
830 : LSN_FORMAT_ARGS(slot->data.confirmed_flush),
831 : LSN_FORMAT_ARGS(remote_slot->confirmed_lsn)));
832 :
833 46 : slot_updated = update_local_synced_slot(remote_slot, remote_dbid);
834 : }
835 : }
836 : /* Otherwise create the slot first. */
837 : else
838 : {
839 : NameData plugin_name;
840 8 : TransactionId xmin_horizon = InvalidTransactionId;
841 :
842 : /* Skip creating the local slot if remote_slot is invalidated already */
843 8 : if (remote_slot->invalidated != RS_INVAL_NONE)
844 0 : return false;
845 :
846 : /*
847 : * We create temporary slots instead of ephemeral slots here because
848 : * we want the slots to survive after releasing them. This is done to
849 : * avoid dropping and re-creating the slots in each synchronization
850 : * cycle if the restart_lsn or catalog_xmin of the remote slot has not
851 : * caught up.
852 : */
853 8 : ReplicationSlotCreate(remote_slot->name, true, RS_TEMPORARY,
854 8 : remote_slot->two_phase,
855 : false,
856 8 : remote_slot->failover,
857 : true);
858 :
859 : /* For shorter lines. */
860 8 : slot = MyReplicationSlot;
861 :
862 : /* Avoid expensive operations while holding a spinlock. */
863 8 : namestrcpy(&plugin_name, remote_slot->plugin);
864 :
865 8 : SpinLockAcquire(&slot->mutex);
866 8 : slot->data.database = remote_dbid;
867 8 : slot->data.plugin = plugin_name;
868 8 : SpinLockRelease(&slot->mutex);
869 :
870 8 : reserve_wal_for_local_slot(remote_slot->restart_lsn);
871 :
872 8 : LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
873 8 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
874 8 : xmin_horizon = GetOldestSafeDecodingTransactionId(true);
875 8 : SpinLockAcquire(&slot->mutex);
876 8 : slot->effective_catalog_xmin = xmin_horizon;
877 8 : slot->data.catalog_xmin = xmin_horizon;
878 8 : SpinLockRelease(&slot->mutex);
879 8 : ReplicationSlotsComputeRequiredXmin(true);
880 8 : LWLockRelease(ProcArrayLock);
881 8 : LWLockRelease(ReplicationSlotControlLock);
882 :
883 8 : update_and_persist_local_synced_slot(remote_slot, remote_dbid,
884 : slot_persistence_pending);
885 :
886 8 : slot_updated = true;
887 : }
888 :
889 58 : ReplicationSlotRelease();
890 :
891 58 : return slot_updated;
892 : }
893 :
894 : /*
895 : * Fetch remote slots.
896 : *
897 : * If slot_names is NIL, fetches all failover logical slots from the
898 : * primary server, otherwise fetches only the ones with names in slot_names.
899 : *
900 : * Returns a list of remote slot information structures, or NIL if none
901 : * are found.
902 : */
903 : static List *
904 37 : fetch_remote_slots(WalReceiverConn *wrconn, List *slot_names)
905 : {
906 : #define SLOTSYNC_COLUMN_COUNT 10
907 37 : Oid slotRow[SLOTSYNC_COLUMN_COUNT] = {TEXTOID, TEXTOID, LSNOID,
908 : LSNOID, XIDOID, BOOLOID, LSNOID, BOOLOID, TEXTOID, TEXTOID};
909 :
910 : WalRcvExecResult *res;
911 : TupleTableSlot *tupslot;
912 37 : List *remote_slot_list = NIL;
913 : StringInfoData query;
914 :
915 37 : initStringInfo(&query);
916 37 : appendStringInfoString(&query,
917 : "SELECT slot_name, plugin, confirmed_flush_lsn,"
918 : " restart_lsn, catalog_xmin, two_phase,"
919 : " two_phase_at, failover,"
920 : " database, invalidation_reason"
921 : " FROM pg_catalog.pg_replication_slots"
922 : " WHERE failover and NOT temporary");
923 :
924 37 : if (slot_names != NIL)
925 : {
926 3 : bool first_slot = true;
927 :
928 : /*
929 : * Construct the query to fetch only the specified slots
930 : */
931 3 : appendStringInfoString(&query, " AND slot_name IN (");
932 :
933 9 : foreach_ptr(char, slot_name, slot_names)
934 : {
935 3 : if (!first_slot)
936 0 : appendStringInfoString(&query, ", ");
937 :
938 3 : appendStringInfoString(&query, quote_literal_cstr(slot_name));
939 3 : first_slot = false;
940 : }
941 3 : appendStringInfoChar(&query, ')');
942 : }
943 :
944 : /* Execute the query */
945 37 : res = walrcv_exec(wrconn, query.data, SLOTSYNC_COLUMN_COUNT, slotRow);
946 37 : pfree(query.data);
947 37 : if (res->status != WALRCV_OK_TUPLES)
948 2 : ereport(ERROR,
949 : errmsg("could not fetch failover logical slots info from the primary server: %s",
950 : res->err));
951 :
952 35 : tupslot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple);
953 93 : while (tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
954 : {
955 : bool isnull;
956 58 : RemoteSlot *remote_slot = palloc0_object(RemoteSlot);
957 : Datum d;
958 58 : int col = 0;
959 :
960 58 : remote_slot->name = TextDatumGetCString(slot_getattr(tupslot, ++col,
961 : &isnull));
962 : Assert(!isnull);
963 :
964 58 : remote_slot->plugin = TextDatumGetCString(slot_getattr(tupslot, ++col,
965 : &isnull));
966 : Assert(!isnull);
967 :
968 : /*
969 : * It is possible to get null values for LSN and Xmin if slot is
970 : * invalidated on the primary server, so handle accordingly.
971 : */
972 58 : d = slot_getattr(tupslot, ++col, &isnull);
973 58 : remote_slot->confirmed_lsn = isnull ? InvalidXLogRecPtr :
974 58 : DatumGetLSN(d);
975 :
976 58 : d = slot_getattr(tupslot, ++col, &isnull);
977 58 : remote_slot->restart_lsn = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
978 :
979 58 : d = slot_getattr(tupslot, ++col, &isnull);
980 58 : remote_slot->catalog_xmin = isnull ? InvalidTransactionId :
981 58 : DatumGetTransactionId(d);
982 :
983 58 : remote_slot->two_phase = DatumGetBool(slot_getattr(tupslot, ++col,
984 : &isnull));
985 : Assert(!isnull);
986 :
987 58 : d = slot_getattr(tupslot, ++col, &isnull);
988 58 : remote_slot->two_phase_at = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
989 :
990 58 : remote_slot->failover = DatumGetBool(slot_getattr(tupslot, ++col,
991 : &isnull));
992 : Assert(!isnull);
993 :
994 58 : remote_slot->database = TextDatumGetCString(slot_getattr(tupslot,
995 : ++col, &isnull));
996 : Assert(!isnull);
997 :
998 58 : d = slot_getattr(tupslot, ++col, &isnull);
999 58 : remote_slot->invalidated = isnull ? RS_INVAL_NONE :
1000 0 : GetSlotInvalidationCause(TextDatumGetCString(d));
1001 :
1002 : /* Sanity check */
1003 : Assert(col == SLOTSYNC_COLUMN_COUNT);
1004 :
1005 : /*
1006 : * If restart_lsn, confirmed_lsn or catalog_xmin is invalid but the
1007 : * slot is valid, that means we have fetched the remote_slot in its
1008 : * RS_EPHEMERAL state. In such a case, don't sync it; we can always
1009 : * sync it in the next sync cycle when the remote_slot is persisted
1010 : * and has valid lsn(s) and xmin values.
1011 : *
1012 : * XXX: In future, if we plan to expose 'slot->data.persistency' in
1013 : * pg_replication_slots view, then we can avoid fetching RS_EPHEMERAL
1014 : * slots in the first place.
1015 : */
1016 58 : if ((!XLogRecPtrIsValid(remote_slot->restart_lsn) ||
1017 58 : !XLogRecPtrIsValid(remote_slot->confirmed_lsn) ||
1018 58 : !TransactionIdIsValid(remote_slot->catalog_xmin)) &&
1019 0 : remote_slot->invalidated == RS_INVAL_NONE)
1020 0 : pfree(remote_slot);
1021 : else
1022 : /* Create list of remote slots */
1023 58 : remote_slot_list = lappend(remote_slot_list, remote_slot);
1024 :
1025 58 : ExecClearTuple(tupslot);
1026 : }
1027 :
1028 35 : ExecDropSingleTupleTableSlot(tupslot);
1029 35 : walrcv_clear_result(res);
1030 :
1031 35 : return remote_slot_list;
1032 : }
1033 :
1034 : /*
1035 : * Synchronize slots.
1036 : *
1037 : * This function takes a list of remote slots and synchronizes them locally. It
1038 : * creates the slots if not present on the standby and updates existing ones.
1039 : *
1040 : * If slot_persistence_pending is not NULL, it will be set to true if one or
1041 : * more slots could not be persisted. This allows callers such as
1042 : * SyncReplicationSlots() to retry those slots.
1043 : *
1044 : * Returns TRUE if any of the slots gets updated in this sync-cycle.
1045 : */
1046 : static bool
1047 35 : synchronize_slots(WalReceiverConn *wrconn, List *remote_slot_list,
1048 : bool *slot_persistence_pending)
1049 : {
1050 35 : bool some_slot_updated = false;
1051 :
1052 : /* Drop local slots that no longer need to be synced. */
1053 35 : drop_local_obsolete_slots(remote_slot_list);
1054 :
1055 : /* Now sync the slots locally */
1056 128 : foreach_ptr(RemoteSlot, remote_slot, remote_slot_list)
1057 : {
1058 58 : Oid remote_dbid = get_database_oid(remote_slot->database, false);
1059 :
1060 : /*
1061 : * Use shared lock to prevent a conflict with
1062 : * ReplicationSlotsDropDBSlots(), trying to drop the same slot during
1063 : * a drop-database operation.
1064 : */
1065 58 : LockSharedObject(DatabaseRelationId, remote_dbid, 0, AccessShareLock);
1066 :
1067 58 : some_slot_updated |= synchronize_one_slot(remote_slot, remote_dbid,
1068 : slot_persistence_pending);
1069 :
1070 58 : UnlockSharedObject(DatabaseRelationId, remote_dbid, 0, AccessShareLock);
1071 : }
1072 :
1073 35 : return some_slot_updated;
1074 : }
1075 :
1076 : /*
1077 : * Checks the remote server info.
1078 : *
1079 : * We ensure that the 'primary_slot_name' exists on the remote server and the
1080 : * remote server is not a standby node.
1081 : */
1082 : static void
1083 15 : validate_remote_info(WalReceiverConn *wrconn)
1084 : {
1085 : #define PRIMARY_INFO_OUTPUT_COL_COUNT 2
1086 : WalRcvExecResult *res;
1087 15 : Oid slotRow[PRIMARY_INFO_OUTPUT_COL_COUNT] = {BOOLOID, BOOLOID};
1088 : StringInfoData cmd;
1089 : bool isnull;
1090 : TupleTableSlot *tupslot;
1091 : bool remote_in_recovery;
1092 : bool primary_slot_valid;
1093 15 : bool started_tx = false;
1094 :
1095 15 : initStringInfo(&cmd);
1096 15 : appendStringInfo(&cmd,
1097 : "SELECT pg_is_in_recovery(), count(*) = 1"
1098 : " FROM pg_catalog.pg_replication_slots"
1099 : " WHERE slot_type='physical' AND slot_name=%s",
1100 : quote_literal_cstr(PrimarySlotName));
1101 :
1102 : /* The syscache access in walrcv_exec() needs a transaction env. */
1103 15 : if (!IsTransactionState())
1104 : {
1105 6 : StartTransactionCommand();
1106 6 : started_tx = true;
1107 : }
1108 :
1109 15 : res = walrcv_exec(wrconn, cmd.data, PRIMARY_INFO_OUTPUT_COL_COUNT, slotRow);
1110 15 : pfree(cmd.data);
1111 :
1112 15 : if (res->status != WALRCV_OK_TUPLES)
1113 0 : ereport(ERROR,
1114 : errmsg("could not fetch primary slot name \"%s\" info from the primary server: %s",
1115 : PrimarySlotName, res->err),
1116 : errhint("Check if \"primary_slot_name\" is configured correctly."));
1117 :
1118 15 : tupslot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple);
1119 15 : if (!tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
1120 0 : elog(ERROR,
1121 : "failed to fetch tuple for the primary server slot specified by \"primary_slot_name\"");
1122 :
1123 15 : remote_in_recovery = DatumGetBool(slot_getattr(tupslot, 1, &isnull));
1124 : Assert(!isnull);
1125 :
1126 : /*
1127 : * Slot sync is currently not supported on a cascading standby. This is
1128 : * because if we allow it, the primary server needs to wait for all the
1129 : * cascading standbys, otherwise, logical subscribers can still be ahead
1130 : * of one of the cascading standbys which we plan to promote. Thus, to
1131 : * avoid this additional complexity, we restrict it for the time being.
1132 : */
1133 15 : if (remote_in_recovery)
1134 1 : ereport(ERROR,
1135 : errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1136 : errmsg("cannot synchronize replication slots from a standby server"));
1137 :
1138 14 : primary_slot_valid = DatumGetBool(slot_getattr(tupslot, 2, &isnull));
1139 : Assert(!isnull);
1140 :
1141 14 : if (!primary_slot_valid)
1142 0 : ereport(ERROR,
1143 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1144 : /* translator: second %s is a GUC variable name */
1145 : errmsg("replication slot \"%s\" specified by \"%s\" does not exist on primary server",
1146 : PrimarySlotName, "primary_slot_name"));
1147 :
1148 14 : ExecDropSingleTupleTableSlot(tupslot);
1149 14 : walrcv_clear_result(res);
1150 :
1151 14 : if (started_tx)
1152 6 : CommitTransactionCommand();
1153 14 : }
1154 :
1155 : /*
1156 : * Checks if dbname is specified in 'primary_conninfo'.
1157 : *
1158 : * Error out if not specified otherwise return it.
1159 : */
1160 : char *
1161 16 : CheckAndGetDbnameFromConninfo(void)
1162 : {
1163 : char *dbname;
1164 :
1165 : /*
1166 : * The slot synchronization needs a database connection for walrcv_exec to
1167 : * work.
1168 : */
1169 16 : dbname = walrcv_get_dbname_from_conninfo(PrimaryConnInfo);
1170 16 : if (dbname == NULL)
1171 1 : ereport(ERROR,
1172 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1173 :
1174 : /*
1175 : * translator: first %s is a connection option; second %s is a GUC
1176 : * variable name
1177 : */
1178 : errmsg("replication slot synchronization requires \"%s\" to be specified in \"%s\"",
1179 : "dbname", "primary_conninfo"));
1180 15 : return dbname;
1181 : }
1182 :
1183 : /*
1184 : * Return true if all necessary GUCs for slot synchronization are set
1185 : * appropriately, otherwise, return false.
1186 : */
1187 : bool
1188 26 : ValidateSlotSyncParams(int elevel)
1189 : {
1190 : /*
1191 : * Logical slot sync/creation requires logical decoding to be enabled.
1192 : */
1193 26 : if (!IsLogicalDecodingEnabled())
1194 : {
1195 0 : ereport(elevel,
1196 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1197 : errmsg("replication slot synchronization requires \"effective_wal_level\" >= \"logical\" on the primary"),
1198 : errhint("To enable logical decoding on primary, set \"wal_level\" >= \"logical\" or create at least one logical slot when \"wal_level\" = \"replica\"."));
1199 :
1200 0 : return false;
1201 : }
1202 :
1203 : /*
1204 : * A physical replication slot(primary_slot_name) is required on the
1205 : * primary to ensure that the rows needed by the standby are not removed
1206 : * after restarting, so that the synchronized slot on the standby will not
1207 : * be invalidated.
1208 : */
1209 26 : if (PrimarySlotName == NULL || *PrimarySlotName == '\0')
1210 : {
1211 0 : ereport(elevel,
1212 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1213 : /* translator: %s is a GUC variable name */
1214 : errmsg("replication slot synchronization requires \"%s\" to be set", "primary_slot_name"));
1215 0 : return false;
1216 : }
1217 :
1218 : /*
1219 : * hot_standby_feedback must be enabled to cooperate with the physical
1220 : * replication slot, which allows informing the primary about the xmin and
1221 : * catalog_xmin values on the standby.
1222 : */
1223 26 : if (!hot_standby_feedback)
1224 : {
1225 1 : ereport(elevel,
1226 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1227 : /* translator: %s is a GUC variable name */
1228 : errmsg("replication slot synchronization requires \"%s\" to be enabled",
1229 : "hot_standby_feedback"));
1230 1 : return false;
1231 : }
1232 :
1233 : /*
1234 : * The primary_conninfo is required to make connection to primary for
1235 : * getting slots information.
1236 : */
1237 25 : if (PrimaryConnInfo == NULL || *PrimaryConnInfo == '\0')
1238 : {
1239 0 : ereport(elevel,
1240 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1241 : /* translator: %s is a GUC variable name */
1242 : errmsg("replication slot synchronization requires \"%s\" to be set",
1243 : "primary_conninfo"));
1244 0 : return false;
1245 : }
1246 :
1247 25 : return true;
1248 : }
1249 :
1250 : /*
1251 : * Re-read the config file for slot synchronization.
1252 : *
1253 : * Exit or throw error if relevant GUCs have changed depending on whether
1254 : * called from slot sync worker or from the SQL function pg_sync_replication_slots()
1255 : */
1256 : static void
1257 1 : slotsync_reread_config(void)
1258 : {
1259 1 : char *old_primary_conninfo = pstrdup(PrimaryConnInfo);
1260 1 : char *old_primary_slotname = pstrdup(PrimarySlotName);
1261 1 : bool old_sync_replication_slots = sync_replication_slots;
1262 1 : bool old_hot_standby_feedback = hot_standby_feedback;
1263 : bool conninfo_changed;
1264 : bool primary_slotname_changed;
1265 1 : bool is_slotsync_worker = AmLogicalSlotSyncWorkerProcess();
1266 1 : bool parameter_changed = false;
1267 :
1268 : if (is_slotsync_worker)
1269 : Assert(sync_replication_slots);
1270 :
1271 1 : ConfigReloadPending = false;
1272 1 : ProcessConfigFile(PGC_SIGHUP);
1273 :
1274 1 : conninfo_changed = strcmp(old_primary_conninfo, PrimaryConnInfo) != 0;
1275 1 : primary_slotname_changed = strcmp(old_primary_slotname, PrimarySlotName) != 0;
1276 1 : pfree(old_primary_conninfo);
1277 1 : pfree(old_primary_slotname);
1278 :
1279 1 : if (old_sync_replication_slots != sync_replication_slots)
1280 : {
1281 0 : if (is_slotsync_worker)
1282 : {
1283 0 : ereport(LOG,
1284 : /* translator: %s is a GUC variable name */
1285 : errmsg("replication slot synchronization worker will stop because \"%s\" is disabled",
1286 : "sync_replication_slots"));
1287 :
1288 0 : proc_exit(0);
1289 : }
1290 :
1291 0 : parameter_changed = true;
1292 : }
1293 : else
1294 : {
1295 1 : if (conninfo_changed ||
1296 1 : primary_slotname_changed ||
1297 1 : (old_hot_standby_feedback != hot_standby_feedback))
1298 : {
1299 :
1300 1 : if (is_slotsync_worker)
1301 : {
1302 1 : ereport(LOG,
1303 : errmsg("replication slot synchronization worker will restart because of a parameter change"));
1304 :
1305 : /*
1306 : * Reset the last-start time for this worker so that the
1307 : * postmaster can restart it without waiting for
1308 : * SLOTSYNC_RESTART_INTERVAL_SEC.
1309 : */
1310 1 : SlotSyncCtx->last_start_time = 0;
1311 :
1312 1 : proc_exit(0);
1313 : }
1314 :
1315 0 : parameter_changed = true;
1316 : }
1317 : }
1318 :
1319 : /*
1320 : * If we have reached here with a parameter change, we must be running in
1321 : * SQL function, emit error in such a case.
1322 : */
1323 0 : if (parameter_changed)
1324 : {
1325 : Assert(!is_slotsync_worker);
1326 0 : ereport(ERROR,
1327 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1328 : errmsg("replication slot synchronization will stop because of a parameter change"));
1329 : }
1330 :
1331 0 : }
1332 :
1333 : /*
1334 : * Handle receipt of an interrupt indicating a slotsync shutdown message.
1335 : *
1336 : * This is called within the SIGUSR1 handler. All we do here is set a flag
1337 : * that will cause the next CHECK_FOR_INTERRUPTS() to invoke
1338 : * ProcessSlotSyncMessage().
1339 : */
1340 : void
1341 1 : HandleSlotSyncMessageInterrupt(void)
1342 : {
1343 1 : InterruptPending = true;
1344 1 : SlotSyncShutdownPending = true;
1345 : /* latch will be set by procsignal_sigusr1_handler */
1346 1 : }
1347 :
1348 : /*
1349 : * Handle a PROCSIG_SLOTSYNC_MESSAGE signal, called from ProcessInterrupts().
1350 : *
1351 : * If the current process is the slotsync background worker, log a message
1352 : * and exit cleanly. If it is a backend executing pg_sync_replication_slots(),
1353 : * raise an error, unless the sync has already finished, in which case there
1354 : * is no need to interrupt the caller.
1355 : */
1356 : void
1357 1 : ProcessSlotSyncMessage(void)
1358 : {
1359 1 : SlotSyncShutdownPending = false;
1360 :
1361 1 : if (AmLogicalSlotSyncWorkerProcess())
1362 : {
1363 1 : ereport(LOG,
1364 : errmsg("replication slot synchronization worker will stop because promotion is triggered"));
1365 1 : proc_exit(0);
1366 : }
1367 : else
1368 : {
1369 : /*
1370 : * If sync has already completed, there is no need to interrupt the
1371 : * caller with an error.
1372 : */
1373 0 : if (!IsSyncingReplicationSlots())
1374 0 : return;
1375 :
1376 0 : ereport(ERROR,
1377 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1378 : errmsg("replication slot synchronization will stop because promotion is triggered"));
1379 : }
1380 : }
1381 :
1382 : /*
1383 : * Connection cleanup function for slotsync worker.
1384 : *
1385 : * Called on slotsync worker exit.
1386 : */
1387 : static void
1388 6 : slotsync_worker_disconnect(int code, Datum arg)
1389 : {
1390 6 : WalReceiverConn *wrconn = (WalReceiverConn *) DatumGetPointer(arg);
1391 :
1392 6 : walrcv_disconnect(wrconn);
1393 6 : }
1394 :
1395 : /*
1396 : * Cleanup function for slotsync worker.
1397 : *
1398 : * Called on slotsync worker exit.
1399 : */
1400 : static void
1401 6 : slotsync_worker_onexit(int code, Datum arg)
1402 : {
1403 : /*
1404 : * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1405 : *
1406 : * The startup process during promotion invokes ShutDownSlotSync() which
1407 : * waits for slot sync to finish and it does that by checking the
1408 : * 'syncing' flag. Thus the slot sync worker must be done with slots'
1409 : * release and cleanup to avoid any dangling temporary slots or active
1410 : * slots before it marks itself as finished syncing.
1411 : */
1412 :
1413 : /* Make sure active replication slots are released */
1414 6 : if (MyReplicationSlot != NULL)
1415 0 : ReplicationSlotRelease();
1416 :
1417 : /* Also cleanup the temporary slots. */
1418 6 : ReplicationSlotCleanup(false);
1419 :
1420 6 : SpinLockAcquire(&SlotSyncCtx->mutex);
1421 :
1422 6 : SlotSyncCtx->pid = InvalidPid;
1423 :
1424 : /*
1425 : * If syncing_slots is true, it indicates that the process errored out
1426 : * without resetting the flag. So, we need to clean up shared memory and
1427 : * reset the flag here.
1428 : */
1429 6 : if (syncing_slots)
1430 : {
1431 6 : SlotSyncCtx->syncing = false;
1432 6 : syncing_slots = false;
1433 : }
1434 :
1435 6 : SpinLockRelease(&SlotSyncCtx->mutex);
1436 6 : }
1437 :
1438 : /*
1439 : * Sleep for long enough that we believe it's likely that the slots on primary
1440 : * get updated.
1441 : *
1442 : * If there is no slot activity the wait time between sync-cycles will double
1443 : * (to a maximum of 30s). If there is some slot activity the wait time between
1444 : * sync-cycles is reset to the minimum (200ms).
1445 : */
1446 : static void
1447 27 : wait_for_slot_activity(bool some_slot_updated)
1448 : {
1449 : int rc;
1450 :
1451 27 : if (!some_slot_updated)
1452 : {
1453 : /*
1454 : * No slots were updated, so double the sleep time, but not beyond the
1455 : * maximum allowable value.
1456 : */
1457 16 : sleep_ms = Min(sleep_ms * 2, MAX_SLOTSYNC_WORKER_NAPTIME_MS);
1458 : }
1459 : else
1460 : {
1461 : /*
1462 : * Some slots were updated since the last sleep, so reset the sleep
1463 : * time.
1464 : */
1465 11 : sleep_ms = MIN_SLOTSYNC_WORKER_NAPTIME_MS;
1466 : }
1467 :
1468 27 : rc = WaitLatch(MyLatch,
1469 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
1470 : sleep_ms,
1471 : WAIT_EVENT_REPLICATION_SLOTSYNC_MAIN);
1472 :
1473 27 : if (rc & WL_LATCH_SET)
1474 3 : ResetLatch(MyLatch);
1475 27 : }
1476 :
1477 : /*
1478 : * Emit an error if a concurrent sync call is in progress.
1479 : * Otherwise, advertise that a sync is in progress.
1480 : */
1481 : static void
1482 15 : check_and_set_sync_info(pid_t sync_process_pid)
1483 : {
1484 15 : SpinLockAcquire(&SlotSyncCtx->mutex);
1485 :
1486 : /*
1487 : * Exit immediately if promotion has been triggered. This guards against
1488 : * a new worker (or a call to pg_sync_replication_slots()) that starts
1489 : * after the old worker was stopped by ShutDownSlotSync().
1490 : */
1491 15 : if (SlotSyncCtx->stopSignaled)
1492 : {
1493 0 : SpinLockRelease(&SlotSyncCtx->mutex);
1494 :
1495 0 : if (AmLogicalSlotSyncWorkerProcess())
1496 : {
1497 0 : ereport(DEBUG1,
1498 : errmsg("replication slot synchronization worker will not start because promotion was triggered"));
1499 :
1500 0 : proc_exit(0);
1501 : }
1502 : else
1503 : {
1504 : /*
1505 : * For the backend executing SQL function
1506 : * pg_sync_replication_slots().
1507 : */
1508 0 : ereport(ERROR,
1509 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1510 : errmsg("replication slot synchronization will not start because promotion was triggered"));
1511 : }
1512 : }
1513 :
1514 15 : if (SlotSyncCtx->syncing)
1515 : {
1516 0 : SpinLockRelease(&SlotSyncCtx->mutex);
1517 0 : ereport(ERROR,
1518 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1519 : errmsg("cannot synchronize replication slots concurrently"));
1520 : }
1521 :
1522 : /* The pid must not be already assigned in SlotSyncCtx */
1523 : Assert(SlotSyncCtx->pid == InvalidPid);
1524 :
1525 15 : SlotSyncCtx->syncing = true;
1526 :
1527 : /*
1528 : * Advertise the required PID so that the startup process can kill the
1529 : * slot sync process on promotion.
1530 : */
1531 15 : SlotSyncCtx->pid = sync_process_pid;
1532 :
1533 15 : SpinLockRelease(&SlotSyncCtx->mutex);
1534 :
1535 15 : syncing_slots = true;
1536 15 : }
1537 :
1538 : /*
1539 : * Reset syncing flag.
1540 : */
1541 : static void
1542 9 : reset_syncing_flag(void)
1543 : {
1544 9 : SpinLockAcquire(&SlotSyncCtx->mutex);
1545 9 : SlotSyncCtx->syncing = false;
1546 9 : SlotSyncCtx->pid = InvalidPid;
1547 9 : SpinLockRelease(&SlotSyncCtx->mutex);
1548 :
1549 9 : syncing_slots = false;
1550 9 : }
1551 :
1552 : /*
1553 : * The main loop of our worker process.
1554 : *
1555 : * It connects to the primary server, fetches logical failover slots
1556 : * information periodically in order to create and sync the slots.
1557 : *
1558 : * Note: If any changes are made here, check if the corresponding SQL
1559 : * function logic in SyncReplicationSlots() also needs to be changed.
1560 : */
1561 : void
1562 6 : ReplSlotSyncWorkerMain(const void *startup_data, size_t startup_data_len)
1563 : {
1564 6 : WalReceiverConn *wrconn = NULL;
1565 : char *dbname;
1566 : char *err;
1567 : sigjmp_buf local_sigjmp_buf;
1568 : StringInfoData app_name;
1569 :
1570 : Assert(startup_data_len == 0);
1571 :
1572 : /* Release postmaster's working memory context */
1573 6 : if (PostmasterContext)
1574 : {
1575 6 : MemoryContextDelete(PostmasterContext);
1576 6 : PostmasterContext = NULL;
1577 : }
1578 :
1579 6 : init_ps_display(NULL);
1580 :
1581 : Assert(GetProcessingMode() == InitProcessing);
1582 :
1583 : /*
1584 : * Create a per-backend PGPROC struct in shared memory. We must do this
1585 : * before we access any shared memory.
1586 : */
1587 6 : InitProcess();
1588 :
1589 : /*
1590 : * Early initialization.
1591 : */
1592 6 : BaseInit();
1593 :
1594 : Assert(SlotSyncCtx != NULL);
1595 :
1596 : /*
1597 : * If an exception is encountered, processing resumes here.
1598 : *
1599 : * We just need to clean up, report the error, and go away.
1600 : *
1601 : * If we do not have this handling here, then since this worker process
1602 : * operates at the bottom of the exception stack, ERRORs turn into FATALs.
1603 : * Therefore, we create our own exception handler to catch ERRORs.
1604 : */
1605 6 : if (sigsetjmp(local_sigjmp_buf, 1) != 0)
1606 : {
1607 : /* since not using PG_TRY, must reset error stack by hand */
1608 2 : error_context_stack = NULL;
1609 :
1610 : /* Prevents interrupts while cleaning up */
1611 2 : HOLD_INTERRUPTS();
1612 :
1613 : /* Report the error to the server log */
1614 2 : EmitErrorReport();
1615 :
1616 : /*
1617 : * We can now go away. Note that because we called InitProcess, a
1618 : * callback was registered to do ProcKill, which will clean up
1619 : * necessary state.
1620 : */
1621 2 : proc_exit(0);
1622 : }
1623 :
1624 : /* We can now handle ereport(ERROR) */
1625 6 : PG_exception_stack = &local_sigjmp_buf;
1626 :
1627 : /* Setup signal handling */
1628 6 : pqsignal(SIGHUP, SignalHandlerForConfigReload);
1629 6 : pqsignal(SIGINT, StatementCancelHandler);
1630 6 : pqsignal(SIGTERM, die);
1631 6 : pqsignal(SIGFPE, FloatExceptionHandler);
1632 6 : pqsignal(SIGUSR1, procsignal_sigusr1_handler);
1633 6 : pqsignal(SIGUSR2, PG_SIG_IGN);
1634 6 : pqsignal(SIGPIPE, PG_SIG_IGN);
1635 6 : pqsignal(SIGCHLD, PG_SIG_DFL);
1636 :
1637 6 : check_and_set_sync_info(MyProcPid);
1638 :
1639 6 : ereport(LOG, errmsg("slot sync worker started"));
1640 :
1641 : /* Register it as soon as SlotSyncCtx->pid is initialized. */
1642 6 : before_shmem_exit(slotsync_worker_onexit, (Datum) 0);
1643 :
1644 : /*
1645 : * Establishes SIGALRM handler and initialize timeout module. It is needed
1646 : * by InitPostgres to register different timeouts.
1647 : */
1648 6 : InitializeTimeouts();
1649 :
1650 : /* Load the libpq-specific functions */
1651 6 : load_file("libpqwalreceiver", false);
1652 :
1653 : /*
1654 : * Unblock signals (they were blocked when the postmaster forked us)
1655 : */
1656 6 : sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
1657 :
1658 : /*
1659 : * Set always-secure search path, so malicious users can't redirect user
1660 : * code (e.g. operators).
1661 : *
1662 : * It's not strictly necessary since we won't be scanning or writing to
1663 : * any user table locally, but it's good to retain it here for added
1664 : * precaution.
1665 : */
1666 6 : SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
1667 :
1668 6 : dbname = CheckAndGetDbnameFromConninfo();
1669 :
1670 : /*
1671 : * Connect to the database specified by the user in primary_conninfo. We
1672 : * need a database connection for walrcv_exec to work which we use to
1673 : * fetch slot information from the remote node. See comments atop
1674 : * libpqrcv_exec.
1675 : *
1676 : * We do not specify a specific user here since the slot sync worker will
1677 : * operate as a superuser. This is safe because the slot sync worker does
1678 : * not interact with user tables, eliminating the risk of executing
1679 : * arbitrary code within triggers.
1680 : */
1681 6 : InitPostgres(dbname, InvalidOid, NULL, InvalidOid, 0, NULL);
1682 :
1683 6 : SetProcessingMode(NormalProcessing);
1684 :
1685 6 : initStringInfo(&app_name);
1686 6 : if (cluster_name[0])
1687 6 : appendStringInfo(&app_name, "%s_%s", cluster_name, "slotsync worker");
1688 : else
1689 0 : appendStringInfoString(&app_name, "slotsync worker");
1690 :
1691 : /*
1692 : * Establish the connection to the primary server for slot
1693 : * synchronization.
1694 : */
1695 6 : wrconn = walrcv_connect(PrimaryConnInfo, false, false, false,
1696 : app_name.data, &err);
1697 :
1698 6 : if (!wrconn)
1699 0 : ereport(ERROR,
1700 : errcode(ERRCODE_CONNECTION_FAILURE),
1701 : errmsg("synchronization worker \"%s\" could not connect to the primary server: %s",
1702 : app_name.data, err));
1703 :
1704 6 : pfree(app_name.data);
1705 :
1706 : /*
1707 : * Register the disconnection callback.
1708 : *
1709 : * XXX: This can be combined with previous cleanup registration of
1710 : * slotsync_worker_onexit() but that will need the connection to be made
1711 : * global and we want to avoid introducing global for this purpose.
1712 : */
1713 6 : before_shmem_exit(slotsync_worker_disconnect, PointerGetDatum(wrconn));
1714 :
1715 : /*
1716 : * Using the specified primary server connection, check that we are not a
1717 : * cascading standby and slot configured in 'primary_slot_name' exists on
1718 : * the primary server.
1719 : */
1720 6 : validate_remote_info(wrconn);
1721 :
1722 : /* Main loop to synchronize slots */
1723 : for (;;)
1724 24 : {
1725 30 : bool some_slot_updated = false;
1726 30 : bool started_tx = false;
1727 : List *remote_slots;
1728 :
1729 30 : CHECK_FOR_INTERRUPTS();
1730 :
1731 27 : if (ConfigReloadPending)
1732 1 : slotsync_reread_config();
1733 :
1734 : /*
1735 : * The syscache access in fetch_remote_slots() needs a transaction
1736 : * env.
1737 : */
1738 26 : if (!IsTransactionState())
1739 : {
1740 26 : StartTransactionCommand();
1741 26 : started_tx = true;
1742 : }
1743 :
1744 26 : remote_slots = fetch_remote_slots(wrconn, NIL);
1745 24 : some_slot_updated = synchronize_slots(wrconn, remote_slots, NULL);
1746 24 : list_free_deep(remote_slots);
1747 :
1748 24 : if (started_tx)
1749 24 : CommitTransactionCommand();
1750 :
1751 24 : wait_for_slot_activity(some_slot_updated);
1752 : }
1753 :
1754 : /*
1755 : * The slot sync worker can't get here because it will only stop when it
1756 : * receives a stop request from the startup process, or when there is an
1757 : * error.
1758 : */
1759 : Assert(false);
1760 : }
1761 :
1762 : /*
1763 : * Update the inactive_since property for synced slots.
1764 : *
1765 : * Note that this function is currently called when we shutdown the slot
1766 : * sync machinery.
1767 : */
1768 : static void
1769 1015 : update_synced_slots_inactive_since(void)
1770 : {
1771 1015 : TimestampTz now = 0;
1772 :
1773 : /*
1774 : * We need to update inactive_since only when we are promoting standby to
1775 : * correctly interpret the inactive_since if the standby gets promoted
1776 : * without a restart. We don't want the slots to appear inactive for a
1777 : * long time after promotion if they haven't been synchronized recently.
1778 : * Whoever acquires the slot, i.e., makes the slot active, will reset it.
1779 : */
1780 1015 : if (!StandbyMode)
1781 959 : return;
1782 :
1783 : /* The slot sync worker or the SQL function mustn't be running by now */
1784 : Assert((SlotSyncCtx->pid == InvalidPid) && !SlotSyncCtx->syncing);
1785 :
1786 56 : LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1787 :
1788 879 : for (int i = 0; i < max_replication_slots + max_repack_replication_slots; i++)
1789 : {
1790 823 : ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
1791 :
1792 : /* Check if it is a synchronized slot */
1793 823 : if (s->in_use && s->data.synced)
1794 : {
1795 : Assert(SlotIsLogical(s));
1796 :
1797 : /* The slot must not be acquired by any process */
1798 : Assert(s->active_proc == INVALID_PROC_NUMBER);
1799 :
1800 : /* Use the same inactive_since time for all the slots. */
1801 3 : if (now == 0)
1802 2 : now = GetCurrentTimestamp();
1803 :
1804 3 : ReplicationSlotSetInactiveSince(s, now, true);
1805 : }
1806 : }
1807 :
1808 56 : LWLockRelease(ReplicationSlotControlLock);
1809 : }
1810 :
1811 : /*
1812 : * Shut down slot synchronization.
1813 : *
1814 : * This function sets stopSignaled=true and wakes up the slot sync process
1815 : * (either worker or backend running the SQL function pg_sync_replication_slots())
1816 : * so that worker can exit or the SQL function pg_sync_replication_slots() can
1817 : * finish. It also waits till the slot sync worker has exited or
1818 : * pg_sync_replication_slots() has finished.
1819 : */
1820 : void
1821 1015 : ShutDownSlotSync(void)
1822 : {
1823 : pid_t sync_process_pid;
1824 :
1825 1015 : SpinLockAcquire(&SlotSyncCtx->mutex);
1826 :
1827 1015 : SlotSyncCtx->stopSignaled = true;
1828 :
1829 : /*
1830 : * Return if neither the slot sync worker is running nor the function
1831 : * pg_sync_replication_slots() is executing.
1832 : */
1833 1015 : if (!SlotSyncCtx->syncing)
1834 : {
1835 1014 : SpinLockRelease(&SlotSyncCtx->mutex);
1836 1014 : update_synced_slots_inactive_since();
1837 1014 : return;
1838 : }
1839 :
1840 1 : sync_process_pid = SlotSyncCtx->pid;
1841 :
1842 1 : SpinLockRelease(&SlotSyncCtx->mutex);
1843 :
1844 : /*
1845 : * Signal process doing slotsync, if any, asking it to stop.
1846 : */
1847 1 : if (sync_process_pid != InvalidPid)
1848 1 : SendProcSignal(sync_process_pid, PROCSIG_SLOTSYNC_MESSAGE,
1849 : INVALID_PROC_NUMBER);
1850 :
1851 : /* Wait for slot sync to end */
1852 : for (;;)
1853 0 : {
1854 : int rc;
1855 :
1856 : /* Wait a bit, we don't expect to have to wait long */
1857 1 : rc = WaitLatch(MyLatch,
1858 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
1859 : 10L, WAIT_EVENT_REPLICATION_SLOTSYNC_SHUTDOWN);
1860 :
1861 1 : if (rc & WL_LATCH_SET)
1862 : {
1863 0 : ResetLatch(MyLatch);
1864 0 : CHECK_FOR_INTERRUPTS();
1865 : }
1866 :
1867 1 : SpinLockAcquire(&SlotSyncCtx->mutex);
1868 :
1869 : /* Ensure that no process is syncing the slots. */
1870 1 : if (!SlotSyncCtx->syncing)
1871 1 : break;
1872 :
1873 0 : SpinLockRelease(&SlotSyncCtx->mutex);
1874 : }
1875 :
1876 1 : SpinLockRelease(&SlotSyncCtx->mutex);
1877 :
1878 1 : update_synced_slots_inactive_since();
1879 : }
1880 :
1881 : /*
1882 : * SlotSyncWorkerCanRestart
1883 : *
1884 : * Return true, indicating worker is allowed to restart, if enough time has
1885 : * passed since it was last launched to reach SLOTSYNC_RESTART_INTERVAL_SEC.
1886 : * Otherwise return false.
1887 : *
1888 : * This is a safety valve to protect against continuous respawn attempts if the
1889 : * worker is dying immediately at launch. Note that since we will retry to
1890 : * launch the worker from the postmaster main loop, we will get another
1891 : * chance later.
1892 : */
1893 : bool
1894 15 : SlotSyncWorkerCanRestart(void)
1895 : {
1896 15 : time_t curtime = time(NULL);
1897 :
1898 : /*
1899 : * If first time through, or time somehow went backwards, always update
1900 : * last_start_time to match the current clock and allow worker start.
1901 : * Otherwise allow it only once enough time has elapsed.
1902 : */
1903 15 : if (SlotSyncCtx->last_start_time == 0 ||
1904 9 : curtime < SlotSyncCtx->last_start_time ||
1905 9 : curtime - SlotSyncCtx->last_start_time >= SLOTSYNC_RESTART_INTERVAL_SEC)
1906 : {
1907 6 : SlotSyncCtx->last_start_time = curtime;
1908 6 : return true;
1909 : }
1910 9 : return false;
1911 : }
1912 :
1913 : /*
1914 : * Is current process syncing replication slots?
1915 : *
1916 : * Could be either backend executing SQL function or slot sync worker.
1917 : */
1918 : bool
1919 29 : IsSyncingReplicationSlots(void)
1920 : {
1921 29 : return syncing_slots;
1922 : }
1923 :
1924 : /*
1925 : * Register shared memory space needed for slot synchronization.
1926 : */
1927 : static void
1928 1245 : SlotSyncShmemRequest(void *arg)
1929 : {
1930 1245 : ShmemRequestStruct(.name = "Slot Sync Data",
1931 : .size = sizeof(SlotSyncCtxStruct),
1932 : .ptr = (void **) &SlotSyncCtx,
1933 : );
1934 1245 : }
1935 :
1936 : /*
1937 : * Initialize shared memory for slot synchronization.
1938 : */
1939 : static void
1940 1242 : SlotSyncShmemInit(void *arg)
1941 : {
1942 1242 : memset(SlotSyncCtx, 0, sizeof(SlotSyncCtxStruct));
1943 1242 : SlotSyncCtx->pid = InvalidPid;
1944 1242 : SpinLockInit(&SlotSyncCtx->mutex);
1945 1242 : }
1946 :
1947 : /*
1948 : * Error cleanup callback for slot sync SQL function.
1949 : */
1950 : static void
1951 1 : slotsync_failure_callback(int code, Datum arg)
1952 : {
1953 1 : WalReceiverConn *wrconn = (WalReceiverConn *) DatumGetPointer(arg);
1954 :
1955 : /*
1956 : * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1957 : *
1958 : * The startup process during promotion invokes ShutDownSlotSync() which
1959 : * waits for slot sync to finish and it does that by checking the
1960 : * 'syncing' flag. Thus the SQL function must be done with slots' release
1961 : * and cleanup to avoid any dangling temporary slots or active slots
1962 : * before it marks itself as finished syncing.
1963 : */
1964 :
1965 : /* Make sure active replication slots are released */
1966 1 : if (MyReplicationSlot != NULL)
1967 0 : ReplicationSlotRelease();
1968 :
1969 : /* Also cleanup the synced temporary slots. */
1970 1 : ReplicationSlotCleanup(true);
1971 :
1972 : /*
1973 : * The set syncing_slots indicates that the process errored out without
1974 : * resetting the flag. So, we need to clean up shared memory and reset the
1975 : * flag here.
1976 : */
1977 1 : if (syncing_slots)
1978 1 : reset_syncing_flag();
1979 :
1980 1 : walrcv_disconnect(wrconn);
1981 1 : }
1982 :
1983 : /*
1984 : * Helper function to extract slot names from a list of remote slots
1985 : */
1986 : static List *
1987 1 : extract_slot_names(List *remote_slots)
1988 : {
1989 1 : List *slot_names = NIL;
1990 :
1991 3 : foreach_ptr(RemoteSlot, remote_slot, remote_slots)
1992 : {
1993 : char *slot_name;
1994 :
1995 1 : slot_name = pstrdup(remote_slot->name);
1996 1 : slot_names = lappend(slot_names, slot_name);
1997 : }
1998 :
1999 1 : return slot_names;
2000 : }
2001 :
2002 : /*
2003 : * Synchronize the failover enabled replication slots using the specified
2004 : * primary server connection.
2005 : *
2006 : * Repeatedly fetches and updates replication slot information from the
2007 : * primary until all slots are at least "sync ready".
2008 : *
2009 : * Exits early if promotion is triggered or certain critical
2010 : * configuration parameters have changed.
2011 : */
2012 : void
2013 9 : SyncReplicationSlots(WalReceiverConn *wrconn)
2014 : {
2015 9 : PG_ENSURE_ERROR_CLEANUP(slotsync_failure_callback, PointerGetDatum(wrconn));
2016 : {
2017 9 : List *remote_slots = NIL;
2018 9 : List *slot_names = NIL; /* List of slot names to track */
2019 : MemoryContext sync_retry_ctx;
2020 :
2021 9 : check_and_set_sync_info(MyProcPid);
2022 :
2023 9 : validate_remote_info(wrconn);
2024 :
2025 : /*
2026 : * Setup and use a per-sync-cycle memory context, which is reset every
2027 : * time we loop below. This avoids having to retail freeing the memory
2028 : * used in each sync cycle.
2029 : */
2030 8 : sync_retry_ctx = AllocSetContextCreate(CurrentMemoryContext,
2031 : "slot sync retry context",
2032 : ALLOCSET_DEFAULT_SIZES);
2033 :
2034 : /* Retry until all the slots are sync-ready */
2035 : for (;;)
2036 3 : {
2037 11 : bool slot_persistence_pending = false;
2038 11 : bool some_slot_updated = false;
2039 : MemoryContext oldctx;
2040 :
2041 : /* Check for interrupts and config changes */
2042 11 : CHECK_FOR_INTERRUPTS();
2043 :
2044 11 : if (ConfigReloadPending)
2045 0 : slotsync_reread_config();
2046 :
2047 : /* We must be in a valid transaction state */
2048 : Assert(IsTransactionState());
2049 :
2050 11 : MemoryContextReset(sync_retry_ctx);
2051 11 : oldctx = MemoryContextSwitchTo(sync_retry_ctx);
2052 :
2053 : /*
2054 : * Fetch remote slot info for the given slot_names. If slot_names
2055 : * is NIL, fetch all failover-enabled slots. Note that we reuse
2056 : * slot_names from the first iteration; re-fetching all failover
2057 : * slots each time could cause an endless loop. Instead of
2058 : * reprocessing only the pending slots in each iteration, it's
2059 : * better to process all the slots received in the first
2060 : * iteration. This ensures that by the time we're done, all slots
2061 : * reflect the latest values.
2062 : */
2063 11 : remote_slots = fetch_remote_slots(wrconn, slot_names);
2064 :
2065 : /* Attempt to synchronize slots */
2066 11 : some_slot_updated = synchronize_slots(wrconn, remote_slots,
2067 : &slot_persistence_pending);
2068 :
2069 : /*
2070 : * slot_names must survive later sync_retry_ctx resets, so copy it
2071 : * in the outer context.
2072 : */
2073 11 : MemoryContextSwitchTo(oldctx);
2074 :
2075 : /*
2076 : * If slot_persistence_pending is true, extract slot names for
2077 : * future iterations (only needed if we haven't done it yet)
2078 : */
2079 11 : if (slot_names == NIL && slot_persistence_pending)
2080 1 : slot_names = extract_slot_names(remote_slots);
2081 :
2082 : /* Done if all slots are persisted i.e are sync-ready */
2083 11 : if (!slot_persistence_pending)
2084 8 : break;
2085 :
2086 : /* wait before retrying again */
2087 3 : wait_for_slot_activity(some_slot_updated);
2088 : }
2089 :
2090 8 : MemoryContextDelete(sync_retry_ctx);
2091 :
2092 8 : if (slot_names)
2093 1 : list_free_deep(slot_names);
2094 :
2095 : /* Cleanup the synced temporary slots */
2096 8 : ReplicationSlotCleanup(true);
2097 :
2098 : /* We are done with sync, so reset sync flag */
2099 8 : reset_syncing_flag();
2100 : }
2101 9 : PG_END_ENSURE_ERROR_CLEANUP(slotsync_failure_callback, PointerGetDatum(wrconn));
2102 8 : }
|