Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * slotfuncs.c
4 : * Support functions for replication slots
5 : *
6 : * Copyright (c) 2012-2025, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/replication/slotfuncs.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 : #include "postgres.h"
14 :
15 : #include "access/htup_details.h"
16 : #include "access/xlog_internal.h"
17 : #include "access/xlogrecovery.h"
18 : #include "access/xlogutils.h"
19 : #include "funcapi.h"
20 : #include "replication/logical.h"
21 : #include "replication/slot.h"
22 : #include "replication/slotsync.h"
23 : #include "utils/builtins.h"
24 : #include "utils/guc.h"
25 : #include "utils/pg_lsn.h"
26 :
27 : /*
28 : * Helper function for creating a new physical replication slot with
29 : * given arguments. Note that this function doesn't release the created
30 : * slot.
31 : *
32 : * If restart_lsn is a valid value, we use it without WAL reservation
33 : * routine. So the caller must guarantee that WAL is available.
34 : */
35 : static void
36 74 : create_physical_replication_slot(char *name, bool immediately_reserve,
37 : bool temporary, XLogRecPtr restart_lsn)
38 : {
39 : Assert(!MyReplicationSlot);
40 :
41 : /* acquire replication slot, this will check for conflicting names */
42 74 : ReplicationSlotCreate(name, false,
43 : temporary ? RS_TEMPORARY : RS_PERSISTENT, false,
44 : false, false);
45 :
46 74 : if (immediately_reserve)
47 : {
48 : /* Reserve WAL as the user asked for it */
49 34 : if (XLogRecPtrIsInvalid(restart_lsn))
50 26 : ReplicationSlotReserveWal();
51 : else
52 8 : MyReplicationSlot->data.restart_lsn = restart_lsn;
53 :
54 : /* Write this slot to disk */
55 34 : ReplicationSlotMarkDirty();
56 34 : ReplicationSlotSave();
57 : }
58 74 : }
59 :
60 : /*
61 : * SQL function for creating a new physical (streaming replication)
62 : * replication slot.
63 : */
64 : Datum
65 66 : pg_create_physical_replication_slot(PG_FUNCTION_ARGS)
66 : {
67 66 : Name name = PG_GETARG_NAME(0);
68 66 : bool immediately_reserve = PG_GETARG_BOOL(1);
69 66 : bool temporary = PG_GETARG_BOOL(2);
70 : Datum values[2];
71 : bool nulls[2];
72 : TupleDesc tupdesc;
73 : HeapTuple tuple;
74 : Datum result;
75 :
76 66 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
77 0 : elog(ERROR, "return type must be a row type");
78 :
79 66 : CheckSlotPermissions();
80 :
81 66 : CheckSlotRequirements();
82 :
83 66 : create_physical_replication_slot(NameStr(*name),
84 : immediately_reserve,
85 : temporary,
86 : InvalidXLogRecPtr);
87 :
88 66 : values[0] = NameGetDatum(&MyReplicationSlot->data.name);
89 66 : nulls[0] = false;
90 :
91 66 : if (immediately_reserve)
92 : {
93 26 : values[1] = LSNGetDatum(MyReplicationSlot->data.restart_lsn);
94 26 : nulls[1] = false;
95 : }
96 : else
97 40 : nulls[1] = true;
98 :
99 66 : tuple = heap_form_tuple(tupdesc, values, nulls);
100 66 : result = HeapTupleGetDatum(tuple);
101 :
102 66 : ReplicationSlotRelease();
103 :
104 66 : PG_RETURN_DATUM(result);
105 : }
106 :
107 :
108 : /*
109 : * Helper function for creating a new logical replication slot with
110 : * given arguments. Note that this function doesn't release the created
111 : * slot.
112 : *
113 : * When find_startpoint is false, the slot's confirmed_flush is not set; it's
114 : * caller's responsibility to ensure it's set to something sensible.
115 : */
116 : static void
117 252 : create_logical_replication_slot(char *name, char *plugin,
118 : bool temporary, bool two_phase,
119 : bool failover,
120 : XLogRecPtr restart_lsn,
121 : bool find_startpoint)
122 : {
123 252 : LogicalDecodingContext *ctx = NULL;
124 :
125 : Assert(!MyReplicationSlot);
126 :
127 : /*
128 : * Acquire a logical decoding slot, this will check for conflicting names.
129 : * Initially create persistent slot as ephemeral - that allows us to
130 : * nicely handle errors during initialization because it'll get dropped if
131 : * this transaction fails. We'll make it persistent at the end. Temporary
132 : * slots can be created as temporary from beginning as they get dropped on
133 : * error as well.
134 : */
135 252 : ReplicationSlotCreate(name, true,
136 : temporary ? RS_TEMPORARY : RS_EPHEMERAL, two_phase,
137 : failover, false);
138 :
139 : /*
140 : * Create logical decoding context to find start point or, if we don't
141 : * need it, to 1) bump slot's restart_lsn and xmin 2) check plugin sanity.
142 : *
143 : * Note: when !find_startpoint this is still important, because it's at
144 : * this point that the output plugin is validated.
145 : */
146 242 : ctx = CreateInitDecodingContext(plugin, NIL,
147 : false, /* just catalogs is OK */
148 : restart_lsn,
149 242 : XL_ROUTINE(.page_read = read_local_xlog_page,
150 : .segment_open = wal_segment_open,
151 : .segment_close = wal_segment_close),
152 : NULL, NULL, NULL);
153 :
154 : /*
155 : * If caller needs us to determine the decoding start point, do so now.
156 : * This might take a while.
157 : */
158 236 : if (find_startpoint)
159 224 : DecodingContextFindStartpoint(ctx);
160 :
161 : /* don't need the decoding context anymore */
162 232 : FreeDecodingContext(ctx);
163 232 : }
164 :
165 : /*
166 : * SQL function for creating a new logical replication slot.
167 : */
168 : Datum
169 240 : pg_create_logical_replication_slot(PG_FUNCTION_ARGS)
170 : {
171 240 : Name name = PG_GETARG_NAME(0);
172 240 : Name plugin = PG_GETARG_NAME(1);
173 240 : bool temporary = PG_GETARG_BOOL(2);
174 240 : bool two_phase = PG_GETARG_BOOL(3);
175 240 : bool failover = PG_GETARG_BOOL(4);
176 : Datum result;
177 : TupleDesc tupdesc;
178 : HeapTuple tuple;
179 : Datum values[2];
180 : bool nulls[2];
181 :
182 240 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
183 0 : elog(ERROR, "return type must be a row type");
184 :
185 240 : CheckSlotPermissions();
186 :
187 238 : CheckLogicalDecodingRequirements();
188 :
189 238 : create_logical_replication_slot(NameStr(*name),
190 238 : NameStr(*plugin),
191 : temporary,
192 : two_phase,
193 : failover,
194 : InvalidXLogRecPtr,
195 : true);
196 :
197 220 : values[0] = NameGetDatum(&MyReplicationSlot->data.name);
198 220 : values[1] = LSNGetDatum(MyReplicationSlot->data.confirmed_flush);
199 :
200 220 : memset(nulls, 0, sizeof(nulls));
201 :
202 220 : tuple = heap_form_tuple(tupdesc, values, nulls);
203 220 : result = HeapTupleGetDatum(tuple);
204 :
205 : /* ok, slot is now fully created, mark it as persistent if needed */
206 220 : if (!temporary)
207 210 : ReplicationSlotPersist();
208 220 : ReplicationSlotRelease();
209 :
210 220 : PG_RETURN_DATUM(result);
211 : }
212 :
213 :
214 : /*
215 : * SQL function for dropping a replication slot.
216 : */
217 : Datum
218 264 : pg_drop_replication_slot(PG_FUNCTION_ARGS)
219 : {
220 264 : Name name = PG_GETARG_NAME(0);
221 :
222 264 : CheckSlotPermissions();
223 :
224 260 : CheckSlotRequirements();
225 :
226 260 : ReplicationSlotDrop(NameStr(*name), true);
227 :
228 248 : PG_RETURN_VOID();
229 : }
230 :
231 : /*
232 : * pg_get_replication_slots - SQL SRF showing all replication slots
233 : * that currently exist on the database cluster.
234 : */
235 : Datum
236 592 : pg_get_replication_slots(PG_FUNCTION_ARGS)
237 : {
238 : #define PG_GET_REPLICATION_SLOTS_COLS 20
239 592 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
240 : XLogRecPtr currlsn;
241 : int slotno;
242 :
243 : /*
244 : * We don't require any special permission to see this function's data
245 : * because nothing should be sensitive. The most critical being the slot
246 : * name, which shouldn't contain anything particularly sensitive.
247 : */
248 :
249 592 : InitMaterializedSRF(fcinfo, 0);
250 :
251 592 : currlsn = GetXLogWriteRecPtr();
252 :
253 592 : LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
254 4960 : for (slotno = 0; slotno < max_replication_slots; slotno++)
255 : {
256 4368 : ReplicationSlot *slot = &ReplicationSlotCtl->replication_slots[slotno];
257 : ReplicationSlot slot_contents;
258 : Datum values[PG_GET_REPLICATION_SLOTS_COLS];
259 : bool nulls[PG_GET_REPLICATION_SLOTS_COLS];
260 : WALAvailability walstate;
261 : int i;
262 : ReplicationSlotInvalidationCause cause;
263 :
264 4368 : if (!slot->in_use)
265 3402 : continue;
266 :
267 : /* Copy slot contents while holding spinlock, then examine at leisure */
268 966 : SpinLockAcquire(&slot->mutex);
269 966 : slot_contents = *slot;
270 966 : SpinLockRelease(&slot->mutex);
271 :
272 966 : memset(values, 0, sizeof(values));
273 966 : memset(nulls, 0, sizeof(nulls));
274 :
275 966 : i = 0;
276 966 : values[i++] = NameGetDatum(&slot_contents.data.name);
277 :
278 966 : if (slot_contents.data.database == InvalidOid)
279 274 : nulls[i++] = true;
280 : else
281 692 : values[i++] = NameGetDatum(&slot_contents.data.plugin);
282 :
283 966 : if (slot_contents.data.database == InvalidOid)
284 274 : values[i++] = CStringGetTextDatum("physical");
285 : else
286 692 : values[i++] = CStringGetTextDatum("logical");
287 :
288 966 : if (slot_contents.data.database == InvalidOid)
289 274 : nulls[i++] = true;
290 : else
291 692 : values[i++] = ObjectIdGetDatum(slot_contents.data.database);
292 :
293 966 : values[i++] = BoolGetDatum(slot_contents.data.persistency == RS_TEMPORARY);
294 966 : values[i++] = BoolGetDatum(slot_contents.active_pid != 0);
295 :
296 966 : if (slot_contents.active_pid != 0)
297 304 : values[i++] = Int32GetDatum(slot_contents.active_pid);
298 : else
299 662 : nulls[i++] = true;
300 :
301 966 : if (slot_contents.data.xmin != InvalidTransactionId)
302 126 : values[i++] = TransactionIdGetDatum(slot_contents.data.xmin);
303 : else
304 840 : nulls[i++] = true;
305 :
306 966 : if (slot_contents.data.catalog_xmin != InvalidTransactionId)
307 760 : values[i++] = TransactionIdGetDatum(slot_contents.data.catalog_xmin);
308 : else
309 206 : nulls[i++] = true;
310 :
311 966 : if (slot_contents.data.restart_lsn != InvalidXLogRecPtr)
312 932 : values[i++] = LSNGetDatum(slot_contents.data.restart_lsn);
313 : else
314 34 : nulls[i++] = true;
315 :
316 966 : if (slot_contents.data.confirmed_flush != InvalidXLogRecPtr)
317 644 : values[i++] = LSNGetDatum(slot_contents.data.confirmed_flush);
318 : else
319 322 : nulls[i++] = true;
320 :
321 : /*
322 : * If the slot has not been invalidated, test availability from
323 : * restart_lsn.
324 : */
325 966 : if (slot_contents.data.invalidated != RS_INVAL_NONE)
326 70 : walstate = WALAVAIL_REMOVED;
327 : else
328 896 : walstate = GetWALAvailability(slot_contents.data.restart_lsn);
329 :
330 966 : switch (walstate)
331 : {
332 28 : case WALAVAIL_INVALID_LSN:
333 28 : nulls[i++] = true;
334 28 : break;
335 :
336 862 : case WALAVAIL_RESERVED:
337 862 : values[i++] = CStringGetTextDatum("reserved");
338 862 : break;
339 :
340 4 : case WALAVAIL_EXTENDED:
341 4 : values[i++] = CStringGetTextDatum("extended");
342 4 : break;
343 :
344 2 : case WALAVAIL_UNRESERVED:
345 2 : values[i++] = CStringGetTextDatum("unreserved");
346 2 : break;
347 :
348 70 : case WALAVAIL_REMOVED:
349 :
350 : /*
351 : * If we read the restart_lsn long enough ago, maybe that file
352 : * has been removed by now. However, the walsender could have
353 : * moved forward enough that it jumped to another file after
354 : * we looked. If checkpointer signalled the process to
355 : * termination, then it's definitely lost; but if a process is
356 : * still alive, then "unreserved" seems more appropriate.
357 : *
358 : * If we do change it, save the state for safe_wal_size below.
359 : */
360 70 : if (!XLogRecPtrIsInvalid(slot_contents.data.restart_lsn))
361 : {
362 : int pid;
363 :
364 64 : SpinLockAcquire(&slot->mutex);
365 64 : pid = slot->active_pid;
366 64 : slot_contents.data.restart_lsn = slot->data.restart_lsn;
367 64 : SpinLockRelease(&slot->mutex);
368 64 : if (pid != 0)
369 : {
370 0 : values[i++] = CStringGetTextDatum("unreserved");
371 0 : walstate = WALAVAIL_UNRESERVED;
372 0 : break;
373 : }
374 : }
375 70 : values[i++] = CStringGetTextDatum("lost");
376 70 : break;
377 : }
378 :
379 : /*
380 : * safe_wal_size is only computed for slots that have not been lost,
381 : * and only if there's a configured maximum size.
382 : */
383 966 : if (walstate == WALAVAIL_REMOVED || max_slot_wal_keep_size_mb < 0)
384 956 : nulls[i++] = true;
385 : else
386 : {
387 : XLogSegNo targetSeg;
388 : uint64 slotKeepSegs;
389 : uint64 keepSegs;
390 : XLogSegNo failSeg;
391 : XLogRecPtr failLSN;
392 :
393 10 : XLByteToSeg(slot_contents.data.restart_lsn, targetSeg, wal_segment_size);
394 :
395 : /* determine how many segments can be kept by slots */
396 10 : slotKeepSegs = XLogMBVarToSegs(max_slot_wal_keep_size_mb, wal_segment_size);
397 : /* ditto for wal_keep_size */
398 10 : keepSegs = XLogMBVarToSegs(wal_keep_size_mb, wal_segment_size);
399 :
400 : /* if currpos reaches failLSN, we lose our segment */
401 10 : failSeg = targetSeg + Max(slotKeepSegs, keepSegs) + 1;
402 10 : XLogSegNoOffsetToRecPtr(failSeg, 0, wal_segment_size, failLSN);
403 :
404 10 : values[i++] = Int64GetDatum(failLSN - currlsn);
405 : }
406 :
407 966 : values[i++] = BoolGetDatum(slot_contents.data.two_phase);
408 :
409 966 : if (slot_contents.data.two_phase &&
410 26 : !XLogRecPtrIsInvalid(slot_contents.data.two_phase_at))
411 26 : values[i++] = LSNGetDatum(slot_contents.data.two_phase_at);
412 : else
413 940 : nulls[i++] = true;
414 :
415 966 : if (slot_contents.inactive_since > 0)
416 676 : values[i++] = TimestampTzGetDatum(slot_contents.inactive_since);
417 : else
418 290 : nulls[i++] = true;
419 :
420 966 : cause = slot_contents.data.invalidated;
421 :
422 966 : if (SlotIsPhysical(&slot_contents))
423 274 : nulls[i++] = true;
424 : else
425 : {
426 : /*
427 : * rows_removed and wal_level_insufficient are the only two
428 : * reasons for the logical slot's conflict with recovery.
429 : */
430 692 : if (cause == RS_INVAL_HORIZON ||
431 : cause == RS_INVAL_WAL_LEVEL)
432 56 : values[i++] = BoolGetDatum(true);
433 : else
434 636 : values[i++] = BoolGetDatum(false);
435 : }
436 :
437 966 : if (cause == RS_INVAL_NONE)
438 896 : nulls[i++] = true;
439 : else
440 70 : values[i++] = CStringGetTextDatum(GetSlotInvalidationCauseName(cause));
441 :
442 966 : values[i++] = BoolGetDatum(slot_contents.data.failover);
443 :
444 966 : values[i++] = BoolGetDatum(slot_contents.data.synced);
445 :
446 : Assert(i == PG_GET_REPLICATION_SLOTS_COLS);
447 :
448 966 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
449 : values, nulls);
450 : }
451 :
452 592 : LWLockRelease(ReplicationSlotControlLock);
453 :
454 592 : return (Datum) 0;
455 : }
456 :
457 : /*
458 : * Helper function for advancing our physical replication slot forward.
459 : *
460 : * The LSN position to move to is compared simply to the slot's restart_lsn,
461 : * knowing that any position older than that would be removed by successive
462 : * checkpoints.
463 : */
464 : static XLogRecPtr
465 2 : pg_physical_replication_slot_advance(XLogRecPtr moveto)
466 : {
467 2 : XLogRecPtr startlsn = MyReplicationSlot->data.restart_lsn;
468 2 : XLogRecPtr retlsn = startlsn;
469 :
470 : Assert(moveto != InvalidXLogRecPtr);
471 :
472 2 : if (startlsn < moveto)
473 : {
474 2 : SpinLockAcquire(&MyReplicationSlot->mutex);
475 2 : MyReplicationSlot->data.restart_lsn = moveto;
476 2 : SpinLockRelease(&MyReplicationSlot->mutex);
477 2 : retlsn = moveto;
478 :
479 : /*
480 : * Dirty the slot so as it is written out at the next checkpoint. Note
481 : * that the LSN position advanced may still be lost in the event of a
482 : * crash, but this makes the data consistent after a clean shutdown.
483 : */
484 2 : ReplicationSlotMarkDirty();
485 :
486 : /*
487 : * Wake up logical walsenders holding logical failover slots after
488 : * updating the restart_lsn of the physical slot.
489 : */
490 2 : PhysicalWakeupLogicalWalSnd();
491 : }
492 :
493 2 : return retlsn;
494 : }
495 :
496 : /*
497 : * Advance our logical replication slot forward. See
498 : * LogicalSlotAdvanceAndCheckSnapState for details.
499 : */
500 : static XLogRecPtr
501 10 : pg_logical_replication_slot_advance(XLogRecPtr moveto)
502 : {
503 10 : return LogicalSlotAdvanceAndCheckSnapState(moveto, NULL);
504 : }
505 :
506 : /*
507 : * SQL function for moving the position in a replication slot.
508 : */
509 : Datum
510 16 : pg_replication_slot_advance(PG_FUNCTION_ARGS)
511 : {
512 16 : Name slotname = PG_GETARG_NAME(0);
513 16 : XLogRecPtr moveto = PG_GETARG_LSN(1);
514 : XLogRecPtr endlsn;
515 : XLogRecPtr minlsn;
516 : TupleDesc tupdesc;
517 : Datum values[2];
518 : bool nulls[2];
519 : HeapTuple tuple;
520 : Datum result;
521 :
522 : Assert(!MyReplicationSlot);
523 :
524 16 : CheckSlotPermissions();
525 :
526 16 : if (XLogRecPtrIsInvalid(moveto))
527 2 : ereport(ERROR,
528 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
529 : errmsg("invalid target WAL LSN")));
530 :
531 : /* Build a tuple descriptor for our result type */
532 14 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
533 0 : elog(ERROR, "return type must be a row type");
534 :
535 : /*
536 : * We can't move slot past what's been flushed/replayed so clamp the
537 : * target position accordingly.
538 : */
539 14 : if (!RecoveryInProgress())
540 14 : moveto = Min(moveto, GetFlushRecPtr(NULL));
541 : else
542 0 : moveto = Min(moveto, GetXLogReplayRecPtr(NULL));
543 :
544 : /* Acquire the slot so we "own" it */
545 14 : ReplicationSlotAcquire(NameStr(*slotname), true, true);
546 :
547 : /* A slot whose restart_lsn has never been reserved cannot be advanced */
548 14 : if (XLogRecPtrIsInvalid(MyReplicationSlot->data.restart_lsn))
549 2 : ereport(ERROR,
550 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
551 : errmsg("replication slot \"%s\" cannot be advanced",
552 : NameStr(*slotname)),
553 : errdetail("This slot has never previously reserved WAL, or it has been invalidated.")));
554 :
555 : /*
556 : * Check if the slot is not moving backwards. Physical slots rely simply
557 : * on restart_lsn as a minimum point, while logical slots have confirmed
558 : * consumption up to confirmed_flush, meaning that in both cases data
559 : * older than that is not available anymore.
560 : */
561 12 : if (OidIsValid(MyReplicationSlot->data.database))
562 10 : minlsn = MyReplicationSlot->data.confirmed_flush;
563 : else
564 2 : minlsn = MyReplicationSlot->data.restart_lsn;
565 :
566 12 : if (moveto < minlsn)
567 0 : ereport(ERROR,
568 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
569 : errmsg("cannot advance replication slot to %X/%X, minimum is %X/%X",
570 : LSN_FORMAT_ARGS(moveto), LSN_FORMAT_ARGS(minlsn))));
571 :
572 : /* Do the actual slot update, depending on the slot type */
573 12 : if (OidIsValid(MyReplicationSlot->data.database))
574 10 : endlsn = pg_logical_replication_slot_advance(moveto);
575 : else
576 2 : endlsn = pg_physical_replication_slot_advance(moveto);
577 :
578 12 : values[0] = NameGetDatum(&MyReplicationSlot->data.name);
579 12 : nulls[0] = false;
580 :
581 : /*
582 : * Recompute the minimum LSN and xmin across all slots to adjust with the
583 : * advancing potentially done.
584 : */
585 12 : ReplicationSlotsComputeRequiredXmin(false);
586 12 : ReplicationSlotsComputeRequiredLSN();
587 :
588 12 : ReplicationSlotRelease();
589 :
590 : /* Return the reached position. */
591 12 : values[1] = LSNGetDatum(endlsn);
592 12 : nulls[1] = false;
593 :
594 12 : tuple = heap_form_tuple(tupdesc, values, nulls);
595 12 : result = HeapTupleGetDatum(tuple);
596 :
597 12 : PG_RETURN_DATUM(result);
598 : }
599 :
600 : /*
601 : * Helper function of copying a replication slot.
602 : */
603 : static Datum
604 30 : copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot)
605 : {
606 30 : Name src_name = PG_GETARG_NAME(0);
607 30 : Name dst_name = PG_GETARG_NAME(1);
608 30 : ReplicationSlot *src = NULL;
609 : ReplicationSlot first_slot_contents;
610 : ReplicationSlot second_slot_contents;
611 : XLogRecPtr src_restart_lsn;
612 : bool src_islogical;
613 : bool temporary;
614 : char *plugin;
615 : Datum values[2];
616 : bool nulls[2];
617 : Datum result;
618 : TupleDesc tupdesc;
619 : HeapTuple tuple;
620 :
621 30 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
622 0 : elog(ERROR, "return type must be a row type");
623 :
624 30 : CheckSlotPermissions();
625 :
626 30 : if (logical_slot)
627 18 : CheckLogicalDecodingRequirements();
628 : else
629 12 : CheckSlotRequirements();
630 :
631 30 : LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
632 :
633 : /*
634 : * We need to prevent the source slot's reserved WAL from being removed,
635 : * but we don't want to lock that slot for very long, and it can advance
636 : * in the meantime. So obtain the source slot's data, and create a new
637 : * slot using its restart_lsn. Afterwards we lock the source slot again
638 : * and verify that the data we copied (name, type) has not changed
639 : * incompatibly. No inconvenient WAL removal can occur once the new slot
640 : * is created -- but since WAL removal could have occurred before we
641 : * managed to create the new slot, we advance the new slot's restart_lsn
642 : * to the source slot's updated restart_lsn the second time we lock it.
643 : */
644 32 : for (int i = 0; i < max_replication_slots; i++)
645 : {
646 32 : ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
647 :
648 32 : if (s->in_use && strcmp(NameStr(s->data.name), NameStr(*src_name)) == 0)
649 : {
650 : /* Copy the slot contents while holding spinlock */
651 30 : SpinLockAcquire(&s->mutex);
652 30 : first_slot_contents = *s;
653 30 : SpinLockRelease(&s->mutex);
654 30 : src = s;
655 30 : break;
656 : }
657 : }
658 :
659 30 : LWLockRelease(ReplicationSlotControlLock);
660 :
661 30 : if (src == NULL)
662 0 : ereport(ERROR,
663 : (errcode(ERRCODE_UNDEFINED_OBJECT),
664 : errmsg("replication slot \"%s\" does not exist", NameStr(*src_name))));
665 :
666 30 : src_islogical = SlotIsLogical(&first_slot_contents);
667 30 : src_restart_lsn = first_slot_contents.data.restart_lsn;
668 30 : temporary = (first_slot_contents.data.persistency == RS_TEMPORARY);
669 30 : plugin = logical_slot ? NameStr(first_slot_contents.data.plugin) : NULL;
670 :
671 : /* Check type of replication slot */
672 30 : if (src_islogical != logical_slot)
673 4 : ereport(ERROR,
674 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
675 : src_islogical ?
676 : errmsg("cannot copy physical replication slot \"%s\" as a logical replication slot",
677 : NameStr(*src_name)) :
678 : errmsg("cannot copy logical replication slot \"%s\" as a physical replication slot",
679 : NameStr(*src_name))));
680 :
681 : /* Copying non-reserved slot doesn't make sense */
682 26 : if (XLogRecPtrIsInvalid(src_restart_lsn))
683 2 : ereport(ERROR,
684 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
685 : errmsg("cannot copy a replication slot that doesn't reserve WAL")));
686 :
687 : /* Cannot copy an invalidated replication slot */
688 24 : if (first_slot_contents.data.invalidated != RS_INVAL_NONE)
689 2 : ereport(ERROR,
690 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
691 : errmsg("cannot copy invalidated replication slot \"%s\"",
692 : NameStr(*src_name)));
693 :
694 : /* Overwrite params from optional arguments */
695 22 : if (PG_NARGS() >= 3)
696 12 : temporary = PG_GETARG_BOOL(2);
697 22 : if (PG_NARGS() >= 4)
698 : {
699 : Assert(logical_slot);
700 8 : plugin = NameStr(*(PG_GETARG_NAME(3)));
701 : }
702 :
703 : /* Create new slot and acquire it */
704 22 : if (logical_slot)
705 : {
706 : /*
707 : * We must not try to read WAL, since we haven't reserved it yet --
708 : * hence pass find_startpoint false. confirmed_flush will be set
709 : * below, by copying from the source slot.
710 : *
711 : * We don't copy the failover option to prevent potential issues with
712 : * slot synchronization. For instance, if a slot was synchronized to
713 : * the standby, then dropped on the primary, and immediately recreated
714 : * by copying from another existing slot with much earlier restart_lsn
715 : * and confirmed_flush_lsn, the slot synchronization would only
716 : * observe the LSN of the same slot moving backward. As slot
717 : * synchronization does not copy the restart_lsn and
718 : * confirmed_flush_lsn backward (see update_local_synced_slot() for
719 : * details), if a failover happens before the primary's slot catches
720 : * up, logical replication cannot continue using the synchronized slot
721 : * on the promoted standby because the slot retains the restart_lsn
722 : * and confirmed_flush_lsn that are much later than expected.
723 : */
724 14 : create_logical_replication_slot(NameStr(*dst_name),
725 : plugin,
726 : temporary,
727 : false,
728 : false,
729 : src_restart_lsn,
730 : false);
731 : }
732 : else
733 8 : create_physical_replication_slot(NameStr(*dst_name),
734 : true,
735 : temporary,
736 : src_restart_lsn);
737 :
738 : /*
739 : * Update the destination slot to current values of the source slot;
740 : * recheck that the source slot is still the one we saw previously.
741 : */
742 : {
743 : TransactionId copy_effective_xmin;
744 : TransactionId copy_effective_catalog_xmin;
745 : TransactionId copy_xmin;
746 : TransactionId copy_catalog_xmin;
747 : XLogRecPtr copy_restart_lsn;
748 : XLogRecPtr copy_confirmed_flush;
749 : bool copy_islogical;
750 : char *copy_name;
751 :
752 : /* Copy data of source slot again */
753 20 : SpinLockAcquire(&src->mutex);
754 20 : second_slot_contents = *src;
755 20 : SpinLockRelease(&src->mutex);
756 :
757 20 : copy_effective_xmin = second_slot_contents.effective_xmin;
758 20 : copy_effective_catalog_xmin = second_slot_contents.effective_catalog_xmin;
759 :
760 20 : copy_xmin = second_slot_contents.data.xmin;
761 20 : copy_catalog_xmin = second_slot_contents.data.catalog_xmin;
762 20 : copy_restart_lsn = second_slot_contents.data.restart_lsn;
763 20 : copy_confirmed_flush = second_slot_contents.data.confirmed_flush;
764 :
765 : /* for existence check */
766 20 : copy_name = NameStr(second_slot_contents.data.name);
767 20 : copy_islogical = SlotIsLogical(&second_slot_contents);
768 :
769 : /*
770 : * Check if the source slot still exists and is valid. We regard it as
771 : * invalid if the type of replication slot or name has been changed,
772 : * or the restart_lsn either is invalid or has gone backward. (The
773 : * restart_lsn could go backwards if the source slot is dropped and
774 : * copied from an older slot during installation.)
775 : *
776 : * Since erroring out will release and drop the destination slot we
777 : * don't need to release it here.
778 : */
779 20 : if (copy_restart_lsn < src_restart_lsn ||
780 20 : src_islogical != copy_islogical ||
781 20 : strcmp(copy_name, NameStr(*src_name)) != 0)
782 0 : ereport(ERROR,
783 : (errmsg("could not copy replication slot \"%s\"",
784 : NameStr(*src_name)),
785 : errdetail("The source replication slot was modified incompatibly during the copy operation.")));
786 :
787 : /* The source slot must have a consistent snapshot */
788 20 : if (src_islogical && XLogRecPtrIsInvalid(copy_confirmed_flush))
789 0 : ereport(ERROR,
790 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
791 : errmsg("cannot copy unfinished logical replication slot \"%s\"",
792 : NameStr(*src_name)),
793 : errhint("Retry when the source replication slot's confirmed_flush_lsn is valid.")));
794 :
795 : /*
796 : * Copying an invalid slot doesn't make sense. Note that the source
797 : * slot can become invalid after we create the new slot and copy the
798 : * data of source slot. This is possible because the operations in
799 : * InvalidateObsoleteReplicationSlots() are not serialized with this
800 : * function. Even though we can't detect such a case here, the copied
801 : * slot will become invalid in the next checkpoint cycle.
802 : */
803 20 : if (second_slot_contents.data.invalidated != RS_INVAL_NONE)
804 0 : ereport(ERROR,
805 : errmsg("cannot copy replication slot \"%s\"",
806 : NameStr(*src_name)),
807 : errdetail("The source replication slot was invalidated during the copy operation."));
808 :
809 : /* Install copied values again */
810 20 : SpinLockAcquire(&MyReplicationSlot->mutex);
811 20 : MyReplicationSlot->effective_xmin = copy_effective_xmin;
812 20 : MyReplicationSlot->effective_catalog_xmin = copy_effective_catalog_xmin;
813 :
814 20 : MyReplicationSlot->data.xmin = copy_xmin;
815 20 : MyReplicationSlot->data.catalog_xmin = copy_catalog_xmin;
816 20 : MyReplicationSlot->data.restart_lsn = copy_restart_lsn;
817 20 : MyReplicationSlot->data.confirmed_flush = copy_confirmed_flush;
818 20 : SpinLockRelease(&MyReplicationSlot->mutex);
819 :
820 20 : ReplicationSlotMarkDirty();
821 20 : ReplicationSlotsComputeRequiredXmin(false);
822 20 : ReplicationSlotsComputeRequiredLSN();
823 20 : ReplicationSlotSave();
824 :
825 : #ifdef USE_ASSERT_CHECKING
826 : /* Check that the restart_lsn is available */
827 : {
828 : XLogSegNo segno;
829 :
830 : XLByteToSeg(copy_restart_lsn, segno, wal_segment_size);
831 : Assert(XLogGetLastRemovedSegno() < segno);
832 : }
833 : #endif
834 : }
835 :
836 : /* target slot fully created, mark as persistent if needed */
837 20 : if (logical_slot && !temporary)
838 6 : ReplicationSlotPersist();
839 :
840 : /* All done. Set up the return values */
841 20 : values[0] = NameGetDatum(dst_name);
842 20 : nulls[0] = false;
843 20 : if (!XLogRecPtrIsInvalid(MyReplicationSlot->data.confirmed_flush))
844 : {
845 12 : values[1] = LSNGetDatum(MyReplicationSlot->data.confirmed_flush);
846 12 : nulls[1] = false;
847 : }
848 : else
849 8 : nulls[1] = true;
850 :
851 20 : tuple = heap_form_tuple(tupdesc, values, nulls);
852 20 : result = HeapTupleGetDatum(tuple);
853 :
854 20 : ReplicationSlotRelease();
855 :
856 20 : PG_RETURN_DATUM(result);
857 : }
858 :
859 : /* The wrappers below are all to appease opr_sanity */
860 : Datum
861 8 : pg_copy_logical_replication_slot_a(PG_FUNCTION_ARGS)
862 : {
863 8 : return copy_replication_slot(fcinfo, true);
864 : }
865 :
866 : Datum
867 0 : pg_copy_logical_replication_slot_b(PG_FUNCTION_ARGS)
868 : {
869 0 : return copy_replication_slot(fcinfo, true);
870 : }
871 :
872 : Datum
873 10 : pg_copy_logical_replication_slot_c(PG_FUNCTION_ARGS)
874 : {
875 10 : return copy_replication_slot(fcinfo, true);
876 : }
877 :
878 : Datum
879 4 : pg_copy_physical_replication_slot_a(PG_FUNCTION_ARGS)
880 : {
881 4 : return copy_replication_slot(fcinfo, false);
882 : }
883 :
884 : Datum
885 8 : pg_copy_physical_replication_slot_b(PG_FUNCTION_ARGS)
886 : {
887 8 : return copy_replication_slot(fcinfo, false);
888 : }
889 :
890 : /*
891 : * Synchronize failover enabled replication slots to a standby server
892 : * from the primary server.
893 : */
894 : Datum
895 22 : pg_sync_replication_slots(PG_FUNCTION_ARGS)
896 : {
897 : WalReceiverConn *wrconn;
898 : char *err;
899 : StringInfoData app_name;
900 :
901 22 : CheckSlotPermissions();
902 :
903 20 : if (!RecoveryInProgress())
904 2 : ereport(ERROR,
905 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
906 : errmsg("replication slots can only be synchronized to a standby server"));
907 :
908 18 : ValidateSlotSyncParams(ERROR);
909 :
910 : /* Load the libpq-specific functions */
911 18 : load_file("libpqwalreceiver", false);
912 :
913 18 : (void) CheckAndGetDbnameFromConninfo();
914 :
915 16 : initStringInfo(&app_name);
916 16 : if (cluster_name[0])
917 16 : appendStringInfo(&app_name, "%s_slotsync", cluster_name);
918 : else
919 0 : appendStringInfoString(&app_name, "slotsync");
920 :
921 : /* Connect to the primary server. */
922 16 : wrconn = walrcv_connect(PrimaryConnInfo, false, false, false,
923 : app_name.data, &err);
924 16 : pfree(app_name.data);
925 :
926 16 : if (!wrconn)
927 0 : ereport(ERROR,
928 : errcode(ERRCODE_CONNECTION_FAILURE),
929 : errmsg("synchronization worker \"%s\" could not connect to the primary server: %s",
930 : app_name.data, err));
931 :
932 16 : SyncReplicationSlots(wrconn);
933 :
934 14 : walrcv_disconnect(wrconn);
935 :
936 14 : PG_RETURN_VOID();
937 : }
|