Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * slotfuncs.c
4 : * Support functions for replication slots
5 : *
6 : * Copyright (c) 2012-2024, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/replication/slotfuncs.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 : #include "postgres.h"
14 :
15 : #include "access/htup_details.h"
16 : #include "access/xlog_internal.h"
17 : #include "access/xlogrecovery.h"
18 : #include "access/xlogutils.h"
19 : #include "funcapi.h"
20 : #include "miscadmin.h"
21 : #include "replication/decode.h"
22 : #include "replication/logical.h"
23 : #include "replication/slot.h"
24 : #include "replication/slotsync.h"
25 : #include "utils/builtins.h"
26 : #include "utils/guc.h"
27 : #include "utils/inval.h"
28 : #include "utils/pg_lsn.h"
29 : #include "utils/resowner.h"
30 :
31 : /*
32 : * Helper function for creating a new physical replication slot with
33 : * given arguments. Note that this function doesn't release the created
34 : * slot.
35 : *
36 : * If restart_lsn is a valid value, we use it without WAL reservation
37 : * routine. So the caller must guarantee that WAL is available.
38 : */
39 : static void
40 68 : create_physical_replication_slot(char *name, bool immediately_reserve,
41 : bool temporary, XLogRecPtr restart_lsn)
42 : {
43 : Assert(!MyReplicationSlot);
44 :
45 : /* acquire replication slot, this will check for conflicting names */
46 68 : ReplicationSlotCreate(name, false,
47 : temporary ? RS_TEMPORARY : RS_PERSISTENT, false,
48 : false, false);
49 :
50 68 : if (immediately_reserve)
51 : {
52 : /* Reserve WAL as the user asked for it */
53 32 : if (XLogRecPtrIsInvalid(restart_lsn))
54 24 : ReplicationSlotReserveWal();
55 : else
56 8 : MyReplicationSlot->data.restart_lsn = restart_lsn;
57 :
58 : /* Write this slot to disk */
59 32 : ReplicationSlotMarkDirty();
60 32 : ReplicationSlotSave();
61 : }
62 68 : }
63 :
64 : /*
65 : * SQL function for creating a new physical (streaming replication)
66 : * replication slot.
67 : */
68 : Datum
69 60 : pg_create_physical_replication_slot(PG_FUNCTION_ARGS)
70 : {
71 60 : Name name = PG_GETARG_NAME(0);
72 60 : bool immediately_reserve = PG_GETARG_BOOL(1);
73 60 : bool temporary = PG_GETARG_BOOL(2);
74 : Datum values[2];
75 : bool nulls[2];
76 : TupleDesc tupdesc;
77 : HeapTuple tuple;
78 : Datum result;
79 :
80 60 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
81 0 : elog(ERROR, "return type must be a row type");
82 :
83 60 : CheckSlotPermissions();
84 :
85 60 : CheckSlotRequirements();
86 :
87 60 : create_physical_replication_slot(NameStr(*name),
88 : immediately_reserve,
89 : temporary,
90 : InvalidXLogRecPtr);
91 :
92 60 : values[0] = NameGetDatum(&MyReplicationSlot->data.name);
93 60 : nulls[0] = false;
94 :
95 60 : if (immediately_reserve)
96 : {
97 24 : values[1] = LSNGetDatum(MyReplicationSlot->data.restart_lsn);
98 24 : nulls[1] = false;
99 : }
100 : else
101 36 : nulls[1] = true;
102 :
103 60 : tuple = heap_form_tuple(tupdesc, values, nulls);
104 60 : result = HeapTupleGetDatum(tuple);
105 :
106 60 : ReplicationSlotRelease();
107 :
108 60 : PG_RETURN_DATUM(result);
109 : }
110 :
111 :
112 : /*
113 : * Helper function for creating a new logical replication slot with
114 : * given arguments. Note that this function doesn't release the created
115 : * slot.
116 : *
117 : * When find_startpoint is false, the slot's confirmed_flush is not set; it's
118 : * caller's responsibility to ensure it's set to something sensible.
119 : */
120 : static void
121 224 : create_logical_replication_slot(char *name, char *plugin,
122 : bool temporary, bool two_phase,
123 : bool failover,
124 : XLogRecPtr restart_lsn,
125 : bool find_startpoint)
126 : {
127 224 : LogicalDecodingContext *ctx = NULL;
128 :
129 : Assert(!MyReplicationSlot);
130 :
131 : /*
132 : * Acquire a logical decoding slot, this will check for conflicting names.
133 : * Initially create persistent slot as ephemeral - that allows us to
134 : * nicely handle errors during initialization because it'll get dropped if
135 : * this transaction fails. We'll make it persistent at the end. Temporary
136 : * slots can be created as temporary from beginning as they get dropped on
137 : * error as well.
138 : */
139 224 : ReplicationSlotCreate(name, true,
140 : temporary ? RS_TEMPORARY : RS_EPHEMERAL, two_phase,
141 : failover, false);
142 :
143 : /*
144 : * Create logical decoding context to find start point or, if we don't
145 : * need it, to 1) bump slot's restart_lsn and xmin 2) check plugin sanity.
146 : *
147 : * Note: when !find_startpoint this is still important, because it's at
148 : * this point that the output plugin is validated.
149 : */
150 214 : ctx = CreateInitDecodingContext(plugin, NIL,
151 : false, /* just catalogs is OK */
152 : restart_lsn,
153 214 : XL_ROUTINE(.page_read = read_local_xlog_page,
154 : .segment_open = wal_segment_open,
155 : .segment_close = wal_segment_close),
156 : NULL, NULL, NULL);
157 :
158 : /*
159 : * If caller needs us to determine the decoding start point, do so now.
160 : * This might take a while.
161 : */
162 208 : if (find_startpoint)
163 196 : DecodingContextFindStartpoint(ctx);
164 :
165 : /* don't need the decoding context anymore */
166 204 : FreeDecodingContext(ctx);
167 204 : }
168 :
169 : /*
170 : * SQL function for creating a new logical replication slot.
171 : */
172 : Datum
173 212 : pg_create_logical_replication_slot(PG_FUNCTION_ARGS)
174 : {
175 212 : Name name = PG_GETARG_NAME(0);
176 212 : Name plugin = PG_GETARG_NAME(1);
177 212 : bool temporary = PG_GETARG_BOOL(2);
178 212 : bool two_phase = PG_GETARG_BOOL(3);
179 212 : bool failover = PG_GETARG_BOOL(4);
180 : Datum result;
181 : TupleDesc tupdesc;
182 : HeapTuple tuple;
183 : Datum values[2];
184 : bool nulls[2];
185 :
186 212 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
187 0 : elog(ERROR, "return type must be a row type");
188 :
189 212 : CheckSlotPermissions();
190 :
191 210 : CheckLogicalDecodingRequirements();
192 :
193 210 : create_logical_replication_slot(NameStr(*name),
194 210 : NameStr(*plugin),
195 : temporary,
196 : two_phase,
197 : failover,
198 : InvalidXLogRecPtr,
199 : true);
200 :
201 192 : values[0] = NameGetDatum(&MyReplicationSlot->data.name);
202 192 : values[1] = LSNGetDatum(MyReplicationSlot->data.confirmed_flush);
203 :
204 192 : memset(nulls, 0, sizeof(nulls));
205 :
206 192 : tuple = heap_form_tuple(tupdesc, values, nulls);
207 192 : result = HeapTupleGetDatum(tuple);
208 :
209 : /* ok, slot is now fully created, mark it as persistent if needed */
210 192 : if (!temporary)
211 182 : ReplicationSlotPersist();
212 192 : ReplicationSlotRelease();
213 :
214 192 : PG_RETURN_DATUM(result);
215 : }
216 :
217 :
218 : /*
219 : * SQL function for dropping a replication slot.
220 : */
221 : Datum
222 240 : pg_drop_replication_slot(PG_FUNCTION_ARGS)
223 : {
224 240 : Name name = PG_GETARG_NAME(0);
225 :
226 240 : CheckSlotPermissions();
227 :
228 236 : CheckSlotRequirements();
229 :
230 236 : ReplicationSlotDrop(NameStr(*name), true);
231 :
232 224 : PG_RETURN_VOID();
233 : }
234 :
235 : /*
236 : * pg_get_replication_slots - SQL SRF showing all replication slots
237 : * that currently exist on the database cluster.
238 : */
239 : Datum
240 490 : pg_get_replication_slots(PG_FUNCTION_ARGS)
241 : {
242 : #define PG_GET_REPLICATION_SLOTS_COLS 17
243 490 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
244 : XLogRecPtr currlsn;
245 : int slotno;
246 :
247 : /*
248 : * We don't require any special permission to see this function's data
249 : * because nothing should be sensitive. The most critical being the slot
250 : * name, which shouldn't contain anything particularly sensitive.
251 : */
252 :
253 490 : InitMaterializedSRF(fcinfo, 0);
254 :
255 490 : currlsn = GetXLogWriteRecPtr();
256 :
257 490 : LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
258 3900 : for (slotno = 0; slotno < max_replication_slots; slotno++)
259 : {
260 3410 : ReplicationSlot *slot = &ReplicationSlotCtl->replication_slots[slotno];
261 : ReplicationSlot slot_contents;
262 : Datum values[PG_GET_REPLICATION_SLOTS_COLS];
263 : bool nulls[PG_GET_REPLICATION_SLOTS_COLS];
264 : WALAvailability walstate;
265 : int i;
266 :
267 3410 : if (!slot->in_use)
268 2660 : continue;
269 :
270 : /* Copy slot contents while holding spinlock, then examine at leisure */
271 750 : SpinLockAcquire(&slot->mutex);
272 750 : slot_contents = *slot;
273 750 : SpinLockRelease(&slot->mutex);
274 :
275 750 : memset(values, 0, sizeof(values));
276 750 : memset(nulls, 0, sizeof(nulls));
277 :
278 750 : i = 0;
279 750 : values[i++] = NameGetDatum(&slot_contents.data.name);
280 :
281 750 : if (slot_contents.data.database == InvalidOid)
282 208 : nulls[i++] = true;
283 : else
284 542 : values[i++] = NameGetDatum(&slot_contents.data.plugin);
285 :
286 750 : if (slot_contents.data.database == InvalidOid)
287 208 : values[i++] = CStringGetTextDatum("physical");
288 : else
289 542 : values[i++] = CStringGetTextDatum("logical");
290 :
291 750 : if (slot_contents.data.database == InvalidOid)
292 208 : nulls[i++] = true;
293 : else
294 542 : values[i++] = ObjectIdGetDatum(slot_contents.data.database);
295 :
296 750 : values[i++] = BoolGetDatum(slot_contents.data.persistency == RS_TEMPORARY);
297 750 : values[i++] = BoolGetDatum(slot_contents.active_pid != 0);
298 :
299 750 : if (slot_contents.active_pid != 0)
300 252 : values[i++] = Int32GetDatum(slot_contents.active_pid);
301 : else
302 498 : nulls[i++] = true;
303 :
304 750 : if (slot_contents.data.xmin != InvalidTransactionId)
305 92 : values[i++] = TransactionIdGetDatum(slot_contents.data.xmin);
306 : else
307 658 : nulls[i++] = true;
308 :
309 750 : if (slot_contents.data.catalog_xmin != InvalidTransactionId)
310 586 : values[i++] = TransactionIdGetDatum(slot_contents.data.catalog_xmin);
311 : else
312 164 : nulls[i++] = true;
313 :
314 750 : if (slot_contents.data.restart_lsn != InvalidXLogRecPtr)
315 726 : values[i++] = LSNGetDatum(slot_contents.data.restart_lsn);
316 : else
317 24 : nulls[i++] = true;
318 :
319 750 : if (slot_contents.data.confirmed_flush != InvalidXLogRecPtr)
320 496 : values[i++] = LSNGetDatum(slot_contents.data.confirmed_flush);
321 : else
322 254 : nulls[i++] = true;
323 :
324 : /*
325 : * If the slot has not been invalidated, test availability from
326 : * restart_lsn.
327 : */
328 750 : if (slot_contents.data.invalidated != RS_INVAL_NONE)
329 62 : walstate = WALAVAIL_REMOVED;
330 : else
331 688 : walstate = GetWALAvailability(slot_contents.data.restart_lsn);
332 :
333 750 : switch (walstate)
334 : {
335 18 : case WALAVAIL_INVALID_LSN:
336 18 : nulls[i++] = true;
337 18 : break;
338 :
339 664 : case WALAVAIL_RESERVED:
340 664 : values[i++] = CStringGetTextDatum("reserved");
341 664 : break;
342 :
343 4 : case WALAVAIL_EXTENDED:
344 4 : values[i++] = CStringGetTextDatum("extended");
345 4 : break;
346 :
347 2 : case WALAVAIL_UNRESERVED:
348 2 : values[i++] = CStringGetTextDatum("unreserved");
349 2 : break;
350 :
351 62 : case WALAVAIL_REMOVED:
352 :
353 : /*
354 : * If we read the restart_lsn long enough ago, maybe that file
355 : * has been removed by now. However, the walsender could have
356 : * moved forward enough that it jumped to another file after
357 : * we looked. If checkpointer signalled the process to
358 : * termination, then it's definitely lost; but if a process is
359 : * still alive, then "unreserved" seems more appropriate.
360 : *
361 : * If we do change it, save the state for safe_wal_size below.
362 : */
363 62 : if (!XLogRecPtrIsInvalid(slot_contents.data.restart_lsn))
364 : {
365 : int pid;
366 :
367 56 : SpinLockAcquire(&slot->mutex);
368 56 : pid = slot->active_pid;
369 56 : slot_contents.data.restart_lsn = slot->data.restart_lsn;
370 56 : SpinLockRelease(&slot->mutex);
371 56 : if (pid != 0)
372 : {
373 0 : values[i++] = CStringGetTextDatum("unreserved");
374 0 : walstate = WALAVAIL_UNRESERVED;
375 0 : break;
376 : }
377 : }
378 62 : values[i++] = CStringGetTextDatum("lost");
379 62 : break;
380 : }
381 :
382 : /*
383 : * safe_wal_size is only computed for slots that have not been lost,
384 : * and only if there's a configured maximum size.
385 : */
386 750 : if (walstate == WALAVAIL_REMOVED || max_slot_wal_keep_size_mb < 0)
387 740 : nulls[i++] = true;
388 : else
389 : {
390 : XLogSegNo targetSeg;
391 : uint64 slotKeepSegs;
392 : uint64 keepSegs;
393 : XLogSegNo failSeg;
394 : XLogRecPtr failLSN;
395 :
396 10 : XLByteToSeg(slot_contents.data.restart_lsn, targetSeg, wal_segment_size);
397 :
398 : /* determine how many segments can be kept by slots */
399 10 : slotKeepSegs = XLogMBVarToSegs(max_slot_wal_keep_size_mb, wal_segment_size);
400 : /* ditto for wal_keep_size */
401 10 : keepSegs = XLogMBVarToSegs(wal_keep_size_mb, wal_segment_size);
402 :
403 : /* if currpos reaches failLSN, we lose our segment */
404 10 : failSeg = targetSeg + Max(slotKeepSegs, keepSegs) + 1;
405 10 : XLogSegNoOffsetToRecPtr(failSeg, 0, wal_segment_size, failLSN);
406 :
407 10 : values[i++] = Int64GetDatum(failLSN - currlsn);
408 : }
409 :
410 750 : values[i++] = BoolGetDatum(slot_contents.data.two_phase);
411 :
412 750 : if (slot_contents.data.database == InvalidOid)
413 208 : nulls[i++] = true;
414 : else
415 : {
416 542 : ReplicationSlotInvalidationCause cause = slot_contents.data.invalidated;
417 :
418 542 : if (cause == RS_INVAL_NONE)
419 484 : nulls[i++] = true;
420 : else
421 58 : values[i++] = CStringGetTextDatum(SlotInvalidationCauses[cause]);
422 : }
423 :
424 750 : values[i++] = BoolGetDatum(slot_contents.data.failover);
425 :
426 750 : values[i++] = BoolGetDatum(slot_contents.data.synced);
427 :
428 : Assert(i == PG_GET_REPLICATION_SLOTS_COLS);
429 :
430 750 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
431 : values, nulls);
432 : }
433 :
434 490 : LWLockRelease(ReplicationSlotControlLock);
435 :
436 490 : return (Datum) 0;
437 : }
438 :
439 : /*
440 : * Helper function for advancing our physical replication slot forward.
441 : *
442 : * The LSN position to move to is compared simply to the slot's restart_lsn,
443 : * knowing that any position older than that would be removed by successive
444 : * checkpoints.
445 : */
446 : static XLogRecPtr
447 2 : pg_physical_replication_slot_advance(XLogRecPtr moveto)
448 : {
449 2 : XLogRecPtr startlsn = MyReplicationSlot->data.restart_lsn;
450 2 : XLogRecPtr retlsn = startlsn;
451 :
452 : Assert(moveto != InvalidXLogRecPtr);
453 :
454 2 : if (startlsn < moveto)
455 : {
456 2 : SpinLockAcquire(&MyReplicationSlot->mutex);
457 2 : MyReplicationSlot->data.restart_lsn = moveto;
458 2 : SpinLockRelease(&MyReplicationSlot->mutex);
459 2 : retlsn = moveto;
460 :
461 : /*
462 : * Dirty the slot so as it is written out at the next checkpoint. Note
463 : * that the LSN position advanced may still be lost in the event of a
464 : * crash, but this makes the data consistent after a clean shutdown.
465 : */
466 2 : ReplicationSlotMarkDirty();
467 :
468 : /*
469 : * Wake up logical walsenders holding logical failover slots after
470 : * updating the restart_lsn of the physical slot.
471 : */
472 2 : PhysicalWakeupLogicalWalSnd();
473 : }
474 :
475 2 : return retlsn;
476 : }
477 :
478 : /*
479 : * Helper function for advancing our logical replication slot forward.
480 : *
481 : * The slot's restart_lsn is used as start point for reading records, while
482 : * confirmed_flush is used as base point for the decoding context.
483 : *
484 : * We cannot just do LogicalConfirmReceivedLocation to update confirmed_flush,
485 : * because we need to digest WAL to advance restart_lsn allowing to recycle
486 : * WAL and removal of old catalog tuples. As decoding is done in fast_forward
487 : * mode, no changes are generated anyway.
488 : */
489 : static XLogRecPtr
490 8 : pg_logical_replication_slot_advance(XLogRecPtr moveto)
491 : {
492 : LogicalDecodingContext *ctx;
493 8 : ResourceOwner old_resowner = CurrentResourceOwner;
494 : XLogRecPtr retlsn;
495 :
496 : Assert(moveto != InvalidXLogRecPtr);
497 :
498 8 : PG_TRY();
499 : {
500 : /*
501 : * Create our decoding context in fast_forward mode, passing start_lsn
502 : * as InvalidXLogRecPtr, so that we start processing from my slot's
503 : * confirmed_flush.
504 : */
505 16 : ctx = CreateDecodingContext(InvalidXLogRecPtr,
506 : NIL,
507 : true, /* fast_forward */
508 8 : XL_ROUTINE(.page_read = read_local_xlog_page,
509 : .segment_open = wal_segment_open,
510 : .segment_close = wal_segment_close),
511 : NULL, NULL, NULL);
512 :
513 : /*
514 : * Wait for specified streaming replication standby servers (if any)
515 : * to confirm receipt of WAL up to moveto lsn.
516 : */
517 8 : WaitForStandbyConfirmation(moveto);
518 :
519 : /*
520 : * Start reading at the slot's restart_lsn, which we know to point to
521 : * a valid record.
522 : */
523 8 : XLogBeginRead(ctx->reader, MyReplicationSlot->data.restart_lsn);
524 :
525 : /* invalidate non-timetravel entries */
526 8 : InvalidateSystemCaches();
527 :
528 : /* Decode records until we reach the requested target */
529 318 : while (ctx->reader->EndRecPtr < moveto)
530 : {
531 310 : char *errm = NULL;
532 : XLogRecord *record;
533 :
534 : /*
535 : * Read records. No changes are generated in fast_forward mode,
536 : * but snapbuilder/slot statuses are updated properly.
537 : */
538 310 : record = XLogReadRecord(ctx->reader, &errm);
539 310 : if (errm)
540 0 : elog(ERROR, "could not find record while advancing replication slot: %s",
541 : errm);
542 :
543 : /*
544 : * Process the record. Storage-level changes are ignored in
545 : * fast_forward mode, but other modules (such as snapbuilder)
546 : * might still have critical updates to do.
547 : */
548 310 : if (record)
549 310 : LogicalDecodingProcessRecord(ctx, ctx->reader);
550 :
551 310 : CHECK_FOR_INTERRUPTS();
552 : }
553 :
554 : /*
555 : * Logical decoding could have clobbered CurrentResourceOwner during
556 : * transaction management, so restore the executor's value. (This is
557 : * a kluge, but it's not worth cleaning up right now.)
558 : */
559 8 : CurrentResourceOwner = old_resowner;
560 :
561 8 : if (ctx->reader->EndRecPtr != InvalidXLogRecPtr)
562 : {
563 8 : LogicalConfirmReceivedLocation(moveto);
564 :
565 : /*
566 : * If only the confirmed_flush LSN has changed the slot won't get
567 : * marked as dirty by the above. Callers on the walsender
568 : * interface are expected to keep track of their own progress and
569 : * don't need it written out. But SQL-interface users cannot
570 : * specify their own start positions and it's harder for them to
571 : * keep track of their progress, so we should make more of an
572 : * effort to save it for them.
573 : *
574 : * Dirty the slot so it is written out at the next checkpoint. The
575 : * LSN position advanced to may still be lost on a crash but this
576 : * makes the data consistent after a clean shutdown.
577 : */
578 8 : ReplicationSlotMarkDirty();
579 : }
580 :
581 8 : retlsn = MyReplicationSlot->data.confirmed_flush;
582 :
583 : /* free context, call shutdown callback */
584 8 : FreeDecodingContext(ctx);
585 :
586 8 : InvalidateSystemCaches();
587 : }
588 0 : PG_CATCH();
589 : {
590 : /* clear all timetravel entries */
591 0 : InvalidateSystemCaches();
592 :
593 0 : PG_RE_THROW();
594 : }
595 8 : PG_END_TRY();
596 :
597 8 : return retlsn;
598 : }
599 :
600 : /*
601 : * SQL function for moving the position in a replication slot.
602 : */
603 : Datum
604 14 : pg_replication_slot_advance(PG_FUNCTION_ARGS)
605 : {
606 14 : Name slotname = PG_GETARG_NAME(0);
607 14 : XLogRecPtr moveto = PG_GETARG_LSN(1);
608 : XLogRecPtr endlsn;
609 : XLogRecPtr minlsn;
610 : TupleDesc tupdesc;
611 : Datum values[2];
612 : bool nulls[2];
613 : HeapTuple tuple;
614 : Datum result;
615 :
616 : Assert(!MyReplicationSlot);
617 :
618 14 : CheckSlotPermissions();
619 :
620 14 : if (XLogRecPtrIsInvalid(moveto))
621 2 : ereport(ERROR,
622 : (errmsg("invalid target WAL LSN")));
623 :
624 : /* Build a tuple descriptor for our result type */
625 12 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
626 0 : elog(ERROR, "return type must be a row type");
627 :
628 : /*
629 : * We can't move slot past what's been flushed/replayed so clamp the
630 : * target position accordingly.
631 : */
632 12 : if (!RecoveryInProgress())
633 12 : moveto = Min(moveto, GetFlushRecPtr(NULL));
634 : else
635 0 : moveto = Min(moveto, GetXLogReplayRecPtr(NULL));
636 :
637 : /* Acquire the slot so we "own" it */
638 12 : ReplicationSlotAcquire(NameStr(*slotname), true);
639 :
640 : /* A slot whose restart_lsn has never been reserved cannot be advanced */
641 12 : if (XLogRecPtrIsInvalid(MyReplicationSlot->data.restart_lsn))
642 2 : ereport(ERROR,
643 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
644 : errmsg("replication slot \"%s\" cannot be advanced",
645 : NameStr(*slotname)),
646 : errdetail("This slot has never previously reserved WAL, or it has been invalidated.")));
647 :
648 : /*
649 : * Check if the slot is not moving backwards. Physical slots rely simply
650 : * on restart_lsn as a minimum point, while logical slots have confirmed
651 : * consumption up to confirmed_flush, meaning that in both cases data
652 : * older than that is not available anymore.
653 : */
654 10 : if (OidIsValid(MyReplicationSlot->data.database))
655 8 : minlsn = MyReplicationSlot->data.confirmed_flush;
656 : else
657 2 : minlsn = MyReplicationSlot->data.restart_lsn;
658 :
659 10 : if (moveto < minlsn)
660 0 : ereport(ERROR,
661 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
662 : errmsg("cannot advance replication slot to %X/%X, minimum is %X/%X",
663 : LSN_FORMAT_ARGS(moveto), LSN_FORMAT_ARGS(minlsn))));
664 :
665 : /* Do the actual slot update, depending on the slot type */
666 10 : if (OidIsValid(MyReplicationSlot->data.database))
667 8 : endlsn = pg_logical_replication_slot_advance(moveto);
668 : else
669 2 : endlsn = pg_physical_replication_slot_advance(moveto);
670 :
671 10 : values[0] = NameGetDatum(&MyReplicationSlot->data.name);
672 10 : nulls[0] = false;
673 :
674 : /*
675 : * Recompute the minimum LSN and xmin across all slots to adjust with the
676 : * advancing potentially done.
677 : */
678 10 : ReplicationSlotsComputeRequiredXmin(false);
679 10 : ReplicationSlotsComputeRequiredLSN();
680 :
681 10 : ReplicationSlotRelease();
682 :
683 : /* Return the reached position. */
684 10 : values[1] = LSNGetDatum(endlsn);
685 10 : nulls[1] = false;
686 :
687 10 : tuple = heap_form_tuple(tupdesc, values, nulls);
688 10 : result = HeapTupleGetDatum(tuple);
689 :
690 10 : PG_RETURN_DATUM(result);
691 : }
692 :
693 : /*
694 : * Helper function of copying a replication slot.
695 : */
696 : static Datum
697 28 : copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot)
698 : {
699 28 : Name src_name = PG_GETARG_NAME(0);
700 28 : Name dst_name = PG_GETARG_NAME(1);
701 28 : ReplicationSlot *src = NULL;
702 : ReplicationSlot first_slot_contents;
703 : ReplicationSlot second_slot_contents;
704 : XLogRecPtr src_restart_lsn;
705 : bool src_islogical;
706 : bool temporary;
707 : char *plugin;
708 : Datum values[2];
709 : bool nulls[2];
710 : Datum result;
711 : TupleDesc tupdesc;
712 : HeapTuple tuple;
713 :
714 28 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
715 0 : elog(ERROR, "return type must be a row type");
716 :
717 28 : CheckSlotPermissions();
718 :
719 28 : if (logical_slot)
720 16 : CheckLogicalDecodingRequirements();
721 : else
722 12 : CheckSlotRequirements();
723 :
724 28 : LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
725 :
726 : /*
727 : * We need to prevent the source slot's reserved WAL from being removed,
728 : * but we don't want to lock that slot for very long, and it can advance
729 : * in the meantime. So obtain the source slot's data, and create a new
730 : * slot using its restart_lsn. Afterwards we lock the source slot again
731 : * and verify that the data we copied (name, type) has not changed
732 : * incompatibly. No inconvenient WAL removal can occur once the new slot
733 : * is created -- but since WAL removal could have occurred before we
734 : * managed to create the new slot, we advance the new slot's restart_lsn
735 : * to the source slot's updated restart_lsn the second time we lock it.
736 : */
737 30 : for (int i = 0; i < max_replication_slots; i++)
738 : {
739 30 : ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
740 :
741 30 : if (s->in_use && strcmp(NameStr(s->data.name), NameStr(*src_name)) == 0)
742 : {
743 : /* Copy the slot contents while holding spinlock */
744 28 : SpinLockAcquire(&s->mutex);
745 28 : first_slot_contents = *s;
746 28 : SpinLockRelease(&s->mutex);
747 28 : src = s;
748 28 : break;
749 : }
750 : }
751 :
752 28 : LWLockRelease(ReplicationSlotControlLock);
753 :
754 28 : if (src == NULL)
755 0 : ereport(ERROR,
756 : (errcode(ERRCODE_UNDEFINED_OBJECT),
757 : errmsg("replication slot \"%s\" does not exist", NameStr(*src_name))));
758 :
759 28 : src_islogical = SlotIsLogical(&first_slot_contents);
760 28 : src_restart_lsn = first_slot_contents.data.restart_lsn;
761 28 : temporary = (first_slot_contents.data.persistency == RS_TEMPORARY);
762 28 : plugin = logical_slot ? NameStr(first_slot_contents.data.plugin) : NULL;
763 :
764 : /* Check type of replication slot */
765 28 : if (src_islogical != logical_slot)
766 4 : ereport(ERROR,
767 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
768 : src_islogical ?
769 : errmsg("cannot copy physical replication slot \"%s\" as a logical replication slot",
770 : NameStr(*src_name)) :
771 : errmsg("cannot copy logical replication slot \"%s\" as a physical replication slot",
772 : NameStr(*src_name))));
773 :
774 : /* Copying non-reserved slot doesn't make sense */
775 24 : if (XLogRecPtrIsInvalid(src_restart_lsn))
776 2 : ereport(ERROR,
777 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
778 : errmsg("cannot copy a replication slot that doesn't reserve WAL")));
779 :
780 : /* Overwrite params from optional arguments */
781 22 : if (PG_NARGS() >= 3)
782 12 : temporary = PG_GETARG_BOOL(2);
783 22 : if (PG_NARGS() >= 4)
784 : {
785 : Assert(logical_slot);
786 8 : plugin = NameStr(*(PG_GETARG_NAME(3)));
787 : }
788 :
789 : /* Create new slot and acquire it */
790 22 : if (logical_slot)
791 : {
792 : /*
793 : * We must not try to read WAL, since we haven't reserved it yet --
794 : * hence pass find_startpoint false. confirmed_flush will be set
795 : * below, by copying from the source slot.
796 : *
797 : * To avoid potential issues with the slot synchronization where the
798 : * restart_lsn of a replication slot can go backward, we set the
799 : * failover option to false here. This situation occurs when a slot
800 : * on the primary server is dropped and immediately replaced with a
801 : * new slot of the same name, created by copying from another existing
802 : * slot. However, the slot synchronization will only observe the
803 : * restart_lsn of the same slot going backward.
804 : */
805 14 : create_logical_replication_slot(NameStr(*dst_name),
806 : plugin,
807 : temporary,
808 : false,
809 : false,
810 : src_restart_lsn,
811 : false);
812 : }
813 : else
814 8 : create_physical_replication_slot(NameStr(*dst_name),
815 : true,
816 : temporary,
817 : src_restart_lsn);
818 :
819 : /*
820 : * Update the destination slot to current values of the source slot;
821 : * recheck that the source slot is still the one we saw previously.
822 : */
823 : {
824 : TransactionId copy_effective_xmin;
825 : TransactionId copy_effective_catalog_xmin;
826 : TransactionId copy_xmin;
827 : TransactionId copy_catalog_xmin;
828 : XLogRecPtr copy_restart_lsn;
829 : XLogRecPtr copy_confirmed_flush;
830 : bool copy_islogical;
831 : char *copy_name;
832 :
833 : /* Copy data of source slot again */
834 20 : SpinLockAcquire(&src->mutex);
835 20 : second_slot_contents = *src;
836 20 : SpinLockRelease(&src->mutex);
837 :
838 20 : copy_effective_xmin = second_slot_contents.effective_xmin;
839 20 : copy_effective_catalog_xmin = second_slot_contents.effective_catalog_xmin;
840 :
841 20 : copy_xmin = second_slot_contents.data.xmin;
842 20 : copy_catalog_xmin = second_slot_contents.data.catalog_xmin;
843 20 : copy_restart_lsn = second_slot_contents.data.restart_lsn;
844 20 : copy_confirmed_flush = second_slot_contents.data.confirmed_flush;
845 :
846 : /* for existence check */
847 20 : copy_name = NameStr(second_slot_contents.data.name);
848 20 : copy_islogical = SlotIsLogical(&second_slot_contents);
849 :
850 : /*
851 : * Check if the source slot still exists and is valid. We regard it as
852 : * invalid if the type of replication slot or name has been changed,
853 : * or the restart_lsn either is invalid or has gone backward. (The
854 : * restart_lsn could go backwards if the source slot is dropped and
855 : * copied from an older slot during installation.)
856 : *
857 : * Since erroring out will release and drop the destination slot we
858 : * don't need to release it here.
859 : */
860 20 : if (copy_restart_lsn < src_restart_lsn ||
861 20 : src_islogical != copy_islogical ||
862 20 : strcmp(copy_name, NameStr(*src_name)) != 0)
863 0 : ereport(ERROR,
864 : (errmsg("could not copy replication slot \"%s\"",
865 : NameStr(*src_name)),
866 : errdetail("The source replication slot was modified incompatibly during the copy operation.")));
867 :
868 : /* The source slot must have a consistent snapshot */
869 20 : if (src_islogical && XLogRecPtrIsInvalid(copy_confirmed_flush))
870 0 : ereport(ERROR,
871 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
872 : errmsg("cannot copy unfinished logical replication slot \"%s\"",
873 : NameStr(*src_name)),
874 : errhint("Retry when the source replication slot's confirmed_flush_lsn is valid.")));
875 :
876 : /* Install copied values again */
877 20 : SpinLockAcquire(&MyReplicationSlot->mutex);
878 20 : MyReplicationSlot->effective_xmin = copy_effective_xmin;
879 20 : MyReplicationSlot->effective_catalog_xmin = copy_effective_catalog_xmin;
880 :
881 20 : MyReplicationSlot->data.xmin = copy_xmin;
882 20 : MyReplicationSlot->data.catalog_xmin = copy_catalog_xmin;
883 20 : MyReplicationSlot->data.restart_lsn = copy_restart_lsn;
884 20 : MyReplicationSlot->data.confirmed_flush = copy_confirmed_flush;
885 20 : SpinLockRelease(&MyReplicationSlot->mutex);
886 :
887 20 : ReplicationSlotMarkDirty();
888 20 : ReplicationSlotsComputeRequiredXmin(false);
889 20 : ReplicationSlotsComputeRequiredLSN();
890 20 : ReplicationSlotSave();
891 :
892 : #ifdef USE_ASSERT_CHECKING
893 : /* Check that the restart_lsn is available */
894 : {
895 : XLogSegNo segno;
896 :
897 : XLByteToSeg(copy_restart_lsn, segno, wal_segment_size);
898 : Assert(XLogGetLastRemovedSegno() < segno);
899 : }
900 : #endif
901 : }
902 :
903 : /* target slot fully created, mark as persistent if needed */
904 20 : if (logical_slot && !temporary)
905 6 : ReplicationSlotPersist();
906 :
907 : /* All done. Set up the return values */
908 20 : values[0] = NameGetDatum(dst_name);
909 20 : nulls[0] = false;
910 20 : if (!XLogRecPtrIsInvalid(MyReplicationSlot->data.confirmed_flush))
911 : {
912 12 : values[1] = LSNGetDatum(MyReplicationSlot->data.confirmed_flush);
913 12 : nulls[1] = false;
914 : }
915 : else
916 8 : nulls[1] = true;
917 :
918 20 : tuple = heap_form_tuple(tupdesc, values, nulls);
919 20 : result = HeapTupleGetDatum(tuple);
920 :
921 20 : ReplicationSlotRelease();
922 :
923 20 : PG_RETURN_DATUM(result);
924 : }
925 :
926 : /* The wrappers below are all to appease opr_sanity */
927 : Datum
928 8 : pg_copy_logical_replication_slot_a(PG_FUNCTION_ARGS)
929 : {
930 8 : return copy_replication_slot(fcinfo, true);
931 : }
932 :
933 : Datum
934 0 : pg_copy_logical_replication_slot_b(PG_FUNCTION_ARGS)
935 : {
936 0 : return copy_replication_slot(fcinfo, true);
937 : }
938 :
939 : Datum
940 8 : pg_copy_logical_replication_slot_c(PG_FUNCTION_ARGS)
941 : {
942 8 : return copy_replication_slot(fcinfo, true);
943 : }
944 :
945 : Datum
946 4 : pg_copy_physical_replication_slot_a(PG_FUNCTION_ARGS)
947 : {
948 4 : return copy_replication_slot(fcinfo, false);
949 : }
950 :
951 : Datum
952 8 : pg_copy_physical_replication_slot_b(PG_FUNCTION_ARGS)
953 : {
954 8 : return copy_replication_slot(fcinfo, false);
955 : }
956 :
957 : /*
958 : * Synchronize failover enabled replication slots to a standby server
959 : * from the primary server.
960 : */
961 : Datum
962 16 : pg_sync_replication_slots(PG_FUNCTION_ARGS)
963 : {
964 : WalReceiverConn *wrconn;
965 : char *err;
966 : StringInfoData app_name;
967 :
968 16 : CheckSlotPermissions();
969 :
970 14 : if (!RecoveryInProgress())
971 2 : ereport(ERROR,
972 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
973 : errmsg("replication slots can only be synchronized to a standby server"));
974 :
975 12 : ValidateSlotSyncParams(ERROR);
976 :
977 : /* Load the libpq-specific functions */
978 12 : load_file("libpqwalreceiver", false);
979 :
980 12 : (void) CheckAndGetDbnameFromConninfo();
981 :
982 10 : initStringInfo(&app_name);
983 10 : if (cluster_name[0])
984 10 : appendStringInfo(&app_name, "%s_slotsync", cluster_name);
985 : else
986 0 : appendStringInfoString(&app_name, "slotsync");
987 :
988 : /* Connect to the primary server. */
989 10 : wrconn = walrcv_connect(PrimaryConnInfo, false, false, false,
990 : app_name.data, &err);
991 10 : pfree(app_name.data);
992 :
993 10 : if (!wrconn)
994 0 : ereport(ERROR,
995 : errcode(ERRCODE_CONNECTION_FAILURE),
996 : errmsg("could not connect to the primary server: %s", err));
997 :
998 10 : SyncReplicationSlots(wrconn);
999 :
1000 8 : walrcv_disconnect(wrconn);
1001 :
1002 8 : PG_RETURN_VOID();
1003 : }
|