Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pgoutput.c
4 : * Logical Replication output plugin
5 : *
6 : * Copyright (c) 2012-2026, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/replication/pgoutput/pgoutput.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 : #include "postgres.h"
14 :
15 : #include "access/tupconvert.h"
16 : #include "catalog/partition.h"
17 : #include "catalog/pg_publication.h"
18 : #include "catalog/pg_publication_rel.h"
19 : #include "catalog/pg_subscription.h"
20 : #include "commands/defrem.h"
21 : #include "commands/subscriptioncmds.h"
22 : #include "executor/executor.h"
23 : #include "fmgr.h"
24 : #include "nodes/makefuncs.h"
25 : #include "parser/parse_relation.h"
26 : #include "replication/logical.h"
27 : #include "replication/logicalproto.h"
28 : #include "replication/origin.h"
29 : #include "replication/pgoutput.h"
30 : #include "rewrite/rewriteHandler.h"
31 : #include "utils/builtins.h"
32 : #include "utils/inval.h"
33 : #include "utils/lsyscache.h"
34 : #include "utils/memutils.h"
35 : #include "utils/rel.h"
36 : #include "utils/syscache.h"
37 : #include "utils/varlena.h"
38 :
/* Module magic block: marks this library as a PostgreSQL loadable module. */
PG_MODULE_MAGIC_EXT(
					.name = "pgoutput",
					.version = PG_VERSION
);
43 :
/*
 * Output plugin callback prototypes (wired up in _PG_output_plugin_init).
 */
static void pgoutput_startup(LogicalDecodingContext *ctx,
							 OutputPluginOptions *opt, bool is_init);
static void pgoutput_shutdown(LogicalDecodingContext *ctx);
static void pgoutput_begin_txn(LogicalDecodingContext *ctx,
							   ReorderBufferTXN *txn);
static void pgoutput_commit_txn(LogicalDecodingContext *ctx,
								ReorderBufferTXN *txn, XLogRecPtr commit_lsn);
static void pgoutput_change(LogicalDecodingContext *ctx,
							ReorderBufferTXN *txn, Relation relation,
							ReorderBufferChange *change);
static void pgoutput_truncate(LogicalDecodingContext *ctx,
							  ReorderBufferTXN *txn, int nrelations, Relation relations[],
							  ReorderBufferChange *change);
static void pgoutput_message(LogicalDecodingContext *ctx,
							 ReorderBufferTXN *txn, XLogRecPtr message_lsn,
							 bool transactional, const char *prefix,
							 Size sz, const char *message);
static bool pgoutput_origin_filter(LogicalDecodingContext *ctx,
								   ReplOriginId origin_id);
/* two-phase commit callbacks */
static void pgoutput_begin_prepare_txn(LogicalDecodingContext *ctx,
									   ReorderBufferTXN *txn);
static void pgoutput_prepare_txn(LogicalDecodingContext *ctx,
								 ReorderBufferTXN *txn, XLogRecPtr prepare_lsn);
static void pgoutput_commit_prepared_txn(LogicalDecodingContext *ctx,
										 ReorderBufferTXN *txn, XLogRecPtr commit_lsn);
static void pgoutput_rollback_prepared_txn(LogicalDecodingContext *ctx,
										   ReorderBufferTXN *txn,
										   XLogRecPtr prepare_end_lsn,
										   TimestampTz prepare_time);
/* transaction streaming callbacks */
static void pgoutput_stream_start(struct LogicalDecodingContext *ctx,
								  ReorderBufferTXN *txn);
static void pgoutput_stream_stop(struct LogicalDecodingContext *ctx,
								 ReorderBufferTXN *txn);
static void pgoutput_stream_abort(struct LogicalDecodingContext *ctx,
								  ReorderBufferTXN *txn,
								  XLogRecPtr abort_lsn);
static void pgoutput_stream_commit(struct LogicalDecodingContext *ctx,
								   ReorderBufferTXN *txn,
								   XLogRecPtr commit_lsn);
static void pgoutput_stream_prepare_txn(LogicalDecodingContext *ctx,
										ReorderBufferTXN *txn, XLogRecPtr prepare_lsn);

/* Whether data->publications is up to date; cleared by invalidation callback */
static bool publications_valid;

static List *LoadPublications(List *pubnames);
static void publication_invalidation_cb(Datum arg, SysCacheIdentifier cacheid,
										uint32 hashvalue);
static void send_repl_origin(LogicalDecodingContext *ctx,
							 ReplOriginId origin_id, XLogRecPtr origin_lsn,
							 bool send_origin);
94 :
/*
 * Only 3 publication actions are used for row filtering ("insert", "update",
 * "delete"). See RelationSyncEntry.exprstate[].
 */
enum RowFilterPubAction
{
	PUBACTION_INSERT,			/* row filter applied to INSERTs */
	PUBACTION_UPDATE,			/* row filter applied to UPDATEs */
	PUBACTION_DELETE,			/* row filter applied to DELETEs */
};

/* Array length for per-action row-filter state (last enum value + 1) */
#define NUM_ROWFILTER_PUBACTIONS (PUBACTION_DELETE+1)
107 :
/*
 * Entry in the map used to remember which relation schemas we sent.
 *
 * The schema_sent flag determines if the current schema record for the
 * relation (and for its ancestor if publish_as_relid is set) was already
 * sent to the subscriber (in which case we don't need to send it again).
 *
 * The schema cache on downstream is however updated only at commit time,
 * and with streamed transactions the commit order may be different from
 * the order the transactions are sent in. Also, the (sub) transactions
 * might get aborted so we need to send the schema for each (sub) transaction
 * so that we don't lose the schema information on abort. For handling this,
 * we maintain the list of xids (streamed_txns) for those we have already sent
 * the schema.
 *
 * For partitions, 'pubactions' considers not only the table's own
 * publications, but also those of all of its ancestors.
 */
typedef struct RelationSyncEntry
{
	Oid			relid;			/* relation oid */

	bool		replicate_valid;	/* overall validity flag for entry */

	/* true once this relation's schema was sent (non-streamed case) */
	bool		schema_sent;

	/*
	 * This will be PUBLISH_GENCOLS_STORED if the relation contains generated
	 * columns and the 'publish_generated_columns' parameter is set to
	 * PUBLISH_GENCOLS_STORED. Otherwise, it will be PUBLISH_GENCOLS_NONE,
	 * indicating that no generated columns should be published, unless
	 * explicitly specified in the column list.
	 */
	PublishGencolsType include_gencols_type;
	List	   *streamed_txns;	/* streamed toplevel transactions with this
								 * schema */

	/* are we publishing this rel? */
	PublicationActions pubactions;

	/*
	 * ExprState array for row filter. Different publication actions don't
	 * allow multiple expressions to always be combined into one, because
	 * updates or deletes restrict the column in expression to be part of the
	 * replica identity index whereas inserts do not have this restriction, so
	 * there is one ExprState per publication action.
	 */
	ExprState  *exprstate[NUM_ROWFILTER_PUBACTIONS];
	EState	   *estate;			/* executor state used for row filter */
	TupleTableSlot *new_slot;	/* slot for storing new tuple */
	TupleTableSlot *old_slot;	/* slot for storing old tuple */

	/*
	 * OID of the relation to publish changes as.  For a partition, this may
	 * be set to one of its ancestors whose schema will be used when
	 * replicating changes, if publish_via_partition_root is set for the
	 * publication.
	 */
	Oid			publish_as_relid;

	/*
	 * Map used when replicating using an ancestor's schema to convert tuples
	 * from partition's type to the ancestor's; NULL if publish_as_relid is
	 * same as 'relid' or if unnecessary due to partition and the ancestor
	 * having identical TupleDesc.
	 */
	AttrMap    *attrmap;

	/*
	 * Columns included in the publication, or NULL if all columns are
	 * included implicitly.  Note that the attnums in this bitmap are not
	 * shifted by FirstLowInvalidHeapAttributeNumber.
	 */
	Bitmapset  *columns;

	/*
	 * Private context to store additional data for this entry - state for the
	 * row filter expressions, column list, etc.
	 */
	MemoryContext entry_cxt;
} RelationSyncEntry;
189 :
/*
 * Maintain a per-transaction level variable to track whether the transaction
 * has sent BEGIN. BEGIN is only sent when the first change in a transaction
 * is processed. This makes it possible to skip sending a pair of BEGIN/COMMIT
 * messages for empty transactions which saves network bandwidth.
 *
 * This optimization is not used for prepared transactions because if the
 * WALSender restarts after prepare of a transaction and before commit prepared
 * of the same transaction then we won't be able to figure out if we have
 * skipped sending BEGIN/PREPARE of a transaction as it was empty. This is
 * because we would have lost the in-memory txndata information that was
 * present prior to the restart. This will result in sending a spurious
 * COMMIT PREPARED without a corresponding prepared transaction at the
 * downstream which would lead to an error when it tries to process it.
 *
 * XXX We could achieve this optimization by changing protocol to send
 * additional information so that downstream can detect that the corresponding
 * prepare has not been sent. However, adding such a check for every
 * transaction in the downstream could be costly so we might want to do it
 * optionally.
 *
 * We also don't have this optimization for streamed transactions because
 * they can contain prepared transactions.
 */
typedef struct PGOutputTxnData
{
	bool		sent_begin_txn; /* flag indicating whether BEGIN has been sent */
} PGOutputTxnData;
218 :
/* Map used to remember which relation schemas we sent. */
static HTAB *RelationSyncCache = NULL;

/* relation-schema cache management */
static void init_rel_sync_cache(MemoryContext cachectx);
static void cleanup_rel_sync_cache(TransactionId xid, bool is_commit);
static RelationSyncEntry *get_rel_sync_entry(PGOutputData *data,
											 Relation relation);
static void send_relation_and_attrs(Relation relation, TransactionId xid,
									LogicalDecodingContext *ctx,
									RelationSyncEntry *relentry);
static void rel_sync_cache_relation_cb(Datum arg, Oid relid);
static void rel_sync_cache_publication_cb(Datum arg, SysCacheIdentifier cacheid,
										  uint32 hashvalue);
static void set_schema_sent_in_streamed_txn(RelationSyncEntry *entry,
											TransactionId xid);
static bool get_schema_sent_in_streamed_txn(RelationSyncEntry *entry,
											TransactionId xid);
static void init_tuple_slot(PGOutputData *data, Relation relation,
							RelationSyncEntry *entry);
static void pgoutput_memory_context_reset(void *arg);

/* row filter routines */
static EState *create_estate_for_relation(Relation rel);
static void pgoutput_row_filter_init(PGOutputData *data,
									 List *publications,
									 RelationSyncEntry *entry);
static bool pgoutput_row_filter_exec_expr(ExprState *state,
										  ExprContext *econtext);
static bool pgoutput_row_filter(Relation relation, TupleTableSlot *old_slot,
								TupleTableSlot **new_slot_ptr,
								RelationSyncEntry *entry,
								ReorderBufferChangeType *action);

/* column list routines */
static void pgoutput_column_list_init(PGOutputData *data,
									  List *publications,
									  RelationSyncEntry *entry);
256 :
/*
 * Specify output plugin callbacks
 *
 * Entry point invoked when the "pgoutput" plugin is loaded by the logical
 * decoding machinery; wires each decoding event to its handler above.
 */
void
_PG_output_plugin_init(OutputPluginCallbacks *cb)
{
	cb->startup_cb = pgoutput_startup;
	cb->begin_cb = pgoutput_begin_txn;
	cb->change_cb = pgoutput_change;
	cb->truncate_cb = pgoutput_truncate;
	cb->message_cb = pgoutput_message;
	cb->commit_cb = pgoutput_commit_txn;

	/* two-phase commit */
	cb->begin_prepare_cb = pgoutput_begin_prepare_txn;
	cb->prepare_cb = pgoutput_prepare_txn;
	cb->commit_prepared_cb = pgoutput_commit_prepared_txn;
	cb->rollback_prepared_cb = pgoutput_rollback_prepared_txn;
	cb->filter_by_origin_cb = pgoutput_origin_filter;
	cb->shutdown_cb = pgoutput_shutdown;

	/* transaction streaming */
	cb->stream_start_cb = pgoutput_stream_start;
	cb->stream_stop_cb = pgoutput_stream_stop;
	cb->stream_abort_cb = pgoutput_stream_abort;
	cb->stream_commit_cb = pgoutput_stream_commit;
	cb->stream_change_cb = pgoutput_change;
	cb->stream_message_cb = pgoutput_message;
	cb->stream_truncate_cb = pgoutput_truncate;
	/* transaction streaming - two-phase commit */
	cb->stream_prepare_cb = pgoutput_stream_prepare_txn;
}
288 :
289 : static void
290 424 : parse_output_parameters(List *options, PGOutputData *data)
291 : {
292 : ListCell *lc;
293 424 : bool protocol_version_given = false;
294 424 : bool publication_names_given = false;
295 424 : bool binary_option_given = false;
296 424 : bool messages_option_given = false;
297 424 : bool streaming_given = false;
298 424 : bool two_phase_option_given = false;
299 424 : bool origin_option_given = false;
300 :
301 : /* Initialize optional parameters to defaults */
302 424 : data->binary = false;
303 424 : data->streaming = LOGICALREP_STREAM_OFF;
304 424 : data->messages = false;
305 424 : data->two_phase = false;
306 424 : data->publish_no_origin = false;
307 :
308 2116 : foreach(lc, options)
309 : {
310 1692 : DefElem *defel = (DefElem *) lfirst(lc);
311 :
312 : Assert(defel->arg == NULL || IsA(defel->arg, String));
313 :
314 : /* Check each param, whether or not we recognize it */
315 1692 : if (strcmp(defel->defname, "proto_version") == 0)
316 : {
317 : unsigned long parsed;
318 : char *endptr;
319 :
320 424 : if (protocol_version_given)
321 0 : ereport(ERROR,
322 : (errcode(ERRCODE_SYNTAX_ERROR),
323 : errmsg("conflicting or redundant options")));
324 424 : protocol_version_given = true;
325 :
326 424 : errno = 0;
327 424 : parsed = strtoul(strVal(defel->arg), &endptr, 10);
328 424 : if (errno != 0 || *endptr != '\0')
329 0 : ereport(ERROR,
330 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
331 : errmsg("invalid proto_version")));
332 :
333 424 : if (parsed > PG_UINT32_MAX)
334 0 : ereport(ERROR,
335 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
336 : errmsg("proto_version \"%s\" out of range",
337 : strVal(defel->arg))));
338 :
339 424 : data->protocol_version = (uint32) parsed;
340 : }
341 1268 : else if (strcmp(defel->defname, "publication_names") == 0)
342 : {
343 424 : if (publication_names_given)
344 0 : ereport(ERROR,
345 : (errcode(ERRCODE_SYNTAX_ERROR),
346 : errmsg("conflicting or redundant options")));
347 424 : publication_names_given = true;
348 :
349 : /*
350 : * Pass a copy of the DefElem->arg since SplitIdentifierString
351 : * modifies its input.
352 : */
353 424 : if (!SplitIdentifierString(pstrdup(strVal(defel->arg)), ',',
354 : &data->publication_names))
355 0 : ereport(ERROR,
356 : (errcode(ERRCODE_INVALID_NAME),
357 : errmsg("invalid publication_names syntax")));
358 : }
359 844 : else if (strcmp(defel->defname, "binary") == 0)
360 : {
361 11 : if (binary_option_given)
362 0 : ereport(ERROR,
363 : (errcode(ERRCODE_SYNTAX_ERROR),
364 : errmsg("conflicting or redundant options")));
365 11 : binary_option_given = true;
366 :
367 11 : data->binary = defGetBoolean(defel);
368 : }
369 833 : else if (strcmp(defel->defname, "messages") == 0)
370 : {
371 4 : if (messages_option_given)
372 0 : ereport(ERROR,
373 : (errcode(ERRCODE_SYNTAX_ERROR),
374 : errmsg("conflicting or redundant options")));
375 4 : messages_option_given = true;
376 :
377 4 : data->messages = defGetBoolean(defel);
378 : }
379 829 : else if (strcmp(defel->defname, "streaming") == 0)
380 : {
381 406 : if (streaming_given)
382 0 : ereport(ERROR,
383 : (errcode(ERRCODE_SYNTAX_ERROR),
384 : errmsg("conflicting or redundant options")));
385 406 : streaming_given = true;
386 :
387 406 : data->streaming = defGetStreamingMode(defel);
388 : }
389 423 : else if (strcmp(defel->defname, "two_phase") == 0)
390 : {
391 8 : if (two_phase_option_given)
392 0 : ereport(ERROR,
393 : (errcode(ERRCODE_SYNTAX_ERROR),
394 : errmsg("conflicting or redundant options")));
395 8 : two_phase_option_given = true;
396 :
397 8 : data->two_phase = defGetBoolean(defel);
398 : }
399 415 : else if (strcmp(defel->defname, "origin") == 0)
400 : {
401 : char *origin;
402 :
403 415 : if (origin_option_given)
404 0 : ereport(ERROR,
405 : errcode(ERRCODE_SYNTAX_ERROR),
406 : errmsg("conflicting or redundant options"));
407 415 : origin_option_given = true;
408 :
409 415 : origin = defGetString(defel);
410 415 : if (pg_strcasecmp(origin, LOGICALREP_ORIGIN_NONE) == 0)
411 29 : data->publish_no_origin = true;
412 386 : else if (pg_strcasecmp(origin, LOGICALREP_ORIGIN_ANY) == 0)
413 386 : data->publish_no_origin = false;
414 : else
415 0 : ereport(ERROR,
416 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
417 : errmsg("unrecognized origin value: \"%s\"", origin));
418 : }
419 : else
420 0 : elog(ERROR, "unrecognized pgoutput option: %s", defel->defname);
421 : }
422 :
423 : /* Check required options */
424 424 : if (!protocol_version_given)
425 0 : ereport(ERROR,
426 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
427 : errmsg("option \"%s\" missing", "proto_version"));
428 424 : if (!publication_names_given)
429 0 : ereport(ERROR,
430 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
431 : errmsg("option \"%s\" missing", "publication_names"));
432 424 : }
433 :
434 : /*
435 : * Memory context reset callback of PGOutputData->context.
436 : */
437 : static void
438 1074 : pgoutput_memory_context_reset(void *arg)
439 : {
440 1074 : if (RelationSyncCache)
441 : {
442 198 : hash_destroy(RelationSyncCache);
443 198 : RelationSyncCache = NULL;
444 : }
445 1074 : }
446 :
/*
 * Initialize this plugin
 *
 * Called once per decoding context.  Sets up the plugin's private memory
 * contexts and, on replication start (!is_init), parses and validates the
 * client-supplied options and initializes the relation schema cache.  In
 * slot-initialization mode (is_init) streaming and two-phase decoding are
 * disabled instead.
 */
static void
pgoutput_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
				 bool is_init)
{
	PGOutputData *data = palloc0_object(PGOutputData);
	static bool publication_callback_registered = false;
	MemoryContextCallback *mcallback;

	/* Create our memory context for private allocations. */
	data->context = AllocSetContextCreate(ctx->context,
										  "logical replication output context",
										  ALLOCSET_DEFAULT_SIZES);

	data->cachectx = AllocSetContextCreate(ctx->context,
										   "logical replication cache context",
										   ALLOCSET_DEFAULT_SIZES);

	data->pubctx = AllocSetContextCreate(ctx->context,
										 "logical replication publication list context",
										 ALLOCSET_SMALL_SIZES);

	/*
	 * Ensure to cleanup RelationSyncCache even when logical decoding invoked
	 * via SQL interface ends up with an error.
	 */
	mcallback = palloc0_object(MemoryContextCallback);
	mcallback->func = pgoutput_memory_context_reset;
	MemoryContextRegisterResetCallback(ctx->context, mcallback);

	ctx->output_plugin_private = data;

	/* This plugin uses binary protocol. */
	opt->output_type = OUTPUT_PLUGIN_BINARY_OUTPUT;

	/*
	 * This is replication start and not slot initialization.
	 *
	 * Parse and validate options passed by the client.
	 */
	if (!is_init)
	{
		/* Parse the params and ERROR if we see any we don't recognize */
		parse_output_parameters(ctx->output_plugin_options, data);

		/* Check if we support requested protocol */
		if (data->protocol_version > LOGICALREP_PROTO_MAX_VERSION_NUM)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("client sent proto_version=%d but server only supports protocol %d or lower",
							data->protocol_version, LOGICALREP_PROTO_MAX_VERSION_NUM)));

		if (data->protocol_version < LOGICALREP_PROTO_MIN_VERSION_NUM)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("client sent proto_version=%d but server only supports protocol %d or higher",
							data->protocol_version, LOGICALREP_PROTO_MIN_VERSION_NUM)));

		/*
		 * Decide whether to enable streaming. It is disabled by default, in
		 * which case we just update the flag in decoding context. Otherwise
		 * we only allow it with sufficient version of the protocol, and when
		 * the output plugin supports it.
		 */
		if (data->streaming == LOGICALREP_STREAM_OFF)
			ctx->streaming = false;
		else if (data->streaming == LOGICALREP_STREAM_ON &&
				 data->protocol_version < LOGICALREP_PROTO_STREAM_VERSION_NUM)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("requested proto_version=%d does not support streaming, need %d or higher",
							data->protocol_version, LOGICALREP_PROTO_STREAM_VERSION_NUM)));
		else if (data->streaming == LOGICALREP_STREAM_PARALLEL &&
				 data->protocol_version < LOGICALREP_PROTO_STREAM_PARALLEL_VERSION_NUM)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("requested proto_version=%d does not support parallel streaming, need %d or higher",
							data->protocol_version, LOGICALREP_PROTO_STREAM_PARALLEL_VERSION_NUM)));
		else if (!ctx->streaming)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("streaming requested, but not supported by output plugin")));

		/*
		 * Here, we just check whether the two-phase option is passed by
		 * plugin and decide whether to enable it at later point of time. It
		 * remains enabled if the previous start-up has done so. But we only
		 * allow the option to be passed in with sufficient version of the
		 * protocol, and when the output plugin supports it.
		 */
		if (!data->two_phase)
			ctx->twophase_opt_given = false;
		else if (data->protocol_version < LOGICALREP_PROTO_TWOPHASE_VERSION_NUM)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("requested proto_version=%d does not support two-phase commit, need %d or higher",
							data->protocol_version, LOGICALREP_PROTO_TWOPHASE_VERSION_NUM)));
		else if (!ctx->twophase)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("two-phase commit requested, but not supported by output plugin")));
		else
			ctx->twophase_opt_given = true;

		/* Init publication state. */
		data->publications = NIL;
		publications_valid = false;

		/*
		 * Register callback for pg_publication if we didn't already do that
		 * during some previous call in this process.
		 */
		if (!publication_callback_registered)
		{
			CacheRegisterSyscacheCallback(PUBLICATIONOID,
										  publication_invalidation_cb,
										  (Datum) 0);
			CacheRegisterRelSyncCallback(rel_sync_cache_relation_cb,
										 (Datum) 0);
			publication_callback_registered = true;
		}

		/* Initialize relation schema cache. */
		init_rel_sync_cache(CacheMemoryContext);
	}
	else
	{
		/*
		 * Disable the streaming and prepared transactions during the slot
		 * initialization mode.
		 */
		ctx->streaming = false;
		ctx->twophase = false;
	}
}
584 :
585 : /*
586 : * BEGIN callback.
587 : *
588 : * Don't send the BEGIN message here instead postpone it until the first
589 : * change. In logical replication, a common scenario is to replicate a set of
590 : * tables (instead of all tables) and transactions whose changes were on
591 : * the table(s) that are not published will produce empty transactions. These
592 : * empty transactions will send BEGIN and COMMIT messages to subscribers,
593 : * using bandwidth on something with little/no use for logical replication.
594 : */
595 : static void
596 1067 : pgoutput_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn)
597 : {
598 1067 : PGOutputTxnData *txndata = MemoryContextAllocZero(ctx->context,
599 : sizeof(PGOutputTxnData));
600 :
601 1067 : txn->output_plugin_private = txndata;
602 1067 : }
603 :
/*
 * Send BEGIN.
 *
 * This is called while processing the first change of the transaction.
 * Writes the BEGIN message, marks it sent in the per-transaction state,
 * and optionally follows it with the replication origin message.
 */
static void
pgoutput_send_begin(LogicalDecodingContext *ctx, ReorderBufferTXN *txn)
{
	bool		send_replication_origin = txn->origin_id != InvalidReplOriginId;
	PGOutputTxnData *txndata = (PGOutputTxnData *) txn->output_plugin_private;

	Assert(txndata);
	Assert(!txndata->sent_begin_txn);

	/* Keep the write open ("last" = false) if an origin message follows. */
	OutputPluginPrepareWrite(ctx, !send_replication_origin);
	logicalrep_write_begin(ctx->out, txn);
	txndata->sent_begin_txn = true;

	send_repl_origin(ctx, txn->origin_id, txn->origin_lsn,
					 send_replication_origin);

	OutputPluginWrite(ctx, true);
}
627 :
/*
 * COMMIT callback
 *
 * Sends the COMMIT message only if a BEGIN was actually sent (i.e. the
 * transaction produced at least one published change); empty transactions
 * are skipped entirely.  The per-transaction state is freed either way.
 */
static void
pgoutput_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
					XLogRecPtr commit_lsn)
{
	PGOutputTxnData *txndata = (PGOutputTxnData *) txn->output_plugin_private;
	bool		sent_begin_txn;

	Assert(txndata);

	/*
	 * We don't need to send the commit message unless some relevant change
	 * from this transaction has been sent to the downstream.
	 */
	sent_begin_txn = txndata->sent_begin_txn;
	/* Report progress; "skipped" when nothing was sent for this xact. */
	OutputPluginUpdateProgress(ctx, !sent_begin_txn);
	pfree(txndata);
	txn->output_plugin_private = NULL;

	if (!sent_begin_txn)
	{
		elog(DEBUG1, "skipped replication of an empty transaction with XID: %u", txn->xid);
		return;
	}

	OutputPluginPrepareWrite(ctx, true);
	logicalrep_write_commit(ctx->out, txn, commit_lsn);
	OutputPluginWrite(ctx, true);
}
659 :
660 : /*
661 : * BEGIN PREPARE callback
662 : */
663 : static void
664 22 : pgoutput_begin_prepare_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn)
665 : {
666 22 : bool send_replication_origin = txn->origin_id != InvalidReplOriginId;
667 :
668 22 : OutputPluginPrepareWrite(ctx, !send_replication_origin);
669 22 : logicalrep_write_begin_prepare(ctx->out, txn);
670 :
671 22 : send_repl_origin(ctx, txn->origin_id, txn->origin_lsn,
672 : send_replication_origin);
673 :
674 22 : OutputPluginWrite(ctx, true);
675 22 : }
676 :
/*
 * PREPARE callback
 *
 * Reports progress (never skipped for prepared transactions) and emits the
 * PREPARE message with the transaction's prepare LSN.
 */
static void
pgoutput_prepare_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
					 XLogRecPtr prepare_lsn)
{
	OutputPluginUpdateProgress(ctx, false);

	OutputPluginPrepareWrite(ctx, true);
	logicalrep_write_prepare(ctx->out, txn, prepare_lsn);
	OutputPluginWrite(ctx, true);
}
690 :
/*
 * COMMIT PREPARED callback
 *
 * Emits the COMMIT PREPARED message for a previously prepared transaction.
 */
static void
pgoutput_commit_prepared_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
							 XLogRecPtr commit_lsn)
{
	OutputPluginUpdateProgress(ctx, false);

	OutputPluginPrepareWrite(ctx, true);
	logicalrep_write_commit_prepared(ctx->out, txn, commit_lsn);
	OutputPluginWrite(ctx, true);
}
704 :
/*
 * ROLLBACK PREPARED callback
 *
 * Emits the ROLLBACK PREPARED message, carrying the end-of-prepare LSN and
 * the original prepare timestamp so the subscriber can find the transaction.
 */
static void
pgoutput_rollback_prepared_txn(LogicalDecodingContext *ctx,
							   ReorderBufferTXN *txn,
							   XLogRecPtr prepare_end_lsn,
							   TimestampTz prepare_time)
{
	OutputPluginUpdateProgress(ctx, false);

	OutputPluginPrepareWrite(ctx, true);
	logicalrep_write_rollback_prepared(ctx->out, txn, prepare_end_lsn,
									   prepare_time);
	OutputPluginWrite(ctx, true);
}
721 :
/*
 * Write the current schema of the relation and its ancestor (if any) if not
 * done yet.
 *
 * Tracking differs by mode: in a streaming block, "schema sent" is recorded
 * per toplevel xid (streamed_txns list); otherwise a single schema_sent flag
 * on the cache entry suffices.
 */
static void
maybe_send_schema(LogicalDecodingContext *ctx,
				  ReorderBufferChange *change,
				  Relation relation, RelationSyncEntry *relentry)
{
	PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
	bool		schema_sent;
	TransactionId xid = InvalidTransactionId;
	TransactionId topxid = InvalidTransactionId;

	/*
	 * Remember XID of the (sub)transaction for the change. We don't care if
	 * it's top-level transaction or not (we have already sent that XID in
	 * start of the current streaming block).
	 *
	 * If we're not in a streaming block, just use InvalidTransactionId and
	 * the write methods will not include it.
	 */
	if (data->in_streaming)
		xid = change->txn->xid;

	/* Schema tracking is keyed by the toplevel transaction's XID. */
	if (rbtxn_is_subtxn(change->txn))
		topxid = rbtxn_get_toptxn(change->txn)->xid;
	else
		topxid = xid;

	/*
	 * Do we need to send the schema? We do track streamed transactions
	 * separately, because those may be applied later (and the regular
	 * transactions won't see their effects until then) and in an order that
	 * we don't know at this point.
	 *
	 * XXX There is a scope of optimization here. Currently, we always send
	 * the schema first time in a streaming transaction but we can probably
	 * avoid that by checking 'relentry->schema_sent' flag. However, before
	 * doing that we need to study its impact on the case where we have a mix
	 * of streaming and non-streaming transactions.
	 */
	if (data->in_streaming)
		schema_sent = get_schema_sent_in_streamed_txn(relentry, topxid);
	else
		schema_sent = relentry->schema_sent;

	/* Nothing to do if we already sent the schema. */
	if (schema_sent)
		return;

	/*
	 * Send the schema.  If the changes will be published using an ancestor's
	 * schema, not the relation's own, send that ancestor's schema before
	 * sending relation's own (XXX - maybe sending only the former suffices?).
	 */
	if (relentry->publish_as_relid != RelationGetRelid(relation))
	{
		Relation	ancestor = RelationIdGetRelation(relentry->publish_as_relid);

		send_relation_and_attrs(ancestor, xid, ctx, relentry);
		RelationClose(ancestor);
	}

	send_relation_and_attrs(relation, xid, ctx, relentry);

	/* Record that the schema was sent, under the mode-appropriate tracking. */
	if (data->in_streaming)
		set_schema_sent_in_streamed_txn(relentry, topxid);
	else
		relentry->schema_sent = true;
}
793 :
794 : /*
795 : * Sends a relation
796 : */
797 : static void
798 394 : send_relation_and_attrs(Relation relation, TransactionId xid,
799 : LogicalDecodingContext *ctx,
800 : RelationSyncEntry *relentry)
801 : {
802 394 : TupleDesc desc = RelationGetDescr(relation);
803 394 : Bitmapset *columns = relentry->columns;
804 394 : PublishGencolsType include_gencols_type = relentry->include_gencols_type;
805 : int i;
806 :
807 : /*
808 : * Write out type info if needed. We do that only for user-created types.
809 : * We use FirstGenbkiObjectId as the cutoff, so that we only consider
810 : * objects with hand-assigned OIDs to be "built in", not for instance any
811 : * function or type defined in the information_schema. This is important
812 : * because only hand-assigned OIDs can be expected to remain stable across
813 : * major versions.
814 : */
815 1214 : for (i = 0; i < desc->natts; i++)
816 : {
817 820 : Form_pg_attribute att = TupleDescAttr(desc, i);
818 :
819 820 : if (!logicalrep_should_publish_column(att, columns,
820 : include_gencols_type))
821 71 : continue;
822 :
823 749 : if (att->atttypid < FirstGenbkiObjectId)
824 731 : continue;
825 :
826 18 : OutputPluginPrepareWrite(ctx, false);
827 18 : logicalrep_write_typ(ctx->out, xid, att->atttypid);
828 18 : OutputPluginWrite(ctx, false);
829 : }
830 :
831 394 : OutputPluginPrepareWrite(ctx, false);
832 394 : logicalrep_write_rel(ctx->out, xid, relation, columns,
833 : include_gencols_type);
834 394 : OutputPluginWrite(ctx, false);
835 394 : }
836 :
/*
 * Executor state preparation for evaluation of row filter expressions for the
 * specified relation.
 *
 * Builds a single-entry range table referencing 'rel' (locked with
 * AccessShareLock) and returns a fresh EState over it.  Caller is
 * responsible for freeing the executor state.
 */
static EState *
create_estate_for_relation(Relation rel)
{
	EState	   *estate;
	RangeTblEntry *rte;
	List	   *perminfos = NIL;

	estate = CreateExecutorState();

	rte = makeNode(RangeTblEntry);
	rte->rtekind = RTE_RELATION;
	rte->relid = RelationGetRelid(rel);
	rte->relkind = rel->rd_rel->relkind;
	rte->rellockmode = AccessShareLock;

	addRTEPermissionInfo(&perminfos, rte);

	/* The single RTE occupies index 1 of the range table. */
	ExecInitRangeTable(estate, list_make1(rte), perminfos,
					   bms_make_singleton(1));

	estate->es_output_cid = GetCurrentCommandId(false);

	return estate;
}
865 :
866 : /*
867 : * Evaluates row filter.
868 : *
869 : * If the row filter evaluates to NULL, it is taken as false i.e. the change
870 : * isn't replicated.
871 : */
872 : static bool
873 38 : pgoutput_row_filter_exec_expr(ExprState *state, ExprContext *econtext)
874 : {
875 : Datum ret;
876 : bool isnull;
877 :
878 : Assert(state != NULL);
879 :
880 38 : ret = ExecEvalExprSwitchContext(state, econtext, &isnull);
881 :
882 38 : elog(DEBUG3, "row filter evaluates to %s (isnull: %s)",
883 : isnull ? "false" : DatumGetBool(ret) ? "true" : "false",
884 : isnull ? "true" : "false");
885 :
886 38 : if (isnull)
887 1 : return false;
888 :
889 37 : return DatumGetBool(ret);
890 : }
891 :
892 : /*
893 : * Make sure the per-entry memory context exists.
894 : */
895 : static void
896 343 : pgoutput_ensure_entry_cxt(PGOutputData *data, RelationSyncEntry *entry)
897 : {
898 : Relation relation;
899 :
900 : /* The context may already exist, in which case bail out. */
901 343 : if (entry->entry_cxt)
902 17 : return;
903 :
904 326 : relation = RelationIdGetRelation(entry->publish_as_relid);
905 :
906 326 : entry->entry_cxt = AllocSetContextCreate(data->cachectx,
907 : "entry private context",
908 : ALLOCSET_SMALL_SIZES);
909 :
910 326 : MemoryContextCopyAndSetIdentifier(entry->entry_cxt,
911 : RelationGetRelationName(relation));
912 : }
913 :
/*
 * Initialize the row filter.
 *
 * Collects the row filter expressions (if any) from all publications this
 * subscription uses for the relation, and caches a combined ExprState per
 * pubaction in entry->exprstate.  If any publication publishes a given
 * pubaction without a filter, that pubaction ends up with no filter at all.
 */
static void
pgoutput_row_filter_init(PGOutputData *data, List *publications,
						 RelationSyncEntry *entry)
{
	ListCell   *lc;
	List	   *rfnodes[] = {NIL, NIL, NIL};	/* One per pubaction */
	bool		no_filter[] = {false, false, false};	/* One per pubaction */
	MemoryContext oldctx;
	int			idx;
	bool		has_filter = true;
	Oid			schemaid = get_rel_namespace(entry->publish_as_relid);

	/*
	 * Find if there are any row filters for this relation. If there are, then
	 * prepare the necessary ExprState and cache it in entry->exprstate. To
	 * build an expression state, we need to ensure the following:
	 *
	 * All the given publication-table mappings must be checked.
	 *
	 * Multiple publications might have multiple row filters for this
	 * relation. Since row filter usage depends on the DML operation, there
	 * are multiple lists (one for each operation) to which row filters will
	 * be appended.
	 *
	 * FOR ALL TABLES and FOR TABLES IN SCHEMA implies "don't use row filter
	 * expression" so it takes precedence.
	 */
	foreach(lc, publications)
	{
		Publication *pub = lfirst(lc);
		HeapTuple	rftuple = NULL;
		Datum		rfdatum = 0;
		bool		pub_no_filter = true;

		/*
		 * If the publication is FOR ALL TABLES, or the publication includes a
		 * FOR TABLES IN SCHEMA where the table belongs to the referred
		 * schema, then it is treated the same as if there are no row filters
		 * (even if other publications have a row filter).
		 */
		if (!pub->alltables &&
			!SearchSysCacheExists2(PUBLICATIONNAMESPACEMAP,
								   ObjectIdGetDatum(schemaid),
								   ObjectIdGetDatum(pub->oid)))
		{
			/*
			 * Check for the presence of a row filter in this publication.
			 */
			rftuple = SearchSysCache2(PUBLICATIONRELMAP,
									  ObjectIdGetDatum(entry->publish_as_relid),
									  ObjectIdGetDatum(pub->oid));

			if (HeapTupleIsValid(rftuple))
			{
				/* Null indicates no filter. */
				rfdatum = SysCacheGetAttr(PUBLICATIONRELMAP, rftuple,
										  Anum_pg_publication_rel_prqual,
										  &pub_no_filter);
			}
		}

		if (pub_no_filter)
		{
			if (rftuple)
				ReleaseSysCache(rftuple);

			/* This publication covers these pubactions without a filter. */
			no_filter[PUBACTION_INSERT] |= pub->pubactions.pubinsert;
			no_filter[PUBACTION_UPDATE] |= pub->pubactions.pubupdate;
			no_filter[PUBACTION_DELETE] |= pub->pubactions.pubdelete;

			/*
			 * Quick exit if all the DML actions are publicized via this
			 * publication.
			 */
			if (no_filter[PUBACTION_INSERT] &&
				no_filter[PUBACTION_UPDATE] &&
				no_filter[PUBACTION_DELETE])
			{
				has_filter = false;
				break;
			}

			/* No additional work for this publication. Next one. */
			continue;
		}

		/* Form the per pubaction row filter lists. */
		if (pub->pubactions.pubinsert && !no_filter[PUBACTION_INSERT])
			rfnodes[PUBACTION_INSERT] = lappend(rfnodes[PUBACTION_INSERT],
												TextDatumGetCString(rfdatum));
		if (pub->pubactions.pubupdate && !no_filter[PUBACTION_UPDATE])
			rfnodes[PUBACTION_UPDATE] = lappend(rfnodes[PUBACTION_UPDATE],
												TextDatumGetCString(rfdatum));
		if (pub->pubactions.pubdelete && !no_filter[PUBACTION_DELETE])
			rfnodes[PUBACTION_DELETE] = lappend(rfnodes[PUBACTION_DELETE],
												TextDatumGetCString(rfdatum));

		ReleaseSysCache(rftuple);
	}							/* loop all subscribed publications */

	/*
	 * Clean the row filter: any pubaction that some publication covers
	 * without a filter gets no filter at all.
	 */
	for (idx = 0; idx < NUM_ROWFILTER_PUBACTIONS; idx++)
	{
		if (no_filter[idx])
		{
			list_free_deep(rfnodes[idx]);
			rfnodes[idx] = NIL;
		}
	}

	if (has_filter)
	{
		Relation	relation = RelationIdGetRelation(entry->publish_as_relid);

		pgoutput_ensure_entry_cxt(data, entry);

		/*
		 * Now all the filters for all pubactions are known. Combine them when
		 * their pubactions are the same.
		 */
		oldctx = MemoryContextSwitchTo(entry->entry_cxt);
		entry->estate = create_estate_for_relation(relation);
		for (idx = 0; idx < NUM_ROWFILTER_PUBACTIONS; idx++)
		{
			List	   *filters = NIL;
			Expr	   *rfnode;

			if (rfnodes[idx] == NIL)
				continue;

			foreach(lc, rfnodes[idx])
				filters = lappend(filters, expand_generated_columns_in_expr(stringToNode((char *) lfirst(lc)), relation, 1));

			/* combine the row filter (OR semantics) and cache the ExprState */
			rfnode = make_orclause(filters);
			entry->exprstate[idx] = ExecPrepareExpr(rfnode, entry->estate);
		}						/* for each pubaction */
		MemoryContextSwitchTo(oldctx);

		RelationClose(relation);
	}
}
1059 :
1060 : /*
1061 : * If the table contains a generated column, check for any conflicting
1062 : * values of 'publish_generated_columns' parameter in the publications.
1063 : */
1064 : static void
1065 326 : check_and_init_gencol(PGOutputData *data, List *publications,
1066 : RelationSyncEntry *entry)
1067 : {
1068 326 : Relation relation = RelationIdGetRelation(entry->publish_as_relid);
1069 326 : TupleDesc desc = RelationGetDescr(relation);
1070 326 : bool gencolpresent = false;
1071 326 : bool first = true;
1072 :
1073 : /* Check if there is any generated column present. */
1074 992 : for (int i = 0; i < desc->natts; i++)
1075 : {
1076 673 : CompactAttribute *att = TupleDescCompactAttr(desc, i);
1077 :
1078 673 : if (att->attgenerated)
1079 : {
1080 7 : gencolpresent = true;
1081 7 : break;
1082 : }
1083 : }
1084 :
1085 : /* There are no generated columns to be published. */
1086 326 : if (!gencolpresent)
1087 : {
1088 319 : entry->include_gencols_type = PUBLISH_GENCOLS_NONE;
1089 319 : return;
1090 : }
1091 :
1092 : /*
1093 : * There may be a conflicting value for 'publish_generated_columns'
1094 : * parameter in the publications.
1095 : */
1096 22 : foreach_ptr(Publication, pub, publications)
1097 : {
1098 : /*
1099 : * The column list takes precedence over the
1100 : * 'publish_generated_columns' parameter. Those will be checked later,
1101 : * see pgoutput_column_list_init.
1102 : */
1103 8 : if (check_and_fetch_column_list(pub, entry->publish_as_relid, NULL, NULL))
1104 3 : continue;
1105 :
1106 5 : if (first)
1107 : {
1108 5 : entry->include_gencols_type = pub->pubgencols_type;
1109 5 : first = false;
1110 : }
1111 0 : else if (entry->include_gencols_type != pub->pubgencols_type)
1112 0 : ereport(ERROR,
1113 : errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1114 : errmsg("cannot use different values of publish_generated_columns for table \"%s.%s\" in different publications",
1115 : get_namespace_name(RelationGetNamespace(relation)),
1116 : RelationGetRelationName(relation)));
1117 : }
1118 : }
1119 :
/*
 * Initialize the column list.
 *
 * Computes entry->columns (a bitmap of published attribute numbers, stored
 * in entry->entry_cxt), or leaves it NULL when no publication specifies a
 * column list.  Errors out if publications disagree on the column list.
 */
static void
pgoutput_column_list_init(PGOutputData *data, List *publications,
						  RelationSyncEntry *entry)
{
	ListCell   *lc;
	bool		first = true;
	Relation	relation = RelationIdGetRelation(entry->publish_as_relid);
	bool		found_pub_collist = false;
	Bitmapset  *relcols = NULL;

	/* Column bitmaps must outlive this call; they live in entry_cxt. */
	pgoutput_ensure_entry_cxt(data, entry);

	/*
	 * Find if there are any column lists for this relation. If there are,
	 * build a bitmap using the column lists.
	 *
	 * Multiple publications might have multiple column lists for this
	 * relation.
	 *
	 * Note that we don't support the case where the column list is different
	 * for the same table when combining publications. See comments atop
	 * fetch_relation_list. But one can later change the publication so we
	 * still need to check all the given publication-table mappings and report
	 * an error if any publications have a different column list.
	 */
	foreach(lc, publications)
	{
		Publication *pub = lfirst(lc);
		Bitmapset  *cols = NULL;

		/* Retrieve the bitmap of columns for a column list publication. */
		found_pub_collist |= check_and_fetch_column_list(pub,
														 entry->publish_as_relid,
														 entry->entry_cxt, &cols);

		/*
		 * For non-column list publications — e.g. TABLE (without a column
		 * list), ALL TABLES, or ALL TABLES IN SCHEMA, we consider all columns
		 * of the table (including generated columns when
		 * 'publish_generated_columns' parameter is true).
		 */
		if (!cols)
		{
			/*
			 * Cache the table columns for the first publication with no
			 * specified column list to detect publication with a different
			 * column list.
			 */
			if (!relcols && (list_length(publications) > 1))
			{
				MemoryContext oldcxt = MemoryContextSwitchTo(entry->entry_cxt);

				relcols = pub_form_cols_map(relation,
											entry->include_gencols_type);
				MemoryContextSwitchTo(oldcxt);
			}

			cols = relcols;
		}

		if (first)
		{
			entry->columns = cols;
			first = false;
		}
		else if (!bms_equal(entry->columns, cols))
			ereport(ERROR,
					errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					errmsg("cannot use different column lists for table \"%s.%s\" in different publications",
						   get_namespace_name(RelationGetNamespace(relation)),
						   RelationGetRelationName(relation)));
	}							/* loop all subscribed publications */

	/*
	 * If no column list publications exist, columns to be published will be
	 * computed later according to the 'publish_generated_columns' parameter.
	 */
	if (!found_pub_collist)
		entry->columns = NULL;

	RelationClose(relation);
}
1205 :
1206 : /*
1207 : * Initialize the slot for storing new and old tuples, and build the map that
1208 : * will be used to convert the relation's tuples into the ancestor's format.
1209 : */
1210 : static void
1211 326 : init_tuple_slot(PGOutputData *data, Relation relation,
1212 : RelationSyncEntry *entry)
1213 : {
1214 : MemoryContext oldctx;
1215 : TupleDesc oldtupdesc;
1216 : TupleDesc newtupdesc;
1217 :
1218 326 : oldctx = MemoryContextSwitchTo(data->cachectx);
1219 :
1220 : /*
1221 : * Create tuple table slots. Create a copy of the TupleDesc as it needs to
1222 : * live as long as the cache remains.
1223 : */
1224 326 : oldtupdesc = CreateTupleDescCopyConstr(RelationGetDescr(relation));
1225 326 : newtupdesc = CreateTupleDescCopyConstr(RelationGetDescr(relation));
1226 :
1227 326 : entry->old_slot = MakeSingleTupleTableSlot(oldtupdesc, &TTSOpsHeapTuple);
1228 326 : entry->new_slot = MakeSingleTupleTableSlot(newtupdesc, &TTSOpsHeapTuple);
1229 :
1230 326 : MemoryContextSwitchTo(oldctx);
1231 :
1232 : /*
1233 : * Cache the map that will be used to convert the relation's tuples into
1234 : * the ancestor's format, if needed.
1235 : */
1236 326 : if (entry->publish_as_relid != RelationGetRelid(relation))
1237 : {
1238 39 : Relation ancestor = RelationIdGetRelation(entry->publish_as_relid);
1239 39 : TupleDesc indesc = RelationGetDescr(relation);
1240 39 : TupleDesc outdesc = RelationGetDescr(ancestor);
1241 :
1242 : /* Map must live as long as the logical decoding context. */
1243 39 : oldctx = MemoryContextSwitchTo(data->cachectx);
1244 :
1245 39 : entry->attrmap = build_attrmap_by_name_if_req(indesc, outdesc, false);
1246 :
1247 39 : MemoryContextSwitchTo(oldctx);
1248 39 : RelationClose(ancestor);
1249 : }
1250 326 : }
1251 :
1252 : /*
1253 : * Change is checked against the row filter if any.
1254 : *
1255 : * Returns true if the change is to be replicated, else false.
1256 : *
1257 : * For inserts, evaluate the row filter for new tuple.
1258 : * For deletes, evaluate the row filter for old tuple.
1259 : * For updates, evaluate the row filter for old and new tuple.
1260 : *
1261 : * For updates, if both evaluations are true, we allow sending the UPDATE and
1262 : * if both the evaluations are false, it doesn't replicate the UPDATE. Now, if
1263 : * only one of the tuples matches the row filter expression, we transform
1264 : * UPDATE to DELETE or INSERT to avoid any data inconsistency based on the
1265 : * following rules:
1266 : *
1267 : * Case 1: old-row (no match) new-row (no match) -> (drop change)
1268 : * Case 2: old-row (no match) new row (match) -> INSERT
1269 : * Case 3: old-row (match) new-row (no match) -> DELETE
1270 : * Case 4: old-row (match) new row (match) -> UPDATE
1271 : *
1272 : * The new action is updated in the action parameter.
1273 : *
1274 : * The new slot could be updated when transforming the UPDATE into INSERT,
1275 : * because the original new tuple might not have column values from the replica
1276 : * identity.
1277 : *
1278 : * Examples:
1279 : * Let's say the old tuple satisfies the row filter but the new tuple doesn't.
1280 : * Since the old tuple satisfies, the initial table synchronization copied this
1281 : * row (or another method was used to guarantee that there is data
1282 : * consistency). However, after the UPDATE the new tuple doesn't satisfy the
1283 : * row filter, so from a data consistency perspective, that row should be
1284 : * removed on the subscriber. The UPDATE should be transformed into a DELETE
1285 : * statement and be sent to the subscriber. Keeping this row on the subscriber
1286 : * is undesirable because it doesn't reflect what was defined in the row filter
1287 : * expression on the publisher. This row on the subscriber would likely not be
1288 : * modified by replication again. If someone inserted a new row with the same
1289 : * old identifier, replication could stop due to a constraint violation.
1290 : *
1291 : * Let's say the old tuple doesn't match the row filter but the new tuple does.
1292 : * Since the old tuple doesn't satisfy, the initial table synchronization
1293 : * probably didn't copy this row. However, after the UPDATE the new tuple does
1294 : * satisfy the row filter, so from a data consistency perspective, that row
1295 : * should be inserted on the subscriber. Otherwise, subsequent UPDATE or DELETE
1296 : * statements have no effect (it matches no row -- see
1297 : * apply_handle_update_internal()). So, the UPDATE should be transformed into a
1298 : * INSERT statement and be sent to the subscriber. However, this might surprise
1299 : * someone who expects the data set to satisfy the row filter expression on the
1300 : * provider.
1301 : */
static bool
pgoutput_row_filter(Relation relation, TupleTableSlot *old_slot,
					TupleTableSlot **new_slot_ptr, RelationSyncEntry *entry,
					ReorderBufferChangeType *action)
{
	TupleDesc	desc;
	int			i;
	bool		old_matched,
				new_matched,
				result;
	TupleTableSlot *tmp_new_slot;
	TupleTableSlot *new_slot = *new_slot_ptr;
	ExprContext *ecxt;
	ExprState  *filter_exprstate;

	/*
	 * We need this map to avoid relying on ReorderBufferChangeType enums
	 * having specific values.
	 */
	static const int map_changetype_pubaction[] = {
		[REORDER_BUFFER_CHANGE_INSERT] = PUBACTION_INSERT,
		[REORDER_BUFFER_CHANGE_UPDATE] = PUBACTION_UPDATE,
		[REORDER_BUFFER_CHANGE_DELETE] = PUBACTION_DELETE
	};

	Assert(*action == REORDER_BUFFER_CHANGE_INSERT ||
		   *action == REORDER_BUFFER_CHANGE_UPDATE ||
		   *action == REORDER_BUFFER_CHANGE_DELETE);

	Assert(new_slot || old_slot);

	/* Get the corresponding row filter */
	filter_exprstate = entry->exprstate[map_changetype_pubaction[*action]];

	/* Bail out if there is no row filter (the common case). */
	if (!filter_exprstate)
		return true;

	elog(DEBUG3, "table \"%s.%s\" has row filter",
		 get_namespace_name(RelationGetNamespace(relation)),
		 RelationGetRelationName(relation));

	/* Reset per-tuple memory before each evaluation. */
	ResetPerTupleExprContext(entry->estate);

	ecxt = GetPerTupleExprContext(entry->estate);

	/*
	 * For the following occasions where there is only one tuple, we can
	 * evaluate the row filter for that tuple and return.
	 *
	 * For inserts, we only have the new tuple.
	 *
	 * For updates, we can have only a new tuple when none of the replica
	 * identity columns changed and none of those columns have external data
	 * but we still need to evaluate the row filter for the new tuple as the
	 * existing values of those columns might not match the filter. Also,
	 * users can use constant expressions in the row filter, so we anyway need
	 * to evaluate it for the new tuple.
	 *
	 * For deletes, we only have the old tuple.
	 */
	if (!new_slot || !old_slot)
	{
		ecxt->ecxt_scantuple = new_slot ? new_slot : old_slot;
		result = pgoutput_row_filter_exec_expr(filter_exprstate, ecxt);

		return result;
	}

	/*
	 * Both the old and new tuples must be valid only for updates and need to
	 * be checked against the row filter.
	 */
	Assert(map_changetype_pubaction[*action] == PUBACTION_UPDATE);

	/* Deform both tuples so individual attributes can be inspected below. */
	slot_getallattrs(new_slot);
	slot_getallattrs(old_slot);

	tmp_new_slot = NULL;
	desc = RelationGetDescr(relation);

	/*
	 * The new tuple might not have all the replica identity columns, in which
	 * case it needs to be copied over from the old tuple.
	 */
	for (i = 0; i < desc->natts; i++)
	{
		CompactAttribute *att = TupleDescCompactAttr(desc, i);

		/*
		 * if the column in the new tuple or old tuple is null, nothing to do
		 */
		if (new_slot->tts_isnull[i] || old_slot->tts_isnull[i])
			continue;

		/*
		 * Unchanged toasted replica identity columns are only logged in the
		 * old tuple. Copy this over to the new tuple. The changed (or WAL
		 * Logged) toast values are always assembled in memory and set as
		 * VARTAG_INDIRECT. See ReorderBufferToastReplace.
		 */
		if (att->attlen == -1 &&
			VARATT_IS_EXTERNAL_ONDISK(DatumGetPointer(new_slot->tts_values[i])) &&
			!VARATT_IS_EXTERNAL_ONDISK(DatumGetPointer(old_slot->tts_values[i])))
		{
			if (!tmp_new_slot)
			{
				/* Lazily create the patched copy on first on-disk value. */
				tmp_new_slot = MakeSingleTupleTableSlot(desc, &TTSOpsVirtual);
				ExecClearTuple(tmp_new_slot);

				memcpy(tmp_new_slot->tts_values, new_slot->tts_values,
					   desc->natts * sizeof(Datum));
				memcpy(tmp_new_slot->tts_isnull, new_slot->tts_isnull,
					   desc->natts * sizeof(bool));
			}

			tmp_new_slot->tts_values[i] = old_slot->tts_values[i];
			tmp_new_slot->tts_isnull[i] = old_slot->tts_isnull[i];
		}
	}

	ecxt->ecxt_scantuple = old_slot;
	old_matched = pgoutput_row_filter_exec_expr(filter_exprstate, ecxt);

	if (tmp_new_slot)
	{
		ExecStoreVirtualTuple(tmp_new_slot);
		ecxt->ecxt_scantuple = tmp_new_slot;
	}
	else
		ecxt->ecxt_scantuple = new_slot;

	new_matched = pgoutput_row_filter_exec_expr(filter_exprstate, ecxt);

	/*
	 * Case 1: if both tuples don't match the row filter, bailout. Send
	 * nothing.
	 */
	if (!old_matched && !new_matched)
		return false;

	/*
	 * Case 2: if the old tuple doesn't satisfy the row filter but the new
	 * tuple does, transform the UPDATE into INSERT.
	 *
	 * Use the newly transformed tuple that must contain the column values for
	 * all the replica identity columns. This is required to ensure that the
	 * while inserting the tuple in the downstream node, we have all the
	 * required column values.
	 */
	if (!old_matched && new_matched)
	{
		*action = REORDER_BUFFER_CHANGE_INSERT;

		if (tmp_new_slot)
			*new_slot_ptr = tmp_new_slot;
	}

	/*
	 * Case 3: if the old tuple satisfies the row filter but the new tuple
	 * doesn't, transform the UPDATE into DELETE.
	 *
	 * This transformation does not require another tuple. The Old tuple will
	 * be used for DELETE.
	 */
	else if (old_matched && !new_matched)
		*action = REORDER_BUFFER_CHANGE_DELETE;

	/*
	 * Case 4: if both tuples match the row filter, transformation isn't
	 * required. (*action is default UPDATE).
	 */

	return true;
}
1477 :
/*
 * Sends the decoded DML over wire.
 *
 * This is called both in streaming and non-streaming modes.
 */
static void
pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
				Relation relation, ReorderBufferChange *change)
{
	PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
	PGOutputTxnData *txndata = (PGOutputTxnData *) txn->output_plugin_private;
	MemoryContext old;
	RelationSyncEntry *relentry;
	TransactionId xid = InvalidTransactionId;
	Relation	ancestor = NULL;
	Relation	targetrel = relation;
	ReorderBufferChangeType action = change->action;
	TupleTableSlot *old_slot = NULL;
	TupleTableSlot *new_slot = NULL;

	if (!is_publishable_relation(relation))
		return;

	/*
	 * Remember the xid for the change in streaming mode. We need to send xid
	 * with each change in the streaming mode so that subscriber can make
	 * their association and on aborts, it can discard the corresponding
	 * changes.
	 */
	if (data->in_streaming)
		xid = change->txn->xid;

	relentry = get_rel_sync_entry(data, relation);

	/* First check the table filter */
	switch (action)
	{
		case REORDER_BUFFER_CHANGE_INSERT:
			if (!relentry->pubactions.pubinsert)
				return;
			break;
		case REORDER_BUFFER_CHANGE_UPDATE:
			if (!relentry->pubactions.pubupdate)
				return;
			break;
		case REORDER_BUFFER_CHANGE_DELETE:
			if (!relentry->pubactions.pubdelete)
				return;

			/*
			 * This is only possible if deletes are allowed even when replica
			 * identity is not defined for a table. Since the DELETE action
			 * can't be published, we simply return.
			 */
			if (!change->data.tp.oldtuple)
			{
				elog(DEBUG1, "didn't send DELETE change because of missing oldtuple");
				return;
			}
			break;
		default:
			Assert(false);
	}

	/* Avoid leaking memory by using and resetting our own context */
	old = MemoryContextSwitchTo(data->context);

	/* Switch relation if publishing via root. */
	if (relentry->publish_as_relid != RelationGetRelid(relation))
	{
		Assert(relation->rd_rel->relispartition);
		ancestor = RelationIdGetRelation(relentry->publish_as_relid);
		targetrel = ancestor;
	}

	/* Load the old tuple (if logged) into the cached slot. */
	if (change->data.tp.oldtuple)
	{
		old_slot = relentry->old_slot;
		ExecStoreHeapTuple(change->data.tp.oldtuple, old_slot, false);

		/* Convert tuple if needed. */
		if (relentry->attrmap)
		{
			TupleTableSlot *slot = MakeTupleTableSlot(RelationGetDescr(targetrel),
													  &TTSOpsVirtual);

			old_slot = execute_attr_map_slot(relentry->attrmap, old_slot, slot);
		}
	}

	/* Likewise for the new tuple. */
	if (change->data.tp.newtuple)
	{
		new_slot = relentry->new_slot;
		ExecStoreHeapTuple(change->data.tp.newtuple, new_slot, false);

		/* Convert tuple if needed. */
		if (relentry->attrmap)
		{
			TupleTableSlot *slot = MakeTupleTableSlot(RelationGetDescr(targetrel),
													  &TTSOpsVirtual);

			new_slot = execute_attr_map_slot(relentry->attrmap, new_slot, slot);
		}
	}

	/*
	 * Check row filter.
	 *
	 * Updates could be transformed to inserts or deletes based on the results
	 * of the row filter for old and new tuple.
	 */
	if (!pgoutput_row_filter(targetrel, old_slot, &new_slot, relentry, &action))
		goto cleanup;

	/*
	 * Send BEGIN if we haven't yet.
	 *
	 * We send the BEGIN message after ensuring that we will actually send the
	 * change. This avoids sending a pair of BEGIN/COMMIT messages for empty
	 * transactions.
	 */
	if (txndata && !txndata->sent_begin_txn)
		pgoutput_send_begin(ctx, txn);

	/*
	 * Schema should be sent using the original relation because it also sends
	 * the ancestor's relation.
	 */
	maybe_send_schema(ctx, change, relation, relentry);

	OutputPluginPrepareWrite(ctx, true);

	/* Send the data */
	switch (action)
	{
		case REORDER_BUFFER_CHANGE_INSERT:
			logicalrep_write_insert(ctx->out, xid, targetrel, new_slot,
									data->binary, relentry->columns,
									relentry->include_gencols_type);
			break;
		case REORDER_BUFFER_CHANGE_UPDATE:
			logicalrep_write_update(ctx->out, xid, targetrel, old_slot,
									new_slot, data->binary, relentry->columns,
									relentry->include_gencols_type);
			break;
		case REORDER_BUFFER_CHANGE_DELETE:
			logicalrep_write_delete(ctx->out, xid, targetrel, old_slot,
									data->binary, relentry->columns,
									relentry->include_gencols_type);
			break;
		default:
			Assert(false);
	}

	OutputPluginWrite(ctx, true);

cleanup:
	if (RelationIsValid(ancestor))
	{
		RelationClose(ancestor);
		ancestor = NULL;
	}

	/* Drop the new slots that were used to store the converted tuples. */
	if (relentry->attrmap)
	{
		if (old_slot)
			ExecDropSingleTupleTableSlot(old_slot);

		if (new_slot)
			ExecDropSingleTupleTableSlot(new_slot);
	}

	MemoryContextSwitchTo(old);
	MemoryContextReset(data->context);
}
1654 :
1655 : static void
1656 23 : pgoutput_truncate(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
1657 : int nrelations, Relation relations[], ReorderBufferChange *change)
1658 : {
1659 23 : PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
1660 23 : PGOutputTxnData *txndata = (PGOutputTxnData *) txn->output_plugin_private;
1661 : MemoryContext old;
1662 : RelationSyncEntry *relentry;
1663 : int i;
1664 : int nrelids;
1665 : Oid *relids;
1666 23 : TransactionId xid = InvalidTransactionId;
1667 :
1668 : /* Remember the xid for the change in streaming mode. See pgoutput_change. */
1669 23 : if (data->in_streaming)
1670 0 : xid = change->txn->xid;
1671 :
1672 23 : old = MemoryContextSwitchTo(data->context);
1673 :
1674 23 : relids = palloc0(nrelations * sizeof(Oid));
1675 23 : nrelids = 0;
1676 :
1677 65 : for (i = 0; i < nrelations; i++)
1678 : {
1679 42 : Relation relation = relations[i];
1680 42 : Oid relid = RelationGetRelid(relation);
1681 :
1682 42 : if (!is_publishable_relation(relation))
1683 0 : continue;
1684 :
1685 42 : relentry = get_rel_sync_entry(data, relation);
1686 :
1687 42 : if (!relentry->pubactions.pubtruncate)
1688 21 : continue;
1689 :
1690 : /*
1691 : * Don't send partitions if the publication wants to send only the
1692 : * root tables through it.
1693 : */
1694 21 : if (relation->rd_rel->relispartition &&
1695 15 : relentry->publish_as_relid != relid)
1696 3 : continue;
1697 :
1698 18 : relids[nrelids++] = relid;
1699 :
1700 : /* Send BEGIN if we haven't yet */
1701 18 : if (txndata && !txndata->sent_begin_txn)
1702 12 : pgoutput_send_begin(ctx, txn);
1703 :
1704 18 : maybe_send_schema(ctx, change, relation, relentry);
1705 : }
1706 :
1707 23 : if (nrelids > 0)
1708 : {
1709 12 : OutputPluginPrepareWrite(ctx, true);
1710 12 : logicalrep_write_truncate(ctx->out,
1711 : xid,
1712 : nrelids,
1713 : relids,
1714 12 : change->data.truncate.cascade,
1715 12 : change->data.truncate.restart_seqs);
1716 12 : OutputPluginWrite(ctx, true);
1717 : }
1718 :
1719 23 : MemoryContextSwitchTo(old);
1720 23 : MemoryContextReset(data->context);
1721 23 : }
1722 :
1723 : static void
1724 7 : pgoutput_message(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
1725 : XLogRecPtr message_lsn, bool transactional, const char *prefix, Size sz,
1726 : const char *message)
1727 : {
1728 7 : PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
1729 7 : TransactionId xid = InvalidTransactionId;
1730 :
1731 7 : if (!data->messages)
1732 2 : return;
1733 :
1734 : /*
1735 : * Remember the xid for the message in streaming mode. See
1736 : * pgoutput_change.
1737 : */
1738 5 : if (data->in_streaming)
1739 0 : xid = txn->xid;
1740 :
1741 : /*
1742 : * Output BEGIN if we haven't yet. Avoid for non-transactional messages.
1743 : */
1744 5 : if (transactional)
1745 : {
1746 2 : PGOutputTxnData *txndata = (PGOutputTxnData *) txn->output_plugin_private;
1747 :
1748 : /* Send BEGIN if we haven't yet */
1749 2 : if (txndata && !txndata->sent_begin_txn)
1750 2 : pgoutput_send_begin(ctx, txn);
1751 : }
1752 :
1753 5 : OutputPluginPrepareWrite(ctx, true);
1754 5 : logicalrep_write_message(ctx->out,
1755 : xid,
1756 : message_lsn,
1757 : transactional,
1758 : prefix,
1759 : sz,
1760 : message);
1761 5 : OutputPluginWrite(ctx, true);
1762 : }
1763 :
1764 : /*
1765 : * Return true if the data is associated with an origin and the user has
1766 : * requested the changes that don't have an origin, false otherwise.
1767 : */
1768 : static bool
1769 401351 : pgoutput_origin_filter(LogicalDecodingContext *ctx,
1770 : ReplOriginId origin_id)
1771 : {
1772 401351 : PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
1773 :
1774 401351 : if (data->publish_no_origin && origin_id != InvalidReplOriginId)
1775 181 : return true;
1776 :
1777 401170 : return false;
1778 : }
1779 :
/*
 * Shutdown the output plugin.
 *
 * Note, we don't need to clean the data->context, data->cachectx, and
 * data->pubctx as they are child contexts of the ctx->context so they
 * will be cleaned up by logical decoding machinery.
 */
static void
pgoutput_shutdown(LogicalDecodingContext *ctx)
{
	/*
	 * Drop any static references to the per-session contexts so later
	 * callbacks see them as gone (NULL argument resets the saved state).
	 */
	pgoutput_memory_context_reset(NULL);
}
1792 :
1793 : /*
1794 : * Load publications from the list of publication names.
1795 : *
1796 : * Here, we skip the publications that don't exist yet. This will allow us
1797 : * to silently continue the replication in the absence of a missing publication.
1798 : * This is required because we allow the users to create publications after they
1799 : * have specified the required publications at the time of replication start.
1800 : */
1801 : static List *
1802 222 : LoadPublications(List *pubnames)
1803 : {
1804 222 : List *result = NIL;
1805 : ListCell *lc;
1806 :
1807 496 : foreach(lc, pubnames)
1808 : {
1809 274 : char *pubname = (char *) lfirst(lc);
1810 274 : Publication *pub = GetPublicationByName(pubname, true);
1811 :
1812 274 : if (pub)
1813 272 : result = lappend(result, pub);
1814 : else
1815 2 : ereport(WARNING,
1816 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1817 : errmsg("skipped loading publication \"%s\"", pubname),
1818 : errdetail("The publication does not exist at this point in the WAL."),
1819 : errhint("Create the publication if it does not exist."));
1820 : }
1821 :
1822 222 : return result;
1823 : }
1824 :
/*
 * Publication syscache invalidation callback.
 *
 * Called for invalidations on pg_publication.
 */
static void
publication_invalidation_cb(Datum arg, SysCacheIdentifier cacheid,
							uint32 hashvalue)
{
	/*
	 * Just flag the cached publication list as stale; it is reloaded
	 * lazily (via LoadPublications) the next time it is needed.
	 */
	publications_valid = false;
}
1836 :
1837 : /*
1838 : * START STREAM callback
1839 : */
1840 : static void
1841 621 : pgoutput_stream_start(struct LogicalDecodingContext *ctx,
1842 : ReorderBufferTXN *txn)
1843 : {
1844 621 : PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
1845 621 : bool send_replication_origin = txn->origin_id != InvalidReplOriginId;
1846 :
1847 : /* we can't nest streaming of transactions */
1848 : Assert(!data->in_streaming);
1849 :
1850 : /*
1851 : * If we already sent the first stream for this transaction then don't
1852 : * send the origin id in the subsequent streams.
1853 : */
1854 621 : if (rbtxn_is_streamed(txn))
1855 559 : send_replication_origin = false;
1856 :
1857 621 : OutputPluginPrepareWrite(ctx, !send_replication_origin);
1858 621 : logicalrep_write_stream_start(ctx->out, txn->xid, !rbtxn_is_streamed(txn));
1859 :
1860 621 : send_repl_origin(ctx, txn->origin_id, InvalidXLogRecPtr,
1861 : send_replication_origin);
1862 :
1863 621 : OutputPluginWrite(ctx, true);
1864 :
1865 : /* we're streaming a chunk of transaction now */
1866 621 : data->in_streaming = true;
1867 621 : }
1868 :
1869 : /*
1870 : * STOP STREAM callback
1871 : */
1872 : static void
1873 621 : pgoutput_stream_stop(struct LogicalDecodingContext *ctx,
1874 : ReorderBufferTXN *txn)
1875 : {
1876 621 : PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
1877 :
1878 : /* we should be streaming a transaction */
1879 : Assert(data->in_streaming);
1880 :
1881 621 : OutputPluginPrepareWrite(ctx, true);
1882 621 : logicalrep_write_stream_stop(ctx->out);
1883 621 : OutputPluginWrite(ctx, true);
1884 :
1885 : /* we've stopped streaming a transaction */
1886 621 : data->in_streaming = false;
1887 621 : }
1888 :
1889 : /*
1890 : * Notify downstream to discard the streamed transaction (along with all
1891 : * its subtransactions, if it's a toplevel transaction).
1892 : */
1893 : static void
1894 26 : pgoutput_stream_abort(struct LogicalDecodingContext *ctx,
1895 : ReorderBufferTXN *txn,
1896 : XLogRecPtr abort_lsn)
1897 : {
1898 : ReorderBufferTXN *toptxn;
1899 26 : PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
1900 26 : bool write_abort_info = (data->streaming == LOGICALREP_STREAM_PARALLEL);
1901 :
1902 : /*
1903 : * The abort should happen outside streaming block, even for streamed
1904 : * transactions. The transaction has to be marked as streamed, though.
1905 : */
1906 : Assert(!data->in_streaming);
1907 :
1908 : /* determine the toplevel transaction */
1909 26 : toptxn = rbtxn_get_toptxn(txn);
1910 :
1911 : Assert(rbtxn_is_streamed(toptxn));
1912 :
1913 26 : OutputPluginPrepareWrite(ctx, true);
1914 26 : logicalrep_write_stream_abort(ctx->out, toptxn->xid, txn->xid, abort_lsn,
1915 : txn->abort_time, write_abort_info);
1916 :
1917 26 : OutputPluginWrite(ctx, true);
1918 :
1919 26 : cleanup_rel_sync_cache(toptxn->xid, false);
1920 26 : }
1921 :
1922 : /*
1923 : * Notify downstream to apply the streamed transaction (along with all
1924 : * its subtransactions).
1925 : */
1926 : static void
1927 45 : pgoutput_stream_commit(struct LogicalDecodingContext *ctx,
1928 : ReorderBufferTXN *txn,
1929 : XLogRecPtr commit_lsn)
1930 : {
1931 45 : PGOutputData *data PG_USED_FOR_ASSERTS_ONLY = (PGOutputData *) ctx->output_plugin_private;
1932 :
1933 : /*
1934 : * The commit should happen outside streaming block, even for streamed
1935 : * transactions. The transaction has to be marked as streamed, though.
1936 : */
1937 : Assert(!data->in_streaming);
1938 : Assert(rbtxn_is_streamed(txn));
1939 :
1940 45 : OutputPluginUpdateProgress(ctx, false);
1941 :
1942 45 : OutputPluginPrepareWrite(ctx, true);
1943 45 : logicalrep_write_stream_commit(ctx->out, txn, commit_lsn);
1944 45 : OutputPluginWrite(ctx, true);
1945 :
1946 45 : cleanup_rel_sync_cache(txn->xid, true);
1947 45 : }
1948 :
/*
 * PREPARE callback (for streaming two-phase commit).
 *
 * Notify the downstream to prepare the transaction.
 */
static void
pgoutput_stream_prepare_txn(LogicalDecodingContext *ctx,
							ReorderBufferTXN *txn,
							XLogRecPtr prepare_lsn)
{
	/* Only a transaction that was actually streamed may be prepared here. */
	Assert(rbtxn_is_streamed(txn));

	/* Update the decoding progress before sending the prepare message. */
	OutputPluginUpdateProgress(ctx, false);
	OutputPluginPrepareWrite(ctx, true);
	logicalrep_write_stream_prepare(ctx->out, txn, prepare_lsn);
	OutputPluginWrite(ctx, true);
}
1966 :
/*
 * Initialize the relation schema sync cache for a decoding session.
 *
 * The hash table is destroyed at the end of a decoding session. While
 * relcache invalidations still exist and will still be invoked, they
 * will just see the null hash table global and take no action.
 */
static void
init_rel_sync_cache(MemoryContext cachectx)
{
	HASHCTL		ctl;
	/* Callback registration is per-backend and must happen only once. */
	static bool relation_callbacks_registered = false;

	/* Nothing to do if hash table already exists */
	if (RelationSyncCache != NULL)
		return;

	/*
	 * Make a new hash table for the cache.  Only keysize, entrysize and
	 * hcxt are set, matching the HASH_ELEM | HASH_CONTEXT | HASH_BLOBS
	 * flags passed below; other HASHCTL fields are ignored.
	 */
	ctl.keysize = sizeof(Oid);
	ctl.entrysize = sizeof(RelationSyncEntry);
	ctl.hcxt = cachectx;

	RelationSyncCache = hash_create("logical replication output relation cache",
									128, &ctl,
									HASH_ELEM | HASH_CONTEXT | HASH_BLOBS);

	Assert(RelationSyncCache != NULL);

	/*
	 * No more to do if we already registered callbacks (i.e. a previous
	 * decoding session in this backend already got here); there is no way
	 * to unregister them, so they must not be registered twice.
	 */
	if (relation_callbacks_registered)
		return;

	/* We must update the cache entry for a relation after a relcache flush */
	CacheRegisterRelcacheCallback(rel_sync_cache_relation_cb, (Datum) 0);

	/*
	 * Flush all cache entries after a pg_namespace change, in case it was a
	 * schema rename affecting a relation being replicated.
	 *
	 * XXX: It is not a good idea to invalidate all the relation entries in
	 * RelationSyncCache on schema rename. We can optimize it to invalidate
	 * only the required relations by either having a specific invalidation
	 * message containing impacted relations or by having schema information
	 * in each RelationSyncCache entry and using hashvalue of pg_namespace.oid
	 * passed to the callback.
	 */
	CacheRegisterSyscacheCallback(NAMESPACEOID,
								  rel_sync_cache_publication_cb,
								  (Datum) 0);

	relation_callbacks_registered = true;
}
2019 :
/*
 * Return whether the relation's schema was already sent within the
 * streamed transaction identified by xid.
 *
 * We expect relatively small number of streamed transactions, so a linear
 * list membership test is sufficient.
 */
static bool
get_schema_sent_in_streamed_txn(RelationSyncEntry *entry, TransactionId xid)
{
	return list_member_xid(entry->streamed_txns, xid);
}
2028 :
2029 : /*
2030 : * Add the xid in the rel sync entry for which we have already sent the schema
2031 : * of the relation.
2032 : */
2033 : static void
2034 71 : set_schema_sent_in_streamed_txn(RelationSyncEntry *entry, TransactionId xid)
2035 : {
2036 : MemoryContext oldctx;
2037 :
2038 71 : oldctx = MemoryContextSwitchTo(CacheMemoryContext);
2039 :
2040 71 : entry->streamed_txns = lappend_xid(entry->streamed_txns, xid);
2041 :
2042 71 : MemoryContextSwitchTo(oldctx);
2043 71 : }
2044 :
/*
 * Find or create entry in the relation schema cache.
 *
 * This looks up publications that the given relation is directly or
 * indirectly part of (the latter if it's really the relation's ancestor that
 * is part of a publication) and fills up the found entry with the information
 * about which operations to publish and whether to use an ancestor's schema
 * when publishing.
 */
static RelationSyncEntry *
get_rel_sync_entry(PGOutputData *data, Relation relation)
{
	RelationSyncEntry *entry;
	bool		found;
	MemoryContext oldctx;
	Oid			relid = RelationGetRelid(relation);

	Assert(RelationSyncCache != NULL);

	/* Find cached relation info, creating if not found */
	entry = (RelationSyncEntry *) hash_search(RelationSyncCache,
											  &relid,
											  HASH_ENTER, &found);
	Assert(entry != NULL);

	/*
	 * Initialize entry, if it's new.  hash_search() only zeroes the key, so
	 * every field must be set explicitly before the validation step below
	 * can safely free/reset them.
	 */
	if (!found)
	{
		entry->replicate_valid = false;
		entry->schema_sent = false;
		entry->include_gencols_type = PUBLISH_GENCOLS_NONE;
		entry->streamed_txns = NIL;
		entry->pubactions.pubinsert = entry->pubactions.pubupdate =
			entry->pubactions.pubdelete = entry->pubactions.pubtruncate = false;
		entry->new_slot = NULL;
		entry->old_slot = NULL;
		memset(entry->exprstate, 0, sizeof(entry->exprstate));
		entry->entry_cxt = NULL;
		entry->publish_as_relid = InvalidOid;
		entry->columns = NULL;
		entry->attrmap = NULL;
	}

	/* Validate the entry (i.e. rebuild it if an invalidation marked it stale) */
	if (!entry->replicate_valid)
	{
		Oid			schemaId = get_rel_namespace(relid);
		List	   *pubids = GetRelationPublications(relid);

		/*
		 * We don't acquire a lock on the namespace system table as we build
		 * the cache entry using a historic snapshot and all the later changes
		 * are absorbed while decoding WAL.
		 */
		List	   *schemaPubids = GetSchemaPublications(schemaId);
		ListCell   *lc;
		Oid			publish_as_relid = relid;
		int			publish_ancestor_level = 0;
		bool		am_partition = get_rel_relispartition(relid);
		char		relkind = get_rel_relkind(relid);
		List	   *rel_publications = NIL;

		/* Reload publications if needed before use. */
		if (!publications_valid)
		{
			/* Rebuild the list from scratch in its own long-lived context. */
			MemoryContextReset(data->pubctx);

			oldctx = MemoryContextSwitchTo(data->pubctx);
			data->publications = LoadPublications(data->publication_names);
			MemoryContextSwitchTo(oldctx);
			publications_valid = true;
		}

		/*
		 * Reset schema_sent status as the relation definition may have
		 * changed. Also reset pubactions to empty in case rel was dropped
		 * from a publication. Also free any objects that depended on the
		 * earlier definition.
		 */
		entry->schema_sent = false;
		entry->include_gencols_type = PUBLISH_GENCOLS_NONE;
		list_free(entry->streamed_txns);
		entry->streamed_txns = NIL;
		bms_free(entry->columns);
		entry->columns = NULL;
		entry->pubactions.pubinsert = false;
		entry->pubactions.pubupdate = false;
		entry->pubactions.pubdelete = false;
		entry->pubactions.pubtruncate = false;

		/*
		 * Tuple slots cleanups. (Will be rebuilt later if needed).
		 */
		if (entry->old_slot)
		{
			TupleDesc	desc = entry->old_slot->tts_tupleDescriptor;

			Assert(desc->tdrefcount == -1);

			ExecDropSingleTupleTableSlot(entry->old_slot);

			/*
			 * ExecDropSingleTupleTableSlot() would not free the TupleDesc, so
			 * do it now to avoid any leaks.
			 */
			FreeTupleDesc(desc);
		}
		if (entry->new_slot)
		{
			TupleDesc	desc = entry->new_slot->tts_tupleDescriptor;

			Assert(desc->tdrefcount == -1);

			ExecDropSingleTupleTableSlot(entry->new_slot);

			/*
			 * ExecDropSingleTupleTableSlot() would not free the TupleDesc, so
			 * do it now to avoid any leaks.
			 */
			FreeTupleDesc(desc);
		}

		entry->old_slot = NULL;
		entry->new_slot = NULL;

		if (entry->attrmap)
			free_attrmap(entry->attrmap);
		entry->attrmap = NULL;

		/*
		 * Row filter cache cleanups.
		 */
		if (entry->entry_cxt)
			MemoryContextDelete(entry->entry_cxt);

		entry->entry_cxt = NULL;
		entry->estate = NULL;
		memset(entry->exprstate, 0, sizeof(entry->exprstate));

		/*
		 * Build publication cache. We can't use one provided by relcache as
		 * relcache considers all publications that the given relation is in,
		 * but here we only need to consider ones that the subscriber
		 * requested.
		 */
		foreach(lc, data->publications)
		{
			Publication *pub = lfirst(lc);
			bool		publish = false;

			/*
			 * Under what relid should we publish changes in this publication?
			 * We'll use the top-most relid across all publications. Also
			 * track the ancestor level for this publication.
			 */
			Oid			pub_relid = relid;
			int			ancestor_level = 0;

			/*
			 * If this is a FOR ALL TABLES publication, pick the partition
			 * root and set the ancestor level accordingly.
			 */
			if (pub->alltables)
			{
				publish = true;
				if (pub->pubviaroot && am_partition)
				{
					List	   *ancestors = get_partition_ancestors(relid);

					/* topmost ancestor is the last element of the list */
					pub_relid = llast_oid(ancestors);
					ancestor_level = list_length(ancestors);
				}
			}

			if (!publish)
			{
				bool		ancestor_published = false;

				/*
				 * For a partition, check if any of the ancestors are
				 * published. If so, note down the topmost ancestor that is
				 * published via this publication, which will be used as the
				 * relation via which to publish the partition's changes.
				 */
				if (am_partition)
				{
					Oid			ancestor;
					int			level;
					List	   *ancestors = get_partition_ancestors(relid);

					ancestor = GetTopMostAncestorInPublication(pub->oid,
															   ancestors,
															   &level);

					if (ancestor != InvalidOid)
					{
						ancestor_published = true;
						if (pub->pubviaroot)
						{
							pub_relid = ancestor;
							ancestor_level = level;
						}
					}
				}

				if (list_member_oid(pubids, pub->oid) ||
					list_member_oid(schemaPubids, pub->oid) ||
					ancestor_published)
					publish = true;
			}

			/*
			 * If the relation is to be published, determine actions to
			 * publish, and list of columns, if appropriate.
			 *
			 * Don't publish changes for partitioned tables, because
			 * publishing those of its partitions suffices, unless partition
			 * changes won't be published due to pubviaroot being set.
			 */
			if (publish &&
				(relkind != RELKIND_PARTITIONED_TABLE || pub->pubviaroot))
			{
				entry->pubactions.pubinsert |= pub->pubactions.pubinsert;
				entry->pubactions.pubupdate |= pub->pubactions.pubupdate;
				entry->pubactions.pubdelete |= pub->pubactions.pubdelete;
				entry->pubactions.pubtruncate |= pub->pubactions.pubtruncate;

				/*
				 * We want to publish the changes as the top-most ancestor
				 * across all publications. So we need to check if the already
				 * calculated level is higher than the new one. If yes, we can
				 * ignore the new value (as it's a child). Otherwise the new
				 * value is an ancestor, so we keep it.
				 */
				if (publish_ancestor_level > ancestor_level)
					continue;

				/*
				 * If we found an ancestor higher up in the tree, discard the
				 * list of publications through which we replicate it, and use
				 * the new ancestor.
				 */
				if (publish_ancestor_level < ancestor_level)
				{
					publish_as_relid = pub_relid;
					publish_ancestor_level = ancestor_level;

					/* reset the publication list for this relation */
					rel_publications = NIL;
				}
				else
				{
					/* Same ancestor level, has to be the same OID. */
					Assert(publish_as_relid == pub_relid);
				}

				/* Track publications for this ancestor. */
				rel_publications = lappend(rel_publications, pub);
			}
		}

		entry->publish_as_relid = publish_as_relid;

		/*
		 * Initialize the tuple slot, map, and row filter. These are only used
		 * when publishing inserts, updates, or deletes.
		 */
		if (entry->pubactions.pubinsert || entry->pubactions.pubupdate ||
			entry->pubactions.pubdelete)
		{
			/* Initialize the tuple slot and map */
			init_tuple_slot(data, relation, entry);

			/* Initialize the row filter */
			pgoutput_row_filter_init(data, rel_publications, entry);

			/* Check whether to publish generated columns. */
			check_and_init_gencol(data, rel_publications, entry);

			/* Initialize the column list */
			pgoutput_column_list_init(data, rel_publications, entry);
		}

		/* Done rebuilding; release the transient lists. */
		list_free(pubids);
		list_free(schemaPubids);
		list_free(rel_publications);

		entry->replicate_valid = true;
	}

	return entry;
}
2337 :
/*
 * Cleanup list of streamed transactions and update the schema_sent flag.
 *
 * When a streamed transaction commits or aborts, we need to remove the
 * toplevel XID from the schema cache. If the transaction aborted, the
 * subscriber will simply throw away the schema records we streamed, so
 * we don't need to do anything else.
 *
 * If the transaction is committed, the subscriber will update the relation
 * cache - so tweak the schema_sent flag accordingly.
 */
static void
cleanup_rel_sync_cache(TransactionId xid, bool is_commit)
{
	HASH_SEQ_STATUS hash_seq;
	RelationSyncEntry *entry;

	Assert(RelationSyncCache != NULL);

	/* Scan every cache entry; each records the xids it was streamed in. */
	hash_seq_init(&hash_seq, RelationSyncCache);
	while ((entry = hash_seq_search(&hash_seq)) != NULL)
	{
		/*
		 * We can set the schema_sent flag for an entry that has committed xid
		 * in the list as that ensures that the subscriber would have the
		 * corresponding schema and we don't need to send it unless there is
		 * any invalidation for that relation.
		 */
		foreach_xid(streamed_txn, entry->streamed_txns)
		{
			if (xid == streamed_txn)
			{
				if (is_commit)
					entry->schema_sent = true;

				/*
				 * foreach_delete_current keeps the foreach iteration valid
				 * after removing the current cell; each xid appears at most
				 * once, so we can stop looking at this entry.
				 */
				entry->streamed_txns =
					foreach_delete_current(entry->streamed_txns, streamed_txn);
				break;
			}
		}
	}
}
2380 :
2381 : /*
2382 : * Relcache invalidation callback
2383 : */
2384 : static void
2385 4103 : rel_sync_cache_relation_cb(Datum arg, Oid relid)
2386 : {
2387 : RelationSyncEntry *entry;
2388 :
2389 : /*
2390 : * We can get here if the plugin was used in SQL interface as the
2391 : * RelationSyncCache is destroyed when the decoding finishes, but there is
2392 : * no way to unregister the relcache invalidation callback.
2393 : */
2394 4103 : if (RelationSyncCache == NULL)
2395 26 : return;
2396 :
2397 : /*
2398 : * Nobody keeps pointers to entries in this hash table around outside
2399 : * logical decoding callback calls - but invalidation events can come in
2400 : * *during* a callback if we do any syscache access in the callback.
2401 : * Because of that we must mark the cache entry as invalid but not damage
2402 : * any of its substructure here. The next get_rel_sync_entry() call will
2403 : * rebuild it all.
2404 : */
2405 4077 : if (OidIsValid(relid))
2406 : {
2407 : /*
2408 : * Getting invalidations for relations that aren't in the table is
2409 : * entirely normal. So we don't care if it's found or not.
2410 : */
2411 4016 : entry = (RelationSyncEntry *) hash_search(RelationSyncCache, &relid,
2412 : HASH_FIND, NULL);
2413 4016 : if (entry != NULL)
2414 692 : entry->replicate_valid = false;
2415 : }
2416 : else
2417 : {
2418 : /* Whole cache must be flushed. */
2419 : HASH_SEQ_STATUS status;
2420 :
2421 61 : hash_seq_init(&status, RelationSyncCache);
2422 124 : while ((entry = (RelationSyncEntry *) hash_seq_search(&status)) != NULL)
2423 : {
2424 63 : entry->replicate_valid = false;
2425 : }
2426 : }
2427 : }
2428 :
2429 : /*
2430 : * Publication relation/schema map syscache invalidation callback
2431 : *
2432 : * Called for invalidations on pg_namespace.
2433 : */
2434 : static void
2435 35 : rel_sync_cache_publication_cb(Datum arg, SysCacheIdentifier cacheid,
2436 : uint32 hashvalue)
2437 : {
2438 : HASH_SEQ_STATUS status;
2439 : RelationSyncEntry *entry;
2440 :
2441 : /*
2442 : * We can get here if the plugin was used in SQL interface as the
2443 : * RelationSyncCache is destroyed when the decoding finishes, but there is
2444 : * no way to unregister the invalidation callbacks.
2445 : */
2446 35 : if (RelationSyncCache == NULL)
2447 10 : return;
2448 :
2449 : /*
2450 : * We have no easy way to identify which cache entries this invalidation
2451 : * event might have affected, so just mark them all invalid.
2452 : */
2453 25 : hash_seq_init(&status, RelationSyncCache);
2454 46 : while ((entry = (RelationSyncEntry *) hash_seq_search(&status)) != NULL)
2455 : {
2456 21 : entry->replicate_valid = false;
2457 : }
2458 : }
2459 :
2460 : /* Send Replication origin */
2461 : static void
2462 1111 : send_repl_origin(LogicalDecodingContext *ctx, ReplOriginId origin_id,
2463 : XLogRecPtr origin_lsn, bool send_origin)
2464 : {
2465 1111 : if (send_origin)
2466 : {
2467 : char *origin;
2468 :
2469 : /*----------
2470 : * XXX: which behaviour do we want here?
2471 : *
2472 : * Alternatives:
2473 : * - don't send origin message if origin name not found
2474 : * (that's what we do now)
2475 : * - throw error - that will break replication, not good
2476 : * - send some special "unknown" origin
2477 : *----------
2478 : */
2479 8 : if (replorigin_by_oid(origin_id, true, &origin))
2480 : {
2481 : /* Message boundary */
2482 8 : OutputPluginWrite(ctx, false);
2483 8 : OutputPluginPrepareWrite(ctx, true);
2484 :
2485 8 : logicalrep_write_origin(ctx->out, origin, origin_lsn);
2486 : }
2487 : }
2488 1111 : }
|