Line data Source code
1 : /*-------------------------------------------------------------------------
2 : * relation.c
3 : * PostgreSQL logical replication relation mapping cache
4 : *
5 : * Copyright (c) 2016-2025, PostgreSQL Global Development Group
6 : *
7 : * IDENTIFICATION
8 : * src/backend/replication/logical/relation.c
9 : *
10 : * NOTES
11 : * Routines in this file mainly have to do with mapping the properties
12 : * of local replication target relations to the properties of their
13 : * remote counterpart.
14 : *
15 : *-------------------------------------------------------------------------
16 : */
17 :
18 : #include "postgres.h"
19 :
20 : #include "access/amapi.h"
21 : #include "access/genam.h"
22 : #include "access/table.h"
23 : #include "catalog/namespace.h"
24 : #include "catalog/pg_subscription_rel.h"
25 : #include "executor/executor.h"
26 : #include "nodes/makefuncs.h"
27 : #include "replication/logicalrelation.h"
28 : #include "replication/worker_internal.h"
29 : #include "utils/inval.h"
30 : #include "utils/lsyscache.h"
31 : #include "utils/syscache.h"
32 :

/* Long-lived memory context holding all relation map cache data. */
static MemoryContext LogicalRepRelMapContext = NULL;

/* Hash table keyed by remote relation id (LogicalRepRelId). */
static HTAB *LogicalRepRelMap = NULL;

/*
 * Partition map (LogicalRepPartMap)
 *
 * When a partitioned table is used as replication target, replicated
 * operations are actually performed on its leaf partitions, which requires
 * the partitions to also be mapped to the remote relation. Parent's entry
 * (LogicalRepRelMapEntry) cannot be used as-is for all partitions, because
 * individual partitions may have different attribute numbers, which means
 * attribute mappings to remote relation's attributes must be maintained
 * separately for each partition.
 */
static MemoryContext LogicalRepPartMapContext = NULL;
static HTAB *LogicalRepPartMap = NULL;
typedef struct LogicalRepPartMapEntry
{
	Oid			partoid;		/* LogicalRepPartMap's key */
	LogicalRepRelMapEntry relmapentry;	/* cached mapping for this partition */
} LogicalRepPartMapEntry;

static Oid	FindLogicalRepLocalIndex(Relation localrel, LogicalRepRelation *remoterel,
									 AttrMap *attrMap);
59 :
60 : /*
61 : * Relcache invalidation callback for our relation map cache.
62 : */
63 : static void
64 1362 : logicalrep_relmap_invalidate_cb(Datum arg, Oid reloid)
65 : {
66 : LogicalRepRelMapEntry *entry;
67 :
68 : /* Just to be sure. */
69 1362 : if (LogicalRepRelMap == NULL)
70 0 : return;
71 :
72 1362 : if (reloid != InvalidOid)
73 : {
74 : HASH_SEQ_STATUS status;
75 :
76 1362 : hash_seq_init(&status, LogicalRepRelMap);
77 :
78 : /* TODO, use inverse lookup hashtable? */
79 6042 : while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
80 : {
81 4924 : if (entry->localreloid == reloid)
82 : {
83 244 : entry->localrelvalid = false;
84 244 : hash_seq_term(&status);
85 244 : break;
86 : }
87 : }
88 : }
89 : else
90 : {
91 : /* invalidate all cache entries */
92 : HASH_SEQ_STATUS status;
93 :
94 0 : hash_seq_init(&status, LogicalRepRelMap);
95 :
96 0 : while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
97 0 : entry->localrelvalid = false;
98 : }
99 : }
100 :
101 : /*
102 : * Initialize the relation map cache.
103 : */
104 : static void
105 672 : logicalrep_relmap_init(void)
106 : {
107 : HASHCTL ctl;
108 :
109 672 : if (!LogicalRepRelMapContext)
110 672 : LogicalRepRelMapContext =
111 672 : AllocSetContextCreate(CacheMemoryContext,
112 : "LogicalRepRelMapContext",
113 : ALLOCSET_DEFAULT_SIZES);
114 :
115 : /* Initialize the relation hash table. */
116 672 : ctl.keysize = sizeof(LogicalRepRelId);
117 672 : ctl.entrysize = sizeof(LogicalRepRelMapEntry);
118 672 : ctl.hcxt = LogicalRepRelMapContext;
119 :
120 672 : LogicalRepRelMap = hash_create("logicalrep relation map cache", 128, &ctl,
121 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
122 :
123 : /* Watch for invalidation events. */
124 672 : CacheRegisterRelcacheCallback(logicalrep_relmap_invalidate_cb,
125 : (Datum) 0);
126 672 : }
127 :
128 : /*
129 : * Free the entry of a relation map cache.
130 : */
131 : static void
132 276 : logicalrep_relmap_free_entry(LogicalRepRelMapEntry *entry)
133 : {
134 : LogicalRepRelation *remoterel;
135 :
136 276 : remoterel = &entry->remoterel;
137 :
138 276 : pfree(remoterel->nspname);
139 276 : pfree(remoterel->relname);
140 :
141 276 : if (remoterel->natts > 0)
142 : {
143 : int i;
144 :
145 828 : for (i = 0; i < remoterel->natts; i++)
146 552 : pfree(remoterel->attnames[i]);
147 :
148 276 : pfree(remoterel->attnames);
149 276 : pfree(remoterel->atttyps);
150 : }
151 276 : bms_free(remoterel->attkeys);
152 :
153 276 : if (entry->attrmap)
154 232 : free_attrmap(entry->attrmap);
155 276 : }
156 :
157 : /*
158 : * Add new entry or update existing entry in the relation map cache.
159 : *
160 : * Called when new relation mapping is sent by the publisher to update
161 : * our expected view of incoming data from said publisher.
162 : */
163 : void
164 1158 : logicalrep_relmap_update(LogicalRepRelation *remoterel)
165 : {
166 : MemoryContext oldctx;
167 : LogicalRepRelMapEntry *entry;
168 : bool found;
169 : int i;
170 :
171 1158 : if (LogicalRepRelMap == NULL)
172 672 : logicalrep_relmap_init();
173 :
174 : /*
175 : * HASH_ENTER returns the existing entry if present or creates a new one.
176 : */
177 1158 : entry = hash_search(LogicalRepRelMap, &remoterel->remoteid,
178 : HASH_ENTER, &found);
179 :
180 1158 : if (found)
181 260 : logicalrep_relmap_free_entry(entry);
182 :
183 1158 : memset(entry, 0, sizeof(LogicalRepRelMapEntry));
184 :
185 : /* Make cached copy of the data */
186 1158 : oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
187 1158 : entry->remoterel.remoteid = remoterel->remoteid;
188 1158 : entry->remoterel.nspname = pstrdup(remoterel->nspname);
189 1158 : entry->remoterel.relname = pstrdup(remoterel->relname);
190 1158 : entry->remoterel.natts = remoterel->natts;
191 1158 : entry->remoterel.attnames = palloc(remoterel->natts * sizeof(char *));
192 1158 : entry->remoterel.atttyps = palloc(remoterel->natts * sizeof(Oid));
193 3214 : for (i = 0; i < remoterel->natts; i++)
194 : {
195 2056 : entry->remoterel.attnames[i] = pstrdup(remoterel->attnames[i]);
196 2056 : entry->remoterel.atttyps[i] = remoterel->atttyps[i];
197 : }
198 1158 : entry->remoterel.replident = remoterel->replident;
199 1158 : entry->remoterel.attkeys = bms_copy(remoterel->attkeys);
200 1158 : MemoryContextSwitchTo(oldctx);
201 1158 : }
202 :
203 : /*
204 : * Find attribute index in TupleDesc struct by attribute name.
205 : *
206 : * Returns -1 if not found.
207 : */
208 : static int
209 2386 : logicalrep_rel_att_by_name(LogicalRepRelation *remoterel, const char *attname)
210 : {
211 : int i;
212 :
213 4526 : for (i = 0; i < remoterel->natts; i++)
214 : {
215 3974 : if (strcmp(remoterel->attnames[i], attname) == 0)
216 1834 : return i;
217 : }
218 :
219 552 : return -1;
220 : }
221 :
222 : /*
223 : * Returns a comma-separated string of attribute names based on the provided
224 : * relation and bitmap indicating which attributes to include.
225 : */
226 : static char *
227 2 : logicalrep_get_attrs_str(LogicalRepRelation *remoterel, Bitmapset *atts)
228 : {
229 : StringInfoData attsbuf;
230 2 : int attcnt = 0;
231 2 : int i = -1;
232 :
233 : Assert(!bms_is_empty(atts));
234 :
235 2 : initStringInfo(&attsbuf);
236 :
237 6 : while ((i = bms_next_member(atts, i)) >= 0)
238 : {
239 4 : attcnt++;
240 4 : if (attcnt > 1)
241 2 : appendStringInfo(&attsbuf, _(", "));
242 :
243 4 : appendStringInfo(&attsbuf, _("\"%s\""), remoterel->attnames[i]);
244 : }
245 :
246 2 : return attsbuf.data;
247 : }
248 :
/*
 * If attempting to replicate missing or generated columns, report an error.
 * Prioritize 'missing' errors if both occur though the prioritization is
 * arbitrary.
 *
 * 'missingatts' holds remote attribute indexes with no matching local
 * column; 'generatedatts' holds remote attribute indexes that map to
 * generated columns on the subscriber (which cannot receive replicated
 * values).  Either error aborts the apply operation.
 */
static void
logicalrep_report_missing_or_gen_attrs(LogicalRepRelation *remoterel,
									   Bitmapset *missingatts,
									   Bitmapset *generatedatts)
{
	if (!bms_is_empty(missingatts))
		ereport(ERROR,
				errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				errmsg_plural("logical replication target relation \"%s.%s\" is missing replicated column: %s",
							  "logical replication target relation \"%s.%s\" is missing replicated columns: %s",
							  bms_num_members(missingatts),
							  remoterel->nspname,
							  remoterel->relname,
							  logicalrep_get_attrs_str(remoterel,
													   missingatts)));

	if (!bms_is_empty(generatedatts))
		ereport(ERROR,
				errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				errmsg_plural("logical replication target relation \"%s.%s\" has incompatible generated column: %s",
							  "logical replication target relation \"%s.%s\" has incompatible generated columns: %s",
							  bms_num_members(generatedatts),
							  remoterel->nspname,
							  remoterel->relname,
							  logicalrep_get_attrs_str(remoterel,
													   generatedatts)));
}
281 :
/*
 * Check if replica identity matches and mark the updatable flag.
 *
 * We allow for stricter replica identity (fewer columns) on subscriber as
 * that will not stop us from finding unique tuple. IE, if publisher has
 * identity (id,timestamp) and subscriber just (id) this will not be a
 * problem, but in the opposite scenario it will.
 *
 * We just mark the relation entry as not updatable here if the local
 * replica identity is found to be insufficient for applying
 * updates/deletes (inserts don't care!) and leave it to
 * check_relation_updatable() to throw the actual error if needed.
 */
static void
logicalrep_rel_mark_updatable(LogicalRepRelMapEntry *entry)
{
	Bitmapset  *idkey;
	LogicalRepRelation *remoterel = &entry->remoterel;
	int			i;

	entry->updatable = true;

	idkey = RelationGetIndexAttrBitmap(entry->localrel,
									   INDEX_ATTR_BITMAP_IDENTITY_KEY);
	/* fallback to PK if no replica identity */
	if (idkey == NULL)
	{
		idkey = RelationGetIndexAttrBitmap(entry->localrel,
										   INDEX_ATTR_BITMAP_PRIMARY_KEY);

		/*
		 * If no replica identity index and no PK, the published table must
		 * have replica identity FULL.
		 */
		if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL)
			entry->updatable = false;
	}

	/*
	 * Walk the local identity-key columns; every one must map to a remote
	 * key column or the entry cannot support update/delete.
	 */
	i = -1;
	while ((i = bms_next_member(idkey, i)) >= 0)
	{
		/* Bitmap members are offset by FirstLowInvalidHeapAttributeNumber. */
		int			attnum = i + FirstLowInvalidHeapAttributeNumber;

		if (!AttrNumberIsForUserDefinedAttr(attnum))
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("logical replication target relation \"%s.%s\" uses "
							"system columns in REPLICA IDENTITY index",
							remoterel->nspname, remoterel->relname)));

		/* Convert to a 0-based offset for the attribute map lookup. */
		attnum = AttrNumberGetAttrOffset(attnum);

		/*
		 * Not updatable if the local key column has no remote counterpart,
		 * or maps to a remote column that is not part of the remote key.
		 */
		if (entry->attrmap->attnums[attnum] < 0 ||
			!bms_is_member(entry->attrmap->attnums[attnum], remoterel->attkeys))
		{
			entry->updatable = false;
			break;
		}
	}
}
342 :
/*
 * Open the local relation associated with the remote one.
 *
 * Rebuilds the Relcache mapping if it was invalidated by local DDL.
 *
 * 'remoteid' must already have an entry in the map (installed earlier by
 * logicalrep_relmap_update); 'lockmode' is the lock taken on the local
 * relation.  The returned entry has localrel opened and locked; the caller
 * must release it with logicalrep_rel_close().
 */
LogicalRepRelMapEntry *
logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode)
{
	LogicalRepRelMapEntry *entry;
	bool		found;
	LogicalRepRelation *remoterel;

	if (LogicalRepRelMap == NULL)
		logicalrep_relmap_init();

	/* Search for existing entry. */
	entry = hash_search(LogicalRepRelMap, &remoteid,
						HASH_FIND, &found);

	if (!found)
		elog(ERROR, "no relation map entry for remote relation ID %u",
			 remoteid);

	remoterel = &entry->remoterel;

	/* Ensure we don't leak a relcache refcount. */
	if (entry->localrel)
		elog(ERROR, "remote relation ID %u is already open", remoteid);

	/*
	 * When opening and locking a relation, pending invalidation messages are
	 * processed which can invalidate the relation. Hence, if the entry is
	 * currently considered valid, try to open the local relation by OID and
	 * see if invalidation ensues.
	 */
	if (entry->localrelvalid)
	{
		entry->localrel = try_table_open(entry->localreloid, lockmode);
		if (!entry->localrel)
		{
			/* Table was renamed or dropped. */
			entry->localrelvalid = false;
		}
		else if (!entry->localrelvalid)
		{
			/* Note we release the no-longer-useful lock here. */
			table_close(entry->localrel, lockmode);
			entry->localrel = NULL;
		}
	}

	/*
	 * If the entry has been marked invalid since we last had lock on it,
	 * re-open the local relation by name and rebuild all derived data.
	 */
	if (!entry->localrelvalid)
	{
		Oid			relid;
		TupleDesc	desc;
		MemoryContext oldctx;
		int			i;
		Bitmapset  *missingatts;
		Bitmapset  *generatedattrs = NULL;

		/* Release the no-longer-useful attrmap, if any. */
		if (entry->attrmap)
		{
			free_attrmap(entry->attrmap);
			entry->attrmap = NULL;
		}

		/* Try to find and lock the relation by name. */
		relid = RangeVarGetRelid(makeRangeVar(remoterel->nspname,
											  remoterel->relname, -1),
								 lockmode, true);
		if (!OidIsValid(relid))
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("logical replication target relation \"%s.%s\" does not exist",
							remoterel->nspname, remoterel->relname)));
		/* Already locked above, so open without acquiring another lock. */
		entry->localrel = table_open(relid, NoLock);
		entry->localreloid = relid;

		/* Check for supported relkind. */
		CheckSubscriptionRelkind(entry->localrel->rd_rel->relkind,
								 remoterel->nspname, remoterel->relname);

		/*
		 * Build the mapping of local attribute numbers to remote attribute
		 * numbers and validate that we don't miss any replicated columns as
		 * that would result in potentially unwanted data loss.
		 */
		desc = RelationGetDescr(entry->localrel);
		oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
		entry->attrmap = make_attrmap(desc->natts);
		MemoryContextSwitchTo(oldctx);

		/* check and report missing attrs, if any */
		missingatts = bms_add_range(NULL, 0, remoterel->natts - 1);
		for (i = 0; i < desc->natts; i++)
		{
			int			attnum;
			Form_pg_attribute attr = TupleDescAttr(desc, i);

			if (attr->attisdropped)
			{
				/* Dropped local columns map to no remote column. */
				entry->attrmap->attnums[i] = -1;
				continue;
			}

			attnum = logicalrep_rel_att_by_name(remoterel,
												NameStr(attr->attname));

			entry->attrmap->attnums[i] = attnum;
			if (attnum >= 0)
			{
				/* Remember which subscriber columns are generated. */
				if (attr->attgenerated)
					generatedattrs = bms_add_member(generatedattrs, attnum);

				missingatts = bms_del_member(missingatts, attnum);
			}
		}

		logicalrep_report_missing_or_gen_attrs(remoterel, missingatts,
											   generatedattrs);

		/* be tidy */
		bms_free(generatedattrs);
		bms_free(missingatts);

		/*
		 * Set if the table's replica identity is enough to apply
		 * update/delete.
		 */
		logicalrep_rel_mark_updatable(entry);

		/*
		 * Finding a usable index is an infrequent task. It occurs when an
		 * operation is first performed on the relation, or after invalidation
		 * of the relation cache entry (such as ANALYZE or CREATE/DROP index
		 * on the relation).
		 */
		entry->localindexoid = FindLogicalRepLocalIndex(entry->localrel, remoterel,
														entry->attrmap);

		entry->localrelvalid = true;
	}

	/* Refresh the subscription sync state unless it already reached READY. */
	if (entry->state != SUBREL_STATE_READY)
		entry->state = GetSubscriptionRelState(MySubscription->oid,
											   entry->localreloid,
											   &entry->statelsn);

	return entry;
}
499 :
/*
 * Close the previously opened logical relation.
 *
 * Releases the table lock and clears the cached Relation pointer so a
 * dangling pointer is never left in the map entry (logicalrep_rel_open
 * errors out if localrel is still set).
 */
void
logicalrep_rel_close(LogicalRepRelMapEntry *rel, LOCKMODE lockmode)
{
	table_close(rel->localrel, lockmode);
	rel->localrel = NULL;
}
509 :
510 : /*
511 : * Partition cache: look up partition LogicalRepRelMapEntry's
512 : *
513 : * Unlike relation map cache, this is keyed by partition OID, not remote
514 : * relation OID, because we only have to use this cache in the case where
515 : * partitions are not directly mapped to any remote relation, such as when
516 : * replication is occurring with one of their ancestors as target.
517 : */
518 :
519 : /*
520 : * Relcache invalidation callback
521 : */
522 : static void
523 572 : logicalrep_partmap_invalidate_cb(Datum arg, Oid reloid)
524 : {
525 : LogicalRepPartMapEntry *entry;
526 :
527 : /* Just to be sure. */
528 572 : if (LogicalRepPartMap == NULL)
529 0 : return;
530 :
531 572 : if (reloid != InvalidOid)
532 : {
533 : HASH_SEQ_STATUS status;
534 :
535 572 : hash_seq_init(&status, LogicalRepPartMap);
536 :
537 : /* TODO, use inverse lookup hashtable? */
538 1632 : while ((entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
539 : {
540 1072 : if (entry->relmapentry.localreloid == reloid)
541 : {
542 12 : entry->relmapentry.localrelvalid = false;
543 12 : hash_seq_term(&status);
544 12 : break;
545 : }
546 : }
547 : }
548 : else
549 : {
550 : /* invalidate all cache entries */
551 : HASH_SEQ_STATUS status;
552 :
553 0 : hash_seq_init(&status, LogicalRepPartMap);
554 :
555 0 : while ((entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
556 0 : entry->relmapentry.localrelvalid = false;
557 : }
558 : }
559 :
560 : /*
561 : * Reset the entries in the partition map that refer to remoterel.
562 : *
563 : * Called when new relation mapping is sent by the publisher to update our
564 : * expected view of incoming data from said publisher.
565 : *
566 : * Note that we don't update the remoterel information in the entry here,
567 : * we will update the information in logicalrep_partition_open to avoid
568 : * unnecessary work.
569 : */
570 : void
571 786 : logicalrep_partmap_reset_relmap(LogicalRepRelation *remoterel)
572 : {
573 : HASH_SEQ_STATUS status;
574 : LogicalRepPartMapEntry *part_entry;
575 : LogicalRepRelMapEntry *entry;
576 :
577 786 : if (LogicalRepPartMap == NULL)
578 718 : return;
579 :
580 68 : hash_seq_init(&status, LogicalRepPartMap);
581 174 : while ((part_entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
582 : {
583 106 : entry = &part_entry->relmapentry;
584 :
585 106 : if (entry->remoterel.remoteid != remoterel->remoteid)
586 90 : continue;
587 :
588 16 : logicalrep_relmap_free_entry(entry);
589 :
590 16 : memset(entry, 0, sizeof(LogicalRepRelMapEntry));
591 : }
592 : }
593 :
594 : /*
595 : * Initialize the partition map cache.
596 : */
597 : static void
598 12 : logicalrep_partmap_init(void)
599 : {
600 : HASHCTL ctl;
601 :
602 12 : if (!LogicalRepPartMapContext)
603 12 : LogicalRepPartMapContext =
604 12 : AllocSetContextCreate(CacheMemoryContext,
605 : "LogicalRepPartMapContext",
606 : ALLOCSET_DEFAULT_SIZES);
607 :
608 : /* Initialize the relation hash table. */
609 12 : ctl.keysize = sizeof(Oid); /* partition OID */
610 12 : ctl.entrysize = sizeof(LogicalRepPartMapEntry);
611 12 : ctl.hcxt = LogicalRepPartMapContext;
612 :
613 12 : LogicalRepPartMap = hash_create("logicalrep partition map cache", 64, &ctl,
614 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
615 :
616 : /* Watch for invalidation events. */
617 12 : CacheRegisterRelcacheCallback(logicalrep_partmap_invalidate_cb,
618 : (Datum) 0);
619 12 : }
620 :
/*
 * logicalrep_partition_open
 *
 * Returned entry reuses most of the values of the root table's entry, save
 * the attribute map, which can be different for the partition. However,
 * we must physically copy all the data, in case the root table's entry
 * gets freed/rebuilt.
 *
 * 'map' is the parent-to-partition attribute conversion map from tuple
 * routing, or NULL when the partition's attributes match the root's.
 *
 * Note there's no logicalrep_partition_close, because the caller closes the
 * component relation.
 */
LogicalRepRelMapEntry *
logicalrep_partition_open(LogicalRepRelMapEntry *root,
						  Relation partrel, AttrMap *map)
{
	LogicalRepRelMapEntry *entry;
	LogicalRepPartMapEntry *part_entry;
	LogicalRepRelation *remoterel = &root->remoterel;
	Oid			partOid = RelationGetRelid(partrel);
	AttrMap    *attrmap = root->attrmap;
	bool		found;
	MemoryContext oldctx;

	if (LogicalRepPartMap == NULL)
		logicalrep_partmap_init();

	/* Search for existing entry. */
	part_entry = (LogicalRepPartMapEntry *) hash_search(LogicalRepPartMap,
														&partOid,
														HASH_ENTER, &found);

	entry = &part_entry->relmapentry;

	/*
	 * We must always overwrite entry->localrel with the latest partition
	 * Relation pointer, because the Relation pointed to by the old value may
	 * have been cleared after the caller would have closed the partition
	 * relation after the last use of this entry. Note that localrelvalid is
	 * only updated by the relcache invalidation callback, so it may still be
	 * true irrespective of whether the Relation pointed to by localrel has
	 * been cleared or not.
	 */
	if (found && entry->localrelvalid)
	{
		entry->localrel = partrel;
		return entry;
	}

	/* Switch to longer-lived context. */
	oldctx = MemoryContextSwitchTo(LogicalRepPartMapContext);

	if (!found)
	{
		memset(part_entry, 0, sizeof(LogicalRepPartMapEntry));
		part_entry->partoid = partOid;
	}

	/* Release the no-longer-useful attrmap, if any. */
	if (entry->attrmap)
	{
		free_attrmap(entry->attrmap);
		entry->attrmap = NULL;
	}

	/*
	 * Copy the remote relation info only when the entry doesn't have it yet;
	 * remoteid is zero for a fresh entry or after
	 * logicalrep_partmap_reset_relmap() cleared it.
	 */
	if (!entry->remoterel.remoteid)
	{
		int			i;

		/* Remote relation is copied as-is from the root entry. */
		entry->remoterel.remoteid = remoterel->remoteid;
		entry->remoterel.nspname = pstrdup(remoterel->nspname);
		entry->remoterel.relname = pstrdup(remoterel->relname);
		entry->remoterel.natts = remoterel->natts;
		entry->remoterel.attnames = palloc(remoterel->natts * sizeof(char *));
		entry->remoterel.atttyps = palloc(remoterel->natts * sizeof(Oid));
		for (i = 0; i < remoterel->natts; i++)
		{
			entry->remoterel.attnames[i] = pstrdup(remoterel->attnames[i]);
			entry->remoterel.atttyps[i] = remoterel->atttyps[i];
		}
		entry->remoterel.replident = remoterel->replident;
		entry->remoterel.attkeys = bms_copy(remoterel->attkeys);
	}

	entry->localrel = partrel;
	entry->localreloid = partOid;

	/*
	 * If the partition's attributes don't match the root relation's, we'll
	 * need to make a new attrmap which maps partition attribute numbers to
	 * remoterel's, instead of the original which maps root relation's
	 * attribute numbers to remoterel's.
	 *
	 * Note that 'map' which comes from the tuple routing data structure
	 * contains 1-based attribute numbers (of the parent relation). However,
	 * the map in 'entry', a logical replication data structure, contains
	 * 0-based attribute numbers (of the remote relation).
	 */
	if (map)
	{
		AttrNumber	attno;

		entry->attrmap = make_attrmap(map->maplen);
		for (attno = 0; attno < entry->attrmap->maplen; attno++)
		{
			AttrNumber	root_attno = map->attnums[attno];

			/* 0 means it's a dropped attribute. See comments atop AttrMap. */
			if (root_attno == 0)
				entry->attrmap->attnums[attno] = -1;
			else
				entry->attrmap->attnums[attno] = attrmap->attnums[root_attno - 1];
		}
	}
	else
	{
		/* Lacking copy_attmap, do this the hard way. */
		entry->attrmap = make_attrmap(attrmap->maplen);
		memcpy(entry->attrmap->attnums, attrmap->attnums,
			   attrmap->maplen * sizeof(AttrNumber));
	}

	/* Set if the table's replica identity is enough to apply update/delete. */
	logicalrep_rel_mark_updatable(entry);

	/* state and statelsn are left set to 0. */
	MemoryContextSwitchTo(oldctx);

	/*
	 * Finding a usable index is an infrequent task. It occurs when an
	 * operation is first performed on the relation, or after invalidation of
	 * the relation cache entry (such as ANALYZE or CREATE/DROP index on the
	 * relation).
	 *
	 * We also prefer to run this code on the oldctx so that we do not leak
	 * anything in the LogicalRepPartMapContext (hence CacheMemoryContext).
	 */
	entry->localindexoid = FindLogicalRepLocalIndex(partrel, remoterel,
													entry->attrmap);

	entry->localrelvalid = true;

	return entry;
}
765 :
766 : /*
767 : * Returns the oid of an index that can be used by the apply worker to scan
768 : * the relation.
769 : *
770 : * We expect to call this function when REPLICA IDENTITY FULL is defined for
771 : * the remote relation.
772 : *
773 : * If no suitable index is found, returns InvalidOid.
774 : */
775 : static Oid
776 116 : FindUsableIndexForReplicaIdentityFull(Relation localrel, AttrMap *attrmap)
777 : {
778 116 : List *idxlist = RelationGetIndexList(localrel);
779 :
780 208 : foreach_oid(idxoid, idxlist)
781 : {
782 : bool isUsableIdx;
783 : Relation idxRel;
784 :
785 40 : idxRel = index_open(idxoid, AccessShareLock);
786 40 : isUsableIdx = IsIndexUsableForReplicaIdentityFull(idxRel, attrmap);
787 40 : index_close(idxRel, AccessShareLock);
788 :
789 : /* Return the first eligible index found */
790 40 : if (isUsableIdx)
791 32 : return idxoid;
792 : }
793 :
794 84 : return InvalidOid;
795 : }
796 :
/*
 * Returns true if the index is usable for replica identity full.
 *
 * The index must have an equal strategy for each key column, be non-partial,
 * and the leftmost field must be a column (not an expression) that references
 * the remote relation column. These limitations help to keep the index scan
 * similar to PK/RI index scans.
 *
 * attrmap is a map of local attributes to remote ones. We can consult this
 * map to check whether the local index attribute has a corresponding remote
 * attribute.
 *
 * Note that the limitations of index scans for replica identity full only
 * adheres to a subset of the limitations of PK/RI. For example, we support
 * columns that are marked as [NULL] or we are not interested in the [NOT
 * DEFERRABLE] aspect of constraints here. It works for us because we always
 * compare the tuples for non-PK/RI index scans. See
 * RelationFindReplTupleByIndex().
 *
 * XXX: To support partial indexes, the required changes are likely to be larger.
 * If none of the tuples satisfy the expression for the index scan, we fall-back
 * to sequential execution, which might not be a good idea in some cases.
 */
bool
IsIndexUsableForReplicaIdentityFull(Relation idxrel, AttrMap *attrmap)
{
	AttrNumber	keycol;
	oidvector  *indclass;

	/* The index must not be a partial index */
	if (!heap_attisnull(idxrel->rd_indextuple, Anum_pg_index_indpred, NULL))
		return false;

	Assert(idxrel->rd_index->indnatts >= 1);

	indclass = (oidvector *) DatumGetPointer(SysCacheGetAttrNotNull(INDEXRELID,
																	idxrel->rd_indextuple,
																	Anum_pg_index_indclass));

	/* Ensure that the index has a valid equal strategy for each key column */
	for (int i = 0; i < idxrel->rd_index->indnkeyatts; i++)
	{
		Oid			opfamily;

		opfamily = get_opclass_family(indclass->values[i]);
		if (IndexAmTranslateCompareType(COMPARE_EQ, idxrel->rd_rel->relam, opfamily, true) == InvalidStrategy)
			return false;
	}

	/*
	 * For indexes other than PK and REPLICA IDENTITY, we need to match the
	 * local and remote tuples. The equality routine tuples_equal() cannot
	 * accept a data type where the type cache cannot provide an equality
	 * operator.
	 */
	for (int i = 0; i < idxrel->rd_att->natts; i++)
	{
		TypeCacheEntry *typentry;

		typentry = lookup_type_cache(TupleDescAttr(idxrel->rd_att, i)->atttypid, TYPECACHE_EQ_OPR_FINFO);
		if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
			return false;
	}

	/* The leftmost index field must not be an expression */
	keycol = idxrel->rd_index->indkey.values[0];
	if (!AttributeNumberIsValid(keycol))
		return false;

	/*
	 * And the leftmost index field must reference the remote relation column.
	 * This is because if it doesn't, the sequential scan is favorable over
	 * index scan in most cases.
	 */
	if (attrmap->maplen <= AttrNumberGetAttrOffset(keycol) ||
		attrmap->attnums[AttrNumberGetAttrOffset(keycol)] < 0)
		return false;

	/*
	 * The given index access method must implement "amgettuple", which will
	 * be used later to fetch the tuples. See RelationFindReplTupleByIndex().
	 */
	if (GetIndexAmRoutineByAmId(idxrel->rd_rel->relam, false)->amgettuple == NULL)
		return false;

	return true;
}
884 :
885 : /*
886 : * Return the OID of the replica identity index if one is defined;
887 : * the OID of the PK if one exists and is not deferrable;
888 : * otherwise, InvalidOid.
889 : */
890 : Oid
891 145200 : GetRelationIdentityOrPK(Relation rel)
892 : {
893 : Oid idxoid;
894 :
895 145200 : idxoid = RelationGetReplicaIndex(rel);
896 :
897 145200 : if (!OidIsValid(idxoid))
898 400 : idxoid = RelationGetPrimaryKeyIndex(rel, false);
899 :
900 145200 : return idxoid;
901 : }
902 :
/*
 * Returns the index oid if we can use an index for subscriber. Otherwise,
 * returns InvalidOid.
 *
 * 'attrMap' maps local attribute numbers to remote ones and is consulted
 * when searching for a usable index under REPLICA IDENTITY FULL.
 */
static Oid
FindLogicalRepLocalIndex(Relation localrel, LogicalRepRelation *remoterel,
						 AttrMap *attrMap)
{
	Oid			idxoid;

	/*
	 * We never need index oid for partitioned tables, always rely on leaf
	 * partition's index.
	 */
	if (localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		return InvalidOid;

	/*
	 * Simple case, we already have a primary key or a replica identity index.
	 */
	idxoid = GetRelationIdentityOrPK(localrel);
	if (OidIsValid(idxoid))
		return idxoid;

	if (remoterel->replident == REPLICA_IDENTITY_FULL)
	{
		/*
		 * We are looking for one more opportunity for using an index. If
		 * there are any indexes defined on the local relation, try to pick a
		 * suitable index.
		 *
		 * The index selection safely assumes that all the columns are going
		 * to be available for the index scan given that remote relation has
		 * replica identity full.
		 *
		 * Note that we are not using the planner to find the cheapest method
		 * to scan the relation as that would require us to either use lower
		 * level planner functions which would be a maintenance burden in the
		 * long run or use the full-fledged planner which could cause
		 * overhead.
		 */
		return FindUsableIndexForReplicaIdentityFull(localrel, attrMap);
	}

	/* No usable index; the apply worker will use a sequential scan. */
	return InvalidOid;
}
|