Line data Source code
1 : /*-------------------------------------------------------------------------
2 : * relation.c
3 : * PostgreSQL logical replication relation mapping cache
4 : *
5 : * Copyright (c) 2016-2025, PostgreSQL Global Development Group
6 : *
7 : * IDENTIFICATION
8 : * src/backend/replication/logical/relation.c
9 : *
10 : * NOTES
11 : * Routines in this file mainly have to do with mapping the properties
12 : * of local replication target relations to the properties of their
13 : * remote counterpart.
14 : *
15 : *-------------------------------------------------------------------------
16 : */
17 :
18 : #include "postgres.h"
19 :
20 : #include "access/amapi.h"
21 : #include "access/genam.h"
22 : #include "access/table.h"
23 : #include "catalog/namespace.h"
24 : #include "catalog/pg_subscription_rel.h"
25 : #include "executor/executor.h"
26 : #include "nodes/makefuncs.h"
27 : #include "replication/logicalrelation.h"
28 : #include "replication/worker_internal.h"
29 : #include "utils/inval.h"
30 : #include "utils/syscache.h"
31 :
32 :
/*
 * Memory context used for the relation map hash table and for the cached
 * copies of remote relation data and attribute maps hanging off its entries.
 */
static MemoryContext LogicalRepRelMapContext = NULL;

/*
 * Relation map cache: LogicalRepRelMapEntry items keyed by remote relation
 * ID (LogicalRepRelId).  Created lazily by logicalrep_relmap_init().
 */
static HTAB *LogicalRepRelMap = NULL;

/*
 * Partition map (LogicalRepPartMap)
 *
 * When a partitioned table is used as replication target, replicated
 * operations are actually performed on its leaf partitions, which requires
 * the partitions to also be mapped to the remote relation.  Parent's entry
 * (LogicalRepRelMapEntry) cannot be used as-is for all partitions, because
 * individual partitions may have different attribute numbers, which means
 * attribute mappings to remote relation's attributes must be maintained
 * separately for each partition.
 *
 * Both the context and the hash table are created lazily by
 * logicalrep_partmap_init().
 */
static MemoryContext LogicalRepPartMapContext = NULL;
static HTAB *LogicalRepPartMap = NULL;
typedef struct LogicalRepPartMapEntry
{
	Oid			partoid;		/* LogicalRepPartMap's key */
	LogicalRepRelMapEntry relmapentry;	/* per-partition copy of the mapping */
} LogicalRepPartMapEntry;

/* Forward declaration; defined at the bottom of this file. */
static Oid	FindLogicalRepLocalIndex(Relation localrel, LogicalRepRelation *remoterel,
									 AttrMap *attrMap);
58 :
59 : /*
60 : * Relcache invalidation callback for our relation map cache.
61 : */
62 : static void
63 1280 : logicalrep_relmap_invalidate_cb(Datum arg, Oid reloid)
64 : {
65 : LogicalRepRelMapEntry *entry;
66 :
67 : /* Just to be sure. */
68 1280 : if (LogicalRepRelMap == NULL)
69 0 : return;
70 :
71 1280 : if (reloid != InvalidOid)
72 : {
73 : HASH_SEQ_STATUS status;
74 :
75 1280 : hash_seq_init(&status, LogicalRepRelMap);
76 :
77 : /* TODO, use inverse lookup hashtable? */
78 5712 : while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
79 : {
80 4666 : if (entry->localreloid == reloid)
81 : {
82 234 : entry->localrelvalid = false;
83 234 : hash_seq_term(&status);
84 234 : break;
85 : }
86 : }
87 : }
88 : else
89 : {
90 : /* invalidate all cache entries */
91 : HASH_SEQ_STATUS status;
92 :
93 0 : hash_seq_init(&status, LogicalRepRelMap);
94 :
95 0 : while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
96 0 : entry->localrelvalid = false;
97 : }
98 : }
99 :
100 : /*
101 : * Initialize the relation map cache.
102 : */
103 : static void
104 664 : logicalrep_relmap_init(void)
105 : {
106 : HASHCTL ctl;
107 :
108 664 : if (!LogicalRepRelMapContext)
109 664 : LogicalRepRelMapContext =
110 664 : AllocSetContextCreate(CacheMemoryContext,
111 : "LogicalRepRelMapContext",
112 : ALLOCSET_DEFAULT_SIZES);
113 :
114 : /* Initialize the relation hash table. */
115 664 : ctl.keysize = sizeof(LogicalRepRelId);
116 664 : ctl.entrysize = sizeof(LogicalRepRelMapEntry);
117 664 : ctl.hcxt = LogicalRepRelMapContext;
118 :
119 664 : LogicalRepRelMap = hash_create("logicalrep relation map cache", 128, &ctl,
120 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
121 :
122 : /* Watch for invalidation events. */
123 664 : CacheRegisterRelcacheCallback(logicalrep_relmap_invalidate_cb,
124 : (Datum) 0);
125 664 : }
126 :
127 : /*
128 : * Free the entry of a relation map cache.
129 : */
130 : static void
131 280 : logicalrep_relmap_free_entry(LogicalRepRelMapEntry *entry)
132 : {
133 : LogicalRepRelation *remoterel;
134 :
135 280 : remoterel = &entry->remoterel;
136 :
137 280 : pfree(remoterel->nspname);
138 280 : pfree(remoterel->relname);
139 :
140 280 : if (remoterel->natts > 0)
141 : {
142 : int i;
143 :
144 842 : for (i = 0; i < remoterel->natts; i++)
145 562 : pfree(remoterel->attnames[i]);
146 :
147 280 : pfree(remoterel->attnames);
148 280 : pfree(remoterel->atttyps);
149 : }
150 280 : bms_free(remoterel->attkeys);
151 :
152 280 : if (entry->attrmap)
153 236 : free_attrmap(entry->attrmap);
154 280 : }
155 :
156 : /*
157 : * Add new entry or update existing entry in the relation map cache.
158 : *
159 : * Called when new relation mapping is sent by the publisher to update
160 : * our expected view of incoming data from said publisher.
161 : */
162 : void
163 1154 : logicalrep_relmap_update(LogicalRepRelation *remoterel)
164 : {
165 : MemoryContext oldctx;
166 : LogicalRepRelMapEntry *entry;
167 : bool found;
168 : int i;
169 :
170 1154 : if (LogicalRepRelMap == NULL)
171 664 : logicalrep_relmap_init();
172 :
173 : /*
174 : * HASH_ENTER returns the existing entry if present or creates a new one.
175 : */
176 1154 : entry = hash_search(LogicalRepRelMap, &remoterel->remoteid,
177 : HASH_ENTER, &found);
178 :
179 1154 : if (found)
180 264 : logicalrep_relmap_free_entry(entry);
181 :
182 1154 : memset(entry, 0, sizeof(LogicalRepRelMapEntry));
183 :
184 : /* Make cached copy of the data */
185 1154 : oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
186 1154 : entry->remoterel.remoteid = remoterel->remoteid;
187 1154 : entry->remoterel.nspname = pstrdup(remoterel->nspname);
188 1154 : entry->remoterel.relname = pstrdup(remoterel->relname);
189 1154 : entry->remoterel.natts = remoterel->natts;
190 1154 : entry->remoterel.attnames = palloc(remoterel->natts * sizeof(char *));
191 1154 : entry->remoterel.atttyps = palloc(remoterel->natts * sizeof(Oid));
192 3204 : for (i = 0; i < remoterel->natts; i++)
193 : {
194 2050 : entry->remoterel.attnames[i] = pstrdup(remoterel->attnames[i]);
195 2050 : entry->remoterel.atttyps[i] = remoterel->atttyps[i];
196 : }
197 1154 : entry->remoterel.replident = remoterel->replident;
198 1154 : entry->remoterel.attkeys = bms_copy(remoterel->attkeys);
199 1154 : MemoryContextSwitchTo(oldctx);
200 1154 : }
201 :
202 : /*
203 : * Find attribute index in TupleDesc struct by attribute name.
204 : *
205 : * Returns -1 if not found.
206 : */
207 : static int
208 2372 : logicalrep_rel_att_by_name(LogicalRepRelation *remoterel, const char *attname)
209 : {
210 : int i;
211 :
212 4510 : for (i = 0; i < remoterel->natts; i++)
213 : {
214 3964 : if (strcmp(remoterel->attnames[i], attname) == 0)
215 1826 : return i;
216 : }
217 :
218 546 : return -1;
219 : }
220 :
221 : /*
222 : * Returns a comma-separated string of attribute names based on the provided
223 : * relation and bitmap indicating which attributes to include.
224 : */
225 : static char *
226 2 : logicalrep_get_attrs_str(LogicalRepRelation *remoterel, Bitmapset *atts)
227 : {
228 : StringInfoData attsbuf;
229 2 : int attcnt = 0;
230 2 : int i = -1;
231 :
232 : Assert(!bms_is_empty(atts));
233 :
234 2 : initStringInfo(&attsbuf);
235 :
236 6 : while ((i = bms_next_member(atts, i)) >= 0)
237 : {
238 4 : attcnt++;
239 4 : if (attcnt > 1)
240 2 : appendStringInfo(&attsbuf, _(", "));
241 :
242 4 : appendStringInfo(&attsbuf, _("\"%s\""), remoterel->attnames[i]);
243 : }
244 :
245 2 : return attsbuf.data;
246 : }
247 :
248 : /*
249 : * If attempting to replicate missing or generated columns, report an error.
250 : * Prioritize 'missing' errors if both occur though the prioritization is
251 : * arbitrary.
252 : */
253 : static void
254 1030 : logicalrep_report_missing_or_gen_attrs(LogicalRepRelation *remoterel,
255 : Bitmapset *missingatts,
256 : Bitmapset *generatedatts)
257 : {
258 1030 : if (!bms_is_empty(missingatts))
259 0 : ereport(ERROR,
260 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
261 : errmsg_plural("logical replication target relation \"%s.%s\" is missing replicated column: %s",
262 : "logical replication target relation \"%s.%s\" is missing replicated columns: %s",
263 : bms_num_members(missingatts),
264 : remoterel->nspname,
265 : remoterel->relname,
266 : logicalrep_get_attrs_str(remoterel,
267 : missingatts)));
268 :
269 1030 : if (!bms_is_empty(generatedatts))
270 2 : ereport(ERROR,
271 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
272 : errmsg_plural("logical replication target relation \"%s.%s\" has incompatible generated column: %s",
273 : "logical replication target relation \"%s.%s\" has incompatible generated columns: %s",
274 : bms_num_members(generatedatts),
275 : remoterel->nspname,
276 : remoterel->relname,
277 : logicalrep_get_attrs_str(remoterel,
278 : generatedatts)));
279 1028 : }
280 :
281 : /*
282 : * Check if replica identity matches and mark the updatable flag.
283 : *
284 : * We allow for stricter replica identity (fewer columns) on subscriber as
285 : * that will not stop us from finding unique tuple. IE, if publisher has
286 : * identity (id,timestamp) and subscriber just (id) this will not be a
287 : * problem, but in the opposite scenario it will.
288 : *
289 : * We just mark the relation entry as not updatable here if the local
290 : * replica identity is found to be insufficient for applying
291 : * updates/deletes (inserts don't care!) and leave it to
292 : * check_relation_updatable() to throw the actual error if needed.
293 : */
294 : static void
295 1058 : logicalrep_rel_mark_updatable(LogicalRepRelMapEntry *entry)
296 : {
297 : Bitmapset *idkey;
298 1058 : LogicalRepRelation *remoterel = &entry->remoterel;
299 : int i;
300 :
301 1058 : entry->updatable = true;
302 :
303 1058 : idkey = RelationGetIndexAttrBitmap(entry->localrel,
304 : INDEX_ATTR_BITMAP_IDENTITY_KEY);
305 : /* fallback to PK if no replica identity */
306 1058 : if (idkey == NULL)
307 : {
308 380 : idkey = RelationGetIndexAttrBitmap(entry->localrel,
309 : INDEX_ATTR_BITMAP_PRIMARY_KEY);
310 :
311 : /*
312 : * If no replica identity index and no PK, the published table must
313 : * have replica identity FULL.
314 : */
315 380 : if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL)
316 242 : entry->updatable = false;
317 : }
318 :
319 1058 : i = -1;
320 1740 : while ((i = bms_next_member(idkey, i)) >= 0)
321 : {
322 710 : int attnum = i + FirstLowInvalidHeapAttributeNumber;
323 :
324 710 : if (!AttrNumberIsForUserDefinedAttr(attnum))
325 0 : ereport(ERROR,
326 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
327 : errmsg("logical replication target relation \"%s.%s\" uses "
328 : "system columns in REPLICA IDENTITY index",
329 : remoterel->nspname, remoterel->relname)));
330 :
331 710 : attnum = AttrNumberGetAttrOffset(attnum);
332 :
333 710 : if (entry->attrmap->attnums[attnum] < 0 ||
334 708 : !bms_is_member(entry->attrmap->attnums[attnum], remoterel->attkeys))
335 : {
336 28 : entry->updatable = false;
337 28 : break;
338 : }
339 : }
340 1058 : }
341 :
342 : /*
343 : * Open the local relation associated with the remote one.
344 : *
345 : * Rebuilds the Relcache mapping if it was invalidated by local DDL.
346 : */
347 : LogicalRepRelMapEntry *
348 297720 : logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode)
349 : {
350 : LogicalRepRelMapEntry *entry;
351 : bool found;
352 : LogicalRepRelation *remoterel;
353 :
354 297720 : if (LogicalRepRelMap == NULL)
355 0 : logicalrep_relmap_init();
356 :
357 : /* Search for existing entry. */
358 297720 : entry = hash_search(LogicalRepRelMap, &remoteid,
359 : HASH_FIND, &found);
360 :
361 297720 : if (!found)
362 0 : elog(ERROR, "no relation map entry for remote relation ID %u",
363 : remoteid);
364 :
365 297720 : remoterel = &entry->remoterel;
366 :
367 : /* Ensure we don't leak a relcache refcount. */
368 297720 : if (entry->localrel)
369 0 : elog(ERROR, "remote relation ID %u is already open", remoteid);
370 :
371 : /*
372 : * When opening and locking a relation, pending invalidation messages are
373 : * processed which can invalidate the relation. Hence, if the entry is
374 : * currently considered valid, try to open the local relation by OID and
375 : * see if invalidation ensues.
376 : */
377 297720 : if (entry->localrelvalid)
378 : {
379 296676 : entry->localrel = try_table_open(entry->localreloid, lockmode);
380 296676 : if (!entry->localrel)
381 : {
382 : /* Table was renamed or dropped. */
383 0 : entry->localrelvalid = false;
384 : }
385 296676 : else if (!entry->localrelvalid)
386 : {
387 : /* Note we release the no-longer-useful lock here. */
388 0 : table_close(entry->localrel, lockmode);
389 0 : entry->localrel = NULL;
390 : }
391 : }
392 :
393 : /*
394 : * If the entry has been marked invalid since we last had lock on it,
395 : * re-open the local relation by name and rebuild all derived data.
396 : */
397 297720 : if (!entry->localrelvalid)
398 : {
399 : Oid relid;
400 : TupleDesc desc;
401 : MemoryContext oldctx;
402 : int i;
403 : Bitmapset *missingatts;
404 1044 : Bitmapset *generatedattrs = NULL;
405 :
406 : /* Release the no-longer-useful attrmap, if any. */
407 1044 : if (entry->attrmap)
408 : {
409 24 : free_attrmap(entry->attrmap);
410 24 : entry->attrmap = NULL;
411 : }
412 :
413 : /* Try to find and lock the relation by name. */
414 1044 : relid = RangeVarGetRelid(makeRangeVar(remoterel->nspname,
415 : remoterel->relname, -1),
416 : lockmode, true);
417 1044 : if (!OidIsValid(relid))
418 14 : ereport(ERROR,
419 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
420 : errmsg("logical replication target relation \"%s.%s\" does not exist",
421 : remoterel->nspname, remoterel->relname)));
422 1030 : entry->localrel = table_open(relid, NoLock);
423 1030 : entry->localreloid = relid;
424 :
425 : /* Check for supported relkind. */
426 1030 : CheckSubscriptionRelkind(entry->localrel->rd_rel->relkind,
427 1030 : remoterel->nspname, remoterel->relname);
428 :
429 : /*
430 : * Build the mapping of local attribute numbers to remote attribute
431 : * numbers and validate that we don't miss any replicated columns as
432 : * that would result in potentially unwanted data loss.
433 : */
434 1030 : desc = RelationGetDescr(entry->localrel);
435 1030 : oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
436 1030 : entry->attrmap = make_attrmap(desc->natts);
437 1030 : MemoryContextSwitchTo(oldctx);
438 :
439 : /* check and report missing attrs, if any */
440 1030 : missingatts = bms_add_range(NULL, 0, remoterel->natts - 1);
441 3406 : for (i = 0; i < desc->natts; i++)
442 : {
443 : int attnum;
444 2376 : Form_pg_attribute attr = TupleDescAttr(desc, i);
445 :
446 2376 : if (attr->attisdropped)
447 : {
448 4 : entry->attrmap->attnums[i] = -1;
449 4 : continue;
450 : }
451 :
452 2372 : attnum = logicalrep_rel_att_by_name(remoterel,
453 2372 : NameStr(attr->attname));
454 :
455 2372 : entry->attrmap->attnums[i] = attnum;
456 2372 : if (attnum >= 0)
457 : {
458 : /* Remember which subscriber columns are generated. */
459 1826 : if (attr->attgenerated)
460 4 : generatedattrs = bms_add_member(generatedattrs, attnum);
461 :
462 1826 : missingatts = bms_del_member(missingatts, attnum);
463 : }
464 : }
465 :
466 1030 : logicalrep_report_missing_or_gen_attrs(remoterel, missingatts,
467 : generatedattrs);
468 :
469 : /* be tidy */
470 1028 : bms_free(generatedattrs);
471 1028 : bms_free(missingatts);
472 :
473 : /*
474 : * Set if the table's replica identity is enough to apply
475 : * update/delete.
476 : */
477 1028 : logicalrep_rel_mark_updatable(entry);
478 :
479 : /*
480 : * Finding a usable index is an infrequent task. It occurs when an
481 : * operation is first performed on the relation, or after invalidation
482 : * of the relation cache entry (such as ANALYZE or CREATE/DROP index
483 : * on the relation).
484 : */
485 1028 : entry->localindexoid = FindLogicalRepLocalIndex(entry->localrel, remoterel,
486 : entry->attrmap);
487 :
488 1028 : entry->localrelvalid = true;
489 : }
490 :
491 297704 : if (entry->state != SUBREL_STATE_READY)
492 1086 : entry->state = GetSubscriptionRelState(MySubscription->oid,
493 : entry->localreloid,
494 : &entry->statelsn);
495 :
496 297704 : return entry;
497 : }
498 :
499 : /*
500 : * Close the previously opened logical relation.
501 : */
502 : void
503 297650 : logicalrep_rel_close(LogicalRepRelMapEntry *rel, LOCKMODE lockmode)
504 : {
505 297650 : table_close(rel->localrel, lockmode);
506 297650 : rel->localrel = NULL;
507 297650 : }
508 :
509 : /*
510 : * Partition cache: look up partition LogicalRepRelMapEntry's
511 : *
512 : * Unlike relation map cache, this is keyed by partition OID, not remote
513 : * relation OID, because we only have to use this cache in the case where
514 : * partitions are not directly mapped to any remote relation, such as when
515 : * replication is occurring with one of their ancestors as target.
516 : */
517 :
518 : /*
519 : * Relcache invalidation callback
520 : */
521 : static void
522 488 : logicalrep_partmap_invalidate_cb(Datum arg, Oid reloid)
523 : {
524 : LogicalRepPartMapEntry *entry;
525 :
526 : /* Just to be sure. */
527 488 : if (LogicalRepPartMap == NULL)
528 0 : return;
529 :
530 488 : if (reloid != InvalidOid)
531 : {
532 : HASH_SEQ_STATUS status;
533 :
534 488 : hash_seq_init(&status, LogicalRepPartMap);
535 :
536 : /* TODO, use inverse lookup hashtable? */
537 1380 : while ((entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
538 : {
539 904 : if (entry->relmapentry.localreloid == reloid)
540 : {
541 12 : entry->relmapentry.localrelvalid = false;
542 12 : hash_seq_term(&status);
543 12 : break;
544 : }
545 : }
546 : }
547 : else
548 : {
549 : /* invalidate all cache entries */
550 : HASH_SEQ_STATUS status;
551 :
552 0 : hash_seq_init(&status, LogicalRepPartMap);
553 :
554 0 : while ((entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
555 0 : entry->relmapentry.localrelvalid = false;
556 : }
557 : }
558 :
559 : /*
560 : * Reset the entries in the partition map that refer to remoterel.
561 : *
562 : * Called when new relation mapping is sent by the publisher to update our
563 : * expected view of incoming data from said publisher.
564 : *
565 : * Note that we don't update the remoterel information in the entry here,
566 : * we will update the information in logicalrep_partition_open to avoid
567 : * unnecessary work.
568 : */
569 : void
570 788 : logicalrep_partmap_reset_relmap(LogicalRepRelation *remoterel)
571 : {
572 : HASH_SEQ_STATUS status;
573 : LogicalRepPartMapEntry *part_entry;
574 : LogicalRepRelMapEntry *entry;
575 :
576 788 : if (LogicalRepPartMap == NULL)
577 720 : return;
578 :
579 68 : hash_seq_init(&status, LogicalRepPartMap);
580 174 : while ((part_entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
581 : {
582 106 : entry = &part_entry->relmapentry;
583 :
584 106 : if (entry->remoterel.remoteid != remoterel->remoteid)
585 90 : continue;
586 :
587 16 : logicalrep_relmap_free_entry(entry);
588 :
589 16 : memset(entry, 0, sizeof(LogicalRepRelMapEntry));
590 : }
591 : }
592 :
593 : /*
594 : * Initialize the partition map cache.
595 : */
596 : static void
597 12 : logicalrep_partmap_init(void)
598 : {
599 : HASHCTL ctl;
600 :
601 12 : if (!LogicalRepPartMapContext)
602 12 : LogicalRepPartMapContext =
603 12 : AllocSetContextCreate(CacheMemoryContext,
604 : "LogicalRepPartMapContext",
605 : ALLOCSET_DEFAULT_SIZES);
606 :
607 : /* Initialize the relation hash table. */
608 12 : ctl.keysize = sizeof(Oid); /* partition OID */
609 12 : ctl.entrysize = sizeof(LogicalRepPartMapEntry);
610 12 : ctl.hcxt = LogicalRepPartMapContext;
611 :
612 12 : LogicalRepPartMap = hash_create("logicalrep partition map cache", 64, &ctl,
613 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
614 :
615 : /* Watch for invalidation events. */
616 12 : CacheRegisterRelcacheCallback(logicalrep_partmap_invalidate_cb,
617 : (Datum) 0);
618 12 : }
619 :
620 : /*
621 : * logicalrep_partition_open
622 : *
623 : * Returned entry reuses most of the values of the root table's entry, save
624 : * the attribute map, which can be different for the partition. However,
625 : * we must physically copy all the data, in case the root table's entry
626 : * gets freed/rebuilt.
627 : *
628 : * Note there's no logicalrep_partition_close, because the caller closes the
629 : * component relation.
630 : */
631 : LogicalRepRelMapEntry *
632 60 : logicalrep_partition_open(LogicalRepRelMapEntry *root,
633 : Relation partrel, AttrMap *map)
634 : {
635 : LogicalRepRelMapEntry *entry;
636 : LogicalRepPartMapEntry *part_entry;
637 60 : LogicalRepRelation *remoterel = &root->remoterel;
638 60 : Oid partOid = RelationGetRelid(partrel);
639 60 : AttrMap *attrmap = root->attrmap;
640 : bool found;
641 : MemoryContext oldctx;
642 :
643 60 : if (LogicalRepPartMap == NULL)
644 12 : logicalrep_partmap_init();
645 :
646 : /* Search for existing entry. */
647 60 : part_entry = (LogicalRepPartMapEntry *) hash_search(LogicalRepPartMap,
648 : &partOid,
649 : HASH_ENTER, &found);
650 :
651 60 : entry = &part_entry->relmapentry;
652 :
653 : /*
654 : * We must always overwrite entry->localrel with the latest partition
655 : * Relation pointer, because the Relation pointed to by the old value may
656 : * have been cleared after the caller would have closed the partition
657 : * relation after the last use of this entry. Note that localrelvalid is
658 : * only updated by the relcache invalidation callback, so it may still be
659 : * true irrespective of whether the Relation pointed to by localrel has
660 : * been cleared or not.
661 : */
662 60 : if (found && entry->localrelvalid)
663 : {
664 30 : entry->localrel = partrel;
665 30 : return entry;
666 : }
667 :
668 : /* Switch to longer-lived context. */
669 30 : oldctx = MemoryContextSwitchTo(LogicalRepPartMapContext);
670 :
671 30 : if (!found)
672 : {
673 18 : memset(part_entry, 0, sizeof(LogicalRepPartMapEntry));
674 18 : part_entry->partoid = partOid;
675 : }
676 :
677 : /* Release the no-longer-useful attrmap, if any. */
678 30 : if (entry->attrmap)
679 : {
680 2 : free_attrmap(entry->attrmap);
681 2 : entry->attrmap = NULL;
682 : }
683 :
684 30 : if (!entry->remoterel.remoteid)
685 : {
686 : int i;
687 :
688 : /* Remote relation is copied as-is from the root entry. */
689 28 : entry->remoterel.remoteid = remoterel->remoteid;
690 28 : entry->remoterel.nspname = pstrdup(remoterel->nspname);
691 28 : entry->remoterel.relname = pstrdup(remoterel->relname);
692 28 : entry->remoterel.natts = remoterel->natts;
693 28 : entry->remoterel.attnames = palloc(remoterel->natts * sizeof(char *));
694 28 : entry->remoterel.atttyps = palloc(remoterel->natts * sizeof(Oid));
695 88 : for (i = 0; i < remoterel->natts; i++)
696 : {
697 60 : entry->remoterel.attnames[i] = pstrdup(remoterel->attnames[i]);
698 60 : entry->remoterel.atttyps[i] = remoterel->atttyps[i];
699 : }
700 28 : entry->remoterel.replident = remoterel->replident;
701 28 : entry->remoterel.attkeys = bms_copy(remoterel->attkeys);
702 : }
703 :
704 30 : entry->localrel = partrel;
705 30 : entry->localreloid = partOid;
706 :
707 : /*
708 : * If the partition's attributes don't match the root relation's, we'll
709 : * need to make a new attrmap which maps partition attribute numbers to
710 : * remoterel's, instead of the original which maps root relation's
711 : * attribute numbers to remoterel's.
712 : *
713 : * Note that 'map' which comes from the tuple routing data structure
714 : * contains 1-based attribute numbers (of the parent relation). However,
715 : * the map in 'entry', a logical replication data structure, contains
716 : * 0-based attribute numbers (of the remote relation).
717 : */
718 30 : if (map)
719 : {
720 : AttrNumber attno;
721 :
722 16 : entry->attrmap = make_attrmap(map->maplen);
723 68 : for (attno = 0; attno < entry->attrmap->maplen; attno++)
724 : {
725 52 : AttrNumber root_attno = map->attnums[attno];
726 :
727 : /* 0 means it's a dropped attribute. See comments atop AttrMap. */
728 52 : if (root_attno == 0)
729 4 : entry->attrmap->attnums[attno] = -1;
730 : else
731 48 : entry->attrmap->attnums[attno] = attrmap->attnums[root_attno - 1];
732 : }
733 : }
734 : else
735 : {
736 : /* Lacking copy_attmap, do this the hard way. */
737 14 : entry->attrmap = make_attrmap(attrmap->maplen);
738 14 : memcpy(entry->attrmap->attnums, attrmap->attnums,
739 14 : attrmap->maplen * sizeof(AttrNumber));
740 : }
741 :
742 : /* Set if the table's replica identity is enough to apply update/delete. */
743 30 : logicalrep_rel_mark_updatable(entry);
744 :
745 : /* state and statelsn are left set to 0. */
746 30 : MemoryContextSwitchTo(oldctx);
747 :
748 : /*
749 : * Finding a usable index is an infrequent task. It occurs when an
750 : * operation is first performed on the relation, or after invalidation of
751 : * the relation cache entry (such as ANALYZE or CREATE/DROP index on the
752 : * relation).
753 : *
754 : * We also prefer to run this code on the oldctx so that we do not leak
755 : * anything in the LogicalRepPartMapContext (hence CacheMemoryContext).
756 : */
757 30 : entry->localindexoid = FindLogicalRepLocalIndex(partrel, remoterel,
758 : entry->attrmap);
759 :
760 30 : entry->localrelvalid = true;
761 :
762 30 : return entry;
763 : }
764 :
765 : /*
766 : * Returns the oid of an index that can be used by the apply worker to scan
767 : * the relation.
768 : *
769 : * We expect to call this function when REPLICA IDENTITY FULL is defined for
770 : * the remote relation.
771 : *
772 : * If no suitable index is found, returns InvalidOid.
773 : */
774 : static Oid
775 116 : FindUsableIndexForReplicaIdentityFull(Relation localrel, AttrMap *attrmap)
776 : {
777 116 : List *idxlist = RelationGetIndexList(localrel);
778 :
779 208 : foreach_oid(idxoid, idxlist)
780 : {
781 : bool isUsableIdx;
782 : Relation idxRel;
783 :
784 40 : idxRel = index_open(idxoid, AccessShareLock);
785 40 : isUsableIdx = IsIndexUsableForReplicaIdentityFull(idxRel, attrmap);
786 40 : index_close(idxRel, AccessShareLock);
787 :
788 : /* Return the first eligible index found */
789 40 : if (isUsableIdx)
790 32 : return idxoid;
791 : }
792 :
793 84 : return InvalidOid;
794 : }
795 :
796 : /*
797 : * Returns true if the index is usable for replica identity full.
798 : *
799 : * The index must have an equal strategy for each key column, be non-partial,
800 : * and the leftmost field must be a column (not an expression) that references
801 : * the remote relation column. These limitations help to keep the index scan
802 : * similar to PK/RI index scans.
803 : *
804 : * attrmap is a map of local attributes to remote ones. We can consult this
805 : * map to check whether the local index attribute has a corresponding remote
806 : * attribute.
807 : *
808 : * Note that the limitations of index scans for replica identity full only
809 : * adheres to a subset of the limitations of PK/RI. For example, we support
810 : * columns that are marked as [NULL] or we are not interested in the [NOT
811 : * DEFERRABLE] aspect of constraints here. It works for us because we always
812 : * compare the tuples for non-PK/RI index scans. See
813 : * RelationFindReplTupleByIndex().
814 : *
815 : * XXX: To support partial indexes, the required changes are likely to be larger.
816 : * If none of the tuples satisfy the expression for the index scan, we fall-back
817 : * to sequential execution, which might not be a good idea in some cases.
818 : */
819 : bool
820 40 : IsIndexUsableForReplicaIdentityFull(Relation idxrel, AttrMap *attrmap)
821 : {
822 : AttrNumber keycol;
823 : oidvector *indclass;
824 :
825 : /* The index must not be a partial index */
826 40 : if (!heap_attisnull(idxrel->rd_indextuple, Anum_pg_index_indpred, NULL))
827 4 : return false;
828 :
829 : Assert(idxrel->rd_index->indnatts >= 1);
830 :
831 36 : indclass = (oidvector *) DatumGetPointer(SysCacheGetAttrNotNull(INDEXRELID,
832 36 : idxrel->rd_indextuple,
833 : Anum_pg_index_indclass));
834 :
835 : /* Ensure that the index has a valid equal strategy for each key column */
836 104 : for (int i = 0; i < idxrel->rd_index->indnkeyatts; i++)
837 : {
838 68 : if (get_equal_strategy_number(indclass->values[i]) == InvalidStrategy)
839 0 : return false;
840 : }
841 :
842 : /*
843 : * For indexes other than PK and REPLICA IDENTITY, we need to match the
844 : * local and remote tuples. The equality routine tuples_equal() cannot
845 : * accept a data type where the type cache cannot provide an equality
846 : * operator.
847 : */
848 104 : for (int i = 0; i < idxrel->rd_att->natts; i++)
849 : {
850 : TypeCacheEntry *typentry;
851 :
852 68 : typentry = lookup_type_cache(TupleDescAttr(idxrel->rd_att, i)->atttypid, TYPECACHE_EQ_OPR_FINFO);
853 68 : if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
854 0 : return false;
855 : }
856 :
857 : /* The leftmost index field must not be an expression */
858 36 : keycol = idxrel->rd_index->indkey.values[0];
859 36 : if (!AttributeNumberIsValid(keycol))
860 4 : return false;
861 :
862 : /*
863 : * And the leftmost index field must reference the remote relation column.
864 : * This is because if it doesn't, the sequential scan is favorable over
865 : * index scan in most cases.
866 : */
867 32 : if (attrmap->maplen <= AttrNumberGetAttrOffset(keycol) ||
868 32 : attrmap->attnums[AttrNumberGetAttrOffset(keycol)] < 0)
869 0 : return false;
870 :
871 : /*
872 : * The given index access method must implement "amgettuple", which will
873 : * be used later to fetch the tuples. See RelationFindReplTupleByIndex().
874 : */
875 32 : if (GetIndexAmRoutineByAmId(idxrel->rd_rel->relam, false)->amgettuple == NULL)
876 0 : return false;
877 :
878 32 : return true;
879 : }
880 :
881 : /*
882 : * Return the OID of the replica identity index if one is defined;
883 : * the OID of the PK if one exists and is not deferrable;
884 : * otherwise, InvalidOid.
885 : */
886 : Oid
887 145194 : GetRelationIdentityOrPK(Relation rel)
888 : {
889 : Oid idxoid;
890 :
891 145194 : idxoid = RelationGetReplicaIndex(rel);
892 :
893 145194 : if (!OidIsValid(idxoid))
894 398 : idxoid = RelationGetPrimaryKeyIndex(rel, false);
895 :
896 145194 : return idxoid;
897 : }
898 :
899 : /*
900 : * Returns the index oid if we can use an index for subscriber. Otherwise,
901 : * returns InvalidOid.
902 : */
903 : static Oid
904 1058 : FindLogicalRepLocalIndex(Relation localrel, LogicalRepRelation *remoterel,
905 : AttrMap *attrMap)
906 : {
907 : Oid idxoid;
908 :
909 : /*
910 : * We never need index oid for partitioned tables, always rely on leaf
911 : * partition's index.
912 : */
913 1058 : if (localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
914 98 : return InvalidOid;
915 :
916 : /*
917 : * Simple case, we already have a primary key or a replica identity index.
918 : */
919 960 : idxoid = GetRelationIdentityOrPK(localrel);
920 960 : if (OidIsValid(idxoid))
921 628 : return idxoid;
922 :
923 332 : if (remoterel->replident == REPLICA_IDENTITY_FULL)
924 : {
925 : /*
926 : * We are looking for one more opportunity for using an index. If
927 : * there are any indexes defined on the local relation, try to pick a
928 : * suitable index.
929 : *
930 : * The index selection safely assumes that all the columns are going
931 : * to be available for the index scan given that remote relation has
932 : * replica identity full.
933 : *
934 : * Note that we are not using the planner to find the cheapest method
935 : * to scan the relation as that would require us to either use lower
936 : * level planner functions which would be a maintenance burden in the
937 : * long run or use the full-fledged planner which could cause
938 : * overhead.
939 : */
940 116 : return FindUsableIndexForReplicaIdentityFull(localrel, attrMap);
941 : }
942 :
943 216 : return InvalidOid;
944 : }
|