Line data Source code
1 : /*-------------------------------------------------------------------------
2 : * relation.c
3 : * PostgreSQL logical replication relation mapping cache
4 : *
5 : * Copyright (c) 2016-2026, PostgreSQL Global Development Group
6 : *
7 : * IDENTIFICATION
8 : * src/backend/replication/logical/relation.c
9 : *
10 : * NOTES
11 : * Routines in this file mainly have to do with mapping the properties
12 : * of local replication target relations to the properties of their
13 : * remote counterpart.
14 : *
15 : *-------------------------------------------------------------------------
16 : */
17 :
18 : #include "postgres.h"
19 :
20 : #include "access/amapi.h"
21 : #include "access/genam.h"
22 : #include "access/table.h"
23 : #include "catalog/namespace.h"
24 : #include "catalog/pg_subscription_rel.h"
25 : #include "executor/executor.h"
26 : #include "nodes/makefuncs.h"
27 : #include "replication/logicalrelation.h"
28 : #include "replication/worker_internal.h"
29 : #include "utils/inval.h"
30 : #include "utils/lsyscache.h"
31 : #include "utils/syscache.h"
32 : #include "utils/typcache.h"
33 :
34 :
35 : static MemoryContext LogicalRepRelMapContext = NULL;
36 :
37 : static HTAB *LogicalRepRelMap = NULL;
38 :
39 : /*
40 : * Partition map (LogicalRepPartMap)
41 : *
42 : * When a partitioned table is used as replication target, replicated
43 : * operations are actually performed on its leaf partitions, which requires
44 : * the partitions to also be mapped to the remote relation. Parent's entry
45 : * (LogicalRepRelMapEntry) cannot be used as-is for all partitions, because
46 : * individual partitions may have different attribute numbers, which means
47 : * attribute mappings to remote relation's attributes must be maintained
48 : * separately for each partition.
49 : */
50 : static MemoryContext LogicalRepPartMapContext = NULL;
51 : static HTAB *LogicalRepPartMap = NULL;
52 : typedef struct LogicalRepPartMapEntry
53 : {
54 : Oid partoid; /* LogicalRepPartMap's key */
55 : LogicalRepRelMapEntry relmapentry;
56 : } LogicalRepPartMapEntry;
57 :
58 : static Oid FindLogicalRepLocalIndex(Relation localrel, LogicalRepRelation *remoterel,
59 : AttrMap *attrMap);
60 :
61 : /*
62 : * Relcache invalidation callback for our relation map cache.
63 : */
64 : static void
65 722 : logicalrep_relmap_invalidate_cb(Datum arg, Oid reloid)
66 : {
67 : LogicalRepRelMapEntry *entry;
68 :
69 : /* Just to be sure. */
70 722 : if (LogicalRepRelMap == NULL)
71 0 : return;
72 :
73 722 : if (reloid != InvalidOid)
74 : {
75 : HASH_SEQ_STATUS status;
76 :
77 722 : hash_seq_init(&status, LogicalRepRelMap);
78 :
79 : /* TODO, use inverse lookup hashtable? */
80 3143 : while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
81 : {
82 2556 : if (entry->localreloid == reloid)
83 : {
84 135 : entry->localrelvalid = false;
85 135 : hash_seq_term(&status);
86 135 : break;
87 : }
88 : }
89 : }
90 : else
91 : {
92 : /* invalidate all cache entries */
93 : HASH_SEQ_STATUS status;
94 :
95 0 : hash_seq_init(&status, LogicalRepRelMap);
96 :
97 0 : while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
98 0 : entry->localrelvalid = false;
99 : }
100 : }
101 :
102 : /*
103 : * Initialize the relation map cache.
104 : */
105 : static void
106 439 : logicalrep_relmap_init(void)
107 : {
108 : HASHCTL ctl;
109 :
110 439 : if (!LogicalRepRelMapContext)
111 439 : LogicalRepRelMapContext =
112 439 : AllocSetContextCreate(CacheMemoryContext,
113 : "LogicalRepRelMapContext",
114 : ALLOCSET_DEFAULT_SIZES);
115 :
116 : /* Initialize the relation hash table. */
117 439 : ctl.keysize = sizeof(LogicalRepRelId);
118 439 : ctl.entrysize = sizeof(LogicalRepRelMapEntry);
119 439 : ctl.hcxt = LogicalRepRelMapContext;
120 :
121 439 : LogicalRepRelMap = hash_create("logicalrep relation map cache", 128, &ctl,
122 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
123 :
124 : /* Watch for invalidation events. */
125 439 : CacheRegisterRelcacheCallback(logicalrep_relmap_invalidate_cb,
126 : (Datum) 0);
127 439 : }
128 :
129 : /*
130 : * Free the entry of a relation map cache.
131 : */
132 : static void
133 142 : logicalrep_relmap_free_entry(LogicalRepRelMapEntry *entry)
134 : {
135 : LogicalRepRelation *remoterel;
136 :
137 142 : remoterel = &entry->remoterel;
138 :
139 142 : pfree(remoterel->nspname);
140 142 : pfree(remoterel->relname);
141 :
142 142 : if (remoterel->natts > 0)
143 : {
144 : int i;
145 :
146 426 : for (i = 0; i < remoterel->natts; i++)
147 284 : pfree(remoterel->attnames[i]);
148 :
149 142 : pfree(remoterel->attnames);
150 142 : pfree(remoterel->atttyps);
151 : }
152 142 : bms_free(remoterel->attkeys);
153 :
154 142 : if (entry->attrmap)
155 120 : free_attrmap(entry->attrmap);
156 142 : }
157 :
158 : /*
159 : * Add new entry or update existing entry in the relation map cache.
160 : *
161 : * Called when new relation mapping is sent by the publisher to update
162 : * our expected view of incoming data from said publisher.
163 : */
164 : void
165 691 : logicalrep_relmap_update(LogicalRepRelation *remoterel)
166 : {
167 : MemoryContext oldctx;
168 : LogicalRepRelMapEntry *entry;
169 : bool found;
170 : int i;
171 :
172 691 : if (LogicalRepRelMap == NULL)
173 439 : logicalrep_relmap_init();
174 :
175 : /*
176 : * HASH_ENTER returns the existing entry if present or creates a new one.
177 : */
178 691 : entry = hash_search(LogicalRepRelMap, &remoterel->remoteid,
179 : HASH_ENTER, &found);
180 :
181 691 : if (found)
182 134 : logicalrep_relmap_free_entry(entry);
183 :
184 691 : memset(entry, 0, sizeof(LogicalRepRelMapEntry));
185 :
186 : /* Make cached copy of the data */
187 691 : oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
188 691 : entry->remoterel.remoteid = remoterel->remoteid;
189 691 : entry->remoterel.nspname = pstrdup(remoterel->nspname);
190 691 : entry->remoterel.relname = pstrdup(remoterel->relname);
191 691 : entry->remoterel.natts = remoterel->natts;
192 691 : entry->remoterel.attnames = palloc_array(char *, remoterel->natts);
193 691 : entry->remoterel.atttyps = palloc_array(Oid, remoterel->natts);
194 1954 : for (i = 0; i < remoterel->natts; i++)
195 : {
196 1263 : entry->remoterel.attnames[i] = pstrdup(remoterel->attnames[i]);
197 1263 : entry->remoterel.atttyps[i] = remoterel->atttyps[i];
198 : }
199 691 : entry->remoterel.replident = remoterel->replident;
200 :
201 : /*
202 : * XXX The walsender currently does not transmit the relkind of the remote
203 : * relation when replicating changes. Since we support replicating only
204 : * table changes at present, we default to initializing relkind as
205 : * RELKIND_RELATION. This is needed in CheckSubscriptionRelkind() to check
206 : * if the publisher and subscriber relation kinds are compatible.
207 : */
208 691 : entry->remoterel.relkind =
209 691 : (remoterel->relkind == 0) ? RELKIND_RELATION : remoterel->relkind;
210 :
211 691 : entry->remoterel.attkeys = bms_copy(remoterel->attkeys);
212 691 : MemoryContextSwitchTo(oldctx);
213 691 : }
214 :
215 : /*
216 : * Find attribute index in TupleDesc struct by attribute name.
217 : *
218 : * Returns -1 if not found.
219 : */
220 : static int
221 1421 : logicalrep_rel_att_by_name(LogicalRepRelation *remoterel, const char *attname)
222 : {
223 : int i;
224 :
225 2670 : for (i = 0; i < remoterel->natts; i++)
226 : {
227 2390 : if (strcmp(remoterel->attnames[i], attname) == 0)
228 1141 : return i;
229 : }
230 :
231 280 : return -1;
232 : }
233 :
234 : /*
235 : * Returns a comma-separated string of attribute names based on the provided
236 : * relation and bitmap indicating which attributes to include.
237 : */
238 : static char *
239 3 : logicalrep_get_attrs_str(LogicalRepRelation *remoterel, Bitmapset *atts)
240 : {
241 : StringInfoData attsbuf;
242 3 : bool first = true;
243 3 : int i = -1;
244 :
245 : Assert(!bms_is_empty(atts));
246 :
247 3 : initStringInfo(&attsbuf);
248 :
249 9 : while ((i = bms_next_member(atts, i)) >= 0)
250 : {
251 6 : if (first)
252 3 : appendStringInfo(&attsbuf, _("\"%s\""), remoterel->attnames[i]);
253 : else
254 3 : appendStringInfo(&attsbuf, _(", \"%s\""), remoterel->attnames[i]);
255 6 : first = false;
256 : }
257 :
258 3 : return attsbuf.data;
259 : }
260 :
261 : /*
262 : * If attempting to replicate missing or generated columns, report an error.
263 : * Prioritize 'missing' errors if both occur though the prioritization is
264 : * arbitrary.
265 : */
266 : static void
267 623 : logicalrep_report_missing_or_gen_attrs(LogicalRepRelation *remoterel,
268 : Bitmapset *missingatts,
269 : Bitmapset *generatedatts)
270 : {
271 623 : if (!bms_is_empty(missingatts))
272 1 : ereport(ERROR,
273 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
274 : errmsg_plural("logical replication target relation \"%s.%s\" is missing replicated column: %s",
275 : "logical replication target relation \"%s.%s\" is missing replicated columns: %s",
276 : bms_num_members(missingatts),
277 : remoterel->nspname,
278 : remoterel->relname,
279 : logicalrep_get_attrs_str(remoterel,
280 : missingatts)));
281 :
282 622 : if (!bms_is_empty(generatedatts))
283 2 : ereport(ERROR,
284 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
285 : errmsg_plural("logical replication target relation \"%s.%s\" has incompatible generated column: %s",
286 : "logical replication target relation \"%s.%s\" has incompatible generated columns: %s",
287 : bms_num_members(generatedatts),
288 : remoterel->nspname,
289 : remoterel->relname,
290 : logicalrep_get_attrs_str(remoterel,
291 : generatedatts)));
292 620 : }
293 :
294 : /*
295 : * Check if replica identity matches and mark the updatable flag.
296 : *
297 : * We allow for stricter replica identity (fewer columns) on subscriber as
298 : * that will not stop us from finding unique tuple. IE, if publisher has
299 : * identity (id,timestamp) and subscriber just (id) this will not be a
300 : * problem, but in the opposite scenario it will.
301 : *
302 : * We just mark the relation entry as not updatable here if the local
303 : * replica identity is found to be insufficient for applying
304 : * updates/deletes (inserts don't care!) and leave it to
305 : * check_relation_updatable() to throw the actual error if needed.
306 : */
307 : static void
308 635 : logicalrep_rel_mark_updatable(LogicalRepRelMapEntry *entry)
309 : {
310 : Bitmapset *idkey;
311 635 : LogicalRepRelation *remoterel = &entry->remoterel;
312 : int i;
313 :
314 635 : entry->updatable = true;
315 :
316 635 : idkey = RelationGetIndexAttrBitmap(entry->localrel,
317 : INDEX_ATTR_BITMAP_IDENTITY_KEY);
318 : /* fallback to PK if no replica identity */
319 635 : if (idkey == NULL)
320 : {
321 232 : idkey = RelationGetIndexAttrBitmap(entry->localrel,
322 : INDEX_ATTR_BITMAP_PRIMARY_KEY);
323 :
324 : /*
325 : * If no replica identity index and no PK, the published table must
326 : * have replica identity FULL.
327 : */
328 232 : if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL)
329 152 : entry->updatable = false;
330 : }
331 :
332 635 : i = -1;
333 1039 : while ((i = bms_next_member(idkey, i)) >= 0)
334 : {
335 420 : int attnum = i + FirstLowInvalidHeapAttributeNumber;
336 :
337 420 : if (!AttrNumberIsForUserDefinedAttr(attnum))
338 0 : ereport(ERROR,
339 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
340 : errmsg("logical replication target relation \"%s.%s\" uses "
341 : "system columns in REPLICA IDENTITY index",
342 : remoterel->nspname, remoterel->relname)));
343 :
344 420 : attnum = AttrNumberGetAttrOffset(attnum);
345 :
346 420 : if (entry->attrmap->attnums[attnum] < 0 ||
347 419 : !bms_is_member(entry->attrmap->attnums[attnum], remoterel->attkeys))
348 : {
349 16 : entry->updatable = false;
350 16 : break;
351 : }
352 : }
353 635 : }
354 :
355 : /*
356 : * Open the local relation associated with the remote one.
357 : *
358 : * Rebuilds the Relcache mapping if it was invalidated by local DDL.
359 : */
360 : LogicalRepRelMapEntry *
361 169071 : logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode)
362 : {
363 : LogicalRepRelMapEntry *entry;
364 : bool found;
365 : LogicalRepRelation *remoterel;
366 :
367 169071 : if (LogicalRepRelMap == NULL)
368 0 : logicalrep_relmap_init();
369 :
370 : /* Search for existing entry. */
371 169071 : entry = hash_search(LogicalRepRelMap, &remoteid,
372 : HASH_FIND, &found);
373 :
374 169071 : if (!found)
375 0 : elog(ERROR, "no relation map entry for remote relation ID %u",
376 : remoteid);
377 :
378 169071 : remoterel = &entry->remoterel;
379 :
380 : /* Ensure we don't leak a relcache refcount. */
381 169071 : if (entry->localrel)
382 0 : elog(ERROR, "remote relation ID %u is already open", remoteid);
383 :
384 : /*
385 : * When opening and locking a relation, pending invalidation messages are
386 : * processed which can invalidate the relation. Hence, if the entry is
387 : * currently considered valid, try to open the local relation by OID and
388 : * see if invalidation ensues.
389 : */
390 169071 : if (entry->localrelvalid)
391 : {
392 168438 : entry->localrel = try_table_open(entry->localreloid, lockmode);
393 168438 : if (!entry->localrel)
394 : {
395 : /* Table was renamed or dropped. */
396 0 : entry->localrelvalid = false;
397 : }
398 168438 : else if (!entry->localrelvalid)
399 : {
400 : /* Note we release the no-longer-useful lock here. */
401 0 : table_close(entry->localrel, lockmode);
402 0 : entry->localrel = NULL;
403 : }
404 : }
405 :
406 : /*
407 : * If the entry has been marked invalid since we last had lock on it,
408 : * re-open the local relation by name and rebuild all derived data.
409 : */
410 169071 : if (!entry->localrelvalid)
411 : {
412 : Oid relid;
413 : TupleDesc desc;
414 : MemoryContext oldctx;
415 : int i;
416 : Bitmapset *missingatts;
417 633 : Bitmapset *generatedattrs = NULL;
418 :
419 : /* Release the no-longer-useful attrmap, if any. */
420 633 : if (entry->attrmap)
421 : {
422 13 : free_attrmap(entry->attrmap);
423 13 : entry->attrmap = NULL;
424 : }
425 :
426 : /* Try to find and lock the relation by name. */
427 633 : relid = RangeVarGetRelid(makeRangeVar(remoterel->nspname,
428 : remoterel->relname, -1),
429 : lockmode, true);
430 633 : if (!OidIsValid(relid))
431 10 : ereport(ERROR,
432 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
433 : errmsg("logical replication target relation \"%s.%s\" does not exist",
434 : remoterel->nspname, remoterel->relname)));
435 623 : entry->localrel = table_open(relid, NoLock);
436 623 : entry->localreloid = relid;
437 :
438 : /* Check for supported relkind. */
439 623 : CheckSubscriptionRelkind(entry->localrel->rd_rel->relkind,
440 623 : remoterel->relkind,
441 623 : remoterel->nspname, remoterel->relname);
442 :
443 : /*
444 : * Build the mapping of local attribute numbers to remote attribute
445 : * numbers and validate that we don't miss any replicated columns as
446 : * that would result in potentially unwanted data loss.
447 : */
448 623 : desc = RelationGetDescr(entry->localrel);
449 623 : oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
450 623 : entry->attrmap = make_attrmap(desc->natts);
451 623 : MemoryContextSwitchTo(oldctx);
452 :
453 : /* check and report missing attrs, if any */
454 623 : missingatts = bms_add_range(NULL, 0, remoterel->natts - 1);
455 2046 : for (i = 0; i < desc->natts; i++)
456 : {
457 : int attnum;
458 1423 : Form_pg_attribute attr = TupleDescAttr(desc, i);
459 :
460 1423 : if (attr->attisdropped)
461 : {
462 2 : entry->attrmap->attnums[i] = -1;
463 2 : continue;
464 : }
465 :
466 1421 : attnum = logicalrep_rel_att_by_name(remoterel,
467 1421 : NameStr(attr->attname));
468 :
469 1421 : entry->attrmap->attnums[i] = attnum;
470 1421 : if (attnum >= 0)
471 : {
472 : /* Remember which subscriber columns are generated. */
473 1141 : if (attr->attgenerated)
474 4 : generatedattrs = bms_add_member(generatedattrs, attnum);
475 :
476 1141 : missingatts = bms_del_member(missingatts, attnum);
477 : }
478 : }
479 :
480 623 : logicalrep_report_missing_or_gen_attrs(remoterel, missingatts,
481 : generatedattrs);
482 :
483 : /* be tidy */
484 620 : bms_free(generatedattrs);
485 620 : bms_free(missingatts);
486 :
487 : /*
488 : * Set if the table's replica identity is enough to apply
489 : * update/delete.
490 : */
491 620 : logicalrep_rel_mark_updatable(entry);
492 :
493 : /*
494 : * Finding a usable index is an infrequent task. It occurs when an
495 : * operation is first performed on the relation, or after invalidation
496 : * of the relation cache entry (such as ANALYZE or CREATE/DROP index
497 : * on the relation).
498 : */
499 620 : entry->localindexoid = FindLogicalRepLocalIndex(entry->localrel, remoterel,
500 : entry->attrmap);
501 :
502 620 : entry->localrelvalid = true;
503 : }
504 :
505 169058 : if (entry->state != SUBREL_STATE_READY)
506 655 : entry->state = GetSubscriptionRelState(MySubscription->oid,
507 : entry->localreloid,
508 : &entry->statelsn);
509 :
510 169058 : return entry;
511 : }
512 :
513 : /*
514 : * Close the previously opened logical relation.
515 : */
516 : void
517 168977 : logicalrep_rel_close(LogicalRepRelMapEntry *rel, LOCKMODE lockmode)
518 : {
519 168977 : table_close(rel->localrel, lockmode);
520 168977 : rel->localrel = NULL;
521 168977 : }
522 :
523 : /*
524 : * Partition cache: look up partition LogicalRepRelMapEntry's
525 : *
526 : * Unlike relation map cache, this is keyed by partition OID, not remote
527 : * relation OID, because we only have to use this cache in the case where
528 : * partitions are not directly mapped to any remote relation, such as when
529 : * replication is occurring with one of their ancestors as target.
530 : */
531 :
532 : /*
533 : * Relcache invalidation callback
534 : */
535 : static void
536 288 : logicalrep_partmap_invalidate_cb(Datum arg, Oid reloid)
537 : {
538 : LogicalRepPartMapEntry *entry;
539 :
540 : /* Just to be sure. */
541 288 : if (LogicalRepPartMap == NULL)
542 0 : return;
543 :
544 288 : if (reloid != InvalidOid)
545 : {
546 : HASH_SEQ_STATUS status;
547 :
548 288 : hash_seq_init(&status, LogicalRepPartMap);
549 :
550 : /* TODO, use inverse lookup hashtable? */
551 822 : while ((entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
552 : {
553 540 : if (entry->relmapentry.localreloid == reloid)
554 : {
555 6 : entry->relmapentry.localrelvalid = false;
556 6 : hash_seq_term(&status);
557 6 : break;
558 : }
559 : }
560 : }
561 : else
562 : {
563 : /* invalidate all cache entries */
564 : HASH_SEQ_STATUS status;
565 :
566 0 : hash_seq_init(&status, LogicalRepPartMap);
567 :
568 0 : while ((entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
569 0 : entry->relmapentry.localrelvalid = false;
570 : }
571 : }
572 :
573 : /*
574 : * Reset the entries in the partition map that refer to remoterel.
575 : *
576 : * Called when new relation mapping is sent by the publisher to update our
577 : * expected view of incoming data from said publisher.
578 : *
579 : * Note that we don't update the remoterel information in the entry here,
580 : * we will update the information in logicalrep_partition_open to avoid
581 : * unnecessary work.
582 : */
583 : void
584 478 : logicalrep_partmap_reset_relmap(LogicalRepRelation *remoterel)
585 : {
586 : HASH_SEQ_STATUS status;
587 : LogicalRepPartMapEntry *part_entry;
588 : LogicalRepRelMapEntry *entry;
589 :
590 478 : if (LogicalRepPartMap == NULL)
591 444 : return;
592 :
593 34 : hash_seq_init(&status, LogicalRepPartMap);
594 87 : while ((part_entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
595 : {
596 53 : entry = &part_entry->relmapentry;
597 :
598 53 : if (entry->remoterel.remoteid != remoterel->remoteid)
599 45 : continue;
600 :
601 8 : logicalrep_relmap_free_entry(entry);
602 :
603 8 : memset(entry, 0, sizeof(LogicalRepRelMapEntry));
604 : }
605 : }
606 :
607 : /*
608 : * Initialize the partition map cache.
609 : */
610 : static void
611 6 : logicalrep_partmap_init(void)
612 : {
613 : HASHCTL ctl;
614 :
615 6 : if (!LogicalRepPartMapContext)
616 6 : LogicalRepPartMapContext =
617 6 : AllocSetContextCreate(CacheMemoryContext,
618 : "LogicalRepPartMapContext",
619 : ALLOCSET_DEFAULT_SIZES);
620 :
621 : /* Initialize the relation hash table. */
622 6 : ctl.keysize = sizeof(Oid); /* partition OID */
623 6 : ctl.entrysize = sizeof(LogicalRepPartMapEntry);
624 6 : ctl.hcxt = LogicalRepPartMapContext;
625 :
626 6 : LogicalRepPartMap = hash_create("logicalrep partition map cache", 64, &ctl,
627 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
628 :
629 : /* Watch for invalidation events. */
630 6 : CacheRegisterRelcacheCallback(logicalrep_partmap_invalidate_cb,
631 : (Datum) 0);
632 6 : }
633 :
634 : /*
635 : * logicalrep_partition_open
636 : *
637 : * Returned entry reuses most of the values of the root table's entry, save
638 : * the attribute map, which can be different for the partition. However,
639 : * we must physically copy all the data, in case the root table's entry
640 : * gets freed/rebuilt.
641 : *
642 : * Note there's no logicalrep_partition_close, because the caller closes the
643 : * component relation.
644 : */
645 : LogicalRepRelMapEntry *
646 30 : logicalrep_partition_open(LogicalRepRelMapEntry *root,
647 : Relation partrel, AttrMap *map)
648 : {
649 : LogicalRepRelMapEntry *entry;
650 : LogicalRepPartMapEntry *part_entry;
651 30 : LogicalRepRelation *remoterel = &root->remoterel;
652 30 : Oid partOid = RelationGetRelid(partrel);
653 30 : AttrMap *attrmap = root->attrmap;
654 : bool found;
655 : MemoryContext oldctx;
656 :
657 30 : if (LogicalRepPartMap == NULL)
658 6 : logicalrep_partmap_init();
659 :
660 : /* Search for existing entry. */
661 30 : part_entry = (LogicalRepPartMapEntry *) hash_search(LogicalRepPartMap,
662 : &partOid,
663 : HASH_ENTER, &found);
664 :
665 30 : entry = &part_entry->relmapentry;
666 :
667 : /*
668 : * We must always overwrite entry->localrel with the latest partition
669 : * Relation pointer, because the Relation pointed to by the old value may
670 : * have been cleared after the caller would have closed the partition
671 : * relation after the last use of this entry. Note that localrelvalid is
672 : * only updated by the relcache invalidation callback, so it may still be
673 : * true irrespective of whether the Relation pointed to by localrel has
674 : * been cleared or not.
675 : */
676 30 : if (found && entry->localrelvalid)
677 : {
678 15 : entry->localrel = partrel;
679 15 : return entry;
680 : }
681 :
682 : /* Switch to longer-lived context. */
683 15 : oldctx = MemoryContextSwitchTo(LogicalRepPartMapContext);
684 :
685 15 : if (!found)
686 : {
687 9 : memset(part_entry, 0, sizeof(LogicalRepPartMapEntry));
688 9 : part_entry->partoid = partOid;
689 : }
690 :
691 : /* Release the no-longer-useful attrmap, if any. */
692 15 : if (entry->attrmap)
693 : {
694 1 : free_attrmap(entry->attrmap);
695 1 : entry->attrmap = NULL;
696 : }
697 :
698 15 : if (!entry->remoterel.remoteid)
699 : {
700 : int i;
701 :
702 : /* Remote relation is copied as-is from the root entry. */
703 14 : entry->remoterel.remoteid = remoterel->remoteid;
704 14 : entry->remoterel.nspname = pstrdup(remoterel->nspname);
705 14 : entry->remoterel.relname = pstrdup(remoterel->relname);
706 14 : entry->remoterel.natts = remoterel->natts;
707 14 : entry->remoterel.attnames = palloc_array(char *, remoterel->natts);
708 14 : entry->remoterel.atttyps = palloc_array(Oid, remoterel->natts);
709 44 : for (i = 0; i < remoterel->natts; i++)
710 : {
711 30 : entry->remoterel.attnames[i] = pstrdup(remoterel->attnames[i]);
712 30 : entry->remoterel.atttyps[i] = remoterel->atttyps[i];
713 : }
714 14 : entry->remoterel.replident = remoterel->replident;
715 14 : entry->remoterel.attkeys = bms_copy(remoterel->attkeys);
716 : }
717 :
718 15 : entry->localrel = partrel;
719 15 : entry->localreloid = partOid;
720 :
721 : /*
722 : * If the partition's attributes don't match the root relation's, we'll
723 : * need to make a new attrmap which maps partition attribute numbers to
724 : * remoterel's, instead of the original which maps root relation's
725 : * attribute numbers to remoterel's.
726 : *
727 : * Note that 'map' which comes from the tuple routing data structure
728 : * contains 1-based attribute numbers (of the parent relation). However,
729 : * the map in 'entry', a logical replication data structure, contains
730 : * 0-based attribute numbers (of the remote relation).
731 : */
732 15 : if (map)
733 : {
734 : AttrNumber attno;
735 :
736 8 : entry->attrmap = make_attrmap(map->maplen);
737 34 : for (attno = 0; attno < entry->attrmap->maplen; attno++)
738 : {
739 26 : AttrNumber root_attno = map->attnums[attno];
740 :
741 : /* 0 means it's a dropped attribute. See comments atop AttrMap. */
742 26 : if (root_attno == 0)
743 2 : entry->attrmap->attnums[attno] = -1;
744 : else
745 24 : entry->attrmap->attnums[attno] = attrmap->attnums[root_attno - 1];
746 : }
747 : }
748 : else
749 : {
750 : /* Lacking copy_attmap, do this the hard way. */
751 7 : entry->attrmap = make_attrmap(attrmap->maplen);
752 7 : memcpy(entry->attrmap->attnums, attrmap->attnums,
753 7 : attrmap->maplen * sizeof(AttrNumber));
754 : }
755 :
756 : /* Set if the table's replica identity is enough to apply update/delete. */
757 15 : logicalrep_rel_mark_updatable(entry);
758 :
759 : /* state and statelsn are left set to 0. */
760 15 : MemoryContextSwitchTo(oldctx);
761 :
762 : /*
763 : * Finding a usable index is an infrequent task. It occurs when an
764 : * operation is first performed on the relation, or after invalidation of
765 : * the relation cache entry (such as ANALYZE or CREATE/DROP index on the
766 : * relation).
767 : *
768 : * We also prefer to run this code on the oldctx so that we do not leak
769 : * anything in the LogicalRepPartMapContext (hence CacheMemoryContext).
770 : */
771 15 : entry->localindexoid = FindLogicalRepLocalIndex(partrel, remoterel,
772 : entry->attrmap);
773 :
774 15 : entry->localrelvalid = true;
775 :
776 15 : return entry;
777 : }
778 :
779 : /*
780 : * Returns the oid of an index that can be used by the apply worker to scan
781 : * the relation.
782 : *
783 : * We expect to call this function when REPLICA IDENTITY FULL is defined for
784 : * the remote relation.
785 : *
786 : * If no suitable index is found, returns InvalidOid.
787 : */
788 : static Oid
789 68 : FindUsableIndexForReplicaIdentityFull(Relation localrel, AttrMap *attrmap)
790 : {
791 68 : List *idxlist = RelationGetIndexList(localrel);
792 :
793 124 : foreach_oid(idxoid, idxlist)
794 : {
795 : bool isUsableIdx;
796 : Relation idxRel;
797 :
798 20 : idxRel = index_open(idxoid, AccessShareLock);
799 20 : isUsableIdx = IsIndexUsableForReplicaIdentityFull(idxRel, attrmap);
800 20 : index_close(idxRel, AccessShareLock);
801 :
802 : /* Return the first eligible index found */
803 20 : if (isUsableIdx)
804 16 : return idxoid;
805 : }
806 :
807 52 : return InvalidOid;
808 : }
809 :
810 : /*
811 : * Returns true if the index is usable for replica identity full.
812 : *
813 : * The index must have an equal strategy for each key column, be non-partial,
814 : * and the leftmost field must be a column (not an expression) that references
815 : * the remote relation column. These limitations help to keep the index scan
816 : * similar to PK/RI index scans.
817 : *
818 : * attrmap is a map of local attributes to remote ones. We can consult this
819 : * map to check whether the local index attribute has a corresponding remote
820 : * attribute.
821 : *
822 : * Note that the limitations of index scans for replica identity full only
823 : * adheres to a subset of the limitations of PK/RI. For example, we support
824 : * columns that are marked as [NULL] or we are not interested in the [NOT
825 : * DEFERRABLE] aspect of constraints here. It works for us because we always
826 : * compare the tuples for non-PK/RI index scans. See
827 : * RelationFindReplTupleByIndex().
828 : *
829 : * XXX: To support partial indexes, the required changes are likely to be larger.
830 : * If none of the tuples satisfy the expression for the index scan, we fall-back
831 : * to sequential execution, which might not be a good idea in some cases.
832 : */
833 : bool
834 20 : IsIndexUsableForReplicaIdentityFull(Relation idxrel, AttrMap *attrmap)
835 : {
836 : AttrNumber keycol;
837 : oidvector *indclass;
838 :
839 : /* The index must not be a partial index */
840 20 : if (!heap_attisnull(idxrel->rd_indextuple, Anum_pg_index_indpred, NULL))
841 2 : return false;
842 :
843 : Assert(idxrel->rd_index->indnatts >= 1);
844 :
845 18 : indclass = (oidvector *) DatumGetPointer(SysCacheGetAttrNotNull(INDEXRELID,
846 18 : idxrel->rd_indextuple,
847 : Anum_pg_index_indclass));
848 :
849 : /* Ensure that the index has a valid equal strategy for each key column */
850 52 : for (int i = 0; i < idxrel->rd_index->indnkeyatts; i++)
851 : {
852 : Oid opfamily;
853 :
854 34 : opfamily = get_opclass_family(indclass->values[i]);
855 34 : if (IndexAmTranslateCompareType(COMPARE_EQ, idxrel->rd_rel->relam, opfamily, true) == InvalidStrategy)
856 0 : return false;
857 : }
858 :
859 : /*
860 : * For indexes other than PK and REPLICA IDENTITY, we need to match the
861 : * local and remote tuples. The equality routine tuples_equal() cannot
862 : * accept a data type where the type cache cannot provide an equality
863 : * operator.
864 : */
865 52 : for (int i = 0; i < idxrel->rd_att->natts; i++)
866 : {
867 : TypeCacheEntry *typentry;
868 :
869 34 : typentry = lookup_type_cache(TupleDescAttr(idxrel->rd_att, i)->atttypid, TYPECACHE_EQ_OPR_FINFO);
870 34 : if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
871 0 : return false;
872 : }
873 :
874 : /* The leftmost index field must not be an expression */
875 18 : keycol = idxrel->rd_index->indkey.values[0];
876 18 : if (!AttributeNumberIsValid(keycol))
877 2 : return false;
878 :
879 : /*
880 : * And the leftmost index field must reference the remote relation column.
881 : * This is because if it doesn't, the sequential scan is favorable over
882 : * index scan in most cases.
883 : */
884 16 : if (attrmap->maplen <= AttrNumberGetAttrOffset(keycol) ||
885 16 : attrmap->attnums[AttrNumberGetAttrOffset(keycol)] < 0)
886 0 : return false;
887 :
888 : /*
889 : * The given index access method must implement "amgettuple", which will
890 : * be used later to fetch the tuples. See RelationFindReplTupleByIndex().
891 : */
892 16 : if (GetIndexAmRoutineByAmId(idxrel->rd_rel->relam, false)->amgettuple == NULL)
893 0 : return false;
894 :
895 16 : return true;
896 : }
897 :
898 : /*
899 : * Return the OID of the replica identity index if one is defined;
900 : * the OID of the PK if one exists and is not deferrable;
901 : * otherwise, InvalidOid.
902 : */
903 : Oid
904 72694 : GetRelationIdentityOrPK(Relation rel)
905 : {
906 : Oid idxoid;
907 :
908 72694 : idxoid = RelationGetReplicaIndex(rel);
909 :
910 72694 : if (!OidIsValid(idxoid))
911 252 : idxoid = RelationGetPrimaryKeyIndex(rel, false);
912 :
913 72694 : return idxoid;
914 : }
915 :
916 : /*
917 : * Returns the index oid if we can use an index for subscriber. Otherwise,
918 : * returns InvalidOid.
919 : */
920 : static Oid
921 635 : FindLogicalRepLocalIndex(Relation localrel, LogicalRepRelation *remoterel,
922 : AttrMap *attrMap)
923 : {
924 : Oid idxoid;
925 :
926 : /*
927 : * We never need index oid for partitioned tables, always rely on leaf
928 : * partition's index.
929 : */
930 635 : if (localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
931 94 : return InvalidOid;
932 :
933 : /*
934 : * Simple case, we already have a primary key or a replica identity index.
935 : */
936 541 : idxoid = GetRelationIdentityOrPK(localrel);
937 541 : if (OidIsValid(idxoid))
938 335 : return idxoid;
939 :
940 206 : if (remoterel->replident == REPLICA_IDENTITY_FULL)
941 : {
942 : /*
943 : * We are looking for one more opportunity for using an index. If
944 : * there are any indexes defined on the local relation, try to pick a
945 : * suitable index.
946 : *
947 : * The index selection safely assumes that all the columns are going
948 : * to be available for the index scan given that remote relation has
949 : * replica identity full.
950 : *
951 : * Note that we are not using the planner to find the cheapest method
952 : * to scan the relation as that would require us to either use lower
953 : * level planner functions which would be a maintenance burden in the
954 : * long run or use the full-fledged planner which could cause
955 : * overhead.
956 : */
957 68 : return FindUsableIndexForReplicaIdentityFull(localrel, attrMap);
958 : }
959 :
960 138 : return InvalidOid;
961 : }
|