Line data Source code
1 : /*-------------------------------------------------------------------------
2 : * relation.c
3 : * PostgreSQL logical replication relation mapping cache
4 : *
5 : * Copyright (c) 2016-2025, PostgreSQL Global Development Group
6 : *
7 : * IDENTIFICATION
8 : * src/backend/replication/logical/relation.c
9 : *
10 : * NOTES
11 : * Routines in this file mainly have to do with mapping the properties
12 : * of local replication target relations to the properties of their
13 : * remote counterpart.
14 : *
15 : *-------------------------------------------------------------------------
16 : */
17 :
18 : #include "postgres.h"
19 :
20 : #include "access/amapi.h"
21 : #include "access/genam.h"
22 : #include "access/table.h"
23 : #include "catalog/namespace.h"
24 : #include "catalog/pg_subscription_rel.h"
25 : #include "executor/executor.h"
26 : #include "nodes/makefuncs.h"
27 : #include "replication/logicalrelation.h"
28 : #include "replication/worker_internal.h"
29 : #include "utils/inval.h"
30 : #include "utils/lsyscache.h"
31 : #include "utils/syscache.h"
32 :
33 :
34 : static MemoryContext LogicalRepRelMapContext = NULL;
35 :
36 : static HTAB *LogicalRepRelMap = NULL;
37 :
38 : /*
39 : * Partition map (LogicalRepPartMap)
40 : *
41 : * When a partitioned table is used as a replication target, replicated
42 : * operations are actually performed on its leaf partitions, which requires
43 : * the partitions to also be mapped to the remote relation. The parent's
44 : * entry (LogicalRepRelMapEntry) cannot be used as-is for all partitions,
45 : * because individual partitions may have different attribute numbers, so
46 : * attribute mappings to the remote relation's attributes must be maintained
47 : * separately for each partition.
48 : */
49 : static MemoryContext LogicalRepPartMapContext = NULL;
50 : static HTAB *LogicalRepPartMap = NULL;
51 : typedef struct LogicalRepPartMapEntry
52 : {
53 : Oid partoid; /* LogicalRepPartMap's key */
54 : LogicalRepRelMapEntry relmapentry;
55 : } LogicalRepPartMapEntry;
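
/*
 * Illustrative example (not part of the original file): why the parent's
 * attribute map cannot simply be reused.  Suppose the remote relation has
 * columns (id, payload) and the local parent has the same two columns, so
 * the parent's attrmap is {0, 1}.  A partition that was created separately,
 * had a column dropped, and was then attached may physically store
 * (id, <dropped>, payload); its map must be {0, -1, 1}.  Hence each
 * partition gets its own LogicalRepRelMapEntry, kept in LogicalRepPartMap
 * and keyed by the partition's OID.
 */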
56 :
57 : static Oid FindLogicalRepLocalIndex(Relation localrel, LogicalRepRelation *remoterel,
58 : AttrMap *attrMap);
59 :
60 : /*
61 : * Relcache invalidation callback for our relation map cache.
62 : */
63 : static void
64 1392 : logicalrep_relmap_invalidate_cb(Datum arg, Oid reloid)
65 : {
66 : LogicalRepRelMapEntry *entry;
67 :
68 : /* Just to be sure. */
69 1392 : if (LogicalRepRelMap == NULL)
70 0 : return;
71 :
72 1392 : if (reloid != InvalidOid)
73 : {
74 : HASH_SEQ_STATUS status;
75 :
76 1392 : hash_seq_init(&status, LogicalRepRelMap);
77 :
78 : /* TODO, use inverse lookup hashtable? */
79 6078 : while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
80 : {
81 4954 : if (entry->localreloid == reloid)
82 : {
83 268 : entry->localrelvalid = false;
84 268 : hash_seq_term(&status);
85 268 : break;
86 : }
87 : }
88 : }
89 : else
90 : {
91 : /* invalidate all cache entries */
92 : HASH_SEQ_STATUS status;
93 :
94 0 : hash_seq_init(&status, LogicalRepRelMap);
95 :
96 0 : while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
97 0 : entry->localrelvalid = false;
98 : }
99 : }
100 :
101 : /*
102 : * Initialize the relation map cache.
103 : */
104 : static void
105 734 : logicalrep_relmap_init(void)
106 : {
107 : HASHCTL ctl;
108 :
109 734 : if (!LogicalRepRelMapContext)
110 734 : LogicalRepRelMapContext =
111 734 : AllocSetContextCreate(CacheMemoryContext,
112 : "LogicalRepRelMapContext",
113 : ALLOCSET_DEFAULT_SIZES);
114 :
115 : /* Initialize the relation hash table. */
116 734 : ctl.keysize = sizeof(LogicalRepRelId);
117 734 : ctl.entrysize = sizeof(LogicalRepRelMapEntry);
118 734 : ctl.hcxt = LogicalRepRelMapContext;
119 :
120 734 : LogicalRepRelMap = hash_create("logicalrep relation map cache", 128, &ctl,
121 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
122 :
123 : /* Watch for invalidation events. */
124 734 : CacheRegisterRelcacheCallback(logicalrep_relmap_invalidate_cb,
125 : (Datum) 0);
126 734 : }
127 :
128 : /*
129 : * Free the entry of a relation map cache.
130 : */
131 : static void
132 280 : logicalrep_relmap_free_entry(LogicalRepRelMapEntry *entry)
133 : {
134 : LogicalRepRelation *remoterel;
135 :
136 280 : remoterel = &entry->remoterel;
137 :
138 280 : pfree(remoterel->nspname);
139 280 : pfree(remoterel->relname);
140 :
141 280 : if (remoterel->natts > 0)
142 : {
143 : int i;
144 :
145 840 : for (i = 0; i < remoterel->natts; i++)
146 560 : pfree(remoterel->attnames[i]);
147 :
148 280 : pfree(remoterel->attnames);
149 280 : pfree(remoterel->atttyps);
150 : }
151 280 : bms_free(remoterel->attkeys);
152 :
153 280 : if (entry->attrmap)
154 236 : free_attrmap(entry->attrmap);
155 280 : }
156 :
157 : /*
158 : * Add new entry or update existing entry in the relation map cache.
159 : *
160 : * Called when a new relation mapping is sent by the publisher to update
161 : * our expected view of incoming data from that publisher.
162 : */
163 : void
164 1234 : logicalrep_relmap_update(LogicalRepRelation *remoterel)
165 : {
166 : MemoryContext oldctx;
167 : LogicalRepRelMapEntry *entry;
168 : bool found;
169 : int i;
170 :
171 1234 : if (LogicalRepRelMap == NULL)
172 734 : logicalrep_relmap_init();
173 :
174 : /*
175 : * HASH_ENTER returns the existing entry if present or creates a new one.
176 : */
177 1234 : entry = hash_search(LogicalRepRelMap, &remoterel->remoteid,
178 : HASH_ENTER, &found);
179 :
180 1234 : if (found)
181 264 : logicalrep_relmap_free_entry(entry);
182 :
183 1234 : memset(entry, 0, sizeof(LogicalRepRelMapEntry));
184 :
185 : /* Make cached copy of the data */
186 1234 : oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
187 1234 : entry->remoterel.remoteid = remoterel->remoteid;
188 1234 : entry->remoterel.nspname = pstrdup(remoterel->nspname);
189 1234 : entry->remoterel.relname = pstrdup(remoterel->relname);
190 1234 : entry->remoterel.natts = remoterel->natts;
191 1234 : entry->remoterel.attnames = palloc(remoterel->natts * sizeof(char *));
192 1234 : entry->remoterel.atttyps = palloc(remoterel->natts * sizeof(Oid));
193 3474 : for (i = 0; i < remoterel->natts; i++)
194 : {
195 2240 : entry->remoterel.attnames[i] = pstrdup(remoterel->attnames[i]);
196 2240 : entry->remoterel.atttyps[i] = remoterel->atttyps[i];
197 : }
198 1234 : entry->remoterel.replident = remoterel->replident;
199 :
200 : /*
201 : * XXX The walsender currently does not transmit the relkind of the remote
202 : * relation when replicating changes. Since we support replicating only
203 : * table changes at present, we default to initializing relkind as
204 : * RELKIND_RELATION. This is needed in CheckSubscriptionRelkind() to check
205 : * if the publisher and subscriber relation kinds are compatible.
206 : */
207 1234 : entry->remoterel.relkind =
208 1234 : (remoterel->relkind == 0) ? RELKIND_RELATION : remoterel->relkind;
209 :
210 1234 : entry->remoterel.attkeys = bms_copy(remoterel->attkeys);
211 1234 : MemoryContextSwitchTo(oldctx);
212 1234 : }
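
/*
 * Minimal sketch (not part of the original file) of how this cache is fed.
 * The apply worker's handler for RELATION protocol messages decodes the
 * message and hands the result to the functions in this file; the handler
 * name here is a placeholder for the real one in worker.c.
 */
static void
example_handle_relation(StringInfo s)
{
	LogicalRepRelation *rel;

	/* Decode the RELATION message sent by the publisher. */
	rel = logicalrep_read_rel(s);

	/* Add or refresh the cached mapping for this remote relation. */
	logicalrep_relmap_update(rel);

	/* Partition entries derived from the old mapping are now stale. */
	logicalrep_partmap_reset_relmap(rel);
}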
213 :
214 : /*
215 : * Find the attribute index in the remote relation's attribute list by name.
216 : *
217 : * Returns -1 if not found.
218 : */
219 : static int
220 2566 : logicalrep_rel_att_by_name(LogicalRepRelation *remoterel, const char *attname)
221 : {
222 : int i;
223 :
224 4852 : for (i = 0; i < remoterel->natts; i++)
225 : {
226 4298 : if (strcmp(remoterel->attnames[i], attname) == 0)
227 2012 : return i;
228 : }
229 :
230 554 : return -1;
231 : }
232 :
233 : /*
234 : * Returns a comma-separated string of attribute names based on the provided
235 : * relation and bitmap indicating which attributes to include.
236 : */
237 : static char *
238 4 : logicalrep_get_attrs_str(LogicalRepRelation *remoterel, Bitmapset *atts)
239 : {
240 : StringInfoData attsbuf;
241 4 : int attcnt = 0;
242 4 : int i = -1;
243 :
244 : Assert(!bms_is_empty(atts));
245 :
246 4 : initStringInfo(&attsbuf);
247 :
248 12 : while ((i = bms_next_member(atts, i)) >= 0)
249 : {
250 8 : attcnt++;
251 8 : if (attcnt > 1)
252 4 : appendStringInfoString(&attsbuf, _(", "));
253 :
254 8 : appendStringInfo(&attsbuf, _("\"%s\""), remoterel->attnames[i]);
255 : }
256 :
257 4 : return attsbuf.data;
258 : }
259 :
260 : /*
261 : * If attempting to replicate missing or generated columns, report an error.
262 : * Prioritize 'missing' errors if both occur, though the prioritization is
263 : * arbitrary.
264 : */
265 : static void
266 1110 : logicalrep_report_missing_or_gen_attrs(LogicalRepRelation *remoterel,
267 : Bitmapset *missingatts,
268 : Bitmapset *generatedatts)
269 : {
270 1110 : if (!bms_is_empty(missingatts))
271 2 : ereport(ERROR,
272 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
273 : errmsg_plural("logical replication target relation \"%s.%s\" is missing replicated column: %s",
274 : "logical replication target relation \"%s.%s\" is missing replicated columns: %s",
275 : bms_num_members(missingatts),
276 : remoterel->nspname,
277 : remoterel->relname,
278 : logicalrep_get_attrs_str(remoterel,
279 : missingatts)));
280 :
281 1108 : if (!bms_is_empty(generatedatts))
282 2 : ereport(ERROR,
283 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
284 : errmsg_plural("logical replication target relation \"%s.%s\" has incompatible generated column: %s",
285 : "logical replication target relation \"%s.%s\" has incompatible generated columns: %s",
286 : bms_num_members(generatedatts),
287 : remoterel->nspname,
288 : remoterel->relname,
289 : logicalrep_get_attrs_str(remoterel,
290 : generatedatts)));
291 1106 : }
292 :
293 : /*
294 : * Check if replica identity matches and mark the updatable flag.
295 : *
296 : * We allow a stricter replica identity (fewer columns) on the subscriber, as
297 : * that will not stop us from finding a unique tuple. For example, if the
298 : * publisher has identity (id, timestamp) and the subscriber just (id), this
299 : * is not a problem, but the opposite scenario is.
300 : *
301 : * We just mark the relation entry as not updatable here if the local
302 : * replica identity is found to be insufficient for applying
303 : * updates/deletes (inserts don't care!) and leave it to
304 : * check_relation_updatable() to throw the actual error if needed.
305 : */
306 : static void
307 1136 : logicalrep_rel_mark_updatable(LogicalRepRelMapEntry *entry)
308 : {
309 : Bitmapset *idkey;
310 1136 : LogicalRepRelation *remoterel = &entry->remoterel;
311 : int i;
312 :
313 1136 : entry->updatable = true;
314 :
315 1136 : idkey = RelationGetIndexAttrBitmap(entry->localrel,
316 : INDEX_ATTR_BITMAP_IDENTITY_KEY);
317 : /* fallback to PK if no replica identity */
318 1136 : if (idkey == NULL)
319 : {
320 406 : idkey = RelationGetIndexAttrBitmap(entry->localrel,
321 : INDEX_ATTR_BITMAP_PRIMARY_KEY);
322 :
323 : /*
324 : * If no replica identity index and no PK, the published table must
325 : * have replica identity FULL.
326 : */
327 406 : if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL)
328 256 : entry->updatable = false;
329 : }
330 :
331 1136 : i = -1;
332 1872 : while ((i = bms_next_member(idkey, i)) >= 0)
333 : {
334 764 : int attnum = i + FirstLowInvalidHeapAttributeNumber;
335 :
336 764 : if (!AttrNumberIsForUserDefinedAttr(attnum))
337 0 : ereport(ERROR,
338 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
339 : errmsg("logical replication target relation \"%s.%s\" uses "
340 : "system columns in REPLICA IDENTITY index",
341 : remoterel->nspname, remoterel->relname)));
342 :
343 764 : attnum = AttrNumberGetAttrOffset(attnum);
344 :
345 764 : if (entry->attrmap->attnums[attnum] < 0 ||
346 762 : !bms_is_member(entry->attrmap->attnums[attnum], remoterel->attkeys))
347 : {
348 28 : entry->updatable = false;
349 28 : break;
350 : }
351 : }
352 1136 : }
353 :
354 : /*
355 : * Open the local relation associated with the remote one.
356 : *
357 : * Rebuilds the Relcache mapping if it was invalidated by local DDL.
358 : */
359 : LogicalRepRelMapEntry *
360 297196 : logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode)
361 : {
362 : LogicalRepRelMapEntry *entry;
363 : bool found;
364 : LogicalRepRelation *remoterel;
365 :
366 297196 : if (LogicalRepRelMap == NULL)
367 0 : logicalrep_relmap_init();
368 :
369 : /* Search for existing entry. */
370 297196 : entry = hash_search(LogicalRepRelMap, &remoteid,
371 : HASH_FIND, &found);
372 :
373 297196 : if (!found)
374 0 : elog(ERROR, "no relation map entry for remote relation ID %u",
375 : remoteid);
376 :
377 297196 : remoterel = &entry->remoterel;
378 :
379 : /* Ensure we don't leak a relcache refcount. */
380 297196 : if (entry->localrel)
381 0 : elog(ERROR, "remote relation ID %u is already open", remoteid);
382 :
383 : /*
384 : * When opening and locking a relation, pending invalidation messages are
385 : * processed which can invalidate the relation. Hence, if the entry is
386 : * currently considered valid, try to open the local relation by OID and
387 : * see if invalidation ensues.
388 : */
389 297196 : if (entry->localrelvalid)
390 : {
391 296072 : entry->localrel = try_table_open(entry->localreloid, lockmode);
392 296072 : if (!entry->localrel)
393 : {
394 : /* Table was renamed or dropped. */
395 0 : entry->localrelvalid = false;
396 : }
397 296072 : else if (!entry->localrelvalid)
398 : {
399 : /* Note we release the no-longer-useful lock here. */
400 0 : table_close(entry->localrel, lockmode);
401 0 : entry->localrel = NULL;
402 : }
403 : }
404 :
405 : /*
406 : * If the entry has been marked invalid since we last had lock on it,
407 : * re-open the local relation by name and rebuild all derived data.
408 : */
409 297196 : if (!entry->localrelvalid)
410 : {
411 : Oid relid;
412 : TupleDesc desc;
413 : MemoryContext oldctx;
414 : int i;
415 : Bitmapset *missingatts;
416 1124 : Bitmapset *generatedattrs = NULL;
417 :
418 : /* Release the no-longer-useful attrmap, if any. */
419 1124 : if (entry->attrmap)
420 : {
421 26 : free_attrmap(entry->attrmap);
422 26 : entry->attrmap = NULL;
423 : }
424 :
425 : /* Try to find and lock the relation by name. */
426 1124 : relid = RangeVarGetRelid(makeRangeVar(remoterel->nspname,
427 : remoterel->relname, -1),
428 : lockmode, true);
429 1124 : if (!OidIsValid(relid))
430 14 : ereport(ERROR,
431 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
432 : errmsg("logical replication target relation \"%s.%s\" does not exist",
433 : remoterel->nspname, remoterel->relname)));
434 1110 : entry->localrel = table_open(relid, NoLock);
435 1110 : entry->localreloid = relid;
436 :
437 : /* Check for supported relkind. */
438 1110 : CheckSubscriptionRelkind(entry->localrel->rd_rel->relkind,
439 1110 : remoterel->relkind,
440 1110 : remoterel->nspname, remoterel->relname);
441 :
442 : /*
443 : * Build the mapping of local attribute numbers to remote attribute
444 : * numbers and validate that we don't miss any replicated columns as
445 : * that would result in potentially unwanted data loss.
446 : */
447 1110 : desc = RelationGetDescr(entry->localrel);
448 1110 : oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
449 1110 : entry->attrmap = make_attrmap(desc->natts);
450 1110 : MemoryContextSwitchTo(oldctx);
451 :
452 : /* check and report missing attrs, if any */
453 1110 : missingatts = bms_add_range(NULL, 0, remoterel->natts - 1);
454 3680 : for (i = 0; i < desc->natts; i++)
455 : {
456 : int attnum;
457 2570 : Form_pg_attribute attr = TupleDescAttr(desc, i);
458 :
459 2570 : if (attr->attisdropped)
460 : {
461 4 : entry->attrmap->attnums[i] = -1;
462 4 : continue;
463 : }
464 :
465 2566 : attnum = logicalrep_rel_att_by_name(remoterel,
466 2566 : NameStr(attr->attname));
467 :
468 2566 : entry->attrmap->attnums[i] = attnum;
469 2566 : if (attnum >= 0)
470 : {
471 : /* Remember which subscriber columns are generated. */
472 2012 : if (attr->attgenerated)
473 4 : generatedattrs = bms_add_member(generatedattrs, attnum);
474 :
475 2012 : missingatts = bms_del_member(missingatts, attnum);
476 : }
477 : }
478 :
479 1110 : logicalrep_report_missing_or_gen_attrs(remoterel, missingatts,
480 : generatedattrs);
481 :
482 : /* be tidy */
483 1106 : bms_free(generatedattrs);
484 1106 : bms_free(missingatts);
485 :
486 : /*
487 : * Set if the table's replica identity is enough to apply
488 : * update/delete.
489 : */
490 1106 : logicalrep_rel_mark_updatable(entry);
491 :
492 : /*
493 : * Finding a usable index is an infrequent task. It occurs when an
494 : * operation is first performed on the relation, or after invalidation
495 : * of the relation cache entry (such as ANALYZE or CREATE/DROP index
496 : * on the relation).
497 : */
498 1106 : entry->localindexoid = FindLogicalRepLocalIndex(entry->localrel, remoterel,
499 : entry->attrmap);
500 :
501 1106 : entry->localrelvalid = true;
502 : }
503 :
504 297178 : if (entry->state != SUBREL_STATE_READY)
505 1200 : entry->state = GetSubscriptionRelState(MySubscription->oid,
506 : entry->localreloid,
507 : &entry->statelsn);
508 :
509 297178 : return entry;
510 : }
511 :
512 : /*
513 : * Close the previously opened logical relation.
514 : */
515 : void
516 297092 : logicalrep_rel_close(LogicalRepRelMapEntry *rel, LOCKMODE lockmode)
517 : {
518 297092 : table_close(rel->localrel, lockmode);
519 297092 : rel->localrel = NULL;
520 297092 : }
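
/*
 * Minimal usage sketch (not part of the original file), assuming a remote
 * relation ID taken from an incoming change message.  The function name and
 * the "apply the change" step are placeholders; the real callers live in
 * worker.c.
 */
static void
example_apply_change(LogicalRepRelId remoteid)
{
	LogicalRepRelMapEntry *rel;

	/* Look up the cached entry and open/lock the mapped local table. */
	rel = logicalrep_rel_open(remoteid, RowExclusiveLock);

	/* ... apply the replicated insert/update/delete to rel->localrel ... */

	/* Drop the relcache reference; the lock is kept until commit. */
	logicalrep_rel_close(rel, NoLock);
}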
521 :
522 : /*
523 : * Partition cache: look up partition LogicalRepRelMapEntry's
524 : *
525 : * Unlike the relation map cache, this is keyed by partition OID, not remote
526 : * relation OID, because we only need this cache when partitions are not
527 : * directly mapped to any remote relation, such as when replication is
528 : * occurring with one of their ancestors as the target.
529 : */
530 :
531 : /*
532 : * Relcache invalidation callback
533 : */
534 : static void
535 576 : logicalrep_partmap_invalidate_cb(Datum arg, Oid reloid)
536 : {
537 : LogicalRepPartMapEntry *entry;
538 :
539 : /* Just to be sure. */
540 576 : if (LogicalRepPartMap == NULL)
541 0 : return;
542 :
543 576 : if (reloid != InvalidOid)
544 : {
545 : HASH_SEQ_STATUS status;
546 :
547 576 : hash_seq_init(&status, LogicalRepPartMap);
548 :
549 : /* TODO, use inverse lookup hashtable? */
550 1644 : while ((entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
551 : {
552 1080 : if (entry->relmapentry.localreloid == reloid)
553 : {
554 12 : entry->relmapentry.localrelvalid = false;
555 12 : hash_seq_term(&status);
556 12 : break;
557 : }
558 : }
559 : }
560 : else
561 : {
562 : /* invalidate all cache entries */
563 : HASH_SEQ_STATUS status;
564 :
565 0 : hash_seq_init(&status, LogicalRepPartMap);
566 :
567 0 : while ((entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
568 0 : entry->relmapentry.localrelvalid = false;
569 : }
570 : }
571 :
572 : /*
573 : * Reset the entries in the partition map that refer to remoterel.
574 : *
575 : * Called when new relation mapping is sent by the publisher to update our
576 : * expected view of incoming data from said publisher.
577 : *
578 : * Note that we don't update the remoterel information in the entry here,
579 : * we will update the information in logicalrep_partition_open to avoid
580 : * unnecessary work.
581 : */
582 : void
583 846 : logicalrep_partmap_reset_relmap(LogicalRepRelation *remoterel)
584 : {
585 : HASH_SEQ_STATUS status;
586 : LogicalRepPartMapEntry *part_entry;
587 : LogicalRepRelMapEntry *entry;
588 :
589 846 : if (LogicalRepPartMap == NULL)
590 778 : return;
591 :
592 68 : hash_seq_init(&status, LogicalRepPartMap);
593 174 : while ((part_entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
594 : {
595 106 : entry = &part_entry->relmapentry;
596 :
597 106 : if (entry->remoterel.remoteid != remoterel->remoteid)
598 90 : continue;
599 :
600 16 : logicalrep_relmap_free_entry(entry);
601 :
602 16 : memset(entry, 0, sizeof(LogicalRepRelMapEntry));
603 : }
604 : }
605 :
606 : /*
607 : * Initialize the partition map cache.
608 : */
609 : static void
610 12 : logicalrep_partmap_init(void)
611 : {
612 : HASHCTL ctl;
613 :
614 12 : if (!LogicalRepPartMapContext)
615 12 : LogicalRepPartMapContext =
616 12 : AllocSetContextCreate(CacheMemoryContext,
617 : "LogicalRepPartMapContext",
618 : ALLOCSET_DEFAULT_SIZES);
619 :
620 : /* Initialize the relation hash table. */
621 12 : ctl.keysize = sizeof(Oid); /* partition OID */
622 12 : ctl.entrysize = sizeof(LogicalRepPartMapEntry);
623 12 : ctl.hcxt = LogicalRepPartMapContext;
624 :
625 12 : LogicalRepPartMap = hash_create("logicalrep partition map cache", 64, &ctl,
626 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
627 :
628 : /* Watch for invalidation events. */
629 12 : CacheRegisterRelcacheCallback(logicalrep_partmap_invalidate_cb,
630 : (Datum) 0);
631 12 : }
632 :
633 : /*
634 : * logicalrep_partition_open
635 : *
636 : * Returned entry reuses most of the values of the root table's entry, save
637 : * the attribute map, which can be different for the partition. However,
638 : * we must physically copy all the data, in case the root table's entry
639 : * gets freed/rebuilt.
640 : *
641 : * Note there's no logicalrep_partition_close, because the caller closes the
642 : * component relation.
643 : */
644 : LogicalRepRelMapEntry *
645 60 : logicalrep_partition_open(LogicalRepRelMapEntry *root,
646 : Relation partrel, AttrMap *map)
647 : {
648 : LogicalRepRelMapEntry *entry;
649 : LogicalRepPartMapEntry *part_entry;
650 60 : LogicalRepRelation *remoterel = &root->remoterel;
651 60 : Oid partOid = RelationGetRelid(partrel);
652 60 : AttrMap *attrmap = root->attrmap;
653 : bool found;
654 : MemoryContext oldctx;
655 :
656 60 : if (LogicalRepPartMap == NULL)
657 12 : logicalrep_partmap_init();
658 :
659 : /* Search for existing entry. */
660 60 : part_entry = (LogicalRepPartMapEntry *) hash_search(LogicalRepPartMap,
661 : &partOid,
662 : HASH_ENTER, &found);
663 :
664 60 : entry = &part_entry->relmapentry;
665 :
666 : /*
667 : * We must always overwrite entry->localrel with the latest partition
668 : * Relation pointer, because the Relation pointed to by the old value may
669 : * have been cleared after the caller closed the partition relation
670 : * following the last use of this entry. Note that localrelvalid is
671 : * only updated by the relcache invalidation callback, so it may still be
672 : * true irrespective of whether the Relation pointed to by localrel has
673 : * been cleared or not.
674 : */
675 60 : if (found && entry->localrelvalid)
676 : {
677 30 : entry->localrel = partrel;
678 30 : return entry;
679 : }
680 :
681 : /* Switch to longer-lived context. */
682 30 : oldctx = MemoryContextSwitchTo(LogicalRepPartMapContext);
683 :
684 30 : if (!found)
685 : {
686 18 : memset(part_entry, 0, sizeof(LogicalRepPartMapEntry));
687 18 : part_entry->partoid = partOid;
688 : }
689 :
690 : /* Release the no-longer-useful attrmap, if any. */
691 30 : if (entry->attrmap)
692 : {
693 2 : free_attrmap(entry->attrmap);
694 2 : entry->attrmap = NULL;
695 : }
696 :
697 30 : if (!entry->remoterel.remoteid)
698 : {
699 : int i;
700 :
701 : /* Remote relation is copied as-is from the root entry. */
702 28 : entry->remoterel.remoteid = remoterel->remoteid;
703 28 : entry->remoterel.nspname = pstrdup(remoterel->nspname);
704 28 : entry->remoterel.relname = pstrdup(remoterel->relname);
705 28 : entry->remoterel.natts = remoterel->natts;
706 28 : entry->remoterel.attnames = palloc(remoterel->natts * sizeof(char *));
707 28 : entry->remoterel.atttyps = palloc(remoterel->natts * sizeof(Oid));
708 88 : for (i = 0; i < remoterel->natts; i++)
709 : {
710 60 : entry->remoterel.attnames[i] = pstrdup(remoterel->attnames[i]);
711 60 : entry->remoterel.atttyps[i] = remoterel->atttyps[i];
712 : }
713 28 : entry->remoterel.replident = remoterel->replident;
714 28 : entry->remoterel.attkeys = bms_copy(remoterel->attkeys);
715 : }
716 :
717 30 : entry->localrel = partrel;
718 30 : entry->localreloid = partOid;
719 :
720 : /*
721 : * If the partition's attributes don't match the root relation's, we'll
722 : * need to make a new attrmap which maps partition attribute numbers to
723 : * remoterel's, instead of the original which maps root relation's
724 : * attribute numbers to remoterel's.
725 : *
726 : * Note that 'map' which comes from the tuple routing data structure
727 : * contains 1-based attribute numbers (of the parent relation). However,
728 : * the map in 'entry', a logical replication data structure, contains
729 : * 0-based attribute numbers (of the remote relation).
730 : */
731 30 : if (map)
732 : {
733 : AttrNumber attno;
734 :
735 16 : entry->attrmap = make_attrmap(map->maplen);
736 68 : for (attno = 0; attno < entry->attrmap->maplen; attno++)
737 : {
738 52 : AttrNumber root_attno = map->attnums[attno];
739 :
740 : /* 0 means it's a dropped attribute. See comments atop AttrMap. */
741 52 : if (root_attno == 0)
742 4 : entry->attrmap->attnums[attno] = -1;
743 : else
744 48 : entry->attrmap->attnums[attno] = attrmap->attnums[root_attno - 1];
745 : }
746 : }
747 : else
748 : {
749 : /* Lacking copy_attmap, do this the hard way. */
750 14 : entry->attrmap = make_attrmap(attrmap->maplen);
751 14 : memcpy(entry->attrmap->attnums, attrmap->attnums,
752 14 : attrmap->maplen * sizeof(AttrNumber));
753 : }
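
	/*
	 * Worked example (illustrative, not from the original file): remote
	 * relation (a, b); local parent also (a, b), so the root attrmap is
	 * {0, 1}; a partition that physically stores its columns as (b, a).
	 * Tuple routing then supplies map->attnums = {2, 1} (1-based parent
	 * attnums), and the branch above produces entry->attrmap->attnums =
	 * {1, 0}: partition offset 0 ("b") maps to remote column 1, offset 1
	 * ("a") to remote column 0.  A dropped partition column (map value 0)
	 * is recorded as -1.
	 */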
754 :
755 : /* Set if the table's replica identity is enough to apply update/delete. */
756 30 : logicalrep_rel_mark_updatable(entry);
757 :
758 : /* state and statelsn are left set to 0. */
759 30 : MemoryContextSwitchTo(oldctx);
760 :
761 : /*
762 : * Finding a usable index is an infrequent task. It occurs when an
763 : * operation is first performed on the relation, or after invalidation of
764 : * the relation cache entry (such as ANALYZE or CREATE/DROP index on the
765 : * relation).
766 : *
767 : * We also prefer to run this code on the oldctx so that we do not leak
768 : * anything in the LogicalRepPartMapContext (hence CacheMemoryContext).
769 : */
770 30 : entry->localindexoid = FindLogicalRepLocalIndex(partrel, remoterel,
771 : entry->attrmap);
772 :
773 30 : entry->localrelvalid = true;
774 :
775 30 : return entry;
776 : }
777 :
778 : /*
779 : * Returns the oid of an index that can be used by the apply worker to scan
780 : * the relation.
781 : *
782 : * We expect to call this function when REPLICA IDENTITY FULL is defined for
783 : * the remote relation.
784 : *
785 : * If no suitable index is found, returns InvalidOid.
786 : */
787 : static Oid
788 126 : FindUsableIndexForReplicaIdentityFull(Relation localrel, AttrMap *attrmap)
789 : {
790 126 : List *idxlist = RelationGetIndexList(localrel);
791 :
792 228 : foreach_oid(idxoid, idxlist)
793 : {
794 : bool isUsableIdx;
795 : Relation idxRel;
796 :
797 40 : idxRel = index_open(idxoid, AccessShareLock);
798 40 : isUsableIdx = IsIndexUsableForReplicaIdentityFull(idxRel, attrmap);
799 40 : index_close(idxRel, AccessShareLock);
800 :
801 : /* Return the first eligible index found */
802 40 : if (isUsableIdx)
803 32 : return idxoid;
804 : }
805 :
806 94 : return InvalidOid;
807 : }
808 :
809 : /*
810 : * Returns true if the index is usable for replica identity full.
811 : *
812 : * The index must have an equality strategy for each key column, must not be
813 : * partial, and its leftmost field must be a plain column (not an expression)
814 : * that references a remote relation column. These limitations help to keep
815 : * the index scan similar to PK/RI index scans.
816 : *
817 : * attrmap is a map of local attributes to remote ones. We can consult this
818 : * map to check whether the local index attribute has a corresponding remote
819 : * attribute.
820 : *
821 : * Note that the limitations on index scans for replica identity full cover
822 : * only a subset of the limitations of PK/RI. For example, we support columns
823 : * that are marked [NULL], and we are not interested in the [NOT DEFERRABLE]
824 : * aspect of constraints here. This works for us because we always compare
825 : * the tuples for non-PK/RI index scans. See
826 : * RelationFindReplTupleByIndex().
827 : *
828 : * XXX: Supporting partial indexes would likely require larger changes: if none
829 : * of the tuples satisfy the index predicate, we fall back to sequential
830 : * execution, which might not be a good idea in some cases.
831 : */
832 : bool
833 40 : IsIndexUsableForReplicaIdentityFull(Relation idxrel, AttrMap *attrmap)
834 : {
835 : AttrNumber keycol;
836 : oidvector *indclass;
837 :
838 : /* The index must not be a partial index */
839 40 : if (!heap_attisnull(idxrel->rd_indextuple, Anum_pg_index_indpred, NULL))
840 4 : return false;
841 :
842 : Assert(idxrel->rd_index->indnatts >= 1);
843 :
844 36 : indclass = (oidvector *) DatumGetPointer(SysCacheGetAttrNotNull(INDEXRELID,
845 36 : idxrel->rd_indextuple,
846 : Anum_pg_index_indclass));
847 :
848 : /* Ensure that the index has a valid equal strategy for each key column */
849 104 : for (int i = 0; i < idxrel->rd_index->indnkeyatts; i++)
850 : {
851 : Oid opfamily;
852 :
853 68 : opfamily = get_opclass_family(indclass->values[i]);
854 68 : if (IndexAmTranslateCompareType(COMPARE_EQ, idxrel->rd_rel->relam, opfamily, true) == InvalidStrategy)
855 0 : return false;
856 : }
857 :
858 : /*
859 : * For indexes other than PK and REPLICA IDENTITY, we need to match the
860 : * local and remote tuples. The equality routine tuples_equal() cannot
861 : * accept a data type where the type cache cannot provide an equality
862 : * operator.
863 : */
864 104 : for (int i = 0; i < idxrel->rd_att->natts; i++)
865 : {
866 : TypeCacheEntry *typentry;
867 :
868 68 : typentry = lookup_type_cache(TupleDescAttr(idxrel->rd_att, i)->atttypid, TYPECACHE_EQ_OPR_FINFO);
869 68 : if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
870 0 : return false;
871 : }
872 :
873 : /* The leftmost index field must not be an expression */
874 36 : keycol = idxrel->rd_index->indkey.values[0];
875 36 : if (!AttributeNumberIsValid(keycol))
876 4 : return false;
877 :
878 : /*
879 : * And the leftmost index field must reference the remote relation column.
880 : * This is because if it doesn't, the sequential scan is favorable over
881 : * index scan in most cases.
882 : */
883 32 : if (attrmap->maplen <= AttrNumberGetAttrOffset(keycol) ||
884 32 : attrmap->attnums[AttrNumberGetAttrOffset(keycol)] < 0)
885 0 : return false;
886 :
887 : /*
888 : * The given index access method must implement "amgettuple", which will
889 : * be used later to fetch the tuples. See RelationFindReplTupleByIndex().
890 : */
891 32 : if (GetIndexAmRoutineByAmId(idxrel->rd_rel->relam, false)->amgettuple == NULL)
892 0 : return false;
893 :
894 32 : return true;
895 : }
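
/*
 * Illustrative examples (not part of the original file), assuming default
 * btree operator classes and that column "a" maps to a remote column:
 *
 *   CREATE INDEX ON tab (a);              -- usable
 *   CREATE INDEX ON tab (a) WHERE a > 0;  -- not usable: partial index
 *   CREATE INDEX ON tab ((a + 1), a);     -- not usable: leftmost field is
 *                                         --   an expression
 */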
896 :
897 : /*
898 : * Return the OID of the replica identity index if one is defined;
899 : * the OID of the PK if one exists and is not deferrable;
900 : * otherwise, InvalidOid.
901 : */
902 : Oid
903 145282 : GetRelationIdentityOrPK(Relation rel)
904 : {
905 : Oid idxoid;
906 :
907 145282 : idxoid = RelationGetReplicaIndex(rel);
908 :
909 145282 : if (!OidIsValid(idxoid))
910 434 : idxoid = RelationGetPrimaryKeyIndex(rel, false);
911 :
912 145282 : return idxoid;
913 : }
914 :
915 : /*
916 : * Returns the index OID if we can use an index on the subscriber. Otherwise,
917 : * returns InvalidOid.
918 : */
919 : static Oid
920 1136 : FindLogicalRepLocalIndex(Relation localrel, LogicalRepRelation *remoterel,
921 : AttrMap *attrMap)
922 : {
923 : Oid idxoid;
924 :
925 : /*
926 : * We never need an index OID for partitioned tables; we always rely on the
927 : * leaf partition's index.
928 : */
929 1136 : if (localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
930 130 : return InvalidOid;
931 :
932 : /*
933 : * Simple case, we already have a primary key or a replica identity index.
934 : */
935 1006 : idxoid = GetRelationIdentityOrPK(localrel);
936 1006 : if (OidIsValid(idxoid))
937 652 : return idxoid;
938 :
939 354 : if (remoterel->replident == REPLICA_IDENTITY_FULL)
940 : {
941 : /*
942 : * We are looking for one more opportunity for using an index. If
943 : * there are any indexes defined on the local relation, try to pick a
944 : * suitable index.
945 : *
946 : * The index selection safely assumes that all the columns are going
947 : * to be available for the index scan given that remote relation has
948 : * replica identity full.
949 : *
950 : * Note that we are not using the planner to find the cheapest method
951 : * to scan the relation as that would require us to either use lower
952 : * level planner functions which would be a maintenance burden in the
953 : * long run or use the full-fledged planner which could cause
954 : * overhead.
955 : */
956 126 : return FindUsableIndexForReplicaIdentityFull(localrel, attrMap);
957 : }
958 :
959 228 : return InvalidOid;
960 : }