Line data Source code
1 : /*-------------------------------------------------------------------------
2 : * relation.c
3 : * PostgreSQL logical replication relation mapping cache
4 : *
5 : * Copyright (c) 2016-2026, PostgreSQL Global Development Group
6 : *
7 : * IDENTIFICATION
8 : * src/backend/replication/logical/relation.c
9 : *
10 : * NOTES
11 : * Routines in this file mainly have to do with mapping the properties
12 : * of local replication target relations to the properties of their
13 : * remote counterpart.
14 : *
15 : *-------------------------------------------------------------------------
16 : */
17 :
18 : #include "postgres.h"
19 :
20 : #include "access/amapi.h"
21 : #include "access/genam.h"
22 : #include "access/table.h"
23 : #include "catalog/namespace.h"
24 : #include "catalog/pg_subscription_rel.h"
25 : #include "executor/executor.h"
26 : #include "nodes/makefuncs.h"
27 : #include "replication/logicalrelation.h"
28 : #include "replication/worker_internal.h"
29 : #include "utils/inval.h"
30 : #include "utils/lsyscache.h"
31 : #include "utils/syscache.h"
32 : #include "utils/typcache.h"
33 :
34 :
/*
 * Memory context that owns the data cached in LogicalRepRelMap.  It is
 * created on first use, as a child of CacheMemoryContext, by
 * logicalrep_relmap_init().
 */
static MemoryContext LogicalRepRelMapContext = NULL;

/*
 * Relation map cache: a hash table of LogicalRepRelMapEntry keyed by the
 * remote relation ID (LogicalRepRelId).  NULL until logicalrep_relmap_init()
 * has run.
 */
static HTAB *LogicalRepRelMap = NULL;

/*
 * Partition map (LogicalRepPartMap)
 *
 * When a partitioned table is used as replication target, replicated
 * operations are actually performed on its leaf partitions, which requires
 * the partitions to also be mapped to the remote relation.  Parent's entry
 * (LogicalRepRelMapEntry) cannot be used as-is for all partitions, because
 * individual partitions may have different attribute numbers, which means
 * attribute mappings to remote relation's attributes must be maintained
 * separately for each partition.
 *
 * Both the context and the hash table are created on first use by
 * logicalrep_partmap_init().
 */
static MemoryContext LogicalRepPartMapContext = NULL;
static HTAB *LogicalRepPartMap = NULL;
typedef struct LogicalRepPartMapEntry
{
	Oid			partoid;		/* LogicalRepPartMap's key */
	LogicalRepRelMapEntry relmapentry;	/* per-partition copy of the mapping */
} LogicalRepPartMapEntry;

/* Forward declaration; defined at the bottom of this file. */
static Oid	FindLogicalRepLocalIndex(Relation localrel, LogicalRepRelation *remoterel,
									 AttrMap *attrMap);
60 :
61 : /*
62 : * Relcache invalidation callback for our relation map cache.
63 : */
64 : static void
65 1440 : logicalrep_relmap_invalidate_cb(Datum arg, Oid reloid)
66 : {
67 : LogicalRepRelMapEntry *entry;
68 :
69 : /* Just to be sure. */
70 1440 : if (LogicalRepRelMap == NULL)
71 0 : return;
72 :
73 1440 : if (reloid != InvalidOid)
74 : {
75 : HASH_SEQ_STATUS status;
76 :
77 1440 : hash_seq_init(&status, LogicalRepRelMap);
78 :
79 : /* TODO, use inverse lookup hashtable? */
80 6242 : while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
81 : {
82 5064 : if (entry->localreloid == reloid)
83 : {
84 262 : entry->localrelvalid = false;
85 262 : hash_seq_term(&status);
86 262 : break;
87 : }
88 : }
89 : }
90 : else
91 : {
92 : /* invalidate all cache entries */
93 : HASH_SEQ_STATUS status;
94 :
95 0 : hash_seq_init(&status, LogicalRepRelMap);
96 :
97 0 : while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
98 0 : entry->localrelvalid = false;
99 : }
100 : }
101 :
102 : /*
103 : * Initialize the relation map cache.
104 : */
105 : static void
106 778 : logicalrep_relmap_init(void)
107 : {
108 : HASHCTL ctl;
109 :
110 778 : if (!LogicalRepRelMapContext)
111 778 : LogicalRepRelMapContext =
112 778 : AllocSetContextCreate(CacheMemoryContext,
113 : "LogicalRepRelMapContext",
114 : ALLOCSET_DEFAULT_SIZES);
115 :
116 : /* Initialize the relation hash table. */
117 778 : ctl.keysize = sizeof(LogicalRepRelId);
118 778 : ctl.entrysize = sizeof(LogicalRepRelMapEntry);
119 778 : ctl.hcxt = LogicalRepRelMapContext;
120 :
121 778 : LogicalRepRelMap = hash_create("logicalrep relation map cache", 128, &ctl,
122 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
123 :
124 : /* Watch for invalidation events. */
125 778 : CacheRegisterRelcacheCallback(logicalrep_relmap_invalidate_cb,
126 : (Datum) 0);
127 778 : }
128 :
129 : /*
130 : * Free the entry of a relation map cache.
131 : */
132 : static void
133 284 : logicalrep_relmap_free_entry(LogicalRepRelMapEntry *entry)
134 : {
135 : LogicalRepRelation *remoterel;
136 :
137 284 : remoterel = &entry->remoterel;
138 :
139 284 : pfree(remoterel->nspname);
140 284 : pfree(remoterel->relname);
141 :
142 284 : if (remoterel->natts > 0)
143 : {
144 : int i;
145 :
146 860 : for (i = 0; i < remoterel->natts; i++)
147 576 : pfree(remoterel->attnames[i]);
148 :
149 284 : pfree(remoterel->attnames);
150 284 : pfree(remoterel->atttyps);
151 : }
152 284 : bms_free(remoterel->attkeys);
153 :
154 284 : if (entry->attrmap)
155 240 : free_attrmap(entry->attrmap);
156 284 : }
157 :
158 : /*
159 : * Add new entry or update existing entry in the relation map cache.
160 : *
161 : * Called when new relation mapping is sent by the publisher to update
162 : * our expected view of incoming data from said publisher.
163 : */
164 : void
165 1280 : logicalrep_relmap_update(LogicalRepRelation *remoterel)
166 : {
167 : MemoryContext oldctx;
168 : LogicalRepRelMapEntry *entry;
169 : bool found;
170 : int i;
171 :
172 1280 : if (LogicalRepRelMap == NULL)
173 778 : logicalrep_relmap_init();
174 :
175 : /*
176 : * HASH_ENTER returns the existing entry if present or creates a new one.
177 : */
178 1280 : entry = hash_search(LogicalRepRelMap, &remoterel->remoteid,
179 : HASH_ENTER, &found);
180 :
181 1280 : if (found)
182 268 : logicalrep_relmap_free_entry(entry);
183 :
184 1280 : memset(entry, 0, sizeof(LogicalRepRelMapEntry));
185 :
186 : /* Make cached copy of the data */
187 1280 : oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
188 1280 : entry->remoterel.remoteid = remoterel->remoteid;
189 1280 : entry->remoterel.nspname = pstrdup(remoterel->nspname);
190 1280 : entry->remoterel.relname = pstrdup(remoterel->relname);
191 1280 : entry->remoterel.natts = remoterel->natts;
192 1280 : entry->remoterel.attnames = palloc_array(char *, remoterel->natts);
193 1280 : entry->remoterel.atttyps = palloc_array(Oid, remoterel->natts);
194 3610 : for (i = 0; i < remoterel->natts; i++)
195 : {
196 2330 : entry->remoterel.attnames[i] = pstrdup(remoterel->attnames[i]);
197 2330 : entry->remoterel.atttyps[i] = remoterel->atttyps[i];
198 : }
199 1280 : entry->remoterel.replident = remoterel->replident;
200 :
201 : /*
202 : * XXX The walsender currently does not transmit the relkind of the remote
203 : * relation when replicating changes. Since we support replicating only
204 : * table changes at present, we default to initializing relkind as
205 : * RELKIND_RELATION. This is needed in CheckSubscriptionRelkind() to check
206 : * if the publisher and subscriber relation kinds are compatible.
207 : */
208 1280 : entry->remoterel.relkind =
209 1280 : (remoterel->relkind == 0) ? RELKIND_RELATION : remoterel->relkind;
210 :
211 1280 : entry->remoterel.attkeys = bms_copy(remoterel->attkeys);
212 1280 : MemoryContextSwitchTo(oldctx);
213 1280 : }
214 :
215 : /*
216 : * Find attribute index in TupleDesc struct by attribute name.
217 : *
218 : * Returns -1 if not found.
219 : */
220 : static int
221 2654 : logicalrep_rel_att_by_name(LogicalRepRelation *remoterel, const char *attname)
222 : {
223 : int i;
224 :
225 5036 : for (i = 0; i < remoterel->natts; i++)
226 : {
227 4474 : if (strcmp(remoterel->attnames[i], attname) == 0)
228 2092 : return i;
229 : }
230 :
231 562 : return -1;
232 : }
233 :
234 : /*
235 : * Returns a comma-separated string of attribute names based on the provided
236 : * relation and bitmap indicating which attributes to include.
237 : */
238 : static char *
239 4 : logicalrep_get_attrs_str(LogicalRepRelation *remoterel, Bitmapset *atts)
240 : {
241 : StringInfoData attsbuf;
242 4 : int attcnt = 0;
243 4 : int i = -1;
244 :
245 : Assert(!bms_is_empty(atts));
246 :
247 4 : initStringInfo(&attsbuf);
248 :
249 12 : while ((i = bms_next_member(atts, i)) >= 0)
250 : {
251 8 : attcnt++;
252 8 : if (attcnt > 1)
253 : /* translator: This is a separator in a list of entity names. */
254 4 : appendStringInfoString(&attsbuf, _(", "));
255 :
256 8 : appendStringInfo(&attsbuf, _("\"%s\""), remoterel->attnames[i]);
257 : }
258 :
259 4 : return attsbuf.data;
260 : }
261 :
262 : /*
263 : * If attempting to replicate missing or generated columns, report an error.
264 : * Prioritize 'missing' errors if both occur though the prioritization is
265 : * arbitrary.
266 : */
267 : static void
268 1150 : logicalrep_report_missing_or_gen_attrs(LogicalRepRelation *remoterel,
269 : Bitmapset *missingatts,
270 : Bitmapset *generatedatts)
271 : {
272 1150 : if (!bms_is_empty(missingatts))
273 2 : ereport(ERROR,
274 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
275 : errmsg_plural("logical replication target relation \"%s.%s\" is missing replicated column: %s",
276 : "logical replication target relation \"%s.%s\" is missing replicated columns: %s",
277 : bms_num_members(missingatts),
278 : remoterel->nspname,
279 : remoterel->relname,
280 : logicalrep_get_attrs_str(remoterel,
281 : missingatts)));
282 :
283 1148 : if (!bms_is_empty(generatedatts))
284 2 : ereport(ERROR,
285 : errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
286 : errmsg_plural("logical replication target relation \"%s.%s\" has incompatible generated column: %s",
287 : "logical replication target relation \"%s.%s\" has incompatible generated columns: %s",
288 : bms_num_members(generatedatts),
289 : remoterel->nspname,
290 : remoterel->relname,
291 : logicalrep_get_attrs_str(remoterel,
292 : generatedatts)));
293 1146 : }
294 :
295 : /*
296 : * Check if replica identity matches and mark the updatable flag.
297 : *
298 : * We allow for stricter replica identity (fewer columns) on subscriber as
299 : * that will not stop us from finding unique tuple. IE, if publisher has
300 : * identity (id,timestamp) and subscriber just (id) this will not be a
301 : * problem, but in the opposite scenario it will.
302 : *
303 : * We just mark the relation entry as not updatable here if the local
304 : * replica identity is found to be insufficient for applying
305 : * updates/deletes (inserts don't care!) and leave it to
306 : * check_relation_updatable() to throw the actual error if needed.
307 : */
308 : static void
309 1176 : logicalrep_rel_mark_updatable(LogicalRepRelMapEntry *entry)
310 : {
311 : Bitmapset *idkey;
312 1176 : LogicalRepRelation *remoterel = &entry->remoterel;
313 : int i;
314 :
315 1176 : entry->updatable = true;
316 :
317 1176 : idkey = RelationGetIndexAttrBitmap(entry->localrel,
318 : INDEX_ATTR_BITMAP_IDENTITY_KEY);
319 : /* fallback to PK if no replica identity */
320 1176 : if (idkey == NULL)
321 : {
322 418 : idkey = RelationGetIndexAttrBitmap(entry->localrel,
323 : INDEX_ATTR_BITMAP_PRIMARY_KEY);
324 :
325 : /*
326 : * If no replica identity index and no PK, the published table must
327 : * have replica identity FULL.
328 : */
329 418 : if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL)
330 266 : entry->updatable = false;
331 : }
332 :
333 1176 : i = -1;
334 1936 : while ((i = bms_next_member(idkey, i)) >= 0)
335 : {
336 792 : int attnum = i + FirstLowInvalidHeapAttributeNumber;
337 :
338 792 : if (!AttrNumberIsForUserDefinedAttr(attnum))
339 0 : ereport(ERROR,
340 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
341 : errmsg("logical replication target relation \"%s.%s\" uses "
342 : "system columns in REPLICA IDENTITY index",
343 : remoterel->nspname, remoterel->relname)));
344 :
345 792 : attnum = AttrNumberGetAttrOffset(attnum);
346 :
347 792 : if (entry->attrmap->attnums[attnum] < 0 ||
348 790 : !bms_is_member(entry->attrmap->attnums[attnum], remoterel->attkeys))
349 : {
350 32 : entry->updatable = false;
351 32 : break;
352 : }
353 : }
354 1176 : }
355 :
356 : /*
357 : * Open the local relation associated with the remote one.
358 : *
359 : * Rebuilds the Relcache mapping if it was invalidated by local DDL.
360 : */
361 : LogicalRepRelMapEntry *
362 297702 : logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode)
363 : {
364 : LogicalRepRelMapEntry *entry;
365 : bool found;
366 : LogicalRepRelation *remoterel;
367 :
368 297702 : if (LogicalRepRelMap == NULL)
369 0 : logicalrep_relmap_init();
370 :
371 : /* Search for existing entry. */
372 297702 : entry = hash_search(LogicalRepRelMap, &remoteid,
373 : HASH_FIND, &found);
374 :
375 297702 : if (!found)
376 0 : elog(ERROR, "no relation map entry for remote relation ID %u",
377 : remoteid);
378 :
379 297702 : remoterel = &entry->remoterel;
380 :
381 : /* Ensure we don't leak a relcache refcount. */
382 297702 : if (entry->localrel)
383 0 : elog(ERROR, "remote relation ID %u is already open", remoteid);
384 :
385 : /*
386 : * When opening and locking a relation, pending invalidation messages are
387 : * processed which can invalidate the relation. Hence, if the entry is
388 : * currently considered valid, try to open the local relation by OID and
389 : * see if invalidation ensues.
390 : */
391 297702 : if (entry->localrelvalid)
392 : {
393 296538 : entry->localrel = try_table_open(entry->localreloid, lockmode);
394 296538 : if (!entry->localrel)
395 : {
396 : /* Table was renamed or dropped. */
397 0 : entry->localrelvalid = false;
398 : }
399 296538 : else if (!entry->localrelvalid)
400 : {
401 : /* Note we release the no-longer-useful lock here. */
402 0 : table_close(entry->localrel, lockmode);
403 0 : entry->localrel = NULL;
404 : }
405 : }
406 :
407 : /*
408 : * If the entry has been marked invalid since we last had lock on it,
409 : * re-open the local relation by name and rebuild all derived data.
410 : */
411 297702 : if (!entry->localrelvalid)
412 : {
413 : Oid relid;
414 : TupleDesc desc;
415 : MemoryContext oldctx;
416 : int i;
417 : Bitmapset *missingatts;
418 1164 : Bitmapset *generatedattrs = NULL;
419 :
420 : /* Release the no-longer-useful attrmap, if any. */
421 1164 : if (entry->attrmap)
422 : {
423 26 : free_attrmap(entry->attrmap);
424 26 : entry->attrmap = NULL;
425 : }
426 :
427 : /* Try to find and lock the relation by name. */
428 1164 : relid = RangeVarGetRelid(makeRangeVar(remoterel->nspname,
429 : remoterel->relname, -1),
430 : lockmode, true);
431 1164 : if (!OidIsValid(relid))
432 14 : ereport(ERROR,
433 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
434 : errmsg("logical replication target relation \"%s.%s\" does not exist",
435 : remoterel->nspname, remoterel->relname)));
436 1150 : entry->localrel = table_open(relid, NoLock);
437 1150 : entry->localreloid = relid;
438 :
439 : /* Check for supported relkind. */
440 1150 : CheckSubscriptionRelkind(entry->localrel->rd_rel->relkind,
441 1150 : remoterel->relkind,
442 1150 : remoterel->nspname, remoterel->relname);
443 :
444 : /*
445 : * Build the mapping of local attribute numbers to remote attribute
446 : * numbers and validate that we don't miss any replicated columns as
447 : * that would result in potentially unwanted data loss.
448 : */
449 1150 : desc = RelationGetDescr(entry->localrel);
450 1150 : oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
451 1150 : entry->attrmap = make_attrmap(desc->natts);
452 1150 : MemoryContextSwitchTo(oldctx);
453 :
454 : /* check and report missing attrs, if any */
455 1150 : missingatts = bms_add_range(NULL, 0, remoterel->natts - 1);
456 3808 : for (i = 0; i < desc->natts; i++)
457 : {
458 : int attnum;
459 2658 : Form_pg_attribute attr = TupleDescAttr(desc, i);
460 :
461 2658 : if (attr->attisdropped)
462 : {
463 4 : entry->attrmap->attnums[i] = -1;
464 4 : continue;
465 : }
466 :
467 2654 : attnum = logicalrep_rel_att_by_name(remoterel,
468 2654 : NameStr(attr->attname));
469 :
470 2654 : entry->attrmap->attnums[i] = attnum;
471 2654 : if (attnum >= 0)
472 : {
473 : /* Remember which subscriber columns are generated. */
474 2092 : if (attr->attgenerated)
475 4 : generatedattrs = bms_add_member(generatedattrs, attnum);
476 :
477 2092 : missingatts = bms_del_member(missingatts, attnum);
478 : }
479 : }
480 :
481 1150 : logicalrep_report_missing_or_gen_attrs(remoterel, missingatts,
482 : generatedattrs);
483 :
484 : /* be tidy */
485 1146 : bms_free(generatedattrs);
486 1146 : bms_free(missingatts);
487 :
488 : /*
489 : * Set if the table's replica identity is enough to apply
490 : * update/delete.
491 : */
492 1146 : logicalrep_rel_mark_updatable(entry);
493 :
494 : /*
495 : * Finding a usable index is an infrequent task. It occurs when an
496 : * operation is first performed on the relation, or after invalidation
497 : * of the relation cache entry (such as ANALYZE or CREATE/DROP index
498 : * on the relation).
499 : */
500 1146 : entry->localindexoid = FindLogicalRepLocalIndex(entry->localrel, remoterel,
501 : entry->attrmap);
502 :
503 1146 : entry->localrelvalid = true;
504 : }
505 :
506 297684 : if (entry->state != SUBREL_STATE_READY)
507 1228 : entry->state = GetSubscriptionRelState(MySubscription->oid,
508 : entry->localreloid,
509 : &entry->statelsn);
510 :
511 297684 : return entry;
512 : }
513 :
514 : /*
515 : * Close the previously opened logical relation.
516 : */
517 : void
518 297570 : logicalrep_rel_close(LogicalRepRelMapEntry *rel, LOCKMODE lockmode)
519 : {
520 297570 : table_close(rel->localrel, lockmode);
521 297570 : rel->localrel = NULL;
522 297570 : }
523 :
524 : /*
525 : * Partition cache: look up partition LogicalRepRelMapEntry's
526 : *
527 : * Unlike relation map cache, this is keyed by partition OID, not remote
528 : * relation OID, because we only have to use this cache in the case where
529 : * partitions are not directly mapped to any remote relation, such as when
530 : * replication is occurring with one of their ancestors as target.
531 : */
532 :
533 : /*
534 : * Relcache invalidation callback
535 : */
536 : static void
537 576 : logicalrep_partmap_invalidate_cb(Datum arg, Oid reloid)
538 : {
539 : LogicalRepPartMapEntry *entry;
540 :
541 : /* Just to be sure. */
542 576 : if (LogicalRepPartMap == NULL)
543 0 : return;
544 :
545 576 : if (reloid != InvalidOid)
546 : {
547 : HASH_SEQ_STATUS status;
548 :
549 576 : hash_seq_init(&status, LogicalRepPartMap);
550 :
551 : /* TODO, use inverse lookup hashtable? */
552 1644 : while ((entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
553 : {
554 1080 : if (entry->relmapentry.localreloid == reloid)
555 : {
556 12 : entry->relmapentry.localrelvalid = false;
557 12 : hash_seq_term(&status);
558 12 : break;
559 : }
560 : }
561 : }
562 : else
563 : {
564 : /* invalidate all cache entries */
565 : HASH_SEQ_STATUS status;
566 :
567 0 : hash_seq_init(&status, LogicalRepPartMap);
568 :
569 0 : while ((entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
570 0 : entry->relmapentry.localrelvalid = false;
571 : }
572 : }
573 :
574 : /*
575 : * Reset the entries in the partition map that refer to remoterel.
576 : *
577 : * Called when new relation mapping is sent by the publisher to update our
578 : * expected view of incoming data from said publisher.
579 : *
580 : * Note that we don't update the remoterel information in the entry here,
581 : * we will update the information in logicalrep_partition_open to avoid
582 : * unnecessary work.
583 : */
584 : void
585 880 : logicalrep_partmap_reset_relmap(LogicalRepRelation *remoterel)
586 : {
587 : HASH_SEQ_STATUS status;
588 : LogicalRepPartMapEntry *part_entry;
589 : LogicalRepRelMapEntry *entry;
590 :
591 880 : if (LogicalRepPartMap == NULL)
592 812 : return;
593 :
594 68 : hash_seq_init(&status, LogicalRepPartMap);
595 174 : while ((part_entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
596 : {
597 106 : entry = &part_entry->relmapentry;
598 :
599 106 : if (entry->remoterel.remoteid != remoterel->remoteid)
600 90 : continue;
601 :
602 16 : logicalrep_relmap_free_entry(entry);
603 :
604 16 : memset(entry, 0, sizeof(LogicalRepRelMapEntry));
605 : }
606 : }
607 :
608 : /*
609 : * Initialize the partition map cache.
610 : */
611 : static void
612 12 : logicalrep_partmap_init(void)
613 : {
614 : HASHCTL ctl;
615 :
616 12 : if (!LogicalRepPartMapContext)
617 12 : LogicalRepPartMapContext =
618 12 : AllocSetContextCreate(CacheMemoryContext,
619 : "LogicalRepPartMapContext",
620 : ALLOCSET_DEFAULT_SIZES);
621 :
622 : /* Initialize the relation hash table. */
623 12 : ctl.keysize = sizeof(Oid); /* partition OID */
624 12 : ctl.entrysize = sizeof(LogicalRepPartMapEntry);
625 12 : ctl.hcxt = LogicalRepPartMapContext;
626 :
627 12 : LogicalRepPartMap = hash_create("logicalrep partition map cache", 64, &ctl,
628 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
629 :
630 : /* Watch for invalidation events. */
631 12 : CacheRegisterRelcacheCallback(logicalrep_partmap_invalidate_cb,
632 : (Datum) 0);
633 12 : }
634 :
635 : /*
636 : * logicalrep_partition_open
637 : *
638 : * Returned entry reuses most of the values of the root table's entry, save
639 : * the attribute map, which can be different for the partition. However,
640 : * we must physically copy all the data, in case the root table's entry
641 : * gets freed/rebuilt.
642 : *
643 : * Note there's no logicalrep_partition_close, because the caller closes the
644 : * component relation.
645 : */
646 : LogicalRepRelMapEntry *
647 60 : logicalrep_partition_open(LogicalRepRelMapEntry *root,
648 : Relation partrel, AttrMap *map)
649 : {
650 : LogicalRepRelMapEntry *entry;
651 : LogicalRepPartMapEntry *part_entry;
652 60 : LogicalRepRelation *remoterel = &root->remoterel;
653 60 : Oid partOid = RelationGetRelid(partrel);
654 60 : AttrMap *attrmap = root->attrmap;
655 : bool found;
656 : MemoryContext oldctx;
657 :
658 60 : if (LogicalRepPartMap == NULL)
659 12 : logicalrep_partmap_init();
660 :
661 : /* Search for existing entry. */
662 60 : part_entry = (LogicalRepPartMapEntry *) hash_search(LogicalRepPartMap,
663 : &partOid,
664 : HASH_ENTER, &found);
665 :
666 60 : entry = &part_entry->relmapentry;
667 :
668 : /*
669 : * We must always overwrite entry->localrel with the latest partition
670 : * Relation pointer, because the Relation pointed to by the old value may
671 : * have been cleared after the caller would have closed the partition
672 : * relation after the last use of this entry. Note that localrelvalid is
673 : * only updated by the relcache invalidation callback, so it may still be
674 : * true irrespective of whether the Relation pointed to by localrel has
675 : * been cleared or not.
676 : */
677 60 : if (found && entry->localrelvalid)
678 : {
679 30 : entry->localrel = partrel;
680 30 : return entry;
681 : }
682 :
683 : /* Switch to longer-lived context. */
684 30 : oldctx = MemoryContextSwitchTo(LogicalRepPartMapContext);
685 :
686 30 : if (!found)
687 : {
688 18 : memset(part_entry, 0, sizeof(LogicalRepPartMapEntry));
689 18 : part_entry->partoid = partOid;
690 : }
691 :
692 : /* Release the no-longer-useful attrmap, if any. */
693 30 : if (entry->attrmap)
694 : {
695 2 : free_attrmap(entry->attrmap);
696 2 : entry->attrmap = NULL;
697 : }
698 :
699 30 : if (!entry->remoterel.remoteid)
700 : {
701 : int i;
702 :
703 : /* Remote relation is copied as-is from the root entry. */
704 28 : entry->remoterel.remoteid = remoterel->remoteid;
705 28 : entry->remoterel.nspname = pstrdup(remoterel->nspname);
706 28 : entry->remoterel.relname = pstrdup(remoterel->relname);
707 28 : entry->remoterel.natts = remoterel->natts;
708 28 : entry->remoterel.attnames = palloc_array(char *, remoterel->natts);
709 28 : entry->remoterel.atttyps = palloc_array(Oid, remoterel->natts);
710 88 : for (i = 0; i < remoterel->natts; i++)
711 : {
712 60 : entry->remoterel.attnames[i] = pstrdup(remoterel->attnames[i]);
713 60 : entry->remoterel.atttyps[i] = remoterel->atttyps[i];
714 : }
715 28 : entry->remoterel.replident = remoterel->replident;
716 28 : entry->remoterel.attkeys = bms_copy(remoterel->attkeys);
717 : }
718 :
719 30 : entry->localrel = partrel;
720 30 : entry->localreloid = partOid;
721 :
722 : /*
723 : * If the partition's attributes don't match the root relation's, we'll
724 : * need to make a new attrmap which maps partition attribute numbers to
725 : * remoterel's, instead of the original which maps root relation's
726 : * attribute numbers to remoterel's.
727 : *
728 : * Note that 'map' which comes from the tuple routing data structure
729 : * contains 1-based attribute numbers (of the parent relation). However,
730 : * the map in 'entry', a logical replication data structure, contains
731 : * 0-based attribute numbers (of the remote relation).
732 : */
733 30 : if (map)
734 : {
735 : AttrNumber attno;
736 :
737 16 : entry->attrmap = make_attrmap(map->maplen);
738 68 : for (attno = 0; attno < entry->attrmap->maplen; attno++)
739 : {
740 52 : AttrNumber root_attno = map->attnums[attno];
741 :
742 : /* 0 means it's a dropped attribute. See comments atop AttrMap. */
743 52 : if (root_attno == 0)
744 4 : entry->attrmap->attnums[attno] = -1;
745 : else
746 48 : entry->attrmap->attnums[attno] = attrmap->attnums[root_attno - 1];
747 : }
748 : }
749 : else
750 : {
751 : /* Lacking copy_attmap, do this the hard way. */
752 14 : entry->attrmap = make_attrmap(attrmap->maplen);
753 14 : memcpy(entry->attrmap->attnums, attrmap->attnums,
754 14 : attrmap->maplen * sizeof(AttrNumber));
755 : }
756 :
757 : /* Set if the table's replica identity is enough to apply update/delete. */
758 30 : logicalrep_rel_mark_updatable(entry);
759 :
760 : /* state and statelsn are left set to 0. */
761 30 : MemoryContextSwitchTo(oldctx);
762 :
763 : /*
764 : * Finding a usable index is an infrequent task. It occurs when an
765 : * operation is first performed on the relation, or after invalidation of
766 : * the relation cache entry (such as ANALYZE or CREATE/DROP index on the
767 : * relation).
768 : *
769 : * We also prefer to run this code on the oldctx so that we do not leak
770 : * anything in the LogicalRepPartMapContext (hence CacheMemoryContext).
771 : */
772 30 : entry->localindexoid = FindLogicalRepLocalIndex(partrel, remoterel,
773 : entry->attrmap);
774 :
775 30 : entry->localrelvalid = true;
776 :
777 30 : return entry;
778 : }
779 :
780 : /*
781 : * Returns the oid of an index that can be used by the apply worker to scan
782 : * the relation.
783 : *
784 : * We expect to call this function when REPLICA IDENTITY FULL is defined for
785 : * the remote relation.
786 : *
787 : * If no suitable index is found, returns InvalidOid.
788 : */
789 : static Oid
790 128 : FindUsableIndexForReplicaIdentityFull(Relation localrel, AttrMap *attrmap)
791 : {
792 128 : List *idxlist = RelationGetIndexList(localrel);
793 :
794 232 : foreach_oid(idxoid, idxlist)
795 : {
796 : bool isUsableIdx;
797 : Relation idxRel;
798 :
799 40 : idxRel = index_open(idxoid, AccessShareLock);
800 40 : isUsableIdx = IsIndexUsableForReplicaIdentityFull(idxRel, attrmap);
801 40 : index_close(idxRel, AccessShareLock);
802 :
803 : /* Return the first eligible index found */
804 40 : if (isUsableIdx)
805 32 : return idxoid;
806 : }
807 :
808 96 : return InvalidOid;
809 : }
810 :
811 : /*
812 : * Returns true if the index is usable for replica identity full.
813 : *
814 : * The index must have an equal strategy for each key column, be non-partial,
815 : * and the leftmost field must be a column (not an expression) that references
816 : * the remote relation column. These limitations help to keep the index scan
817 : * similar to PK/RI index scans.
818 : *
819 : * attrmap is a map of local attributes to remote ones. We can consult this
820 : * map to check whether the local index attribute has a corresponding remote
821 : * attribute.
822 : *
823 : * Note that the limitations of index scans for replica identity full only
824 : * adheres to a subset of the limitations of PK/RI. For example, we support
825 : * columns that are marked as [NULL] or we are not interested in the [NOT
826 : * DEFERRABLE] aspect of constraints here. It works for us because we always
827 : * compare the tuples for non-PK/RI index scans. See
828 : * RelationFindReplTupleByIndex().
829 : *
830 : * XXX: To support partial indexes, the required changes are likely to be larger.
831 : * If none of the tuples satisfy the expression for the index scan, we fall-back
832 : * to sequential execution, which might not be a good idea in some cases.
833 : */
834 : bool
835 40 : IsIndexUsableForReplicaIdentityFull(Relation idxrel, AttrMap *attrmap)
836 : {
837 : AttrNumber keycol;
838 : oidvector *indclass;
839 :
840 : /* The index must not be a partial index */
841 40 : if (!heap_attisnull(idxrel->rd_indextuple, Anum_pg_index_indpred, NULL))
842 4 : return false;
843 :
844 : Assert(idxrel->rd_index->indnatts >= 1);
845 :
846 36 : indclass = (oidvector *) DatumGetPointer(SysCacheGetAttrNotNull(INDEXRELID,
847 36 : idxrel->rd_indextuple,
848 : Anum_pg_index_indclass));
849 :
850 : /* Ensure that the index has a valid equal strategy for each key column */
851 104 : for (int i = 0; i < idxrel->rd_index->indnkeyatts; i++)
852 : {
853 : Oid opfamily;
854 :
855 68 : opfamily = get_opclass_family(indclass->values[i]);
856 68 : if (IndexAmTranslateCompareType(COMPARE_EQ, idxrel->rd_rel->relam, opfamily, true) == InvalidStrategy)
857 0 : return false;
858 : }
859 :
860 : /*
861 : * For indexes other than PK and REPLICA IDENTITY, we need to match the
862 : * local and remote tuples. The equality routine tuples_equal() cannot
863 : * accept a data type where the type cache cannot provide an equality
864 : * operator.
865 : */
866 104 : for (int i = 0; i < idxrel->rd_att->natts; i++)
867 : {
868 : TypeCacheEntry *typentry;
869 :
870 68 : typentry = lookup_type_cache(TupleDescAttr(idxrel->rd_att, i)->atttypid, TYPECACHE_EQ_OPR_FINFO);
871 68 : if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
872 0 : return false;
873 : }
874 :
875 : /* The leftmost index field must not be an expression */
876 36 : keycol = idxrel->rd_index->indkey.values[0];
877 36 : if (!AttributeNumberIsValid(keycol))
878 4 : return false;
879 :
880 : /*
881 : * And the leftmost index field must reference the remote relation column.
882 : * This is because if it doesn't, the sequential scan is favorable over
883 : * index scan in most cases.
884 : */
885 32 : if (attrmap->maplen <= AttrNumberGetAttrOffset(keycol) ||
886 32 : attrmap->attnums[AttrNumberGetAttrOffset(keycol)] < 0)
887 0 : return false;
888 :
889 : /*
890 : * The given index access method must implement "amgettuple", which will
891 : * be used later to fetch the tuples. See RelationFindReplTupleByIndex().
892 : */
893 32 : if (GetIndexAmRoutineByAmId(idxrel->rd_rel->relam, false)->amgettuple == NULL)
894 0 : return false;
895 :
896 32 : return true;
897 : }
898 :
899 : /*
900 : * Return the OID of the replica identity index if one is defined;
901 : * the OID of the PK if one exists and is not deferrable;
902 : * otherwise, InvalidOid.
903 : */
904 : Oid
905 145296 : GetRelationIdentityOrPK(Relation rel)
906 : {
907 : Oid idxoid;
908 :
909 145296 : idxoid = RelationGetReplicaIndex(rel);
910 :
911 145296 : if (!OidIsValid(idxoid))
912 446 : idxoid = RelationGetPrimaryKeyIndex(rel, false);
913 :
914 145296 : return idxoid;
915 : }
916 :
917 : /*
918 : * Returns the index oid if we can use an index for subscriber. Otherwise,
919 : * returns InvalidOid.
920 : */
921 : static Oid
922 1176 : FindLogicalRepLocalIndex(Relation localrel, LogicalRepRelation *remoterel,
923 : AttrMap *attrMap)
924 : {
925 : Oid idxoid;
926 :
927 : /*
928 : * We never need index oid for partitioned tables, always rely on leaf
929 : * partition's index.
930 : */
931 1176 : if (localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
932 148 : return InvalidOid;
933 :
934 : /*
935 : * Simple case, we already have a primary key or a replica identity index.
936 : */
937 1028 : idxoid = GetRelationIdentityOrPK(localrel);
938 1028 : if (OidIsValid(idxoid))
939 662 : return idxoid;
940 :
941 366 : if (remoterel->replident == REPLICA_IDENTITY_FULL)
942 : {
943 : /*
944 : * We are looking for one more opportunity for using an index. If
945 : * there are any indexes defined on the local relation, try to pick a
946 : * suitable index.
947 : *
948 : * The index selection safely assumes that all the columns are going
949 : * to be available for the index scan given that remote relation has
950 : * replica identity full.
951 : *
952 : * Note that we are not using the planner to find the cheapest method
953 : * to scan the relation as that would require us to either use lower
954 : * level planner functions which would be a maintenance burden in the
955 : * long run or use the full-fledged planner which could cause
956 : * overhead.
957 : */
958 128 : return FindUsableIndexForReplicaIdentityFull(localrel, attrMap);
959 : }
960 :
961 238 : return InvalidOid;
962 : }
|