LCOV - code coverage report
Current view: top level - src/backend/utils/cache - relmapper.c (source / functions) Hit Total Coverage
Test: PostgreSQL 16beta1 Lines: 249 278 89.6 %
Date: 2023-05-31 00:12:04 Functions: 26 26 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * relmapper.c
       4             :  *    Catalog-to-filenumber mapping
       5             :  *
       6             :  * For most tables, the physical file underlying the table is specified by
       7             :  * pg_class.relfilenode.  However, that obviously won't work for pg_class
       8             :  * itself, nor for the other "nailed" catalogs for which we have to be able
       9             :  * to set up working Relation entries without access to pg_class.  It also
      10             :  * does not work for shared catalogs, since there is no practical way to
      11             :  * update other databases' pg_class entries when relocating a shared catalog.
      12             :  * Therefore, for these special catalogs (henceforth referred to as "mapped
      13             :  * catalogs") we rely on a separately maintained file that shows the mapping
      14             :  * from catalog OIDs to filenumbers.  Each database has a map file for
      15             :  * its local mapped catalogs, and there is a separate map file for shared
      16             :  * catalogs.  Mapped catalogs have zero in their pg_class.relfilenode entries.
      17             :  *
      18             :  * Relocation of a normal table is committed (ie, the new physical file becomes
      19             :  * authoritative) when the pg_class row update commits.  For mapped catalogs,
      20             :  * the act of updating the map file is effectively commit of the relocation.
      21             :  * We postpone the file update till just before commit of the transaction
      22             :  * doing the rewrite, but there is necessarily a window between.  Therefore
      23             :  * mapped catalogs can only be relocated by operations such as VACUUM FULL
      24             :  * and CLUSTER, which make no transactionally-significant changes: it must be
      25             :  * safe for the new file to replace the old, even if the transaction itself
      26             :  * aborts.  An important factor here is that the indexes and toast table of
      27             :  * a mapped catalog must also be mapped, so that the rewrites/relocations of
      28             :  * all these files commit in a single map file update rather than being tied
      29             :  * to transaction commit.
      30             :  *
      31             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      32             :  * Portions Copyright (c) 1994, Regents of the University of California
      33             :  *
      34             :  *
      35             :  * IDENTIFICATION
      36             :  *    src/backend/utils/cache/relmapper.c
      37             :  *
      38             :  *-------------------------------------------------------------------------
      39             :  */
      40             : #include "postgres.h"
      41             : 
      42             : #include <fcntl.h>
      43             : #include <sys/stat.h>
      44             : #include <unistd.h>
      45             : 
      46             : #include "access/xact.h"
      47             : #include "access/xlog.h"
      48             : #include "access/xloginsert.h"
      49             : #include "catalog/catalog.h"
      50             : #include "catalog/pg_tablespace.h"
      51             : #include "catalog/storage.h"
      52             : #include "miscadmin.h"
      53             : #include "pgstat.h"
      54             : #include "storage/fd.h"
      55             : #include "storage/lwlock.h"
      56             : #include "utils/inval.h"
      57             : #include "utils/relmapper.h"
      58             : 
      59             : 
      60             : /*
      61             :  * The map file is critical data: we have no automatic method for recovering
      62             :  * from loss or corruption of it.  We use a CRC so that we can detect
      63             :  * corruption.  Since the file might be more than one standard-size disk
      64             :  * sector in size, we cannot rely on overwrite-in-place. Instead, we generate
      65             :  * a new file and rename it into place, atomically replacing the original file.
      66             :  *
      67             :  * Entries in the mappings[] array are in no particular order.  We could
      68             :  * speed searching by insisting on OID order, but it really shouldn't be
      69             :  * worth the trouble given the intended size of the mapping sets.
      70             :  */
      71             : #define RELMAPPER_FILENAME      "pg_filenode.map"
      72             : #define RELMAPPER_TEMP_FILENAME "pg_filenode.map.tmp"
      73             : 
      74             : #define RELMAPPER_FILEMAGIC     0x592717    /* RelMapFile.magic value (version ID) */
      75             : 
      76             : /*
      77             :  * There's no need for this constant to have any particular value, and we
      78             :  * can raise it as necessary if we end up with more mapped relations. For
      79             :  * now, we just pick a round number that is modestly larger than the expected
      80             :  * number of mappings.  apply_map_update() errors out if a map fills up.
      81             :  */
      82             : #define MAX_MAPPINGS            64
      83             : 
      84             : typedef struct RelMapping
      85             : {
      86             :     Oid         mapoid;         /* OID of a mapped catalog */
      87             :     RelFileNumber mapfilenumber;    /* rel file number mapped to that OID */
      88             : } RelMapping;
      89             : 
      90             : typedef struct RelMapFile
      91             : {
      92             :     int32       magic;          /* always RELMAPPER_FILEMAGIC */
      93             :     int32       num_mappings;   /* number of valid entries at the start of mappings[] */
      94             :     RelMapping  mappings[MAX_MAPPINGS];
      95             :     pg_crc32c   crc;            /* CRC of all fields above */
      96             : } RelMapFile;
      97             : 
      98             : /*
      99             :  * State for serializing local and shared relmappings for parallel workers
     100             :  * (active states only).  See the notes on the active_xxx and pending_xxx
     101             :  * update variables declared further down in this file.
     102             : typedef struct SerializedActiveRelMaps
     103             : {
     104             :     RelMapFile  active_shared_updates;
     105             :     RelMapFile  active_local_updates;
     106             : } SerializedActiveRelMaps;
     107             : 
     108             : /*
     109             :  * The currently known contents of the shared map file and our database's
     110             :  * local map file are stored here.  RelationMapInvalidate[All] reloads them
     111             :  * from disk on sinval, but only if already loaded (magic field is valid).
     112             :  */
     113             : static RelMapFile shared_map;
     114             : static RelMapFile local_map;
     115             : 
     116             : /*
     117             :  * We use the same RelMapFile data structure to track uncommitted local
     118             :  * changes in the mappings (but note the magic and crc fields are not made
     119             :  * valid in these variables).  Currently, map updates are not allowed within
     120             :  * subtransactions, so one set of transaction-level changes is sufficient.
     121             :  *
     122             :  * The active_xxx variables contain updates that are valid in our transaction
     123             :  * and are honored by the lookup functions (RelationMapOidToFilenumber and
     124             :  * RelationMapFilenumberToOid).  The pending_xxx variables contain updates
     125             :  * that aren't active yet; AtCCI_RelationMap merges them into active_xxx at
     126             :  * the next CommandCounterIncrement.  This lets map updates act like updates
     127             :  * of pg_class rows, ie, they become visible only at the next CCI boundary.
     128             :  *
     129             :  * Active shared and active local updates are serialized by the parallel
     130             :  * infrastructure, and deserialized within parallel workers.
     131             :  */
     132             : static RelMapFile active_shared_updates;
     133             : static RelMapFile active_local_updates;
     134             : static RelMapFile pending_shared_updates;
     135             : static RelMapFile pending_local_updates;
     136             : 
     137             : 
     138             : /* prototypes for functions private to this file (all declared static) */
     139             : static void apply_map_update(RelMapFile *map, Oid relationId,
     140             :                              RelFileNumber fileNumber, bool add_okay);
     141             : static void merge_map_updates(RelMapFile *map, const RelMapFile *updates,
     142             :                               bool add_okay);
     143             : static void load_relmap_file(bool shared, bool lock_held);
     144             : static void read_relmap_file(RelMapFile *map, char *dbpath, bool lock_held,
     145             :                              int elevel);
     146             : static void write_relmap_file(RelMapFile *newmap, bool write_wal,
     147             :                               bool send_sinval, bool preserve_files,
     148             :                               Oid dbid, Oid tsid, const char *dbpath);
     149             : static void perform_relmap_update(bool shared, const RelMapFile *updates);
     150             : 
     151             : 
     152             : /*
     153             :  * RelationMapOidToFilenumber
     154             :  *
     155             :  * The raison d'etre ... given a relation OID, look up its filenumber.
     156             :  *
     157             :  * Although shared and local relation OIDs should never overlap, the caller
     158             :  * always knows which we need --- so pass that information ("shared") to
     159             :  * avoid useless searching; only the selected pair of maps is examined.
     160             :  *
     161             :  * Returns InvalidRelFileNumber if the OID is not known (which should never
     162             :  * happen, but the caller is in a better position to report a meaningful
     163             :  * error).
     164             :  */
     165             : RelFileNumber
     166     1899872 : RelationMapOidToFilenumber(Oid relationId, bool shared)
     167             : {
     168             :     const RelMapFile *map;
     169             :     int32       i;
     170             : 
     171             :     /* Active (uncommitted) updates, if any, take precedence over the main maps */
     172     1899872 :     if (shared)
     173             :     {
     174      780354 :         map = &active_shared_updates;
     175      782052 :         for (i = 0; i < map->num_mappings; i++)
     176             :         {
     177        2486 :             if (relationId == map->mappings[i].mapoid)
     178         788 :                 return map->mappings[i].mapfilenumber;
     179             :         }
     180      779566 :         map = &shared_map;
     181    19122988 :         for (i = 0; i < map->num_mappings; i++)
     182             :         {
     183    19122988 :             if (relationId == map->mappings[i].mapoid)
     184      779566 :                 return map->mappings[i].mapfilenumber;
     185             :         }
     186             :     }
     187             :     else
     188             :     {
     189     1119518 :         map = &active_local_updates;
     190     1122176 :         for (i = 0; i < map->num_mappings; i++)
     191             :         {
     192        4192 :             if (relationId == map->mappings[i].mapoid)
     193        1534 :                 return map->mappings[i].mapfilenumber;
     194             :         }
     195     1117984 :         map = &local_map;
     196     4375472 :         for (i = 0; i < map->num_mappings; i++)
     197             :         {
     198     4375472 :             if (relationId == map->mappings[i].mapoid)
     199     1117984 :                 return map->mappings[i].mapfilenumber;
     200             :         }
     201             :     }
     202             : 
     203           0 :     return InvalidRelFileNumber;    /* no entry in either map searched */
     204             : }
     205             : 
     206             : /*
     207             :  * RelationMapFilenumberToOid
     208             :  *
     209             :  * Do the reverse of the normal direction of mapping done in
     210             :  * RelationMapOidToFilenumber.
     211             :  *
     212             :  * This is not supposed to be used during normal running but rather for
     213             :  * information purposes when looking at the filesystem or xlog.
     214             :  *
     215             :  * Returns InvalidOid if the filenumber is not known; this can easily happen
     216             :  * if the relfilenumber doesn't pertain to a mapped relation.
     217             :  */
     218             : Oid
     219        1028 : RelationMapFilenumberToOid(RelFileNumber filenumber, bool shared)
     220             : {
     221             :     const RelMapFile *map;
     222             :     int32       i;
     223             : 
     224             :     /* Active (uncommitted) updates, if any, take precedence over the main maps */
     225        1028 :     if (shared)
     226             :     {
     227         314 :         map = &active_shared_updates;
     228         314 :         for (i = 0; i < map->num_mappings; i++)
     229             :         {
     230           0 :             if (filenumber == map->mappings[i].mapfilenumber)
     231           0 :                 return map->mappings[i].mapoid;
     232             :         }
     233         314 :         map = &shared_map;
     234        7988 :         for (i = 0; i < map->num_mappings; i++)
     235             :         {
     236        7988 :             if (filenumber == map->mappings[i].mapfilenumber)
     237         314 :                 return map->mappings[i].mapoid;
     238             :         }
     239             :     }
     240             :     else
     241             :     {
     242         714 :         map = &active_local_updates;
     243         714 :         for (i = 0; i < map->num_mappings; i++)
     244             :         {
     245           0 :             if (filenumber == map->mappings[i].mapfilenumber)
     246           0 :                 return map->mappings[i].mapoid;
     247             :         }
     248         714 :         map = &local_map;
     249        4006 :         for (i = 0; i < map->num_mappings; i++)
     250             :         {
     251        3900 :             if (filenumber == map->mappings[i].mapfilenumber)
     252         608 :                 return map->mappings[i].mapoid;
     253             :         }
     254             :     }
     255             : 
     256         106 :     return InvalidOid;      /* filenumber not found in either map searched */
     257             : }
     258             : 
     259             : /*
     260             :  * RelationMapOidToFilenumberForDatabase
     261             :  *
     262             :  * Like RelationMapOidToFilenumber, but reads the mapping from the map file in
     263             :  * dbpath (on every call) instead of using the one for the current database.
     264             :  */
     265             : RelFileNumber
     266        6624 : RelationMapOidToFilenumberForDatabase(char *dbpath, Oid relationId)
     267             : {
     268             :     RelMapFile  map;
     269             :     int         i;
     270             : 
     271             :     /* Read the relmap file found under dbpath (lock not held, elevel ERROR). */
     272        6624 :     read_relmap_file(&map, dbpath, false, ERROR);
     273             : 
     274             :     /* Iterate over the relmap entries to find the input relation OID. */
     275       56672 :     for (i = 0; i < map.num_mappings; i++)
     276             :     {
     277       56672 :         if (relationId == map.mappings[i].mapoid)
     278        6624 :             return map.mappings[i].mapfilenumber;
     279             :     }
     280             : 
     281           0 :     return InvalidRelFileNumber;    /* no entry for relationId in that file */
     282             : }
     283             : 
     284             : /*
     285             :  * RelationMapCopy
     286             :  *
     287             :  * Copy the relmap file from srcdbpath to dstdbpath and WAL-log the operation
     288             :  * (dbid and tsid are passed through to write_relmap_file).  This is intended
     289             :  * for creating a new relmap file for a database that doesn't have one yet,
     290             :  * not for replacing an existing relmap file.
     291             :  */
     292             : void
     293         368 : RelationMapCopy(Oid dbid, Oid tsid, char *srcdbpath, char *dstdbpath)
     294             : {
     295             :     RelMapFile  map;
     296             : 
     297             :     /*
     298             :      * Read the relmap file from the source database.
     299             :      */
     300         368 :     read_relmap_file(&map, srcdbpath, false, ERROR);
     301             : 
     302             :     /*
     303             :      * Write the same data into the destination database's relmap file.
     304             :      *
     305             :      * No sinval is needed (send_sinval = false) because no one can be connected
     306             :      * to the destination database yet. For the same reason, there is no need
     307             :      * to acquire RelationMappingLock.
     308             :      *
     309             :      * There's no point in trying to preserve files (preserve_files = false).
     310             :      * The new database isn't usable yet anyway, and won't ever be if we can't
     311             :      * install a relmap file.
     312             :      */
     313         368 :     write_relmap_file(&map, true, false, false, dbid, tsid, dstdbpath);
     314         368 : }
     315             : 
     316             : /*
     317             :  * RelationMapUpdateMap
     318             :  *
     319             :  * Install a new relfilenumber mapping for the specified relation.  In
     320             :  * bootstrap mode it goes directly into shared_map or local_map.  Otherwise,
     321             :  * if immediate is true the mapping is activated immediately (for this
     322             :  * transaction only); else it is made pending until CommandCounterIncrement.
     323             :  */
     324             : void
     325       43918 : RelationMapUpdateMap(Oid relationId, RelFileNumber fileNumber, bool shared,
     326             :                      bool immediate)
     327             : {
     328             :     RelMapFile *map;
     329             : 
     330       43918 :     if (IsBootstrapProcessingMode())
     331             :     {
     332             :         /*
     333             :          * In bootstrap mode, the mapping gets installed in the permanent map.
     334             :          */
     335       43026 :         if (shared)
     336       30300 :             map = &shared_map;
     337             :         else
     338       12726 :             map = &local_map;
     339             :     }
     340             :     else
     341             :     {
     342             :         /*
     343             :          * We don't currently support map changes within subtransactions, or
     344             :          * when in parallel mode.  This could be done with more bookkeeping
     345             :          * infrastructure, but it doesn't presently seem worth it.
     346             :          */
     347         892 :         if (GetCurrentTransactionNestLevel() > 1)
     348           0 :             elog(ERROR, "cannot change relation mapping within subtransaction");
     349             : 
     350         892 :         if (IsInParallelMode())
     351           0 :             elog(ERROR, "cannot change relation mapping in parallel mode");
     352             : 
     353         892 :         if (immediate)
     354             :         {
     355             :             /* Make it active, but only locally */
     356         156 :             if (shared)
     357           0 :                 map = &active_shared_updates;
     358             :             else
     359         156 :                 map = &active_local_updates;
     360             :         }
     361             :         else
     362             :         {
     363             :             /* Make it pending */
     364         736 :             if (shared)
     365         342 :                 map = &pending_shared_updates;
     366             :             else
     367         394 :                 map = &pending_local_updates;
     368             :         }
     369             :     }
     370       43918 :     apply_map_update(map, relationId, fileNumber, true);    /* add_okay = true */
     371       43918 : }
     372             : 
     373             : /*
     374             :  * apply_map_update
     375             :  *
     376             :  * Insert a new mapping into the given map variable, replacing any existing
     377             :  * mapping for the same relation.  Errors out if the map is already full.
     378             :  *
     379             :  * In some cases the caller knows there must be an existing mapping; pass
     380             :  * add_okay = false to draw an error if not.
     381             :  */
     382             : static void
     383       45202 : apply_map_update(RelMapFile *map, Oid relationId, RelFileNumber fileNumber,
     384             :                  bool add_okay)
     385             : {
     386             :     int32       i;
     387             : 
     388             :     /* Replace any existing mapping */
     389      886948 :     for (i = 0; i < map->num_mappings; i++)
     390             :     {
     391      844898 :         if (relationId == map->mappings[i].mapoid)
     392             :         {
     393        3152 :             map->mappings[i].mapfilenumber = fileNumber;
     394        3152 :             return;
     395             :         }
     396             :     }
     397             : 
     398             :     /* No existing entry: append one, if allowed and there is room */
     399       42050 :     if (!add_okay)
     400           0 :         elog(ERROR, "attempt to apply a mapping to unmapped relation %u",
     401             :              relationId);
     402       42050 :     if (map->num_mappings >= MAX_MAPPINGS)
     403           0 :         elog(ERROR, "ran out of space in relation map");
     404       42050 :     map->mappings[map->num_mappings].mapoid = relationId;
     405       42050 :     map->mappings[map->num_mappings].mapfilenumber = fileNumber;
     406       42050 :     map->num_mappings++;
     407             : }
     408             : 
     409             : /*
     410             :  * merge_map_updates
     411             :  *
     412             :  * Merge all the updates in the given update map into the target map.  This
     413             :  * is a bulk form of apply_map_update; add_okay is passed through unchanged.
     414             :  */
     415             : static void
     416         692 : merge_map_updates(RelMapFile *map, const RelMapFile *updates, bool add_okay)
     417             : {
     418             :     int32       i;
     419             : 
     420        1976 :     for (i = 0; i < updates->num_mappings; i++)
     421             :     {
     422        1284 :         apply_map_update(map,
     423             :                          updates->mappings[i].mapoid,
     424             :                          updates->mappings[i].mapfilenumber,
     425             :                          add_okay);
     426             :     }
     427         692 : }
     428             : 
     429             : /*
     430             :  * RelationMapRemoveMapping
     431             :  *
     432             :  * Remove a relation's entry in the map.  This is only allowed for "active"
     433             :  * (but not committed) local mappings.  We need it so we can back out the
     434             :  * entry for the transient target file when doing VACUUM FULL/CLUSTER on
     435             :  * a mapped relation.  It is an error if no such active mapping exists.
     436             :  */
     437             : void
     438         156 : RelationMapRemoveMapping(Oid relationId)
     439             : {
     440         156 :     RelMapFile *map = &active_local_updates;
     441             :     int32       i;
     442             : 
     443         246 :     for (i = 0; i < map->num_mappings; i++)
     444             :     {
     445         246 :         if (relationId == map->mappings[i].mapoid)
     446             :         {
     447             :             /* Found it: overwrite with the last entry and shrink the array */
     448         156 :             map->mappings[i] = map->mappings[map->num_mappings - 1];
     449         156 :             map->num_mappings--;
     450         156 :             return;
     451             :         }
     452             :     }
     453           0 :     elog(ERROR, "could not find temporary mapping for relation %u",
     454             :          relationId);
     455             : }
     456             : 
     457             : /*
     458             :  * RelationMapInvalidate
     459             :  *
     460             :  * This routine is invoked for SI cache flush messages.  We must re-read
     461             :  * the indicated map file.  However, we might receive a SI message in a
     462             :  * process that hasn't yet, and might never, load the mapping files;
     463             :  * for example the autovacuum launcher, which *must not* try to read
     464             :  * a local map since it is attached to no particular database.
     465             :  * So, re-read only if the map was already loaded (its magic is valid).
     466             :  */
     467             : void
     468         264 : RelationMapInvalidate(bool shared)
     469             : {
     470         264 :     if (shared)
     471             :     {
     472         162 :         if (shared_map.magic == RELMAPPER_FILEMAGIC)
     473         162 :             load_relmap_file(true, false);
     474             :     }
     475             :     else
     476             :     {
     477         102 :         if (local_map.magic == RELMAPPER_FILEMAGIC)
     478         102 :             load_relmap_file(false, false);
     479             :     }
     480         264 : }
     481             : 
     482             : /*
     483             :  * RelationMapInvalidateAll
     484             :  *
     485             :  * Reload all map files.  This is used to recover from SI message buffer
     486             :  * overflow: we can't be sure if we missed an inval message.
     487             :  * As in RelationMapInvalidate, reload only maps whose magic is valid.
     488             :  */
     489             : void
     490        4072 : RelationMapInvalidateAll(void)
     491             : {
     492        4072 :     if (shared_map.magic == RELMAPPER_FILEMAGIC)
     493        4072 :         load_relmap_file(true, false);
     494        4072 :     if (local_map.magic == RELMAPPER_FILEMAGIC)
     495        3992 :         load_relmap_file(false, false);
     496        4072 : }
     497             : 
     498             : /*
     499             :  * AtCCI_RelationMap
     500             :  *
     501             :  * At CommandCounterIncrement time, merge "pending" map updates into "active".
     502             :  */
     503             : void
     504     1990630 : AtCCI_RelationMap(void)
     505             : {
     506     1990630 :     if (pending_shared_updates.num_mappings != 0)
     507             :     {
     508         282 :         merge_map_updates(&active_shared_updates,
     509             :                           &pending_shared_updates,
     510             :                           true);    /* add_okay */
     511         282 :         pending_shared_updates.num_mappings = 0;    /* pending set now empty */
     512             :     }
     513     1990630 :     if (pending_local_updates.num_mappings != 0)
     514             :     {
     515         248 :         merge_map_updates(&active_local_updates,
     516             :                           &pending_local_updates,
     517             :                           true);    /* add_okay */
     518         248 :         pending_local_updates.num_mappings = 0;     /* pending set now empty */
     519             :     }
     520     1990630 : }
     521             : 
     522             : /*
     523             :  * AtEOXact_RelationMap
     524             :  *
     525             :  * Handle relation mapping at main-transaction commit or abort.
     526             :  *
     527             :  * During commit, this must be called as late as possible before the actual
     528             :  * transaction commit, so as to minimize the window where the transaction
     529             :  * could still roll back after committing map changes.  Although nothing
     530             :  * critically bad happens in such a case, we still would prefer that it
     531             :  * not happen, since we'd possibly be losing useful updates to the relations'
     532             :  * pg_class row(s).
     533             :  *
     534             :  * During abort, we just have to throw away any pending map changes.
     535             :  * Normal post-abort cleanup will take care of fixing relcache entries.
     536             :  * Parallel worker commit/abort is handled by resetting active mappings
     537             :  * that may have been received from the leader process.  (There should be
     538             :  * no pending updates in parallel workers.)
     539             :  */
     540             : void
     541      975762 : AtEOXact_RelationMap(bool isCommit, bool isParallelWorker)
     542             : {
     543      975762 :     if (isCommit && !isParallelWorker)
     544             :     {
     545             :         /*
     546             :          * We should not get here with any "pending" updates.  (We could
     547             :          * logically choose to treat such as committed, but in the current
     548             :          * code this should never happen.)
     549             :          */
     550             :         Assert(pending_shared_updates.num_mappings == 0);
     551             :         Assert(pending_local_updates.num_mappings == 0);
     552             : 
     553             :         /*
     554             :          * Write any active updates to the actual map files, then reset them.
     555             :          */
     556      932868 :         if (active_shared_updates.num_mappings != 0)
     557             :         {
     558         102 :             perform_relmap_update(true, &active_shared_updates);
     559         102 :             active_shared_updates.num_mappings = 0;
     560             :         }
     561      932868 :         if (active_local_updates.num_mappings != 0)
     562             :         {
     563          60 :             perform_relmap_update(false, &active_local_updates);
     564          60 :             active_local_updates.num_mappings = 0;
     565             :         }
     566             :     }
     567             :     else
     568             :     {
     569             :         /* Abort or parallel worker --- drop all active and pending updates */
     570             :         Assert(!isParallelWorker || pending_shared_updates.num_mappings == 0);
     571             :         Assert(!isParallelWorker || pending_local_updates.num_mappings == 0);
     572             : 
     573       42894 :         active_shared_updates.num_mappings = 0;
     574       42894 :         active_local_updates.num_mappings = 0;
     575       42894 :         pending_shared_updates.num_mappings = 0;
     576       42894 :         pending_local_updates.num_mappings = 0;
     577             :     }
     578      975762 : }
     579             : 
     580             : /*
     581             :  * AtPrepare_RelationMap
     582             :  *
     583             :  * Handle relation mapping at PREPARE.
     584             :  *
     585             :  * Currently, we don't support preparing any transaction that changes the map.
     586             :  */
     587             : void
     588         736 : AtPrepare_RelationMap(void)
     589             : {
     590         736 :     if (active_shared_updates.num_mappings != 0 ||
     591         736 :         active_local_updates.num_mappings != 0 ||
     592         736 :         pending_shared_updates.num_mappings != 0 ||
     593         736 :         pending_local_updates.num_mappings != 0)
     594           0 :         ereport(ERROR,
     595             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     596             :                  errmsg("cannot PREPARE a transaction that modified relation mapping")));
     597         736 : }
     598             : 
     599             : /*
     600             :  * CheckPointRelationMap
     601             :  *
     602             :  * This is called during a checkpoint.  It must ensure that any relation map
     603             :  * updates that were WAL-logged before the start of the checkpoint are
     604             :  * securely flushed to disk and will not need to be replayed later.  This
     605             :  * seems unlikely to be a performance-critical issue, so we use a simple
     606             :  * method: we just take and release the RelationMappingLock.  This ensures
     607             :  * that any already-logged map update is complete, because write_relmap_file
     608             :  * will fsync the map file before the lock is released.
     609             :  */
     610             : void
     611        4690 : CheckPointRelationMap(void)
     612             : {
     613        4690 :     LWLockAcquire(RelationMappingLock, LW_SHARED);
     614        4690 :     LWLockRelease(RelationMappingLock);
     615        4690 : }
     616             : 
     617             : /*
     618             :  * RelationMapFinishBootstrap
     619             :  *
     620             :  * Write out the initial relation mapping files at the completion of
     621             :  * bootstrap.  All the mapped files should have been made known to us
     622             :  * via RelationMapUpdateMap calls.
     623             :  */
     624             : void
     625         606 : RelationMapFinishBootstrap(void)
     626             : {
     627             :     Assert(IsBootstrapProcessingMode());
     628             : 
     629             :     /* Shouldn't be any "active" or "pending" updates ... */
     630             :     Assert(active_shared_updates.num_mappings == 0);
     631             :     Assert(active_local_updates.num_mappings == 0);
     632             :     Assert(pending_shared_updates.num_mappings == 0);
     633             :     Assert(pending_local_updates.num_mappings == 0);
     634             : 
     635             :     /* Write the files; no WAL or sinval needed */
     636         606 :     write_relmap_file(&shared_map, false, false, false,
     637             :                       InvalidOid, GLOBALTABLESPACE_OID, "global");
     638         606 :     write_relmap_file(&local_map, false, false, false,
     639             :                       MyDatabaseId, MyDatabaseTableSpace, DatabasePath);
     640         606 : }
     641             : 
     642             : /*
     643             :  * RelationMapInitialize
     644             :  *
     645             :  * This initializes the mapper module at process startup.  We can't access the
     646             :  * database yet, so just make sure the maps are empty.
     647             :  */
     648             : void
     649       24062 : RelationMapInitialize(void)
     650             : {
     651             :     /* The static variables should initialize to zeroes, but let's be sure */
     652       24062 :     shared_map.magic = 0;       /* mark it not loaded */
     653       24062 :     local_map.magic = 0;
     654       24062 :     shared_map.num_mappings = 0;
     655       24062 :     local_map.num_mappings = 0;
     656       24062 :     active_shared_updates.num_mappings = 0;
     657       24062 :     active_local_updates.num_mappings = 0;
     658       24062 :     pending_shared_updates.num_mappings = 0;
     659       24062 :     pending_local_updates.num_mappings = 0;
     660       24062 : }
     661             : 
     662             : /*
     663             :  * RelationMapInitializePhase2
     664             :  *
     665             :  * This is called to prepare for access to pg_database during startup.
     666             :  * We should be able to read the shared map file now.
     667             :  */
     668             : void
     669       24062 : RelationMapInitializePhase2(void)
     670             : {
     671             :     /*
     672             :      * In bootstrap mode, the map file isn't there yet, so do nothing.
     673             :      */
     674       24062 :     if (IsBootstrapProcessingMode())
     675         606 :         return;
     676             : 
     677             :     /*
     678             :      * Load the shared map file, die on error.
     679             :      */
     680       23456 :     load_relmap_file(true, false);
     681             : }
     682             : 
     683             : /*
     684             :  * RelationMapInitializePhase3
     685             :  *
     686             :  * This is called as soon as we have determined MyDatabaseId and set up
     687             :  * DatabasePath.  At this point we should be able to read the local map file.
     688             :  */
     689             : void
     690       22004 : RelationMapInitializePhase3(void)
     691             : {
     692             :     /*
     693             :      * In bootstrap mode, the map file isn't there yet, so do nothing.
     694             :      */
     695       22004 :     if (IsBootstrapProcessingMode())
     696         606 :         return;
     697             : 
     698             :     /*
     699             :      * Load the local map file, die on error.
     700             :      */
     701       21398 :     load_relmap_file(false, false);
     702             : }
     703             : 
     704             : /*
     705             :  * EstimateRelationMapSpace
     706             :  *
     707             :  * Estimate space needed to pass active shared and local relmaps to parallel
     708             :  * workers.
     709             :  */
     710             : Size
     711         810 : EstimateRelationMapSpace(void)
     712             : {
     713         810 :     return sizeof(SerializedActiveRelMaps);
     714             : }
     715             : 
     716             : /*
     717             :  * SerializeRelationMap
     718             :  *
     719             :  * Serialize active shared and local relmap state for parallel workers.
     720             :  */
     721             : void
     722         810 : SerializeRelationMap(Size maxSize, char *startAddress)
     723             : {
     724             :     SerializedActiveRelMaps *relmaps;
     725             : 
     726             :     Assert(maxSize >= EstimateRelationMapSpace());
     727             : 
     728         810 :     relmaps = (SerializedActiveRelMaps *) startAddress;
     729         810 :     relmaps->active_shared_updates = active_shared_updates;
     730         810 :     relmaps->active_local_updates = active_local_updates;
     731         810 : }
     732             : 
     733             : /*
     734             :  * RestoreRelationMap
     735             :  *
     736             :  * Restore active shared and local relmap state within a parallel worker.
     737             :  */
     738             : void
     739        2602 : RestoreRelationMap(char *startAddress)
     740             : {
     741             :     SerializedActiveRelMaps *relmaps;
     742             : 
     743        2602 :     if (active_shared_updates.num_mappings != 0 ||
     744        2602 :         active_local_updates.num_mappings != 0 ||
     745        2602 :         pending_shared_updates.num_mappings != 0 ||
     746        2602 :         pending_local_updates.num_mappings != 0)
     747           0 :         elog(ERROR, "parallel worker has existing mappings");
     748             : 
     749        2602 :     relmaps = (SerializedActiveRelMaps *) startAddress;
     750        2602 :     active_shared_updates = relmaps->active_shared_updates;
     751        2602 :     active_local_updates = relmaps->active_local_updates;
     752        2602 : }
     753             : 
     754             : /*
     755             :  * load_relmap_file -- load the shared or local map file
     756             :  *
     757             :  * Because these files are essential for access to core system catalogs,
     758             :  * failure to load either of them is a fatal error.
     759             :  *
     760             :  * Note that the local case requires DatabasePath to be set up.
     761             :  */
     762             : static void
     763       53344 : load_relmap_file(bool shared, bool lock_held)
     764             : {
     765       53344 :     if (shared)
     766       27792 :         read_relmap_file(&shared_map, "global", lock_held, FATAL);
     767             :     else
     768       25552 :         read_relmap_file(&local_map, DatabasePath, lock_held, FATAL);
     769       53344 : }
     770             : 
     771             : /*
     772             :  * read_relmap_file -- load data from any relation mapper file
     773             :  *
     774             :  * dbpath must be the relevant database path, or "global" for shared relations.
     775             :  *
     776             :  * RelationMappingLock will be acquired and released unless lock_held = true.
     777             :  *
     778             :  * Errors will be reported at the indicated elevel, which should be at least
     779             :  * ERROR.
     780             :  */
     781             : static void
     782       60336 : read_relmap_file(RelMapFile *map, char *dbpath, bool lock_held, int elevel)
     783             : {
     784             :     char        mapfilename[MAXPGPATH];
     785             :     pg_crc32c   crc;
     786             :     int         fd;
     787             :     int         r;
     788             : 
     789             :     Assert(elevel >= ERROR);
     790             : 
     791             :     /*
     792             :      * Grab the lock to prevent the file from being updated while we read it,
     793             :      * unless the caller is already holding the lock.  If the file is updated
     794             :      * shortly after we look, the sinval signaling mechanism will make us
     795             :      * re-read it before we are able to access any relation that's affected by
     796             :      * the change.
     797             :      */
     798       60336 :     if (!lock_held)
     799       60174 :         LWLockAcquire(RelationMappingLock, LW_SHARED);
     800             : 
     801             :     /*
     802             :      * Open the target file.
     803             :      *
     804             :      * Because Windows isn't happy about the idea of renaming over a file that
     805             :      * someone has open, we only open this file after acquiring the lock, and
     806             :      * for the same reason, we close it before releasing the lock. That way,
     807             :      * by the time write_relmap_file() acquires an exclusive lock, no one else
     808             :      * will have it open.
     809             :      */
     810       60336 :     snprintf(mapfilename, sizeof(mapfilename), "%s/%s", dbpath,
     811             :              RELMAPPER_FILENAME);
     812       60336 :     fd = OpenTransientFile(mapfilename, O_RDONLY | PG_BINARY);
     813       60336 :     if (fd < 0)
     814           0 :         ereport(elevel,
     815             :                 (errcode_for_file_access(),
     816             :                  errmsg("could not open file \"%s\": %m",
     817             :                         mapfilename)));
     818             : 
     819             :     /* Now read the data. */
     820       60336 :     pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ);
     821       60336 :     r = read(fd, map, sizeof(RelMapFile));
     822       60336 :     if (r != sizeof(RelMapFile))
     823             :     {
     824           0 :         if (r < 0)
     825           0 :             ereport(elevel,
     826             :                     (errcode_for_file_access(),
     827             :                      errmsg("could not read file \"%s\": %m", mapfilename)));
     828             :         else
     829           0 :             ereport(elevel,
     830             :                     (errcode(ERRCODE_DATA_CORRUPTED),
     831             :                      errmsg("could not read file \"%s\": read %d of %zu",
     832             :                             mapfilename, r, sizeof(RelMapFile))));
     833             :     }
     834       60336 :     pgstat_report_wait_end();
     835             : 
     836       60336 :     if (CloseTransientFile(fd) != 0)
     837           0 :         ereport(elevel,
     838             :                 (errcode_for_file_access(),
     839             :                  errmsg("could not close file \"%s\": %m",
     840             :                         mapfilename)));
     841             : 
     842       60336 :     if (!lock_held)
     843       60174 :         LWLockRelease(RelationMappingLock);
     844             : 
     845             :     /* check for correct magic number, etc */
     846       60336 :     if (map->magic != RELMAPPER_FILEMAGIC ||
     847       60336 :         map->num_mappings < 0 ||
     848       60336 :         map->num_mappings > MAX_MAPPINGS)
     849           0 :         ereport(elevel,
     850             :                 (errmsg("relation mapping file \"%s\" contains invalid data",
     851             :                         mapfilename)));
     852             : 
     853             :     /* verify the CRC */
     854       60336 :     INIT_CRC32C(crc);
     855       60336 :     COMP_CRC32C(crc, (char *) map, offsetof(RelMapFile, crc));
     856       60336 :     FIN_CRC32C(crc);
     857             : 
     858       60336 :     if (!EQ_CRC32C(crc, map->crc))
     859           0 :         ereport(elevel,
     860             :                 (errmsg("relation mapping file \"%s\" contains incorrect checksum",
     861             :                         mapfilename)));
     862       60336 : }
     863             : 
     864             : /*
     865             :  * Write out a new shared or local map file with the given contents.
     866             :  *
     867             :  * The magic number and CRC are automatically updated in *newmap.  The
     868             :  * in-memory static maps are not updated here; callers do that if needed.
     869             :  *
     870             :  * If write_wal is true then an appropriate WAL message is emitted.
     871             :  * (It will be false for bootstrap and WAL replay cases.)
     872             :  *
     873             :  * If send_sinval is true then a SI invalidation message is sent.
     874             :  * (This should be true except in bootstrap case.)
     875             :  *
     876             :  * If preserve_files is true then the storage manager is warned not to
     877             :  * delete the files listed in the map.
     878             :  *
     879             :  * Because this may be called during WAL replay when MyDatabaseId,
     880             :  * DatabasePath, etc aren't valid, we require the caller to pass in suitable
     881             :  * values. Pass dbpath as "global" for the shared map.
     882             :  *
     883             :  * The caller is also responsible for being sure no concurrent map update
     884             :  * could be happening.
     885             :  */
     886             : static void
     887        1786 : write_relmap_file(RelMapFile *newmap, bool write_wal, bool send_sinval,
     888             :                   bool preserve_files, Oid dbid, Oid tsid, const char *dbpath)
     889             : {
     890             :     int         fd;
     891             :     char        mapfilename[MAXPGPATH];
     892             :     char        maptempfilename[MAXPGPATH];
     893             : 
     894             :     /*
     895             :      * Fill in the overhead fields and update CRC.
     896             :      */
     897        1786 :     newmap->magic = RELMAPPER_FILEMAGIC;
     898        1786 :     if (newmap->num_mappings < 0 || newmap->num_mappings > MAX_MAPPINGS)
     899           0 :         elog(ERROR, "attempt to write bogus relation mapping");
     900             : 
     901        1786 :     INIT_CRC32C(newmap->crc);
     902        1786 :     COMP_CRC32C(newmap->crc, (char *) newmap, offsetof(RelMapFile, crc));
     903        1786 :     FIN_CRC32C(newmap->crc);
     904             : 
     905             :     /*
     906             :      * Construct filenames -- a temporary file that we'll create to write the
     907             :      * data initially, and then the permanent name to which we will rename it.
     908             :      */
     909        1786 :     snprintf(mapfilename, sizeof(mapfilename), "%s/%s",
     910             :              dbpath, RELMAPPER_FILENAME);
     911        1786 :     snprintf(maptempfilename, sizeof(maptempfilename), "%s/%s",
     912             :              dbpath, RELMAPPER_TEMP_FILENAME);
     913             : 
     914             :     /*
     915             :      * Open a temporary file. If a file already exists with this name, it must
     916             :      * be left over from a previous crash, so we can overwrite it. Concurrent
     917             :      * calls to this function are not allowed.
     918             :      */
     919        1786 :     fd = OpenTransientFile(maptempfilename,
     920             :                            O_WRONLY | O_CREAT | O_TRUNC | PG_BINARY);
     921        1786 :     if (fd < 0)
     922           0 :         ereport(ERROR,
     923             :                 (errcode_for_file_access(),
     924             :                  errmsg("could not open file \"%s\": %m",
     925             :                         maptempfilename)));
     926             : 
     927             :     /* Write new data to the file. */
     928        1786 :     pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_WRITE);
     929        1786 :     if (write(fd, newmap, sizeof(RelMapFile)) != sizeof(RelMapFile))
     930             :     {
     931             :         /* if write didn't set errno, assume problem is no disk space */
     932           0 :         if (errno == 0)
     933           0 :             errno = ENOSPC;
     934           0 :         ereport(ERROR,
     935             :                 (errcode_for_file_access(),
     936             :                  errmsg("could not write file \"%s\": %m",
     937             :                         maptempfilename)));
     938             :     }
     939        1786 :     pgstat_report_wait_end();
     940             : 
     941             :     /* And close the file. */
     942        1786 :     if (CloseTransientFile(fd) != 0)
     943           0 :         ereport(ERROR,
     944             :                 (errcode_for_file_access(),
     945             :                  errmsg("could not close file \"%s\": %m",
     946             :                         maptempfilename)));
     947             : 
     948        1786 :     if (write_wal)
     949             :     {
     950             :         xl_relmap_update xlrec;
     951             :         XLogRecPtr  lsn;
     952             : 
     953             :         /* now errors are fatal ... */
     954         530 :         START_CRIT_SECTION();
     955             : 
     956         530 :         xlrec.dbid = dbid;
     957         530 :         xlrec.tsid = tsid;
     958         530 :         xlrec.nbytes = sizeof(RelMapFile);
     959             : 
     960         530 :         XLogBeginInsert();
     961         530 :         XLogRegisterData((char *) (&xlrec), MinSizeOfRelmapUpdate);
     962         530 :         XLogRegisterData((char *) newmap, sizeof(RelMapFile));
     963             : 
     964         530 :         lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE);
     965             : 
     966             :         /* As always, WAL must hit the disk before the data update does */
     967         530 :         XLogFlush(lsn);
     968             :     }
     969             : 
     970             :     /*
     971             :      * durable_rename() does all the hard work of making sure that we rename
     972             :      * the temporary file into place in a crash-safe manner.
     973             :      *
     974             :      * NB: Although we instruct durable_rename() to use ERROR, we will often
     975             :      * be in a critical section at this point; if so, ERROR will become PANIC.
     976             :      */
     977        1786 :     pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_REPLACE);
     978        1786 :     durable_rename(maptempfilename, mapfilename, ERROR);
     979        1786 :     pgstat_report_wait_end();
     980             : 
     981             :     /*
     982             :      * Now that the file is safely on disk, send sinval message to let other
     983             :      * backends know to re-read it.  We must do this inside the critical
     984             :      * section: if for some reason we fail to send the message, we have to
     985             :      * force a database-wide PANIC.  Otherwise other backends might continue
     986             :      * execution with stale mapping information, which would be catastrophic
     987             :      * as soon as others began to use the now-committed data.
     988             :      */
     989        1786 :     if (send_sinval)
     990         206 :         CacheInvalidateRelmap(dbid);
     991             : 
     992             :     /*
     993             :      * Make sure that the files listed in the map are not deleted if the outer
     994             :      * transaction aborts.  This had better be within the critical section
     995             :      * too: it's not likely to fail, but if it did, we'd arrive at transaction
     996             :      * abort with the files still vulnerable.  PANICing will leave things in a
     997             :      * good state on-disk.
     998             :      *
     999             :      * Note: we're cheating a little bit here by assuming that mapped files
    1000             :      * are either in pg_global or the database's default tablespace.
    1001             :      */
    1002        1786 :     if (preserve_files)
    1003             :     {
    1004             :         int32       i;
    1005             : 
    1006        6282 :         for (i = 0; i < newmap->num_mappings; i++)
    1007             :         {
    1008             :             RelFileLocator rlocator;
    1009             : 
    1010        6120 :             rlocator.spcOid = tsid;
    1011        6120 :             rlocator.dbOid = dbid;
    1012        6120 :             rlocator.relNumber = newmap->mappings[i].mapfilenumber;
    1013        6120 :             RelationPreserveStorage(rlocator, false);
    1014             :         }
    1015             :     }
    1016             : 
    1017             :     /* Critical section done */
    1018        1786 :     if (write_wal)
    1019         530 :         END_CRIT_SECTION();
    1020        1786 : }
    1021             : 
    1022             : /*
    1023             :  * Merge the specified updates into the appropriate "real" map,
    1024             :  * and write out the changes.  This function must be used for committing
    1025             :  * updates during normal multiuser operation.
    1026             :  */
    1027             : static void
    1028         162 : perform_relmap_update(bool shared, const RelMapFile *updates)
    1029             : {
    1030             :     RelMapFile  newmap;
    1031             : 
    1032             :     /*
    1033             :      * Anyone updating a relation's mapping info should take exclusive lock on
    1034             :      * that rel and hold it until commit.  This ensures that there will not be
    1035             :      * concurrent updates on the same mapping value; but there could easily be
    1036             :      * concurrent updates on different values in the same file. We cover that
    1037             :      * by acquiring the RelationMappingLock, re-reading the target file to
    1038             :      * ensure it's up to date, applying the updates, and writing the data
    1039             :      * before releasing RelationMappingLock.
    1040             :      *
    1041             :      * There is only one RelationMappingLock.  In principle we could try to
    1042             :      * have one per mapping file, but it seems unlikely to be worth the
    1043             :      * trouble.
    1044             :      */
    1045         162 :     LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE);
    1046             : 
    1047             :     /* Be certain we see any other updates just made */
    1048         162 :     load_relmap_file(shared, true);
    1049             : 
    1050             :     /* Prepare updated data in a local variable */
    1051         162 :     if (shared)
    1052         102 :         memcpy(&newmap, &shared_map, sizeof(RelMapFile));
    1053             :     else
    1054          60 :         memcpy(&newmap, &local_map, sizeof(RelMapFile));
    1055             : 
    1056             :     /*
    1057             :      * Apply the updates to newmap.  No new mappings should appear, unless
    1058             :      * somebody is adding indexes to system catalogs.
    1059             :      */
    1060         162 :     merge_map_updates(&newmap, updates, allowSystemTableMods);
    1061             : 
    1062             :     /* Write out the updated map and do other necessary tasks */
    1063         162 :     write_relmap_file(&newmap, true, true, true,
    1064             :                       (shared ? InvalidOid : MyDatabaseId),
    1065             :                       (shared ? GLOBALTABLESPACE_OID : MyDatabaseTableSpace),
    1066             :                       (shared ? "global" : DatabasePath));
    1067             : 
    1068             :     /*
    1069             :      * We successfully wrote the updated file, so it's now safe to rely on the
    1070             :      * new values in this process, too.
    1071             :      */
    1072         162 :     if (shared)
    1073         102 :         memcpy(&shared_map, &newmap, sizeof(RelMapFile));
    1074             :     else
    1075          60 :         memcpy(&local_map, &newmap, sizeof(RelMapFile));
    1076             : 
    1077             :     /* Now we can release the lock */
    1078         162 :     LWLockRelease(RelationMappingLock);
    1079         162 : }
    1080             : 
    1081             : /*
    1082             :  * RELMAP resource manager's routines
    1083             :  */
    1084             : void
    1085          44 : relmap_redo(XLogReaderState *record)
    1086             : {
    1087          44 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
    1088             : 
    1089             :     /* Backup blocks are not used in relmap records */
    1090             :     Assert(!XLogRecHasAnyBlockRefs(record));
    1091             : 
    1092          44 :     if (info == XLOG_RELMAP_UPDATE)
    1093             :     {
    1094          44 :         xl_relmap_update *xlrec = (xl_relmap_update *) XLogRecGetData(record);
    1095             :         RelMapFile  newmap;
    1096             :         char       *dbpath;
    1097             : 
    1098          44 :         if (xlrec->nbytes != sizeof(RelMapFile))
    1099           0 :             elog(PANIC, "relmap_redo: wrong size %u in relmap update record",
    1100             :                  xlrec->nbytes);
    1101          44 :         memcpy(&newmap, xlrec->data, sizeof(newmap));
    1102             : 
    1103             :         /* We need to construct the pathname for this database */
    1104          44 :         dbpath = GetDatabasePath(xlrec->dbid, xlrec->tsid);
    1105             : 
    1106             :         /*
    1107             :          * Write out the new map and send sinval, but of course don't write a
    1108             :          * new WAL entry.  There's no surrounding transaction to tell to
    1109             :          * preserve files, either.
    1110             :          *
    1111             :          * There shouldn't be anyone else updating relmaps during WAL replay,
    1112             :          * but grab the lock to interlock against load_relmap_file().
    1113             :          *
    1114             :          * Note that we use the same WAL record for updating the relmap of an
    1115             :          * existing database as we do for creating a new database. In the
    1116             :          * latter case, taking the relmap lock and sending sinval messages is
    1117             :          * unnecessary, but harmless. If we wanted to avoid it, we could add a
    1118             :          * flag to the WAL record to indicate which operation is being
    1119             :          * performed.
    1120             :          */
    1121          44 :         LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE);
    1122          44 :         write_relmap_file(&newmap, false, true, false,
    1123             :                           xlrec->dbid, xlrec->tsid, dbpath);
    1124          44 :         LWLockRelease(RelationMappingLock);
    1125             : 
    1126          44 :         pfree(dbpath);
    1127             :     }
    1128             :     else
    1129           0 :         elog(PANIC, "relmap_redo: unknown op code %u", info);
    1130          44 : }

Generated by: LCOV version 1.14