LCOV - code coverage report
Current view: top level - src/backend/utils/cache - relmapper.c (source / functions) Hit Total Coverage
Test: PostgreSQL 12beta2 Lines: 225 264 85.2 %
Date: 2019-06-18 07:06:57 Functions: 22 23 95.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * relmapper.c
       4             :  *    Catalog-to-filenode mapping
       5             :  *
       6             :  * For most tables, the physical file underlying the table is specified by
       7             :  * pg_class.relfilenode.  However, that obviously won't work for pg_class
       8             :  * itself, nor for the other "nailed" catalogs for which we have to be able
       9             :  * to set up working Relation entries without access to pg_class.  It also
      10             :  * does not work for shared catalogs, since there is no practical way to
      11             :  * update other databases' pg_class entries when relocating a shared catalog.
      12             :  * Therefore, for these special catalogs (henceforth referred to as "mapped
      13             :  * catalogs") we rely on a separately maintained file that shows the mapping
      14             :  * from catalog OIDs to filenode numbers.  Each database has a map file for
      15             :  * its local mapped catalogs, and there is a separate map file for shared
      16             :  * catalogs.  Mapped catalogs have zero in their pg_class.relfilenode entries.
      17             :  *
      18             :  * Relocation of a normal table is committed (ie, the new physical file becomes
      19             :  * authoritative) when the pg_class row update commits.  For mapped catalogs,
      20             :  * the act of updating the map file is effectively commit of the relocation.
      21             :  * We postpone the file update till just before commit of the transaction
      22             :  * doing the rewrite, but there is necessarily a window between.  Therefore
      23             :  * mapped catalogs can only be relocated by operations such as VACUUM FULL
      24             :  * and CLUSTER, which make no transactionally-significant changes: it must be
      25             :  * safe for the new file to replace the old, even if the transaction itself
      26             :  * aborts.  An important factor here is that the indexes and toast table of
      27             :  * a mapped catalog must also be mapped, so that the rewrites/relocations of
      28             :  * all these files commit in a single map file update rather than being tied
      29             :  * to transaction commit.
      30             :  *
      31             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
      32             :  * Portions Copyright (c) 1994, Regents of the University of California
      33             :  *
      34             :  *
      35             :  * IDENTIFICATION
      36             :  *    src/backend/utils/cache/relmapper.c
      37             :  *
      38             :  *-------------------------------------------------------------------------
      39             :  */
      40             : #include "postgres.h"
      41             : 
      42             : #include <fcntl.h>
      43             : #include <sys/stat.h>
      44             : #include <unistd.h>
      45             : 
      46             : #include "access/xact.h"
      47             : #include "access/xlog.h"
      48             : #include "access/xloginsert.h"
      49             : #include "catalog/catalog.h"
      50             : #include "catalog/pg_tablespace.h"
      51             : #include "catalog/storage.h"
      52             : #include "miscadmin.h"
      53             : #include "pgstat.h"
      54             : #include "storage/fd.h"
      55             : #include "storage/lwlock.h"
      56             : #include "utils/inval.h"
      57             : #include "utils/relmapper.h"
      58             : 
      59             : 
      60             : /*
      61             :  * The map file is critical data: we have no automatic method for recovering
      62             :  * from loss or corruption of it.  We use a CRC so that we can detect
      63             :  * corruption.  To minimize the risk of failed updates, the map file should
      64             :  * be kept to no more than one standard-size disk sector (ie 512 bytes),
      65             :  * and we use overwrite-in-place rather than playing renaming games.
      66             :  * The struct layout below is designed to occupy exactly 512 bytes, which
      67             :  * might make filesystem updates a bit more efficient.
      68             :  *
      69             :  * Entries in the mappings[] array are in no particular order.  We could
      70             :  * speed searching by insisting on OID order, but it really shouldn't be
      71             :  * worth the trouble given the intended size of the mapping sets.
      72             :  */
      73             : #define RELMAPPER_FILENAME      "pg_filenode.map"
      74             : 
      75             : #define RELMAPPER_FILEMAGIC     0x592717    /* version ID value */
      76             : 
      77             : #define MAX_MAPPINGS            62  /* 62 * 8 + 16 = 512 */
      78             : 
      79             : typedef struct RelMapping
      80             : {
      81             :     Oid         mapoid;         /* OID of a catalog */
      82             :     Oid         mapfilenode;    /* its filenode number */
      83             : } RelMapping;
      84             : 
      85             : typedef struct RelMapFile
      86             : {
      87             :     int32       magic;          /* always RELMAPPER_FILEMAGIC */
      88             :     int32       num_mappings;   /* number of valid RelMapping entries */
      89             :     RelMapping  mappings[MAX_MAPPINGS];
      90             :     pg_crc32c   crc;            /* CRC of all above */
      91             :     int32       pad;            /* to make the struct size be 512 exactly */
      92             : } RelMapFile;
      93             : 
      94             : /*
      95             :  * State for serializing local and shared relmappings for parallel workers
      96             :  * (active states only).  See notes on active_* and pending_* updates state.
      97             :  */
      98             : typedef struct SerializedActiveRelMaps
      99             : {
     100             :     RelMapFile  active_shared_updates;
     101             :     RelMapFile  active_local_updates;
     102             : } SerializedActiveRelMaps;
     103             : 
     104             : /*
     105             :  * The currently known contents of the shared map file and our database's
     106             :  * local map file are stored here.  These can be reloaded from disk
     107             :  * immediately whenever we receive an update sinval message.
     108             :  */
     109             : static RelMapFile shared_map;
     110             : static RelMapFile local_map;
     111             : 
     112             : /*
     113             :  * We use the same RelMapFile data structure to track uncommitted local
     114             :  * changes in the mappings (but note the magic and crc fields are not made
     115             :  * valid in these variables).  Currently, map updates are not allowed within
     116             :  * subtransactions, so one set of transaction-level changes is sufficient.
     117             :  *
     118             :  * The active_xxx variables contain updates that are valid in our transaction
     119             :  * and should be honored by RelationMapOidToFilenode.  The pending_xxx
     120             :  * variables contain updates we have been told about that aren't active yet;
     121             :  * they will become active at the next CommandCounterIncrement.  This setup
     122             :  * lets map updates act similarly to updates of pg_class rows, ie, they
     123             :  * become visible only at the next CommandCounterIncrement boundary.
     124             :  *
     125             :  * Active shared and active local updates are serialized by the parallel
     126             :  * infrastructure, and deserialized within parallel workers.
     127             :  */
     128             : static RelMapFile active_shared_updates;
     129             : static RelMapFile active_local_updates;
     130             : static RelMapFile pending_shared_updates;
     131             : static RelMapFile pending_local_updates;
     132             : 
     133             : 
     134             : /* non-export function prototypes */
     135             : static void apply_map_update(RelMapFile *map, Oid relationId, Oid fileNode,
     136             :                              bool add_okay);
     137             : static void merge_map_updates(RelMapFile *map, const RelMapFile *updates,
     138             :                               bool add_okay);
     139             : static void load_relmap_file(bool shared);
     140             : static void write_relmap_file(bool shared, RelMapFile *newmap,
     141             :                               bool write_wal, bool send_sinval, bool preserve_files,
     142             :                               Oid dbid, Oid tsid, const char *dbpath);
     143             : static void perform_relmap_update(bool shared, const RelMapFile *updates);
     144             : 
     145             : 
     146             : /*
     147             :  * RelationMapOidToFilenode
     148             :  *
     149             :  * The raison d' etre ... given a relation OID, look up its filenode.
     150             :  *
     151             :  * Although shared and local relation OIDs should never overlap, the caller
     152             :  * always knows which we need --- so pass that information to avoid useless
     153             :  * searching.
     154             :  *
     155             :  * Returns InvalidOid if the OID is not known (which should never happen,
     156             :  * but the caller is in a better position to report a meaningful error).
     157             :  */
     158             : Oid
     159      817170 : RelationMapOidToFilenode(Oid relationId, bool shared)
     160             : {
     161             :     const RelMapFile *map;
     162             :     int32       i;
     163             : 
     164             :     /* If there are active updates, believe those over the main maps */
     165      817170 :     if (shared)
     166             :     {
     167      304638 :         map = &active_shared_updates;
     168      308694 :         for (i = 0; i < map->num_mappings; i++)
     169             :         {
     170        6140 :             if (relationId == map->mappings[i].mapoid)
     171        2084 :                 return map->mappings[i].mapfilenode;
     172             :         }
     173      302554 :         map = &shared_map;
     174     7050770 :         for (i = 0; i < map->num_mappings; i++)
     175             :         {
     176     7050770 :             if (relationId == map->mappings[i].mapoid)
     177      302554 :                 return map->mappings[i].mapfilenode;
     178             :         }
     179             :     }
     180             :     else
     181             :     {
     182      512532 :         map = &active_local_updates;
     183      516208 :         for (i = 0; i < map->num_mappings; i++)
     184             :         {
     185        5670 :             if (relationId == map->mappings[i].mapoid)
     186        1994 :                 return map->mappings[i].mapfilenode;
     187             :         }
     188      510538 :         map = &local_map;
     189     1887192 :         for (i = 0; i < map->num_mappings; i++)
     190             :         {
     191     1887192 :             if (relationId == map->mappings[i].mapoid)
     192      510538 :                 return map->mappings[i].mapfilenode;
     193             :         }
     194             :     }
     195             : 
     196           0 :     return InvalidOid;
     197             : }
     198             : 
     199             : /*
     200             :  * RelationMapFilenodeToOid
     201             :  *
     202             :  * Do the reverse of the normal direction of mapping done in
     203             :  * RelationMapOidToFilenode.
     204             :  *
     205             :  * This is not supposed to be used during normal running but rather for
     206             :  * information purposes when looking at the filesystem or xlog.
     207             :  *
     208             :  * Returns InvalidOid if the OID is not known; this can easily happen if the
     209             :  * relfilenode doesn't pertain to a mapped relation.
     210             :  */
     211             : Oid
     212         652 : RelationMapFilenodeToOid(Oid filenode, bool shared)
     213             : {
     214             :     const RelMapFile *map;
     215             :     int32       i;
     216             : 
     217             :     /* If there are active updates, believe those over the main maps */
     218         652 :     if (shared)
     219             :     {
     220         188 :         map = &active_shared_updates;
     221         188 :         for (i = 0; i < map->num_mappings; i++)
     222             :         {
     223           0 :             if (filenode == map->mappings[i].mapfilenode)
     224           0 :                 return map->mappings[i].mapoid;
     225             :         }
     226         188 :         map = &shared_map;
     227        4512 :         for (i = 0; i < map->num_mappings; i++)
     228             :         {
     229        4512 :             if (filenode == map->mappings[i].mapfilenode)
     230         188 :                 return map->mappings[i].mapoid;
     231             :         }
     232             :     }
     233             :     else
     234             :     {
     235         464 :         map = &active_local_updates;
     236         464 :         for (i = 0; i < map->num_mappings; i++)
     237             :         {
     238           0 :             if (filenode == map->mappings[i].mapfilenode)
     239           0 :                 return map->mappings[i].mapoid;
     240             :         }
     241         464 :         map = &local_map;
     242        3230 :         for (i = 0; i < map->num_mappings; i++)
     243             :         {
     244        3114 :             if (filenode == map->mappings[i].mapfilenode)
     245         348 :                 return map->mappings[i].mapoid;
     246             :         }
     247             :     }
     248             : 
     249         116 :     return InvalidOid;
     250             : }
     251             : 
     252             : /*
     253             :  * RelationMapUpdateMap
     254             :  *
     255             :  * Install a new relfilenode mapping for the specified relation.
     256             :  *
     257             :  * If immediate is true (or we're bootstrapping), the mapping is activated
     258             :  * immediately.  Otherwise it is made pending until CommandCounterIncrement.
     259             :  */
     260             : void
     261       23276 : RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared,
     262             :                      bool immediate)
     263             : {
     264             :     RelMapFile *map;
     265             : 
     266       23276 :     if (IsBootstrapProcessingMode())
     267             :     {
     268             :         /*
     269             :          * In bootstrap mode, the mapping gets installed in permanent map.
     270             :          */
     271       21624 :         if (shared)
     272       14946 :             map = &shared_map;
     273             :         else
     274        6678 :             map = &local_map;
     275             :     }
     276             :     else
     277             :     {
     278             :         /*
     279             :          * We don't currently support map changes within subtransactions, or
     280             :          * when in parallel mode.  This could be done with more bookkeeping
     281             :          * infrastructure, but it doesn't presently seem worth it.
     282             :          */
     283        1652 :         if (GetCurrentTransactionNestLevel() > 1)
     284           0 :             elog(ERROR, "cannot change relation mapping within subtransaction");
     285             : 
     286        1652 :         if (IsInParallelMode())
     287           0 :             elog(ERROR, "cannot change relation mapping in parallel mode");
     288             : 
     289        1652 :         if (immediate)
     290             :         {
     291             :             /* Make it active, but only locally */
     292         146 :             if (shared)
     293           0 :                 map = &active_shared_updates;
     294             :             else
     295         146 :                 map = &active_local_updates;
     296             :         }
     297             :         else
     298             :         {
     299             :             /* Make it pending */
     300        1506 :             if (shared)
     301         896 :                 map = &pending_shared_updates;
     302             :             else
     303         610 :                 map = &pending_local_updates;
     304             :         }
     305             :     }
     306       23276 :     apply_map_update(map, relationId, fileNode, true);
     307       23276 : }
     308             : 
     309             : /*
     310             :  * apply_map_update
     311             :  *
     312             :  * Insert a new mapping into the given map variable, replacing any existing
     313             :  * mapping for the same relation.
     314             :  *
     315             :  * In some cases the caller knows there must be an existing mapping; pass
     316             :  * add_okay = false to draw an error if not.
     317             :  */
     318             : static void
     319       26110 : apply_map_update(RelMapFile *map, Oid relationId, Oid fileNode, bool add_okay)
     320             : {
     321             :     int32       i;
     322             : 
     323             :     /* Replace any existing mapping */
     324      448688 :     for (i = 0; i < map->num_mappings; i++)
     325             :     {
     326      425348 :         if (relationId == map->mappings[i].mapoid)
     327             :         {
     328        2770 :             map->mappings[i].mapfilenode = fileNode;
     329        2770 :             return;
     330             :         }
     331             :     }
     332             : 
     333             :     /* Nope, need to add a new mapping */
     334       23340 :     if (!add_okay)
     335           0 :         elog(ERROR, "attempt to apply a mapping to unmapped relation %u",
     336             :              relationId);
     337       23340 :     if (map->num_mappings >= MAX_MAPPINGS)
     338           0 :         elog(ERROR, "ran out of space in relation map");
     339       23340 :     map->mappings[map->num_mappings].mapoid = relationId;
     340       23340 :     map->mappings[map->num_mappings].mapfilenode = fileNode;
     341       23340 :     map->num_mappings++;
     342             : }
     343             : 
     344             : /*
     345             :  * merge_map_updates
     346             :  *
     347             :  * Merge all the updates in the given pending-update map into the target map.
     348             :  * This is just a bulk form of apply_map_update.
     349             :  */
     350             : static void
     351        1796 : merge_map_updates(RelMapFile *map, const RelMapFile *updates, bool add_okay)
     352             : {
     353             :     int32       i;
     354             : 
     355        4630 :     for (i = 0; i < updates->num_mappings; i++)
     356             :     {
     357        2834 :         apply_map_update(map,
     358             :                          updates->mappings[i].mapoid,
     359             :                          updates->mappings[i].mapfilenode,
     360             :                          add_okay);
     361             :     }
     362        1796 : }
     363             : 
     364             : /*
     365             :  * RelationMapRemoveMapping
     366             :  *
     367             :  * Remove a relation's entry in the map.  This is only allowed for "active"
     368             :  * (but not committed) local mappings.  We need it so we can back out the
     369             :  * entry for the transient target file when doing VACUUM FULL/CLUSTER on
     370             :  * a mapped relation.
     371             :  */
     372             : void
     373         146 : RelationMapRemoveMapping(Oid relationId)
     374             : {
     375         146 :     RelMapFile *map = &active_local_updates;
     376             :     int32       i;
     377             : 
     378         234 :     for (i = 0; i < map->num_mappings; i++)
     379             :     {
     380         234 :         if (relationId == map->mappings[i].mapoid)
     381             :         {
     382             :             /* Found it, collapse it out */
     383         146 :             map->mappings[i] = map->mappings[map->num_mappings - 1];
     384         146 :             map->num_mappings--;
     385         146 :             return;
     386             :         }
     387             :     }
     388           0 :     elog(ERROR, "could not find temporary mapping for relation %u",
     389             :          relationId);
     390             : }
     391             : 
     392             : /*
     393             :  * RelationMapInvalidate
     394             :  *
     395             :  * This routine is invoked for SI cache flush messages.  We must re-read
     396             :  * the indicated map file.  However, we might receive a SI message in a
     397             :  * process that hasn't yet, and might never, load the mapping files;
     398             :  * for example the autovacuum launcher, which *must not* try to read
     399             :  * a local map since it is attached to no particular database.
     400             :  * So, re-read only if the map is valid now.
     401             :  */
     402             : void
     403         546 : RelationMapInvalidate(bool shared)
     404             : {
     405         546 :     if (shared)
     406             :     {
     407         384 :         if (shared_map.magic == RELMAPPER_FILEMAGIC)
     408         384 :             load_relmap_file(true);
     409             :     }
     410             :     else
     411             :     {
     412         162 :         if (local_map.magic == RELMAPPER_FILEMAGIC)
     413         162 :             load_relmap_file(false);
     414             :     }
     415         546 : }
     416             : 
     417             : /*
     418             :  * RelationMapInvalidateAll
     419             :  *
     420             :  * Reload all map files.  This is used to recover from SI message buffer
     421             :  * overflow: we can't be sure if we missed an inval message.
     422             :  * Again, reload only currently-valid maps.
     423             :  */
     424             : void
     425        2212 : RelationMapInvalidateAll(void)
     426             : {
     427        2212 :     if (shared_map.magic == RELMAPPER_FILEMAGIC)
     428        2212 :         load_relmap_file(true);
     429        2212 :     if (local_map.magic == RELMAPPER_FILEMAGIC)
     430        2204 :         load_relmap_file(false);
     431        2212 : }
     432             : 
     433             : /*
     434             :  * AtCCI_RelationMap
     435             :  *
     436             :  * Activate any "pending" relation map updates at CommandCounterIncrement time.
     437             :  */
     438             : void
     439      580326 : AtCCI_RelationMap(void)
     440             : {
     441      580326 :     if (pending_shared_updates.num_mappings != 0)
     442             :     {
     443         840 :         merge_map_updates(&active_shared_updates,
     444             :                           &pending_shared_updates,
     445             :                           true);
     446         840 :         pending_shared_updates.num_mappings = 0;
     447             :     }
     448      580326 :     if (pending_local_updates.num_mappings != 0)
     449             :     {
     450         472 :         merge_map_updates(&active_local_updates,
     451             :                           &pending_local_updates,
     452             :                           true);
     453         472 :         pending_local_updates.num_mappings = 0;
     454             :     }
     455      580326 : }
     456             : 
     457             : /*
     458             :  * AtEOXact_RelationMap
     459             :  *
     460             :  * Handle relation mapping at main-transaction commit or abort.
     461             :  *
     462             :  * During commit, this must be called as late as possible before the actual
     463             :  * transaction commit, so as to minimize the window where the transaction
     464             :  * could still roll back after committing map changes.  Although nothing
     465             :  * critically bad happens in such a case, we still would prefer that it
     466             :  * not happen, since we'd possibly be losing useful updates to the relations'
     467             :  * pg_class row(s).
     468             :  *
     469             :  * During abort, we just have to throw away any pending map changes.
     470             :  * Normal post-abort cleanup will take care of fixing relcache entries.
     471             :  * Parallel worker commit/abort is handled by resetting active mappings
     472             :  * that may have been received from the leader process.  (There should be
     473             :  * no pending updates in parallel workers.)
     474             :  */
     475             : void
     476      455784 : AtEOXact_RelationMap(bool isCommit, bool isParallelWorker)
     477             : {
     478      455784 :     if (isCommit && !isParallelWorker)
     479             :     {
     480             :         /*
     481             :          * We should not get here with any "pending" updates.  (We could
     482             :          * logically choose to treat such as committed, but in the current
     483             :          * code this should never happen.)
     484             :          */
     485             :         Assert(pending_shared_updates.num_mappings == 0);
     486             :         Assert(pending_local_updates.num_mappings == 0);
     487             : 
     488             :         /*
     489             :          * Write any active updates to the actual map files, then reset them.
     490             :          */
     491      435058 :         if (active_shared_updates.num_mappings != 0)
     492             :         {
     493         340 :             perform_relmap_update(true, &active_shared_updates);
     494         340 :             active_shared_updates.num_mappings = 0;
     495             :         }
     496      870116 :         if (active_local_updates.num_mappings != 0)
     497             :         {
     498         144 :             perform_relmap_update(false, &active_local_updates);
     499         144 :             active_local_updates.num_mappings = 0;
     500             :         }
     501             :     }
     502             :     else
     503             :     {
     504             :         /* Abort or parallel worker --- drop all local and pending updates */
     505             :         Assert(!isParallelWorker || pending_shared_updates.num_mappings == 0);
     506             :         Assert(!isParallelWorker || pending_local_updates.num_mappings == 0);
     507             : 
     508       20726 :         active_shared_updates.num_mappings = 0;
     509       20726 :         active_local_updates.num_mappings = 0;
     510       20726 :         pending_shared_updates.num_mappings = 0;
     511       20726 :         pending_local_updates.num_mappings = 0;
     512             :     }
     513      455784 : }
     514             : 
     515             : /*
     516             :  * AtPrepare_RelationMap
     517             :  *
     518             :  * Handle relation mapping at PREPARE.
     519             :  *
     520             :  * Currently, we don't support preparing any transaction that changes the map.
     521             :  */
     522             : void
     523          66 : AtPrepare_RelationMap(void)
     524             : {
     525         132 :     if (active_shared_updates.num_mappings != 0 ||
     526         132 :         active_local_updates.num_mappings != 0 ||
     527         132 :         pending_shared_updates.num_mappings != 0 ||
     528          66 :         pending_local_updates.num_mappings != 0)
     529           0 :         ereport(ERROR,
     530             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     531             :                  errmsg("cannot PREPARE a transaction that modified relation mapping")));
     532          66 : }
     533             : 
     534             : /*
     535             :  * CheckPointRelationMap
     536             :  *
     537             :  * This is called during a checkpoint.  It must ensure that any relation map
     538             :  * updates that were WAL-logged before the start of the checkpoint are
     539             :  * securely flushed to disk and will not need to be replayed later.  This
     540             :  * seems unlikely to be a performance-critical issue, so we use a simple
     541             :  * method: we just take and release the RelationMappingLock.  This ensures
     542             :  * that any already-logged map update is complete, because write_relmap_file
     543             :  * will fsync the map file before the lock is released.
     544             :  */
     545             : void
     546        2820 : CheckPointRelationMap(void)
     547             : {
     548        2820 :     LWLockAcquire(RelationMappingLock, LW_SHARED);
     549        2820 :     LWLockRelease(RelationMappingLock);
     550        2820 : }
     551             : 
     552             : /*
     553             :  * RelationMapFinishBootstrap
     554             :  *
     555             :  * Write out the initial relation mapping files at the completion of
     556             :  * bootstrap.  All the mapped files should have been made known to us
     557             :  * via RelationMapUpdateMap calls.
                     :  *
                     :  * The maps written are the static shared_map and local_map themselves.
                     :  * Both write_relmap_file calls pass false for write_wal, send_sinval and
                     :  * preserve_files.  The shared map is written with InvalidOid,
                     :  * GLOBALTABLESPACE_OID and a NULL dbpath (the shared branch of
                     :  * write_relmap_file builds its file name without using dbpath); the local
                     :  * map is written with MyDatabaseId, MyDatabaseTableSpace and DatabasePath.
                     :  *
                     :  * Asserts that we are in bootstrap processing mode and that all four
                     :  * active/pending update lists are empty.
     558             :  */
     559             : void
     560         318 : RelationMapFinishBootstrap(void)
     561             : {
     562             :     Assert(IsBootstrapProcessingMode());
     563             : 
     564             :     /* Shouldn't be anything "pending" ... */
     565             :     Assert(active_shared_updates.num_mappings == 0);
     566             :     Assert(active_local_updates.num_mappings == 0);
     567             :     Assert(pending_shared_updates.num_mappings == 0);
     568             :     Assert(pending_local_updates.num_mappings == 0);
     569             : 
     570             :     /* Write the files; no WAL or sinval needed */
     571         318 :     write_relmap_file(true, &shared_map, false, false, false,
     572             :                       InvalidOid, GLOBALTABLESPACE_OID, NULL);
     573         318 :     write_relmap_file(false, &local_map, false, false, false,
     574             :                       MyDatabaseId, MyDatabaseTableSpace, DatabasePath);
     575         318 : }
     576             : 
     577             : /*
     578             :  * RelationMapInitialize
     579             :  *
     580             :  * This initializes the mapper module at process startup.  We can't access the
     581             :  * database yet, so just make sure the maps are empty.
                     :  *
                     :  * Only the magic fields of the two real maps and the num_mappings counters
                     :  * of all six structures (shared/local real maps, active updates, pending
                     :  * updates) are reset; no other field is written.  No file is read here;
                     :  * the maps are loaded later by RelationMapInitializePhase2 and
                     :  * RelationMapInitializePhase3.
     582             :  */
     583             : void
     584        9798 : RelationMapInitialize(void)
     585             : {
     586             :     /* The static variables should initialize to zeroes, but let's be sure */
     587        9798 :     shared_map.magic = 0;       /* mark it not loaded */
     588        9798 :     local_map.magic = 0;
     589        9798 :     shared_map.num_mappings = 0;
     590        9798 :     local_map.num_mappings = 0;
     591        9798 :     active_shared_updates.num_mappings = 0;
     592        9798 :     active_local_updates.num_mappings = 0;
     593        9798 :     pending_shared_updates.num_mappings = 0;
     594        9798 :     pending_local_updates.num_mappings = 0;
     595        9798 : }
     596             : 
     597             : /*
     598             :  * RelationMapInitializePhase2
     599             :  *
     600             :  * This is called to prepare for access to pg_database during startup.
     601             :  * We should be able to read the shared map file now.
                     :  *
                     :  * Outside bootstrap mode this fills the static shared_map by calling
                     :  * load_relmap_file(true); that routine reports every failure at FATAL
                     :  * level, so there is no error return to check here.
     602             :  */
     603             : void
     604        9798 : RelationMapInitializePhase2(void)
     605             : {
     606             :     /*
     607             :      * In bootstrap mode, the map file isn't there yet, so do nothing.
     608             :      */
     609        9798 :     if (IsBootstrapProcessingMode())
     610         318 :         return;
     611             : 
     612             :     /*
     613             :      * Load the shared map file, die on error.
     614             :      */
     615        9480 :     load_relmap_file(true);
     616             : }
     617             : 
     618             : /*
     619             :  * RelationMapInitializePhase3
     620             :  *
     621             :  * This is called as soon as we have determined MyDatabaseId and set up
     622             :  * DatabasePath.  At this point we should be able to read the local map file.
                     :  *
                     :  * Outside bootstrap mode this fills the static local_map by calling
                     :  * load_relmap_file(false), which builds the file name from DatabasePath
                     :  * and reports every failure at FATAL level.
     623             :  */
     624             : void
     625        8744 : RelationMapInitializePhase3(void)
     626             : {
     627             :     /*
     628             :      * In bootstrap mode, the map file isn't there yet, so do nothing.
     629             :      */
     630        8744 :     if (IsBootstrapProcessingMode())
     631         318 :         return;
     632             : 
     633             :     /*
     634             :      * Load the local map file, die on error.
     635             :      */
     636        8426 :     load_relmap_file(false);
     637             : }
     638             : 
     639             : /*
     640             :  * EstimateRelationMapSpace
     641             :  *
     642             :  * Estimate space needed to pass active shared and local relmaps to parallel
     643             :  * workers.
                     :  *
                     :  * The result is always sizeof(SerializedActiveRelMaps); it does not depend
                     :  * on how many mappings are currently active.  SerializeRelationMap asserts
                     :  * that the buffer it is handed is at least this large.
     644             :  */
     645             : Size
     646         490 : EstimateRelationMapSpace(void)
     647             : {
     648         490 :     return sizeof(SerializedActiveRelMaps);
     649             : }
     650             : 
     651             : /*
     652             :  * SerializeRelationMap
     653             :  *
     654             :  * Serialize active shared and local relmap state for parallel workers.
                     :  *
                     :  * maxSize: number of bytes available at startAddress; asserted to be at
                     :  *      least EstimateRelationMapSpace().
                     :  * startAddress: destination buffer, written as one SerializedActiveRelMaps
                     :  *      struct.
                     :  *
                     :  * Only active_shared_updates and active_local_updates are copied, by
                     :  * whole-struct assignment.  The pending update lists are not serialized.
     655             :  */
     656             : void
     657         490 : SerializeRelationMap(Size maxSize, char *startAddress)
     658             : {
     659             :     SerializedActiveRelMaps *relmaps;
     660             : 
     661             :     Assert(maxSize >= EstimateRelationMapSpace());
     662             : 
     663         490 :     relmaps = (SerializedActiveRelMaps *) startAddress;
     664         490 :     relmaps->active_shared_updates = active_shared_updates;
     665         490 :     relmaps->active_local_updates = active_local_updates;
     666         490 : }
     667             : 
     668             : /*
     669             :  * RestoreRelationMap
     670             :  *
     671             :  * Restore active shared and local relmap state within a parallel worker.
                     :  *
                     :  * startAddress: buffer previously filled by SerializeRelationMap; it is
                     :  *      read as one SerializedActiveRelMaps struct.
                     :  *
                     :  * Raises ERROR if any of the four active/pending update lists already
                     :  * holds a mapping.  Otherwise the two active lists are overwritten by
                     :  * whole-struct assignment; the pending lists are left as they were
                     :  * (necessarily empty, given the check).
     672             :  */
     673             : void
     674        1642 : RestoreRelationMap(char *startAddress)
     675             : {
     676             :     SerializedActiveRelMaps *relmaps;
     677             : 
     678        3284 :     if (active_shared_updates.num_mappings != 0 ||
     679        3284 :         active_local_updates.num_mappings != 0 ||
     680        3284 :         pending_shared_updates.num_mappings != 0 ||
     681        1642 :         pending_local_updates.num_mappings != 0)
     682           0 :         elog(ERROR, "parallel worker has existing mappings");
     683             : 
     684        1642 :     relmaps = (SerializedActiveRelMaps *) startAddress;
     685        1642 :     active_shared_updates = relmaps->active_shared_updates;
     686        1642 :     active_local_updates = relmaps->active_local_updates;
     687        1642 : }
     688             : 
     689             : /*
     690             :  * load_relmap_file -- load data from the shared or local map file
     691             :  *
     692             :  * Because the map file is essential for access to core system catalogs,
     693             :  * failure to read it is a fatal error.
     694             :  *
     695             :  * Note that the local case requires DatabasePath to be set up.
                     :  *
                     :  * shared: true reads RELMAPPER_FILENAME under "global" into shared_map;
                     :  *      false reads RELMAPPER_FILENAME under DatabasePath into local_map.
                     :  *
                     :  * The file is read with a single read() of sizeof(RelMapFile) bytes
                     :  * directly into the static map, so the static copy has already been
                     :  * overwritten by the time the checks below run.  The checks are, in order:
                     :  * the magic number, num_mappings within 0 .. MAX_MAPPINGS, and a CRC-32C
                     :  * computed over the bytes that precede the crc field.  A short read is
                     :  * reported with ERRCODE_DATA_CORRUPTED; open, read and close failures use
                     :  * errcode_for_file_access().  Every failure is reported at FATAL level.
     696             :  */
     697             : static void
     698       23352 : load_relmap_file(bool shared)
     699             : {
     700             :     RelMapFile *map;
     701             :     char        mapfilename[MAXPGPATH];
     702             :     pg_crc32c   crc;
     703             :     int         fd;
     704             :     int         r;
     705             : 
     706       23352 :     if (shared)
     707             :     {
     708       12416 :         snprintf(mapfilename, sizeof(mapfilename), "global/%s",
     709             :                  RELMAPPER_FILENAME);
     710       12416 :         map = &shared_map;
     711             :     }
     712             :     else
     713             :     {
     714       10936 :         snprintf(mapfilename, sizeof(mapfilename), "%s/%s",
     715             :                  DatabasePath, RELMAPPER_FILENAME);
     716       10936 :         map = &local_map;
     717             :     }
     718             : 
     719             :     /* Read data ... */
     720       23352 :     fd = OpenTransientFile(mapfilename, O_RDONLY | PG_BINARY);
     721       23352 :     if (fd < 0)
     722           0 :         ereport(FATAL,
     723             :                 (errcode_for_file_access(),
     724             :                  errmsg("could not open file \"%s\": %m",
     725             :                         mapfilename)));
     726             : 
     727             :     /*
     728             :      * Note: we could take RelationMappingLock in shared mode here, but it
     729             :      * seems unnecessary since our read() should be atomic against any
     730             :      * concurrent updater's write().  If the file is updated shortly after we
     731             :      * look, the sinval signaling mechanism will make us re-read it before we
     732             :      * are able to access any relation that's affected by the change.
     733             :      */
     734       23352 :     pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ);
     735       23352 :     r = read(fd, map, sizeof(RelMapFile));
     736       23352 :     if (r != sizeof(RelMapFile))
     737             :     {
     738           0 :         if (r < 0)
     739           0 :             ereport(FATAL,
     740             :                     (errcode_for_file_access(),
     741             :                      errmsg("could not read file \"%s\": %m", mapfilename)));
     742             :         else
     743           0 :             ereport(FATAL,
     744             :                     (errcode(ERRCODE_DATA_CORRUPTED),
     745             :                      errmsg("could not read file \"%s\": read %d of %zu",
     746             :                             mapfilename, r, sizeof(RelMapFile))));
     747             :     }
     748       23352 :     pgstat_report_wait_end();
     749             : 
     750       23352 :     if (CloseTransientFile(fd))
     751           0 :         ereport(FATAL,
     752             :                 (errcode_for_file_access(),
     753             :                  errmsg("could not close file \"%s\": %m",
     754             :                         mapfilename)));
     755             : 
     756             :     /* check for correct magic number, etc */
     757       46704 :     if (map->magic != RELMAPPER_FILEMAGIC ||
     758       46704 :         map->num_mappings < 0 ||
     759       23352 :         map->num_mappings > MAX_MAPPINGS)
     760           0 :         ereport(FATAL,
     761             :                 (errmsg("relation mapping file \"%s\" contains invalid data",
     762             :                         mapfilename)));
     763             : 
     764             :     /* verify the CRC */
     765       23352 :     INIT_CRC32C(crc);
     766       23352 :     COMP_CRC32C(crc, (char *) map, offsetof(RelMapFile, crc));
     767       23352 :     FIN_CRC32C(crc);
     768             : 
     769       23352 :     if (!EQ_CRC32C(crc, map->crc))
     770           0 :         ereport(FATAL,
     771             :                 (errmsg("relation mapping file \"%s\" contains incorrect checksum",
     772             :                         mapfilename)));
     773       23352 : }
     774             : 
     775             : /*
     776             :  * Write out a new shared or local map file with the given contents.
     777             :  *
     778             :  * The magic number and CRC are automatically updated in *newmap.  On
     779             :  * success, we copy the data to the appropriate permanent static variable.
     780             :  *
     781             :  * If write_wal is true then an appropriate WAL message is emitted.
     782             :  * (It will be false for bootstrap and WAL replay cases.)
     783             :  *
     784             :  * If send_sinval is true then a SI invalidation message is sent.
     785             :  * (This should be true except in bootstrap case.)
     786             :  *
     787             :  * If preserve_files is true then the storage manager is warned not to
     788             :  * delete the files listed in the map.
     789             :  *
     790             :  * Because this may be called during WAL replay when MyDatabaseId,
     791             :  * DatabasePath, etc aren't valid, we require the caller to pass in suitable
     792             :  * values.  The caller is also responsible for being sure no concurrent
     793             :  * map update could be happening.
                     :  *
                     :  * Parameters:
                     :  *  shared: true targets RELMAPPER_FILENAME under "global" and shared_map;
                     :  *      false targets RELMAPPER_FILENAME under dbpath and local_map.
                     :  *  newmap: contents to write; its magic and crc fields are overwritten
                     :  *      here.  An out-of-range num_mappings raises ERROR before any file
                     :  *      is opened.
                     :  *  dbid, tsid: recorded in the WAL record, passed to
                     :  *      CacheInvalidateRelmap (dbid only), and used to build the
                     :  *      RelFileNode of each preserved file.
                     :  *  dbpath: directory of the local map file; not used when shared is true.
                     :  *
                     :  * The critical section is entered only when write_wal is true, and it
                     :  * starts after the file has been opened.  In the bootstrap and replay
                     :  * callers (write_wal false) the write, fsync, close, sinval and
                     :  * preserve-storage steps therefore run outside any critical section
                     :  * started by this function.
                     :  *
                     :  * The file is opened with O_WRONLY | O_CREAT and no O_TRUNC, and a full
                     :  * sizeof(RelMapFile) image is written with one write() call, so an
                     :  * existing file is overwritten in place rather than replaced.
     794             :  */
     795             : static void
     796        1120 : write_relmap_file(bool shared, RelMapFile *newmap,
     797             :                   bool write_wal, bool send_sinval, bool preserve_files,
     798             :                   Oid dbid, Oid tsid, const char *dbpath)
     799             : {
     800             :     int         fd;
     801             :     RelMapFile *realmap;
     802             :     char        mapfilename[MAXPGPATH];
     803             : 
     804             :     /*
     805             :      * Fill in the overhead fields and update CRC.
     806             :      */
     807        1120 :     newmap->magic = RELMAPPER_FILEMAGIC;
     808        1120 :     if (newmap->num_mappings < 0 || newmap->num_mappings > MAX_MAPPINGS)
     809           0 :         elog(ERROR, "attempt to write bogus relation mapping");
     810             : 
     811        1120 :     INIT_CRC32C(newmap->crc);
     812        1120 :     COMP_CRC32C(newmap->crc, (char *) newmap, offsetof(RelMapFile, crc));
     813        1120 :     FIN_CRC32C(newmap->crc);
     814             : 
     815             :     /*
     816             :      * Open the target file.  We prefer to do this before entering the
     817             :      * critical section, so that an open() failure need not force PANIC.
     818             :      */
     819        1120 :     if (shared)
     820             :     {
     821         658 :         snprintf(mapfilename, sizeof(mapfilename), "global/%s",
     822             :                  RELMAPPER_FILENAME);
     823         658 :         realmap = &shared_map;
     824             :     }
     825             :     else
     826             :     {
     827         462 :         snprintf(mapfilename, sizeof(mapfilename), "%s/%s",
     828             :                  dbpath, RELMAPPER_FILENAME);
     829         462 :         realmap = &local_map;
     830             :     }
     831             : 
     832        1120 :     fd = OpenTransientFile(mapfilename, O_WRONLY | O_CREAT | PG_BINARY);
     833        1120 :     if (fd < 0)
     834           0 :         ereport(ERROR,
     835             :                 (errcode_for_file_access(),
     836             :                  errmsg("could not open file \"%s\": %m",
     837             :                         mapfilename)));
     838             : 
     839        1120 :     if (write_wal)
     840             :     {
     841             :         xl_relmap_update xlrec;
     842             :         XLogRecPtr  lsn;
     843             : 
     844             :         /* now errors are fatal ... */
     845         484 :         START_CRIT_SECTION();
     846             : 
     847         484 :         xlrec.dbid = dbid;
     848         484 :         xlrec.tsid = tsid;
     849         484 :         xlrec.nbytes = sizeof(RelMapFile);
     850             : 
     851         484 :         XLogBeginInsert();
     852         484 :         XLogRegisterData((char *) (&xlrec), MinSizeOfRelmapUpdate);
     853         484 :         XLogRegisterData((char *) newmap, sizeof(RelMapFile));
     854             : 
     855         484 :         lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE);
     856             : 
     857             :         /* As always, WAL must hit the disk before the data update does */
     858         484 :         XLogFlush(lsn);
     859             :     }
     860             : 
     861        1120 :     errno = 0;
     862        1120 :     pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_WRITE);
     863        1120 :     if (write(fd, newmap, sizeof(RelMapFile)) != sizeof(RelMapFile))
     864             :     {
     865             :         /* if write didn't set errno, assume problem is no disk space */
     866           0 :         if (errno == 0)
     867           0 :             errno = ENOSPC;
     868           0 :         ereport(ERROR,
     869             :                 (errcode_for_file_access(),
     870             :                  errmsg("could not write file \"%s\": %m",
     871             :                         mapfilename)));
     872             :     }
     873        1120 :     pgstat_report_wait_end();
     874             : 
     875             :     /*
     876             :      * We choose to fsync the data to disk before considering the task done.
     877             :      * It would be possible to relax this if it turns out to be a performance
     878             :      * issue, but it would complicate checkpointing --- see notes for
     879             :      * CheckPointRelationMap.
     880             :      */
     881        1120 :     pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_SYNC);
     882        1120 :     if (pg_fsync(fd) != 0)
     883           0 :         ereport(data_sync_elevel(ERROR),
     884             :                 (errcode_for_file_access(),
     885             :                  errmsg("could not fsync file \"%s\": %m",
     886             :                         mapfilename)));
     887        1120 :     pgstat_report_wait_end();
     888             : 
     889        1120 :     if (CloseTransientFile(fd))
     890           0 :         ereport(ERROR,
     891             :                 (errcode_for_file_access(),
     892             :                  errmsg("could not close file \"%s\": %m",
     893             :                         mapfilename)));
     894             : 
     895             :     /*
     896             :      * Now that the file is safely on disk, send sinval message to let other
     897             :      * backends know to re-read it.  We must do this inside the critical
     898             :      * section: if for some reason we fail to send the message, we have to
     899             :      * force a database-wide PANIC.  Otherwise other backends might continue
     900             :      * execution with stale mapping information, which would be catastrophic
     901             :      * as soon as others began to use the now-committed data.
     902             :      */
     903        1120 :     if (send_sinval)
     904         484 :         CacheInvalidateRelmap(dbid);
     905             : 
     906             :     /*
     907             :      * Make sure that the files listed in the map are not deleted if the outer
     908             :      * transaction aborts.  This had better be within the critical section
     909             :      * too: it's not likely to fail, but if it did, we'd arrive at transaction
     910             :      * abort with the files still vulnerable.  PANICing will leave things in a
     911             :      * good state on-disk.
     912             :      *
     913             :      * Note: we're cheating a little bit here by assuming that mapped files
     914             :      * are either in pg_global or the database's default tablespace.
     915             :      */
     916        1120 :     if (preserve_files)
     917             :     {
     918             :         int32       i;
     919             : 
     920       18912 :         for (i = 0; i < newmap->num_mappings; i++)
     921             :         {
     922             :             RelFileNode rnode;
     923             : 
     924       18428 :             rnode.spcNode = tsid;
     925       18428 :             rnode.dbNode = dbid;
     926       18428 :             rnode.relNode = newmap->mappings[i].mapfilenode;
     927       18428 :             RelationPreserveStorage(rnode, false);
     928             :         }
     929             :     }
     930             : 
     931             :     /* Success, update permanent copy */
     932        1120 :     memcpy(realmap, newmap, sizeof(RelMapFile));
     933             : 
     934             :     /* Critical section done */
     935        1120 :     if (write_wal)
     936         484 :         END_CRIT_SECTION();
     937        1120 : }
     938             : 
     939             : /*
     940             :  * Merge the specified updates into the appropriate "real" map,
     941             :  * and write out the changes.  This function must be used for committing
     942             :  * updates during normal multiuser operation.
                     :  *
                     :  * shared: true updates the shared map file, false the local one.
                     :  * updates: the mappings to merge in; not modified here.
                     :  *
                     :  * Sequence, all under RelationMappingLock held in exclusive mode: re-read
                     :  * the target file into its static map, copy that map into a local
                     :  * RelMapFile, merge the updates into the copy, then call
                     :  * write_relmap_file with write_wal, send_sinval and preserve_files all
                     :  * true.  DatabasePath is passed as dbpath in both cases; write_relmap_file
                     :  * does not use it when shared is true.
                     :  *
                     :  * NOTE(review): allowSystemTableMods is passed as the third argument of
                     :  * merge_map_updates; presumably it controls whether entries not already
                     :  * in the map may be added -- confirm against merge_map_updates.
     943             :  */
     944             : static void
     945         484 : perform_relmap_update(bool shared, const RelMapFile *updates)
     946             : {
     947             :     RelMapFile  newmap;
     948             : 
     949             :     /*
     950             :      * Anyone updating a relation's mapping info should take exclusive lock on
     951             :      * that rel and hold it until commit.  This ensures that there will not be
     952             :      * concurrent updates on the same mapping value; but there could easily be
     953             :      * concurrent updates on different values in the same file. We cover that
     954             :      * by acquiring the RelationMappingLock, re-reading the target file to
     955             :      * ensure it's up to date, applying the updates, and writing the data
     956             :      * before releasing RelationMappingLock.
     957             :      *
     958             :      * There is only one RelationMappingLock.  In principle we could try to
     959             :      * have one per mapping file, but it seems unlikely to be worth the
     960             :      * trouble.
     961             :      */
     962         484 :     LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE);
     963             : 
     964             :     /* Be certain we see any other updates just made */
     965         484 :     load_relmap_file(shared);
     966             : 
     967             :     /* Prepare updated data in a local variable */
     968         484 :     if (shared)
     969         340 :         memcpy(&newmap, &shared_map, sizeof(RelMapFile));
     970             :     else
     971         144 :         memcpy(&newmap, &local_map, sizeof(RelMapFile));
     972             : 
     973             :     /*
     974             :      * Apply the updates to newmap.  No new mappings should appear, unless
     975             :      * somebody is adding indexes to system catalogs.
     976             :      */
     977         484 :     merge_map_updates(&newmap, updates, allowSystemTableMods);
     978             : 
     979             :     /* Write out the updated map and do other necessary tasks */
     980         484 :     write_relmap_file(shared, &newmap, true, true, true,
     981             :                       (shared ? InvalidOid : MyDatabaseId),
     982             :                       (shared ? GLOBALTABLESPACE_OID : MyDatabaseTableSpace),
     983             :                       DatabasePath);
     984             : 
     985             :     /* Now we can release the lock */
     986         484 :     LWLockRelease(RelationMappingLock);
     987         484 : }
     988             : 
     989             : /*
     990             :  * RELMAP resource manager's routines
                     :  *
                     :  * relmap_redo -- replay one RELMAP WAL record.
                     :  *
                     :  * record: the WAL record being replayed.  Only XLOG_RELMAP_UPDATE is
                     :  *      understood; any other op code is reported at PANIC level, as is a
                     :  *      record whose nbytes differs from sizeof(RelMapFile).
                     :  *
                     :  * The map image carried in the record is copied into a local RelMapFile
                     :  * and handed to write_relmap_file.  The shared map is chosen when the
                     :  * record's dbid is InvalidOid, the local map otherwise.  The path string
                     :  * returned by GetDatabasePath is released with pfree afterwards.
     991             :  */
     992             : void
     993           0 : relmap_redo(XLogReaderState *record)
     994             : {
     995           0 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
     996             : 
     997             :     /* Backup blocks are not used in relmap records */
     998             :     Assert(!XLogRecHasAnyBlockRefs(record));
     999             : 
    1000           0 :     if (info == XLOG_RELMAP_UPDATE)
    1001             :     {
    1002           0 :         xl_relmap_update *xlrec = (xl_relmap_update *) XLogRecGetData(record);
    1003             :         RelMapFile  newmap;
    1004             :         char       *dbpath;
    1005             : 
    1006           0 :         if (xlrec->nbytes != sizeof(RelMapFile))
    1007           0 :             elog(PANIC, "relmap_redo: wrong size %u in relmap update record",
    1008             :                  xlrec->nbytes);
    1009           0 :         memcpy(&newmap, xlrec->data, sizeof(newmap));
    1010             : 
    1011             :         /* We need to construct the pathname for this database */
    1012           0 :         dbpath = GetDatabasePath(xlrec->dbid, xlrec->tsid);
    1013             : 
    1014             :         /*
    1015             :          * Write out the new map and send sinval, but of course don't write a
    1016             :          * new WAL entry.  There's no surrounding transaction to tell to
    1017             :          * preserve files, either.
    1018             :          *
    1019             :          * There shouldn't be anyone else updating relmaps during WAL replay,
    1020             :          * so we don't bother to take the RelationMappingLock.  We would need
    1021             :          * to do so if load_relmap_file needed to interlock against writers.
    1022             :          */
    1023           0 :         write_relmap_file((xlrec->dbid == InvalidOid), &newmap,
    1024             :                           false, true, false,
    1025             :                           xlrec->dbid, xlrec->tsid, dbpath);
    1026             : 
    1027           0 :         pfree(dbpath);
    1028             :     }
    1029             :     else
    1030           0 :         elog(PANIC, "relmap_redo: unknown op code %u", info);
    1031           0 : }

Generated by: LCOV version 1.13