LCOV - code coverage report
Current view: top level - src/bin/pg_upgrade - multixact_read_v18.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 0 93 0.0 %
Date: 2026-01-18 09:17:07 Functions: 0 9 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * multixact_read_v18.c
       3             :  *
       4             :  * Functions to read multixact SLRUs from clusters of PostgreSQL version 18
       5             :  * and older.  In version 19, the multixid offsets were expanded from 32 to 64
       6             :  * bits.
       7             :  *
       8             :  * Copyright (c) 2025-2026, PostgreSQL Global Development Group
       9             :  * src/bin/pg_upgrade/multixact_read_v18.c
      10             :  */
      11             : 
      12             : #include "postgres_fe.h"
      13             : 
      14             : #include "multixact_read_v18.h"
      15             : #include "pg_upgrade.h"
      16             : 
      17             : /*
      18             :  * NOTE: below are a bunch of definitions that are copy-pasted from
      19             :  * multixact.c from version 18.  It's important that this file doesn't
      20             :  * #include the new definitions with same names from "multixact_internal.h"!
      21             :  *
      22             :  * To further avoid confusion in the functions exposed outside this source
      23             :  * file, we use MultiXactOffset32 to represent the old-style 32-bit multixid
      24             :  * offsets.  The new 64-bit MultiXactOffset should not be used anywhere in
      25             :  * this file.
      26             :  */
      27             : #ifdef MULTIXACT_INTERNAL_H
      28             : #error multixact_internal.h should not be included in multixact_read_v18.c
      29             : #endif
      30             : #define MultiXactOffset should_not_be_used
      31             : 
      32             : /* We need four bytes per offset */
      33             : #define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset32))
      34             : 
      35             : static inline int64
      36           0 : MultiXactIdToOffsetPage(MultiXactId multi)
      37             : {
      38           0 :     return multi / MULTIXACT_OFFSETS_PER_PAGE;
      39             : }
      40             : 
      41             : static inline int
      42           0 : MultiXactIdToOffsetEntry(MultiXactId multi)
      43             : {
      44           0 :     return multi % MULTIXACT_OFFSETS_PER_PAGE;
      45             : }
      46             : 
      47             : /*
      48             :  * The situation for members is a bit more complex: we store one byte of
      49             :  * additional flag bits for each TransactionId.  To do this without getting
      50             :  * into alignment issues, we store four bytes of flags, and then the
      51             :  * corresponding 4 Xids.  Each such 5-word (20-byte) set we call a "group", and
      52             :  * groups are stored whole in pages.  Thus, with 8kB BLCKSZ, we keep 409 groups
      53             :  * per page.  This wastes 12 bytes per page, but that's OK -- simplicity (and
      54             :  * performance) trumps space efficiency here.
      55             :  *
      56             :  * Note that the "offset" macros work with byte offset, not array indexes, so
      57             :  * arithmetic must be done using "char *" pointers.
      58             :  */
      59             : /* We need eight bits per xact, so one xact fits in a byte */
      60             : #define MXACT_MEMBER_BITS_PER_XACT          8
      61             : #define MXACT_MEMBER_FLAGS_PER_BYTE         1
      62             : #define MXACT_MEMBER_XACT_BITMASK   ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
      63             : 
      64             : /* how many full bytes of flags are there in a group? */
      65             : #define MULTIXACT_FLAGBYTES_PER_GROUP       4
      66             : #define MULTIXACT_MEMBERS_PER_MEMBERGROUP   \
      67             :     (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
      68             : /* size in bytes of a complete group */
      69             : #define MULTIXACT_MEMBERGROUP_SIZE \
      70             :     (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
      71             : #define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
      72             : #define MULTIXACT_MEMBERS_PER_PAGE  \
      73             :     (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
      74             : 
      75             : /* page in which a member is to be found */
      76             : static inline int64
      77           0 : MXOffsetToMemberPage(MultiXactOffset32 offset)
      78             : {
      79           0 :     return offset / MULTIXACT_MEMBERS_PER_PAGE;
      80             : }
      81             : 
      82             : /* Location (byte offset within page) of flag word for a given member */
      83             : static inline int
      84           0 : MXOffsetToFlagsOffset(MultiXactOffset32 offset)
      85             : {
      86           0 :     MultiXactOffset32 group = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
      87           0 :     int         grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
      88           0 :     int         byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
      89             : 
      90           0 :     return byteoff;
      91             : }
      92             : 
      93             : /* Location (byte offset within page) of TransactionId of given member */
      94             : static inline int
      95           0 : MXOffsetToMemberOffset(MultiXactOffset32 offset)
      96             : {
      97           0 :     int         member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
      98             : 
      99           0 :     return MXOffsetToFlagsOffset(offset) +
     100           0 :         MULTIXACT_FLAGBYTES_PER_GROUP +
     101             :         member_in_group * sizeof(TransactionId);
     102             : }
     103             : 
     104             : static inline int
     105           0 : MXOffsetToFlagsBitShift(MultiXactOffset32 offset)
     106             : {
     107           0 :     int         member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
     108           0 :     int         bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
     109             : 
     110           0 :     return bshift;
     111             : }
     112             : 
     113             : /*
     114             :  * Construct reader of old multixacts.
     115             :  *
     116             :  * Returns the malloced memory used by all the other calls in this module.
     117             :  */
     118             : OldMultiXactReader *
     119           0 : AllocOldMultiXactRead(char *pgdata, MultiXactId nextMulti,
     120             :                       MultiXactOffset32 nextOffset)
     121             : {
     122           0 :     OldMultiXactReader *state = pg_malloc_object(OldMultiXactReader);
     123           0 :     char        dir[MAXPGPATH] = {0};
     124             : 
     125           0 :     state->nextMXact = nextMulti;
     126           0 :     state->nextOffset = nextOffset;
     127             : 
     128           0 :     pg_sprintf(dir, "%s/pg_multixact/offsets", pgdata);
     129           0 :     state->offset = AllocSlruRead(dir, false);
     130             : 
     131           0 :     pg_sprintf(dir, "%s/pg_multixact/members", pgdata);
     132           0 :     state->members = AllocSlruRead(dir, false);
     133             : 
     134           0 :     return state;
     135             : }
     136             : 
     137             : /*
     138             :  * This is a simplified version of the GetMultiXactIdMembers() server
     139             :  * function:
     140             :  *
     141             :  * - Only return the updating member, if any.  Upgrade only cares about the
     142             :  *   updaters.  If there is no updating member, return somewhat arbitrarily
     143             :  *   the first locking-only member, because we don't have any way to represent
     144             :  *   "no members".
     145             :  *
     146             :  * - Because there's no concurrent activity, we don't need to worry about
     147             :  *   locking and some corner cases.
     148             :  *
     149             :  * - Don't bail out on invalid entries that could've been left behind after a
     150             :  *   server crash.  Such multixids won't appear anywhere else on disk, so the
     151             :  *   server will never try to read them.  During upgrade, however, we scan
     152             :  *   through all multixids in order, and will encounter such invalid but
     153             :  *   unreferenced multixids too.  We try to distinguish between entries that
     154             :  *   are invalid because of missed disk writes, like entries with zeros in
     155             :  *   offsets or members, and entries that look corrupt in other ways that
     156             :  *   should not happen even on a server crash.
     157             :  *
     158             :  * Returns true on success, false if the multixact was invalid.
     159             :  */
     160             : bool
     161           0 : GetOldMultiXactIdSingleMember(OldMultiXactReader *state, MultiXactId multi,
     162             :                               MultiXactMember *member)
     163             : {
     164             :     MultiXactId nextMXact,
     165             :                 nextOffset,
     166             :                 tmpMXact;
     167             :     int64       pageno,
     168             :                 prev_pageno;
     169             :     int         entryno,
     170             :                 length;
     171             :     char       *buf;
     172             :     MultiXactOffset32 *offptr,
     173             :                 offset;
     174             :     MultiXactOffset32 nextMXOffset;
     175           0 :     TransactionId result_xid = InvalidTransactionId;
     176           0 :     MultiXactStatus result_status = 0;
     177             : 
     178           0 :     nextMXact = state->nextMXact;
     179           0 :     nextOffset = state->nextOffset;
     180             : 
     181             :     /*
     182             :      * Comment copied from GetMultiXactIdMembers in PostgreSQL v18
     183             :      * multixact.c:
     184             :      *
     185             :      * Find out the offset at which we need to start reading MultiXactMembers
     186             :      * and the number of members in the multixact.  We determine the latter as
     187             :      * the difference between this multixact's starting offset and the next
     188             :      * one's.  However, there are some corner cases to worry about:
     189             :      *
     190             :      * 1. This multixact may be the latest one created, in which case there is
     191             :      * no next one to look at.  The next multixact's offset should be set
     192             :      * already, as we set it in RecordNewMultiXact(), but we used to not do
     193             :      * that in older minor versions.  To cope with that case, if this
     194             :      * multixact is the latest one created, use the nextOffset value we read
     195             :      * above as the endpoint.
     196             :      *
     197             :      * 2. Because GetNewMultiXactId skips over offset zero, to reserve zero
     198             :      * to mean "unset", there is an ambiguity near the point of offset
     199             :      * wraparound.  If we see next multixact's offset is one, is that our
     200             :      * multixact's actual endpoint, or did it end at zero with a subsequent
     201             :      * increment?  We handle this using the knowledge that if the zero'th
     202             :      * member slot wasn't filled, it'll contain zero, and zero isn't a valid
     203             :      * transaction ID so it can't be a multixact member.  Therefore, if we
     204             :      * read a zero from the members array, just ignore it.
     205             :      */
     206             : 
     207           0 :     pageno = MultiXactIdToOffsetPage(multi);
     208           0 :     entryno = MultiXactIdToOffsetEntry(multi);
     209             : 
     210           0 :     buf = SlruReadSwitchPage(state->offset, pageno);
     211           0 :     offptr = (MultiXactOffset32 *) buf;
     212           0 :     offptr += entryno;
     213           0 :     offset = *offptr;
     214             : 
     215           0 :     if (offset == 0)
     216             :     {
     217             :         /* Invalid entry.  These can be left behind on a server crash. */
     218           0 :         return false;
     219             :     }
     220             : 
     221             :     /*
     222             :      * Use the same increment rule as GetNewMultiXactId(), that is, don't
     223             :      * handle wraparound explicitly until needed.
     224             :      */
     225           0 :     tmpMXact = multi + 1;
     226             : 
     227           0 :     if (nextMXact == tmpMXact)
     228             :     {
     229             :         /* Corner case 1: there is no next multixact */
     230           0 :         nextMXOffset = nextOffset;
     231             :     }
     232             :     else
     233             :     {
     234             :         /* handle wraparound if needed */
     235           0 :         if (tmpMXact < FirstMultiXactId)
     236           0 :             tmpMXact = FirstMultiXactId;
     237             : 
     238           0 :         prev_pageno = pageno;
     239             : 
     240           0 :         pageno = MultiXactIdToOffsetPage(tmpMXact);
     241           0 :         entryno = MultiXactIdToOffsetEntry(tmpMXact);
     242             : 
     243           0 :         if (pageno != prev_pageno)
     244           0 :             buf = SlruReadSwitchPage(state->offset, pageno);
     245             : 
     246           0 :         offptr = (MultiXactOffset32 *) buf;
     247           0 :         offptr += entryno;
     248           0 :         nextMXOffset = *offptr;
     249             :     }
     250             : 
     251           0 :     if (nextMXOffset == 0)
     252             :     {
     253             :         /* Invalid entry.  These can be left behind on a server crash. */
     254           0 :         return false;
     255             :     }
     256           0 :     length = nextMXOffset - offset;
     257             : 
     258           0 :     if (length < 0)
     259             :     {
     260             :         /*
     261             :          * This entry is corrupt.  We should not see these even after a server
     262             :          * crash.
     263             :          */
     264           0 :         pg_fatal("multixact %u has an invalid length (%d)", multi, length);
     265             :     }
     266           0 :     if (length == 0)
     267             :     {
     268             :         /*
     269             :          * Invalid entry.  The server never writes multixids with zero
     270             :          * members, but it's not clear if a server crash or using pg_resetwal
     271             :          * could leave them behind.  Seems best to accept them.
     272             :          */
     273           0 :         return false;
     274             :     }
     275             : 
     276             :     /* read the members */
     277           0 :     prev_pageno = -1;
     278           0 :     for (int i = 0; i < length; i++, offset++)
     279             :     {
     280             :         TransactionId *xactptr;
     281             :         uint32     *flagsptr;
     282             :         int         flagsoff;
     283             :         int         bshift;
     284             :         int         memberoff;
     285             :         MultiXactStatus status;
     286             : 
     287           0 :         pageno = MXOffsetToMemberPage(offset);
     288           0 :         memberoff = MXOffsetToMemberOffset(offset);
     289             : 
     290           0 :         if (pageno != prev_pageno)
     291             :         {
     292           0 :             buf = SlruReadSwitchPage(state->members, pageno);
     293           0 :             prev_pageno = pageno;
     294             :         }
     295             : 
     296           0 :         xactptr = (TransactionId *) (buf + memberoff);
     297           0 :         if (!TransactionIdIsValid(*xactptr))
     298             :         {
     299             :             /*
     300             :              * Corner case 2: offset must have wrapped around to unused slot
     301             :              * zero.
     302             :              */
     303           0 :             if (offset == 0)
     304           0 :                 continue;
     305             : 
     306             :             /*
     307             :              * Otherwise this is an invalid entry that should not be
     308             :              * referenced from anywhere in the heap.  These can be left behind
     309             :              * on a server crash.  We could return 'false' here, but we prefer
     310             :              * to continue reading the members and converting them the best we
     311             :              * can, to preserve evidence in case this is corruption that
     312             :              * should not have happened.
     313             :              */
     314             :         }
     315             : 
     316           0 :         flagsoff = MXOffsetToFlagsOffset(offset);
     317           0 :         bshift = MXOffsetToFlagsBitShift(offset);
     318           0 :         flagsptr = (uint32 *) (buf + flagsoff);
     319             : 
     320           0 :         status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
     321             : 
     322             :         /*
     323             :          * Remember the updating XID among the members, or first locking XID
     324             :          * if no updating XID.
     325             :          */
     326           0 :         if (ISUPDATE_from_mxstatus(status))
     327             :         {
     328             :             /* sanity check */
     329           0 :             if (ISUPDATE_from_mxstatus(result_status))
     330             :             {
     331             :                 /*
     332             :                  * We don't expect to see more than one updating member, even
     333             :                  * if the server had crashed.
     334             :                  */
     335           0 :                 pg_fatal("multixact %u has more than one updating member",
     336             :                          multi);
     337             :             }
     338           0 :             result_xid = *xactptr;
     339           0 :             result_status = status;
     340             :         }
     341           0 :         else if (!TransactionIdIsValid(result_xid))
     342             :         {
     343           0 :             result_xid = *xactptr;
     344           0 :             result_status = status;
     345             :         }
     346             :     }
     347             : 
     348           0 :     member->xid = result_xid;
     349           0 :     member->status = result_status;
     350           0 :     return true;
     351             : }
     352             : 
     353             : /*
     354             :  * Frees the malloced reader.
     355             :  */
     356             : void
     357           0 : FreeOldMultiXactReader(OldMultiXactReader *state)
     358             : {
     359           0 :     FreeSlruRead(state->offset);
     360           0 :     FreeSlruRead(state->members);
     361             : 
     362           0 :     pfree(state);
     363           0 : }

Generated by: LCOV version 1.16