LCOV - code coverage report
Current view: top level - src/backend/access/transam - twophase.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 601 686 87.6 %
Date: 2020-05-31 22:07:05 Functions: 36 37 97.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * twophase.c
       4             :  *      Two-phase commit support functions.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *      src/backend/access/transam/twophase.c
      11             :  *
      12             :  * NOTES
      13             :  *      Each global transaction is associated with a global transaction
      14             :  *      identifier (GID). The client assigns a GID to a postgres
      15             :  *      transaction with the PREPARE TRANSACTION command.
      16             :  *
      17             :  *      We keep all active global transactions in a shared memory array.
      18             :  *      When the PREPARE TRANSACTION command is issued, the GID is
      19             :  *      reserved for the transaction in the array. This is done before
      20             :  *      a WAL entry is made, because the reservation checks for duplicate
      21             :  *      GIDs and aborts the transaction if there already is a global
      22             :  *      transaction in prepared state with the same GID.
      23             :  *
      24             :  *      A global transaction (gxact) also has dummy PGXACT and PGPROC; this is
      25             :  *      what keeps the XID considered running by TransactionIdIsInProgress.
      26             :  *      It is also convenient as a PGPROC to hook the gxact's locks to.
      27             :  *
      28             :  *      Information to recover prepared transactions in case of crash is
      29             :  *      now stored in WAL for the common case. In some cases there will be
      30             :  *      an extended period between preparing a GXACT and commit/abort, in
      31             :  *      which case we need to separately record prepared transaction data
      32             :  *      in permanent storage. This includes locking information, pending
      33             :  *      notifications etc. All that state information is written to the
      34             :  *      per-transaction state file in the pg_twophase directory.
      35             :  *      All prepared transactions will be written prior to shutdown.
      36             :  *
      37             :  *      The life cycle of state data is as follows:
      38             :  *
      39             :  *      * On PREPARE TRANSACTION, the backend writes state data only to the WAL
      40             :  *        and stores a pointer to the start of the WAL record in
      41             :  *        gxact->prepare_start_lsn.
      42             :  *      * If COMMIT occurs before a checkpoint, the backend reads the data from
      43             :  *        WAL using prepare_start_lsn.
      44             :  *      * On checkpoint, state data is copied to files in the pg_twophase
      45             :  *        directory and fsynced.
      46             :  *      * If COMMIT happens after a checkpoint, the backend reads state data from
      47             :  *        the files.
      48             :  *
      49             :  *      During replay and replication, TwoPhaseState also holds information
      50             :  *      about active prepared transactions that haven't been moved to disk yet.
      51             :  *
      52             :  *      Replay of twophase records happens by the following rules:
      53             :  *
      54             :  *      * At the beginning of recovery, pg_twophase is scanned once, filling
      55             :  *        TwoPhaseState with entries marked with gxact->inredo and
      56             :  *        gxact->ondisk.  Two-phase file data older than the XID horizon of
      57             :  *        the redo position are discarded.
      58             :  *      * On PREPARE redo, the transaction is added to TwoPhaseState->prepXacts.
      59             :  *        gxact->inredo is set to true for such entries.
      60             :  *      * On Checkpoint we iterate through TwoPhaseState->prepXacts entries
      61             :  *        that have gxact->inredo set and are behind the redo_horizon. We
      62             :  *        save them to disk and then switch gxact->ondisk to true.
      63             :  *      * On COMMIT/ABORT we delete the entry from TwoPhaseState->prepXacts.
      64             :  *        If gxact->ondisk is true, the corresponding entry from the disk
      65             :  *        is additionally deleted.
      66             :  *      * RecoverPreparedTransactions(), StandbyRecoverPreparedTransactions()
      67             :  *        and PrescanPreparedTransactions() have been modified to go through
      68             :  *        gxact->inredo entries that have not made it to disk.
      69             :  *
      70             :  *-------------------------------------------------------------------------
      71             :  */
      72             : #include "postgres.h"
      73             : 
      74             : #include <fcntl.h>
      75             : #include <sys/stat.h>
      76             : #include <time.h>
      77             : #include <unistd.h>
      78             : 
      79             : #include "access/commit_ts.h"
      80             : #include "access/htup_details.h"
      81             : #include "access/subtrans.h"
      82             : #include "access/transam.h"
      83             : #include "access/twophase.h"
      84             : #include "access/twophase_rmgr.h"
      85             : #include "access/xact.h"
      86             : #include "access/xlog.h"
      87             : #include "access/xloginsert.h"
      88             : #include "access/xlogreader.h"
      89             : #include "access/xlogutils.h"
      90             : #include "catalog/pg_type.h"
      91             : #include "catalog/storage.h"
      92             : #include "funcapi.h"
      93             : #include "miscadmin.h"
      94             : #include "pg_trace.h"
      95             : #include "pgstat.h"
      96             : #include "replication/origin.h"
      97             : #include "replication/syncrep.h"
      98             : #include "replication/walsender.h"
      99             : #include "storage/fd.h"
     100             : #include "storage/ipc.h"
     101             : #include "storage/md.h"
     102             : #include "storage/predicate.h"
     103             : #include "storage/proc.h"
     104             : #include "storage/procarray.h"
     105             : #include "storage/sinvaladt.h"
     106             : #include "storage/smgr.h"
     107             : #include "utils/builtins.h"
     108             : #include "utils/memutils.h"
     109             : #include "utils/timestamp.h"
     110             : 
     111             : /*
     112             :  * Directory where Two-phase commit files reside within PGDATA
     113             :  */
     114             : #define TWOPHASE_DIR "pg_twophase"
     115             : 
     116             : /* GUC variable, can't be changed after startup */
     117             : int         max_prepared_xacts = 0;
     118             : 
     119             : /*
     120             :  * This struct describes one global transaction that is in prepared state
     121             :  * or attempting to become prepared.
     122             :  *
     123             :  * The lifecycle of a global transaction is:
     124             :  *
     125             :  * 1. After checking that the requested GID is not in use, set up an entry in
     126             :  * the TwoPhaseState->prepXacts array with the correct GID and valid = false,
     127             :  * and mark it as locked by my backend.
     128             :  *
     129             :  * 2. After successfully completing prepare, set valid = true and enter the
     130             :  * referenced PGPROC into the global ProcArray.
     131             :  *
     132             :  * 3. To begin COMMIT PREPARED or ROLLBACK PREPARED, check that the entry is
     133             :  * valid and not locked, then mark the entry as locked by storing my current
     134             :  * backend ID into locking_backend.  This prevents concurrent attempts to
     135             :  * commit or rollback the same prepared xact.
     136             :  *
     137             :  * 4. On completion of COMMIT PREPARED or ROLLBACK PREPARED, remove the entry
     138             :  * from the ProcArray and the TwoPhaseState->prepXacts array and return it to
     139             :  * the freelist.
     140             :  *
     141             :  * Note that if the preparing transaction fails between steps 1 and 2, the
     142             :  * entry must be removed so that the GID and the GlobalTransaction struct
     143             :  * can be reused.  See AtAbort_Twophase().
     144             :  *
     145             :  * typedef struct GlobalTransactionData *GlobalTransaction appears in
     146             :  * twophase.h
     147             :  */
     148             : 
     149             : typedef struct GlobalTransactionData
     150             : {
     151             :     GlobalTransaction next;     /* list link for free list */
     152             :     int         pgprocno;       /* ID of associated dummy PGPROC */
     153             :     BackendId   dummyBackendId; /* similar to backend id for backends */
     154             :     TimestampTz prepared_at;    /* time of preparation */
     155             : 
     156             :     /*
     157             :      * Note that we need to keep track of two LSNs for each GXACT. We keep
     158             :      * track of the start LSN because this is the address we must use to read
     159             :      * state data back from WAL when committing a prepared GXACT. We keep
     160             :      * track of the end LSN because that is the LSN we need to wait for prior
     161             :      * to commit.
     162             :      */
     163             :     XLogRecPtr  prepare_start_lsn;  /* XLOG offset of prepare record start */
     164             :     XLogRecPtr  prepare_end_lsn;    /* XLOG offset of prepare record end */
     165             :     TransactionId xid;          /* The GXACT id */
     166             : 
     167             :     Oid         owner;          /* ID of user that executed the xact */
     168             :     BackendId   locking_backend;    /* backend currently working on the xact */
     169             :     bool        valid;          /* true if PGPROC entry is in proc array */
     170             :     bool        ondisk;         /* true if prepare state file is on disk */
     171             :     bool        inredo;         /* true if entry was added via xlog_redo */
     172             :     char        gid[GIDSIZE];   /* The GID assigned to the prepared xact */
     173             : }           GlobalTransactionData;
     174             : 
     175             : /*
     176             :  * Two Phase Commit shared state.  Access to this struct is protected
     177             :  * by TwoPhaseStateLock.
     178             :  */
     179             : typedef struct TwoPhaseStateData
     180             : {
     181             :     /* Head of linked list of free GlobalTransactionData structs */
     182             :     GlobalTransaction freeGXacts;
     183             : 
     184             :     /* Number of valid prepXacts entries. */
     185             :     int         numPrepXacts;
     186             : 
     187             :     /* There are max_prepared_xacts items in this array */
     188             :     GlobalTransaction prepXacts[FLEXIBLE_ARRAY_MEMBER];
     189             : } TwoPhaseStateData;
     190             : 
     191             : static TwoPhaseStateData *TwoPhaseState;
     192             : 
     193             : /*
     194             :  * Global transaction entry currently locked by us, if any.  Note that any
     195             :  * access to the entry pointed to by this variable must be protected by
     196             :  * TwoPhaseStateLock, though obviously the pointer itself doesn't need to be
     197             :  * (since it's just local memory).
     198             :  */
     199             : static GlobalTransaction MyLockedGxact = NULL;
     200             : 
     201             : static bool twophaseExitRegistered = false;
     202             : 
     203             : static void RecordTransactionCommitPrepared(TransactionId xid,
     204             :                                             int nchildren,
     205             :                                             TransactionId *children,
     206             :                                             int nrels,
     207             :                                             RelFileNode *rels,
     208             :                                             int ninvalmsgs,
     209             :                                             SharedInvalidationMessage *invalmsgs,
     210             :                                             bool initfileinval,
     211             :                                             const char *gid);
     212             : static void RecordTransactionAbortPrepared(TransactionId xid,
     213             :                                            int nchildren,
     214             :                                            TransactionId *children,
     215             :                                            int nrels,
     216             :                                            RelFileNode *rels,
     217             :                                            const char *gid);
     218             : static void ProcessRecords(char *bufptr, TransactionId xid,
     219             :                            const TwoPhaseCallback callbacks[]);
     220             : static void RemoveGXact(GlobalTransaction gxact);
     221             : 
     222             : static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len);
     223             : static char *ProcessTwoPhaseBuffer(TransactionId xid,
     224             :                                    XLogRecPtr prepare_start_lsn,
     225             :                                    bool fromdisk, bool setParent, bool setNextXid);
     226             : static void MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid,
     227             :                                 const char *gid, TimestampTz prepared_at, Oid owner,
     228             :                                 Oid databaseid);
     229             : static void RemoveTwoPhaseFile(TransactionId xid, bool giveWarning);
     230             : static void RecreateTwoPhaseFile(TransactionId xid, void *content, int len);
     231             : 
     232             : /*
     233             :  * Initialization of shared memory
     234             :  */
     235             : Size
     236        4344 : TwoPhaseShmemSize(void)
     237             : {
     238             :     Size        size;
     239             : 
     240             :     /* Need the fixed struct, the array of pointers, and the GTD structs */
     241        4344 :     size = offsetof(TwoPhaseStateData, prepXacts);
     242        4344 :     size = add_size(size, mul_size(max_prepared_xacts,
     243             :                                    sizeof(GlobalTransaction)));
     244        4344 :     size = MAXALIGN(size);
     245        4344 :     size = add_size(size, mul_size(max_prepared_xacts,
     246             :                                    sizeof(GlobalTransactionData)));
     247             : 
     248        4344 :     return size;
     249             : }
     250             : 
     251             : void
     252        2170 : TwoPhaseShmemInit(void)
     253             : {
     254             :     bool        found;
     255             : 
     256        2170 :     TwoPhaseState = ShmemInitStruct("Prepared Transaction Table",
     257             :                                     TwoPhaseShmemSize(),
     258             :                                     &found);
     259        2170 :     if (!IsUnderPostmaster)
     260             :     {
     261             :         GlobalTransaction gxacts;
     262             :         int         i;
     263             : 
     264             :         Assert(!found);
     265        2170 :         TwoPhaseState->freeGXacts = NULL;
     266        2170 :         TwoPhaseState->numPrepXacts = 0;
     267             : 
     268             :         /*
     269             :          * Initialize the linked list of free GlobalTransactionData structs
     270             :          */
     271        2170 :         gxacts = (GlobalTransaction)
     272        2170 :             ((char *) TwoPhaseState +
     273        2170 :              MAXALIGN(offsetof(TwoPhaseStateData, prepXacts) +
     274             :                       sizeof(GlobalTransaction) * max_prepared_xacts));
     275        2912 :         for (i = 0; i < max_prepared_xacts; i++)
     276             :         {
     277             :             /* insert into linked list */
     278         742 :             gxacts[i].next = TwoPhaseState->freeGXacts;
     279         742 :             TwoPhaseState->freeGXacts = &gxacts[i];
     280             : 
     281             :             /* associate it with a PGPROC assigned by InitProcGlobal */
     282         742 :             gxacts[i].pgprocno = PreparedXactProcs[i].pgprocno;
     283             : 
     284             :             /*
     285             :              * Assign a unique ID for each dummy proc, so that the range of
     286             :              * dummy backend IDs immediately follows the range of normal
     287             :              * backend IDs. We don't dare to assign a real backend ID to dummy
     288             :              * procs, because prepared transactions don't take part in cache
     289             :              * invalidation like a real backend ID would imply, but having a
     290             :              * unique ID for them is nevertheless handy. This arrangement
     291             :              * allows you to allocate an array of size (MaxBackends +
     292             :              * max_prepared_xacts + 1), and have a slot for every backend and
     293             :              * prepared transaction. Currently multixact.c uses that
     294             :              * technique.
     295             :              */
     296         742 :             gxacts[i].dummyBackendId = MaxBackends + 1 + i;
     297             :         }
     298             :     }
     299             :     else
     300             :         Assert(found);
     301        2170 : }
     302             : 
     303             : /*
     304             :  * Exit hook to unlock the global transaction entry we're working on.
     305             :  */
     306             : static void
     307          76 : AtProcExit_Twophase(int code, Datum arg)
     308             : {
     309             :     /* same logic as abort */
     310          76 :     AtAbort_Twophase();
     311          76 : }
     312             : 
     313             : /*
     314             :  * Abort hook to unlock the global transaction entry we're working on.
     315             :  */
     316             : void
     317       20414 : AtAbort_Twophase(void)
     318             : {
     319       20414 :     if (MyLockedGxact == NULL)
     320       20414 :         return;
     321             : 
     322             :     /*
     323             :      * What to do with the locked global transaction entry?  If we were in the
     324             :      * process of preparing the transaction, but haven't written the WAL
     325             :      * record and state file yet, the transaction must not be considered as
     326             :      * prepared.  Likewise, if we are in the process of finishing an
     327             :      * already-prepared transaction, and fail after having already written the
     328             :      * 2nd phase commit or rollback record to the WAL, the transaction should
     329             :      * not be considered as prepared anymore.  In those cases, just remove the
     330             :      * entry from shared memory.
     331             :      *
     332             :      * Otherwise, the entry must be left in place so that the transaction can
     333             :      * be finished later, so just unlock it.
     334             :      *
     335             :      * If we abort during prepare, after having written the WAL record, we
     336             :      * might not have transferred all locks and other state to the prepared
     337             :      * transaction yet.  Likewise, if we abort during commit or rollback,
     338             :      * after having written the WAL record, we might not have released all the
     339             :      * resources held by the transaction yet.  In those cases, the in-memory
     340             :      * state can be wrong, but it's too late to back out.
     341             :      */
     342           0 :     LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
     343           0 :     if (!MyLockedGxact->valid)
     344           0 :         RemoveGXact(MyLockedGxact);
     345             :     else
     346           0 :         MyLockedGxact->locking_backend = InvalidBackendId;
     347           0 :     LWLockRelease(TwoPhaseStateLock);
     348             : 
     349           0 :     MyLockedGxact = NULL;
     350             : }
     351             : 
     352             : /*
     353             :  * This is called after we have finished transferring state to the prepared
     354             :  * PGXACT entry.
     355             :  */
     356             : void
     357          82 : PostPrepare_Twophase(void)
     358             : {
     359          82 :     LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
     360          82 :     MyLockedGxact->locking_backend = InvalidBackendId;
     361          82 :     LWLockRelease(TwoPhaseStateLock);
     362             : 
     363          82 :     MyLockedGxact = NULL;
     364          82 : }
     365             : 
     366             : 
     367             : /*
     368             :  * MarkAsPreparing
     369             :  *      Reserve the GID for the given transaction.
     370             :  */
     371             : GlobalTransaction
     372          70 : MarkAsPreparing(TransactionId xid, const char *gid,
     373             :                 TimestampTz prepared_at, Oid owner, Oid databaseid)
     374             : {
     375             :     GlobalTransaction gxact;
     376             :     int         i;
     377             : 
     378          70 :     if (strlen(gid) >= GIDSIZE)
     379           0 :         ereport(ERROR,
     380             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     381             :                  errmsg("transaction identifier \"%s\" is too long",
     382             :                         gid)));
     383             : 
     384             :     /* fail immediately if feature is disabled */
     385          70 :     if (max_prepared_xacts == 0)
     386          16 :         ereport(ERROR,
     387             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     388             :                  errmsg("prepared transactions are disabled"),
     389             :                  errhint("Set max_prepared_transactions to a nonzero value.")));
     390             : 
     391             :     /* on first call, register the exit hook */
     392          54 :     if (!twophaseExitRegistered)
     393             :     {
     394          34 :         before_shmem_exit(AtProcExit_Twophase, 0);
     395          34 :         twophaseExitRegistered = true;
     396             :     }
     397             : 
     398          54 :     LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
     399             : 
     400             :     /* Check for conflicting GID */
     401          60 :     for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
     402             :     {
     403           8 :         gxact = TwoPhaseState->prepXacts[i];
     404           8 :         if (strcmp(gxact->gid, gid) == 0)
     405             :         {
     406           2 :             ereport(ERROR,
     407             :                     (errcode(ERRCODE_DUPLICATE_OBJECT),
     408             :                      errmsg("transaction identifier \"%s\" is already in use",
     409             :                             gid)));
     410             :         }
     411             :     }
     412             : 
     413             :     /* Get a free gxact from the freelist */
     414          52 :     if (TwoPhaseState->freeGXacts == NULL)
     415           0 :         ereport(ERROR,
     416             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     417             :                  errmsg("maximum number of prepared transactions reached"),
     418             :                  errhint("Increase max_prepared_transactions (currently %d).",
     419             :                          max_prepared_xacts)));
     420          52 :     gxact = TwoPhaseState->freeGXacts;
     421          52 :     TwoPhaseState->freeGXacts = gxact->next;
     422             : 
     423          52 :     MarkAsPreparingGuts(gxact, xid, gid, prepared_at, owner, databaseid);
     424             : 
     425          52 :     gxact->ondisk = false;
     426             : 
     427             :     /* And insert it into the active array */
     428             :     Assert(TwoPhaseState->numPrepXacts < max_prepared_xacts);
     429          52 :     TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts++] = gxact;
     430             : 
     431          52 :     LWLockRelease(TwoPhaseStateLock);
     432             : 
     433          52 :     return gxact;
     434             : }
     435             : 
     436             : /*
     437             :  * MarkAsPreparingGuts
     438             :  *
     439             :  * This fills in a gxact struct and its dummy PGPROC/PGXACT; it does not itself insert the entry into the active array.
     440             :  * NOTE: this is also used when reloading a gxact after a crash; so avoid
     441             :  * assuming that we can use very much backend context.
     442             :  *
     443             :  * Note: This function should be called with appropriate locks held.
     444             :  */
     445             : static void
     446          82 : MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
     447             :                     TimestampTz prepared_at, Oid owner, Oid databaseid)
     448             : {
     449             :     PGPROC     *proc;
     450             :     PGXACT     *pgxact;
     451             :     int         i;
     452             : 
     453             :     Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE));
     454             : 
     455             :     Assert(gxact != NULL);
     456          82 :     proc = &ProcGlobal->allProcs[gxact->pgprocno];
     457          82 :     pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
     458             : 
     459             :     /* Initialize the PGPROC entry */
     460        8856 :     MemSet(proc, 0, sizeof(PGPROC));
     461          82 :     proc->pgprocno = gxact->pgprocno;
     462          82 :     SHMQueueElemInit(&(proc->links));
     463          82 :     proc->waitStatus = STATUS_OK;
     464             :     /* We set up the gxact's VXID as InvalidBackendId/XID */
     465          82 :     proc->lxid = (LocalTransactionId) xid;
     466          82 :     pgxact->xid = xid;
     467          82 :     pgxact->xmin = InvalidTransactionId;
     468          82 :     proc->delayChkpt = false;
     469          82 :     pgxact->vacuumFlags = 0;
     470          82 :     proc->pid = 0;
     471          82 :     proc->backendId = InvalidBackendId;
     472          82 :     proc->databaseId = databaseid;
     473          82 :     proc->roleId = owner;
     474          82 :     proc->tempNamespaceId = InvalidOid;
     475          82 :     proc->isBackgroundWorker = false;
     476          82 :     proc->lwWaiting = false;
     477          82 :     proc->lwWaitMode = 0;
     478          82 :     proc->waitLock = NULL;
     479          82 :     proc->waitProcLock = NULL;
     480        1394 :     for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
     481        1312 :         SHMQueueInit(&(proc->myProcLocks[i]));
     482             :     /* subxid data must be filled later by GXactLoadSubxactData */
     483          82 :     pgxact->overflowed = false;
     484          82 :     pgxact->nxids = 0;
     485             : 
     486          82 :     gxact->prepared_at = prepared_at;
     487          82 :     gxact->xid = xid;
     488          82 :     gxact->owner = owner;
     489          82 :     gxact->locking_backend = MyBackendId;
     490          82 :     gxact->valid = false;
     491          82 :     gxact->inredo = false;
     492          82 :     strcpy(gxact->gid, gid);
     493             : 
     494             :     /*
     495             :      * Remember that we have this GlobalTransaction entry locked for us. If we
     496             :      * abort after this, we must release it.
     497             :      */
     498          82 :     MyLockedGxact = gxact;
     499          82 : }
     500             : 
     501             : /*
     502             :  * GXactLoadSubxactData
     503             :  *
     504             :  * If the transaction being persisted had any subtransactions, this must
     505             :  * be called before MarkAsPrepared() to load information into the dummy
     506             :  * PGPROC.
     507             :  */
     508             : static void
     509          60 : GXactLoadSubxactData(GlobalTransaction gxact, int nsubxacts,
     510             :                      TransactionId *children)
     511             : {
     512          60 :     PGPROC     *proc = &ProcGlobal->allProcs[gxact->pgprocno];
     513          60 :     PGXACT     *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
     514             : 
     515             :     /* We need no extra lock since the GXACT isn't valid yet */
     516          60 :     if (nsubxacts > PGPROC_MAX_CACHED_SUBXIDS)
     517             :     {
     518           8 :         pgxact->overflowed = true;
     519           8 :         nsubxacts = PGPROC_MAX_CACHED_SUBXIDS;
     520             :     }
     521          60 :     if (nsubxacts > 0)
     522             :     {
     523          60 :         memcpy(proc->subxids.xids, children,
     524             :                nsubxacts * sizeof(TransactionId));
     525          60 :         pgxact->nxids = nsubxacts;
     526             :     }
     527          60 : }
     528             : 
     529             : /*
     530             :  * MarkAsPrepared
     531             :  *      Mark the GXACT as fully valid, and enter it into the global ProcArray.
     532             :  *
     533             :  * lock_held indicates whether caller already holds TwoPhaseStateLock.
     534             :  */
     535             : static void
     536          82 : MarkAsPrepared(GlobalTransaction gxact, bool lock_held)
     537             : {
     538             :     /* Lock here may be overkill, but I'm not convinced of that ... */
     539          82 :     if (!lock_held)
     540          52 :         LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
     541             :     Assert(!gxact->valid);
     542          82 :     gxact->valid = true;
     543          82 :     if (!lock_held)
     544          52 :         LWLockRelease(TwoPhaseStateLock);
     545             : 
     546             :     /*
     547             :      * Put it into the global ProcArray so TransactionIdIsInProgress considers
     548             :      * the XID as still running.
     549             :      */
     550          82 :     ProcArrayAdd(&ProcGlobal->allProcs[gxact->pgprocno]);
     551          82 : }
     552             : 
     553             : /*
     554             :  * LockGXact
     555             :  *      Locate the prepared transaction and mark it busy for COMMIT/ROLLBACK PREPARED.
     556             :  */
     557             : static GlobalTransaction
     558          74 : LockGXact(const char *gid, Oid user)
     559             : {
     560             :     int         i;
     561             : 
     562             :     /* on first call, register the exit hook */
     563          74 :     if (!twophaseExitRegistered)
     564             :     {
     565          42 :         before_shmem_exit(AtProcExit_Twophase, 0);
     566          42 :         twophaseExitRegistered = true;
     567             :     }
     568             : 
     569          74 :     LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
     570             : 
     571          74 :     for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
     572             :     {
     573          62 :         GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
     574          62 :         PGPROC     *proc = &ProcGlobal->allProcs[gxact->pgprocno];
     575             : 
     576             :         /* Ignore not-yet-valid GIDs */
     577          62 :         if (!gxact->valid)
     578           0 :             continue;
     579          62 :         if (strcmp(gxact->gid, gid) != 0)
     580           0 :             continue;
     581             : 
     582             :         /* Found it, but has someone else got it locked? */
     583          62 :         if (gxact->locking_backend != InvalidBackendId)
     584           0 :             ereport(ERROR,
     585             :                     (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     586             :                      errmsg("prepared transaction with identifier \"%s\" is busy",
     587             :                             gid)));
     588             : 
     589          62 :         if (user != gxact->owner && !superuser_arg(user))
     590           0 :             ereport(ERROR,
     591             :                     (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
     592             :                      errmsg("permission denied to finish prepared transaction"),
     593             :                      errhint("Must be superuser or the user that prepared the transaction.")));
     594             : 
     595             :         /*
     596             :          * Note: it probably would be possible to allow committing from
     597             :          * another database; but at the moment NOTIFY is known not to work and
     598             :          * there may be some other issues as well.  Hence disallow until
     599             :          * someone gets motivated to make it work.
     600             :          */
     601          62 :         if (MyDatabaseId != proc->databaseId)
     602           0 :             ereport(ERROR,
     603             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     604             :                      errmsg("prepared transaction belongs to another database"),
     605             :                      errhint("Connect to the database where the transaction was prepared to finish it.")));
     606             : 
     607             :         /* OK for me to lock it */
     608          62 :         gxact->locking_backend = MyBackendId;
     609          62 :         MyLockedGxact = gxact;
     610             : 
     611          62 :         LWLockRelease(TwoPhaseStateLock);
     612             : 
     613          62 :         return gxact;
     614             :     }
     615             : 
     616          12 :     LWLockRelease(TwoPhaseStateLock);
     617             : 
     618          12 :     ereport(ERROR,
     619             :             (errcode(ERRCODE_UNDEFINED_OBJECT),
     620             :              errmsg("prepared transaction with identifier \"%s\" does not exist",
     621             :                     gid)));
     622             : 
     623             :     /* NOTREACHED */
     624             :     return NULL;
     625             : }
     626             : 
     627             : /*
     628             :  * RemoveGXact
     629             :  *      Remove the prepared transaction from the shared memory array.
     630             :  *
     631             :  * NB: caller should have already removed it from ProcArray
     632             :  */
     633             : static void
     634         104 : RemoveGXact(GlobalTransaction gxact)
     635             : {
     636             :     int         i;
     637             : 
     638             :     Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE));
     639             : 
     640         104 :     for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
     641             :     {
     642         104 :         if (gxact == TwoPhaseState->prepXacts[i])
     643             :         {
     644             :             /* remove from the active array */
     645         104 :             TwoPhaseState->numPrepXacts--;
     646         104 :             TwoPhaseState->prepXacts[i] = TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts];
     647             : 
     648             :             /* and put it back in the freelist */
     649         104 :             gxact->next = TwoPhaseState->freeGXacts;
     650         104 :             TwoPhaseState->freeGXacts = gxact;
     651             : 
     652         104 :             return;
     653             :         }
     654             :     }
     655             : 
     656           0 :     elog(ERROR, "failed to find %p in GlobalTransaction array", gxact);
     657             : }
     658             : 
     659             : /*
     660             :  * Passes back in *gxacts an array of all prepared transactions, for the
     661             :  * user-level function pg_prepared_xact, and returns the number of entries.
     662             :  *
     663             :  * The returned array and all its elements are copies of internal data
     664             :  * structures, to minimize the time we need to hold the TwoPhaseStateLock.
     665             :  *
     666             :  * WARNING -- we return even those transactions that are not fully prepared
     667             :  * yet.  The caller should filter them out if they are not wanted.
     668             :  *
     669             :  * The array is palloc'd; *gxacts is set to NULL when there are no entries.
     670             :  */
     671             : static int
     672          60 : GetPreparedTransactionList(GlobalTransaction *gxacts)
     673             : {
     674             :     GlobalTransaction array;
     675             :     int         num;
     676             :     int         i;
     677             : 
     678          60 :     LWLockAcquire(TwoPhaseStateLock, LW_SHARED);
     679             : 
     680          60 :     if (TwoPhaseState->numPrepXacts == 0)
     681             :     {
     682          42 :         LWLockRelease(TwoPhaseStateLock);
     683             : 
     684          42 :         *gxacts = NULL;
     685          42 :         return 0;
     686             :     }
     687             : 
     688          18 :     num = TwoPhaseState->numPrepXacts;
     689          18 :     array = (GlobalTransaction) palloc(sizeof(GlobalTransactionData) * num);
     690          18 :     *gxacts = array;
     691          40 :     for (i = 0; i < num; i++)
     692          22 :         memcpy(array + i, TwoPhaseState->prepXacts[i],
     693             :                sizeof(GlobalTransactionData));
     694             : 
     695          18 :     LWLockRelease(TwoPhaseStateLock);
     696             : 
     697          18 :     return num;
     698             : }
     699             : 
     700             : 
     701             : /* Working status for pg_prepared_xact */
     702             : typedef struct
     703             : {
     704             :     GlobalTransaction array;
     705             :     int         ngxacts;
     706             :     int         currIdx;
     707             : } Working_State;
     708             : 
     709             : /*
     710             :  * pg_prepared_xact
     711             :  *      Produce a view with one row per prepared transaction.
     712             :  *
     713             :  * This function is here so we don't have to export the
     714             :  * GlobalTransactionData struct definition.
     715             :  */
     716             : Datum
     717          82 : pg_prepared_xact(PG_FUNCTION_ARGS)
     718             : {
     719             :     FuncCallContext *funcctx;
     720             :     Working_State *status;
     721             : 
     722          82 :     if (SRF_IS_FIRSTCALL())
     723             :     {
     724             :         TupleDesc   tupdesc;
     725             :         MemoryContext oldcontext;
     726             : 
     727             :         /* create a function context for cross-call persistence */
     728          60 :         funcctx = SRF_FIRSTCALL_INIT();
     729             : 
     730             :         /*
     731             :          * Switch to memory context appropriate for multiple function calls
     732             :          */
     733          60 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     734             : 
     735             :         /* build tupdesc for result tuples */
     736             :         /* this had better match pg_prepared_xacts view in system_views.sql */
     737          60 :         tupdesc = CreateTemplateTupleDesc(5);
     738          60 :         TupleDescInitEntry(tupdesc, (AttrNumber) 1, "transaction",
     739             :                            XIDOID, -1, 0);
     740          60 :         TupleDescInitEntry(tupdesc, (AttrNumber) 2, "gid",
     741             :                            TEXTOID, -1, 0);
     742          60 :         TupleDescInitEntry(tupdesc, (AttrNumber) 3, "prepared",
     743             :                            TIMESTAMPTZOID, -1, 0);
     744          60 :         TupleDescInitEntry(tupdesc, (AttrNumber) 4, "ownerid",
     745             :                            OIDOID, -1, 0);
     746          60 :         TupleDescInitEntry(tupdesc, (AttrNumber) 5, "dbid",
     747             :                            OIDOID, -1, 0);
     748             : 
     749          60 :         funcctx->tuple_desc = BlessTupleDesc(tupdesc);
     750             : 
     751             :         /*
     752             :          * Collect all the 2PC status information that we will format and send
     753             :          * out as a result set.
     754             :          */
     755          60 :         status = (Working_State *) palloc(sizeof(Working_State));
     756          60 :         funcctx->user_fctx = (void *) status;
     757             : 
     758          60 :         status->ngxacts = GetPreparedTransactionList(&status->array);
     759          60 :         status->currIdx = 0;
     760             : 
     761          60 :         MemoryContextSwitchTo(oldcontext);
     762             :     }
     763             : 
     764          82 :     funcctx = SRF_PERCALL_SETUP();
     765          82 :     status = (Working_State *) funcctx->user_fctx;
     766             : 
     767          82 :     while (status->array != NULL && status->currIdx < status->ngxacts)
     768             :     {
     769          22 :         GlobalTransaction gxact = &status->array[status->currIdx++];
     770          22 :         PGPROC     *proc = &ProcGlobal->allProcs[gxact->pgprocno];
     771          22 :         PGXACT     *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
     772             :         Datum       values[5];
     773             :         bool        nulls[5];
     774             :         HeapTuple   tuple;
     775             :         Datum       result;
     776             : 
     777          22 :         if (!gxact->valid)
     778           0 :             continue;
     779             : 
     780             :         /*
     781             :          * Form tuple with appropriate data.
     782             :          */
     783         132 :         MemSet(values, 0, sizeof(values));
     784          22 :         MemSet(nulls, 0, sizeof(nulls));
     785             : 
     786          22 :         values[0] = TransactionIdGetDatum(pgxact->xid);
     787          22 :         values[1] = CStringGetTextDatum(gxact->gid);
     788          22 :         values[2] = TimestampTzGetDatum(gxact->prepared_at);
     789          22 :         values[3] = ObjectIdGetDatum(gxact->owner);
     790          22 :         values[4] = ObjectIdGetDatum(proc->databaseId);
     791             : 
     792          22 :         tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     793          22 :         result = HeapTupleGetDatum(tuple);
     794          22 :         SRF_RETURN_NEXT(funcctx, result);
     795             :     }
     796             : 
     797          60 :     SRF_RETURN_DONE(funcctx);
     798             : }
     799             : 
     800             : /*
     801             :  * TwoPhaseGetGXact
     802             :  *      Get the GlobalTransaction struct for a prepared transaction
     803             :  *      specified by XID
     804             :  *
     805             :  * If lock_held is set to true, TwoPhaseStateLock will not be taken, so the
     806             :  * caller had better hold it.
     807             :  */
     808             : static GlobalTransaction
     809         382 : TwoPhaseGetGXact(TransactionId xid, bool lock_held)
     810             : {
     811         382 :     GlobalTransaction result = NULL;
     812             :     int         i;
     813             : 
     814             :     static TransactionId cached_xid = InvalidTransactionId;
     815             :     static GlobalTransaction cached_gxact = NULL;
     816             : 
     817             :     Assert(!lock_held || LWLockHeldByMe(TwoPhaseStateLock));
     818             : 
     819             :     /*
     820             :      * During a recovery, COMMIT PREPARED, or ABORT PREPARED, we'll be called
     821             :      * repeatedly for the same XID.  We can save work with a simple cache.
     822             :      */
     823         382 :     if (xid == cached_xid)
     824         260 :         return cached_gxact;
     825             : 
     826         122 :     if (!lock_held)
     827          82 :         LWLockAcquire(TwoPhaseStateLock, LW_SHARED);
     828             : 
     829         136 :     for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
     830             :     {
     831         136 :         GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
     832         136 :         PGXACT     *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
     833             : 
     834         136 :         if (pgxact->xid == xid)
     835             :         {
     836         122 :             result = gxact;
     837         122 :             break;
     838             :         }
     839             :     }
     840             : 
     841         122 :     if (!lock_held)
     842          82 :         LWLockRelease(TwoPhaseStateLock);
     843             : 
     844         122 :     if (result == NULL)         /* should not happen */
     845           0 :         elog(ERROR, "failed to find GlobalTransaction for xid %u", xid);
     846             : 
     847         122 :     cached_xid = xid;
     848         122 :     cached_gxact = result;
     849             : 
     850         122 :     return result;
     851             : }
     852             : 
     853             : /*
     854             :  * TwoPhaseGetDummyBackendId
     855             :  *      Get the dummy backend ID for prepared transaction specified by XID
     856             :  *
     857             :  * Dummy backend IDs are similar to real backend IDs of real backends.
     858             :  * They start at MaxBackends + 1, and are unique across all currently active
     859             :  * real backends and prepared transactions.  If lock_held is set to true,
     860             :  * TwoPhaseStateLock will not be taken, so the caller had better hold it.
     861             :  */
     862             : BackendId
     863          44 : TwoPhaseGetDummyBackendId(TransactionId xid, bool lock_held)
     864             : {
     865          44 :     GlobalTransaction gxact = TwoPhaseGetGXact(xid, lock_held);
     866             : 
     867          44 :     return gxact->dummyBackendId;
     868             : }
     869             : 
     870             : /*
     871             :  * TwoPhaseGetDummyProc
     872             :  *      Get the PGPROC that represents a prepared transaction specified by XID
     873             :  *
     874             :  * If lock_held is set to true, TwoPhaseStateLock will not be taken, so the
     875             :  * caller had better hold it.
     876             :  */
     877             : PGPROC *
     878         338 : TwoPhaseGetDummyProc(TransactionId xid, bool lock_held)
     879             : {
     880         338 :     GlobalTransaction gxact = TwoPhaseGetGXact(xid, lock_held);
     881             : 
     882         338 :     return &ProcGlobal->allProcs[gxact->pgprocno];
     883             : }
     884             : 
     885             : /************************************************************************/
     886             : /* State file support                                                   */
     887             : /************************************************************************/
     888             : 
     889             : #define TwoPhaseFilePath(path, xid) \
     890             :     snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X", xid)
     891             : 
     892             : /*
     893             :  * 2PC state file format:
     894             :  *
     895             :  *  1. TwoPhaseFileHeader
     896             :  *  2. GID (NUL-terminated string, gidlen bytes)
     897             :  *  3. TransactionId[] (subtransactions)
     898             :  *  4. RelFileNode[] (files to be deleted at commit)
     899             :  *  5. RelFileNode[] (files to be deleted at abort)
     900             :  *  6. SharedInvalidationMessage[] (inval messages to be sent at commit)
     901             :  *  7. TwoPhaseRecordOnDisk ... (repeated, one per record)
     902             :  *  8. TwoPhaseRecordOnDisk (end sentinel, rmid == TWOPHASE_RM_END_ID)
     903             :  *  9. checksum (CRC-32C)
     904             :  *
     905             :  * Each segment except the final checksum is MAXALIGN'd.
     906             :  */
     907             : 
     908             : /*
     909             :  * Header for a 2PC state file
     910             :  */
     911             : #define TWOPHASE_MAGIC  0x57F94534  /* format identifier */
     912             : 
     913             : typedef xl_xact_prepare TwoPhaseFileHeader;
     914             : 
     915             : /*
     916             :  * Header for each record in a state file
     917             :  *
     918             :  * NOTE: len counts only the rmgr data, not the TwoPhaseRecordOnDisk header.
     919             :  * The rmgr data will be stored starting on a MAXALIGN boundary.
     920             :  */
     921             : typedef struct TwoPhaseRecordOnDisk
     922             : {
     923             :     uint32      len;            /* length of rmgr data */
     924             :     TwoPhaseRmgrId rmid;        /* resource manager for this record */
     925             :     uint16      info;           /* flag bits for use by rmgr */
     926             : } TwoPhaseRecordOnDisk;
     927             : 
     928             : /*
     929             :  * During prepare, the state file is assembled in memory before writing it
     930             :  * to WAL and the actual state file.  We use a chain of StateFileChunk blocks
     931             :  * for that.
     932             :  */
     933             : typedef struct StateFileChunk
     934             : {
     935             :     char       *data;
     936             :     uint32      len;
     937             :     struct StateFileChunk *next;
     938             : } StateFileChunk;
     939             : 
     940             : static struct xllist
     941             : {
     942             :     StateFileChunk *head;       /* first data block in the chain */
     943             :     StateFileChunk *tail;       /* last block in chain */
     944             :     uint32      num_chunks;
     945             :     uint32      bytes_free;     /* free bytes left in tail block */
     946             :     uint32      total_len;      /* total data bytes in chain */
     947             : }           records;
     948             : 
     949             : 
     950             : /*
     951             :  * Append a block of data to records data structure.
     952             :  *
     953             :  * NB: each block is padded to a MAXALIGN multiple.  This must be
     954             :  * accounted for when the file is later read!
     955             :  *
     956             :  * The data is copied, so the caller is free to modify it afterwards.
     957             :  */
     958             : static void
     959         816 : save_state_data(const void *data, uint32 len)
     960             : {
     961         816 :     uint32      padlen = MAXALIGN(len);
     962             : 
     963         816 :     if (padlen > records.bytes_free)
     964             :     {
     965          40 :         records.tail->next = palloc0(sizeof(StateFileChunk));
     966          40 :         records.tail = records.tail->next;
     967          40 :         records.tail->len = 0;
     968          40 :         records.tail->next = NULL;
     969          40 :         records.num_chunks++;
     970             : 
     971          40 :         records.bytes_free = Max(padlen, 512);
     972          40 :         records.tail->data = palloc(records.bytes_free);
     973             :     }
     974             : 
     975         816 :     memcpy(((char *) records.tail->data) + records.tail->len, data, len);
     976         816 :     records.tail->len += padlen;
     977         816 :     records.bytes_free -= padlen;
     978         816 :     records.total_len += padlen;
     979         816 : }
     980             : 
     981             : /*
     982             :  * Start preparing a state file.
     983             :  *
     984             :  * Initializes data structure and inserts the 2PC file header record.
     985             :  */
     986             : void
     987          52 : StartPrepare(GlobalTransaction gxact)
     988             : {
     989          52 :     PGPROC     *proc = &ProcGlobal->allProcs[gxact->pgprocno];
     990          52 :     PGXACT     *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
     991          52 :     TransactionId xid = pgxact->xid;
     992             :     TwoPhaseFileHeader hdr;
     993             :     TransactionId *children;
     994             :     RelFileNode *commitrels;
     995             :     RelFileNode *abortrels;
     996             :     SharedInvalidationMessage *invalmsgs;
     997             : 
     998             :     /* Initialize linked list */
     999          52 :     records.head = palloc0(sizeof(StateFileChunk));
    1000          52 :     records.head->len = 0;
    1001          52 :     records.head->next = NULL;
    1002             : 
    1003          52 :     records.bytes_free = Max(sizeof(TwoPhaseFileHeader), 512);
    1004          52 :     records.head->data = palloc(records.bytes_free);
    1005             : 
    1006          52 :     records.tail = records.head;
    1007          52 :     records.num_chunks = 1;
    1008             : 
    1009          52 :     records.total_len = 0;
    1010             : 
    1011             :     /* Create header */
    1012          52 :     hdr.magic = TWOPHASE_MAGIC;
    1013          52 :     hdr.total_len = 0;          /* EndPrepare will fill this in */
    1014          52 :     hdr.xid = xid;
    1015          52 :     hdr.database = proc->databaseId;
    1016          52 :     hdr.prepared_at = gxact->prepared_at;
    1017          52 :     hdr.owner = gxact->owner;
    1018          52 :     hdr.nsubxacts = xactGetCommittedChildren(&children);
    1019          52 :     hdr.ncommitrels = smgrGetPendingDeletes(true, &commitrels);
    1020          52 :     hdr.nabortrels = smgrGetPendingDeletes(false, &abortrels);
    1021          52 :     hdr.ninvalmsgs = xactGetCommittedInvalidationMessages(&invalmsgs,
    1022             :                                                           &hdr.initfileinval);
    1023          52 :     hdr.gidlen = strlen(gxact->gid) + 1; /* Include '\0' */
    1024             : 
    1025          52 :     save_state_data(&hdr, sizeof(TwoPhaseFileHeader));
    1026          52 :     save_state_data(gxact->gid, hdr.gidlen);
    1027             : 
    1028             :     /*
    1029             :      * Add the additional info about subxacts, deletable files and cache
    1030             :      * invalidation messages.
    1031             :      */
    1032          52 :     if (hdr.nsubxacts > 0)
    1033             :     {
    1034          30 :         save_state_data(children, hdr.nsubxacts * sizeof(TransactionId));
    1035             :         /* While we have the child-xact data, stuff it in the gxact too */
    1036          30 :         GXactLoadSubxactData(gxact, hdr.nsubxacts, children);
    1037             :     }
    1038          52 :     if (hdr.ncommitrels > 0)
    1039             :     {
    1040           2 :         save_state_data(commitrels, hdr.ncommitrels * sizeof(RelFileNode));
    1041           2 :         pfree(commitrels);
    1042             :     }
    1043          52 :     if (hdr.nabortrels > 0)
    1044             :     {
    1045          12 :         save_state_data(abortrels, hdr.nabortrels * sizeof(RelFileNode));
    1046          12 :         pfree(abortrels);
    1047             :     }
    1048          52 :     if (hdr.ninvalmsgs > 0)
    1049             :     {
    1050          12 :         save_state_data(invalmsgs,
    1051          12 :                         hdr.ninvalmsgs * sizeof(SharedInvalidationMessage));
    1052          12 :         pfree(invalmsgs);
    1053             :     }
    1054          52 : }
    1055             : 
    1056             : /*
    1057             :  * Finish preparing state data and writing it to WAL.
    1058             :  */
    1059             : void
    1060          52 : EndPrepare(GlobalTransaction gxact)
    1061             : {
    1062             :     TwoPhaseFileHeader *hdr;
    1063             :     StateFileChunk *record;
    1064             :     bool        replorigin;
    1065             : 
    1066             :     /* Add the end sentinel to the list of 2PC records */
    1067          52 :     RegisterTwoPhaseRecord(TWOPHASE_RM_END_ID, 0,
    1068             :                            NULL, 0);
    1069             : 
    1070             :     /* Go back and fill in total_len in the file header record */
    1071          52 :     hdr = (TwoPhaseFileHeader *) records.head->data;
    1072             :     Assert(hdr->magic == TWOPHASE_MAGIC);
    1073          52 :     hdr->total_len = records.total_len + sizeof(pg_crc32c);
    1074             : 
    1075          52 :     replorigin = (replorigin_session_origin != InvalidRepOriginId &&
    1076           0 :                   replorigin_session_origin != DoNotReplicateId);
    1077             : 
    1078          52 :     if (replorigin)
    1079             :     {
    1080             :         Assert(replorigin_session_origin_lsn != InvalidXLogRecPtr);
    1081           0 :         hdr->origin_lsn = replorigin_session_origin_lsn;
    1082           0 :         hdr->origin_timestamp = replorigin_session_origin_timestamp;
    1083             :     }
    1084             :     else
    1085             :     {
    1086          52 :         hdr->origin_lsn = InvalidXLogRecPtr;
    1087          52 :         hdr->origin_timestamp = 0;
    1088             :     }
    1089             : 
    1090             :     /*
    1091             :      * If the data size exceeds MaxAllocSize, we won't be able to read it in
    1092             :      * ReadTwoPhaseFile. Check for that now, rather than fail in the case
    1093             :      * where we write data to file and then re-read at commit time.
    1094             :      */
    1095          52 :     if (hdr->total_len > MaxAllocSize)
    1096           0 :         ereport(ERROR,
    1097             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1098             :                  errmsg("two-phase state file maximum length exceeded")));
    1099             : 
    1100             :     /*
    1101             :      * Now writing 2PC state data to WAL. We let the WAL's CRC protection
    1102             :      * cover us, so no need to calculate a separate CRC.
    1103             :      *
    1104             :      * We have to set delayChkpt here, too; otherwise a checkpoint starting
    1105             :      * immediately after the WAL record is inserted could complete without
    1106             :      * fsync'ing our state file.  (This is essentially the same kind of race
    1107             :      * condition as the COMMIT-to-clog-write case that RecordTransactionCommit
    1108             :      * uses delayChkpt for; see notes there.)
    1109             :      *
    1110             :      * We save the PREPARE record's location in the gxact for later use by
    1111             :      * CheckPointTwoPhase.
    1112             :      */
    1113          52 :     XLogEnsureRecordSpace(0, records.num_chunks);
    1114             : 
    1115          52 :     START_CRIT_SECTION();
    1116             : 
    1117          52 :     MyProc->delayChkpt = true;
    1118             : 
    1119          52 :     XLogBeginInsert();
    1120         144 :     for (record = records.head; record != NULL; record = record->next)
    1121          92 :         XLogRegisterData(record->data, record->len);
    1122             : 
    1123          52 :     XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
    1124             : 
    1125          52 :     gxact->prepare_end_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE);
    1126             : 
    1127          52 :     if (replorigin)
    1128             :     {
    1129             :         /* Move LSNs forward for this replication origin */
    1130           0 :         replorigin_session_advance(replorigin_session_origin_lsn,
    1131             :                                    gxact->prepare_end_lsn);
    1132             :     }
    1133             : 
    1134          52 :     XLogFlush(gxact->prepare_end_lsn);
    1135             : 
    1136             :     /* If we crash now, we have prepared: WAL replay will fix things */
    1137             : 
    1138             :     /* Store record's start location to read that later on Commit */
    1139          52 :     gxact->prepare_start_lsn = ProcLastRecPtr;
    1140             : 
    1141             :     /*
    1142             :      * Mark the prepared transaction as valid.  As soon as xact.c marks
    1143             :      * MyPgXact as not running our XID (which it will do immediately after
    1144             :      * this function returns), others can commit/rollback the xact.
    1145             :      *
    1146             :      * NB: a side effect of this is to make a dummy ProcArray entry for the
    1147             :      * prepared XID.  This must happen before we clear the XID from MyPgXact,
    1148             :      * else there is a window where the XID is not running according to
    1149             :      * TransactionIdIsInProgress, and onlookers would be entitled to assume
    1150             :      * the xact crashed.  Instead we have a window where the same XID appears
    1151             :      * twice in ProcArray, which is OK.
    1152             :      */
    1153          52 :     MarkAsPrepared(gxact, false);
    1154             : 
    1155             :     /*
    1156             :      * Now we can mark ourselves as out of the commit critical section: a
    1157             :      * checkpoint starting after this will certainly see the gxact as a
    1158             :      * candidate for fsyncing.
    1159             :      */
    1160          52 :     MyProc->delayChkpt = false;
    1161             : 
    1162             :     /*
    1163             :      * Remember that we have this GlobalTransaction entry locked for us.  If
    1164             :      * we crash after this point, it's too late to abort, but we must unlock
    1165             :      * it so that the prepared transaction can be committed or rolled back.
    1166             :      */
    1167          52 :     MyLockedGxact = gxact;
    1168             : 
    1169          52 :     END_CRIT_SECTION();
    1170             : 
    1171             :     /*
    1172             :      * Wait for synchronous replication, if required.
    1173             :      *
    1174             :      * Note that at this stage we have marked the prepare, but still show as
    1175             :      * running in the procarray (twice!) and continue to hold locks.
    1176             :      */
    1177          52 :     SyncRepWaitForLSN(gxact->prepare_end_lsn, false);
    1178             : 
    1179          52 :     records.tail = records.head = NULL;
    1180          52 :     records.num_chunks = 0;
    1181          52 : }
    1182             : 
    1183             : /*
    1184             :  * Register a 2PC record to be written to state file.
    1185             :  */
    1186             : void
    1187         354 : RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info,
    1188             :                        const void *data, uint32 len)
    1189             : {
    1190             :     TwoPhaseRecordOnDisk record;
    1191             : 
    1192         354 :     record.rmid = rmid;
    1193         354 :     record.info = info;
    1194         354 :     record.len = len;
    1195         354 :     save_state_data(&record, sizeof(TwoPhaseRecordOnDisk));
    1196         354 :     if (len > 0)
    1197         302 :         save_state_data(data, len);
    1198         354 : }
    1199             : 
    1200             : 
    1201             : /*
    1202             :  * Read and validate the state file for xid.
    1203             :  *
    1204             :  * If it looks OK (has a valid magic number and CRC), return the palloc'd
    1205             :  * contents of the file, issuing an error when finding corrupted data.  If
    1206             :  * missing_ok is true, which indicates that missing files can be safely
    1207             :  * ignored, then return NULL.  This state can be reached when doing recovery.
    1208             :  */
    1209             : static char *
    1210          86 : ReadTwoPhaseFile(TransactionId xid, bool missing_ok)
    1211             : {
    1212             :     char        path[MAXPGPATH];
    1213             :     char       *buf;
    1214             :     TwoPhaseFileHeader *hdr;
    1215             :     int         fd;
    1216             :     struct stat stat;
    1217             :     uint32      crc_offset;
    1218             :     pg_crc32c   calc_crc,
    1219             :                 file_crc;
    1220             :     int         r;
    1221             : 
    1222          86 :     TwoPhaseFilePath(path, xid);
    1223             : 
    1224          86 :     fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
    1225          86 :     if (fd < 0)
    1226             :     {
    1227           0 :         if (missing_ok && errno == ENOENT)
    1228           0 :             return NULL;
    1229             : 
    1230           0 :         ereport(ERROR,
    1231             :                 (errcode_for_file_access(),
    1232             :                  errmsg("could not open file \"%s\": %m", path)));
    1233             :     }
    1234             : 
    1235             :     /*
    1236             :      * Check file length.  We can determine a lower bound pretty easily. We
    1237             :      * set an upper bound to avoid palloc() failure on a corrupt file, though
    1238             :      * we can't guarantee that we won't get an out of memory error anyway,
    1239             :      * even on a valid file.
    1240             :      */
    1241          86 :     if (fstat(fd, &stat))
    1242           0 :         ereport(ERROR,
    1243             :                 (errcode_for_file_access(),
    1244             :                  errmsg("could not stat file \"%s\": %m", path)));
    1245             : 
    1246          86 :     if (stat.st_size < (MAXALIGN(sizeof(TwoPhaseFileHeader)) +
    1247             :                         MAXALIGN(sizeof(TwoPhaseRecordOnDisk)) +
    1248          86 :                         sizeof(pg_crc32c)) ||
    1249          86 :         stat.st_size > MaxAllocSize)
    1250           0 :         ereport(ERROR,
    1251             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1252             :                  errmsg_plural("incorrect size of file \"%s\": %zu byte",
    1253             :                                "incorrect size of file \"%s\": %zu bytes",
    1254             :                                (Size) stat.st_size, path,
    1255             :                                (Size) stat.st_size)));
    1256             : 
    1257          86 :     crc_offset = stat.st_size - sizeof(pg_crc32c);
    1258          86 :     if (crc_offset != MAXALIGN(crc_offset))
    1259           0 :         ereport(ERROR,
    1260             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1261             :                  errmsg("incorrect alignment of CRC offset for file \"%s\"",
    1262             :                         path)));
    1263             : 
    1264             :     /*
    1265             :      * OK, slurp in the file.
    1266             :      */
    1267          86 :     buf = (char *) palloc(stat.st_size);
    1268             : 
    1269          86 :     pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
    1270          86 :     r = read(fd, buf, stat.st_size);
    1271          86 :     if (r != stat.st_size)
    1272             :     {
    1273           0 :         if (r < 0)
    1274           0 :             ereport(ERROR,
    1275             :                     (errcode_for_file_access(),
    1276             :                      errmsg("could not read file \"%s\": %m", path)));
    1277             :         else
    1278           0 :             ereport(ERROR,
    1279             :                     (errmsg("could not read file \"%s\": read %d of %zu",
    1280             :                             path, r, (Size) stat.st_size)));
    1281             :     }
    1282             : 
    1283          86 :     pgstat_report_wait_end();
    1284             : 
    1285          86 :     if (CloseTransientFile(fd) != 0)
    1286           0 :         ereport(ERROR,
    1287             :                 (errcode_for_file_access(),
    1288             :                  errmsg("could not close file \"%s\": %m", path)));
    1289             : 
    1290          86 :     hdr = (TwoPhaseFileHeader *) buf;
    1291          86 :     if (hdr->magic != TWOPHASE_MAGIC)
    1292           0 :         ereport(ERROR,
    1293             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1294             :                  errmsg("invalid magic number stored in file \"%s\"",
    1295             :                         path)));
    1296             : 
    1297          86 :     if (hdr->total_len != stat.st_size)
    1298           0 :         ereport(ERROR,
    1299             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1300             :                  errmsg("invalid size stored in file \"%s\"",
    1301             :                         path)));
    1302             : 
    1303          86 :     INIT_CRC32C(calc_crc);
    1304          86 :     COMP_CRC32C(calc_crc, buf, crc_offset);
    1305          86 :     FIN_CRC32C(calc_crc);
    1306             : 
    1307          86 :     file_crc = *((pg_crc32c *) (buf + crc_offset));
    1308             : 
    1309          86 :     if (!EQ_CRC32C(calc_crc, file_crc))
    1310           0 :         ereport(ERROR,
    1311             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1312             :                  errmsg("calculated CRC checksum does not match value stored in file \"%s\"",
    1313             :                         path)));
    1314             : 
    1315          86 :     return buf;
    1316             : }
    1317             : 
    1318             : 
    1319             : /*
    1320             :  * Reads 2PC data from xlog. During checkpoint this data will be moved to
    1321             :  * twophase files and ReadTwoPhaseFile should be used instead.
    1322             :  *
    1323             :  * Note clearly that this function can access WAL during normal operation,
    1324             :  * similarly to the way WALSender or Logical Decoding would do.
    1325             :  */
    1326             : static void
    1327         132 : XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len)
    1328             : {
    1329             :     XLogRecord *record;
    1330             :     XLogReaderState *xlogreader;
    1331             :     char       *errormsg;
    1332             : 
    1333         132 :     xlogreader = XLogReaderAllocate(wal_segment_size, NULL,
    1334         132 :                                     XL_ROUTINE(.page_read = &read_local_xlog_page,
    1335             :                                                .segment_open = &wal_segment_open,
    1336             :                                                .segment_close = &wal_segment_close),
    1337             :                                     NULL);
    1338         132 :     if (!xlogreader)
    1339           0 :         ereport(ERROR,
    1340             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
    1341             :                  errmsg("out of memory"),
    1342             :                  errdetail("Failed while allocating a WAL reading processor.")));
    1343             : 
    1344         132 :     XLogBeginRead(xlogreader, lsn);
    1345         132 :     record = XLogReadRecord(xlogreader, &errormsg);
    1346         132 :     if (record == NULL)
    1347           0 :         ereport(ERROR,
    1348             :                 (errcode_for_file_access(),
    1349             :                  errmsg("could not read two-phase state from WAL at %X/%X",
    1350             :                         (uint32) (lsn >> 32),
    1351             :                         (uint32) lsn)));
    1352             : 
    1353         132 :     if (XLogRecGetRmid(xlogreader) != RM_XACT_ID ||
    1354         132 :         (XLogRecGetInfo(xlogreader) & XLOG_XACT_OPMASK) != XLOG_XACT_PREPARE)
    1355           0 :         ereport(ERROR,
    1356             :                 (errcode_for_file_access(),
    1357             :                  errmsg("expected two-phase state data is not present in WAL at %X/%X",
    1358             :                         (uint32) (lsn >> 32),
    1359             :                         (uint32) lsn)));
    1360             : 
    1361         132 :     if (len != NULL)
    1362          34 :         *len = XLogRecGetDataLen(xlogreader);
    1363             : 
    1364         132 :     *buf = palloc(sizeof(char) * XLogRecGetDataLen(xlogreader));
    1365         132 :     memcpy(*buf, XLogRecGetData(xlogreader), sizeof(char) * XLogRecGetDataLen(xlogreader));
    1366             : 
    1367         132 :     XLogReaderFree(xlogreader);
    1368         132 : }
    1369             : 
    1370             : 
    1371             : /*
    1372             :  * Confirms an xid is prepared, during recovery
    1373             :  */
    1374             : bool
    1375           0 : StandbyTransactionIdIsPrepared(TransactionId xid)
    1376             : {
    1377             :     char       *buf;
    1378             :     TwoPhaseFileHeader *hdr;
    1379             :     bool        result;
    1380             : 
    1381             :     Assert(TransactionIdIsValid(xid));
    1382             : 
    1383           0 :     if (max_prepared_xacts <= 0)
    1384           0 :         return false;           /* nothing to do */
    1385             : 
    1386             :     /* Read and validate file */
    1387           0 :     buf = ReadTwoPhaseFile(xid, true);
    1388           0 :     if (buf == NULL)
    1389           0 :         return false;
    1390             : 
    1391             :     /* Check header also */
    1392           0 :     hdr = (TwoPhaseFileHeader *) buf;
    1393           0 :     result = TransactionIdEquals(hdr->xid, xid);
    1394           0 :     pfree(buf);
    1395             : 
    1396           0 :     return result;
    1397             : }
    1398             : 
    1399             : /*
    1400             :  * FinishPreparedTransaction: execute COMMIT PREPARED or ROLLBACK PREPARED
    1401             :  */
    1402             : void
    1403          74 : FinishPreparedTransaction(const char *gid, bool isCommit)
    1404             : {
    1405             :     GlobalTransaction gxact;
    1406             :     PGPROC     *proc;
    1407             :     PGXACT     *pgxact;
    1408             :     TransactionId xid;
    1409             :     char       *buf;
    1410             :     char       *bufptr;
    1411             :     TwoPhaseFileHeader *hdr;
    1412             :     TransactionId latestXid;
    1413             :     TransactionId *children;
    1414             :     RelFileNode *commitrels;
    1415             :     RelFileNode *abortrels;
    1416             :     RelFileNode *delrels;
    1417             :     int         ndelrels;
    1418             :     SharedInvalidationMessage *invalmsgs;
    1419             : 
    1420             :     /*
    1421             :      * Validate the GID, and lock the GXACT to ensure that two backends do not
    1422             :      * try to commit the same GID at once.
    1423             :      */
    1424          74 :     gxact = LockGXact(gid, GetUserId());
    1425          62 :     proc = &ProcGlobal->allProcs[gxact->pgprocno];
    1426          62 :     pgxact = &ProcGlobal->allPgXact[gxact->pgprocno];
    1427          62 :     xid = pgxact->xid;
    1428             : 
    1429             :     /*
    1430             :      * Read and validate 2PC state data. State data will typically be stored
    1431             :      * in WAL files if the LSN is after the last checkpoint record, or moved
    1432             :      * to disk if for some reason they have lived for a long time.
    1433             :      */
    1434          62 :     if (gxact->ondisk)
    1435          30 :         buf = ReadTwoPhaseFile(xid, false);
    1436             :     else
    1437          32 :         XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, NULL);
    1438             : 
    1439             : 
    1440             :     /*
    1441             :      * Disassemble the header area
    1442             :      */
    1443          62 :     hdr = (TwoPhaseFileHeader *) buf;
    1444             :     Assert(TransactionIdEquals(hdr->xid, xid));
    1445          62 :     bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
    1446          62 :     bufptr += MAXALIGN(hdr->gidlen);
    1447          62 :     children = (TransactionId *) bufptr;
    1448          62 :     bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
    1449          62 :     commitrels = (RelFileNode *) bufptr;
    1450          62 :     bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
    1451          62 :     abortrels = (RelFileNode *) bufptr;
    1452          62 :     bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
    1453          62 :     invalmsgs = (SharedInvalidationMessage *) bufptr;
    1454          62 :     bufptr += MAXALIGN(hdr->ninvalmsgs * sizeof(SharedInvalidationMessage));
    1455             : 
    1456             :     /* compute latestXid among all children */
    1457          62 :     latestXid = TransactionIdLatest(xid, hdr->nsubxacts, children);
    1458             : 
    1459             :     /* Prevent cancel/die interrupt while cleaning up */
    1460          62 :     HOLD_INTERRUPTS();
    1461             : 
    1462             :     /*
    1463             :      * The order of operations here is critical: make the XLOG entry for
    1464             :      * commit or abort, then mark the transaction committed or aborted in
    1465             :      * pg_xact, then remove its PGPROC from the global ProcArray (which means
    1466             :      * TransactionIdIsInProgress will stop saying the prepared xact is in
    1467             :      * progress), then run the post-commit or post-abort callbacks. The
    1468             :      * callbacks will release the locks the transaction held.
    1469             :      */
    1470          62 :     if (isCommit)
    1471          42 :         RecordTransactionCommitPrepared(xid,
    1472             :                                         hdr->nsubxacts, children,
    1473             :                                         hdr->ncommitrels, commitrels,
    1474             :                                         hdr->ninvalmsgs, invalmsgs,
    1475          42 :                                         hdr->initfileinval, gid);
    1476             :     else
    1477          20 :         RecordTransactionAbortPrepared(xid,
    1478             :                                        hdr->nsubxacts, children,
    1479             :                                        hdr->nabortrels, abortrels,
    1480             :                                        gid);
    1481             : 
    1482          62 :     ProcArrayRemove(proc, latestXid);
    1483             : 
    1484             :     /*
    1485             :      * In case we fail while running the callbacks, mark the gxact invalid so
    1486             :      * no one else will try to commit/rollback, and so it will be recycled if
    1487             :      * we fail after this point.  It is still locked by our backend so it
    1488             :      * won't go away yet.
    1489             :      *
    1490             :      * (We assume it's safe to do this without taking TwoPhaseStateLock.)
    1491             :      */
    1492          62 :     gxact->valid = false;
    1493             : 
    1494             :     /*
    1495             :      * We have to remove any files that were supposed to be dropped. For
    1496             :      * consistency with the regular xact.c code paths, must do this before
    1497             :      * releasing locks, so do it before running the callbacks.
    1498             :      *
    1499             :      * NB: this code knows that we couldn't be dropping any temp rels ...
    1500             :      */
    1501          62 :     if (isCommit)
    1502             :     {
    1503          42 :         delrels = commitrels;
    1504          42 :         ndelrels = hdr->ncommitrels;
    1505             :     }
    1506             :     else
    1507             :     {
    1508          20 :         delrels = abortrels;
    1509          20 :         ndelrels = hdr->nabortrels;
    1510             :     }
    1511             : 
    1512             :     /* Make sure files supposed to be dropped are dropped */
    1513          62 :     DropRelationFiles(delrels, ndelrels, false);
    1514             : 
    1515             :     /*
    1516             :      * Handle cache invalidation messages.
    1517             :      *
    1518             :      * Relcache init file invalidation requires processing both before and
    1519             :      * after we send the SI messages. See AtEOXact_Inval()
    1520             :      */
    1521          62 :     if (hdr->initfileinval)
    1522           0 :         RelationCacheInitFilePreInvalidate();
    1523          62 :     SendSharedInvalidMessages(invalmsgs, hdr->ninvalmsgs);
    1524          62 :     if (hdr->initfileinval)
    1525           0 :         RelationCacheInitFilePostInvalidate();
    1526             : 
    1527             :     /*
    1528             :      * Acquire the two-phase lock.  We want to work on the two-phase callbacks
    1529             :      * while holding it to avoid potential conflicts with other transactions
    1530             :      * attempting to use the same GID, so the lock is released once the shared
    1531             :      * memory state is cleared.
    1532             :      */
    1533          62 :     LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
    1534             : 
    1535             :     /* And now do the callbacks */
    1536          62 :     if (isCommit)
    1537          42 :         ProcessRecords(bufptr, xid, twophase_postcommit_callbacks);
    1538             :     else
    1539          20 :         ProcessRecords(bufptr, xid, twophase_postabort_callbacks);
    1540             : 
    1541          62 :     PredicateLockTwoPhaseFinish(xid, isCommit);
    1542             : 
    1543             :     /* Clear shared memory state */
    1544          62 :     RemoveGXact(gxact);
    1545             : 
    1546             :     /*
    1547             :      * Release the lock as all callbacks are called and shared memory cleanup
    1548             :      * is done.
    1549             :      */
    1550          62 :     LWLockRelease(TwoPhaseStateLock);
    1551             : 
    1552             :     /* Count the prepared xact as committed or aborted */
    1553          62 :     AtEOXact_PgStat(isCommit, false);
    1554             : 
    1555             :     /*
    1556             :      * And now we can clean up any files we may have left.
    1557             :      */
    1558          62 :     if (gxact->ondisk)
    1559          30 :         RemoveTwoPhaseFile(xid, true);
    1560             : 
    1561          62 :     MyLockedGxact = NULL;
    1562             : 
    1563          62 :     RESUME_INTERRUPTS();
    1564             : 
    1565          62 :     pfree(buf);
    1566          62 : }
    1567             : 
    1568             : /*
    1569             :  * Scan 2PC state data in memory and call the indicated callbacks for each 2PC record.
    1570             :  */
    1571             : static void
    1572         634 : ProcessRecords(char *bufptr, TransactionId xid,
    1573             :                const TwoPhaseCallback callbacks[])
    1574             : {
    1575             :     for (;;)
    1576         542 :     {
    1577         634 :         TwoPhaseRecordOnDisk *record = (TwoPhaseRecordOnDisk *) bufptr;
    1578             : 
    1579             :         Assert(record->rmid <= TWOPHASE_RM_MAX_ID);
    1580         634 :         if (record->rmid == TWOPHASE_RM_END_ID)
    1581          92 :             break;
    1582             : 
    1583         542 :         bufptr += MAXALIGN(sizeof(TwoPhaseRecordOnDisk));
    1584             : 
    1585         542 :         if (callbacks[record->rmid] != NULL)
    1586         450 :             callbacks[record->rmid] (xid, record->info,
    1587             :                                      (void *) bufptr, record->len);
    1588             : 
    1589         542 :         bufptr += MAXALIGN(record->len);
    1590             :     }
    1591          92 : }
    1592             : 
    1593             : /*
    1594             :  * Remove the 2PC file for the specified XID.
    1595             :  *
    1596             :  * If giveWarning is false, do not complain about file-not-present;
    1597             :  * this is an expected case during WAL replay.
    1598             :  */
    1599             : static void
    1600          34 : RemoveTwoPhaseFile(TransactionId xid, bool giveWarning)
    1601             : {
    1602             :     char        path[MAXPGPATH];
    1603             : 
    1604          34 :     TwoPhaseFilePath(path, xid);
    1605          34 :     if (unlink(path))
    1606           0 :         if (errno != ENOENT || giveWarning)
    1607           0 :             ereport(WARNING,
    1608             :                     (errcode_for_file_access(),
    1609             :                      errmsg("could not remove file \"%s\": %m", path)));
    1610          34 : }
    1611             : 
    1612             : /*
    1613             :  * Recreates a state file. This is used in WAL replay and during
    1614             :  * checkpoint creation.
    1615             :  *
    1616             :  * Note: content and len don't include CRC.
    1617             :  */
    1618             : static void
    1619          34 : RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
    1620             : {
    1621             :     char        path[MAXPGPATH];
    1622             :     pg_crc32c   statefile_crc;
    1623             :     int         fd;
    1624             : 
    1625             :     /* Recompute CRC */
    1626          34 :     INIT_CRC32C(statefile_crc);
    1627          34 :     COMP_CRC32C(statefile_crc, content, len);
    1628          34 :     FIN_CRC32C(statefile_crc);
    1629             : 
    1630          34 :     TwoPhaseFilePath(path, xid);
    1631             : 
    1632          34 :     fd = OpenTransientFile(path,
    1633             :                            O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY);
    1634          34 :     if (fd < 0)
    1635           0 :         ereport(ERROR,
    1636             :                 (errcode_for_file_access(),
    1637             :                  errmsg("could not recreate file \"%s\": %m", path)));
    1638             : 
    1639             :     /* Write content and CRC */
    1640          34 :     errno = 0;
    1641          34 :     pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_WRITE);
    1642          34 :     if (write(fd, content, len) != len)
    1643             :     {
    1644             :         /* if write didn't set errno, assume problem is no disk space */
    1645           0 :         if (errno == 0)
    1646           0 :             errno = ENOSPC;
    1647           0 :         ereport(ERROR,
    1648             :                 (errcode_for_file_access(),
    1649             :                  errmsg("could not write file \"%s\": %m", path)));
    1650             :     }
    1651          34 :     if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c))
    1652             :     {
    1653             :         /* if write didn't set errno, assume problem is no disk space */
    1654           0 :         if (errno == 0)
    1655           0 :             errno = ENOSPC;
    1656           0 :         ereport(ERROR,
    1657             :                 (errcode_for_file_access(),
    1658             :                  errmsg("could not write file \"%s\": %m", path)));
    1659             :     }
    1660          34 :     pgstat_report_wait_end();
    1661             : 
    1662             :     /*
    1663             :      * We must fsync the file because the end-of-replay checkpoint will not do
    1664             :      * so, there being no GXACT in shared memory yet to tell it to.
    1665             :      */
    1666          34 :     pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_SYNC);
    1667          34 :     if (pg_fsync(fd) != 0)
    1668           0 :         ereport(ERROR,
    1669             :                 (errcode_for_file_access(),
    1670             :                  errmsg("could not fsync file \"%s\": %m", path)));
    1671          34 :     pgstat_report_wait_end();
    1672             : 
    1673          34 :     if (CloseTransientFile(fd) != 0)
    1674           0 :         ereport(ERROR,
    1675             :                 (errcode_for_file_access(),
    1676             :                  errmsg("could not close file \"%s\": %m", path)));
    1677          34 : }
    1678             : 
    1679             : /*
    1680             :  * CheckPointTwoPhase -- handle 2PC component of checkpointing.
    1681             :  *
    1682             :  * We must fsync the state file of any GXACT that is valid or has been
    1683             :  * generated during redo and has a PREPARE LSN <= the checkpoint's redo
    1684             :  * horizon.  (If the gxact isn't valid yet, has not been generated in
    1685             :  * redo, or has a later LSN, this checkpoint is not responsible for
    1686             :  * fsyncing it.)
    1687             :  *
    1688             :  * This is deliberately run as late as possible in the checkpoint sequence,
    1689             :  * because GXACTs ordinarily have short lifespans, and so it is quite
    1690             :  * possible that GXACTs that were valid at checkpoint start will no longer
    1691             :  * exist if we wait a little bit. With typical checkpoint settings this
    1692             :  * will be about 3 minutes for an online checkpoint, so as a result we
    1693             :  * expect that there will be no GXACTs that need to be copied to disk.
    1694             :  *
    1695             :  * If a GXACT remains valid across multiple checkpoints, it will already
    1696             :  * be on disk so we don't bother to repeat that write.
    1697             :  */
    1698             : void
    1699        3172 : CheckPointTwoPhase(XLogRecPtr redo_horizon)
    1700             : {
    1701             :     int         i;
    1702        3172 :     int         serialized_xacts = 0;
    1703             : 
    1704        3172 :     if (max_prepared_xacts <= 0)
    1705        2640 :         return;                 /* nothing to do */
    1706             : 
    1707             :     TRACE_POSTGRESQL_TWOPHASE_CHECKPOINT_START();
    1708             : 
    1709             :     /*
    1710             :      * We are expecting there to be zero GXACTs that need to be copied to
    1711             :      * disk, so we perform all I/O while holding TwoPhaseStateLock for
    1712             :      * simplicity. This prevents any new xacts from preparing while this
    1713             :      * occurs, which shouldn't be a problem since the presence of long-lived
    1714             :      * prepared xacts indicates the transaction manager isn't active.
    1715             :      *
    1716             :      * It's also possible to move I/O out of the lock, but on every error we
    1717             :      * would then have to check whether somebody committed our transaction in
    1718             :      * a different backend. Let's leave this optimization for the future, in
    1719             :      * case somebody finds that this place causes a bottleneck.
    1720             :      *
    1721             :      * Note that it isn't possible for there to be a GXACT that has a
    1722             :      * prepare_end_lsn set prior to the last checkpoint yet is marked invalid,
    1723             :      * because of the efforts with delayChkpt.
    1724             :      */
    1725         532 :     LWLockAcquire(TwoPhaseStateLock, LW_SHARED);
    1726         576 :     for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
    1727             :     {
    1728             :         /*
    1729             :          * Note that we are using gxact not pgxact so this works in recovery
    1730             :          * also
    1731             :          */
    1732          44 :         GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
    1733             : 
    1734          44 :         if ((gxact->valid || gxact->inredo) &&
    1735          44 :             !gxact->ondisk &&
    1736          40 :             gxact->prepare_end_lsn <= redo_horizon)
    1737             :         {
    1738             :             char       *buf;
    1739             :             int         len;
    1740             : 
    1741          34 :             XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, &len);
    1742          34 :             RecreateTwoPhaseFile(gxact->xid, buf, len);
    1743          34 :             gxact->ondisk = true;
    1744          34 :             gxact->prepare_start_lsn = InvalidXLogRecPtr;
    1745          34 :             gxact->prepare_end_lsn = InvalidXLogRecPtr;
    1746          34 :             pfree(buf);
    1747          34 :             serialized_xacts++;
    1748             :         }
    1749             :     }
    1750         532 :     LWLockRelease(TwoPhaseStateLock);
    1751             : 
    1752             :     /*
    1753             :      * Unconditionally flush the parent directory to make any information
    1754             :      * durable on disk.  Two-phase files could have been removed, and those
    1755             :      * removals need to be made persistent, as do any files newly created
    1756             :      * since the last checkpoint.
    1757             :      */
    1758         532 :     fsync_fname(TWOPHASE_DIR, true);
    1759             : 
    1760             :     TRACE_POSTGRESQL_TWOPHASE_CHECKPOINT_DONE();
    1761             : 
    1762         532 :     if (log_checkpoints && serialized_xacts > 0)
    1763          24 :         ereport(LOG,
    1764             :                 (errmsg_plural("%u two-phase state file was written "
    1765             :                                "for a long-running prepared transaction",
    1766             :                                "%u two-phase state files were written "
    1767             :                                "for long-running prepared transactions",
    1768             :                                serialized_xacts,
    1769             :                                serialized_xacts)));
    1770             : }
    1771             : 
    1772             : /*
    1773             :  * restoreTwoPhaseData
    1774             :  *
    1775             :  * Scan pg_twophase and fill TwoPhaseState depending on the on-disk data.
    1776             :  * This is called once at the beginning of recovery, saving any extra
    1777             :  * lookups in the future.  Two-phase files that are newer than the
    1778             :  * minimum XID horizon are discarded on the way.
    1779             :  */
    1780             : void
    1781        1390 : restoreTwoPhaseData(void)
    1782             : {
    1783             :     DIR        *cldir;
    1784             :     struct dirent *clde;
    1785             : 
    1786        1390 :     LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
    1787        1390 :     cldir = AllocateDir(TWOPHASE_DIR);
    1788        4184 :     while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL)
    1789             :     {
    1790        2794 :         if (strlen(clde->d_name) == 8 &&
    1791          14 :             strspn(clde->d_name, "0123456789ABCDEF") == 8)
    1792             :         {
    1793             :             TransactionId xid;
    1794             :             char       *buf;
    1795             : 
    1796          14 :             xid = (TransactionId) strtoul(clde->d_name, NULL, 16);
    1797             : 
    1798          14 :             buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr,
    1799             :                                         true, false, false);
    1800          14 :             if (buf == NULL)
    1801           0 :                 continue;
    1802             : 
    1803          14 :             PrepareRedoAdd(buf, InvalidXLogRecPtr,
    1804             :                            InvalidXLogRecPtr, InvalidRepOriginId);
    1805             :         }
    1806             :     }
    1807        1390 :     LWLockRelease(TwoPhaseStateLock);
    1808        1390 :     FreeDir(cldir);
    1809        1390 : }
    1810             : 
    1811             : /*
    1812             :  * PrescanPreparedTransactions
    1813             :  *
    1814             :  * Scan the shared memory entries of TwoPhaseState and determine the range
    1815             :  * of valid XIDs present.  This is run during database startup, after we
    1816             :  * have completed reading WAL.  ShmemVariableCache->nextFullXid has been set to
    1817             :  * one more than the highest XID for which evidence exists in WAL.
    1818             :  *
    1819             :  * We throw away any prepared xacts with main XID beyond nextFullXid --- if any
    1820             :  * are present, it suggests that the DBA has done a PITR recovery to an
    1821             :  * earlier point in time without cleaning out pg_twophase.  We dare not
    1822             :  * try to recover such prepared xacts since they likely depend on database
    1823             :  * state that doesn't exist now.
    1824             :  *
    1825             :  * However, we will advance nextFullXid beyond any subxact XIDs belonging to
    1826             :  * valid prepared xacts.  We need to do this since subxact commit doesn't
    1827             :  * write a WAL entry, and so there might be no evidence in WAL of those
    1828             :  * subxact XIDs.
    1829             :  *
    1830             :  * On corrupted two-phase files, fail immediately.  Keeping broken entries
    1831             :  * around and letting replay continue would harm the system; a new backup
    1832             :  * should be rolled in instead.
    1833             :  *
    1834             :  * Our other responsibility is to determine and return the oldest valid XID
    1835             :  * among the prepared xacts (if none, return ShmemVariableCache->nextFullXid).
    1836             :  * This is needed to synchronize pg_subtrans startup properly.
    1837             :  *
    1838             :  * If xids_p and nxids_p are not NULL, a pointer to a palloc'd array of all
    1839             :  * top-level xids is stored in *xids_p. The number of entries in the array
    1840             :  * is returned in *nxids_p.
    1841             :  */
    1842             : TransactionId
    1843        1398 : PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
    1844             : {
    1845        1398 :     FullTransactionId nextFullXid = ShmemVariableCache->nextFullXid;
    1846        1398 :     TransactionId origNextXid = XidFromFullTransactionId(nextFullXid);
    1847        1398 :     TransactionId result = origNextXid;
    1848        1398 :     TransactionId *xids = NULL;
    1849        1398 :     int         nxids = 0;
    1850        1398 :     int         allocsize = 0;
    1851             :     int         i;
    1852             : 
    1853        1398 :     LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
    1854        1452 :     for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
    1855             :     {
    1856             :         TransactionId xid;
    1857             :         char       *buf;
    1858          54 :         GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
    1859             : 
    1860             :         Assert(gxact->inredo);
    1861             : 
    1862          54 :         xid = gxact->xid;
    1863             : 
    1864          54 :         buf = ProcessTwoPhaseBuffer(xid,
    1865             :                                     gxact->prepare_start_lsn,
    1866          54 :                                     gxact->ondisk, false, true);
    1867             : 
    1868          54 :         if (buf == NULL)
    1869           0 :             continue;
    1870             : 
    1871             :         /*
    1872             :          * OK, we think this file is valid.  Incorporate xid into the
    1873             :          * running-minimum result.
    1874             :          */
    1875          54 :         if (TransactionIdPrecedes(xid, result))
    1876          42 :             result = xid;
    1877             : 
    1878          54 :         if (xids_p)
    1879             :         {
    1880          24 :             if (nxids == allocsize)
    1881             :             {
    1882          20 :                 if (nxids == 0)
    1883             :                 {
    1884          20 :                     allocsize = 10;
    1885          20 :                     xids = palloc(allocsize * sizeof(TransactionId));
    1886             :                 }
    1887             :                 else
    1888             :                 {
    1889           0 :                     allocsize = allocsize * 2;
    1890           0 :                     xids = repalloc(xids, allocsize * sizeof(TransactionId));
    1891             :                 }
    1892             :             }
    1893          24 :             xids[nxids++] = xid;
    1894             :         }
    1895             : 
    1896          54 :         pfree(buf);
    1897             :     }
    1898        1398 :     LWLockRelease(TwoPhaseStateLock);
    1899             : 
    1900        1398 :     if (xids_p)
    1901             :     {
    1902          42 :         *xids_p = xids;
    1903          42 :         *nxids_p = nxids;
    1904             :     }
    1905             : 
    1906        1398 :     return result;
    1907             : }
    1908             : 
    1909             : /*
    1910             :  * StandbyRecoverPreparedTransactions
    1911             :  *
    1912             :  * Scan the shared memory entries of TwoPhaseState and setup all the required
    1913             :  * information to allow standby queries to treat prepared transactions as still
    1914             :  * active.
    1915             :  *
    1916             :  * This is never called at the end of recovery - we use
    1917             :  * RecoverPreparedTransactions() at that point.
    1918             :  *
    1919             :  * The lack of SubTransSetParent() calls here is by design;
    1920             :  * those calls are made by RecoverPreparedTransactions() at the end of recovery
    1921             :  * for those xacts that need this.
    1922             :  */
    1923             : void
    1924          42 : StandbyRecoverPreparedTransactions(void)
    1925             : {
    1926             :     int         i;
    1927             : 
    1928          42 :     LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
    1929          66 :     for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
    1930             :     {
    1931             :         TransactionId xid;
    1932             :         char       *buf;
    1933          24 :         GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
    1934             : 
    1935             :         Assert(gxact->inredo);
    1936             : 
    1937          24 :         xid = gxact->xid;
    1938             : 
    1939          24 :         buf = ProcessTwoPhaseBuffer(xid,
    1940             :                                     gxact->prepare_start_lsn,
    1941          24 :                                     gxact->ondisk, false, false);
    1942          24 :         if (buf != NULL)
    1943          24 :             pfree(buf);
    1944             :     }
    1945          42 :     LWLockRelease(TwoPhaseStateLock);
    1946          42 : }
    1947             : 
    1948             : /*
    1949             :  * RecoverPreparedTransactions
    1950             :  *
    1951             :  * Scan the shared memory entries of TwoPhaseState and reload the state for
    1952             :  * each prepared transaction (reacquire locks, etc).
    1953             :  *
    1954             :  * This is run at the end of recovery, but before we allow backends to write
    1955             :  * WAL.
    1956             :  *
    1957             :  * At the end of recovery the way we take snapshots will change. We now need
    1958             :  * to mark all running transactions with their full SubTransSetParent() info
    1959             :  * to allow normal snapshots to work correctly if snapshots overflow.
    1960             :  * We do this here because by definition prepared transactions are the only
    1961             :  * type of write transaction still running, so this is necessary and
    1962             :  * complete.
    1963             :  */
    1964             : void
    1965        1356 : RecoverPreparedTransactions(void)
    1966             : {
    1967             :     int         i;
    1968             : 
    1969        1356 :     LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
    1970        1386 :     for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
    1971             :     {
    1972             :         TransactionId xid;
    1973             :         char       *buf;
    1974          30 :         GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
    1975             :         char       *bufptr;
    1976             :         TwoPhaseFileHeader *hdr;
    1977             :         TransactionId *subxids;
    1978             :         const char *gid;
    1979             : 
    1980          30 :         xid = gxact->xid;
    1981             : 
    1982             :         /*
    1983             :          * Reconstruct subtrans state for the transaction --- needed because
    1984             :          * pg_subtrans is not preserved over a restart.  Note that we are
    1985             :          * linking all the subtransactions directly to the top-level XID;
    1986             :          * there may originally have been a more complex hierarchy, but
    1987             :          * there's no need to restore that exactly. It's possible that
    1988             :          * SubTransSetParent has been set before, if the prepared transaction
    1989             :          * generated xid assignment records.
    1990             :          */
    1991          30 :         buf = ProcessTwoPhaseBuffer(xid,
    1992             :                                     gxact->prepare_start_lsn,
    1993          30 :                                     gxact->ondisk, true, false);
    1994          30 :         if (buf == NULL)
    1995           0 :             continue;
    1996             : 
    1997          30 :         ereport(LOG,
    1998             :                 (errmsg("recovering prepared transaction %u from shared memory", xid)));
    1999             : 
    2000          30 :         hdr = (TwoPhaseFileHeader *) buf;
    2001             :         Assert(TransactionIdEquals(hdr->xid, xid));
    2002          30 :         bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
    2003          30 :         gid = (const char *) bufptr;
    2004          30 :         bufptr += MAXALIGN(hdr->gidlen);
    2005          30 :         subxids = (TransactionId *) bufptr;
    2006          30 :         bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
    2007          30 :         bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
    2008          30 :         bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
    2009          30 :         bufptr += MAXALIGN(hdr->ninvalmsgs * sizeof(SharedInvalidationMessage));
    2010             : 
    2011             :         /*
    2012             :          * Recreate its GXACT and dummy PGPROC. But, check whether it was
    2013             :          * added in redo and already has a shmem entry for it.
    2014             :          */
    2015          30 :         MarkAsPreparingGuts(gxact, xid, gid,
    2016             :                             hdr->prepared_at,
    2017             :                             hdr->owner, hdr->database);
    2018             : 
    2019             :         /* recovered, so reset the flag for entries generated by redo */
    2020          30 :         gxact->inredo = false;
    2021             : 
    2022          30 :         GXactLoadSubxactData(gxact, hdr->nsubxacts, subxids);
    2023          30 :         MarkAsPrepared(gxact, true);
    2024             : 
    2025          30 :         LWLockRelease(TwoPhaseStateLock);
    2026             : 
    2027             :         /*
    2028             :          * Recover other state (notably locks) using resource managers.
    2029             :          */
    2030          30 :         ProcessRecords(bufptr, xid, twophase_recover_callbacks);
    2031             : 
    2032             :         /*
    2033             :          * Release locks held by the standby process after we process each
    2034             :          * prepared transaction. As a result, we don't need too many
    2035             :          * additional locks at any one time.
    2036             :          */
    2037          30 :         if (InHotStandby)
    2038          10 :             StandbyReleaseLockTree(xid, hdr->nsubxacts, subxids);
    2039             : 
    2040             :         /*
    2041             :          * We're done with recovering this transaction. Clear MyLockedGxact,
    2042             :          * like we do in PrepareTransaction() during normal operation.
    2043             :          */
    2044          30 :         PostPrepare_Twophase();
    2045             : 
    2046          30 :         pfree(buf);
    2047             : 
    2048          30 :         LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
    2049             :     }
    2050             : 
    2051        1356 :     LWLockRelease(TwoPhaseStateLock);
    2052        1356 : }
    2053             : 
    2054             : /*
    2055             :  * ProcessTwoPhaseBuffer
    2056             :  *
    2057             :  * Given a transaction id, read its two-phase state either from disk or
    2058             :  * directly from WAL via the shmem xlog record pointer "prepare_start_lsn".
    2059             :  *
    2060             :  * If setParent is true, set up subtransaction parent linkages.
    2061             :  *
    2062             :  * If setNextXid is true, set ShmemVariableCache->nextFullXid to the newest
    2063             :  * value scanned.
    2064             :  */
    2065             : static char *
    2066         122 : ProcessTwoPhaseBuffer(TransactionId xid,
    2067             :                       XLogRecPtr prepare_start_lsn,
    2068             :                       bool fromdisk,
    2069             :                       bool setParent, bool setNextXid)
    2070             : {
    2071         122 :     FullTransactionId nextFullXid = ShmemVariableCache->nextFullXid;
    2072         122 :     TransactionId origNextXid = XidFromFullTransactionId(nextFullXid);
    2073             :     TransactionId *subxids;
    2074             :     char       *buf;
    2075             :     TwoPhaseFileHeader *hdr;
    2076             :     int         i;
    2077             : 
    2078             :     Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE));
    2079             : 
    2080         122 :     if (!fromdisk)
    2081             :         Assert(prepare_start_lsn != InvalidXLogRecPtr);
    2082             : 
    2083             :     /* Already processed? */
    2084         122 :     if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
    2085             :     {
    2086           0 :         if (fromdisk)
    2087             :         {
    2088           0 :             ereport(WARNING,
    2089             :                     (errmsg("removing stale two-phase state file for transaction %u",
    2090             :                             xid)));
    2091           0 :             RemoveTwoPhaseFile(xid, true);
    2092             :         }
    2093             :         else
    2094             :         {
    2095           0 :             ereport(WARNING,
    2096             :                     (errmsg("removing stale two-phase state from memory for transaction %u",
    2097             :                             xid)));
    2098           0 :             PrepareRedoRemove(xid, true);
    2099             :         }
    2100           0 :         return NULL;
    2101             :     }
    2102             : 
    2103             :     /* Reject XID if too new */
    2104         122 :     if (TransactionIdFollowsOrEquals(xid, origNextXid))
    2105             :     {
    2106           0 :         if (fromdisk)
    2107             :         {
    2108           0 :             ereport(WARNING,
    2109             :                     (errmsg("removing future two-phase state file for transaction %u",
    2110             :                             xid)));
    2111           0 :             RemoveTwoPhaseFile(xid, true);
    2112             :         }
    2113             :         else
    2114             :         {
    2115           0 :             ereport(WARNING,
    2116             :                     (errmsg("removing future two-phase state from memory for transaction %u",
    2117             :                             xid)));
    2118           0 :             PrepareRedoRemove(xid, true);
    2119             :         }
    2120           0 :         return NULL;
    2121             :     }
    2122             : 
    2123         122 :     if (fromdisk)
    2124             :     {
    2125             :         /* Read and validate file */
    2126          56 :         buf = ReadTwoPhaseFile(xid, false);
    2127             :     }
    2128             :     else
    2129             :     {
    2130             :         /* Read xlog data */
    2131          66 :         XlogReadTwoPhaseData(prepare_start_lsn, &buf, NULL);
    2132             :     }
    2133             : 
    2134             :     /* Deconstruct header */
    2135         122 :     hdr = (TwoPhaseFileHeader *) buf;
    2136         122 :     if (!TransactionIdEquals(hdr->xid, xid))
    2137             :     {
    2138           0 :         if (fromdisk)
    2139           0 :             ereport(ERROR,
    2140             :                     (errcode(ERRCODE_DATA_CORRUPTED),
    2141             :                      errmsg("corrupted two-phase state file for transaction %u",
    2142             :                             xid)));
    2143             :         else
    2144           0 :             ereport(ERROR,
    2145             :                     (errcode(ERRCODE_DATA_CORRUPTED),
    2146             :                      errmsg("corrupted two-phase state in memory for transaction %u",
    2147             :                             xid)));
    2148             :     }
    2149             : 
    2150             :     /*
    2151             :      * Examine subtransaction XIDs ... they should all follow main XID, and
    2152             :      * they may force us to advance nextFullXid.
    2153             :      */
    2154         122 :     subxids = (TransactionId *) (buf +
    2155         244 :                                  MAXALIGN(sizeof(TwoPhaseFileHeader)) +
    2156         122 :                                  MAXALIGN(hdr->gidlen));
    2157        3648 :     for (i = 0; i < hdr->nsubxacts; i++)
    2158             :     {
    2159        3526 :         TransactionId subxid = subxids[i];
    2160             : 
    2161             :         Assert(TransactionIdFollows(subxid, xid));
    2162             : 
    2163             :         /* update nextFullXid if needed */
    2164        3526 :         if (setNextXid)
    2165        1626 :             AdvanceNextFullTransactionIdPastXid(subxid);
    2166             : 
    2167        3526 :         if (setParent)
    2168         690 :             SubTransSetParent(subxid, xid);
    2169             :     }
    2170             : 
    2171         122 :     return buf;
    2172             : }
    2173             : 
    2174             : 
    2175             : /*
    2176             :  *  RecordTransactionCommitPrepared
    2177             :  *
    2178             :  * This is basically the same as RecordTransactionCommit (q.v. if you change
    2179             :  * this function): in particular, we must set the delayChkpt flag to avoid a
    2180             :  * race condition.
    2181             :  *
    2182             :  * We know the transaction made at least one XLOG entry (its PREPARE),
    2183             :  * so it is never possible to optimize out the commit record.
    2184             :  */
    2185             : static void
    2186          42 : RecordTransactionCommitPrepared(TransactionId xid,
    2187             :                                 int nchildren,
    2188             :                                 TransactionId *children,
    2189             :                                 int nrels,
    2190             :                                 RelFileNode *rels,
    2191             :                                 int ninvalmsgs,
    2192             :                                 SharedInvalidationMessage *invalmsgs,
    2193             :                                 bool initfileinval,
    2194             :                                 const char *gid)
    2195             : {
    2196             :     XLogRecPtr  recptr;
    2197          42 :     TimestampTz committs = GetCurrentTimestamp();
    2198             :     bool        replorigin;
    2199             : 
    2200             :     /*
    2201             :      * Are we using the replication origins feature?  Or, in other words, are
    2202             :      * we replaying remote actions?
    2203             :      */
    2204          42 :     replorigin = (replorigin_session_origin != InvalidRepOriginId &&
    2205           0 :                   replorigin_session_origin != DoNotReplicateId);
    2206             : 
    2207          42 :     START_CRIT_SECTION();
    2208             : 
    2209             :     /* See notes in RecordTransactionCommit */
    2210          42 :     MyProc->delayChkpt = true;
    2211             : 
    2212             :     /*
    2213             :      * Emit the XLOG commit record. Note that we mark 2PC commits as
    2214             :      * potentially having AccessExclusiveLocks since we don't know whether or
    2215             :      * not they do.
    2216             :      */
    2217          42 :     recptr = XactLogCommitRecord(committs,
    2218             :                                  nchildren, children, nrels, rels,
    2219             :                                  ninvalmsgs, invalmsgs,
    2220             :                                  initfileinval, false,
    2221          42 :                                  MyXactFlags | XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK,
    2222             :                                  xid, gid);
    2223             : 
    2224             : 
    2225          42 :     if (replorigin)
    2226             :         /* Move LSNs forward for this replication origin */
    2227           0 :         replorigin_session_advance(replorigin_session_origin_lsn,
    2228             :                                    XactLastRecEnd);
    2229             : 
    2230             :     /*
    2231             :      * Record commit timestamp.  The value comes from plain commit timestamp
    2232             :      * if replorigin is not enabled, or replorigin already set a value for us
    2233             :      * in replorigin_session_origin_timestamp otherwise.
    2234             :      *
    2235             :      * We don't need to WAL-log anything here, as the commit record written
    2236             :      * above already contains the data.
    2237             :      */
    2238          42 :     if (!replorigin || replorigin_session_origin_timestamp == 0)
    2239          42 :         replorigin_session_origin_timestamp = committs;
    2240             : 
    2241          42 :     TransactionTreeSetCommitTsData(xid, nchildren, children,
    2242             :                                    replorigin_session_origin_timestamp,
    2243             :                                    replorigin_session_origin, false);
    2244             : 
    2245             :     /*
    2246             :      * We don't currently try to sleep before flush here ... nor is there any
    2247             :      * support for async commit of a prepared xact (the very idea is probably
    2248             :      * a contradiction)
    2249             :      */
    2250             : 
    2251             :     /* Flush XLOG to disk */
    2252          42 :     XLogFlush(recptr);
    2253             : 
    2254             :     /* Mark the transaction committed in pg_xact */
    2255          42 :     TransactionIdCommitTree(xid, nchildren, children);
    2256             : 
    2257             :     /* Checkpoint can proceed now */
    2258          42 :     MyProc->delayChkpt = false;
    2259             : 
    2260          42 :     END_CRIT_SECTION();
    2261             : 
    2262             :     /*
    2263             :      * Wait for synchronous replication, if required.
    2264             :      *
    2265             :      * Note that at this stage we have marked clog, but still show as running
    2266             :      * in the procarray and continue to hold locks.
    2267             :      */
    2268          42 :     SyncRepWaitForLSN(recptr, true);
    2269          42 : }
    2270             : 
    2271             : /*
    2272             :  *  RecordTransactionAbortPrepared
    2273             :  *
    2274             :  * This is basically the same as RecordTransactionAbort.
    2275             :  *
    2276             :  * We know the transaction made at least one XLOG entry (its PREPARE),
    2277             :  * so it is never possible to optimize out the abort record.
    2278             :  */
    2279             : static void
    2280          20 : RecordTransactionAbortPrepared(TransactionId xid,
    2281             :                                int nchildren,
    2282             :                                TransactionId *children,
    2283             :                                int nrels,
    2284             :                                RelFileNode *rels,
    2285             :                                const char *gid)
    2286             : {
    2287             :     XLogRecPtr  recptr;
    2288             : 
    2289             :     /*
    2290             :      * Catch the scenario where we aborted partway through
    2291             :      * RecordTransactionCommitPrepared: xid must not already be marked committed.
    2292             :      */
    2293          20 :     if (TransactionIdDidCommit(xid))
    2294           0 :         elog(PANIC, "cannot abort transaction %u, it was already committed",
    2295             :              xid);
    2296             : 
    2297          20 :     START_CRIT_SECTION();
    2298             : 
    2299             :     /*
    2300             :      * Emit the XLOG abort record. Note that we mark 2PC aborts as
    2301             :      * potentially having AccessExclusiveLocks since we don't know whether or
    2302             :      * not they do.
    2303             :      */
    2304          20 :     recptr = XactLogAbortRecord(GetCurrentTimestamp(),
    2305             :                                 nchildren, children,
    2306             :                                 nrels, rels,
    2307          20 :                                 MyXactFlags | XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK,
    2308             :                                 xid, gid);
    2309             : 
    2310             :     /* Always flush, since we're about to remove the 2PC state file */
    2311          20 :     XLogFlush(recptr);
    2312             : 
    2313             :     /*
    2314             :      * Mark the transaction aborted in clog.  This is not absolutely necessary
    2315             :      * but we may as well do it while we are here.
    2316             :      */
    2317          20 :     TransactionIdAbortTree(xid, nchildren, children);
    2318             : 
    2319          20 :     END_CRIT_SECTION();
    2320             : 
    2321             :     /*
    2322             :      * Wait for synchronous replication, if required.
    2323             :      *
    2324             :      * Note that at this stage we have marked clog, but still show as running
    2325             :      * in the procarray and continue to hold locks.
    2326             :      */
    2327          20 :     SyncRepWaitForLSN(recptr, false);
    2328          20 : }
    2329             : 
    2330             : /*
    2331             :  * PrepareRedoAdd
    2332             :  *
    2333             :  * Store pointers to the start/end of the WAL record along with the xid in
    2334             :  * a gxact entry in the shared-memory TwoPhaseState structure.  If the caller
    2335             :  * passes InvalidXLogRecPtr as start_lsn, the entry is marked as located on
    2336             :  * disk.  Caller must hold TwoPhaseStateLock exclusively, during recovery.
    2337             :  */
    2338             : void
    2339          74 : PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
    2340             :                XLogRecPtr end_lsn, RepOriginId origin_id)
    2341             : {
    2342          74 :     TwoPhaseFileHeader *hdr = (TwoPhaseFileHeader *) buf;
    2343             :     char       *bufptr;
    2344             :     const char *gid;
    2345             :     GlobalTransaction gxact;
    2346             : 
    2347             :     Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE));
    2348             :     Assert(RecoveryInProgress());
    2349             : 
    2350          74 :     bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
    2351          74 :     gid = (const char *) bufptr;
    2352             : 
    2353             :     /*
    2354             :      * Reserve the GID for the given transaction in the redo code path.
    2355             :      *
    2356             :      * This creates a gxact struct and puts it into the active array.
    2357             :      *
    2358             :      * In redo, this struct is mainly used to track PREPARE/COMMIT entries in
    2359             :      * shared memory. Hence, we only fill up the bare minimum contents here.
    2360             :      * The gxact also gets marked with gxact->inredo set to true to indicate
    2361             :      * that it got added in the redo phase.
    2362             :      */
    2363             : 
    2364             :     /* Get a free gxact from the freelist, or fail if none is left */
    2365          74 :     if (TwoPhaseState->freeGXacts == NULL)
    2366           0 :         ereport(ERROR,
    2367             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
    2368             :                  errmsg("maximum number of prepared transactions reached"),
    2369             :                  errhint("Increase max_prepared_transactions (currently %d).",
    2370             :                          max_prepared_xacts)));
    2371          74 :     gxact = TwoPhaseState->freeGXacts;
    2372          74 :     TwoPhaseState->freeGXacts = gxact->next;
    2373             : 
    2374          74 :     gxact->prepared_at = hdr->prepared_at;
    2375          74 :     gxact->prepare_start_lsn = start_lsn;
    2376          74 :     gxact->prepare_end_lsn = end_lsn;
    2377          74 :     gxact->xid = hdr->xid;
    2378          74 :     gxact->owner = hdr->owner;
    2379          74 :     gxact->locking_backend = InvalidBackendId;
    2380          74 :     gxact->valid = false;
    2381          74 :     gxact->ondisk = XLogRecPtrIsInvalid(start_lsn);
    2382          74 :     gxact->inredo = true;        /* yes, added in redo */
    2383          74 :     strcpy(gxact->gid, gid);
    2384             : 
    2385             :     /* And insert it into the active array */
    2386             :     Assert(TwoPhaseState->numPrepXacts < max_prepared_xacts);
    2387          74 :     TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts++] = gxact;
    2388             : 
    2389          74 :     if (origin_id != InvalidRepOriginId)
    2390             :     {
    2391             :         /* recover apply progress of the replication origin */
    2392           0 :         replorigin_advance(origin_id, hdr->origin_lsn, end_lsn,
    2393             :                            false /* backward */ , false /* WAL */ );
    2394             :     }
    2395             : 
    2396          74 :     elog(DEBUG2, "added 2PC data in shared memory for transaction %u", gxact->xid);
    2397          74 : }
    2398             : 
    2399             : /*
    2400             :  * PrepareRedoRemove
    2401             :  *
    2402             :  * Remove the gxact entry matching xid from TwoPhaseState, if any. Also remove
    2403             :  * the 2PC file if a prepared transaction was saved via an earlier checkpoint.
    2404             :  *
    2405             :  * Caller must hold TwoPhaseStateLock in exclusive mode, because TwoPhaseState
    2406             :  * is updated.
    2407             :  */
    2408             : void
    2409          48 : PrepareRedoRemove(TransactionId xid, bool giveWarning)
    2410             : {
    2411          48 :     GlobalTransaction gxact = NULL;
    2412             :     int         i;
    2413          48 :     bool        found = false;
    2414             : 
    2415             :     Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE));
    2416             :     Assert(RecoveryInProgress());
    2417             : 
    2418          48 :     for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
    2419             :     {
    2420          42 :         gxact = TwoPhaseState->prepXacts[i];
    2421             : 
    2422          42 :         if (gxact->xid == xid)
    2423             :         {
    2424             :             Assert(gxact->inredo);
    2425          42 :             found = true;
    2426          42 :             break;
    2427             :         }
    2428             :     }
    2429             : 
    2430             :     /*
    2431             :      * Just leave if there is nothing, this is expected during WAL replay.
    2432             :      */
    2433          48 :     if (!found)
    2434           6 :         return;
    2435             : 
    2436             :     /*
    2437             :      * Clean up any file we may have left, then remove the gxact entry itself.
    2438             :      */
    2439          42 :     elog(DEBUG2, "removing 2PC data for transaction %u", xid);
    2440          42 :     if (gxact->ondisk)
    2441           4 :         RemoveTwoPhaseFile(xid, giveWarning);
    2442          42 :     RemoveGXact(gxact);
    2443             : }

Generated by: LCOV version 1.13