LCOV - code coverage report
Current view: top level - src/bin/pg_rewind - parsexlog.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 80.4 % 148 119
Test Date: 2026-03-03 10:15:07 Functions: 100.0 % 5 5
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * parsexlog.c
       4              :  *    Functions for reading Write-Ahead-Log
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *-------------------------------------------------------------------------
      10              :  */
      11              : 
      12              : #include "postgres_fe.h"
      13              : 
      14              : #include <unistd.h>
      15              : 
      16              : #include "access/rmgr.h"
      17              : #include "access/xact.h"
      18              : #include "access/xlog_internal.h"
      19              : #include "access/xlogreader.h"
      20              : #include "catalog/pg_control.h"
      21              : #include "catalog/storage_xlog.h"
      22              : #include "commands/dbcommands_xlog.h"
      23              : #include "fe_utils/archive.h"
      24              : #include "filemap.h"
      25              : #include "pg_rewind.h"
      26              : 
      27              : /*
      28              :  * RmgrNames is an array of the built-in resource manager names, to make error
      29              :  * messages a bit nicer.
                      :  *
                      :  * PG_RMGR is defined here so that each use of it (presumably one per line
                      :  * of access/rmgrlist.h) expands to just its 'name' argument followed by a
                      :  * comma, which turns the included file into the array initializer.
                      :  *
                      :  * RmgrName(rmid) indexes the array when rmid <= RM_MAX_BUILTIN_ID and
                      :  * evaluates to the fixed string custom for any larger ID. Note that the
                      :  * macro evaluates its argument twice.
      30              :  */
      31              : #define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask,decode) \
      32              :   name,
      33              : 
      34              : static const char *const RmgrNames[RM_MAX_ID + 1] = {
      35              : #include "access/rmgrlist.h"
      36              : };
      37              : 
      38              : #define RmgrName(rmid) (((rmid) <= RM_MAX_BUILTIN_ID) ? \
      39              :                         RmgrNames[rmid] : "custom")
      40              : 
      41              : static void extractPageInfo(XLogReaderState *record);
      42              : 
      43              : static int  xlogreadfd = -1; /* fd of the WAL segment currently open, -1 if none */
      44              : static XLogSegNo xlogreadsegno = 0; /* segment of the page most recently requested from SimpleXLogPageRead() */
      45              : static char xlogfpath[MAXPGPATH]; /* path built for the last segment open attempt; used in log messages */
      46              : 
      47              : typedef struct XLogPageReadPrivate /* reader private_data, consumed by SimpleXLogPageRead() */
      48              : {
      49              :     const char *restoreCommand; /* NULL means: do not try to fetch a missing segment */
      50              :     int         tliIndex; /* index into targetHistory; SimpleXLogPageRead() may move it */
      51              : } XLogPageReadPrivate;
      52              : 
      53              : static int  SimpleXLogPageRead(XLogReaderState *xlogreader,
      54              :                                XLogRecPtr targetPagePtr,
      55              :                                int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
      56              : 
      57              : /*
      58              :  * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
      59              :  * index 'tliIndex' in target timeline history, until 'endpoint'. Make note of
      60              :  * the data blocks touched by the WAL records, and return them in a page map.
      61              :  *
      62              :  * 'endpoint' is the end of the last record to read. The record starting at
      63              :  * 'endpoint' is the first one that is not read.
                      :  *
                      :  * The function itself returns nothing: every record that is read is handed
                      :  * to extractPageInfo(), which does the actual bookkeeping.
                      :  *
                      :  * 'datadir' is given to XLogReaderAllocate(); 'tliIndex' and
                      :  * 'restoreCommand' travel in the reader's private data and are used by the
                      :  * page-read callback, SimpleXLogPageRead().
                      :  *
                      :  * Failure to allocate the reader, failure to read a record, and an
                      :  * 'endpoint' that is not exactly the end of a record are all reported with
                      :  * pg_fatal(). Because the loop is a do/while, one record is read even when
                      :  * 'startpoint' is not below 'endpoint'.
      64              :  */
      65              : void
      66           15 : extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
      67              :                XLogRecPtr endpoint, const char *restoreCommand)
      68              : {
      69              :     XLogRecord *record;
      70              :     XLogReaderState *xlogreader;
      71              :     char       *errormsg;
      72              :     XLogPageReadPrivate private;
      73              : 
      74           15 :     private.tliIndex = tliIndex;
      75           15 :     private.restoreCommand = restoreCommand;
      76           15 :     xlogreader = XLogReaderAllocate(WalSegSz, datadir,
      77           15 :                                     XL_ROUTINE(.page_read = &SimpleXLogPageRead),
      78              :                                     &private);
      79           15 :     if (xlogreader == NULL)
      80            0 :         pg_fatal("out of memory while allocating a WAL reading processor");
      81              : 
      82           15 :     XLogBeginRead(xlogreader, startpoint);
      83              :     do
      84              :     {
      85        86491 :         record = XLogReadRecord(xlogreader, &errormsg);
      86              : 
      87        86491 :         if (record == NULL)
      88              :         {
      89            0 :             XLogRecPtr  errptr = xlogreader->EndRecPtr; /* LSN shown in the error message */
      90              : 
      91            0 :             if (errormsg)
      92            0 :                 pg_fatal("could not read WAL record at %X/%08X: %s",
      93              :                          LSN_FORMAT_ARGS(errptr),
      94              :                          errormsg);
      95              :             else
      96            0 :                 pg_fatal("could not read WAL record at %X/%08X",
      97              :                          LSN_FORMAT_ARGS(errptr));
      98              :         }
      99              : 
     100        86491 :         extractPageInfo(xlogreader);
     101        86491 :     } while (xlogreader->EndRecPtr < endpoint);
     102              : 
     103              :     /*
     104              :      * If 'endpoint' didn't point exactly at a record boundary, the caller
     105              :      * messed up.
     106              :      */
     107           15 :     if (xlogreader->EndRecPtr != endpoint)
     108            0 :         pg_fatal("end pointer %X/%08X is not a valid end point; expected %X/%08X",
     109              :                  LSN_FORMAT_ARGS(endpoint), LSN_FORMAT_ARGS(xlogreader->EndRecPtr));
     110              : 
     111           15 :     XLogReaderFree(xlogreader);
     112           15 :     if (xlogreadfd != -1) /* segment left open by SimpleXLogPageRead() */
     113              :     {
     114           15 :         close(xlogreadfd);
     115           15 :         xlogreadfd = -1;
     116              :     }
     117           15 : }
     118              : 
     119              : /*
     120              :  * Reads one WAL record. Returns the end position of the record, without
     121              :  * doing anything with the record itself.
                      :  *
                      :  * 'ptr' is the position handed to XLogBeginRead(), i.e. where the record
                      :  * is expected to start. 'datadir' is given to XLogReaderAllocate();
                      :  * 'tliIndex' and 'restoreCommand' are placed in the reader's private data
                      :  * for SimpleXLogPageRead().
                      :  *
                      :  * The return value is the reader's EndRecPtr, captured before the reader
                      :  * is freed. Failure to allocate the reader or to read the record is
                      :  * reported with pg_fatal(). The file descriptor that the page-read
                      :  * callback leaves open is closed before returning.
     122              :  */
     123              : XLogRecPtr
     124           15 : readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex,
     125              :               const char *restoreCommand)
     126              : {
     127              :     XLogRecord *record;
     128              :     XLogReaderState *xlogreader;
     129              :     char       *errormsg;
     130              :     XLogPageReadPrivate private;
     131              :     XLogRecPtr  endptr;
     132              : 
     133           15 :     private.tliIndex = tliIndex;
     134           15 :     private.restoreCommand = restoreCommand;
     135           15 :     xlogreader = XLogReaderAllocate(WalSegSz, datadir,
     136           15 :                                     XL_ROUTINE(.page_read = &SimpleXLogPageRead),
     137              :                                     &private);
     138           15 :     if (xlogreader == NULL)
     139            0 :         pg_fatal("out of memory while allocating a WAL reading processor");
     140              : 
     141           15 :     XLogBeginRead(xlogreader, ptr);
     142           15 :     record = XLogReadRecord(xlogreader, &errormsg);
     143           15 :     if (record == NULL)
     144              :     {
     145            0 :         if (errormsg)
     146            0 :             pg_fatal("could not read WAL record at %X/%08X: %s",
     147              :                      LSN_FORMAT_ARGS(ptr), errormsg);
     148              :         else
     149            0 :             pg_fatal("could not read WAL record at %X/%08X",
     150              :                      LSN_FORMAT_ARGS(ptr));
     151              :     }
     152           15 :     endptr = xlogreader->EndRecPtr; /* must be saved before XLogReaderFree() */
     153              : 
     154           15 :     XLogReaderFree(xlogreader);
     155           15 :     if (xlogreadfd != -1) /* segment left open by SimpleXLogPageRead() */
     156              :     {
     157           15 :         close(xlogreadfd);
     158           15 :         xlogreadfd = -1;
     159              :     }
     160              : 
     161           15 :     return endptr;
     162              : }
     163              : 
     164              : /*
     165              :  * Find the previous checkpoint preceding given WAL location.
                      :  *
                      :  * Starting with the record at 'forkptr' (moved past the page header first
                      :  * if it sits exactly on a page boundary), the function follows each
                      :  * record's xl_prev link backwards. It stops at the first record that lies
                      :  * before the (possibly adjusted) 'forkptr', belongs to RM_XLOG_ID, and is
                      :  * either XLOG_CHECKPOINT_SHUTDOWN or XLOG_CHECKPOINT_ONLINE.
                      :  *
                      :  * Outputs:
                      :  *   *lastchkptrec  - position of the checkpoint record that was found
                      :  *   *lastchkpttli  - ThisTimeLineID stored in that checkpoint
                      :  *   *lastchkptredo - redo pointer stored in that checkpoint
                      :  *
                      :  * Side effect: whenever the reader's segment (timeline or segment number)
                      :  * changes during the walk, the segment's file name, prefixed with XLOGDIR
                      :  * and a slash, is registered through keepwal_add_entry().
                      :  *
                      :  * 'datadir', 'tliIndex' and 'restoreCommand' are used as in the other
                      :  * readers in this file. A record that cannot be read ends the program via
                      :  * pg_fatal(); the loop has no other exit than finding a checkpoint.
     166              :  */
     167              : void
     168           15 : findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
     169              :                    XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
     170              :                    XLogRecPtr *lastchkptredo, const char *restoreCommand)
     171              : {
     172              :     /* Walk backwards, starting from the given record */
     173              :     XLogRecord *record;
     174              :     XLogRecPtr  searchptr;
     175              :     XLogReaderState *xlogreader;
     176              :     char       *errormsg;
     177              :     XLogPageReadPrivate private;
     178           15 :     XLogSegNo   current_segno = 0;
     179           15 :     TimeLineID  current_tli = 0;
     180              : 
     181              :     /*
     182              :      * The given fork pointer points to the end of the last common record,
     183              :      * which is not necessarily the beginning of the next record, if the
     184              :      * previous record happens to end at a page boundary. Skip over the page
     185              :      * header in that case to find the next record.
     186              :      */
     187           15 :     if (forkptr % XLOG_BLCKSZ == 0)
     188              :     {
     189            1 :         if (XLogSegmentOffset(forkptr, WalSegSz) == 0) /* first page of a segment */
     190            1 :             forkptr += SizeOfXLogLongPHD;
     191              :         else
     192            0 :             forkptr += SizeOfXLogShortPHD;
     193              :     }
     194              : 
     195           15 :     private.tliIndex = tliIndex;
     196           15 :     private.restoreCommand = restoreCommand;
     197           15 :     xlogreader = XLogReaderAllocate(WalSegSz, datadir,
     198           15 :                                     XL_ROUTINE(.page_read = &SimpleXLogPageRead),
     199              :                                     &private);
     200           15 :     if (xlogreader == NULL)
     201            0 :         pg_fatal("out of memory while allocating a WAL reading processor");
     202              : 
     203           15 :     searchptr = forkptr;
     204              :     for (;;)
     205         2649 :     {
     206              :         uint8       info;
     207              : 
     208         2664 :         XLogBeginRead(xlogreader, searchptr);
     209         2664 :         record = XLogReadRecord(xlogreader, &errormsg);
     210              : 
     211         2664 :         if (record == NULL)
     212              :         {
     213            0 :             if (errormsg)
     214            0 :                 pg_fatal("could not find previous WAL record at %X/%08X: %s",
     215              :                          LSN_FORMAT_ARGS(searchptr),
     216              :                          errormsg);
     217              :             else
     218            0 :                 pg_fatal("could not find previous WAL record at %X/%08X",
     219              :                          LSN_FORMAT_ARGS(searchptr));
     220              :         }
     221              : 
     222              :         /* Detect if a new WAL file has been opened */
     223         2664 :         if (xlogreader->seg.ws_tli != current_tli ||
     224         2649 :             xlogreader->seg.ws_segno != current_segno)
     225              :         {
     226              :             char        xlogfname[MAXFNAMELEN];
     227              : 
     228           20 :             snprintf(xlogfname, MAXFNAMELEN, XLOGDIR "/"); /* directory prefix; file name is appended below */
     229              : 
     230              :             /* update current values */
     231           20 :             current_tli = xlogreader->seg.ws_tli;
     232           20 :             current_segno = xlogreader->seg.ws_segno;
     233              : 
     234           20 :             XLogFileName(xlogfname + sizeof(XLOGDIR), /* sizeof counts the NUL, so this is just past the slash */
     235              :                          current_tli, current_segno, WalSegSz);
     236              : 
     237              :             /* Track this filename as one to not remove */
     238           20 :             keepwal_add_entry(xlogfname);
     239              :         }
     240              : 
     241              :         /*
     242              :          * Check if it is a checkpoint record. This checkpoint record needs to
     243              :          * be the latest checkpoint before WAL forked and not the checkpoint
     244              :          * where the primary has been stopped to be rewound.
     245              :          */
     246         2664 :         info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
     247         2664 :         if (searchptr < forkptr &&
     248         2649 :             XLogRecGetRmid(xlogreader) == RM_XLOG_ID &&
     249         1920 :             (info == XLOG_CHECKPOINT_SHUTDOWN ||
     250              :              info == XLOG_CHECKPOINT_ONLINE))
     251              :         {
     252              :             CheckPoint  checkPoint;
     253              : 
     254           15 :             memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
     255           15 :             *lastchkptrec = searchptr;
     256           15 :             *lastchkpttli = checkPoint.ThisTimeLineID;
     257           15 :             *lastchkptredo = checkPoint.redo;
     258           15 :             break;
     259              :         }
     260              : 
     261              :         /* Walk backwards to previous record. */
     262         2649 :         searchptr = record->xl_prev;
     263              :     }
     264              : 
     265           15 :     XLogReaderFree(xlogreader);
     266           15 :     if (xlogreadfd != -1) /* segment left open by SimpleXLogPageRead() */
     267              :     {
     268           15 :         close(xlogreadfd);
     269           15 :         xlogreadfd = -1;
     270              :     }
     271           15 : }
     272              : 
     273              : /*
                      :  * XLogReader callback function, to read a WAL page.
                      :  *
                      :  * Reads the XLOG_BLCKSZ bytes at 'targetPagePtr' into 'readBuf'. Returns
                      :  * XLOG_BLCKSZ on success and -1 on failure. 'reqLen' and 'targetRecPtr'
                      :  * are not used by this implementation.
                      :  *
                      :  * One segment file is kept open in the file-level variable xlogreadfd
                      :  * between calls. When the requested page lies in a different segment the
                      :  * old descriptor is closed and a new file is opened:
                      :  *
                      :  *   - the timeline index in the private data is first advanced while the
                      :  *     current targetHistory entry ends before the end of the wanted
                      :  *     segment, then stepped back while the entry begins at or after it;
                      :  *   - the file name is derived from the selected entry's tli and the
                      :  *     segment number, under <ws_dir>/XLOGDIR;
                      :  *   - if open() fails and a restore command is set, the file is requested
                      :  *     through RestoreArchivedFile() instead.
                      :  *
                      :  * Failures of open() without a restore command, lseek() and read()
                      :  * (including short reads) are logged with pg_log_error(). A failing
                      :  * RestoreArchivedFile() is not logged here. On success the reader's
                      :  * seg.ws_tli is set to the timeline that was used.
                      :  */
     274              : static int
     275         5923 : SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
     276              :                    int reqLen, XLogRecPtr targetRecPtr, char *readBuf)
     277              : {
     278         5923 :     XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data;
     279              :     uint32      targetPageOff;
     280              :     XLogRecPtr  targetSegEnd;
     281              :     XLogSegNo   targetSegNo;
     282              :     int         r;
     283              : 
     284         5923 :     XLByteToSeg(targetPagePtr, targetSegNo, WalSegSz);
     285         5923 :     XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, WalSegSz, targetSegEnd); /* offset 0 of the following segment */
     286         5923 :     targetPageOff = XLogSegmentOffset(targetPagePtr, WalSegSz);
     287              : 
     288              :     /*
     289              :      * See if we need to switch to a new segment because the requested record
     290              :      * is not in the currently open one.
     291              :      */
     292         5923 :     if (xlogreadfd >= 0 &&
     293         5878 :         !XLByteInSeg(targetPagePtr, xlogreadsegno, WalSegSz))
     294              :     {
     295           10 :         close(xlogreadfd);
     296           10 :         xlogreadfd = -1;
     297              :     }
     298              : 
     299         5923 :     XLByteToSeg(targetPagePtr, xlogreadsegno, WalSegSz); /* remember the segment for the next call */
     300              : 
     301         5923 :     if (xlogreadfd < 0)
     302              :     {
     303              :         char        xlogfname[MAXFNAMELEN];
     304              : 
     305              :         /*
     306              :          * Since incomplete segments are copied into next timelines, switch to
     307              :          * the timeline holding the required segment. Assuming this scan can
     308              :          * be done both forward and backward, consider also switching timeline
     309              :          * accordingly.
     310              :          */
     311           57 :         while (private->tliIndex < targetNentries - 1 &&
     312            2 :                targetHistory[private->tliIndex].end < targetSegEnd)
     313            2 :             private->tliIndex++;
     314           55 :         while (private->tliIndex > 0 &&
     315            6 :                targetHistory[private->tliIndex].begin >= targetSegEnd)
     316            0 :             private->tliIndex--;
     317              : 
     318           55 :         XLogFileName(xlogfname, targetHistory[private->tliIndex].tli,
     319              :                      xlogreadsegno, WalSegSz);
     320              : 
     321           55 :         snprintf(xlogfpath, MAXPGPATH, "%s/" XLOGDIR "/%s",
     322           55 :                  xlogreader->segcxt.ws_dir, xlogfname);
     323              : 
     324           55 :         xlogreadfd = open(xlogfpath, O_RDONLY | PG_BINARY, 0);
     325              : 
     326           55 :         if (xlogreadfd < 0)
     327              :         {
     328              :             /*
     329              :              * If we have no restore_command to execute, then exit.
     330              :              */
     331            1 :             if (private->restoreCommand == NULL)
     332              :             {
     333            0 :                 pg_log_error("could not open file \"%s\": %m", xlogfpath);
     334            0 :                 return -1;
     335              :             }
     336              : 
     337              :             /*
     338              :              * Since we have restore_command, then try to retrieve missing WAL
     339              :              * file from the archive.
     340              :              */
     341            1 :             xlogreadfd = RestoreArchivedFile(xlogreader->segcxt.ws_dir,
     342              :                                              xlogfname,
     343              :                                              WalSegSz,
     344              :                                              private->restoreCommand);
     345              : 
     346            1 :             if (xlogreadfd < 0)
     347            0 :                 return -1; /* no message is logged on this path */
     348              :             else
     349            1 :                 pg_log_debug("using file \"%s\" restored from archive",
     350              :                              xlogfpath);
     351              :         }
     352              :     }
     353              : 
     354              :     /*
     355              :      * At this point, we have the right segment open.
     356              :      */
     357              :     Assert(xlogreadfd != -1);
     358              : 
     359              :     /* Read the requested page */
     360         5923 :     if (lseek(xlogreadfd, (off_t) targetPageOff, SEEK_SET) < 0)
     361              :     {
     362            0 :         pg_log_error("could not seek in file \"%s\": %m", xlogfpath);
     363            0 :         return -1;
     364              :     }
     365              : 
     366              : 
     367         5923 :     r = read(xlogreadfd, readBuf, XLOG_BLCKSZ);
     368         5923 :     if (r != XLOG_BLCKSZ) /* a short read is treated as a failure, not retried */
     369              :     {
     370            0 :         if (r < 0)
     371            0 :             pg_log_error("could not read file \"%s\": %m", xlogfpath);
     372              :         else
     373            0 :             pg_log_error("could not read file \"%s\": read %d of %zu",
     374              :                          xlogfpath, r, (Size) XLOG_BLCKSZ);
     375              : 
     376            0 :         return -1;
     377              :     }
     378              : 
     379              :     Assert(targetSegNo == xlogreadsegno);
     380              : 
     381         5923 :     xlogreader->seg.ws_tli = targetHistory[private->tliIndex].tli;
     382         5923 :     return XLOG_BLCKSZ;
     383              : }
     384              : 
     385              : /*
     386              :  * Extract information on which blocks the current record modifies.
                      :  *
                      :  * Despite its name, 'record' is the XLogReaderState; the record most
                      :  * recently read through it is examined with the XLogRec* accessors.
                      :  *
                      :  * The if/else chain below is an allow-list. Every branch but the last is
                      :  * intentionally empty and only documents why that record type needs no
                      :  * special handling. The last branch fires for any other record that has
                      :  * XLR_SPECIAL_REL_UPDATE set and stops the program with pg_fatal().
                      :  *
                      :  * After that, every block ID from 0 to XLogRecMaxBlockId() is visited.
                      :  * IDs for which XLogRecGetBlockTagExtended() returns false (presumably
                      :  * unused block slots) and blocks outside MAIN_FORKNUM are skipped; each
                      :  * remaining block is passed to process_target_wal_block_change().
     387              :  */
     388              : static void
     389        86491 : extractPageInfo(XLogReaderState *record)
     390              : {
     391              :     int         block_id;
     392        86491 :     RmgrId      rmid = XLogRecGetRmid(record);
     393        86491 :     uint8       info = XLogRecGetInfo(record);
     394        86491 :     uint8       rminfo = info & ~XLR_INFO_MASK; /* info with the XLR_INFO_MASK bits cleared */
     395              : 
     396              :     /* Is this a special record type that I recognize? */
     397              : 
     398        86491 :     if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_CREATE_FILE_COPY)
     399              :     {
     400              :         /*
     401              :          * New databases can be safely ignored. It won't be present in the
     402              :          * source system, so it will be deleted. There's one corner-case,
     403              :          * though: if a new, different, database is also created in the source
     404              :          * system, we'll see that the files already exist and not copy them.
     405              :          * That's OK, though; WAL replay of creating the new database, from
     406              :          * the source systems's WAL, will re-copy the new database,
     407              :          * overwriting the database created in the target system.
     408              :          */
     409              :     }
     410        86491 :     else if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_CREATE_WAL_LOG)
     411              :     {
     412              :         /*
     413              :          * New databases can be safely ignored. It won't be present in the
     414              :          * source system, so it will be deleted.
     415              :          */
     416              :     }
     417        86487 :     else if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_DROP)
     418              :     {
     419              :         /*
     420              :          * An existing database was dropped. We'll see that the files don't
     421              :          * exist in the target data dir, and copy them in toto from the source
     422              :          * system. No need to do anything special here.
     423              :          */
     424              :     }
     425        86487 :     else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_CREATE)
     426              :     {
     427              :         /*
     428              :          * We can safely ignore these. The file will be removed from the
     429              :          * target, if it doesn't exist in source system. If a file with same
     430              :          * name is created in source system, too, there will be WAL records
     431              :          * for all the blocks in it.
     432              :          */
     433              :     }
     434        85290 :     else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_TRUNCATE)
     435              :     {
     436              :         /*
     437              :          * We can safely ignore these. When we compare the sizes later on,
     438              :          * we'll notice that they differ, and copy the missing tail from
     439              :          * source system.
     440              :          */
     441              :     }
     442        85286 :     else if (rmid == RM_XACT_ID &&
     443           43 :              ((rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_COMMIT ||
     444            0 :               (rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_COMMIT_PREPARED ||
     445            0 :               (rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_ABORT ||
     446            0 :               (rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_ABORT_PREPARED))
     447              :     {
     448              :         /*
     449              :          * These records can include "dropped rels". We can safely ignore
     450              :          * them, we will see that they are missing and copy them from the
     451              :          * source.
     452              :          */
     453              :     }
     454        85243 :     else if (info & XLR_SPECIAL_REL_UPDATE)
     455              :     {
     456              :         /*
     457              :          * This record type modifies a relation file in some special way, but
     458              :          * we don't recognize the type. That's bad - we don't know how to
     459              :          * track that change.
     460              :          */
     461            0 :         pg_fatal("WAL record modifies a relation, but record type is not recognized:\n"
     462              :                  "lsn: %X/%08X, rmid: %d, rmgr: %s, info: %02X",
     463              :                  LSN_FORMAT_ARGS(record->ReadRecPtr),
     464              :                  rmid, RmgrName(rmid), info);
     465              :     }
     466              : 
     467       171990 :     for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
     468              :     {
     469              :         RelFileLocator rlocator;
     470              :         ForkNumber  forknum;
     471              :         BlockNumber blkno;
     472              : 
     473        85499 :         if (!XLogRecGetBlockTagExtended(record, block_id,
     474              :                                         &rlocator, &forknum, &blkno, NULL))
     475          948 :             continue;
     476              : 
     477              :         /* We only care about the main fork; others are copied in toto */
     478        85499 :         if (forknum != MAIN_FORKNUM)
     479          948 :             continue;
     480              : 
     481        84551 :         process_target_wal_block_change(forknum, rlocator, blkno);
     482              :     }
     483        86491 : }
        

Generated by: LCOV version 2.0-1