LCOV - code coverage report
Current view: top level - src/bin/pg_rewind - parsexlog.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 110 139 79.1 %
Date: 2024-09-08 23:12:01 Functions: 5 5 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * parsexlog.c
       4             :  *    Functions for reading Write-Ahead-Log
       5             :  *
       6             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *-------------------------------------------------------------------------
      10             :  */
      11             : 
      12             : #include "postgres_fe.h"
      13             : 
      14             : #include <unistd.h>
      15             : 
      16             : #include "access/rmgr.h"
      17             : #include "access/xact.h"
      18             : #include "access/xlog_internal.h"
      19             : #include "access/xlogreader.h"
      20             : #include "catalog/pg_control.h"
      21             : #include "catalog/storage_xlog.h"
      22             : #include "commands/dbcommands_xlog.h"
      23             : #include "fe_utils/archive.h"
      24             : #include "filemap.h"
      25             : #include "pg_rewind.h"
      26             : 
      27             : /*
      28             :  * RmgrNames holds the names of the built-in resource managers. It is used,
      29             :  * through the RmgrName() macro below, to make error messages a bit nicer.
      30             :  */
      31             : #define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask,decode) \
      32             :   name,
      33             : 
      34             : static const char *const RmgrNames[RM_MAX_ID + 1] = {
      35             : #include "access/rmgrlist.h"
      36             : };
      37             : /* Ids above RM_MAX_BUILTIN_ID are reported as "custom" instead of being looked up. */
      38             : #define RmgrName(rmid) (((rmid) <= RM_MAX_BUILTIN_ID) ? \
      39             :                         RmgrNames[rmid] : "custom")
      40             : 
      41             : static void extractPageInfo(XLogReaderState *record);
      42             : /* State of the WAL segment file currently held open by SimpleXLogPageRead. */
      43             : static int  xlogreadfd = -1;    /* open file descriptor, or -1 if none */
      44             : static XLogSegNo xlogreadsegno = 0; /* segment number of the last requested page */
      45             : static char xlogfpath[MAXPGPATH];   /* path built for the most recent open attempt */
      46             : /* Per-reader state, reached by SimpleXLogPageRead through private_data. */
      47             : typedef struct XLogPageReadPrivate
      48             : {
      49             :     const char *restoreCommand; /* NULL if there is no restore_command to run */
      50             :     int         tliIndex;       /* current index into targetHistory */
      51             : } XLogPageReadPrivate;
      52             : 
      53             : static int  SimpleXLogPageRead(XLogReaderState *xlogreader,
      54             :                                XLogRecPtr targetPagePtr,
      55             :                                int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
      56             : 
      57             : /*
      58             :  * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
      59             :  * index 'tliIndex' in target timeline history, until 'endpoint'. Each record
      60             :  * read is passed to extractPageInfo(), which notes the data blocks it touches.
      61             :  * 'restoreCommand' may be NULL; a record that cannot be read is a fatal error.
      62             :  * 'endpoint' is the end of the last record to read. The record starting at
      63             :  * 'endpoint' is the first one that is not read.
      64             :  */
      65             : void
      66          26 : extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
      67             :                XLogRecPtr endpoint, const char *restoreCommand)
      68             : {
      69             :     XLogRecord *record;
      70             :     XLogReaderState *xlogreader;
      71             :     char       *errormsg;
      72             :     XLogPageReadPrivate private;
      73             : 
      74          26 :     private.tliIndex = tliIndex;
      75          26 :     private.restoreCommand = restoreCommand;
      76          26 :     xlogreader = XLogReaderAllocate(WalSegSz, datadir,
      77          26 :                                     XL_ROUTINE(.page_read = &SimpleXLogPageRead),
      78             :                                     &private);
      79          26 :     if (xlogreader == NULL)
      80           0 :         pg_fatal("out of memory while allocating a WAL reading processor");
      81             :     /* Read at least one record, then continue until EndRecPtr reaches 'endpoint'. */
      82          26 :     XLogBeginRead(xlogreader, startpoint);
      83             :     do
      84             :     {
      85      172604 :         record = XLogReadRecord(xlogreader, &errormsg);
      86             : 
      87      172604 :         if (record == NULL)
      88             :         {
      89           0 :             XLogRecPtr  errptr = xlogreader->EndRecPtr;
      90             : 
      91           0 :             if (errormsg)
      92           0 :                 pg_fatal("could not read WAL record at %X/%X: %s",
      93             :                          LSN_FORMAT_ARGS(errptr),
      94             :                          errormsg);
      95             :             else
      96           0 :                 pg_fatal("could not read WAL record at %X/%X",
      97             :                          LSN_FORMAT_ARGS(errptr));
      98             :         }
      99             : 
     100      172604 :         extractPageInfo(xlogreader);
     101      172604 :     } while (xlogreader->EndRecPtr < endpoint);
     102             : 
     103             :     /*
     104             :      * If 'endpoint' didn't point exactly at a record boundary, the caller
     105             :      * messed up.
     106             :      */
     107          26 :     if (xlogreader->EndRecPtr != endpoint)
     108           0 :         pg_fatal("end pointer %X/%X is not a valid end point; expected %X/%X",
     109             :                  LSN_FORMAT_ARGS(endpoint), LSN_FORMAT_ARGS(xlogreader->EndRecPtr));
     110             :     /* Free the reader and close the segment file SimpleXLogPageRead left open. */
     111          26 :     XLogReaderFree(xlogreader);
     112          26 :     if (xlogreadfd != -1)
     113             :     {
     114          26 :         close(xlogreadfd);
     115          26 :         xlogreadfd = -1;
     116             :     }
     117          26 : }
     118             : 
     119             : /*
     120             :  * Reads the one WAL record that starts at 'ptr' and returns its end position,
     121             :  * without examining the record itself. An unreadable record is a fatal error.
     122             :  */
     123             : XLogRecPtr
     124          26 : readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex,
     125             :               const char *restoreCommand)
     126             : {
     127             :     XLogRecord *record;
     128             :     XLogReaderState *xlogreader;
     129             :     char       *errormsg;
     130             :     XLogPageReadPrivate private;
     131             :     XLogRecPtr  endptr;
     132             : 
     133          26 :     private.tliIndex = tliIndex;
     134          26 :     private.restoreCommand = restoreCommand;
     135          26 :     xlogreader = XLogReaderAllocate(WalSegSz, datadir,
     136          26 :                                     XL_ROUTINE(.page_read = &SimpleXLogPageRead),
     137             :                                     &private);
     138          26 :     if (xlogreader == NULL)
     139           0 :         pg_fatal("out of memory while allocating a WAL reading processor");
     140             : 
     141          26 :     XLogBeginRead(xlogreader, ptr);
     142          26 :     record = XLogReadRecord(xlogreader, &errormsg);
     143          26 :     if (record == NULL)
     144             :     {
     145           0 :         if (errormsg)
     146           0 :             pg_fatal("could not read WAL record at %X/%X: %s",
     147             :                      LSN_FORMAT_ARGS(ptr), errormsg);
     148             :         else
     149           0 :             pg_fatal("could not read WAL record at %X/%X",
     150             :                      LSN_FORMAT_ARGS(ptr));
     151             :     }
     152          26 :     endptr = xlogreader->EndRecPtr;
     153             :     /* Save the result first: the reader is freed and the segment file closed next. */
     154          26 :     XLogReaderFree(xlogreader);
     155          26 :     if (xlogreadfd != -1)
     156             :     {
     157          26 :         close(xlogreadfd);
     158          26 :         xlogreadfd = -1;
     159             :     }
     160             : 
     161          26 :     return endptr;
     162             : }
     163             : 
     164             : /*
     165             :  * Find the last checkpoint preceding 'forkptr'; results go to the lastchkpt* arguments.
     166             :  */
     167             : void
     168          26 : findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
     169             :                    XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
     170             :                    XLogRecPtr *lastchkptredo, const char *restoreCommand)
     171             : {
     172             :     /* Walk backwards, starting from the given record */
     173             :     XLogRecord *record;
     174             :     XLogRecPtr  searchptr;
     175             :     XLogReaderState *xlogreader;
     176             :     char       *errormsg;
     177             :     XLogPageReadPrivate private;
     178             : 
     179             :     /*
     180             :      * The given fork pointer points to the end of the last common record,
     181             :      * which is not necessarily the beginning of the next record, if the
     182             :      * previous record happens to end at a page boundary. Skip over the page
     183             :      * header in that case to find the next record.
     184             :      */
     185          26 :     if (forkptr % XLOG_BLCKSZ == 0)
     186             :     {
     187           4 :         if (XLogSegmentOffset(forkptr, WalSegSz) == 0)
     188           4 :             forkptr += SizeOfXLogLongPHD;
     189             :         else
     190           0 :             forkptr += SizeOfXLogShortPHD;
     191             :     }
     192             : 
     193          26 :     private.tliIndex = tliIndex;
     194          26 :     private.restoreCommand = restoreCommand;
     195          26 :     xlogreader = XLogReaderAllocate(WalSegSz, datadir,
     196          26 :                                     XL_ROUTINE(.page_read = &SimpleXLogPageRead),
     197             :                                     &private);
     198          26 :     if (xlogreader == NULL)
     199           0 :         pg_fatal("out of memory while allocating a WAL reading processor");
     200             : 
     201          26 :     searchptr = forkptr;
     202             :     for (;;)
     203        5074 :     {
     204             :         uint8       info;
     205             : 
     206        5100 :         XLogBeginRead(xlogreader, searchptr);
     207        5100 :         record = XLogReadRecord(xlogreader, &errormsg);
     208             : 
     209        5100 :         if (record == NULL)
     210             :         {
     211           0 :             if (errormsg)
     212           0 :                 pg_fatal("could not find previous WAL record at %X/%X: %s",
     213             :                          LSN_FORMAT_ARGS(searchptr),
     214             :                          errormsg);
     215             :             else
     216           0 :                 pg_fatal("could not find previous WAL record at %X/%X",
     217             :                          LSN_FORMAT_ARGS(searchptr));
     218             :         }
     219             : 
     220             :         /*
     221             :          * Check if it is a checkpoint record. This checkpoint record needs to
     222             :          * be the latest checkpoint before WAL forked and not the checkpoint
     223             :          * where the primary has been stopped to be rewound.
     224             :          */
     225        5100 :         info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
     226        5100 :         if (searchptr < forkptr &&
     227        5074 :             XLogRecGetRmid(xlogreader) == RM_XLOG_ID &&
     228        3706 :             (info == XLOG_CHECKPOINT_SHUTDOWN ||
     229             :              info == XLOG_CHECKPOINT_ONLINE))
     230             :         {
     231             :             CheckPoint  checkPoint;
     232             : 
     233          26 :             memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
     234          26 :             *lastchkptrec = searchptr;
     235          26 :             *lastchkpttli = checkPoint.ThisTimeLineID;
     236          26 :             *lastchkptredo = checkPoint.redo;
     237          26 :             break;
     238             :         }
     239             : 
     240             :         /* Not the checkpoint we want: walk backwards to the previous record. */
     241        5074 :         searchptr = record->xl_prev;
     242             :     }
     243             :     /* Free the reader and close the segment file SimpleXLogPageRead left open. */
     244          26 :     XLogReaderFree(xlogreader);
     245          26 :     if (xlogreadfd != -1)
     246             :     {
     247          26 :         close(xlogreadfd);
     248          26 :         xlogreadfd = -1;
     249             :     }
     250          26 : }
     251             : 
     252             : /* XLogReader page_read callback: reads one WAL page into readBuf; returns XLOG_BLCKSZ, or -1 on failure */
     253             : static int
     254       11330 : SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
     255             :                    int reqLen, XLogRecPtr targetRecPtr, char *readBuf)
     256             : {
     257       11330 :     XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data;
     258             :     uint32      targetPageOff;
     259             :     XLogRecPtr  targetSegEnd;
     260             :     XLogSegNo   targetSegNo;
     261             :     int         r;
     262             : 
     263       11330 :     XLByteToSeg(targetPagePtr, targetSegNo, WalSegSz);
     264       11330 :     XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, WalSegSz, targetSegEnd);
     265       11330 :     targetPageOff = XLogSegmentOffset(targetPagePtr, WalSegSz);
     266             : 
     267             :     /*
     268             :      * See if we need to switch to a new segment because the requested record
     269             :      * is not in the currently open one.
     270             :      */
     271       11330 :     if (xlogreadfd >= 0 &&
     272       11252 :         !XLByteInSeg(targetPagePtr, xlogreadsegno, WalSegSz))
     273             :     {
     274          16 :         close(xlogreadfd);
     275          16 :         xlogreadfd = -1;
     276             :     }
     277             : 
     278       11330 :     XLByteToSeg(targetPagePtr, xlogreadsegno, WalSegSz);
     279             : 
     280       11330 :     if (xlogreadfd < 0)
     281             :     {
     282             :         char        xlogfname[MAXFNAMELEN];
     283             : 
     284             :         /*
     285             :          * Since incomplete segments are copied into next timelines, switch to
     286             :          * the timeline holding the required segment. Because this scan can be
     287             :          * done both forward and backward, the timeline index may have to move
     288             :          * in either direction.
     289             :          */
     290          98 :         while (private->tliIndex < targetNentries - 1 &&
     291           4 :                targetHistory[private->tliIndex].end < targetSegEnd)
     292           4 :             private->tliIndex++;
     293          94 :         while (private->tliIndex > 0 &&
     294          12 :                targetHistory[private->tliIndex].begin >= targetSegEnd)
     295           0 :             private->tliIndex--;
     296             : 
     297          94 :         XLogFileName(xlogfname, targetHistory[private->tliIndex].tli,
     298             :                      xlogreadsegno, WalSegSz);
     299             : 
     300          94 :         snprintf(xlogfpath, MAXPGPATH, "%s/" XLOGDIR "/%s",
     301          94 :                  xlogreader->segcxt.ws_dir, xlogfname);
     302             : 
     303          94 :         xlogreadfd = open(xlogfpath, O_RDONLY | PG_BINARY, 0);
     304             : 
     305          94 :         if (xlogreadfd < 0)
     306             :         {
     307             :             /*
     308             :              * If we have no restore_command to execute, report the error and fail.
     309             :              */
     310           2 :             if (private->restoreCommand == NULL)
     311             :             {
     312           0 :                 pg_log_error("could not open file \"%s\": %m", xlogfpath);
     313           0 :                 return -1;
     314             :             }
     315             : 
     316             :             /*
     317             :              * Since we have a restore_command, try to retrieve the missing WAL
     318             :              * file from the archive.
     319             :              */
     320           2 :             xlogreadfd = RestoreArchivedFile(xlogreader->segcxt.ws_dir,
     321             :                                              xlogfname,
     322             :                                              WalSegSz,
     323             :                                              private->restoreCommand);
     324             : 
     325           2 :             if (xlogreadfd < 0)
     326           0 :                 return -1;
     327             :             else
     328           2 :                 pg_log_debug("using file \"%s\" restored from archive",
     329             :                              xlogfpath);
     330             :         }
     331             :     }
     332             : 
     333             :     /*
     334             :      * At this point, we have the right segment open.
     335             :      */
     336             :     Assert(xlogreadfd != -1);
     337             : 
     338             :     /* Read the requested page */
     339       11330 :     if (lseek(xlogreadfd, (off_t) targetPageOff, SEEK_SET) < 0)
     340             :     {
     341           0 :         pg_log_error("could not seek in file \"%s\": %m", xlogfpath);
     342           0 :         return -1;
     343             :     }
     344             : 
     345             :     /* A short read is treated as a failure, just like a read error. */
     346       11330 :     r = read(xlogreadfd, readBuf, XLOG_BLCKSZ);
     347       11330 :     if (r != XLOG_BLCKSZ)
     348             :     {
     349           0 :         if (r < 0)
     350           0 :             pg_log_error("could not read file \"%s\": %m", xlogfpath);
     351             :         else
     352           0 :             pg_log_error("could not read file \"%s\": read %d of %zu",
     353             :                          xlogfpath, r, (Size) XLOG_BLCKSZ);
     354             : 
     355           0 :         return -1;
     356             :     }
     357             : 
     358             :     Assert(targetSegNo == xlogreadsegno);
     359             :     /* Record in the reader which timeline this page was read from. */
     360       11330 :     xlogreader->seg.ws_tli = targetHistory[private->tliIndex].tli;
     361       11330 :     return XLOG_BLCKSZ;
     362             : }
     363             : 
     364             : /*
     365             :  * Pass each main-fork block the current record touches to process_target_wal_block_change().
     366             :  */
     367             : static void
     368      172604 : extractPageInfo(XLogReaderState *record)
     369             : {
     370             :     int         block_id;
     371      172604 :     RmgrId      rmid = XLogRecGetRmid(record);
     372      172604 :     uint8       info = XLogRecGetInfo(record);
     373      172604 :     uint8       rminfo = info & ~XLR_INFO_MASK;
     374             : 
     375             :     /* Is this a special record type that I recognize? */
     376             : 
     377      172604 :     if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_CREATE_FILE_COPY)
     378             :     {
     379             :         /*
     380             :          * A new database can be safely ignored. It won't be present in the
     381             :          * source system, so it will be deleted. There's one corner-case,
     382             :          * though: if a new, different, database is also created in the source
     383             :          * system, we'll see that the files already exist and not copy them.
     384             :          * That's OK, though; WAL replay of creating the new database, from
     385             :          * the source system's WAL, will re-copy the new database,
     386             :          * overwriting the database created in the target system.
     387             :          */
     388             :     }
     389      172604 :     else if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_CREATE_WAL_LOG)
     390             :     {
     391             :         /*
     392             :          * A new database can be safely ignored. It won't be present in the
     393             :          * source system, so it will be deleted.
     394             :          */
     395             :     }
     396      172596 :     else if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_DROP)
     397             :     {
     398             :         /*
     399             :          * An existing database was dropped. We'll see that the files don't
     400             :          * exist in the target data dir, and copy them in toto from the source
     401             :          * system. No need to do anything special here.
     402             :          */
     403             :     }
     404      172596 :     else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_CREATE)
     405             :     {
     406             :         /*
     407             :          * We can safely ignore these. The file will be removed from the
     408             :          * target, if it doesn't exist in the source system. If a file with the
     409             :          * same name is created in the source system, too, there will be WAL
     410             :          * records for all the blocks in it.
     411             :          */
     412             :     }
     413      170220 :     else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_TRUNCATE)
     414             :     {
     415             :         /*
     416             :          * We can safely ignore these. When we compare the sizes later on,
     417             :          * we'll notice that they differ, and copy the missing tail from
     418             :          * the source system.
     419             :          */
     420             :     }
     421      170212 :     else if (rmid == RM_XACT_ID &&
     422          82 :              ((rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_COMMIT ||
     423           0 :               (rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_COMMIT_PREPARED ||
     424           0 :               (rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_ABORT ||
     425           0 :               (rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_ABORT_PREPARED))
     426             :     {
     427             :         /*
     428             :          * These records can include "dropped rels". We can safely ignore
     429             :          * them; we will see that they are missing and copy them from the
     430             :          * source.
     431             :          */
     432             :     }
     433      170130 :     else if (info & XLR_SPECIAL_REL_UPDATE)
     434             :     {
     435             :         /*
     436             :          * This record type modifies a relation file in some special way, but
     437             :          * we don't recognize the type. That's bad - we don't know how to
     438             :          * track that change.
     439             :          */
     440           0 :         pg_fatal("WAL record modifies a relation, but record type is not recognized: "
     441             :                  "lsn: %X/%X, rmid: %d, rmgr: %s, info: %02X",
     442             :                  LSN_FORMAT_ARGS(record->ReadRecPtr),
     443             :                  rmid, RmgrName(rmid), info);
     444             :     }
     445             :     /* Whatever the record type, go through all block ids up to the record's maximum. */
     446      343286 :     for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
     447             :     {
     448             :         RelFileLocator rlocator;
     449             :         ForkNumber  forknum;
     450             :         BlockNumber blkno;
     451             : 
     452      170682 :         if (!XLogRecGetBlockTagExtended(record, block_id,
     453             :                                         &rlocator, &forknum, &blkno, NULL))
     454        1896 :             continue;
     455             : 
     456             :         /* We only care about the main fork; others are copied in toto */
     457      170682 :         if (forknum != MAIN_FORKNUM)
     458        1896 :             continue;
     459             : 
     460      168786 :         process_target_wal_block_change(forknum, rlocator, blkno);
     461             :     }
     462      172604 : }

Generated by: LCOV version 1.14