LCOV - code coverage report
Current view: top level - src/backend/access/nbtree - nbtxlog.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 476 493 96.6 %
Date: 2025-10-31 04:18:36 Functions: 17 17 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * nbtxlog.c
       4             :  *    WAL replay logic for btrees.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/nbtree/nbtxlog.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/bufmask.h"
      18             : #include "access/nbtree.h"
      19             : #include "access/nbtxlog.h"
      20             : #include "access/transam.h"
      21             : #include "access/xlogutils.h"
      22             : #include "storage/standby.h"
      23             : #include "utils/memutils.h"
      24             : 
      25             : static MemoryContext opCtx;     /* working memory for operations */
      26             : 
      27             : /*
      28             :  * _bt_restore_page -- re-enter all the index tuples on a page
      29             :  *
      30             :  * The page is freshly init'd, and *from (length len) is a copy of what
      31             :  * had been its upper part (pd_upper to pd_special).  We assume that the
      32             :  * tuples had been added to the page in item-number order, and therefore
      33             :  * the one with highest item number appears first (lowest on the page).
      34             :  */
      35             : static void
      36        3010 : _bt_restore_page(Page page, char *from, int len)
      37             : {
      38             :     IndexTupleData itupdata;
      39             :     Size        itemsz;
      40        3010 :     char       *end = from + len;
      41             :     void       *items[MaxIndexTuplesPerPage];
      42             :     uint16      itemsizes[MaxIndexTuplesPerPage];
      43             :     int         i;
      44             :     int         nitems;
      45             : 
      46             :     /*
      47             :      * To get the items back in the original order, we add them to the page in
      48             :      * reverse.  To figure out where one tuple ends and another begins, we
      49             :      * have to scan them in forward order first.
      50             :      */
      51        3010 :     i = 0;
      52      201914 :     while (from < end)
      53             :     {
      54             :         /*
      55             :          * As we step through the items, 'from' won't always be properly
      56             :          * aligned, so we need to use memcpy().  Further, we use void * here
      57             :          * for our items array for the same reason; wouldn't want the compiler
      58             :          * or anyone thinking that an item is aligned when it isn't.
      59             :          */
      60      198904 :         memcpy(&itupdata, from, sizeof(IndexTupleData));
      61      198904 :         itemsz = IndexTupleSize(&itupdata);
      62      198904 :         itemsz = MAXALIGN(itemsz);
      63             : 
      64      198904 :         items[i] = from;
      65      198904 :         itemsizes[i] = itemsz;
      66      198904 :         i++;
      67             : 
      68      198904 :         from += itemsz;
      69             :     }
      70        3010 :     nitems = i;
      71             : 
      72      201914 :     for (i = nitems - 1; i >= 0; i--)
      73             :     {
      74      198904 :         if (PageAddItem(page, items[i], itemsizes[i], nitems - i, false, false) == InvalidOffsetNumber)
      75           0 :             elog(PANIC, "_bt_restore_page: cannot add item to page");
      76             :     }
      77        3010 : }
      78             : 
/*
 * _bt_restore_meta -- rebuild the btree metapage from WAL record data
 *
 * Reinitializes the buffer registered as 'block_id' in the record and
 * repopulates it from the xl_btree_metadata payload carried in that
 * block's data.  The buffer is marked dirty and released before return.
 */
static void
_bt_restore_meta(XLogReaderState *record, uint8 block_id)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    Buffer      metabuf;
    Page        metapg;
    BTMetaPageData *md;
    BTPageOpaque pageop;
    xl_btree_metadata *xlrec;
    char       *ptr;
    Size        len;

    metabuf = XLogInitBufferForRedo(record, block_id);
    ptr = XLogRecGetBlockData(record, block_id, &len);

    Assert(len == sizeof(xl_btree_metadata));
    Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
    xlrec = (xl_btree_metadata *) ptr;
    metapg = BufferGetPage(metabuf);

    /* Start from a fresh page; every metapage field is rebuilt below */
    _bt_pageinit(metapg, BufferGetPageSize(metabuf));

    md = BTPageGetMeta(metapg);
    md->btm_magic = BTREE_MAGIC;
    md->btm_version = xlrec->version;
    md->btm_root = xlrec->root;
    md->btm_level = xlrec->level;
    md->btm_fastroot = xlrec->fastroot;
    md->btm_fastlevel = xlrec->fastlevel;
    /* Cannot log BTREE_MIN_VERSION index metapage without upgrade */
    Assert(md->btm_version >= BTREE_NOVAC_VERSION);
    md->btm_last_cleanup_num_delpages = xlrec->last_cleanup_num_delpages;
    /* num_heap_tuples is not carried in WAL; -1.0 marks it as unknown */
    md->btm_last_cleanup_num_heap_tuples = -1.0;
    md->btm_allequalimage = xlrec->allequalimage;

    pageop = BTPageGetOpaque(metapg);
    pageop->btpo_flags = BTP_META;

    /*
     * Set pd_lower just past the end of the metadata.  This is essential,
     * because without doing so, metadata will be lost if xlog.c compresses
     * the page.
     */
    ((PageHeader) metapg)->pd_lower =
        ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg;

    PageSetLSN(metapg, lsn);
    MarkBufferDirty(metabuf);
    UnlockReleaseBuffer(metabuf);
}
     129             : 
     130             : /*
     131             :  * _bt_clear_incomplete_split -- clear INCOMPLETE_SPLIT flag on a page
     132             :  *
     133             :  * This is a common subroutine of the redo functions of all the WAL record
     134             :  * types that can insert a downlink: insert, split, and newroot.
     135             :  */
     136             : static void
     137        2916 : _bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id)
     138             : {
     139        2916 :     XLogRecPtr  lsn = record->EndRecPtr;
     140             :     Buffer      buf;
     141             : 
     142        2916 :     if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO)
     143             :     {
     144        2916 :         Page        page = BufferGetPage(buf);
     145        2916 :         BTPageOpaque pageop = BTPageGetOpaque(page);
     146             : 
     147             :         Assert(P_INCOMPLETE_SPLIT(pageop));
     148        2916 :         pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT;
     149             : 
     150        2916 :         PageSetLSN(page, lsn);
     151        2916 :         MarkBufferDirty(buf);
     152             :     }
     153        2916 :     if (BufferIsValid(buf))
     154        2916 :         UnlockReleaseBuffer(buf);
     155        2916 : }
     156             : 
/*
 * btree_xlog_insert -- replay a btree tuple insertion
 *
 * Handles all insert record variants: leaf and internal page inserts
 * (isleaf), inserts that also update the metapage (ismeta), and leaf
 * inserts that split an existing posting list (posting).
 */
static void
btree_xlog_insert(bool isleaf, bool ismeta, bool posting,
                  XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;

    /*
     * Insertion to an internal page finishes an incomplete split at the child
     * level.  Clear the incomplete-split flag in the child.  Note: during
     * normal operation, the child and parent pages are locked at the same
     * time (the locks are coupled), so that clearing the flag and inserting
     * the downlink appear atomic to other backends.  We don't bother with
     * that during replay, because readers don't care about the
     * incomplete-split flag and there cannot be updates happening.
     */
    if (!isleaf)
        _bt_clear_incomplete_split(record, 1);
    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        Size        datalen;
        char       *datapos = XLogRecGetBlockData(record, 0, &datalen);

        page = BufferGetPage(buffer);

        if (!posting)
        {
            /* Simple retail insertion */
            if (PageAddItem(page, datapos, datalen, xlrec->offnum, false, false) == InvalidOffsetNumber)
                elog(PANIC, "failed to add new item");
        }
        else
        {
            ItemId      itemid;
            IndexTuple  oposting,
                        newitem,
                        nposting;
            uint16      postingoff;

            /*
             * A posting list split occurred during leaf page insertion.  WAL
             * record data will start with an offset number representing the
             * point in an existing posting list that a split occurs at.
             *
             * Use _bt_swap_posting() to repeat posting list split steps from
             * primary.  Note that newitem from WAL record is 'orignewitem',
             * not the final version of newitem that is actually inserted on
             * page.
             */
            postingoff = *((uint16 *) datapos);
            datapos += sizeof(uint16);
            datalen -= sizeof(uint16);

            /* the posting list being split sits just before the new item */
            itemid = PageGetItemId(page, OffsetNumberPrev(xlrec->offnum));
            oposting = (IndexTuple) PageGetItem(page, itemid);

            /* Use mutable, aligned newitem copy in _bt_swap_posting() */
            Assert(isleaf && postingoff > 0);
            newitem = CopyIndexTuple((IndexTuple) datapos);
            nposting = _bt_swap_posting(newitem, oposting, postingoff);

            /* Replace existing posting list with post-split version */
            memcpy(oposting, nposting, MAXALIGN(IndexTupleSize(nposting)));

            /* Insert "final" new item (not orignewitem from WAL stream) */
            Assert(IndexTupleSize(newitem) == datalen);
            if (PageAddItem(page, newitem, datalen, xlrec->offnum, false, false) == InvalidOffsetNumber)
                elog(PANIC, "failed to add posting split new item");
        }

        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);

    /*
     * Note: in normal operation, we'd update the metapage while still holding
     * lock on the page we inserted into.  But during replay it's not
     * necessary to hold that lock, since no other index updates can be
     * happening concurrently, and readers will cope fine with following an
     * obsolete link from the metapage.
     */
    if (ismeta)
        _bt_restore_meta(record, 2);
}
     245             : 
/*
 * btree_xlog_split -- replay a btree page split
 *
 * Rebuilds the new right sibling page from scratch out of the WAL record
 * data, reconstructs the left (original) page via a temp-page copy so that
 * the physical tuple order matches the primary, and finally fixes the
 * left-link of the page to the right of the new sibling.  'newitemonleft'
 * tells us whether the new item that triggered the split went to the left
 * half (in which case it is carried in block 0's data).
 */
static void
btree_xlog_split(bool newitemonleft, XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
    bool        isleaf = (xlrec->level == 0);
    Buffer      buf;
    Buffer      rbuf;
    Page        rpage;
    BTPageOpaque ropaque;
    char       *datapos;
    Size        datalen;
    BlockNumber origpagenumber;
    BlockNumber rightpagenumber;
    BlockNumber spagenumber;

    XLogRecGetBlockTag(record, 0, NULL, NULL, &origpagenumber);
    XLogRecGetBlockTag(record, 1, NULL, NULL, &rightpagenumber);
    /* block 2 (the old right sibling) is absent when origpage was rightmost */
    if (!XLogRecGetBlockTagExtended(record, 2, NULL, NULL, &spagenumber, NULL))
        spagenumber = P_NONE;

    /*
     * Clear the incomplete split flag on the appropriate child page one level
     * down when origpage/buf is an internal page (there must have been
     * cascading page splits during original execution in the event of an
     * internal page split).  This is like the corresponding btree_xlog_insert
     * call for internal pages.  We're not clearing the incomplete split flag
     * for the current page split here (you can think of this as part of the
     * insert of newitem that the page split action needs to perform in
     * passing).
     *
     * Like in btree_xlog_insert, this can be done before locking other pages.
     * We never need to couple cross-level locks in REDO routines.
     */
    if (!isleaf)
        _bt_clear_incomplete_split(record, 3);

    /* Reconstruct right (new) sibling page from scratch */
    rbuf = XLogInitBufferForRedo(record, 1);
    datapos = XLogRecGetBlockData(record, 1, &datalen);
    rpage = BufferGetPage(rbuf);

    _bt_pageinit(rpage, BufferGetPageSize(rbuf));
    ropaque = BTPageGetOpaque(rpage);

    ropaque->btpo_prev = origpagenumber;
    ropaque->btpo_next = spagenumber;
    ropaque->btpo_level = xlrec->level;
    ropaque->btpo_flags = isleaf ? BTP_LEAF : 0;
    ropaque->btpo_cycleid = 0;

    _bt_restore_page(rpage, datapos, datalen);

    PageSetLSN(rpage, lsn);
    MarkBufferDirty(rbuf);

    /* Now reconstruct original page (left half of split) */
    if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO)
    {
        /*
         * To retain the same physical order of the tuples that they had, we
         * initialize a temporary empty page for the left page and add all the
         * items to that in item number order.  This mirrors how _bt_split()
         * works.  Retaining the same physical order makes WAL consistency
         * checking possible.  See also _bt_restore_page(), which does the
         * same for the right page.
         */
        Page        origpage = BufferGetPage(buf);
        BTPageOpaque oopaque = BTPageGetOpaque(origpage);
        OffsetNumber off;
        IndexTuple  newitem = NULL,
                    left_hikey = NULL,
                    nposting = NULL;
        Size        newitemsz = 0,
                    left_hikeysz = 0;
        Page        leftpage;
        OffsetNumber leftoff,
                    replacepostingoff = InvalidOffsetNumber;

        datapos = XLogRecGetBlockData(record, 0, &datalen);

        /* block 0's data carries newitem only when it landed on the left */
        if (newitemonleft || xlrec->postingoff != 0)
        {
            newitem = (IndexTuple) datapos;
            newitemsz = MAXALIGN(IndexTupleSize(newitem));
            datapos += newitemsz;
            datalen -= newitemsz;

            if (xlrec->postingoff != 0)
            {
                ItemId      itemid;
                IndexTuple  oposting;

                /* Posting list must be at offset number before new item's */
                replacepostingoff = OffsetNumberPrev(xlrec->newitemoff);

                /* Use mutable, aligned newitem copy in _bt_swap_posting() */
                newitem = CopyIndexTuple(newitem);
                itemid = PageGetItemId(origpage, replacepostingoff);
                oposting = (IndexTuple) PageGetItem(origpage, itemid);
                nposting = _bt_swap_posting(newitem, oposting,
                                            xlrec->postingoff);
            }
        }

        /*
         * Extract left hikey and its size.  We assume that 16-bit alignment
         * is enough to apply IndexTupleSize (since it's fetching from a
         * uint16 field).
         */
        left_hikey = (IndexTuple) datapos;
        left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
        datapos += left_hikeysz;
        datalen -= left_hikeysz;

        Assert(datalen == 0);

        leftpage = PageGetTempPageCopySpecial(origpage);

        /* Add high key tuple from WAL record to temp page */
        leftoff = P_HIKEY;
        if (PageAddItem(leftpage, left_hikey, left_hikeysz, P_HIKEY, false, false) == InvalidOffsetNumber)
            elog(ERROR, "failed to add high key to left page after split");
        leftoff = OffsetNumberNext(leftoff);

        /* Copy the items that stay on the left, inserting newitem in place */
        for (off = P_FIRSTDATAKEY(oopaque); off < xlrec->firstrightoff; off++)
        {
            ItemId      itemid;
            Size        itemsz;
            IndexTuple  item;

            /* Add replacement posting list when required */
            if (off == replacepostingoff)
            {
                Assert(newitemonleft ||
                       xlrec->firstrightoff == xlrec->newitemoff);
                if (PageAddItem(leftpage, nposting, MAXALIGN(IndexTupleSize(nposting)), leftoff, false, false) == InvalidOffsetNumber)
                    elog(ERROR, "failed to add new posting list item to left page after split");
                leftoff = OffsetNumberNext(leftoff);
                continue;       /* don't insert oposting */
            }

            /* add the new item if it was inserted on left page */
            else if (newitemonleft && off == xlrec->newitemoff)
            {
                if (PageAddItem(leftpage, newitem, newitemsz, leftoff, false, false) == InvalidOffsetNumber)
                    elog(ERROR, "failed to add new item to left page after split");
                leftoff = OffsetNumberNext(leftoff);
            }

            itemid = PageGetItemId(origpage, off);
            itemsz = ItemIdGetLength(itemid);
            item = (IndexTuple) PageGetItem(origpage, itemid);
            if (PageAddItem(leftpage, item, itemsz, leftoff, false, false) == InvalidOffsetNumber)
                elog(ERROR, "failed to add old item to left page after split");
            leftoff = OffsetNumberNext(leftoff);
        }

        /* cope with possibility that newitem goes at the end */
        if (newitemonleft && off == xlrec->newitemoff)
        {
            if (PageAddItem(leftpage, newitem, newitemsz, leftoff, false, false) == InvalidOffsetNumber)
                elog(ERROR, "failed to add new item to left page after split");
            leftoff = OffsetNumberNext(leftoff);
        }

        /* atomically swap the rebuilt temp page into the buffer */
        PageRestoreTempPage(leftpage, origpage);

        /* Fix opaque fields */
        oopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
        if (isleaf)
            oopaque->btpo_flags |= BTP_LEAF;
        oopaque->btpo_next = rightpagenumber;
        oopaque->btpo_cycleid = 0;

        PageSetLSN(origpage, lsn);
        MarkBufferDirty(buf);
    }

    /* Fix left-link of the page to the right of the new right sibling */
    if (spagenumber != P_NONE)
    {
        Buffer      sbuf;

        if (XLogReadBufferForRedo(record, 2, &sbuf) == BLK_NEEDS_REDO)
        {
            Page        spage = BufferGetPage(sbuf);
            BTPageOpaque spageop = BTPageGetOpaque(spage);

            spageop->btpo_prev = rightpagenumber;

            PageSetLSN(spage, lsn);
            MarkBufferDirty(sbuf);
        }
        if (BufferIsValid(sbuf))
            UnlockReleaseBuffer(sbuf);
    }

    /*
     * Finally, release the remaining buffers.  sbuf, rbuf, and buf must be
     * released together, so that readers cannot observe inconsistencies.
     */
    UnlockReleaseBuffer(rbuf);
    if (BufferIsValid(buf))
        UnlockReleaseBuffer(buf);
}
     452             : 
/*
 * btree_xlog_dedup -- replay a btree deduplication pass
 *
 * Rebuilds the page on a temp copy by re-running the dedup state machine
 * (_bt_dedup_start_pending / _bt_dedup_save_htid / _bt_dedup_finish_pending)
 * over the existing items, steered by the intervals recorded in the WAL
 * record's block data, so the result matches what the primary produced.
 */
static void
btree_xlog_dedup(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_btree_dedup *xlrec = (xl_btree_dedup *) XLogRecGetData(record);
    Buffer      buf;

    if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO)
    {
        char       *ptr = XLogRecGetBlockData(record, 0, NULL);
        Page        page = BufferGetPage(buf);
        BTPageOpaque opaque = BTPageGetOpaque(page);
        OffsetNumber offnum,
                    minoff,
                    maxoff;
        BTDedupState state;
        BTDedupInterval *intervals;
        Page        newpage;

        state = (BTDedupState) palloc(sizeof(BTDedupStateData));
        state->deduplicate = true;   /* unused */
        state->nmaxitems = 0;    /* unused */
        /* Conservatively use larger maxpostingsize than primary */
        state->maxpostingsize = BTMaxItemSize;
        state->base = NULL;
        state->baseoff = InvalidOffsetNumber;
        state->basetupsize = 0;
        state->htids = palloc(state->maxpostingsize);
        state->nhtids = 0;
        state->nitems = 0;
        state->phystupsize = 0;
        state->nintervals = 0;

        minoff = P_FIRSTDATAKEY(opaque);
        maxoff = PageGetMaxOffsetNumber(page);
        newpage = PageGetTempPageCopySpecial(page);

        /* non-rightmost pages have a high key; carry it over unchanged */
        if (!P_RIGHTMOST(opaque))
        {
            ItemId      itemid = PageGetItemId(page, P_HIKEY);
            Size        itemsz = ItemIdGetLength(itemid);
            IndexTuple  item = (IndexTuple) PageGetItem(page, itemid);

            if (PageAddItem(newpage, item, itemsz, P_HIKEY, false, false) == InvalidOffsetNumber)
                elog(ERROR, "deduplication failed to add highkey");
        }

        intervals = (BTDedupInterval *) ptr;
        for (offnum = minoff;
             offnum <= maxoff;
             offnum = OffsetNumberNext(offnum))
        {
            ItemId      itemid = PageGetItemId(page, offnum);
            IndexTuple  itup = (IndexTuple) PageGetItem(page, itemid);

            if (offnum == minoff)
                _bt_dedup_start_pending(state, itup, offnum);
            /* merge into pending posting list only within a WAL interval */
            else if (state->nintervals < xlrec->nintervals &&
                     state->baseoff == intervals[state->nintervals].baseoff &&
                     state->nitems < intervals[state->nintervals].nitems)
            {
                if (!_bt_dedup_save_htid(state, itup))
                    elog(ERROR, "deduplication failed to add heap tid to pending posting list");
            }
            else
            {
                /* flush pending tuple, then start a new pending group */
                _bt_dedup_finish_pending(newpage, state);
                _bt_dedup_start_pending(state, itup, offnum);
            }
        }

        _bt_dedup_finish_pending(newpage, state);
        Assert(state->nintervals == xlrec->nintervals);
        Assert(memcmp(state->intervals, intervals,
                      state->nintervals * sizeof(BTDedupInterval)) == 0);

        if (P_HAS_GARBAGE(opaque))
        {
            BTPageOpaque nopaque = BTPageGetOpaque(newpage);

            nopaque->btpo_flags &= ~BTP_HAS_GARBAGE;
        }

        PageRestoreTempPage(newpage, page);
        PageSetLSN(page, lsn);
        MarkBufferDirty(buf);
    }

    if (BufferIsValid(buf))
        UnlockReleaseBuffer(buf);
}
     544             : 
                      : /*
                      :  * btree_xlog_updates -- replay in-place updates of posting list tuples
                      :  *
                      :  * Walks the nupdated-entry xl_btree_update array (each fixed-size entry is
                      :  * followed inline by ndeletedtids uint16 offsets into the posting list) and
                      :  * overwrites each affected tuple on the page with its shrunken version.
                      :  * Shared by the VACUUM and DELETE redo routines.
                      :  */
      545             : static void
      546         254 : btree_xlog_updates(Page page, OffsetNumber *updatedoffsets,
      547             :                    xl_btree_update *updates, int nupdated)
      548             : {
      549             :     BTVacuumPosting vacposting;
      550             :     IndexTuple  origtuple;
      551             :     ItemId      itemid;
      552             :     Size        itemsz;
      553             : 
      554        6478 :     for (int i = 0; i < nupdated; i++)
      555             :     {
      556        6224 :         itemid = PageGetItemId(page, updatedoffsets[i]);
      557        6224 :         origtuple = (IndexTuple) PageGetItem(page, itemid);
      558             : 
                      :         /* Reconstruct the BTVacuumPosting work item described by the record */
      559        6224 :         vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
      560        6224 :                             updates->ndeletedtids * sizeof(uint16));
      561        6224 :         vacposting->updatedoffset = updatedoffsets[i];
      562        6224 :         vacposting->itup = origtuple;
      563        6224 :         vacposting->ndeletedtids = updates->ndeletedtids;
      564        6224 :         memcpy(vacposting->deletetids,
      565             :                (char *) updates + SizeOfBtreeUpdate,
      566        6224 :                updates->ndeletedtids * sizeof(uint16));
      567             : 
      568        6224 :         _bt_update_posting(vacposting);
      569             : 
      570             :         /* Overwrite updated version of tuple */
      571        6224 :         itemsz = MAXALIGN(IndexTupleSize(vacposting->itup));
      572        6224 :         if (!PageIndexTupleOverwrite(page, updatedoffsets[i], vacposting->itup, itemsz))
      573           0 :             elog(PANIC, "failed to update partially dead item");
      574             : 
      575        6224 :         pfree(vacposting->itup);
      576        6224 :         pfree(vacposting);
      577             : 
      578             :         /* advance to next xl_btree_update from array */
      579        6224 :         updates = (xl_btree_update *)
      580        6224 :             ((char *) updates + SizeOfBtreeUpdate +
      581        6224 :              updates->ndeletedtids * sizeof(uint16));
      582             :     }
      583         254 : }
     584             : 
                      : /*
                      :  * btree_xlog_vacuum -- replay an XLOG_BTREE_VACUUM record
                      :  *
                      :  * Applies VACUUM's changes to one leaf page: posting list tuple updates
                      :  * first, then whole-item deletions, then clearing of the vacuum cycle ID
                      :  * and the BTP_HAS_GARBAGE hint.
                      :  */
      585             : static void
      586        3414 : btree_xlog_vacuum(XLogReaderState *record)
      587             : {
      588        3414 :     XLogRecPtr  lsn = record->EndRecPtr;
      589        3414 :     xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
      590             :     Buffer      buffer;
      591             :     Page        page;
      592             :     BTPageOpaque opaque;
      593             : 
      594             :     /*
      595             :      * We need to take a cleanup lock here, just like btvacuumpage(). However,
      596             :      * it isn't necessary to exhaustively get a cleanup lock on every block in
      597             :      * the index during recovery (just getting a cleanup lock on pages with
      598             :      * items to kill suffices).  See nbtree/README for details.
      599             :      */
      600        3414 :     if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer)
      601             :         == BLK_NEEDS_REDO)
      602             :     {
      603        1390 :         char       *ptr = XLogRecGetBlockData(record, 0, NULL);
      604             : 
      605        1390 :         page = BufferGetPage(buffer);
      606             : 
      607        1390 :         if (xlrec->nupdated > 0)
      608             :         {
      609             :             OffsetNumber *updatedoffsets;
      610             :             xl_btree_update *updates;
      611             : 
                      :             /* block data layout: deleted offsets, updated offsets, updates */
      612          46 :             updatedoffsets = (OffsetNumber *)
      613          46 :                 (ptr + xlrec->ndeleted * sizeof(OffsetNumber));
      614          46 :             updates = (xl_btree_update *) ((char *) updatedoffsets +
      615          46 :                                            xlrec->nupdated *
      616             :                                            sizeof(OffsetNumber));
      617             : 
      618          46 :             btree_xlog_updates(page, updatedoffsets, updates, xlrec->nupdated);
      619             :         }
      620             : 
      621        1390 :         if (xlrec->ndeleted > 0)
      622        1388 :             PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
      623             : 
      624             :         /*
      625             :          * Clear the vacuum cycle ID, and mark the page as not containing any
      626             :          * LP_DEAD items
      627             :          */
      628        1390 :         opaque = BTPageGetOpaque(page);
      629        1390 :         opaque->btpo_cycleid = 0;
      630        1390 :         opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
      631             : 
      632        1390 :         PageSetLSN(page, lsn);
      633        1390 :         MarkBufferDirty(buffer);
      634             :     }
      635        3414 :     if (BufferIsValid(buffer))
      636        3414 :         UnlockReleaseBuffer(buffer);
      637        3414 : }
     638             : 
                      : /*
                      :  * btree_xlog_delete -- replay an XLOG_BTREE_DELETE record
                      :  *
                      :  * Like btree_xlog_vacuum(), but for simple deletion of known-dead index
                      :  * tuples.  Differences: a recovery conflict must be resolved against the
                      :  * record's snapshotConflictHorizon before touching the page, an ordinary
                      :  * (non-cleanup) lock suffices, and the vacuum cycle ID is left alone.
                      :  */
      639             : static void
      640        1664 : btree_xlog_delete(XLogReaderState *record)
      641             : {
      642        1664 :     XLogRecPtr  lsn = record->EndRecPtr;
      643        1664 :     xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
      644             :     Buffer      buffer;
      645             :     Page        page;
      646             :     BTPageOpaque opaque;
      647             : 
      648             :     /*
      649             :      * If we have any conflict processing to do, it must happen before we
      650             :      * update the page
      651             :      */
      652        1664 :     if (InHotStandby)
      653             :     {
      654             :         RelFileLocator rlocator;
      655             : 
      656        1664 :         XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
      657             : 
      658        1664 :         ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
      659        1664 :                                             xlrec->isCatalogRel,
      660             :                                             rlocator);
      661             :     }
      662             : 
      663             :     /*
      664             :      * We don't need to take a cleanup lock to apply these changes. See
      665             :      * nbtree/README for details.
      666             :      */
      667        1664 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
      668             :     {
      669        1638 :         char       *ptr = XLogRecGetBlockData(record, 0, NULL);
      670             : 
      671        1638 :         page = BufferGetPage(buffer);
      672             : 
      673        1638 :         if (xlrec->nupdated > 0)
      674             :         {
      675             :             OffsetNumber *updatedoffsets;
      676             :             xl_btree_update *updates;
      677             : 
                      :             /* block data layout: deleted offsets, updated offsets, updates */
      678         208 :             updatedoffsets = (OffsetNumber *)
      679         208 :                 (ptr + xlrec->ndeleted * sizeof(OffsetNumber));
      680         208 :             updates = (xl_btree_update *) ((char *) updatedoffsets +
      681         208 :                                            xlrec->nupdated *
      682             :                                            sizeof(OffsetNumber));
      683             : 
      684         208 :             btree_xlog_updates(page, updatedoffsets, updates, xlrec->nupdated);
      685             :         }
      686             : 
      687        1638 :         if (xlrec->ndeleted > 0)
      688        1604 :             PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
      689             : 
      690             :         /*
      691             :          * Do *not* clear the vacuum cycle ID, but do mark the page as not
      692             :          * containing any LP_DEAD items
      693             :          */
      694        1638 :         opaque = BTPageGetOpaque(page);
      695        1638 :         opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
      696             : 
      697        1638 :         PageSetLSN(page, lsn);
      698        1638 :         MarkBufferDirty(buffer);
      699             :     }
      700        1664 :     if (BufferIsValid(buffer))
      701        1664 :         UnlockReleaseBuffer(buffer);
      702        1664 : }
     703             : 
                      : /*
                      :  * btree_xlog_mark_page_halfdead -- replay XLOG_BTREE_MARK_PAGE_HALFDEAD
                      :  *
                      :  * First stage of page deletion: remove the to-be-deleted subtree's downlink
                      :  * from its parent page, then reinitialize the leaf page as a half-dead page
                      :  * whose dummy high key records the subtree's top parent.
                      :  */
      704             : static void
      705        1252 : btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
      706             : {
      707        1252 :     XLogRecPtr  lsn = record->EndRecPtr;
      708        1252 :     xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record);
      709             :     Buffer      buffer;
      710             :     Page        page;
      711             :     BTPageOpaque pageop;
      712             :     IndexTupleData trunctuple;
      713             : 
      714             :     /*
      715             :      * In normal operation, we would lock all the pages this WAL record
      716             :      * touches before changing any of them.  In WAL replay, it should be okay
      717             :      * to lock just one page at a time, since no concurrent index updates can
      718             :      * be happening, and readers should not care whether they arrive at the
      719             :      * target page or not (since it's surely empty).
      720             :      */
      721             : 
      722             :     /* to-be-deleted subtree's parent page */
      723        1252 :     if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
      724             :     {
      725             :         OffsetNumber poffset;
      726             :         ItemId      itemid;
      727             :         IndexTuple  itup;
      728             :         OffsetNumber nextoffset;
      729             :         BlockNumber rightsib;
      730             : 
      731        1214 :         page = BufferGetPage(buffer);
      732        1214 :         pageop = BTPageGetOpaque(page);
      733             : 
      734        1214 :         poffset = xlrec->poffset;
      735             : 
                      :         /* fetch the downlink stored in the pivot right of poffset */
      736        1214 :         nextoffset = OffsetNumberNext(poffset);
      737        1214 :         itemid = PageGetItemId(page, nextoffset);
      738        1214 :         itup = (IndexTuple) PageGetItem(page, itemid);
      739        1214 :         rightsib = BTreeTupleGetDownLink(itup);
      740             : 
                      :         /* move that downlink into the pivot at poffset, drop the other pivot */
      741        1214 :         itemid = PageGetItemId(page, poffset);
      742        1214 :         itup = (IndexTuple) PageGetItem(page, itemid);
      743        1214 :         BTreeTupleSetDownLink(itup, rightsib);
      744        1214 :         nextoffset = OffsetNumberNext(poffset);
      745        1214 :         PageIndexTupleDelete(page, nextoffset);
      746             : 
      747        1214 :         PageSetLSN(page, lsn);
      748        1214 :         MarkBufferDirty(buffer);
      749             :     }
      750             : 
      751             :     /*
      752             :      * Don't need to couple cross-level locks in REDO routines, so release
      753             :      * lock on internal page immediately
      754             :      */
      755        1252 :     if (BufferIsValid(buffer))
      756        1252 :         UnlockReleaseBuffer(buffer);
      757             : 
      758             :     /* Rewrite the leaf page as a halfdead page */
      759        1252 :     buffer = XLogInitBufferForRedo(record, 0);
      760        1252 :     page = BufferGetPage(buffer);
      761             : 
      762        1252 :     _bt_pageinit(page, BufferGetPageSize(buffer));
      763        1252 :     pageop = BTPageGetOpaque(page);
      764             : 
      765        1252 :     pageop->btpo_prev = xlrec->leftblk;
      766        1252 :     pageop->btpo_next = xlrec->rightblk;
      767        1252 :     pageop->btpo_level = 0;
      768        1252 :     pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
      769        1252 :     pageop->btpo_cycleid = 0;
      770             : 
      771             :     /*
      772             :      * Construct a dummy high key item that points to top parent page (value
      773             :      * is InvalidBlockNumber when the top parent page is the leaf page itself)
      774             :      */
      775        1252 :     MemSet(&trunctuple, 0, sizeof(IndexTupleData));
      776        1252 :     trunctuple.t_info = sizeof(IndexTupleData);
      777        1252 :     BTreeTupleSetTopParent(&trunctuple, xlrec->topparent);
      778             : 
      779        1252 :     if (PageAddItem(page, &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber)
      780           0 :         elog(ERROR, "could not add dummy high key to half-dead page");
      781             : 
      782        1252 :     PageSetLSN(page, lsn);
      783        1252 :     MarkBufferDirty(buffer);
      784        1252 :     UnlockReleaseBuffer(buffer);
      785        1252 : }
     786             : 
     787             : 
                      : /*
                      :  * btree_xlog_unlink_page -- replay XLOG_BTREE_UNLINK_PAGE[_META]
                      :  *
                      :  * Second stage of page deletion: splice the target page out of its sibling
                      :  * chain and rewrite it as an empty deleted page carrying the record's
                      :  * safexid.  May also rewrite the half-dead leaf page (block ref 3) and, for
                      :  * the _META variant, restore the metapage.
                      :  */
      788             : static void
      789        1260 : btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
      790             : {
      791        1260 :     XLogRecPtr  lsn = record->EndRecPtr;
      792        1260 :     xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record);
      793             :     BlockNumber leftsib;
      794             :     BlockNumber rightsib;
      795             :     uint32      level;
      796             :     bool        isleaf;
      797             :     FullTransactionId safexid;
      798             :     Buffer      leftbuf;
      799             :     Buffer      target;
      800             :     Buffer      rightbuf;
      801             :     Page        page;
      802             :     BTPageOpaque pageop;
      803             : 
      804        1260 :     leftsib = xlrec->leftsib;
      805        1260 :     rightsib = xlrec->rightsib;
      806        1260 :     level = xlrec->level;
      807        1260 :     isleaf = (level == 0);
      808        1260 :     safexid = xlrec->safexid;
      809             : 
      810             :     /* No leaftopparent for level 0 (leaf page) or level 1 target */
      811             :     Assert(!BlockNumberIsValid(xlrec->leaftopparent) || level > 1);
      812             : 
      813             :     /*
      814             :      * In normal operation, we would lock all the pages this WAL record
      815             :      * touches before changing any of them.  In WAL replay, we at least lock
      816             :      * the pages in the same standard left-to-right order (leftsib, target,
      817             :      * rightsib), and don't release the sibling locks until the target is
      818             :      * marked deleted.
      819             :      */
      820             : 
      821             :     /* Fix right-link of left sibling, if any */
      822        1260 :     if (leftsib != P_NONE)
      823             :     {
      824         148 :         if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
      825             :         {
      826         148 :             page = BufferGetPage(leftbuf);
      827         148 :             pageop = BTPageGetOpaque(page);
      828         148 :             pageop->btpo_next = rightsib;
      829             : 
      830         148 :             PageSetLSN(page, lsn);
      831         148 :             MarkBufferDirty(leftbuf);
      832             :         }
      833             :     }
      834             :     else
      835        1112 :         leftbuf = InvalidBuffer;
      836             : 
      837             :     /* Rewrite target page as empty deleted page */
      838        1260 :     target = XLogInitBufferForRedo(record, 0);
      839        1260 :     page = BufferGetPage(target);
      840             : 
      841        1260 :     _bt_pageinit(page, BufferGetPageSize(target));
      842        1260 :     pageop = BTPageGetOpaque(page);
      843             : 
      844        1260 :     pageop->btpo_prev = leftsib;
      845        1260 :     pageop->btpo_next = rightsib;
      846        1260 :     pageop->btpo_level = level;
      847        1260 :     BTPageSetDeleted(page, safexid);
      848        1260 :     if (isleaf)
      849        1250 :         pageop->btpo_flags |= BTP_LEAF;
      850        1260 :     pageop->btpo_cycleid = 0;
      851             : 
      852        1260 :     PageSetLSN(page, lsn);
      853        1260 :     MarkBufferDirty(target);
      854             : 
      855             :     /* Fix left-link of right sibling */
      856        1260 :     if (XLogReadBufferForRedo(record, 2, &rightbuf) == BLK_NEEDS_REDO)
      857             :     {
      858          22 :         page = BufferGetPage(rightbuf);
      859          22 :         pageop = BTPageGetOpaque(page);
      860          22 :         pageop->btpo_prev = leftsib;
      861             : 
      862          22 :         PageSetLSN(page, lsn);
      863          22 :         MarkBufferDirty(rightbuf);
      864             :     }
      865             : 
      866             :     /* Release siblings */
      867        1260 :     if (BufferIsValid(leftbuf))
      868         148 :         UnlockReleaseBuffer(leftbuf);
      869        1260 :     if (BufferIsValid(rightbuf))
      870        1260 :         UnlockReleaseBuffer(rightbuf);
      871             : 
      872             :     /* Release target */
      873        1260 :     UnlockReleaseBuffer(target);
      874             : 
      875             :     /*
      876             :      * If we deleted a parent of the targeted leaf page, instead of the leaf
      877             :      * itself, update the leaf to point to the next remaining child in the
      878             :      * to-be-deleted subtree
      879             :      */
      880        1260 :     if (XLogRecHasBlockRef(record, 3))
      881             :     {
      882             :         /*
      883             :          * There is no real data on the page, so we just re-create it from
      884             :          * scratch using the information from the WAL record.
      885             :          *
      886             :          * Note that we don't end up here when the target page is also the
      887             :          * leafbuf page.  There is no need to add a dummy hikey item with a
      888             :          * top parent link when deleting leafbuf because it's the last page
      889             :          * we'll delete in the subtree undergoing deletion.
      890             :          */
      891             :         Buffer      leafbuf;
      892             :         IndexTupleData trunctuple;
      893             : 
      894             :         Assert(!isleaf);
      895             : 
      896          10 :         leafbuf = XLogInitBufferForRedo(record, 3);
      897          10 :         page = BufferGetPage(leafbuf);
      898             : 
      899          10 :         _bt_pageinit(page, BufferGetPageSize(leafbuf));
      900          10 :         pageop = BTPageGetOpaque(page);
      901             : 
      902          10 :         pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
      903          10 :         pageop->btpo_prev = xlrec->leafleftsib;
      904          10 :         pageop->btpo_next = xlrec->leafrightsib;
      905          10 :         pageop->btpo_level = 0;
      906          10 :         pageop->btpo_cycleid = 0;
      907             : 
      908             :         /* Add a dummy hikey item */
      909          20 :         MemSet(&trunctuple, 0, sizeof(IndexTupleData));
      910          10 :         trunctuple.t_info = sizeof(IndexTupleData);
      911          10 :         BTreeTupleSetTopParent(&trunctuple, xlrec->leaftopparent);
      912             : 
      913          10 :         if (PageAddItem(page, &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber)
      914           0 :             elog(ERROR, "could not add dummy high key to half-dead page");
      915             : 
      916          10 :         PageSetLSN(page, lsn);
      917          10 :         MarkBufferDirty(leafbuf);
      918          10 :         UnlockReleaseBuffer(leafbuf);
      919             :     }
      920             : 
      921             :     /* Update metapage if needed */
      922        1260 :     if (info == XLOG_BTREE_UNLINK_PAGE_META)
      923          12 :         _bt_restore_meta(record, 4);
      924        1260 : }
     925             : 
                      : /*
                      :  * btree_xlog_newroot -- replay an XLOG_BTREE_NEWROOT record
                      :  *
                      :  * Initializes the new root page (restoring its items from the record when
                      :  * the root is non-leaf, i.e. was created by a root split), clears the
                      :  * incomplete-split flag on the left child in that case, and restores the
                      :  * metapage to point at the new root.
                      :  */
      926             : static void
      927        1412 : btree_xlog_newroot(XLogReaderState *record)
      928             : {
      929        1412 :     XLogRecPtr  lsn = record->EndRecPtr;
      930        1412 :     xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
      931             :     Buffer      buffer;
      932             :     Page        page;
      933             :     BTPageOpaque pageop;
      934             :     char       *ptr;
      935             :     Size        len;
      936             : 
      937        1412 :     buffer = XLogInitBufferForRedo(record, 0);
      938        1412 :     page = BufferGetPage(buffer);
      939             : 
      940        1412 :     _bt_pageinit(page, BufferGetPageSize(buffer));
      941        1412 :     pageop = BTPageGetOpaque(page);
      942             : 
      943        1412 :     pageop->btpo_flags = BTP_ROOT;
      944        1412 :     pageop->btpo_prev = pageop->btpo_next = P_NONE;
      945        1412 :     pageop->btpo_level = xlrec->level;
      946        1412 :     if (xlrec->level == 0)
      947        1318 :         pageop->btpo_flags |= BTP_LEAF;
      948        1412 :     pageop->btpo_cycleid = 0;
      949             : 
      950        1412 :     if (xlrec->level > 0)
      951             :     {
      952          94 :         ptr = XLogRecGetBlockData(record, 0, &len);
      953          94 :         _bt_restore_page(page, ptr, len);
      954             : 
      955             :         /* Clear the incomplete-split flag in left child */
      956          94 :         _bt_clear_incomplete_split(record, 1);
      957             :     }
      958             : 
      959        1412 :     PageSetLSN(page, lsn);
      960        1412 :     MarkBufferDirty(buffer);
      961        1412 :     UnlockReleaseBuffer(buffer);
      962             : 
      963        1412 :     _bt_restore_meta(record, 2);
      964        1412 : }
     965             : 
     966             : /*
     967             :  * In general VACUUM must defer recycling as a way of avoiding certain race
     968             :  * conditions.  Deleted pages contain a safexid value that is used by VACUUM
     969             :  * to determine whether or not it's safe to place a page that was deleted by
     970             :  * VACUUM earlier into the FSM now.  See nbtree/README.
     971             :  *
     972             :  * As far as any backend operating during original execution is concerned, the
     973             :  * FSM is a cache of recycle-safe pages; the mere presence of the page in the
     974             :  * FSM indicates that the page must already be safe to recycle (actually,
     975             :  * _bt_allocbuf() verifies it's safe using BTPageIsRecyclable(), but that's
     976             :  * just because it would be unwise to completely trust the FSM, given its
     977             :  * current limitations).
     978             :  *
     979             :  * This isn't sufficient to prevent similar concurrent recycling race
     980             :  * conditions during Hot Standby, though.  For that we need to log a
     981             :  * xl_btree_reuse_page record at the point that a page is actually recycled
     982             :  * and reused for an entirely unrelated page inside _bt_split().  These
     983             :  * records include the same safexid value from the original deleted page,
     984             :  * stored in the record's snapshotConflictHorizon field.
     985             :  *
     986             :  * The GlobalVisCheckRemovableFullXid() test in BTPageIsRecyclable() is used
     987             :  * to determine if it's safe to recycle a page.  This mirrors our own test:
     988             :  * the PGPROC->xmin > limitXmin test inside GetConflictingVirtualXIDs().
     989             :  * Consequently, one XID value achieves the same exclusion effect on primary
     990             :  * and standby.
     991             :  */
      992             : static void
      993         108 : btree_xlog_reuse_page(XLogReaderState *record)
      994             : {
      995         108 :     xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
      996             : 
                      :     /*
                      :      * This record only generates recovery conflicts on a hot standby; it
                      :      * never modifies any page, so there is nothing to do otherwise.
                      :      */
      997         108 :     if (InHotStandby)
      998         108 :         ResolveRecoveryConflictWithSnapshotFullXid(xlrec->snapshotConflictHorizon,
      999         108 :                                                    xlrec->isCatalogRel,
     1000             :                                                    xlrec->locator);
     1001         108 : }
    1002             : 
                      : /*
                      :  * btree_redo -- dispatch btree WAL records to their redo routines
                      :  *
                      :  * Each record is replayed inside opCtx, which is reset after every record
                      :  * so per-record allocations (e.g. in btree_xlog_updates) never accumulate.
                      :  */
     1003             : void
     1004     1081802 : btree_redo(XLogReaderState *record)
     1005             : {
     1006     1081802 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
     1007             :     MemoryContext oldCtx;
     1008             : 
     1009     1081802 :     oldCtx = MemoryContextSwitchTo(opCtx);
     1010     1081802 :     switch (info)
     1011             :     {
     1012     1055480 :         case XLOG_BTREE_INSERT_LEAF:
     1013     1055480 :             btree_xlog_insert(true, false, false, record);
     1014     1055480 :             break;
     1015        2768 :         case XLOG_BTREE_INSERT_UPPER:
     1016        2768 :             btree_xlog_insert(false, false, false, record);
     1017        2768 :             break;
     1018           8 :         case XLOG_BTREE_INSERT_META:
     1019           8 :             btree_xlog_insert(false, true, false, record);
     1020           8 :             break;
     1021         292 :         case XLOG_BTREE_SPLIT_L:
     1022         292 :             btree_xlog_split(true, record);
     1023         292 :             break;
     1024        2624 :         case XLOG_BTREE_SPLIT_R:
     1025        2624 :             btree_xlog_split(false, record);
     1026        2624 :             break;
     1027        6926 :         case XLOG_BTREE_INSERT_POST:
     1028        6926 :             btree_xlog_insert(true, false, true, record);
     1029        6926 :             break;
     1030        4554 :         case XLOG_BTREE_DEDUP:
     1031        4554 :             btree_xlog_dedup(record);
     1032        4554 :             break;
     1033        3414 :         case XLOG_BTREE_VACUUM:
     1034        3414 :             btree_xlog_vacuum(record);
     1035        3414 :             break;
     1036        1664 :         case XLOG_BTREE_DELETE:
     1037        1664 :             btree_xlog_delete(record);
     1038        1664 :             break;
     1039        1252 :         case XLOG_BTREE_MARK_PAGE_HALFDEAD:
     1040        1252 :             btree_xlog_mark_page_halfdead(info, record);
     1041        1252 :             break;
     1042        1260 :         case XLOG_BTREE_UNLINK_PAGE:
     1043             :         case XLOG_BTREE_UNLINK_PAGE_META:
     1044        1260 :             btree_xlog_unlink_page(info, record);
     1045        1260 :             break;
     1046        1412 :         case XLOG_BTREE_NEWROOT:
     1047        1412 :             btree_xlog_newroot(record);
     1048        1412 :             break;
     1049         108 :         case XLOG_BTREE_REUSE_PAGE:
     1050         108 :             btree_xlog_reuse_page(record);
     1051         108 :             break;
     1052          40 :         case XLOG_BTREE_META_CLEANUP:
     1053          40 :             _bt_restore_meta(record, 0);
     1054          40 :             break;
     1055           0 :         default:
     1056           0 :             elog(PANIC, "btree_redo: unknown op code %u", info);
     1057             :     }
     1058     1081802 :     MemoryContextSwitchTo(oldCtx);
     1059     1081802 :     MemoryContextReset(opCtx);
     1060     1081802 : }
    1061             : 
                      : /*
                      :  * btree_xlog_startup -- create the per-record working context used by
                      :  * btree_redo() (reset after each record, deleted in btree_xlog_cleanup()).
                      :  */
     1062             : void
     1063         410 : btree_xlog_startup(void)
     1064             : {
     1065         410 :     opCtx = AllocSetContextCreate(CurrentMemoryContext,
     1066             :                                   "Btree recovery temporary context",
     1067             :                                   ALLOCSET_DEFAULT_SIZES);
     1068         410 : }
    1069             : 
                      : /*
                      :  * btree_xlog_cleanup -- release the working context created at startup.
                      :  */
     1070             : void
     1071         296 : btree_xlog_cleanup(void)
     1072             : {
     1073         296 :     MemoryContextDelete(opCtx);
     1074         296 :     opCtx = NULL;
     1075         296 : }
    1076             : 
     1077             : /*
     1078             :  * Mask a btree page before performing consistency checks on it.
                      :  *
                      :  * Clears page contents that may legitimately differ between the primary's
                      :  * page and the replayed standby page (LSN/checksum, hint bits, unused
                      :  * space, unlogged flag bits, cycle ID) so the two images can be compared
                      :  * byte-for-byte.
     1079             :  */
     1080             : void
     1081     1730216 : btree_mask(char *pagedata, BlockNumber blkno)
     1082             : {
     1083     1730216 :     Page        page = (Page) pagedata;
     1084             :     BTPageOpaque maskopaq;
     1085             : 
     1086     1730216 :     mask_page_lsn_and_checksum(page);
     1087             : 
     1088     1730216 :     mask_page_hint_bits(page);
     1089     1730216 :     mask_unused_space(page);
     1090             : 
     1091     1730216 :     maskopaq = BTPageGetOpaque(page);
     1092             : 
     1093     1730216 :     if (P_ISLEAF(maskopaq))
     1094             :     {
     1095             :         /*
     1096             :          * In btree leaf pages, it is possible to modify the LP_FLAGS without
     1097             :          * emitting any WAL record. Hence, mask the line pointer flags. See
     1098             :          * _bt_killitems(), _bt_check_unique() for details.
     1099             :          */
     1100     1722584 :         mask_lp_flags(page);
     1101             :     }
     1102             : 
     1103             :     /*
     1104             :      * BTP_HAS_GARBAGE is just an un-logged hint bit. So, mask it. See
     1105             :      * _bt_delete_or_dedup_one_page(), _bt_killitems(), and _bt_check_unique()
     1106             :      * for details.
     1107             :      */
     1108     1730216 :     maskopaq->btpo_flags &= ~BTP_HAS_GARBAGE;
     1109             : 
     1110             :     /*
     1111             :      * During replay of a btree page split, we don't set the BTP_SPLIT_END
     1112             :      * flag of the right sibling and initialize the cycle_id to 0 for the same
     1113             :      * page. See btree_xlog_split() for details.
     1114             :      */
     1115     1730216 :     maskopaq->btpo_flags &= ~BTP_SPLIT_END;
     1116     1730216 :     maskopaq->btpo_cycleid = 0;
     1117     1730216 : }

Generated by: LCOV version 1.16