LCOV - code coverage report
Current view: top level - src/backend/access/nbtree - nbtxlog.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 467 493 94.7 %
Date: 2025-01-18 05:15:39 Functions: 16 17 94.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * nbtxlog.c
       4             :  *    WAL replay logic for btrees.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/nbtree/nbtxlog.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/bufmask.h"
      18             : #include "access/nbtree.h"
      19             : #include "access/nbtxlog.h"
      20             : #include "access/transam.h"
      21             : #include "access/xlogutils.h"
      22             : #include "storage/standby.h"
      23             : #include "utils/memutils.h"
      24             : 
      25             : static MemoryContext opCtx;     /* working memory for operations */
      26             : 
      27             : /*
      28             :  * _bt_restore_page -- re-enter all the index tuples on a page
      29             :  *
      30             :  * The page is freshly init'd, and *from (length len) is a copy of what
      31             :  * had been its upper part (pd_upper to pd_special).  We assume that the
      32             :  * tuples had been added to the page in item-number order, and therefore
      33             :  * the one with highest item number appears first (lowest on the page).
      34             :  */
      35             : static void
      36        3202 : _bt_restore_page(Page page, char *from, int len)
      37             : {
      38             :     IndexTupleData itupdata;
      39             :     Size        itemsz;
      40        3202 :     char       *end = from + len;
      41             :     Item        items[MaxIndexTuplesPerPage];
      42             :     uint16      itemsizes[MaxIndexTuplesPerPage];
      43             :     int         i;
      44             :     int         nitems;
      45             : 
      46             :     /*
      47             :      * To get the items back in the original order, we add them to the page in
      48             :      * reverse.  To figure out where one tuple ends and another begins, we
      49             :      * have to scan them in forward order first.
      50             :      */
      51        3202 :     i = 0;
      52      205930 :     while (from < end)
      53             :     {
      54             :         /*
      55             :          * As we step through the items, 'from' won't always be properly
      56             :          * aligned, so we need to use memcpy().  Further, we use Item (which
      57             :          * is just a char*) here for our items array for the same reason;
      58             :          * wouldn't want the compiler or anyone thinking that an item is
      59             :          * aligned when it isn't.
      60             :          */
      61      202728 :         memcpy(&itupdata, from, sizeof(IndexTupleData));
      62      202728 :         itemsz = IndexTupleSize(&itupdata);
      63      202728 :         itemsz = MAXALIGN(itemsz);
      64             : 
      65      202728 :         items[i] = (Item) from;
      66      202728 :         itemsizes[i] = itemsz;
      67      202728 :         i++;
      68             : 
      69      202728 :         from += itemsz;
      70             :     }
      71        3202 :     nitems = i;
      72             : 
      73      205930 :     for (i = nitems - 1; i >= 0; i--)
      74             :     {
      75      202728 :         if (PageAddItem(page, items[i], itemsizes[i], nitems - i,
      76             :                         false, false) == InvalidOffsetNumber)
      77           0 :             elog(PANIC, "_bt_restore_page: cannot add item to page");
      78             :     }
      79        3202 : }
      80             : 
      81             : static void
      82        1334 : _bt_restore_meta(XLogReaderState *record, uint8 block_id)
      83             : {
      84        1334 :     XLogRecPtr  lsn = record->EndRecPtr;
      85             :     Buffer      metabuf;
      86             :     Page        metapg;
      87             :     BTMetaPageData *md;
      88             :     BTPageOpaque pageop;
      89             :     xl_btree_metadata *xlrec;
      90             :     char       *ptr;
      91             :     Size        len;
      92             : 
      93        1334 :     metabuf = XLogInitBufferForRedo(record, block_id);
      94        1334 :     ptr = XLogRecGetBlockData(record, block_id, &len);
      95             : 
      96             :     Assert(len == sizeof(xl_btree_metadata));
      97             :     Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
      98        1334 :     xlrec = (xl_btree_metadata *) ptr;
      99        1334 :     metapg = BufferGetPage(metabuf);
     100             : 
     101        1334 :     _bt_pageinit(metapg, BufferGetPageSize(metabuf));
     102             : 
     103        1334 :     md = BTPageGetMeta(metapg);
     104        1334 :     md->btm_magic = BTREE_MAGIC;
     105        1334 :     md->btm_version = xlrec->version;
     106        1334 :     md->btm_root = xlrec->root;
     107        1334 :     md->btm_level = xlrec->level;
     108        1334 :     md->btm_fastroot = xlrec->fastroot;
     109        1334 :     md->btm_fastlevel = xlrec->fastlevel;
     110             :     /* Cannot log BTREE_MIN_VERSION index metapage without upgrade */
     111             :     Assert(md->btm_version >= BTREE_NOVAC_VERSION);
     112        1334 :     md->btm_last_cleanup_num_delpages = xlrec->last_cleanup_num_delpages;
     113        1334 :     md->btm_last_cleanup_num_heap_tuples = -1.0;
     114        1334 :     md->btm_allequalimage = xlrec->allequalimage;
     115             : 
     116        1334 :     pageop = BTPageGetOpaque(metapg);
     117        1334 :     pageop->btpo_flags = BTP_META;
     118             : 
     119             :     /*
     120             :      * Set pd_lower just past the end of the metadata.  This is essential,
     121             :      * because without doing so, metadata will be lost if xlog.c compresses
     122             :      * the page.
     123             :      */
     124        1334 :     ((PageHeader) metapg)->pd_lower =
     125        1334 :         ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg;
     126             : 
     127        1334 :     PageSetLSN(metapg, lsn);
     128        1334 :     MarkBufferDirty(metabuf);
     129        1334 :     UnlockReleaseBuffer(metabuf);
     130        1334 : }
     131             : 
     132             : /*
     133             :  * _bt_clear_incomplete_split -- clear INCOMPLETE_SPLIT flag on a page
     134             :  *
     135             :  * This is a common subroutine of the redo functions of all the WAL record
     136             :  * types that can insert a downlink: insert, split, and newroot.
     137             :  */
     138             : static void
     139        3100 : _bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id)
     140             : {
     141        3100 :     XLogRecPtr  lsn = record->EndRecPtr;
     142             :     Buffer      buf;
     143             : 
     144        3100 :     if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO)
     145             :     {
     146        3100 :         Page        page = (Page) BufferGetPage(buf);
     147        3100 :         BTPageOpaque pageop = BTPageGetOpaque(page);
     148             : 
     149             :         Assert(P_INCOMPLETE_SPLIT(pageop));
     150        3100 :         pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT;
     151             : 
     152        3100 :         PageSetLSN(page, lsn);
     153        3100 :         MarkBufferDirty(buf);
     154             :     }
     155        3100 :     if (BufferIsValid(buf))
     156        3100 :         UnlockReleaseBuffer(buf);
     157        3100 : }
     158             : 
     159             : static void
     160     1020742 : btree_xlog_insert(bool isleaf, bool ismeta, bool posting,
     161             :                   XLogReaderState *record)
     162             : {
     163     1020742 :     XLogRecPtr  lsn = record->EndRecPtr;
     164     1020742 :     xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
     165             :     Buffer      buffer;
     166             :     Page        page;
     167             : 
     168             :     /*
     169             :      * Insertion to an internal page finishes an incomplete split at the child
     170             :      * level.  Clear the incomplete-split flag in the child.  Note: during
     171             :      * normal operation, the child and parent pages are locked at the same
     172             :      * time (the locks are coupled), so that clearing the flag and inserting
     173             :      * the downlink appear atomic to other backends.  We don't bother with
     174             :      * that during replay, because readers don't care about the
     175             :      * incomplete-split flag and there cannot be updates happening.
     176             :      */
     177     1020742 :     if (!isleaf)
     178        2896 :         _bt_clear_incomplete_split(record, 1);
     179     1020742 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
     180             :     {
     181             :         Size        datalen;
     182     1002464 :         char       *datapos = XLogRecGetBlockData(record, 0, &datalen);
     183             : 
     184     1002464 :         page = BufferGetPage(buffer);
     185             : 
     186     1002464 :         if (!posting)
     187             :         {
     188             :             /* Simple retail insertion */
     189      997134 :             if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
     190             :                             false, false) == InvalidOffsetNumber)
     191           0 :                 elog(PANIC, "failed to add new item");
     192             :         }
     193             :         else
     194             :         {
     195             :             ItemId      itemid;
     196             :             IndexTuple  oposting,
     197             :                         newitem,
     198             :                         nposting;
     199             :             uint16      postingoff;
     200             : 
     201             :             /*
     202             :              * A posting list split occurred during leaf page insertion.  WAL
     203             :              * record data will start with an offset number representing the
     204             :              * point in an existing posting list that a split occurs at.
     205             :              *
     206             :              * Use _bt_swap_posting() to repeat posting list split steps from
     207             :              * primary.  Note that newitem from WAL record is 'orignewitem',
     208             :              * not the final version of newitem that is actually inserted on
     209             :              * page.
     210             :              */
     211        5330 :             postingoff = *((uint16 *) datapos);
     212        5330 :             datapos += sizeof(uint16);
     213        5330 :             datalen -= sizeof(uint16);
     214             : 
     215        5330 :             itemid = PageGetItemId(page, OffsetNumberPrev(xlrec->offnum));
     216        5330 :             oposting = (IndexTuple) PageGetItem(page, itemid);
     217             : 
     218             :             /* Use mutable, aligned newitem copy in _bt_swap_posting() */
     219             :             Assert(isleaf && postingoff > 0);
     220        5330 :             newitem = CopyIndexTuple((IndexTuple) datapos);
     221        5330 :             nposting = _bt_swap_posting(newitem, oposting, postingoff);
     222             : 
     223             :             /* Replace existing posting list with post-split version */
     224        5330 :             memcpy(oposting, nposting, MAXALIGN(IndexTupleSize(nposting)));
     225             : 
     226             :             /* Insert "final" new item (not orignewitem from WAL stream) */
     227             :             Assert(IndexTupleSize(newitem) == datalen);
     228        5330 :             if (PageAddItem(page, (Item) newitem, datalen, xlrec->offnum,
     229             :                             false, false) == InvalidOffsetNumber)
     230           0 :                 elog(PANIC, "failed to add posting split new item");
     231             :         }
     232             : 
     233     1002464 :         PageSetLSN(page, lsn);
     234     1002464 :         MarkBufferDirty(buffer);
     235             :     }
     236     1020742 :     if (BufferIsValid(buffer))
     237     1020742 :         UnlockReleaseBuffer(buffer);
     238             : 
     239             :     /*
     240             :      * Note: in normal operation, we'd update the metapage while still holding
     241             :      * lock on the page we inserted into.  But during replay it's not
     242             :      * necessary to hold that lock, since no other index updates can be
     243             :      * happening concurrently, and readers will cope fine with following an
     244             :      * obsolete link from the metapage.
     245             :      */
     246     1020742 :     if (ismeta)
     247           8 :         _bt_restore_meta(record, 2);
     248     1020742 : }
     249             : 
     250             : static void
     251        3100 : btree_xlog_split(bool newitemonleft, XLogReaderState *record)
     252             : {
     253        3100 :     XLogRecPtr  lsn = record->EndRecPtr;
     254        3100 :     xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
     255        3100 :     bool        isleaf = (xlrec->level == 0);
     256             :     Buffer      buf;
     257             :     Buffer      rbuf;
     258             :     Page        rpage;
     259             :     BTPageOpaque ropaque;
     260             :     char       *datapos;
     261             :     Size        datalen;
     262             :     BlockNumber origpagenumber;
     263             :     BlockNumber rightpagenumber;
     264             :     BlockNumber spagenumber;
     265             : 
     266        3100 :     XLogRecGetBlockTag(record, 0, NULL, NULL, &origpagenumber);
     267        3100 :     XLogRecGetBlockTag(record, 1, NULL, NULL, &rightpagenumber);
     268        3100 :     if (!XLogRecGetBlockTagExtended(record, 2, NULL, NULL, &spagenumber, NULL))
     269        1910 :         spagenumber = P_NONE;
     270             : 
     271             :     /*
     272             :      * Clear the incomplete split flag on the appropriate child page one level
     273             :      * down when origpage/buf is an internal page (there must have been
     274             :      * cascading page splits during original execution in the event of an
     275             :      * internal page split).  This is like the corresponding btree_xlog_insert
     276             :      * call for internal pages.  We're not clearing the incomplete split flag
     277             :      * for the current page split here (you can think of this as part of the
     278             :      * insert of newitem that the page split action needs to perform in
     279             :      * passing).
     280             :      *
     281             :      * Like in btree_xlog_insert, this can be done before locking other pages.
     282             :      * We never need to couple cross-level locks in REDO routines.
     283             :      */
     284        3100 :     if (!isleaf)
     285         102 :         _bt_clear_incomplete_split(record, 3);
     286             : 
     287             :     /* Reconstruct right (new) sibling page from scratch */
     288        3100 :     rbuf = XLogInitBufferForRedo(record, 1);
     289        3100 :     datapos = XLogRecGetBlockData(record, 1, &datalen);
     290        3100 :     rpage = (Page) BufferGetPage(rbuf);
     291             : 
     292        3100 :     _bt_pageinit(rpage, BufferGetPageSize(rbuf));
     293        3100 :     ropaque = BTPageGetOpaque(rpage);
     294             : 
     295        3100 :     ropaque->btpo_prev = origpagenumber;
     296        3100 :     ropaque->btpo_next = spagenumber;
     297        3100 :     ropaque->btpo_level = xlrec->level;
     298        3100 :     ropaque->btpo_flags = isleaf ? BTP_LEAF : 0;
     299        3100 :     ropaque->btpo_cycleid = 0;
     300             : 
     301        3100 :     _bt_restore_page(rpage, datapos, datalen);
     302             : 
     303        3100 :     PageSetLSN(rpage, lsn);
     304        3100 :     MarkBufferDirty(rbuf);
     305             : 
     306             :     /* Now reconstruct original page (left half of split) */
     307        3100 :     if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO)
     308             :     {
     309             :         /*
     310             :          * To retain the same physical order of the tuples that they had, we
     311             :          * initialize a temporary empty page for the left page and add all the
     312             :          * items to that in item number order.  This mirrors how _bt_split()
     313             :          * works.  Retaining the same physical order makes WAL consistency
     314             :          * checking possible.  See also _bt_restore_page(), which does the
     315             :          * same for the right page.
     316             :          */
     317        3028 :         Page        origpage = (Page) BufferGetPage(buf);
     318        3028 :         BTPageOpaque oopaque = BTPageGetOpaque(origpage);
     319             :         OffsetNumber off;
     320        3028 :         IndexTuple  newitem = NULL,
     321        3028 :                     left_hikey = NULL,
     322        3028 :                     nposting = NULL;
     323        3028 :         Size        newitemsz = 0,
     324        3028 :                     left_hikeysz = 0;
     325             :         Page        leftpage;
     326             :         OffsetNumber leftoff,
     327        3028 :                     replacepostingoff = InvalidOffsetNumber;
     328             : 
     329        3028 :         datapos = XLogRecGetBlockData(record, 0, &datalen);
     330             : 
     331        3028 :         if (newitemonleft || xlrec->postingoff != 0)
     332             :         {
     333         338 :             newitem = (IndexTuple) datapos;
     334         338 :             newitemsz = MAXALIGN(IndexTupleSize(newitem));
     335         338 :             datapos += newitemsz;
     336         338 :             datalen -= newitemsz;
     337             : 
     338         338 :             if (xlrec->postingoff != 0)
     339             :             {
     340             :                 ItemId      itemid;
     341             :                 IndexTuple  oposting;
     342             : 
     343             :                 /* Posting list must be at offset number before new item's */
     344           6 :                 replacepostingoff = OffsetNumberPrev(xlrec->newitemoff);
     345             : 
     346             :                 /* Use mutable, aligned newitem copy in _bt_swap_posting() */
     347           6 :                 newitem = CopyIndexTuple(newitem);
     348           6 :                 itemid = PageGetItemId(origpage, replacepostingoff);
     349           6 :                 oposting = (IndexTuple) PageGetItem(origpage, itemid);
     350           6 :                 nposting = _bt_swap_posting(newitem, oposting,
     351           6 :                                             xlrec->postingoff);
     352             :             }
     353             :         }
     354             : 
     355             :         /*
     356             :          * Extract left hikey and its size.  We assume that 16-bit alignment
     357             :          * is enough to apply IndexTupleSize (since it's fetching from a
     358             :          * uint16 field).
     359             :          */
     360        3028 :         left_hikey = (IndexTuple) datapos;
     361        3028 :         left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
     362        3028 :         datapos += left_hikeysz;
     363        3028 :         datalen -= left_hikeysz;
     364             : 
     365             :         Assert(datalen == 0);
     366             : 
     367        3028 :         leftpage = PageGetTempPageCopySpecial(origpage);
     368             : 
     369             :         /* Add high key tuple from WAL record to temp page */
     370        3028 :         leftoff = P_HIKEY;
     371        3028 :         if (PageAddItem(leftpage, (Item) left_hikey, left_hikeysz, P_HIKEY,
     372             :                         false, false) == InvalidOffsetNumber)
     373           0 :             elog(ERROR, "failed to add high key to left page after split");
     374        3028 :         leftoff = OffsetNumberNext(leftoff);
     375             : 
     376      677260 :         for (off = P_FIRSTDATAKEY(oopaque); off < xlrec->firstrightoff; off++)
     377             :         {
     378             :             ItemId      itemid;
     379             :             Size        itemsz;
     380             :             IndexTuple  item;
     381             : 
     382             :             /* Add replacement posting list when required */
     383      674232 :             if (off == replacepostingoff)
     384             :             {
     385             :                 Assert(newitemonleft ||
     386             :                        xlrec->firstrightoff == xlrec->newitemoff);
     387           6 :                 if (PageAddItem(leftpage, (Item) nposting,
     388             :                                 MAXALIGN(IndexTupleSize(nposting)), leftoff,
     389             :                                 false, false) == InvalidOffsetNumber)
     390           0 :                     elog(ERROR, "failed to add new posting list item to left page after split");
     391           6 :                 leftoff = OffsetNumberNext(leftoff);
     392           6 :                 continue;       /* don't insert oposting */
     393             :             }
     394             : 
     395             :             /* add the new item if it was inserted on left page */
     396      674226 :             else if (newitemonleft && off == xlrec->newitemoff)
     397             :             {
     398         300 :                 if (PageAddItem(leftpage, (Item) newitem, newitemsz, leftoff,
     399             :                                 false, false) == InvalidOffsetNumber)
     400           0 :                     elog(ERROR, "failed to add new item to left page after split");
     401         300 :                 leftoff = OffsetNumberNext(leftoff);
     402             :             }
     403             : 
     404      674226 :             itemid = PageGetItemId(origpage, off);
     405      674226 :             itemsz = ItemIdGetLength(itemid);
     406      674226 :             item = (IndexTuple) PageGetItem(origpage, itemid);
     407      674226 :             if (PageAddItem(leftpage, (Item) item, itemsz, leftoff,
     408             :                             false, false) == InvalidOffsetNumber)
     409           0 :                 elog(ERROR, "failed to add old item to left page after split");
     410      674226 :             leftoff = OffsetNumberNext(leftoff);
     411             :         }
     412             : 
     413             :         /* cope with possibility that newitem goes at the end */
     414        3028 :         if (newitemonleft && off == xlrec->newitemoff)
     415             :         {
     416          36 :             if (PageAddItem(leftpage, (Item) newitem, newitemsz, leftoff,
     417             :                             false, false) == InvalidOffsetNumber)
     418           0 :                 elog(ERROR, "failed to add new item to left page after split");
     419          36 :             leftoff = OffsetNumberNext(leftoff);
     420             :         }
     421             : 
     422        3028 :         PageRestoreTempPage(leftpage, origpage);
     423             : 
     424             :         /* Fix opaque fields */
     425        3028 :         oopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
     426        3028 :         if (isleaf)
     427        2926 :             oopaque->btpo_flags |= BTP_LEAF;
     428        3028 :         oopaque->btpo_next = rightpagenumber;
     429        3028 :         oopaque->btpo_cycleid = 0;
     430             : 
     431        3028 :         PageSetLSN(origpage, lsn);
     432        3028 :         MarkBufferDirty(buf);
     433             :     }
     434             : 
     435             :     /* Fix left-link of the page to the right of the new right sibling */
     436        3100 :     if (spagenumber != P_NONE)
     437             :     {
     438             :         Buffer      sbuf;
     439             : 
     440        1190 :         if (XLogReadBufferForRedo(record, 2, &sbuf) == BLK_NEEDS_REDO)
     441             :         {
     442         626 :             Page        spage = (Page) BufferGetPage(sbuf);
     443         626 :             BTPageOpaque spageop = BTPageGetOpaque(spage);
     444             : 
     445         626 :             spageop->btpo_prev = rightpagenumber;
     446             : 
     447         626 :             PageSetLSN(spage, lsn);
     448         626 :             MarkBufferDirty(sbuf);
     449             :         }
     450        1190 :         if (BufferIsValid(sbuf))
     451        1190 :             UnlockReleaseBuffer(sbuf);
     452             :     }
     453             : 
     454             :     /*
     455             :      * Finally, release the remaining buffers.  sbuf, rbuf, and buf must be
     456             :      * released together, so that readers cannot observe inconsistencies.
     457             :      */
     458        3100 :     UnlockReleaseBuffer(rbuf);
     459        3100 :     if (BufferIsValid(buf))
     460        3100 :         UnlockReleaseBuffer(buf);
     461        3100 : }
     462             : 
     463             : static void
     464        4410 : btree_xlog_dedup(XLogReaderState *record)
     465             : {
     466        4410 :     XLogRecPtr  lsn = record->EndRecPtr;
     467        4410 :     xl_btree_dedup *xlrec = (xl_btree_dedup *) XLogRecGetData(record);
     468             :     Buffer      buf;
     469             : 
     470        4410 :     if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO)
     471             :     {
     472        4370 :         char       *ptr = XLogRecGetBlockData(record, 0, NULL);
     473        4370 :         Page        page = (Page) BufferGetPage(buf);
     474        4370 :         BTPageOpaque opaque = BTPageGetOpaque(page);
     475             :         OffsetNumber offnum,
     476             :                     minoff,
     477             :                     maxoff;
     478             :         BTDedupState state;
     479             :         BTDedupInterval *intervals;
     480             :         Page        newpage;
     481             : 
     482        4370 :         state = (BTDedupState) palloc(sizeof(BTDedupStateData));
     483        4370 :         state->deduplicate = true;   /* unused */
     484        4370 :         state->nmaxitems = 0;    /* unused */
     485             :         /* Conservatively use larger maxpostingsize than primary */
     486        4370 :         state->maxpostingsize = BTMaxItemSize(page);
     487        4370 :         state->base = NULL;
     488        4370 :         state->baseoff = InvalidOffsetNumber;
     489        4370 :         state->basetupsize = 0;
     490        4370 :         state->htids = palloc(state->maxpostingsize);
     491        4370 :         state->nhtids = 0;
     492        4370 :         state->nitems = 0;
     493        4370 :         state->phystupsize = 0;
     494        4370 :         state->nintervals = 0;
     495             : 
     496        4370 :         minoff = P_FIRSTDATAKEY(opaque);
     497        4370 :         maxoff = PageGetMaxOffsetNumber(page);
     498        4370 :         newpage = PageGetTempPageCopySpecial(page);
     499             : 
     500        4370 :         if (!P_RIGHTMOST(opaque))
     501             :         {
     502        3768 :             ItemId      itemid = PageGetItemId(page, P_HIKEY);
     503        3768 :             Size        itemsz = ItemIdGetLength(itemid);
     504        3768 :             IndexTuple  item = (IndexTuple) PageGetItem(page, itemid);
     505             : 
     506        3768 :             if (PageAddItem(newpage, (Item) item, itemsz, P_HIKEY,
     507             :                             false, false) == InvalidOffsetNumber)
     508           0 :                 elog(ERROR, "deduplication failed to add highkey");
     509             :         }
     510             : 
     511        4370 :         intervals = (BTDedupInterval *) ptr;
     512     1006794 :         for (offnum = minoff;
     513             :              offnum <= maxoff;
     514     1002424 :              offnum = OffsetNumberNext(offnum))
     515             :         {
     516     1002424 :             ItemId      itemid = PageGetItemId(page, offnum);
     517     1002424 :             IndexTuple  itup = (IndexTuple) PageGetItem(page, itemid);
     518             : 
     519     1002424 :             if (offnum == minoff)
     520        4370 :                 _bt_dedup_start_pending(state, itup, offnum);
     521      998054 :             else if (state->nintervals < xlrec->nintervals &&
     522      751558 :                      state->baseoff == intervals[state->nintervals].baseoff &&
     523      257980 :                      state->nitems < intervals[state->nintervals].nitems)
     524             :             {
     525      171972 :                 if (!_bt_dedup_save_htid(state, itup))
     526           0 :                     elog(ERROR, "deduplication failed to add heap tid to pending posting list");
     527             :             }
     528             :             else
     529             :             {
     530      826082 :                 _bt_dedup_finish_pending(newpage, state);
     531      826082 :                 _bt_dedup_start_pending(state, itup, offnum);
     532             :             }
     533             :         }
     534             : 
     535        4370 :         _bt_dedup_finish_pending(newpage, state);
     536             :         Assert(state->nintervals == xlrec->nintervals);
     537             :         Assert(memcmp(state->intervals, intervals,
     538             :                       state->nintervals * sizeof(BTDedupInterval)) == 0);
     539             : 
     540        4370 :         if (P_HAS_GARBAGE(opaque))
     541             :         {
     542           0 :             BTPageOpaque nopaque = BTPageGetOpaque(newpage);
     543             : 
     544           0 :             nopaque->btpo_flags &= ~BTP_HAS_GARBAGE;
     545             :         }
     546             : 
     547        4370 :         PageRestoreTempPage(newpage, page);
     548        4370 :         PageSetLSN(page, lsn);
     549        4370 :         MarkBufferDirty(buf);
     550             :     }
     551             : 
     552        4410 :     if (BufferIsValid(buf))
     553        4410 :         UnlockReleaseBuffer(buf);
     554        4410 : }
     555             : 
     556             : static void
     557         234 : btree_xlog_updates(Page page, OffsetNumber *updatedoffsets,
     558             :                    xl_btree_update *updates, int nupdated)
     559             : {
     560             :     BTVacuumPosting vacposting;
     561             :     IndexTuple  origtuple;
     562             :     ItemId      itemid;
     563             :     Size        itemsz;
     564             : 
     565        6296 :     for (int i = 0; i < nupdated; i++)
     566             :     {
     567        6062 :         itemid = PageGetItemId(page, updatedoffsets[i]);
     568        6062 :         origtuple = (IndexTuple) PageGetItem(page, itemid);
     569             : 
     570        6062 :         vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
     571        6062 :                             updates->ndeletedtids * sizeof(uint16));
     572        6062 :         vacposting->updatedoffset = updatedoffsets[i];
     573        6062 :         vacposting->itup = origtuple;
     574        6062 :         vacposting->ndeletedtids = updates->ndeletedtids;
     575        6062 :         memcpy(vacposting->deletetids,
     576             :                (char *) updates + SizeOfBtreeUpdate,
     577        6062 :                updates->ndeletedtids * sizeof(uint16));
     578             : 
     579        6062 :         _bt_update_posting(vacposting);
     580             : 
     581             :         /* Overwrite updated version of tuple */
     582        6062 :         itemsz = MAXALIGN(IndexTupleSize(vacposting->itup));
     583        6062 :         if (!PageIndexTupleOverwrite(page, updatedoffsets[i],
     584        6062 :                                      (Item) vacposting->itup, itemsz))
     585           0 :             elog(PANIC, "failed to update partially dead item");
     586             : 
     587        6062 :         pfree(vacposting->itup);
     588        6062 :         pfree(vacposting);
     589             : 
     590             :         /* advance to next xl_btree_update from array */
     591        6062 :         updates = (xl_btree_update *)
     592        6062 :             ((char *) updates + SizeOfBtreeUpdate +
     593        6062 :              updates->ndeletedtids * sizeof(uint16));
     594             :     }
     595         234 : }
     596             : 
     597             : static void
     598        1916 : btree_xlog_vacuum(XLogReaderState *record)
     599             : {
     600        1916 :     XLogRecPtr  lsn = record->EndRecPtr;
     601        1916 :     xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
     602             :     Buffer      buffer;
     603             :     Page        page;
     604             :     BTPageOpaque opaque;
     605             : 
     606             :     /*
     607             :      * We need to take a cleanup lock here, just like btvacuumpage(). However,
     608             :      * it isn't necessary to exhaustively get a cleanup lock on every block in
     609             :      * the index during recovery (just getting a cleanup lock on pages with
     610             :      * items to kill suffices).  See nbtree/README for details.
     611             :      */
     612        1916 :     if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer)
     613             :         == BLK_NEEDS_REDO)
     614             :     {
     615        1376 :         char       *ptr = XLogRecGetBlockData(record, 0, NULL);
     616             : 
     617        1376 :         page = (Page) BufferGetPage(buffer);
     618             : 
     619        1376 :         if (xlrec->nupdated > 0)
     620             :         {
     621             :             OffsetNumber *updatedoffsets;
     622             :             xl_btree_update *updates;
     623             : 
     624          30 :             updatedoffsets = (OffsetNumber *)
     625          30 :                 (ptr + xlrec->ndeleted * sizeof(OffsetNumber));
     626          30 :             updates = (xl_btree_update *) ((char *) updatedoffsets +
     627          30 :                                            xlrec->nupdated *
     628             :                                            sizeof(OffsetNumber));
     629             : 
     630          30 :             btree_xlog_updates(page, updatedoffsets, updates, xlrec->nupdated);
     631             :         }
     632             : 
     633        1376 :         if (xlrec->ndeleted > 0)
     634        1376 :             PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
     635             : 
     636             :         /*
     637             :          * Clear the vacuum cycle ID, and mark the page as not containing any
     638             :          * LP_DEAD items
     639             :          */
     640        1376 :         opaque = BTPageGetOpaque(page);
     641        1376 :         opaque->btpo_cycleid = 0;
     642        1376 :         opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
     643             : 
     644        1376 :         PageSetLSN(page, lsn);
     645        1376 :         MarkBufferDirty(buffer);
     646             :     }
     647        1916 :     if (BufferIsValid(buffer))
     648        1916 :         UnlockReleaseBuffer(buffer);
     649        1916 : }
     650             : 
     651             : static void
     652        1652 : btree_xlog_delete(XLogReaderState *record)
     653             : {
     654        1652 :     XLogRecPtr  lsn = record->EndRecPtr;
     655        1652 :     xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
     656             :     Buffer      buffer;
     657             :     Page        page;
     658             :     BTPageOpaque opaque;
     659             : 
     660             :     /*
     661             :      * If we have any conflict processing to do, it must happen before we
     662             :      * update the page
     663             :      */
     664        1652 :     if (InHotStandby)
     665             :     {
     666             :         RelFileLocator rlocator;
     667             : 
     668        1648 :         XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
     669             : 
     670        1648 :         ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
     671        1648 :                                             xlrec->isCatalogRel,
     672             :                                             rlocator);
     673             :     }
     674             : 
     675             :     /*
     676             :      * We don't need to take a cleanup lock to apply these changes. See
     677             :      * nbtree/README for details.
     678             :      */
     679        1652 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
     680             :     {
     681        1610 :         char       *ptr = XLogRecGetBlockData(record, 0, NULL);
     682             : 
     683        1610 :         page = (Page) BufferGetPage(buffer);
     684             : 
     685        1610 :         if (xlrec->nupdated > 0)
     686             :         {
     687             :             OffsetNumber *updatedoffsets;
     688             :             xl_btree_update *updates;
     689             : 
     690         204 :             updatedoffsets = (OffsetNumber *)
     691         204 :                 (ptr + xlrec->ndeleted * sizeof(OffsetNumber));
     692         204 :             updates = (xl_btree_update *) ((char *) updatedoffsets +
     693         204 :                                            xlrec->nupdated *
     694             :                                            sizeof(OffsetNumber));
     695             : 
     696         204 :             btree_xlog_updates(page, updatedoffsets, updates, xlrec->nupdated);
     697             :         }
     698             : 
     699        1610 :         if (xlrec->ndeleted > 0)
     700        1574 :             PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
     701             : 
     702             :         /*
     703             :          * Do *not* clear the vacuum cycle ID, but do mark the page as not
     704             :          * containing any LP_DEAD items
     705             :          */
     706        1610 :         opaque = BTPageGetOpaque(page);
     707        1610 :         opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
     708             : 
     709        1610 :         PageSetLSN(page, lsn);
     710        1610 :         MarkBufferDirty(buffer);
     711             :     }
     712        1652 :     if (BufferIsValid(buffer))
     713        1652 :         UnlockReleaseBuffer(buffer);
     714        1652 : }
     715             : 
     716             : static void
     717        1238 : btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
     718             : {
     719        1238 :     XLogRecPtr  lsn = record->EndRecPtr;
     720        1238 :     xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record);
     721             :     Buffer      buffer;
     722             :     Page        page;
     723             :     BTPageOpaque pageop;
     724             :     IndexTupleData trunctuple;
     725             : 
     726             :     /*
     727             :      * In normal operation, we would lock all the pages this WAL record
     728             :      * touches before changing any of them.  In WAL replay, it should be okay
     729             :      * to lock just one page at a time, since no concurrent index updates can
     730             :      * be happening, and readers should not care whether they arrive at the
     731             :      * target page or not (since it's surely empty).
     732             :      */
     733             : 
     734             :     /* to-be-deleted subtree's parent page */
     735        1238 :     if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
     736             :     {
     737             :         OffsetNumber poffset;
     738             :         ItemId      itemid;
     739             :         IndexTuple  itup;
     740             :         OffsetNumber nextoffset;
     741             :         BlockNumber rightsib;
     742             : 
     743        1226 :         page = (Page) BufferGetPage(buffer);
     744        1226 :         pageop = BTPageGetOpaque(page);
     745             : 
     746        1226 :         poffset = xlrec->poffset;
     747             : 
     748        1226 :         nextoffset = OffsetNumberNext(poffset);
     749        1226 :         itemid = PageGetItemId(page, nextoffset);
     750        1226 :         itup = (IndexTuple) PageGetItem(page, itemid);
     751        1226 :         rightsib = BTreeTupleGetDownLink(itup);
     752             : 
     753        1226 :         itemid = PageGetItemId(page, poffset);
     754        1226 :         itup = (IndexTuple) PageGetItem(page, itemid);
     755        1226 :         BTreeTupleSetDownLink(itup, rightsib);
     756        1226 :         nextoffset = OffsetNumberNext(poffset);
     757        1226 :         PageIndexTupleDelete(page, nextoffset);
     758             : 
     759        1226 :         PageSetLSN(page, lsn);
     760        1226 :         MarkBufferDirty(buffer);
     761             :     }
     762             : 
     763             :     /*
     764             :      * Don't need to couple cross-level locks in REDO routines, so release
     765             :      * lock on internal page immediately
     766             :      */
     767        1238 :     if (BufferIsValid(buffer))
     768        1238 :         UnlockReleaseBuffer(buffer);
     769             : 
     770             :     /* Rewrite the leaf page as a halfdead page */
     771        1238 :     buffer = XLogInitBufferForRedo(record, 0);
     772        1238 :     page = (Page) BufferGetPage(buffer);
     773             : 
     774        1238 :     _bt_pageinit(page, BufferGetPageSize(buffer));
     775        1238 :     pageop = BTPageGetOpaque(page);
     776             : 
     777        1238 :     pageop->btpo_prev = xlrec->leftblk;
     778        1238 :     pageop->btpo_next = xlrec->rightblk;
     779        1238 :     pageop->btpo_level = 0;
     780        1238 :     pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
     781        1238 :     pageop->btpo_cycleid = 0;
     782             : 
     783             :     /*
     784             :      * Construct a dummy high key item that points to top parent page (value
     785             :      * is InvalidBlockNumber when the top parent page is the leaf page itself)
     786             :      */
     787        1238 :     MemSet(&trunctuple, 0, sizeof(IndexTupleData));
     788        1238 :     trunctuple.t_info = sizeof(IndexTupleData);
     789        1238 :     BTreeTupleSetTopParent(&trunctuple, xlrec->topparent);
     790             : 
     791        1238 :     if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
     792             :                     false, false) == InvalidOffsetNumber)
     793           0 :         elog(ERROR, "could not add dummy high key to half-dead page");
     794             : 
     795        1238 :     PageSetLSN(page, lsn);
     796        1238 :     MarkBufferDirty(buffer);
     797        1238 :     UnlockReleaseBuffer(buffer);
     798        1238 : }
     799             : 
     800             : 
     801             : static void
     802        1330 : btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
     803             : {
     804        1330 :     XLogRecPtr  lsn = record->EndRecPtr;
     805        1330 :     xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record);
     806             :     BlockNumber leftsib;
     807             :     BlockNumber rightsib;
     808             :     uint32      level;
     809             :     bool        isleaf;
     810             :     FullTransactionId safexid;
     811             :     Buffer      leftbuf;
     812             :     Buffer      target;
     813             :     Buffer      rightbuf;
     814             :     Page        page;
     815             :     BTPageOpaque pageop;
     816             : 
     817        1330 :     leftsib = xlrec->leftsib;
     818        1330 :     rightsib = xlrec->rightsib;
     819        1330 :     level = xlrec->level;
     820        1330 :     isleaf = (level == 0);
     821        1330 :     safexid = xlrec->safexid;
     822             : 
     823             :     /* No leaftopparent for level 0 (leaf page) or level 1 target */
     824             :     Assert(!BlockNumberIsValid(xlrec->leaftopparent) || level > 1);
     825             : 
     826             :     /*
     827             :      * In normal operation, we would lock all the pages this WAL record
     828             :      * touches before changing any of them.  In WAL replay, we at least lock
     829             :      * the pages in the same standard left-to-right order (leftsib, target,
     830             :      * rightsib), and don't release the sibling locks until the target is
     831             :      * marked deleted.
     832             :      */
     833             : 
     834             :     /* Fix right-link of left sibling, if any */
     835        1330 :     if (leftsib != P_NONE)
     836             :     {
     837         106 :         if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
     838             :         {
     839         106 :             page = (Page) BufferGetPage(leftbuf);
     840         106 :             pageop = BTPageGetOpaque(page);
     841         106 :             pageop->btpo_next = rightsib;
     842             : 
     843         106 :             PageSetLSN(page, lsn);
     844         106 :             MarkBufferDirty(leftbuf);
     845             :         }
     846             :     }
     847             :     else
     848        1224 :         leftbuf = InvalidBuffer;
     849             : 
     850             :     /* Rewrite target page as empty deleted page */
     851        1330 :     target = XLogInitBufferForRedo(record, 0);
     852        1330 :     page = (Page) BufferGetPage(target);
     853             : 
     854        1330 :     _bt_pageinit(page, BufferGetPageSize(target));
     855        1330 :     pageop = BTPageGetOpaque(page);
     856             : 
     857        1330 :     pageop->btpo_prev = leftsib;
     858        1330 :     pageop->btpo_next = rightsib;
     859        1330 :     pageop->btpo_level = level;
     860        1330 :     BTPageSetDeleted(page, safexid);
     861        1330 :     if (isleaf)
     862        1236 :         pageop->btpo_flags |= BTP_LEAF;
     863        1330 :     pageop->btpo_cycleid = 0;
     864             : 
     865        1330 :     PageSetLSN(page, lsn);
     866        1330 :     MarkBufferDirty(target);
     867             : 
     868             :     /* Fix left-link of right sibling */
     869        1330 :     if (XLogReadBufferForRedo(record, 2, &rightbuf) == BLK_NEEDS_REDO)
     870             :     {
     871         160 :         page = (Page) BufferGetPage(rightbuf);
     872         160 :         pageop = BTPageGetOpaque(page);
     873         160 :         pageop->btpo_prev = leftsib;
     874             : 
     875         160 :         PageSetLSN(page, lsn);
     876         160 :         MarkBufferDirty(rightbuf);
     877             :     }
     878             : 
     879             :     /* Release siblings */
     880        1330 :     if (BufferIsValid(leftbuf))
     881         106 :         UnlockReleaseBuffer(leftbuf);
     882        1330 :     if (BufferIsValid(rightbuf))
     883        1330 :         UnlockReleaseBuffer(rightbuf);
     884             : 
     885             :     /* Release target */
     886        1330 :     UnlockReleaseBuffer(target);
     887             : 
     888             :     /*
     889             :      * If we deleted a parent of the targeted leaf page, instead of the leaf
     890             :      * itself, update the leaf to point to the next remaining child in the
     891             :      * to-be-deleted subtree
     892             :      */
     893        1330 :     if (XLogRecHasBlockRef(record, 3))
     894             :     {
     895             :         /*
     896             :          * There is no real data on the page, so we just re-create it from
     897             :          * scratch using the information from the WAL record.
     898             :          *
     899             :          * Note that we don't end up here when the target page is also the
     900             :          * leafbuf page.  There is no need to add a dummy hikey item with a
     901             :          * top parent link when deleting leafbuf because it's the last page
     902             :          * we'll delete in the subtree undergoing deletion.
     903             :          */
     904             :         Buffer      leafbuf;
     905             :         IndexTupleData trunctuple;
     906             : 
     907             :         Assert(!isleaf);
     908             : 
     909          94 :         leafbuf = XLogInitBufferForRedo(record, 3);
     910          94 :         page = (Page) BufferGetPage(leafbuf);
     911             : 
     912          94 :         _bt_pageinit(page, BufferGetPageSize(leafbuf));
     913          94 :         pageop = BTPageGetOpaque(page);
     914             : 
     915          94 :         pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
     916          94 :         pageop->btpo_prev = xlrec->leafleftsib;
     917          94 :         pageop->btpo_next = xlrec->leafrightsib;
     918          94 :         pageop->btpo_level = 0;
     919          94 :         pageop->btpo_cycleid = 0;
     920             : 
     921             :         /* Add a dummy hikey item */
     922         188 :         MemSet(&trunctuple, 0, sizeof(IndexTupleData));
     923          94 :         trunctuple.t_info = sizeof(IndexTupleData);
     924          94 :         BTreeTupleSetTopParent(&trunctuple, xlrec->leaftopparent);
     925             : 
     926          94 :         if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
     927             :                         false, false) == InvalidOffsetNumber)
     928           0 :             elog(ERROR, "could not add dummy high key to half-dead page");
     929             : 
     930          94 :         PageSetLSN(page, lsn);
     931          94 :         MarkBufferDirty(leafbuf);
     932          94 :         UnlockReleaseBuffer(leafbuf);
     933             :     }
     934             : 
     935             :     /* Update metapage if needed */
     936        1330 :     if (info == XLOG_BTREE_UNLINK_PAGE_META)
     937          18 :         _bt_restore_meta(record, 4);
     938        1330 : }
     939             : 
     940             : static void
     941        1284 : btree_xlog_newroot(XLogReaderState *record)
     942             : {
     943        1284 :     XLogRecPtr  lsn = record->EndRecPtr;
     944        1284 :     xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
     945             :     Buffer      buffer;
     946             :     Page        page;
     947             :     BTPageOpaque pageop;
     948             :     char       *ptr;
     949             :     Size        len;
     950             : 
     951        1284 :     buffer = XLogInitBufferForRedo(record, 0);
     952        1284 :     page = (Page) BufferGetPage(buffer);
     953             : 
     954        1284 :     _bt_pageinit(page, BufferGetPageSize(buffer));
     955        1284 :     pageop = BTPageGetOpaque(page);
     956             : 
     957        1284 :     pageop->btpo_flags = BTP_ROOT;
     958        1284 :     pageop->btpo_prev = pageop->btpo_next = P_NONE;
     959        1284 :     pageop->btpo_level = xlrec->level;
     960        1284 :     if (xlrec->level == 0)
     961        1182 :         pageop->btpo_flags |= BTP_LEAF;
     962        1284 :     pageop->btpo_cycleid = 0;
     963             : 
     964        1284 :     if (xlrec->level > 0)
     965             :     {
     966         102 :         ptr = XLogRecGetBlockData(record, 0, &len);
     967         102 :         _bt_restore_page(page, ptr, len);
     968             : 
     969             :         /* Clear the incomplete-split flag in left child */
     970         102 :         _bt_clear_incomplete_split(record, 1);
     971             :     }
     972             : 
     973        1284 :     PageSetLSN(page, lsn);
     974        1284 :     MarkBufferDirty(buffer);
     975        1284 :     UnlockReleaseBuffer(buffer);
     976             : 
     977        1284 :     _bt_restore_meta(record, 2);
     978        1284 : }
     979             : 
     980             : /*
     981             :  * In general VACUUM must defer recycling as a way of avoiding certain race
     982             :  * conditions.  Deleted pages contain a safexid value that is used by VACUUM
     983             :  * to determine whether or not it's safe to place a page that was deleted by
     984             :  * VACUUM earlier into the FSM now.  See nbtree/README.
     985             :  *
     986             :  * As far as any backend operating during original execution is concerned, the
     987             :  * FSM is a cache of recycle-safe pages; the mere presence of the page in the
     988             :  * FSM indicates that the page must already be safe to recycle (actually,
     989             :  * _bt_allocbuf() verifies it's safe using BTPageIsRecyclable(), but that's
     990             :  * just because it would be unwise to completely trust the FSM, given its
     991             :  * current limitations).
     992             :  *
     993             :  * This isn't sufficient to prevent similar concurrent recycling race
     994             :  * conditions during Hot Standby, though.  For that we need to log a
     995             :  * xl_btree_reuse_page record at the point that a page is actually recycled
     996             :  * and reused for an entirely unrelated page inside _bt_split().  These
     997             :  * records include the same safexid value from the original deleted page,
     998             :  * stored in the record's snapshotConflictHorizon field.
     999             :  *
    1000             :  * The GlobalVisCheckRemovableFullXid() test in BTPageIsRecyclable() is used
    1001             :  * to determine if it's safe to recycle a page.  This mirrors our own test:
    1002             :  * the PGPROC->xmin > limitXmin test inside GetConflictingVirtualXIDs().
    1003             :  * Consequently, one XID value achieves the same exclusion effect on primary
    1004             :  * and standby.
    1005             :  */
    1006             : static void
    1007           0 : btree_xlog_reuse_page(XLogReaderState *record)
    1008             : {
    1009           0 :     xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
    1010             : 
    1011           0 :     if (InHotStandby)
    1012           0 :         ResolveRecoveryConflictWithSnapshotFullXid(xlrec->snapshotConflictHorizon,
    1013           0 :                                                    xlrec->isCatalogRel,
    1014             :                                                    xlrec->locator);
    1015           0 : }
    1016             : 
    1017             : void
    1018     1035696 : btree_redo(XLogReaderState *record)
    1019             : {
    1020     1035696 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
    1021             :     MemoryContext oldCtx;
    1022             : 
    1023     1035696 :     oldCtx = MemoryContextSwitchTo(opCtx);
    1024     1035696 :     switch (info)
    1025             :     {
    1026     1012334 :         case XLOG_BTREE_INSERT_LEAF:
    1027     1012334 :             btree_xlog_insert(true, false, false, record);
    1028     1012334 :             break;
    1029        2888 :         case XLOG_BTREE_INSERT_UPPER:
    1030        2888 :             btree_xlog_insert(false, false, false, record);
    1031        2888 :             break;
    1032           8 :         case XLOG_BTREE_INSERT_META:
    1033           8 :             btree_xlog_insert(false, true, false, record);
    1034           8 :             break;
    1035         380 :         case XLOG_BTREE_SPLIT_L:
    1036         380 :             btree_xlog_split(true, record);
    1037         380 :             break;
    1038        2720 :         case XLOG_BTREE_SPLIT_R:
    1039        2720 :             btree_xlog_split(false, record);
    1040        2720 :             break;
    1041        5512 :         case XLOG_BTREE_INSERT_POST:
    1042        5512 :             btree_xlog_insert(true, false, true, record);
    1043        5512 :             break;
    1044        4410 :         case XLOG_BTREE_DEDUP:
    1045        4410 :             btree_xlog_dedup(record);
    1046        4410 :             break;
    1047        1916 :         case XLOG_BTREE_VACUUM:
    1048        1916 :             btree_xlog_vacuum(record);
    1049        1916 :             break;
    1050        1652 :         case XLOG_BTREE_DELETE:
    1051        1652 :             btree_xlog_delete(record);
    1052        1652 :             break;
    1053        1238 :         case XLOG_BTREE_MARK_PAGE_HALFDEAD:
    1054        1238 :             btree_xlog_mark_page_halfdead(info, record);
    1055        1238 :             break;
    1056        1330 :         case XLOG_BTREE_UNLINK_PAGE:
    1057             :         case XLOG_BTREE_UNLINK_PAGE_META:
    1058        1330 :             btree_xlog_unlink_page(info, record);
    1059        1330 :             break;
    1060        1284 :         case XLOG_BTREE_NEWROOT:
    1061        1284 :             btree_xlog_newroot(record);
    1062        1284 :             break;
    1063           0 :         case XLOG_BTREE_REUSE_PAGE:
    1064           0 :             btree_xlog_reuse_page(record);
    1065           0 :             break;
    1066          24 :         case XLOG_BTREE_META_CLEANUP:
    1067          24 :             _bt_restore_meta(record, 0);
    1068          24 :             break;
    1069           0 :         default:
    1070           0 :             elog(PANIC, "btree_redo: unknown op code %u", info);
    1071             :     }
    1072     1035696 :     MemoryContextSwitchTo(oldCtx);
    1073     1035696 :     MemoryContextReset(opCtx);
    1074     1035696 : }
    1075             : 
    1076             : void
    1077         392 : btree_xlog_startup(void)
    1078             : {
    1079         392 :     opCtx = AllocSetContextCreate(CurrentMemoryContext,
    1080             :                                   "Btree recovery temporary context",
    1081             :                                   ALLOCSET_DEFAULT_SIZES);
    1082         392 : }
    1083             : 
    1084             : void
    1085         288 : btree_xlog_cleanup(void)
    1086             : {
    1087         288 :     MemoryContextDelete(opCtx);
    1088         288 :     opCtx = NULL;
    1089         288 : }
    1090             : 
    1091             : /*
    1092             :  * Mask a btree page before performing consistency checks on it.
    1093             :  */
    1094             : void
    1095     1644524 : btree_mask(char *pagedata, BlockNumber blkno)
    1096             : {
    1097     1644524 :     Page        page = (Page) pagedata;
    1098             :     BTPageOpaque maskopaq;
    1099             : 
    1100     1644524 :     mask_page_lsn_and_checksum(page);
    1101             : 
    1102     1644524 :     mask_page_hint_bits(page);
    1103     1644524 :     mask_unused_space(page);
    1104             : 
    1105     1644524 :     maskopaq = BTPageGetOpaque(page);
    1106             : 
    1107     1644524 :     if (P_ISLEAF(maskopaq))
    1108             :     {
    1109             :         /*
    1110             :          * In btree leaf pages, it is possible to modify the LP_FLAGS without
    1111             :          * emitting any WAL record. Hence, mask the line pointer flags. See
    1112             :          * _bt_killitems(), _bt_check_unique() for details.
    1113             :          */
    1114     1636232 :         mask_lp_flags(page);
    1115             :     }
    1116             : 
    1117             :     /*
    1118             :      * BTP_HAS_GARBAGE is just an un-logged hint bit. So, mask it. See
    1119             :      * _bt_delete_or_dedup_one_page(), _bt_killitems(), and _bt_check_unique()
    1120             :      * for details.
    1121             :      */
    1122     1644524 :     maskopaq->btpo_flags &= ~BTP_HAS_GARBAGE;
    1123             : 
    1124             :     /*
    1125             :      * During replay of a btree page split, we don't set the BTP_SPLIT_END
    1126             :      * flag of the right sibling and initialize the cycle_id to 0 for the same
    1127             :      * page. See btree_xlog_split() for details.
    1128             :      */
    1129     1644524 :     maskopaq->btpo_flags &= ~BTP_SPLIT_END;
    1130     1644524 :     maskopaq->btpo_cycleid = 0;
    1131     1644524 : }

Generated by: LCOV version 1.14