LCOV - code coverage report
Current view: top level - src/backend/access/nbtree - nbtxlog.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 454 492 92.3 %
Date: 2024-03-29 05:11:05 Functions: 15 17 88.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * nbtxlog.c
       4             :  *    WAL replay logic for btrees.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/nbtree/nbtxlog.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/bufmask.h"
      18             : #include "access/nbtree.h"
      19             : #include "access/nbtxlog.h"
      20             : #include "access/transam.h"
      21             : #include "access/xlogutils.h"
      22             : #include "storage/standby.h"
      23             : #include "utils/memutils.h"
      24             : 
      25             : static MemoryContext opCtx;     /* working memory for operations (NOTE(review): presumably a scratch context set up and reset by the redo entry point -- confirm) */
      26             : 
      27             : /*
      28             :  * _bt_restore_page -- re-enter all the index tuples on a page
      29             :  *
      30             :  * The page is freshly init'd, and *from (length len) is a copy of what
      31             :  * had been its upper part (pd_upper to pd_special).  We assume that the
      32             :  * tuples had been added to the page in item-number order, and therefore
      33             :  * the one with highest item number appears first (lowest on the page).
                     :  *
                     :  * page: target page that receives the tuples via PageAddItem().
                     :  * from: start of the packed tuple images; need not be aligned.
                     :  * len:  number of bytes of tuple data starting at 'from'.
                     :  *
                     :  * Works in two passes: a forward scan records each tuple's start address
                     :  * and MAXALIGN'd size, then a reverse walk adds the tuples so that the
                     :  * last one found by the scan is added at offset 1 and the first one at
                     :  * offset nitems.  Raises PANIC if any PageAddItem() call fails.
                     :  *
                     :  * NOTE(review): the forward scan never checks 'i' against
                     :  * MaxIndexTuplesPerPage, so it relies on the caller passing well-formed
                     :  * data that holds no more tuples than the local arrays can index.
      34             :  */
      35             : static void
      36        3044 : _bt_restore_page(Page page, char *from, int len)
      37             : {
      38             :     IndexTupleData itupdata;
      39             :     Size        itemsz;
      40        3044 :     char       *end = from + len;
      41             :     Item        items[MaxIndexTuplesPerPage];   /* start of each tuple, in scan order */
      42             :     uint16      itemsizes[MaxIndexTuplesPerPage];   /* MAXALIGN'd size of each tuple */
      43             :     int         i;
      44             :     int         nitems;
      45             : 
      46             :     /*
      47             :      * To get the items back in the original order, we add them to the page in
      48             :      * reverse.  To figure out where one tuple ends and another begins, we
      49             :      * have to scan them in forward order first.
      50             :      */
      51        3044 :     i = 0;
      52      196702 :     while (from < end)
      53             :     {
      54             :         /*
      55             :          * As we step through the items, 'from' won't always be properly
      56             :          * aligned, so we need to use memcpy().  Further, we use Item (which
      57             :          * is just a char*) here for our items array for the same reason;
      58             :          * wouldn't want the compiler or anyone thinking that an item is
      59             :          * aligned when it isn't.
      60             :          */
      61      193658 :         memcpy(&itupdata, from, sizeof(IndexTupleData));
      62      193658 :         itemsz = IndexTupleSize(&itupdata);
      63      193658 :         itemsz = MAXALIGN(itemsz);
      64             : 
      65      193658 :         items[i] = (Item) from;
      66      193658 :         itemsizes[i] = itemsz;
      67      193658 :         i++;
      68             : 
      69      193658 :         from += itemsz;     /* step to the next tuple image */
      70             :     }
      71        3044 :     nitems = i;
      72             : 
      73      196702 :     for (i = nitems - 1; i >= 0; i--)   /* items[nitems - 1] goes to offset 1, items[0] to offset nitems */
      74             :     {
      75      193658 :         if (PageAddItem(page, items[i], itemsizes[i], nitems - i,
      76             :                         false, false) == InvalidOffsetNumber)
      77           0 :             elog(PANIC, "_bt_restore_page: cannot add item to page");
      78             :     }
      79        3044 : }
      80             : /*
                     :  * _bt_restore_meta -- rebuild the btree metapage from a WAL record
                     :  *
                     :  * record:   WAL record being replayed.
                     :  * block_id: block reference within the record that names the metapage and
                     :  *           whose block data is an xl_btree_metadata struct.
                     :  *
                     :  * The buffer is obtained with XLogInitBufferForRedo() and the page is
                     :  * re-initialized with _bt_pageinit() before anything is written to it.
                     :  * The metadata fields assigned below are then copied from the record,
                     :  * except btm_magic (always BTREE_MAGIC) and
                     :  * btm_last_cleanup_num_heap_tuples (always -1.0).  Finally the page LSN is
                     :  * set to the record's EndRecPtr, and the buffer is marked dirty, unlocked
                     :  * and released.
                     :  */
      81             : static void
      82        1266 : _bt_restore_meta(XLogReaderState *record, uint8 block_id)
      83             : {
      84        1266 :     XLogRecPtr  lsn = record->EndRecPtr;
      85             :     Buffer      metabuf;
      86             :     Page        metapg;
      87             :     BTMetaPageData *md;
      88             :     BTPageOpaque pageop;
      89             :     xl_btree_metadata *xlrec;
      90             :     char       *ptr;
      91             :     Size        len;
      92             : 
      93        1266 :     metabuf = XLogInitBufferForRedo(record, block_id);
      94        1266 :     ptr = XLogRecGetBlockData(record, block_id, &len);
      95             : 
      96             :     Assert(len == sizeof(xl_btree_metadata));
      97             :     Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
      98        1266 :     xlrec = (xl_btree_metadata *) ptr;
      99        1266 :     metapg = BufferGetPage(metabuf);
     100             : 
     101        1266 :     _bt_pageinit(metapg, BufferGetPageSize(metabuf));
     102             : 
     103        1266 :     md = BTPageGetMeta(metapg);
     104        1266 :     md->btm_magic = BTREE_MAGIC;
     105        1266 :     md->btm_version = xlrec->version;
     106        1266 :     md->btm_root = xlrec->root;
     107        1266 :     md->btm_level = xlrec->level;
     108        1266 :     md->btm_fastroot = xlrec->fastroot;
     109        1266 :     md->btm_fastlevel = xlrec->fastlevel;
     110             :     /* Cannot log BTREE_MIN_VERSION index metapage without upgrade */
     111             :     Assert(md->btm_version >= BTREE_NOVAC_VERSION);
     112        1266 :     md->btm_last_cleanup_num_delpages = xlrec->last_cleanup_num_delpages;
     113        1266 :     md->btm_last_cleanup_num_heap_tuples = -1.0;    /* fixed value, not taken from the record */
     114        1266 :     md->btm_allequalimage = xlrec->allequalimage;
     115             : 
     116        1266 :     pageop = BTPageGetOpaque(metapg);
     117        1266 :     pageop->btpo_flags = BTP_META;
     118             : 
     119             :     /*
     120             :      * Set pd_lower just past the end of the metadata.  This is essential,
     121             :      * because without doing so, metadata will be lost if xlog.c compresses
     122             :      * the page.
     123             :      */
     124        1266 :     ((PageHeader) metapg)->pd_lower =
     125        1266 :         ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg;
     126             : 
     127        1266 :     PageSetLSN(metapg, lsn);
     128        1266 :     MarkBufferDirty(metabuf);
     129        1266 :     UnlockReleaseBuffer(metabuf);
     130        1266 : }
     131             : 
     132             : /*
     133             :  * _bt_clear_incomplete_split -- clear INCOMPLETE_SPLIT flag on a page
     134             :  *
     135             :  * This is a common subroutine of the redo functions of all the WAL record
     136             :  * types that can insert a downlink: insert, split, and newroot.
                     :  *
                     :  * record:   WAL record being replayed.
                     :  * block_id: block reference within the record naming the page whose flag
                     :  *           is to be cleared.
                     :  *
                     :  * The page is only modified (flag cleared, LSN set to the record's
                     :  * EndRecPtr, buffer marked dirty) when XLogReadBufferForRedo() reports
                     :  * BLK_NEEDS_REDO.  Whatever the outcome, a valid buffer is unlocked and
                     :  * released before returning.
     137             :  */
     138             : static void
     139        2938 : _bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id)
     140             : {
     141        2938 :     XLogRecPtr  lsn = record->EndRecPtr;
     142             :     Buffer      buf;
     143             : 
     144        2938 :     if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO)
     145             :     {
     146        2938 :         Page        page = (Page) BufferGetPage(buf);
     147        2938 :         BTPageOpaque pageop = BTPageGetOpaque(page);
     148             : 
     149             :         Assert(P_INCOMPLETE_SPLIT(pageop));
     150        2938 :         pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT;
     151             : 
     152        2938 :         PageSetLSN(page, lsn);
     153        2938 :         MarkBufferDirty(buf);
     154             :     }
     155        2938 :     if (BufferIsValid(buf))     /* release the buffer whether or not redo was needed */
     156        2938 :         UnlockReleaseBuffer(buf);
     157        2938 : }
     158             : /*
                     :  * btree_xlog_insert -- replay insertion of a single tuple into a page
                     :  *
                     :  * isleaf:  false when the target is an internal page, in which case the
                     :  *          incomplete-split flag of the page in block reference 1 is
                     :  *          cleared first.
                     :  * ismeta:  true when the record also carries metapage contents, which are
                     :  *          restored from block reference 2 after the insertion.
                     :  * posting: true when the insertion split an existing posting list.
                     :  * record:  WAL record being replayed; its main data is an xl_btree_insert.
                     :  *
                     :  * Block reference 0 is the page that receives the tuple, and it is only
                     :  * modified when XLogReadBufferForRedo() reports BLK_NEEDS_REDO.  Its block
                     :  * data is the new tuple itself, or, when 'posting' is set, a uint16
                     :  * posting-list offset followed by the original new tuple.  In the
                     :  * posting case the existing tuple just before xlrec->offnum is
                     :  * overwritten in place with the result of _bt_swap_posting() and the
                     :  * adjusted copy of the new tuple is added at xlrec->offnum.  Raises
                     :  * PANIC if PageAddItem() fails.
                     :  */
     159             : static void
     160      977354 : btree_xlog_insert(bool isleaf, bool ismeta, bool posting,
     161             :                   XLogReaderState *record)
     162             : {
     163      977354 :     XLogRecPtr  lsn = record->EndRecPtr;
     164      977354 :     xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
     165             :     Buffer      buffer;
     166             :     Page        page;
     167             : 
     168             :     /*
     169             :      * Insertion to an internal page finishes an incomplete split at the child
     170             :      * level.  Clear the incomplete-split flag in the child.  Note: during
     171             :      * normal operation, the child and parent pages are locked at the same
     172             :      * time (the locks are coupled), so that clearing the flag and inserting
     173             :      * the downlink appear atomic to other backends.  We don't bother with
     174             :      * that during replay, because readers don't care about the
     175             :      * incomplete-split flag and there cannot be updates happening.
     176             :      */
     177      977354 :     if (!isleaf)
     178        2730 :         _bt_clear_incomplete_split(record, 1);
     179      977354 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
     180             :     {
     181             :         Size        datalen;
     182      972386 :         char       *datapos = XLogRecGetBlockData(record, 0, &datalen);
     183             : 
     184      972386 :         page = BufferGetPage(buffer);
     185             : 
     186      972386 :         if (!posting)
     187             :         {
     188             :             /* Simple retail insertion */
     189      969006 :             if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
     190             :                             false, false) == InvalidOffsetNumber)
     191           0 :                 elog(PANIC, "failed to add new item");
     192             :         }
     193             :         else
     194             :         {
     195             :             ItemId      itemid;
     196             :             IndexTuple  oposting,
     197             :                         newitem,
     198             :                         nposting;
     199             :             uint16      postingoff;
     200             : 
     201             :             /*
     202             :              * A posting list split occurred during leaf page insertion.  WAL
     203             :              * record data will start with an offset number representing the
     204             :              * point in an existing posting list that a split occurs at.
     205             :              *
     206             :              * Use _bt_swap_posting() to repeat posting list split steps from
     207             :              * primary.  Note that newitem from WAL record is 'orignewitem',
     208             :              * not the final version of newitem that is actually inserted on
     209             :              * page.
     210             :              */
     211        3380 :             postingoff = *((uint16 *) datapos);
     212        3380 :             datapos += sizeof(uint16);
     213        3380 :             datalen -= sizeof(uint16);  /* what remains is the new tuple only */
     214             : 
     215        3380 :             itemid = PageGetItemId(page, OffsetNumberPrev(xlrec->offnum));
     216        3380 :             oposting = (IndexTuple) PageGetItem(page, itemid);
     217             : 
     218             :             /* Use mutable, aligned newitem copy in _bt_swap_posting() */
     219             :             Assert(isleaf && postingoff > 0);
     220        3380 :             newitem = CopyIndexTuple((IndexTuple) datapos);
     221        3380 :             nposting = _bt_swap_posting(newitem, oposting, postingoff);
     222             : 
     223             :             /* Replace existing posting list with post-split version */
     224        3380 :             memcpy(oposting, nposting, MAXALIGN(IndexTupleSize(nposting)));
     225             : 
     226             :             /* Insert "final" new item (not orignewitem from WAL stream) */
     227             :             Assert(IndexTupleSize(newitem) == datalen);
     228        3380 :             if (PageAddItem(page, (Item) newitem, datalen, xlrec->offnum,
     229             :                             false, false) == InvalidOffsetNumber)
     230           0 :                 elog(PANIC, "failed to add posting split new item");
     231             :         }
     232             : 
     233      972386 :         PageSetLSN(page, lsn);
     234      972386 :         MarkBufferDirty(buffer);
     235             :     }
     236      977354 :     if (BufferIsValid(buffer))
     237      977354 :         UnlockReleaseBuffer(buffer);
     238             : 
     239             :     /*
     240             :      * Note: in normal operation, we'd update the metapage while still holding
     241             :      * lock on the page we inserted into.  But during replay it's not
     242             :      * necessary to hold that lock, since no other index updates can be
     243             :      * happening concurrently, and readers will cope fine with following an
     244             :      * obsolete link from the metapage.
     245             :      */
     246      977354 :     if (ismeta)
     247           8 :         _bt_restore_meta(record, 2);
     248      977354 : }
     249             : /*
                     :  * btree_xlog_split -- replay a page split
                     :  *
                     :  * newitemonleft: true when the tuple whose insertion caused the split
                     :  *                belongs on the left (original) page.
                     :  * record:        WAL record being replayed; its main data is an
                     :  *                xl_btree_split.
                     :  *
                     :  * Block references used:
                     :  *   0 - original page, which becomes the left half of the split
                     :  *   1 - new right sibling, always rebuilt from scratch
                     :  *   2 - page to the right of the new sibling, if any; when the reference
                     :  *       is absent the new sibling's right-link is set to P_NONE
                     :  *   3 - child page whose incomplete-split flag is cleared (only when the
                     :  *       split page is not a leaf)
                     :  *
                     :  * Block 1's data is the packed tuple area handed to _bt_restore_page().
                     :  * Block 0's data is an optional new tuple (present when newitemonleft is
                     :  * set or xlrec->postingoff is nonzero) followed by the left page's new
                     :  * high key.  The left page is rebuilt in a temporary page: the high key
                     :  * first, then the original items before xlrec->firstrightoff, with the
                     :  * new tuple and/or replacement posting list put in at their offsets.
                     :  * It is marked BTP_INCOMPLETE_SPLIT and linked to the new right sibling.
                     :  */
     250             : static void
     251        2938 : btree_xlog_split(bool newitemonleft, XLogReaderState *record)
     252             : {
     253        2938 :     XLogRecPtr  lsn = record->EndRecPtr;
     254        2938 :     xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
     255        2938 :     bool        isleaf = (xlrec->level == 0);
     256             :     Buffer      buf;
     257             :     Buffer      rbuf;
     258             :     Page        rpage;
     259             :     BTPageOpaque ropaque;
     260             :     char       *datapos;
     261             :     Size        datalen;
     262             :     BlockNumber origpagenumber;
     263             :     BlockNumber rightpagenumber;
     264             :     BlockNumber spagenumber;
     265             : 
     266        2938 :     XLogRecGetBlockTag(record, 0, NULL, NULL, &origpagenumber);
     267        2938 :     XLogRecGetBlockTag(record, 1, NULL, NULL, &rightpagenumber);
     268        2938 :     if (!XLogRecGetBlockTagExtended(record, 2, NULL, NULL, &spagenumber, NULL))
     269        1800 :         spagenumber = P_NONE;   /* no block 2 in the record: new sibling has no right neighbor */
     270             : 
     271             :     /*
     272             :      * Clear the incomplete split flag on the appropriate child page one level
     273             :      * down when origpage/buf is an internal page (there must have been
     274             :      * cascading page splits during original execution in the event of an
     275             :      * internal page split).  This is like the corresponding btree_xlog_insert
     276             :      * call for internal pages.  We're not clearing the incomplete split flag
     277             :      * for the current page split here (you can think of this as part of the
     278             :      * insert of newitem that the page split action needs to perform in
     279             :      * passing).
     280             :      *
     281             :      * Like in btree_xlog_insert, this can be done before locking other pages.
     282             :      * We never need to couple cross-level locks in REDO routines.
     283             :      */
     284        2938 :     if (!isleaf)
     285         102 :         _bt_clear_incomplete_split(record, 3);
     286             : 
     287             :     /* Reconstruct right (new) sibling page from scratch */
     288        2938 :     rbuf = XLogInitBufferForRedo(record, 1);
     289        2938 :     datapos = XLogRecGetBlockData(record, 1, &datalen);
     290        2938 :     rpage = (Page) BufferGetPage(rbuf);
     291             : 
     292        2938 :     _bt_pageinit(rpage, BufferGetPageSize(rbuf));
     293        2938 :     ropaque = BTPageGetOpaque(rpage);
     294             : 
     295        2938 :     ropaque->btpo_prev = origpagenumber;
     296        2938 :     ropaque->btpo_next = spagenumber;
     297        2938 :     ropaque->btpo_level = xlrec->level;
     298        2938 :     ropaque->btpo_flags = isleaf ? BTP_LEAF : 0;
     299        2938 :     ropaque->btpo_cycleid = 0;
     300             : 
     301        2938 :     _bt_restore_page(rpage, datapos, datalen);
     302             : 
     303        2938 :     PageSetLSN(rpage, lsn);
     304        2938 :     MarkBufferDirty(rbuf);
     305             : 
     306             :     /* Now reconstruct original page (left half of split) */
     307        2938 :     if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO)
     308             :     {
     309             :         /*
     310             :          * To retain the same physical order of the tuples that they had, we
     311             :          * initialize a temporary empty page for the left page and add all the
     312             :          * items to that in item number order.  This mirrors how _bt_split()
     313             :          * works.  Retaining the same physical order makes WAL consistency
     314             :          * checking possible.  See also _bt_restore_page(), which does the
     315             :          * same for the right page.
     316             :          */
     317        2898 :         Page        origpage = (Page) BufferGetPage(buf);
     318        2898 :         BTPageOpaque oopaque = BTPageGetOpaque(origpage);
     319             :         OffsetNumber off;
     320        2898 :         IndexTuple  newitem = NULL,
     321        2898 :                     left_hikey = NULL,
     322        2898 :                     nposting = NULL;
     323        2898 :         Size        newitemsz = 0,
     324        2898 :                     left_hikeysz = 0;
     325             :         Page        leftpage;
     326             :         OffsetNumber leftoff,
     327        2898 :                     replacepostingoff = InvalidOffsetNumber;
     328             : 
     329        2898 :         datapos = XLogRecGetBlockData(record, 0, &datalen);
     330             : 
     331        2898 :         if (newitemonleft || xlrec->postingoff != 0)
     332             :         {
     333         330 :             newitem = (IndexTuple) datapos;
     334         330 :             newitemsz = MAXALIGN(IndexTupleSize(newitem));
     335         330 :             datapos += newitemsz;
     336         330 :             datalen -= newitemsz;
     337             : 
     338         330 :             if (xlrec->postingoff != 0)
     339             :             {
     340             :                 ItemId      itemid;
     341             :                 IndexTuple  oposting;
     342             : 
     343             :                 /* Posting list must be at offset number before new item's */
     344          12 :                 replacepostingoff = OffsetNumberPrev(xlrec->newitemoff);
     345             : 
     346             :                 /* Use mutable, aligned newitem copy in _bt_swap_posting() */
     347          12 :                 newitem = CopyIndexTuple(newitem);
     348          12 :                 itemid = PageGetItemId(origpage, replacepostingoff);
     349          12 :                 oposting = (IndexTuple) PageGetItem(origpage, itemid);
     350          12 :                 nposting = _bt_swap_posting(newitem, oposting,
     351          12 :                                             xlrec->postingoff);
     352             :             }
     353             :         }
     354             : 
     355             :         /*
     356             :          * Extract left hikey and its size.  We assume that 16-bit alignment
     357             :          * is enough to apply IndexTupleSize (since it's fetching from a
     358             :          * uint16 field).
     359             :          */
     360        2898 :         left_hikey = (IndexTuple) datapos;
     361        2898 :         left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
     362        2898 :         datapos += left_hikeysz;
     363        2898 :         datalen -= left_hikeysz;
     364             : 
     365             :         Assert(datalen == 0);   /* the high key is the last thing in block 0's data */
     366             : 
     367        2898 :         leftpage = PageGetTempPageCopySpecial(origpage);
     368             : 
     369             :         /* Add high key tuple from WAL record to temp page */
     370        2898 :         leftoff = P_HIKEY;
     371        2898 :         if (PageAddItem(leftpage, (Item) left_hikey, left_hikeysz, P_HIKEY,
     372             :                         false, false) == InvalidOffsetNumber)
     373           0 :             elog(ERROR, "failed to add high key to left page after split");
     374        2898 :         leftoff = OffsetNumberNext(leftoff);
     375             : 
     376      646216 :         for (off = P_FIRSTDATAKEY(oopaque); off < xlrec->firstrightoff; off++)
     377             :         {
     378             :             ItemId      itemid;
     379             :             Size        itemsz;
     380             :             IndexTuple  item;
     381             : 
     382             :             /* Add replacement posting list when required */
     383      643318 :             if (off == replacepostingoff)
     384             :             {
     385             :                 Assert(newitemonleft ||
     386             :                        xlrec->firstrightoff == xlrec->newitemoff);
     387          12 :                 if (PageAddItem(leftpage, (Item) nposting,
     388             :                                 MAXALIGN(IndexTupleSize(nposting)), leftoff,
     389             :                                 false, false) == InvalidOffsetNumber)
     390           0 :                     elog(ERROR, "failed to add new posting list item to left page after split");
     391          12 :                 leftoff = OffsetNumberNext(leftoff);
     392          12 :                 continue;       /* don't insert oposting */
     393             :             }
     394             : 
     395             :             /* add the new item if it was inserted on left page */
     396      643306 :             else if (newitemonleft && off == xlrec->newitemoff)
     397             :             {
     398         286 :                 if (PageAddItem(leftpage, (Item) newitem, newitemsz, leftoff,
     399             :                                 false, false) == InvalidOffsetNumber)
     400           0 :                     elog(ERROR, "failed to add new item to left page after split");
     401         286 :                 leftoff = OffsetNumberNext(leftoff);
     402             :             }
     403             : 
     404      643306 :             itemid = PageGetItemId(origpage, off);  /* copy the original item at 'off' */
     405      643306 :             itemsz = ItemIdGetLength(itemid);
     406      643306 :             item = (IndexTuple) PageGetItem(origpage, itemid);
     407      643306 :             if (PageAddItem(leftpage, (Item) item, itemsz, leftoff,
     408             :                             false, false) == InvalidOffsetNumber)
     409           0 :                 elog(ERROR, "failed to add old item to left page after split");
     410      643306 :             leftoff = OffsetNumberNext(leftoff);
     411             :         }
     412             : 
     413             :         /* cope with possibility that newitem goes at the end */
     414        2898 :         if (newitemonleft && off == xlrec->newitemoff)
     415             :         {
     416          42 :             if (PageAddItem(leftpage, (Item) newitem, newitemsz, leftoff,
     417             :                             false, false) == InvalidOffsetNumber)
     418           0 :                 elog(ERROR, "failed to add new item to left page after split");
     419          42 :             leftoff = OffsetNumberNext(leftoff);
     420             :         }
     421             : 
     422        2898 :         PageRestoreTempPage(leftpage, origpage);
     423             : 
     424             :         /* Fix opaque fields */
     425        2898 :         oopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
     426        2898 :         if (isleaf)
     427        2796 :             oopaque->btpo_flags |= BTP_LEAF;
     428        2898 :         oopaque->btpo_next = rightpagenumber;
     429        2898 :         oopaque->btpo_cycleid = 0;
     430             : 
     431        2898 :         PageSetLSN(origpage, lsn);
     432        2898 :         MarkBufferDirty(buf);
     433             :     }
     434             : 
     435             :     /* Fix left-link of the page to the right of the new right sibling */
     436        2938 :     if (spagenumber != P_NONE)
     437             :     {
     438             :         Buffer      sbuf;
     439             : 
     440        1138 :         if (XLogReadBufferForRedo(record, 2, &sbuf) == BLK_NEEDS_REDO)
     441             :         {
     442        1040 :             Page        spage = (Page) BufferGetPage(sbuf);
     443        1040 :             BTPageOpaque spageop = BTPageGetOpaque(spage);
     444             : 
     445        1040 :             spageop->btpo_prev = rightpagenumber;
     446             : 
     447        1040 :             PageSetLSN(spage, lsn);
     448        1040 :             MarkBufferDirty(sbuf);
     449             :         }
     450        1138 :         if (BufferIsValid(sbuf))
     451        1138 :             UnlockReleaseBuffer(sbuf);
     452             :     }
     453             : 
     454             :     /*
     455             :      * Finally, release the remaining buffers.  sbuf, rbuf, and buf must be
     456             :      * released together, so that readers cannot observe inconsistencies.
                     :      *
                     :      * NOTE(review): sbuf, when it was used, has already been unlocked and
                     :      * released inside the block above; only rbuf and buf are released
                     :      * here.  Confirm that the wording about sbuf still matches the intended
                     :      * locking protocol.
     457             :      */
     458        2938 :     UnlockReleaseBuffer(rbuf);
     459        2938 :     if (BufferIsValid(buf))
     460        2938 :         UnlockReleaseBuffer(buf);
     461        2938 : }
     462             : /*
                     :  * btree_xlog_dedup -- replay a deduplication pass over one page
                     :  *
                     :  * record: WAL record being replayed; its main data is an xl_btree_dedup
                     :  *         and block reference 0 is the page that was deduplicated.
                     :  *
                     :  * Block 0's data is read as an array of BTDedupInterval entries, each
                     :  * giving a base offset and an item count.  When redo is needed, the page
                     :  * is rebuilt in a temporary page: the high key is copied first when the
                     :  * page is not rightmost, then the data items are walked in offset order.
                     :  * An item is merged into the pending posting list while the pending base
                     :  * offset matches the next interval and that interval's item count has not
                     :  * been reached; otherwise the pending list is finished and a new one is
                     :  * started at that item.  BTP_HAS_GARBAGE is cleared on the rebuilt page
                     :  * if it was set on the original.
                     :  *
                     :  * NOTE(review): 'state' and state->htids are palloc'd here and not freed
                     :  * in this function; presumably they are reclaimed along with the current
                     :  * memory context -- confirm.
                     :  */
     463             : static void
     464        4266 : btree_xlog_dedup(XLogReaderState *record)
     465             : {
     466        4266 :     XLogRecPtr  lsn = record->EndRecPtr;
     467        4266 :     xl_btree_dedup *xlrec = (xl_btree_dedup *) XLogRecGetData(record);
     468             :     Buffer      buf;
     469             : 
     470        4266 :     if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO)
     471             :     {
     472        4262 :         char       *ptr = XLogRecGetBlockData(record, 0, NULL);
     473        4262 :         Page        page = (Page) BufferGetPage(buf);
     474        4262 :         BTPageOpaque opaque = BTPageGetOpaque(page);
     475             :         OffsetNumber offnum,
     476             :                     minoff,
     477             :                     maxoff;
     478             :         BTDedupState state;
     479             :         BTDedupInterval *intervals;
     480             :         Page        newpage;
     481             : 
     482        4262 :         state = (BTDedupState) palloc(sizeof(BTDedupStateData));
     483        4262 :         state->deduplicate = true;   /* unused */
     484        4262 :         state->nmaxitems = 0;    /* unused */
     485             :         /* Conservatively use larger maxpostingsize than primary */
     486        4262 :         state->maxpostingsize = BTMaxItemSize(page);
     487        4262 :         state->base = NULL;
     488        4262 :         state->baseoff = InvalidOffsetNumber;
     489        4262 :         state->basetupsize = 0;
     490        4262 :         state->htids = palloc(state->maxpostingsize);
     491        4262 :         state->nhtids = 0;
     492        4262 :         state->nitems = 0;
     493        4262 :         state->phystupsize = 0;
     494        4262 :         state->nintervals = 0;
     495             : 
     496        4262 :         minoff = P_FIRSTDATAKEY(opaque);
     497        4262 :         maxoff = PageGetMaxOffsetNumber(page);
     498        4262 :         newpage = PageGetTempPageCopySpecial(page);
     499             : 
     500        4262 :         if (!P_RIGHTMOST(opaque))   /* carry the high key over unchanged */
     501             :         {
     502        3698 :             ItemId      itemid = PageGetItemId(page, P_HIKEY);
     503        3698 :             Size        itemsz = ItemIdGetLength(itemid);
     504        3698 :             IndexTuple  item = (IndexTuple) PageGetItem(page, itemid);
     505             : 
     506        3698 :             if (PageAddItem(newpage, (Item) item, itemsz, P_HIKEY,
     507             :                             false, false) == InvalidOffsetNumber)
     508           0 :                 elog(ERROR, "deduplication failed to add highkey");
     509             :         }
     510             : 
     511        4262 :         intervals = (BTDedupInterval *) ptr;
     512      975752 :         for (offnum = minoff;
     513             :              offnum <= maxoff;
     514      971490 :              offnum = OffsetNumberNext(offnum))
     515             :         {
     516      971490 :             ItemId      itemid = PageGetItemId(page, offnum);
     517      971490 :             IndexTuple  itup = (IndexTuple) PageGetItem(page, itemid);
     518             : 
     519      971490 :             if (offnum == minoff)
     520        4262 :                 _bt_dedup_start_pending(state, itup, offnum);
     521      967228 :             else if (state->nintervals < xlrec->nintervals &&
     522      718868 :                      state->baseoff == intervals[state->nintervals].baseoff &&
     523      248436 :                      state->nitems < intervals[state->nintervals].nitems)
     524             :             {
     525      164222 :                 if (!_bt_dedup_save_htid(state, itup))
     526           0 :                     elog(ERROR, "deduplication failed to add heap tid to pending posting list");
     527             :             }
     528             :             else
     529             :             {
     530      803006 :                 _bt_dedup_finish_pending(newpage, state);
     531      803006 :                 _bt_dedup_start_pending(state, itup, offnum);
     532             :             }
     533             :         }
     534             : 
     535        4262 :         _bt_dedup_finish_pending(newpage, state);    /* finish the pending list left over from the last item */
     536             :         Assert(state->nintervals == xlrec->nintervals);
     537             :         Assert(memcmp(state->intervals, intervals,
     538             :                       state->nintervals * sizeof(BTDedupInterval)) == 0);
     539             : 
     540        4262 :         if (P_HAS_GARBAGE(opaque))
     541             :         {
     542           0 :             BTPageOpaque nopaque = BTPageGetOpaque(newpage);
     543             : 
     544           0 :             nopaque->btpo_flags &= ~BTP_HAS_GARBAGE;
     545             :         }
     546             : 
     547        4262 :         PageRestoreTempPage(newpage, page);
     548        4262 :         PageSetLSN(page, lsn);
     549        4262 :         MarkBufferDirty(buf);
     550             :     }
     551             : 
     552        4266 :     if (BufferIsValid(buf))
     553        4266 :         UnlockReleaseBuffer(buf);
     554        4266 : }
     555             : 
     556             : static void
     557         248 : btree_xlog_updates(Page page, OffsetNumber *updatedoffsets,
     558             :                    xl_btree_update *updates, int nupdated)
     559             : {
     560             :     BTVacuumPosting vacposting;
     561             :     IndexTuple  origtuple;
     562             :     ItemId      itemid;
     563             :     Size        itemsz;
     564             : 
     565        8088 :     for (int i = 0; i < nupdated; i++)
     566             :     {
     567        7840 :         itemid = PageGetItemId(page, updatedoffsets[i]);
     568        7840 :         origtuple = (IndexTuple) PageGetItem(page, itemid);
     569             : 
     570        7840 :         vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
     571        7840 :                             updates->ndeletedtids * sizeof(uint16));
     572        7840 :         vacposting->updatedoffset = updatedoffsets[i];
     573        7840 :         vacposting->itup = origtuple;
     574        7840 :         vacposting->ndeletedtids = updates->ndeletedtids;
     575        7840 :         memcpy(vacposting->deletetids,
     576             :                (char *) updates + SizeOfBtreeUpdate,
     577        7840 :                updates->ndeletedtids * sizeof(uint16));
     578             : 
     579        7840 :         _bt_update_posting(vacposting);
     580             : 
     581             :         /* Overwrite updated version of tuple */
     582        7840 :         itemsz = MAXALIGN(IndexTupleSize(vacposting->itup));
     583        7840 :         if (!PageIndexTupleOverwrite(page, updatedoffsets[i],
     584        7840 :                                      (Item) vacposting->itup, itemsz))
     585           0 :             elog(PANIC, "failed to update partially dead item");
     586             : 
     587        7840 :         pfree(vacposting->itup);
     588        7840 :         pfree(vacposting);
     589             : 
     590             :         /* advance to next xl_btree_update from array */
     591        7840 :         updates = (xl_btree_update *)
     592        7840 :             ((char *) updates + SizeOfBtreeUpdate +
     593        7840 :              updates->ndeletedtids * sizeof(uint16));
     594             :     }
     595         248 : }
     596             : 
     597             : static void
     598        1854 : btree_xlog_vacuum(XLogReaderState *record)
     599             : {
     600        1854 :     XLogRecPtr  lsn = record->EndRecPtr;
     601        1854 :     xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
     602             :     Buffer      buffer;
     603             :     Page        page;
     604             :     BTPageOpaque opaque;
     605             : 
     606             :     /*
     607             :      * We need to take a cleanup lock here, just like btvacuumpage(). However,
     608             :      * it isn't necessary to exhaustively get a cleanup lock on every block in
     609             :      * the index during recovery (just getting a cleanup lock on pages with
     610             :      * items to kill suffices).  See nbtree/README for details.
     611             :      */
     612        1854 :     if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer)
     613             :         == BLK_NEEDS_REDO)
     614             :     {
     615        1654 :         char       *ptr = XLogRecGetBlockData(record, 0, NULL);
     616             : 
     617        1654 :         page = (Page) BufferGetPage(buffer);
     618             : 
     619        1654 :         if (xlrec->nupdated > 0)
     620             :         {
     621             :             OffsetNumber *updatedoffsets;
     622             :             xl_btree_update *updates;
     623             : 
     624          68 :             updatedoffsets = (OffsetNumber *)
     625          68 :                 (ptr + xlrec->ndeleted * sizeof(OffsetNumber));
     626          68 :             updates = (xl_btree_update *) ((char *) updatedoffsets +
     627          68 :                                            xlrec->nupdated *
     628             :                                            sizeof(OffsetNumber));
     629             : 
     630          68 :             btree_xlog_updates(page, updatedoffsets, updates, xlrec->nupdated);
     631             :         }
     632             : 
     633        1654 :         if (xlrec->ndeleted > 0)
     634        1636 :             PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
     635             : 
     636             :         /*
     637             :          * Mark the page as not containing any LP_DEAD items --- see comments
     638             :          * in _bt_delitems_vacuum().
     639             :          */
     640        1654 :         opaque = BTPageGetOpaque(page);
     641        1654 :         opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
     642             : 
     643        1654 :         PageSetLSN(page, lsn);
     644        1654 :         MarkBufferDirty(buffer);
     645             :     }
     646        1854 :     if (BufferIsValid(buffer))
     647        1854 :         UnlockReleaseBuffer(buffer);
     648        1854 : }
     649             : 
     650             : static void
     651        1464 : btree_xlog_delete(XLogReaderState *record)
     652             : {
     653        1464 :     XLogRecPtr  lsn = record->EndRecPtr;
     654        1464 :     xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
     655             :     Buffer      buffer;
     656             :     Page        page;
     657             :     BTPageOpaque opaque;
     658             : 
     659             :     /*
     660             :      * If we have any conflict processing to do, it must happen before we
     661             :      * update the page
     662             :      */
     663        1464 :     if (InHotStandby)
     664             :     {
     665             :         RelFileLocator rlocator;
     666             : 
     667        1460 :         XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
     668             : 
     669        1460 :         ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
     670        1460 :                                             xlrec->isCatalogRel,
     671             :                                             rlocator);
     672             :     }
     673             : 
     674             :     /*
     675             :      * We don't need to take a cleanup lock to apply these changes. See
     676             :      * nbtree/README for details.
     677             :      */
     678        1464 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
     679             :     {
     680        1464 :         char       *ptr = XLogRecGetBlockData(record, 0, NULL);
     681             : 
     682        1464 :         page = (Page) BufferGetPage(buffer);
     683             : 
     684        1464 :         if (xlrec->nupdated > 0)
     685             :         {
     686             :             OffsetNumber *updatedoffsets;
     687             :             xl_btree_update *updates;
     688             : 
     689         180 :             updatedoffsets = (OffsetNumber *)
     690         180 :                 (ptr + xlrec->ndeleted * sizeof(OffsetNumber));
     691         180 :             updates = (xl_btree_update *) ((char *) updatedoffsets +
     692         180 :                                            xlrec->nupdated *
     693             :                                            sizeof(OffsetNumber));
     694             : 
     695         180 :             btree_xlog_updates(page, updatedoffsets, updates, xlrec->nupdated);
     696             :         }
     697             : 
     698        1464 :         if (xlrec->ndeleted > 0)
     699        1430 :             PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
     700             : 
     701             :         /* Mark the page as not containing any LP_DEAD items */
     702        1464 :         opaque = BTPageGetOpaque(page);
     703        1464 :         opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
     704             : 
     705        1464 :         PageSetLSN(page, lsn);
     706        1464 :         MarkBufferDirty(buffer);
     707             :     }
     708        1464 :     if (BufferIsValid(buffer))
     709        1464 :         UnlockReleaseBuffer(buffer);
     710        1464 : }
     711             : 
     712             : static void
     713        1232 : btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
     714             : {
     715        1232 :     XLogRecPtr  lsn = record->EndRecPtr;
     716        1232 :     xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record);
     717             :     Buffer      buffer;
     718             :     Page        page;
     719             :     BTPageOpaque pageop;
     720             :     IndexTupleData trunctuple;
     721             : 
     722             :     /*
     723             :      * In normal operation, we would lock all the pages this WAL record
     724             :      * touches before changing any of them.  In WAL replay, it should be okay
     725             :      * to lock just one page at a time, since no concurrent index updates can
     726             :      * be happening, and readers should not care whether they arrive at the
     727             :      * target page or not (since it's surely empty).
     728             :      */
     729             : 
     730             :     /* to-be-deleted subtree's parent page */
     731        1232 :     if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
     732             :     {
     733             :         OffsetNumber poffset;
     734             :         ItemId      itemid;
     735             :         IndexTuple  itup;
     736             :         OffsetNumber nextoffset;
     737             :         BlockNumber rightsib;
     738             : 
     739        1232 :         page = (Page) BufferGetPage(buffer);
     740        1232 :         pageop = BTPageGetOpaque(page);
     741             : 
     742        1232 :         poffset = xlrec->poffset;
     743             : 
     744        1232 :         nextoffset = OffsetNumberNext(poffset);
     745        1232 :         itemid = PageGetItemId(page, nextoffset);
     746        1232 :         itup = (IndexTuple) PageGetItem(page, itemid);
     747        1232 :         rightsib = BTreeTupleGetDownLink(itup);
     748             : 
     749        1232 :         itemid = PageGetItemId(page, poffset);
     750        1232 :         itup = (IndexTuple) PageGetItem(page, itemid);
     751        1232 :         BTreeTupleSetDownLink(itup, rightsib);
     752        1232 :         nextoffset = OffsetNumberNext(poffset);
     753        1232 :         PageIndexTupleDelete(page, nextoffset);
     754             : 
     755        1232 :         PageSetLSN(page, lsn);
     756        1232 :         MarkBufferDirty(buffer);
     757             :     }
     758             : 
     759             :     /*
     760             :      * Don't need to couple cross-level locks in REDO routines, so release
     761             :      * lock on internal page immediately
     762             :      */
     763        1232 :     if (BufferIsValid(buffer))
     764        1232 :         UnlockReleaseBuffer(buffer);
     765             : 
     766             :     /* Rewrite the leaf page as a halfdead page */
     767        1232 :     buffer = XLogInitBufferForRedo(record, 0);
     768        1232 :     page = (Page) BufferGetPage(buffer);
     769             : 
     770        1232 :     _bt_pageinit(page, BufferGetPageSize(buffer));
     771        1232 :     pageop = BTPageGetOpaque(page);
     772             : 
     773        1232 :     pageop->btpo_prev = xlrec->leftblk;
     774        1232 :     pageop->btpo_next = xlrec->rightblk;
     775        1232 :     pageop->btpo_level = 0;
     776        1232 :     pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
     777        1232 :     pageop->btpo_cycleid = 0;
     778             : 
     779             :     /*
     780             :      * Construct a dummy high key item that points to top parent page (value
     781             :      * is InvalidBlockNumber when the top parent page is the leaf page itself)
     782             :      */
     783        1232 :     MemSet(&trunctuple, 0, sizeof(IndexTupleData));
     784        1232 :     trunctuple.t_info = sizeof(IndexTupleData);
     785        1232 :     BTreeTupleSetTopParent(&trunctuple, xlrec->topparent);
     786             : 
     787        1232 :     if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
     788             :                     false, false) == InvalidOffsetNumber)
     789           0 :         elog(ERROR, "could not add dummy high key to half-dead page");
     790             : 
     791        1232 :     PageSetLSN(page, lsn);
     792        1232 :     MarkBufferDirty(buffer);
     793        1232 :     UnlockReleaseBuffer(buffer);
     794        1232 : }
     795             : 
     796             : 
     797             : static void
     798        1324 : btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
     799             : {
     800        1324 :     XLogRecPtr  lsn = record->EndRecPtr;
     801        1324 :     xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record);
     802             :     BlockNumber leftsib;
     803             :     BlockNumber rightsib;
     804             :     uint32      level;
     805             :     bool        isleaf;
     806             :     FullTransactionId safexid;
     807             :     Buffer      leftbuf;
     808             :     Buffer      target;
     809             :     Buffer      rightbuf;
     810             :     Page        page;
     811             :     BTPageOpaque pageop;
     812             : 
     813        1324 :     leftsib = xlrec->leftsib;
     814        1324 :     rightsib = xlrec->rightsib;
     815        1324 :     level = xlrec->level;
     816        1324 :     isleaf = (level == 0);
     817        1324 :     safexid = xlrec->safexid;
     818             : 
     819             :     /* No leaftopparent for level 0 (leaf page) or level 1 target */
     820             :     Assert(!BlockNumberIsValid(xlrec->leaftopparent) || level > 1);
     821             : 
     822             :     /*
     823             :      * In normal operation, we would lock all the pages this WAL record
     824             :      * touches before changing any of them.  In WAL replay, we at least lock
     825             :      * the pages in the same standard left-to-right order (leftsib, target,
     826             :      * rightsib), and don't release the sibling locks until the target is
     827             :      * marked deleted.
     828             :      */
     829             : 
     830             :     /* Fix right-link of left sibling, if any */
     831        1324 :     if (leftsib != P_NONE)
     832             :     {
     833         102 :         if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
     834             :         {
     835         102 :             page = (Page) BufferGetPage(leftbuf);
     836         102 :             pageop = BTPageGetOpaque(page);
     837         102 :             pageop->btpo_next = rightsib;
     838             : 
     839         102 :             PageSetLSN(page, lsn);
     840         102 :             MarkBufferDirty(leftbuf);
     841             :         }
     842             :     }
     843             :     else
     844        1222 :         leftbuf = InvalidBuffer;
     845             : 
     846             :     /* Rewrite target page as empty deleted page */
     847        1324 :     target = XLogInitBufferForRedo(record, 0);
     848        1324 :     page = (Page) BufferGetPage(target);
     849             : 
     850        1324 :     _bt_pageinit(page, BufferGetPageSize(target));
     851        1324 :     pageop = BTPageGetOpaque(page);
     852             : 
     853        1324 :     pageop->btpo_prev = leftsib;
     854        1324 :     pageop->btpo_next = rightsib;
     855        1324 :     pageop->btpo_level = level;
     856        1324 :     BTPageSetDeleted(page, safexid);
     857        1324 :     if (isleaf)
     858        1230 :         pageop->btpo_flags |= BTP_LEAF;
     859        1324 :     pageop->btpo_cycleid = 0;
     860             : 
     861        1324 :     PageSetLSN(page, lsn);
     862        1324 :     MarkBufferDirty(target);
     863             : 
     864             :     /* Fix left-link of right sibling */
     865        1324 :     if (XLogReadBufferForRedo(record, 2, &rightbuf) == BLK_NEEDS_REDO)
     866             :     {
     867        1284 :         page = (Page) BufferGetPage(rightbuf);
     868        1284 :         pageop = BTPageGetOpaque(page);
     869        1284 :         pageop->btpo_prev = leftsib;
     870             : 
     871        1284 :         PageSetLSN(page, lsn);
     872        1284 :         MarkBufferDirty(rightbuf);
     873             :     }
     874             : 
     875             :     /* Release siblings */
     876        1324 :     if (BufferIsValid(leftbuf))
     877         102 :         UnlockReleaseBuffer(leftbuf);
     878        1324 :     if (BufferIsValid(rightbuf))
     879        1324 :         UnlockReleaseBuffer(rightbuf);
     880             : 
     881             :     /* Release target */
     882        1324 :     UnlockReleaseBuffer(target);
     883             : 
     884             :     /*
     885             :      * If we deleted a parent of the targeted leaf page, instead of the leaf
     886             :      * itself, update the leaf to point to the next remaining child in the
     887             :      * to-be-deleted subtree
     888             :      */
     889        1324 :     if (XLogRecHasBlockRef(record, 3))
     890             :     {
     891             :         /*
     892             :          * There is no real data on the page, so we just re-create it from
     893             :          * scratch using the information from the WAL record.
     894             :          *
     895             :          * Note that we don't end up here when the target page is also the
     896             :          * leafbuf page.  There is no need to add a dummy hikey item with a
     897             :          * top parent link when deleting leafbuf because it's the last page
     898             :          * we'll delete in the subtree undergoing deletion.
     899             :          */
     900             :         Buffer      leafbuf;
     901             :         IndexTupleData trunctuple;
     902             : 
     903             :         Assert(!isleaf);
     904             : 
     905          94 :         leafbuf = XLogInitBufferForRedo(record, 3);
     906          94 :         page = (Page) BufferGetPage(leafbuf);
     907             : 
     908          94 :         _bt_pageinit(page, BufferGetPageSize(leafbuf));
     909          94 :         pageop = BTPageGetOpaque(page);
     910             : 
     911          94 :         pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
     912          94 :         pageop->btpo_prev = xlrec->leafleftsib;
     913          94 :         pageop->btpo_next = xlrec->leafrightsib;
     914          94 :         pageop->btpo_level = 0;
     915          94 :         pageop->btpo_cycleid = 0;
     916             : 
     917             :         /* Add a dummy hikey item */
     918         188 :         MemSet(&trunctuple, 0, sizeof(IndexTupleData));
     919          94 :         trunctuple.t_info = sizeof(IndexTupleData);
     920          94 :         BTreeTupleSetTopParent(&trunctuple, xlrec->leaftopparent);
     921             : 
     922          94 :         if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
     923             :                         false, false) == InvalidOffsetNumber)
     924           0 :             elog(ERROR, "could not add dummy high key to half-dead page");
     925             : 
     926          94 :         PageSetLSN(page, lsn);
     927          94 :         MarkBufferDirty(leafbuf);
     928          94 :         UnlockReleaseBuffer(leafbuf);
     929             :     }
     930             : 
     931             :     /* Update metapage if needed */
     932        1324 :     if (info == XLOG_BTREE_UNLINK_PAGE_META)
     933          16 :         _bt_restore_meta(record, 4);
     934        1324 : }
     935             : 
     936             : static void
     937        1220 : btree_xlog_newroot(XLogReaderState *record)
     938             : {
     939        1220 :     XLogRecPtr  lsn = record->EndRecPtr;
     940        1220 :     xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
     941             :     Buffer      buffer;
     942             :     Page        page;
     943             :     BTPageOpaque pageop;
     944             :     char       *ptr;
     945             :     Size        len;
     946             : 
     947        1220 :     buffer = XLogInitBufferForRedo(record, 0);
     948        1220 :     page = (Page) BufferGetPage(buffer);
     949             : 
     950        1220 :     _bt_pageinit(page, BufferGetPageSize(buffer));
     951        1220 :     pageop = BTPageGetOpaque(page);
     952             : 
     953        1220 :     pageop->btpo_flags = BTP_ROOT;
     954        1220 :     pageop->btpo_prev = pageop->btpo_next = P_NONE;
     955        1220 :     pageop->btpo_level = xlrec->level;
     956        1220 :     if (xlrec->level == 0)
     957        1114 :         pageop->btpo_flags |= BTP_LEAF;
     958        1220 :     pageop->btpo_cycleid = 0;
     959             : 
     960        1220 :     if (xlrec->level > 0)
     961             :     {
     962         106 :         ptr = XLogRecGetBlockData(record, 0, &len);
     963         106 :         _bt_restore_page(page, ptr, len);
     964             : 
     965             :         /* Clear the incomplete-split flag in left child */
     966         106 :         _bt_clear_incomplete_split(record, 1);
     967             :     }
     968             : 
     969        1220 :     PageSetLSN(page, lsn);
     970        1220 :     MarkBufferDirty(buffer);
     971        1220 :     UnlockReleaseBuffer(buffer);
     972             : 
     973        1220 :     _bt_restore_meta(record, 2);
     974        1220 : }
     975             : 
     976             : /*
     977             :  * In general VACUUM must defer recycling as a way of avoiding certain race
     978             :  * conditions.  Deleted pages contain a safexid value that is used by VACUUM
     979             :  * to determine whether or not it's safe to place a page that was deleted by
     980             :  * VACUUM earlier into the FSM now.  See nbtree/README.
     981             :  *
     982             :  * As far as any backend operating during original execution is concerned, the
     983             :  * FSM is a cache of recycle-safe pages; the mere presence of the page in the
     984             :  * FSM indicates that the page must already be safe to recycle (actually,
     985             :  * _bt_getbuf() verifies it's safe using BTPageIsRecyclable(), but that's just
     986             :  * because it would be unwise to completely trust the FSM, given its current
     987             :  * limitations).
     988             :  *
     989             :  * This isn't sufficient to prevent similar concurrent recycling race
     990             :  * conditions during Hot Standby, though.  For that we need to log a
     991             :  * xl_btree_reuse_page record at the point that a page is actually recycled
     992             :  * and reused for an entirely unrelated page inside _bt_split().  These
     993             :  * records include the same safexid value from the original deleted page,
     994             :  * stored in the record's snapshotConflictHorizon field.
     995             :  *
     996             :  * The GlobalVisCheckRemovableFullXid() test in BTPageIsRecyclable() is used
     997             :  * to determine if it's safe to recycle a page.  This mirrors our own test:
     998             :  * the PGPROC->xmin > limitXmin test inside GetConflictingVirtualXIDs().
     999             :  * Consequently, one XID value achieves the same exclusion effect on primary
    1000             :  * and standby.
    1001             :  */
    1002             : static void
    1003           0 : btree_xlog_reuse_page(XLogReaderState *record)
    1004             : {
    1005           0 :     xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
    1006             : 
    1007           0 :     if (InHotStandby)
    1008           0 :         ResolveRecoveryConflictWithSnapshotFullXid(xlrec->snapshotConflictHorizon,
    1009           0 :                                                    xlrec->isCatalogRel,
    1010             :                                                    xlrec->locator);
    1011           0 : }
    1012             : 
    1013             : void
    1014      991674 : btree_redo(XLogReaderState *record)
    1015             : {
    1016      991674 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
    1017             :     MemoryContext oldCtx;
    1018             : 
    1019      991674 :     oldCtx = MemoryContextSwitchTo(opCtx);
    1020      991674 :     switch (info)
    1021             :     {
    1022      971234 :         case XLOG_BTREE_INSERT_LEAF:
    1023      971234 :             btree_xlog_insert(true, false, false, record);
    1024      971234 :             break;
    1025        2722 :         case XLOG_BTREE_INSERT_UPPER:
    1026        2722 :             btree_xlog_insert(false, false, false, record);
    1027        2722 :             break;
    1028           8 :         case XLOG_BTREE_INSERT_META:
    1029           8 :             btree_xlog_insert(false, true, false, record);
    1030           8 :             break;
    1031         360 :         case XLOG_BTREE_SPLIT_L:
    1032         360 :             btree_xlog_split(true, record);
    1033         360 :             break;
    1034        2578 :         case XLOG_BTREE_SPLIT_R:
    1035        2578 :             btree_xlog_split(false, record);
    1036        2578 :             break;
    1037        3390 :         case XLOG_BTREE_INSERT_POST:
    1038        3390 :             btree_xlog_insert(true, false, true, record);
    1039        3390 :             break;
    1040        4266 :         case XLOG_BTREE_DEDUP:
    1041        4266 :             btree_xlog_dedup(record);
    1042        4266 :             break;
    1043        1854 :         case XLOG_BTREE_VACUUM:
    1044        1854 :             btree_xlog_vacuum(record);
    1045        1854 :             break;
    1046        1464 :         case XLOG_BTREE_DELETE:
    1047        1464 :             btree_xlog_delete(record);
    1048        1464 :             break;
    1049        1232 :         case XLOG_BTREE_MARK_PAGE_HALFDEAD:
    1050        1232 :             btree_xlog_mark_page_halfdead(info, record);
    1051        1232 :             break;
    1052        1324 :         case XLOG_BTREE_UNLINK_PAGE:
    1053             :         case XLOG_BTREE_UNLINK_PAGE_META:
    1054        1324 :             btree_xlog_unlink_page(info, record);
    1055        1324 :             break;
    1056        1220 :         case XLOG_BTREE_NEWROOT:
    1057        1220 :             btree_xlog_newroot(record);
    1058        1220 :             break;
    1059           0 :         case XLOG_BTREE_REUSE_PAGE:
    1060           0 :             btree_xlog_reuse_page(record);
    1061           0 :             break;
    1062          22 :         case XLOG_BTREE_META_CLEANUP:
    1063          22 :             _bt_restore_meta(record, 0);
    1064          22 :             break;
    1065           0 :         default:
    1066           0 :             elog(PANIC, "btree_redo: unknown op code %u", info);
    1067             :     }
    1068      991674 :     MemoryContextSwitchTo(oldCtx);
    1069      991674 :     MemoryContextReset(opCtx);
    1070      991674 : }
    1071             : 
    1072             : void
    1073         374 : btree_xlog_startup(void)
    1074             : {
    1075         374 :     opCtx = AllocSetContextCreate(CurrentMemoryContext,
    1076             :                                   "Btree recovery temporary context",
    1077             :                                   ALLOCSET_DEFAULT_SIZES);
    1078         374 : }
    1079             : 
    1080             : void
    1081         276 : btree_xlog_cleanup(void)
    1082             : {
    1083         276 :     MemoryContextDelete(opCtx);
    1084         276 :     opCtx = NULL;
    1085         276 : }
    1086             : 
    1087             : /*
    1088             :  * Mask a btree page before performing consistency checks on it.
    1089             :  */
    1090             : void
    1091           0 : btree_mask(char *pagedata, BlockNumber blkno)
    1092             : {
    1093           0 :     Page        page = (Page) pagedata;
    1094             :     BTPageOpaque maskopaq;
    1095             : 
    1096           0 :     mask_page_lsn_and_checksum(page);
    1097             : 
    1098           0 :     mask_page_hint_bits(page);
    1099           0 :     mask_unused_space(page);
    1100             : 
    1101           0 :     maskopaq = BTPageGetOpaque(page);
    1102             : 
    1103           0 :     if (P_ISLEAF(maskopaq))
    1104             :     {
    1105             :         /*
    1106             :          * In btree leaf pages, it is possible to modify the LP_FLAGS without
    1107             :          * emitting any WAL record. Hence, mask the line pointer flags. See
    1108             :          * _bt_killitems(), _bt_check_unique() for details.
    1109             :          */
    1110           0 :         mask_lp_flags(page);
    1111             :     }
    1112             : 
    1113             :     /*
    1114             :      * BTP_HAS_GARBAGE is just an un-logged hint bit. So, mask it. See
    1115             :      * _bt_delete_or_dedup_one_page(), _bt_killitems(), and _bt_check_unique()
    1116             :      * for details.
    1117             :      */
    1118           0 :     maskopaq->btpo_flags &= ~BTP_HAS_GARBAGE;
    1119             : 
    1120             :     /*
    1121             :      * During replay of a btree page split, we don't set the BTP_SPLIT_END
    1122             :      * flag of the right sibling and initialize the cycle_id to 0 for the same
    1123             :      * page. See btree_xlog_split() for details.
    1124             :      */
    1125           0 :     maskopaq->btpo_flags &= ~BTP_SPLIT_END;
    1126           0 :     maskopaq->btpo_cycleid = 0;
    1127           0 : }

Generated by: LCOV version 1.14