LCOV - code coverage report
Current view: top level - src/backend/access/nbtree - nbtxlog.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 96.6 % 493 476
Test Date: 2026-03-01 02:14:51 Functions: 100.0 % 17 17
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * nbtxlog.c
       4              :  *    WAL replay logic for btrees.
       5              :  *
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  * Portions Copyright (c) 1994, Regents of the University of California
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/access/nbtree/nbtxlog.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/bufmask.h"
      18              : #include "access/nbtree.h"
      19              : #include "access/nbtxlog.h"
      20              : #include "access/transam.h"
      21              : #include "access/xlogutils.h"
      22              : #include "storage/standby.h"
      23              : #include "utils/memutils.h"
      24              : 
/*
 * Working memory for redo operations.  NOTE(review): allocation and reset of
 * this context are not visible in this chunk — presumably it is created once
 * at startup and reset between WAL records; confirm against the rmgr hooks.
 */
static MemoryContext opCtx;     /* working memory for operations */
      26              : 
      27              : /*
      28              :  * _bt_restore_page -- re-enter all the index tuples on a page
      29              :  *
      30              :  * The page is freshly init'd, and *from (length len) is a copy of what
      31              :  * had been its upper part (pd_upper to pd_special).  We assume that the
      32              :  * tuples had been added to the page in item-number order, and therefore
      33              :  * the one with highest item number appears first (lowest on the page).
      34              :  */
      35              : static void
      36         1601 : _bt_restore_page(Page page, char *from, int len)
      37              : {
      38              :     IndexTupleData itupdata;
      39              :     Size        itemsz;
      40         1601 :     char       *end = from + len;
      41              :     void       *items[MaxIndexTuplesPerPage];
      42              :     uint16      itemsizes[MaxIndexTuplesPerPage];
      43              :     int         i;
      44              :     int         nitems;
      45              : 
      46              :     /*
      47              :      * To get the items back in the original order, we add them to the page in
      48              :      * reverse.  To figure out where one tuple ends and another begins, we
      49              :      * have to scan them in forward order first.
      50              :      */
      51         1601 :     i = 0;
      52       105163 :     while (from < end)
      53              :     {
      54              :         /*
      55              :          * As we step through the items, 'from' won't always be properly
      56              :          * aligned, so we need to use memcpy().  Further, we use void * here
      57              :          * for our items array for the same reason; wouldn't want the compiler
      58              :          * or anyone thinking that an item is aligned when it isn't.
      59              :          */
      60       103562 :         memcpy(&itupdata, from, sizeof(IndexTupleData));
      61       103562 :         itemsz = IndexTupleSize(&itupdata);
      62       103562 :         itemsz = MAXALIGN(itemsz);
      63              : 
      64       103562 :         items[i] = from;
      65       103562 :         itemsizes[i] = itemsz;
      66       103562 :         i++;
      67              : 
      68       103562 :         from += itemsz;
      69              :     }
      70         1601 :     nitems = i;
      71              : 
      72       105163 :     for (i = nitems - 1; i >= 0; i--)
      73              :     {
      74       103562 :         if (PageAddItem(page, items[i], itemsizes[i], nitems - i, false, false) == InvalidOffsetNumber)
      75            0 :             elog(PANIC, "_bt_restore_page: cannot add item to page");
      76              :     }
      77         1601 : }
      78              : 
      79              : static void
      80          822 : _bt_restore_meta(XLogReaderState *record, uint8 block_id)
      81              : {
      82          822 :     XLogRecPtr  lsn = record->EndRecPtr;
      83              :     Buffer      metabuf;
      84              :     Page        metapg;
      85              :     BTMetaPageData *md;
      86              :     BTPageOpaque pageop;
      87              :     xl_btree_metadata *xlrec;
      88              :     char       *ptr;
      89              :     Size        len;
      90              : 
      91          822 :     metabuf = XLogInitBufferForRedo(record, block_id);
      92          822 :     ptr = XLogRecGetBlockData(record, block_id, &len);
      93              : 
      94              :     Assert(len == sizeof(xl_btree_metadata));
      95              :     Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
      96          822 :     xlrec = (xl_btree_metadata *) ptr;
      97          822 :     metapg = BufferGetPage(metabuf);
      98              : 
      99          822 :     _bt_pageinit(metapg, BufferGetPageSize(metabuf));
     100              : 
     101          822 :     md = BTPageGetMeta(metapg);
     102          822 :     md->btm_magic = BTREE_MAGIC;
     103          822 :     md->btm_version = xlrec->version;
     104          822 :     md->btm_root = xlrec->root;
     105          822 :     md->btm_level = xlrec->level;
     106          822 :     md->btm_fastroot = xlrec->fastroot;
     107          822 :     md->btm_fastlevel = xlrec->fastlevel;
     108              :     /* Cannot log BTREE_MIN_VERSION index metapage without upgrade */
     109              :     Assert(md->btm_version >= BTREE_NOVAC_VERSION);
     110          822 :     md->btm_last_cleanup_num_delpages = xlrec->last_cleanup_num_delpages;
     111          822 :     md->btm_last_cleanup_num_heap_tuples = -1.0;
     112          822 :     md->btm_allequalimage = xlrec->allequalimage;
     113              : 
     114          822 :     pageop = BTPageGetOpaque(metapg);
     115          822 :     pageop->btpo_flags = BTP_META;
     116              : 
     117              :     /*
     118              :      * Set pd_lower just past the end of the metadata.  This is essential,
     119              :      * because without doing so, metadata will be lost if xlog.c compresses
     120              :      * the page.
     121              :      */
     122          822 :     ((PageHeader) metapg)->pd_lower =
     123          822 :         ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg;
     124              : 
     125          822 :     PageSetLSN(metapg, lsn);
     126          822 :     MarkBufferDirty(metabuf);
     127          822 :     UnlockReleaseBuffer(metabuf);
     128          822 : }
     129              : 
     130              : /*
     131              :  * _bt_clear_incomplete_split -- clear INCOMPLETE_SPLIT flag on a page
     132              :  *
     133              :  * This is a common subroutine of the redo functions of all the WAL record
     134              :  * types that can insert a downlink: insert, split, and newroot.
     135              :  */
     136              : static void
     137         1551 : _bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id)
     138              : {
     139         1551 :     XLogRecPtr  lsn = record->EndRecPtr;
     140              :     Buffer      buf;
     141              : 
     142         1551 :     if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO)
     143              :     {
     144         1550 :         Page        page = BufferGetPage(buf);
     145         1550 :         BTPageOpaque pageop = BTPageGetOpaque(page);
     146              : 
     147              :         Assert(P_INCOMPLETE_SPLIT(pageop));
     148         1550 :         pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT;
     149              : 
     150         1550 :         PageSetLSN(page, lsn);
     151         1550 :         MarkBufferDirty(buf);
     152              :     }
     153         1551 :     if (BufferIsValid(buf))
     154         1551 :         UnlockReleaseBuffer(buf);
     155         1551 : }
     156              : 
/*
 * btree_xlog_insert -- replay a btree tuple insertion
 *
 * isleaf: the target is a leaf page; when false, block 1 of the record is
 * the child whose incomplete-split flag must be cleared first.
 * ismeta: the record also carries a metapage update in block 2.
 * posting: the insertion split an existing posting list on a leaf page, so
 * the block data begins with a uint16 posting offset followed by the
 * original (pre-split) new item.
 */
static void
btree_xlog_insert(bool isleaf, bool ismeta, bool posting,
                  XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;

    /*
     * Insertion to an internal page finishes an incomplete split at the child
     * level.  Clear the incomplete-split flag in the child.  Note: during
     * normal operation, the child and parent pages are locked at the same
     * time (the locks are coupled), so that clearing the flag and inserting
     * the downlink appear atomic to other backends.  We don't bother with
     * that during replay, because readers don't care about the
     * incomplete-split flag and there cannot be updates happening.
     */
    if (!isleaf)
        _bt_clear_incomplete_split(record, 1);
    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        Size        datalen;
        char       *datapos = XLogRecGetBlockData(record, 0, &datalen);

        page = BufferGetPage(buffer);

        if (!posting)
        {
            /* Simple retail insertion: block data is the finished tuple */
            if (PageAddItem(page, datapos, datalen, xlrec->offnum, false, false) == InvalidOffsetNumber)
                elog(PANIC, "failed to add new item");
        }
        else
        {
            ItemId      itemid;
            IndexTuple  oposting,
                        newitem,
                        nposting;
            uint16      postingoff;

            /*
             * A posting list split occurred during leaf page insertion.  WAL
             * record data will start with an offset number representing the
             * point in an existing posting list that a split occurs at.
             *
             * Use _bt_swap_posting() to repeat posting list split steps from
             * primary.  Note that newitem from WAL record is 'orignewitem',
             * not the final version of newitem that is actually inserted on
             * page.
             */
            postingoff = *((uint16 *) datapos);
            datapos += sizeof(uint16);
            datalen -= sizeof(uint16);

            /* The posting list being split sits just before the new item */
            itemid = PageGetItemId(page, OffsetNumberPrev(xlrec->offnum));
            oposting = (IndexTuple) PageGetItem(page, itemid);

            /* Use mutable, aligned newitem copy in _bt_swap_posting() */
            Assert(isleaf && postingoff > 0);
            newitem = CopyIndexTuple((IndexTuple) datapos);
            nposting = _bt_swap_posting(newitem, oposting, postingoff);

            /* Replace existing posting list with post-split version */
            memcpy(oposting, nposting, MAXALIGN(IndexTupleSize(nposting)));

            /* Insert "final" new item (not orignewitem from WAL stream) */
            Assert(IndexTupleSize(newitem) == datalen);
            if (PageAddItem(page, newitem, datalen, xlrec->offnum, false, false) == InvalidOffsetNumber)
                elog(PANIC, "failed to add posting split new item");
        }

        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);

    /*
     * Note: in normal operation, we'd update the metapage while still holding
     * lock on the page we inserted into.  But during replay it's not
     * necessary to hold that lock, since no other index updates can be
     * happening concurrently, and readers will cope fine with following an
     * obsolete link from the metapage.
     */
    if (ismeta)
        _bt_restore_meta(record, 2);
}
     245              : 
/*
 * btree_xlog_split -- replay a btree page split
 *
 * Block 0 is the original (left) page, block 1 the new right sibling, block
 * 2 (optional) the old right sibling whose left-link must be fixed, and
 * block 3 (internal splits only) the child whose incomplete-split flag is
 * cleared.  newitemonleft says whether the tuple being inserted at split
 * time went onto the left half.
 */
static void
btree_xlog_split(bool newitemonleft, XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
    bool        isleaf = (xlrec->level == 0);
    Buffer      buf;
    Buffer      rbuf;
    Page        rpage;
    BTPageOpaque ropaque;
    char       *datapos;
    Size        datalen;
    BlockNumber origpagenumber;
    BlockNumber rightpagenumber;
    BlockNumber spagenumber;

    XLogRecGetBlockTag(record, 0, NULL, NULL, &origpagenumber);
    XLogRecGetBlockTag(record, 1, NULL, NULL, &rightpagenumber);
    /* Block 2 is absent when the original page was the rightmost page */
    if (!XLogRecGetBlockTagExtended(record, 2, NULL, NULL, &spagenumber, NULL))
        spagenumber = P_NONE;

    /*
     * Clear the incomplete split flag on the appropriate child page one level
     * down when origpage/buf is an internal page (there must have been
     * cascading page splits during original execution in the event of an
     * internal page split).  This is like the corresponding btree_xlog_insert
     * call for internal pages.  We're not clearing the incomplete split flag
     * for the current page split here (you can think of this as part of the
     * insert of newitem that the page split action needs to perform in
     * passing).
     *
     * Like in btree_xlog_insert, this can be done before locking other pages.
     * We never need to couple cross-level locks in REDO routines.
     */
    if (!isleaf)
        _bt_clear_incomplete_split(record, 3);

    /* Reconstruct right (new) sibling page from scratch */
    rbuf = XLogInitBufferForRedo(record, 1);
    datapos = XLogRecGetBlockData(record, 1, &datalen);
    rpage = BufferGetPage(rbuf);

    _bt_pageinit(rpage, BufferGetPageSize(rbuf));
    ropaque = BTPageGetOpaque(rpage);

    ropaque->btpo_prev = origpagenumber;
    ropaque->btpo_next = spagenumber;
    ropaque->btpo_level = xlrec->level;
    ropaque->btpo_flags = isleaf ? BTP_LEAF : 0;
    ropaque->btpo_cycleid = 0;

    _bt_restore_page(rpage, datapos, datalen);

    PageSetLSN(rpage, lsn);
    MarkBufferDirty(rbuf);

    /* Now reconstruct original page (left half of split) */
    if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO)
    {
        /*
         * To retain the same physical order of the tuples that they had, we
         * initialize a temporary empty page for the left page and add all the
         * items to that in item number order.  This mirrors how _bt_split()
         * works.  Retaining the same physical order makes WAL consistency
         * checking possible.  See also _bt_restore_page(), which does the
         * same for the right page.
         */
        Page        origpage = BufferGetPage(buf);
        BTPageOpaque oopaque = BTPageGetOpaque(origpage);
        OffsetNumber off;
        IndexTuple  newitem = NULL,
                    left_hikey = NULL,
                    nposting = NULL;
        Size        newitemsz = 0,
                    left_hikeysz = 0;
        Page        leftpage;
        OffsetNumber leftoff,
                    replacepostingoff = InvalidOffsetNumber;

        datapos = XLogRecGetBlockData(record, 0, &datalen);

        /* Block 0 data carries newitem only when it went on the left page */
        if (newitemonleft || xlrec->postingoff != 0)
        {
            newitem = (IndexTuple) datapos;
            newitemsz = MAXALIGN(IndexTupleSize(newitem));
            datapos += newitemsz;
            datalen -= newitemsz;

            if (xlrec->postingoff != 0)
            {
                ItemId      itemid;
                IndexTuple  oposting;

                /* Posting list must be at offset number before new item's */
                replacepostingoff = OffsetNumberPrev(xlrec->newitemoff);

                /* Use mutable, aligned newitem copy in _bt_swap_posting() */
                newitem = CopyIndexTuple(newitem);
                itemid = PageGetItemId(origpage, replacepostingoff);
                oposting = (IndexTuple) PageGetItem(origpage, itemid);
                nposting = _bt_swap_posting(newitem, oposting,
                                            xlrec->postingoff);
            }
        }

        /*
         * Extract left hikey and its size.  We assume that 16-bit alignment
         * is enough to apply IndexTupleSize (since it's fetching from a
         * uint16 field).
         */
        left_hikey = (IndexTuple) datapos;
        left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
        datapos += left_hikeysz;
        datalen -= left_hikeysz;

        Assert(datalen == 0);

        leftpage = PageGetTempPageCopySpecial(origpage);

        /* Add high key tuple from WAL record to temp page */
        leftoff = P_HIKEY;
        if (PageAddItem(leftpage, left_hikey, left_hikeysz, P_HIKEY, false, false) == InvalidOffsetNumber)
            elog(ERROR, "failed to add high key to left page after split");
        leftoff = OffsetNumberNext(leftoff);

        /* Copy the retained items, interleaving newitem/nposting in place */
        for (off = P_FIRSTDATAKEY(oopaque); off < xlrec->firstrightoff; off++)
        {
            ItemId      itemid;
            Size        itemsz;
            IndexTuple  item;

            /* Add replacement posting list when required */
            if (off == replacepostingoff)
            {
                Assert(newitemonleft ||
                       xlrec->firstrightoff == xlrec->newitemoff);
                if (PageAddItem(leftpage, nposting, MAXALIGN(IndexTupleSize(nposting)), leftoff, false, false) == InvalidOffsetNumber)
                    elog(ERROR, "failed to add new posting list item to left page after split");
                leftoff = OffsetNumberNext(leftoff);
                continue;       /* don't insert oposting */
            }

            /* add the new item if it was inserted on left page */
            else if (newitemonleft && off == xlrec->newitemoff)
            {
                if (PageAddItem(leftpage, newitem, newitemsz, leftoff, false, false) == InvalidOffsetNumber)
                    elog(ERROR, "failed to add new item to left page after split");
                leftoff = OffsetNumberNext(leftoff);
            }

            itemid = PageGetItemId(origpage, off);
            itemsz = ItemIdGetLength(itemid);
            item = (IndexTuple) PageGetItem(origpage, itemid);
            if (PageAddItem(leftpage, item, itemsz, leftoff, false, false) == InvalidOffsetNumber)
                elog(ERROR, "failed to add old item to left page after split");
            leftoff = OffsetNumberNext(leftoff);
        }

        /* cope with possibility that newitem goes at the end */
        if (newitemonleft && off == xlrec->newitemoff)
        {
            if (PageAddItem(leftpage, newitem, newitemsz, leftoff, false, false) == InvalidOffsetNumber)
                elog(ERROR, "failed to add new item to left page after split");
            leftoff = OffsetNumberNext(leftoff);
        }

        PageRestoreTempPage(leftpage, origpage);

        /* Fix opaque fields */
        oopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
        if (isleaf)
            oopaque->btpo_flags |= BTP_LEAF;
        oopaque->btpo_next = rightpagenumber;
        oopaque->btpo_cycleid = 0;

        PageSetLSN(origpage, lsn);
        MarkBufferDirty(buf);
    }

    /* Fix left-link of the page to the right of the new right sibling */
    if (spagenumber != P_NONE)
    {
        Buffer      sbuf;

        if (XLogReadBufferForRedo(record, 2, &sbuf) == BLK_NEEDS_REDO)
        {
            Page        spage = BufferGetPage(sbuf);
            BTPageOpaque spageop = BTPageGetOpaque(spage);

            spageop->btpo_prev = rightpagenumber;

            PageSetLSN(spage, lsn);
            MarkBufferDirty(sbuf);
        }
        if (BufferIsValid(sbuf))
            UnlockReleaseBuffer(sbuf);
    }

    /*
     * Finally, release the remaining buffers.  sbuf, rbuf, and buf must be
     * released together, so that readers cannot observe inconsistencies.
     */
    UnlockReleaseBuffer(rbuf);
    if (BufferIsValid(buf))
        UnlockReleaseBuffer(buf);
}
     452              : 
/*
 * btree_xlog_dedup -- replay a leaf page deduplication pass
 *
 * The block data is an array of xlrec->nintervals BTDedupInterval entries.
 * We rebuild the page on a temp copy, merging each interval's run of tuples
 * into a posting list with the _bt_dedup_* helpers, exactly mirroring what
 * the primary did.
 */
static void
btree_xlog_dedup(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_btree_dedup *xlrec = (xl_btree_dedup *) XLogRecGetData(record);
    Buffer      buf;

    if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO)
    {
        char       *ptr = XLogRecGetBlockData(record, 0, NULL);
        Page        page = BufferGetPage(buf);
        BTPageOpaque opaque = BTPageGetOpaque(page);
        OffsetNumber offnum,
                    minoff,
                    maxoff;
        BTDedupState state;
        BTDedupInterval *intervals;
        Page        newpage;

        state = palloc_object(BTDedupStateData);
        state->deduplicate = true;   /* unused */
        state->nmaxitems = 0;    /* unused */
        /* Conservatively use larger maxpostingsize than primary */
        state->maxpostingsize = BTMaxItemSize;
        state->base = NULL;
        state->baseoff = InvalidOffsetNumber;
        state->basetupsize = 0;
        state->htids = palloc(state->maxpostingsize);
        state->nhtids = 0;
        state->nitems = 0;
        state->phystupsize = 0;
        state->nintervals = 0;

        minoff = P_FIRSTDATAKEY(opaque);
        maxoff = PageGetMaxOffsetNumber(page);
        newpage = PageGetTempPageCopySpecial(page);

        /* Non-rightmost pages have a high key that must be carried over */
        if (!P_RIGHTMOST(opaque))
        {
            ItemId      itemid = PageGetItemId(page, P_HIKEY);
            Size        itemsz = ItemIdGetLength(itemid);
            IndexTuple  item = (IndexTuple) PageGetItem(page, itemid);

            if (PageAddItem(newpage, item, itemsz, P_HIKEY, false, false) == InvalidOffsetNumber)
                elog(ERROR, "deduplication failed to add highkey");
        }

        /*
         * Walk the existing tuples in offset order.  A tuple either extends
         * the pending interval (when it matches the next WAL interval's
         * baseoff/nitems bounds) or flushes the pending group to newpage and
         * starts a new one.
         */
        intervals = (BTDedupInterval *) ptr;
        for (offnum = minoff;
             offnum <= maxoff;
             offnum = OffsetNumberNext(offnum))
        {
            ItemId      itemid = PageGetItemId(page, offnum);
            IndexTuple  itup = (IndexTuple) PageGetItem(page, itemid);

            if (offnum == minoff)
                _bt_dedup_start_pending(state, itup, offnum);
            else if (state->nintervals < xlrec->nintervals &&
                     state->baseoff == intervals[state->nintervals].baseoff &&
                     state->nitems < intervals[state->nintervals].nitems)
            {
                if (!_bt_dedup_save_htid(state, itup))
                    elog(ERROR, "deduplication failed to add heap tid to pending posting list");
            }
            else
            {
                _bt_dedup_finish_pending(newpage, state);
                _bt_dedup_start_pending(state, itup, offnum);
            }
        }

        /* Flush the last pending group, then cross-check against the WAL */
        _bt_dedup_finish_pending(newpage, state);
        Assert(state->nintervals == xlrec->nintervals);
        Assert(memcmp(state->intervals, intervals,
                      state->nintervals * sizeof(BTDedupInterval)) == 0);

        /* Deduplication implicitly removed any LP_DEAD garbage */
        if (P_HAS_GARBAGE(opaque))
        {
            BTPageOpaque nopaque = BTPageGetOpaque(newpage);

            nopaque->btpo_flags &= ~BTP_HAS_GARBAGE;
        }

        PageRestoreTempPage(newpage, page);
        PageSetLSN(page, lsn);
        MarkBufferDirty(buf);
    }

    if (BufferIsValid(buf))
        UnlockReleaseBuffer(buf);
}
     544              : 
     545              : static void
     546          149 : btree_xlog_updates(Page page, OffsetNumber *updatedoffsets,
     547              :                    xl_btree_update *updates, int nupdated)
     548              : {
     549              :     BTVacuumPosting vacposting;
     550              :     IndexTuple  origtuple;
     551              :     ItemId      itemid;
     552              :     Size        itemsz;
     553              : 
     554         3944 :     for (int i = 0; i < nupdated; i++)
     555              :     {
     556         3795 :         itemid = PageGetItemId(page, updatedoffsets[i]);
     557         3795 :         origtuple = (IndexTuple) PageGetItem(page, itemid);
     558              : 
     559         3795 :         vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
     560         3795 :                             updates->ndeletedtids * sizeof(uint16));
     561         3795 :         vacposting->updatedoffset = updatedoffsets[i];
     562         3795 :         vacposting->itup = origtuple;
     563         3795 :         vacposting->ndeletedtids = updates->ndeletedtids;
     564         3795 :         memcpy(vacposting->deletetids,
     565              :                (char *) updates + SizeOfBtreeUpdate,
     566         3795 :                updates->ndeletedtids * sizeof(uint16));
     567              : 
     568         3795 :         _bt_update_posting(vacposting);
     569              : 
     570              :         /* Overwrite updated version of tuple */
     571         3795 :         itemsz = MAXALIGN(IndexTupleSize(vacposting->itup));
     572         3795 :         if (!PageIndexTupleOverwrite(page, updatedoffsets[i], vacposting->itup, itemsz))
     573            0 :             elog(PANIC, "failed to update partially dead item");
     574              : 
     575         3795 :         pfree(vacposting->itup);
     576         3795 :         pfree(vacposting);
     577              : 
     578              :         /* advance to next xl_btree_update from array */
     579         3795 :         updates = (xl_btree_update *)
     580         3795 :             ((char *) updates + SizeOfBtreeUpdate +
     581         3795 :              updates->ndeletedtids * sizeof(uint16));
     582              :     }
     583          149 : }
     584              : 
     585              : static void
     586         1725 : btree_xlog_vacuum(XLogReaderState *record)
     587              : {
     588         1725 :     XLogRecPtr  lsn = record->EndRecPtr;
     589         1725 :     xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
     590              :     Buffer      buffer;
     591              :     Page        page;
     592              :     BTPageOpaque opaque;
     593              : 
     594              :     /*
     595              :      * We need to take a cleanup lock here, just like btvacuumpage(). However,
     596              :      * it isn't necessary to exhaustively get a cleanup lock on every block in
     597              :      * the index during recovery (just getting a cleanup lock on pages with
     598              :      * items to kill suffices).  See nbtree/README for details.
     599              :      */
     600         1725 :     if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer)
     601              :         == BLK_NEEDS_REDO)
     602              :     {
     603          857 :         char       *ptr = XLogRecGetBlockData(record, 0, NULL);
     604              : 
     605          857 :         page = BufferGetPage(buffer);
     606              : 
     607          857 :         if (xlrec->nupdated > 0)
     608              :         {
     609              :             OffsetNumber *updatedoffsets;
     610              :             xl_btree_update *updates;
     611              : 
     612           44 :             updatedoffsets = (OffsetNumber *)
     613           44 :                 (ptr + xlrec->ndeleted * sizeof(OffsetNumber));
     614           44 :             updates = (xl_btree_update *) ((char *) updatedoffsets +
     615           44 :                                            xlrec->nupdated *
     616              :                                            sizeof(OffsetNumber));
     617              : 
     618           44 :             btree_xlog_updates(page, updatedoffsets, updates, xlrec->nupdated);
     619              :         }
     620              : 
     621          857 :         if (xlrec->ndeleted > 0)
     622          850 :             PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
     623              : 
     624              :         /*
     625              :          * Clear the vacuum cycle ID, and mark the page as not containing any
     626              :          * LP_DEAD items
     627              :          */
     628          857 :         opaque = BTPageGetOpaque(page);
     629          857 :         opaque->btpo_cycleid = 0;
     630          857 :         opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
     631              : 
     632          857 :         PageSetLSN(page, lsn);
     633          857 :         MarkBufferDirty(buffer);
     634              :     }
     635         1725 :     if (BufferIsValid(buffer))
     636         1725 :         UnlockReleaseBuffer(buffer);
     637         1725 : }
     638              : 
     639              : static void
     640          852 : btree_xlog_delete(XLogReaderState *record)
     641              : {
     642          852 :     XLogRecPtr  lsn = record->EndRecPtr;
     643          852 :     xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
     644              :     Buffer      buffer;
     645              :     Page        page;
     646              :     BTPageOpaque opaque;
     647              : 
     648              :     /*
     649              :      * If we have any conflict processing to do, it must happen before we
     650              :      * update the page
     651              :      */
     652          852 :     if (InHotStandby)
     653              :     {
     654              :         RelFileLocator rlocator;
     655              : 
     656          852 :         XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
     657              : 
     658          852 :         ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
     659          852 :                                             xlrec->isCatalogRel,
     660              :                                             rlocator);
     661              :     }
     662              : 
     663              :     /*
     664              :      * We don't need to take a cleanup lock to apply these changes. See
     665              :      * nbtree/README for details.
     666              :      */
     667          852 :     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
     668              :     {
     669          834 :         char       *ptr = XLogRecGetBlockData(record, 0, NULL);
     670              : 
     671          834 :         page = BufferGetPage(buffer);
     672              : 
     673          834 :         if (xlrec->nupdated > 0)
     674              :         {
     675              :             OffsetNumber *updatedoffsets;
     676              :             xl_btree_update *updates;
     677              : 
     678          105 :             updatedoffsets = (OffsetNumber *)
     679          105 :                 (ptr + xlrec->ndeleted * sizeof(OffsetNumber));
     680          105 :             updates = (xl_btree_update *) ((char *) updatedoffsets +
     681          105 :                                            xlrec->nupdated *
     682              :                                            sizeof(OffsetNumber));
     683              : 
     684          105 :             btree_xlog_updates(page, updatedoffsets, updates, xlrec->nupdated);
     685              :         }
     686              : 
     687          834 :         if (xlrec->ndeleted > 0)
     688          816 :             PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
     689              : 
     690              :         /*
     691              :          * Do *not* clear the vacuum cycle ID, but do mark the page as not
     692              :          * containing any LP_DEAD items
     693              :          */
     694          834 :         opaque = BTPageGetOpaque(page);
     695          834 :         opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
     696              : 
     697          834 :         PageSetLSN(page, lsn);
     698          834 :         MarkBufferDirty(buffer);
     699              :     }
     700          852 :     if (BufferIsValid(buffer))
     701          852 :         UnlockReleaseBuffer(buffer);
     702          852 : }
     703              : 
     704              : static void
     705          663 : btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
     706              : {
     707          663 :     XLogRecPtr  lsn = record->EndRecPtr;
     708          663 :     xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record);
     709              :     Buffer      buffer;
     710              :     Page        page;
     711              :     BTPageOpaque pageop;
     712              :     IndexTupleData trunctuple;
     713              : 
     714              :     /*
     715              :      * In normal operation, we would lock all the pages this WAL record
     716              :      * touches before changing any of them.  In WAL replay, it should be okay
     717              :      * to lock just one page at a time, since no concurrent index updates can
     718              :      * be happening, and readers should not care whether they arrive at the
     719              :      * target page or not (since it's surely empty).
     720              :      */
     721              : 
     722              :     /* to-be-deleted subtree's parent page */
     723          663 :     if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
     724              :     {
     725              :         OffsetNumber poffset;
     726              :         ItemId      itemid;
     727              :         IndexTuple  itup;
     728              :         OffsetNumber nextoffset;
     729              :         BlockNumber rightsib;
     730              : 
     731          647 :         page = BufferGetPage(buffer);
     732          647 :         pageop = BTPageGetOpaque(page);
     733              : 
     734          647 :         poffset = xlrec->poffset;
     735              : 
     736          647 :         nextoffset = OffsetNumberNext(poffset);
     737          647 :         itemid = PageGetItemId(page, nextoffset);
     738          647 :         itup = (IndexTuple) PageGetItem(page, itemid);
     739          647 :         rightsib = BTreeTupleGetDownLink(itup);
     740              : 
     741          647 :         itemid = PageGetItemId(page, poffset);
     742          647 :         itup = (IndexTuple) PageGetItem(page, itemid);
     743          647 :         BTreeTupleSetDownLink(itup, rightsib);
     744          647 :         nextoffset = OffsetNumberNext(poffset);
     745          647 :         PageIndexTupleDelete(page, nextoffset);
     746              : 
     747          647 :         PageSetLSN(page, lsn);
     748          647 :         MarkBufferDirty(buffer);
     749              :     }
     750              : 
     751              :     /*
     752              :      * Don't need to couple cross-level locks in REDO routines, so release
     753              :      * lock on internal page immediately
     754              :      */
     755          663 :     if (BufferIsValid(buffer))
     756          663 :         UnlockReleaseBuffer(buffer);
     757              : 
     758              :     /* Rewrite the leaf page as a halfdead page */
     759          663 :     buffer = XLogInitBufferForRedo(record, 0);
     760          663 :     page = BufferGetPage(buffer);
     761              : 
     762          663 :     _bt_pageinit(page, BufferGetPageSize(buffer));
     763          663 :     pageop = BTPageGetOpaque(page);
     764              : 
     765          663 :     pageop->btpo_prev = xlrec->leftblk;
     766          663 :     pageop->btpo_next = xlrec->rightblk;
     767          663 :     pageop->btpo_level = 0;
     768          663 :     pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
     769          663 :     pageop->btpo_cycleid = 0;
     770              : 
     771              :     /*
     772              :      * Construct a dummy high key item that points to top parent page (value
     773              :      * is InvalidBlockNumber when the top parent page is the leaf page itself)
     774              :      */
     775          663 :     MemSet(&trunctuple, 0, sizeof(IndexTupleData));
     776          663 :     trunctuple.t_info = sizeof(IndexTupleData);
     777          663 :     BTreeTupleSetTopParent(&trunctuple, xlrec->topparent);
     778              : 
     779          663 :     if (PageAddItem(page, &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber)
     780            0 :         elog(ERROR, "could not add dummy high key to half-dead page");
     781              : 
     782          663 :     PageSetLSN(page, lsn);
     783          663 :     MarkBufferDirty(buffer);
     784          663 :     UnlockReleaseBuffer(buffer);
     785          663 : }
     786              : 
     787              : 
/*
 * Replay XLOG_BTREE_UNLINK_PAGE / XLOG_BTREE_UNLINK_PAGE_META: unlink the
 * target page from its siblings and rewrite it as an empty deleted page.
 * May also re-initialize the subtree's leaf page as half-dead (block ref 3)
 * and restore the metapage (block ref 4, *_META variant only).
 */
static void
btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record);
    BlockNumber leftsib;
    BlockNumber rightsib;
    uint32      level;
    bool        isleaf;
    FullTransactionId safexid;
    Buffer      leftbuf;
    Buffer      target;
    Buffer      rightbuf;
    Page        page;
    BTPageOpaque pageop;

    leftsib = xlrec->leftsib;
    rightsib = xlrec->rightsib;
    level = xlrec->level;
    isleaf = (level == 0);          /* level 0 means the target is a leaf */
    safexid = xlrec->safexid;

    /* No leaftopparent for level 0 (leaf page) or level 1 target */
    Assert(!BlockNumberIsValid(xlrec->leaftopparent) || level > 1);

    /*
     * In normal operation, we would lock all the pages this WAL record
     * touches before changing any of them.  In WAL replay, we at least lock
     * the pages in the same standard left-to-right order (leftsib, target,
     * rightsib), and don't release the sibling locks until the target is
     * marked deleted.
     */

    /* Fix right-link of left sibling, if any */
    if (leftsib != P_NONE)
    {
        if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
        {
            page = BufferGetPage(leftbuf);
            pageop = BTPageGetOpaque(page);
            pageop->btpo_next = rightsib;

            PageSetLSN(page, lsn);
            MarkBufferDirty(leftbuf);
        }
    }
    else
        leftbuf = InvalidBuffer;

    /* Rewrite target page as empty deleted page */
    target = XLogInitBufferForRedo(record, 0);
    page = BufferGetPage(target);

    _bt_pageinit(page, BufferGetPageSize(target));
    pageop = BTPageGetOpaque(page);

    pageop->btpo_prev = leftsib;
    pageop->btpo_next = rightsib;
    pageop->btpo_level = level;
    /* Record safexid so VACUUM can later decide when recycling is safe */
    BTPageSetDeleted(page, safexid);
    if (isleaf)
        pageop->btpo_flags |= BTP_LEAF;
    pageop->btpo_cycleid = 0;

    PageSetLSN(page, lsn);
    MarkBufferDirty(target);

    /* Fix left-link of right sibling */
    if (XLogReadBufferForRedo(record, 2, &rightbuf) == BLK_NEEDS_REDO)
    {
        page = BufferGetPage(rightbuf);
        pageop = BTPageGetOpaque(page);
        pageop->btpo_prev = leftsib;

        PageSetLSN(page, lsn);
        MarkBufferDirty(rightbuf);
    }

    /* Release siblings */
    if (BufferIsValid(leftbuf))
        UnlockReleaseBuffer(leftbuf);
    if (BufferIsValid(rightbuf))
        UnlockReleaseBuffer(rightbuf);

    /* Release target */
    UnlockReleaseBuffer(target);

    /*
     * If we deleted a parent of the targeted leaf page, instead of the leaf
     * itself, update the leaf to point to the next remaining child in the
     * to-be-deleted subtree
     */
    if (XLogRecHasBlockRef(record, 3))
    {
        /*
         * There is no real data on the page, so we just re-create it from
         * scratch using the information from the WAL record.
         *
         * Note that we don't end up here when the target page is also the
         * leafbuf page.  There is no need to add a dummy hikey item with a
         * top parent link when deleting leafbuf because it's the last page
         * we'll delete in the subtree undergoing deletion.
         */
        Buffer      leafbuf;
        IndexTupleData trunctuple;

        Assert(!isleaf);

        leafbuf = XLogInitBufferForRedo(record, 3);
        page = BufferGetPage(leafbuf);

        _bt_pageinit(page, BufferGetPageSize(leafbuf));
        pageop = BTPageGetOpaque(page);

        pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
        pageop->btpo_prev = xlrec->leafleftsib;
        pageop->btpo_next = xlrec->leafrightsib;
        pageop->btpo_level = 0;
        pageop->btpo_cycleid = 0;

        /* Add a dummy hikey item */
        MemSet(&trunctuple, 0, sizeof(IndexTupleData));
        trunctuple.t_info = sizeof(IndexTupleData);
        BTreeTupleSetTopParent(&trunctuple, xlrec->leaftopparent);

        if (PageAddItem(page, &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber)
            elog(ERROR, "could not add dummy high key to half-dead page");

        PageSetLSN(page, lsn);
        MarkBufferDirty(leafbuf);
        UnlockReleaseBuffer(leafbuf);
    }

    /* Update metapage if needed */
    if (info == XLOG_BTREE_UNLINK_PAGE_META)
        _bt_restore_meta(record, 4);
}
     925              : 
     926              : static void
     927          781 : btree_xlog_newroot(XLogReaderState *record)
     928              : {
     929          781 :     XLogRecPtr  lsn = record->EndRecPtr;
     930          781 :     xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
     931              :     Buffer      buffer;
     932              :     Page        page;
     933              :     BTPageOpaque pageop;
     934              :     char       *ptr;
     935              :     Size        len;
     936              : 
     937          781 :     buffer = XLogInitBufferForRedo(record, 0);
     938          781 :     page = BufferGetPage(buffer);
     939              : 
     940          781 :     _bt_pageinit(page, BufferGetPageSize(buffer));
     941          781 :     pageop = BTPageGetOpaque(page);
     942              : 
     943          781 :     pageop->btpo_flags = BTP_ROOT;
     944          781 :     pageop->btpo_prev = pageop->btpo_next = P_NONE;
     945          781 :     pageop->btpo_level = xlrec->level;
     946          781 :     if (xlrec->level == 0)
     947          731 :         pageop->btpo_flags |= BTP_LEAF;
     948          781 :     pageop->btpo_cycleid = 0;
     949              : 
     950          781 :     if (xlrec->level > 0)
     951              :     {
     952           50 :         ptr = XLogRecGetBlockData(record, 0, &len);
     953           50 :         _bt_restore_page(page, ptr, len);
     954              : 
     955              :         /* Clear the incomplete-split flag in left child */
     956           50 :         _bt_clear_incomplete_split(record, 1);
     957              :     }
     958              : 
     959          781 :     PageSetLSN(page, lsn);
     960          781 :     MarkBufferDirty(buffer);
     961          781 :     UnlockReleaseBuffer(buffer);
     962              : 
     963          781 :     _bt_restore_meta(record, 2);
     964          781 : }
     965              : 
     966              : /*
     967              :  * In general VACUUM must defer recycling as a way of avoiding certain race
     968              :  * conditions.  Deleted pages contain a safexid value that is used by VACUUM
     969              :  * to determine whether or not it's safe to place a page that was deleted by
     970              :  * VACUUM earlier into the FSM now.  See nbtree/README.
     971              :  *
     972              :  * As far as any backend operating during original execution is concerned, the
     973              :  * FSM is a cache of recycle-safe pages; the mere presence of the page in the
     974              :  * FSM indicates that the page must already be safe to recycle (actually,
     975              :  * _bt_allocbuf() verifies it's safe using BTPageIsRecyclable(), but that's
     976              :  * just because it would be unwise to completely trust the FSM, given its
     977              :  * current limitations).
     978              :  *
     979              :  * This isn't sufficient to prevent similar concurrent recycling race
     980              :  * conditions during Hot Standby, though.  For that we need to log a
     981              :  * xl_btree_reuse_page record at the point that a page is actually recycled
     982              :  * and reused for an entirely unrelated page inside _bt_split().  These
     983              :  * records include the same safexid value from the original deleted page,
     984              :  * stored in the record's snapshotConflictHorizon field.
     985              :  *
     986              :  * The GlobalVisCheckRemovableFullXid() test in BTPageIsRecyclable() is used
     987              :  * to determine if it's safe to recycle a page.  This mirrors our own test:
     988              :  * the PGPROC->xmin > limitXmin test inside GetConflictingVirtualXIDs().
     989              :  * Consequently, one XID value achieves the same exclusion effect on primary
     990              :  * and standby.
     991              :  */
     992              : static void
     993           71 : btree_xlog_reuse_page(XLogReaderState *record)
     994              : {
     995           71 :     xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
     996              : 
     997           71 :     if (InHotStandby)
     998           71 :         ResolveRecoveryConflictWithSnapshotFullXid(xlrec->snapshotConflictHorizon,
     999           71 :                                                    xlrec->isCatalogRel,
    1000              :                                                    xlrec->locator);
    1001           71 : }
    1002              : 
/*
 * Main redo entry point for the btree resource manager: dispatch on the
 * WAL record's opcode.  Each record is replayed inside opCtx, which is
 * reset afterwards so per-record allocations don't accumulate across
 * replay.
 */
void
btree_redo(XLogReaderState *record)
{
    uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
    MemoryContext oldCtx;

    oldCtx = MemoryContextSwitchTo(opCtx);
    switch (info)
    {
        case XLOG_BTREE_INSERT_LEAF:
            btree_xlog_insert(true, false, false, record);
            break;
        case XLOG_BTREE_INSERT_UPPER:
            btree_xlog_insert(false, false, false, record);
            break;
        case XLOG_BTREE_INSERT_META:
            btree_xlog_insert(false, true, false, record);
            break;
        case XLOG_BTREE_SPLIT_L:
            btree_xlog_split(true, record);
            break;
        case XLOG_BTREE_SPLIT_R:
            btree_xlog_split(false, record);
            break;
        case XLOG_BTREE_INSERT_POST:
            btree_xlog_insert(true, false, true, record);
            break;
        case XLOG_BTREE_DEDUP:
            btree_xlog_dedup(record);
            break;
        case XLOG_BTREE_VACUUM:
            btree_xlog_vacuum(record);
            break;
        case XLOG_BTREE_DELETE:
            btree_xlog_delete(record);
            break;
        case XLOG_BTREE_MARK_PAGE_HALFDEAD:
            btree_xlog_mark_page_halfdead(info, record);
            break;
        case XLOG_BTREE_UNLINK_PAGE:
        case XLOG_BTREE_UNLINK_PAGE_META:
            btree_xlog_unlink_page(info, record);
            break;
        case XLOG_BTREE_NEWROOT:
            btree_xlog_newroot(record);
            break;
        case XLOG_BTREE_REUSE_PAGE:
            btree_xlog_reuse_page(record);
            break;
        case XLOG_BTREE_META_CLEANUP:
            _bt_restore_meta(record, 0);
            break;
        default:
            elog(PANIC, "btree_redo: unknown op code %u", info);
    }
    MemoryContextSwitchTo(oldCtx);
    /* discard whatever the redo routine allocated for this record */
    MemoryContextReset(opCtx);
}
    1061              : 
    1062              : void
    1063          214 : btree_xlog_startup(void)
    1064              : {
    1065          214 :     opCtx = AllocSetContextCreate(CurrentMemoryContext,
    1066              :                                   "Btree recovery temporary context",
    1067              :                                   ALLOCSET_DEFAULT_SIZES);
    1068          214 : }
    1069              : 
    1070              : void
    1071          154 : btree_xlog_cleanup(void)
    1072              : {
    1073          154 :     MemoryContextDelete(opCtx);
    1074          154 :     opCtx = NULL;
    1075          154 : }
    1076              : 
    1077              : /*
    1078              :  * Mask a btree page before performing consistency checks on it.
    1079              :  */
    1080              : void
    1081       907100 : btree_mask(char *pagedata, BlockNumber blkno)
    1082              : {
    1083       907100 :     Page        page = (Page) pagedata;
    1084              :     BTPageOpaque maskopaq;
    1085              : 
    1086       907100 :     mask_page_lsn_and_checksum(page);
    1087              : 
    1088       907100 :     mask_page_hint_bits(page);
    1089       907100 :     mask_unused_space(page);
    1090              : 
    1091       907100 :     maskopaq = BTPageGetOpaque(page);
    1092              : 
    1093       907100 :     if (P_ISLEAF(maskopaq))
    1094              :     {
    1095              :         /*
    1096              :          * In btree leaf pages, it is possible to modify the LP_FLAGS without
    1097              :          * emitting any WAL record. Hence, mask the line pointer flags. See
    1098              :          * _bt_killitems(), _bt_check_unique() for details.
    1099              :          */
    1100       902586 :         mask_lp_flags(page);
    1101              :     }
    1102              : 
    1103              :     /*
    1104              :      * BTP_HAS_GARBAGE is just an un-logged hint bit. So, mask it. See
    1105              :      * _bt_delete_or_dedup_one_page(), _bt_killitems(), and _bt_check_unique()
    1106              :      * for details.
    1107              :      */
    1108       907100 :     maskopaq->btpo_flags &= ~BTP_HAS_GARBAGE;
    1109              : 
    1110              :     /*
    1111              :      * During replay of a btree page split, we don't set the BTP_SPLIT_END
    1112              :      * flag of the right sibling and initialize the cycle_id to 0 for the same
    1113              :      * page. See btree_xlog_split() for details.
    1114              :      */
    1115       907100 :     maskopaq->btpo_flags &= ~BTP_SPLIT_END;
    1116       907100 :     maskopaq->btpo_cycleid = 0;
    1117       907100 : }
        

Generated by: LCOV version 2.0-1