LCOV - code coverage report
Current view: top level - src/backend/storage/page - bufpage.c (source / functions) Hit Total Coverage
Test: PostgreSQL 14devel Lines: 382 420 91.0 %
Date: 2021-01-26 01:06:44 Functions: 19 19 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * bufpage.c
       4             :  *    POSTGRES standard buffer page code.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/storage/page/bufpage.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/htup_details.h"
      18             : #include "access/itup.h"
      19             : #include "access/xlog.h"
      20             : #include "pgstat.h"
      21             : #include "storage/checksum.h"
      22             : #include "utils/memdebug.h"
      23             : #include "utils/memutils.h"
      24             : 
      25             : 
      26             : /* GUC variable */
      27             : bool        ignore_checksum_failure = false;
      28             : 
      29             : 
      30             : /* ----------------------------------------------------------------
      31             :  *                      Page support functions
      32             :  * ----------------------------------------------------------------
      33             :  */
      34             : 
      35             : /*
      36             :  * PageInit
      37             :  *      Initializes the contents of a page.
      38             :  *      Note that we don't calculate an initial checksum here; that's not done
      39             :  *      until it's time to write.
                     :  *
                     :  *      page is the buffer to initialize; its first pageSize bytes are zeroed
                     :  *      before the header fields are filled in.  pageSize is asserted to be
                     :  *      BLCKSZ.  specialSize is rounded up with MAXALIGN, and pd_special is
                     :  *      placed that many bytes before the end of the page.
                     :  *
                     :  *      On return pd_lower is SizeOfPageHeaderData and pd_upper equals
                     :  *      pd_special.
      40             :  */
      41             : void
      42      709580 : PageInit(Page page, Size pageSize, Size specialSize)
      43             : {
      44      709580 :     PageHeader  p = (PageHeader) page;
      45             : 
      46      709580 :     specialSize = MAXALIGN(specialSize);    /* pd_special is derived from the rounded-up size */
      47             : 
      48             :     Assert(pageSize == BLCKSZ);
      49             :     Assert(pageSize > specialSize + SizeOfPageHeaderData);  /* header plus special space must not fill the page */
      50             : 
      51             :     /* Make sure all fields of page are zero, as well as unused space */
      52      709580 :     MemSet(p, 0, pageSize);
      53             : 
      54      709580 :     p->pd_flags = 0;                        /* already zero after the MemSet above */
      55      709580 :     p->pd_lower = SizeOfPageHeaderData;
      56      709580 :     p->pd_upper = pageSize - specialSize;
      57      709580 :     p->pd_special = pageSize - specialSize;
      58      709580 :     PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
      59             :     /* p->pd_prune_xid = InvalidTransactionId;       done by above MemSet */
      60      709580 : }
      61             : 
      62             : 
      63             : /*
      64             :  * PageIsVerifiedExtended
      65             :  *      Check that the page header and checksum (if any) appear valid.
      66             :  *
      67             :  * This is called when a page has just been read in from disk.  The idea is
      68             :  * to cheaply detect trashed pages before we go nuts following bogus line
      69             :  * pointers, testing invalid transaction identifiers, etc.
      70             :  *
      71             :  * It turns out to be necessary to allow zeroed pages here too.  Even though
      72             :  * this routine is *not* called when deliberately adding a page to a relation,
      73             :  * there are scenarios in which a zeroed page might be found in a table.
      74             :  * (Example: a backend extends a relation, then crashes before it can write
      75             :  * any WAL entry about the new page.  The kernel will already have the
      76             :  * zeroed page in the file, and it will stay that way after restart.)  So we
      77             :  * allow zeroed pages here, and are careful that the page access macros
      78             :  * treat such a page as empty and without free space.  Eventually, VACUUM
      79             :  * will clean up such a page and make it usable.
      80             :  *
      81             :  * If flag PIV_LOG_WARNING is set, a WARNING is logged in the event of
      82             :  * a checksum failure.
      83             :  *
      84             :  * If flag PIV_REPORT_STAT is set, a checksum failure is reported directly
      85             :  * to pgstat.
                     :  *
                     :  * blkno is passed to pg_checksum_page() when computing the checksum; the
                     :  * checksum is computed only if DataChecksumsEnabled() returns true.
                     :  *
                     :  * Returns true in three cases: the page is not new, its header looks sane
                     :  * and no checksum failure was detected; the page is entirely zero; or the
                     :  * checksum failed but the header looks sane and ignore_checksum_failure is
                     :  * set.  Returns false otherwise.  Failures other than a checksum mismatch
                     :  * are neither logged nor reported to pgstat by this function.
      86             :  */
      87             : bool
      88      606620 : PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
      89             : {
      90      606620 :     PageHeader  p = (PageHeader) page;
      91             :     size_t     *pagebytes;                  /* page viewed as an array of size_t words */
      92             :     int         i;
      93      606620 :     bool        checksum_failure = false;
      94      606620 :     bool        header_sane = false;
      95      606620 :     bool        all_zeroes = false;
      96      606620 :     uint16      checksum = 0;
      97             : 
      98             :     /*
      99             :      * Don't verify page data unless the page passes basic non-zero test
     100             :      */
     101      606620 :     if (!PageIsNew(page))
     102             :     {
     103      606616 :         if (DataChecksumsEnabled())
     104             :         {
     105        3762 :             checksum = pg_checksum_page((char *) page, blkno);
     106             : 
     107        3762 :             if (checksum != p->pd_checksum)
     108           0 :                 checksum_failure = true;    /* acted on only after the all-zeroes check below */
     109             :         }
     110             : 
     111             :         /*
     112             :          * The following checks don't prove the header is correct, only that
     113             :          * it looks sane enough to allow into the buffer pool. Later usage of
     114             :          * the block can still reveal problems, which is why we offer the
     115             :          * checksum option.
     116             :          */
     117      606616 :         if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
     118      606616 :             p->pd_lower <= p->pd_upper &&
     119      606616 :             p->pd_upper <= p->pd_special &&
     120      606616 :             p->pd_special <= BLCKSZ &&
     121      606616 :             p->pd_special == MAXALIGN(p->pd_special))
     122      606616 :             header_sane = true;
     123             : 
     124      606616 :         if (header_sane && !checksum_failure)
     125      606616 :             return true;
     126             :     }
     127             : 
     128             :     /* Check all-zeroes case */
     129           4 :     all_zeroes = true;
     130           4 :     pagebytes = (size_t *) page;
     131        4100 :     for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)    /* compare one size_t word per iteration */
     132             :     {
     133        4096 :         if (pagebytes[i] != 0)
     134             :         {
     135           0 :             all_zeroes = false;
     136           0 :             break;
     137             :         }
     138             :     }
     139             : 
     140           4 :     if (all_zeroes)
     141           4 :         return true;
     142             : 
     143             :     /*
     144             :      * Throw a WARNING if the checksum fails, but only after we've checked for
     145             :      * the all-zeroes case.
     146             :      */
     147           0 :     if (checksum_failure)
     148             :     {
     149           0 :         if ((flags & PIV_LOG_WARNING) != 0)
     150           0 :             ereport(WARNING,
     151             :                     (errcode(ERRCODE_DATA_CORRUPTED),
     152             :                      errmsg("page verification failed, calculated checksum %u but expected %u",
     153             :                             checksum, p->pd_checksum)));
     154             : 
     155           0 :         if ((flags & PIV_REPORT_STAT) != 0)
     156           0 :             pgstat_report_checksum_failure();
     157             : 
     158           0 :         if (header_sane && ignore_checksum_failure)    /* the GUC never overrides an insane header */
     159           0 :             return true;
     160             :     }
     161             : 
     162           0 :     return false;
     163             : }
     164             : 
     165             : 
     166             : /*
     167             :  *  PageAddItemExtended
     168             :  *
     169             :  *  Add an item to a page.  Return value is the offset at which it was
     170             :  *  inserted, or InvalidOffsetNumber if the item is not inserted for any
     171             :  *  reason.  A WARNING is issued indicating the reason for the refusal,
                     :  *  except when the item simply does not fit in the page's free space;
                     :  *  that case returns InvalidOffsetNumber without any message.  Page
                     :  *  pointers (pd_lower, pd_upper, pd_special) that fail the sanity check
                     :  *  at the top of the function are reported with PANIC.
     172             :  *
     173             :  *  offsetNumber must be either InvalidOffsetNumber to specify finding a
     174             :  *  free line pointer, or a value between FirstOffsetNumber and one past
     175             :  *  the last existing item, to specify using that particular line pointer.
     176             :  *
     177             :  *  If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
     178             :  *  the item at the specified offsetNumber, which must be either a
     179             :  *  currently-unused line pointer, or one past the last existing item.
     180             :  *
     181             :  *  If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
     182             :  *  the item at the specified offsetNumber, moving existing items later
     183             :  *  in the array to make room.
     184             :  *
     185             :  *  If offsetNumber is not valid, then assign a slot by finding the first
     186             :  *  one that is both unused and deallocated.
     187             :  *
     188             :  *  If flag PAI_IS_HEAP is set, we enforce that there can't be more than
     189             :  *  MaxHeapTuplesPerPage line pointers on the page.
     190             :  *
                     :  *  size bytes are copied from item to the new pd_upper.  The space taken
                     :  *  from the page is MAXALIGN(size), but the line pointer records size
                     :  *  itself.
                     :  *
     191             :  *  !!! EREPORT(ERROR) IS DISALLOWED HERE !!!
     192             :  */
     193             : OffsetNumber
     194    74752202 : PageAddItemExtended(Page page,
     195             :                     Item item,
     196             :                     Size size,
     197             :                     OffsetNumber offsetNumber,
     198             :                     int flags)
     199             : {
     200    74752202 :     PageHeader  phdr = (PageHeader) page;
     201             :     Size        alignedSize;
     202             :     int         lower;
     203             :     int         upper;
     204             :     ItemId      itemId;
     205             :     OffsetNumber limit;
     206    74752202 :     bool        needshuffle = false;        /* set when existing line pointers must be moved up */
     207             : 
     208             :     /*
     209             :      * Be wary about corrupted page pointers
     210             :      */
     211    74752202 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
     212    74752202 :         phdr->pd_lower > phdr->pd_upper ||
     213    74752202 :         phdr->pd_upper > phdr->pd_special ||
     214    74752202 :         phdr->pd_special > BLCKSZ)
     215           0 :         ereport(PANIC,
     216             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     217             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
     218             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
     219             : 
     220             :     /*
     221             :      * Select offsetNumber to place the new item at
     222             :      */
     223    74752202 :     limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));    /* one past the current maximum offset */
     224             : 
     225             :     /* was offsetNumber passed in? */
     226    74752202 :     if (OffsetNumberIsValid(offsetNumber))
     227             :     {
     228             :         /* yes, check it */
     229    50552292 :         if ((flags & PAI_OVERWRITE) != 0)
     230             :         {
     231      358468 :             if (offsetNumber < limit)
     232             :             {
     233        2496 :                 itemId = PageGetItemId(phdr, offsetNumber);
     234        2496 :                 if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
     235             :                 {
     236           0 :                     elog(WARNING, "will not overwrite a used ItemId");
     237           0 :                     return InvalidOffsetNumber;
     238             :                 }
     239             :             }
     240             :         }
     241             :         else
     242             :         {
     243    50193824 :             if (offsetNumber < limit)
     244     6931062 :                 needshuffle = true; /* need to move existing linp's */
     245             :         }
     246             :     }
     247             :     else
     248             :     {
     249             :         /* offsetNumber was not passed in, so find a free slot */
     250             :         /* if no free slot, we'll put it at limit (1st open slot) */
     251    24199910 :         if (PageHasFreeLinePointers(phdr))
     252             :         {
     253             :             /*
     254             :              * Look for "recyclable" (unused) ItemId.  We check for no storage
     255             :              * as well, just to be paranoid --- unused items should never have
     256             :              * storage.
     257             :              */
     258    12448100 :             for (offsetNumber = 1; offsetNumber < limit; offsetNumber++)
     259             :             {
     260    12437264 :                 itemId = PageGetItemId(phdr, offsetNumber);
     261    12437264 :                 if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
     262      232474 :                     break;
     263             :             }
     264      243310 :             if (offsetNumber >= limit)      /* loop ran off the end, so offsetNumber is now limit */
     265             :             {
     266             :                 /* the hint is wrong, so reset it */
     267       10836 :                 PageClearHasFreeLinePointers(phdr);
     268             :             }
     269             :         }
     270             :         else
     271             :         {
     272             :             /* don't bother searching if hint says there's no free slot */
     273    23956600 :             offsetNumber = limit;
     274             :         }
     275             :     }
     276             : 
     277             :     /* Reject placing items beyond the first unused line pointer */
     278    74752202 :     if (offsetNumber > limit)
     279             :     {
     280           0 :         elog(WARNING, "specified item offset is too large");
     281           0 :         return InvalidOffsetNumber;
     282             :     }
     283             : 
     284             :     /* Reject placing items beyond heap boundary, if heap */
     285    74752202 :     if ((flags & PAI_IS_HEAP) != 0 && offsetNumber > MaxHeapTuplesPerPage)
     286             :     {
     287           0 :         elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
     288           0 :         return InvalidOffsetNumber;
     289             :     }
     290             : 
     291             :     /*
     292             :      * Compute new lower and upper pointers for page, see if it'll fit.
     293             :      *
     294             :      * Note: do arithmetic as signed ints, to avoid mistakes if, say,
     295             :      * alignedSize > pd_upper.
     296             :      */
     297    74752202 :     if (offsetNumber == limit || needshuffle)
     298    74517232 :         lower = phdr->pd_lower + sizeof(ItemIdData);    /* line pointer array grows by one entry */
     299             :     else
     300      234970 :         lower = phdr->pd_lower;             /* reusing an existing line pointer slot */
     301             : 
     302    74752202 :     alignedSize = MAXALIGN(size);
     303             : 
     304    74752202 :     upper = (int) phdr->pd_upper - (int) alignedSize;
     305             : 
     306    74752202 :     if (lower > upper)
     307           0 :         return InvalidOffsetNumber;         /* does not fit; note that no WARNING is emitted here */
     308             : 
     309             :     /*
     310             :      * OK to insert the item.  First, shuffle the existing pointers if needed.
     311             :      */
     312    74752202 :     itemId = PageGetItemId(phdr, offsetNumber);
     313             : 
     314    74752202 :     if (needshuffle)
     315     6931062 :         memmove(itemId + 1, itemId,
     316     6931062 :                 (limit - offsetNumber) * sizeof(ItemIdData));
     317             : 
     318             :     /* set the line pointer */
     319    74752202 :     ItemIdSetNormal(itemId, upper, size);   /* stores size, not alignedSize */
     320             : 
     321             :     /*
     322             :      * Items normally contain no uninitialized bytes.  Core bufpage consumers
     323             :      * conform, but this is not a necessary coding rule; a new index AM could
     324             :      * opt to depart from it.  However, data type input functions and other
     325             :      * C-language functions that synthesize datums should initialize all
     326             :      * bytes; datumIsEqual() relies on this.  Testing here, along with the
     327             :      * similar check in printtup(), helps to catch such mistakes.
     328             :      *
     329             :      * Values of the "name" type retrieved via index-only scans may contain
     330             :      * uninitialized bytes; see comment in btrescan().  Valgrind will report
     331             :      * this as an error, but it is safe to ignore.
     332             :      */
     333             :     VALGRIND_CHECK_MEM_IS_DEFINED(item, size);
     334             : 
     335             :     /* copy the item's data onto the page */
     336    74752202 :     memcpy((char *) page + upper, item, size);
     337             : 
     338             :     /* adjust page header */
     339    74752202 :     phdr->pd_lower = (LocationIndex) lower;
     340    74752202 :     phdr->pd_upper = (LocationIndex) upper;
     341             : 
     342    74752202 :     return offsetNumber;
     343             : }
     344             : 
     345             : 
     346             : /*
     347             :  * PageGetTempPage
     348             :  *      Get a temporary page in local memory for special processing.
     349             :  *      The returned page is not initialized at all; caller must do that.
                     :  *
                     :  *      The given page is consulted only for its page size.  The result is
                     :  *      allocated with palloc().
     350             :  */
     351             : Page
     352       51728 : PageGetTempPage(Page page)
     353             : {
     354             :     Size        pageSize;
     355             :     Page        temp;
     356             : 
     357       51728 :     pageSize = PageGetPageSize(page);       /* size is taken from the existing page */
     358       51728 :     temp = (Page) palloc(pageSize);
     359             : 
     360       51728 :     return temp;
     361             : }
     362             : 
     363             : /*
     364             :  * PageGetTempPageCopy
     365             :  *      Get a temporary page in local memory for special processing.
     366             :  *      The page is initialized by copying the contents of the given page.
                     :  *
                     :  *      The copy is allocated with palloc() and covers PageGetPageSize(page)
                     :  *      bytes, i.e. the whole page including header and special space.
     367             :  */
     368             : Page
     369        6120 : PageGetTempPageCopy(Page page)
     370             : {
     371             :     Size        pageSize;
     372             :     Page        temp;
     373             : 
     374        6120 :     pageSize = PageGetPageSize(page);
     375        6120 :     temp = (Page) palloc(pageSize);
     376             : 
     377        6120 :     memcpy(temp, page, pageSize);           /* byte-for-byte copy of the source page */
     378             : 
     379        6120 :     return temp;
     380             : }
     381             : 
     382             : /*
     383             :  * PageGetTempPageCopySpecial
     384             :  *      Get a temporary page in local memory for special processing.
     385             :  *      The page is PageInit'd with the same special-space size as the
     386             :  *      given page, and the special space is copied from the given page.
                     :  *
                     :  *      Nothing else is copied from the given page: outside the special space
                     :  *      the result is exactly what PageInit() produced.  The result is
                     :  *      allocated with palloc().
     387             :  */
     388             : Page
     389       23264 : PageGetTempPageCopySpecial(Page page)
     390             : {
     391             :     Size        pageSize;
     392             :     Page        temp;
     393             : 
     394       23264 :     pageSize = PageGetPageSize(page);
     395       23264 :     temp = (Page) palloc(pageSize);
     396             : 
     397       23264 :     PageInit(temp, pageSize, PageGetSpecialSize(page));    /* must precede the copy: PageInit zeroes the page */
     398       46528 :     memcpy(PageGetSpecialPointer(temp),
     399       23264 :            PageGetSpecialPointer(page),
     400       23264 :            PageGetSpecialSize(page));
     401             : 
     402       23264 :     return temp;
     403             : }
     404             : 
     405             : /*
     406             :  * PageRestoreTempPage
     407             :  *      Copy temporary page back to permanent page after special processing
     408             :  *      and release the temporary page.
                     :  *
                     :  *      The number of bytes copied is the page size recorded in tempPage.
                     :  *      tempPage is pfree'd here, so the caller must not use it afterwards.
     409             :  */
     410             : void
     411       75684 : PageRestoreTempPage(Page tempPage, Page oldPage)
     412             : {
     413             :     Size        pageSize;
     414             : 
     415       75684 :     pageSize = PageGetPageSize(tempPage);   /* size comes from the temp page, not oldPage */
     416       75684 :     memcpy((char *) oldPage, (char *) tempPage, pageSize);
     417             : 
     418       75684 :     pfree(tempPage);
     419       75684 : }
     420             : 
     421             : /*
     422             :  * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
                     :  *
                     :  * compactify_tuples() takes an array of these entries.  offsetindex is
                     :  * zero-based: compactify_tuples() adds 1 to it before calling
                     :  * PageGetItemId().
     423             :  */
     424             : typedef struct itemIdCompactData
     425             : {
     426             :     uint16      offsetindex;    /* linp array index */
     427             :     int16       itemoff;        /* page offset of item data */
     428             :     uint16      alignedlen;     /* MAXALIGN(item data len) */
     429             : } itemIdCompactData;
     430             : typedef itemIdCompactData *itemIdCompact;    /* pointer form used for the array argument */
     431             : 
     432             : /*
     433             :  * After removing or marking some line pointers unused, move the tuples to
     434             :  * remove the gaps caused by the removed items and reorder them back into
     435             :  * reverse line pointer order in the page.
     436             :  *
     437             :  * This function can often be fairly hot, so it pays to take some measures to
     438             :  * make it as optimal as possible.
     439             :  *
     440             :  * Callers may pass 'presorted' as true if the 'itemidbase' array is sorted in
     441             :  * descending order of itemoff.  When this is true we can just memmove()
     442             :  * tuples towards the end of the page.  This is quite a common case as it's
     443             :  * the order that tuples are initially inserted into pages.  When we call this
     444             :  * function to defragment the tuples in the page then any new line pointers
     445             :  * added to the page will keep that presorted order, so hitting this case is
     446             :  * still very common for tables that are commonly updated.
     447             :  *
     448             :  * When the 'itemidbase' array is not presorted then we're unable to just
     449             :  * memmove() tuples around freely.  Doing so could cause us to overwrite the
     450             :  * memory belonging to a tuple we've not moved yet.  In this case, we copy all
     451             :  * the tuples that need to be moved into a temporary buffer.  We can then
     452             :  * simply memcpy() out of that temp buffer back into the page at the correct
     453             :  * location.  Tuples are copied back into the page in the same order as the
     454             :  * 'itemidbase' array, so we end up reordering the tuples back into reverse
     455             :  * line pointer order.  This will increase the chances of hitting the
     456             :  * presorted case the next time around.
     457             :  *
     458             :  * Callers must ensure that nitems is > 0
     459             :  */
     460             : static void
     461      114668 : compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
     462             : {
     463      114668 :     PageHeader  phdr = (PageHeader) page;
     464             :     Offset      upper;
     465             :     Offset      copy_tail;
     466             :     Offset      copy_head;
     467             :     itemIdCompact itemidptr;
     468             :     int         i;
     469             : 
     470             :     /* Code within will not work correctly if nitems == 0 */
     471             :     Assert(nitems > 0);
     472             : 
     473      114668 :     if (presorted)
     474             :     {
     475             : 
     476             : #ifdef USE_ASSERT_CHECKING
     477             :         {
     478             :             /*
     479             :              * Verify we've not gotten any new callers that are incorrectly
     480             :              * passing a true presorted value.
     481             :              */
     482             :             Offset      lastoff = phdr->pd_special;
     483             : 
     484             :             for (i = 0; i < nitems; i++)
     485             :             {
     486             :                 itemidptr = &itemidbase[i];
     487             : 
     488             :                 Assert(lastoff > itemidptr->itemoff);
     489             : 
     490             :                 lastoff = itemidptr->itemoff;
     491             :             }
     492             :         }
     493             : #endif                          /* USE_ASSERT_CHECKING */
     494             : 
     495             :         /*
     496             :          * 'itemidbase' is already in the optimal order, i.e., lower item
     497             :          * pointers have a higher offset.  This allows us to memmove() the
     498             :          * tuples up to the end of the page without having to worry about
     499             :          * overwriting other tuples that have not been moved yet.
     500             :          *
     501             :          * There's a good chance that there are tuples already right at the
     502             :          * end of the page that we can simply skip over because they're
     503             :          * already in the correct location within the page.  We'll do that
     504             :          * first...
     505             :          */
     506       86716 :         upper = phdr->pd_special;
     507       86716 :         i = 0;
     508             :         do
     509             :         {
     510     2019768 :             itemidptr = &itemidbase[i];
     511     2019768 :             if (upper != itemidptr->itemoff + itemidptr->alignedlen)
     512       69634 :                 break;
     513     1950134 :             upper -= itemidptr->alignedlen;
     514             : 
     515     1950134 :             i++;
     516     1950134 :         } while (i < nitems);
     517             : 
     518             :         /*
     519             :          * Now that we've found the first tuple that needs to be moved, we can
     520             :          * do the tuple compactification.  We try and make the least number of
     521             :          * memmove() calls and only call memmove() when there's a gap.  When
     522             :          * we see a gap we just move all tuples after the gap up until the
     523             :          * point of the last move operation.
     524             :          */
     525       86716 :         copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
     526     1349050 :         for (; i < nitems; i++)
     527             :         {
     528             :             ItemId      lp;
     529             : 
     530     1262334 :             itemidptr = &itemidbase[i];
     531     1262334 :             lp = PageGetItemId(page, itemidptr->offsetindex + 1);
     532             : 
     533     1262334 :             if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
     534             :             {
     535      291104 :                 memmove((char *) page + upper,
     536      145552 :                         page + copy_head,
     537      145552 :                         copy_tail - copy_head);
     538             : 
     539             :                 /*
     540             :                  * We've now moved all tuples already seen, but not the
     541             :                  * current tuple, so we set the copy_tail to the end of this
     542             :                  * tuple so it can be moved in another iteration of the loop.
     543             :                  */
     544      145552 :                 copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
     545             :             }
     546             :             /* shift the target offset down by the length of this tuple */
     547     1262334 :             upper -= itemidptr->alignedlen;
     548             :             /* point the copy_head to the start of this tuple */
     549     1262334 :             copy_head = itemidptr->itemoff;
     550             : 
     551             :             /* update the line pointer to reference the new offset */
     552     1262334 :             lp->lp_off = upper;
     553             : 
     554             :         }
     555             : 
     556             :         /* move the remaining tuples. */
     557      173432 :         memmove((char *) page + upper,
     558       86716 :                 page + copy_head,
     559       86716 :                 copy_tail - copy_head);
     560             :     }
     561             :     else
     562             :     {
     563             :         PGAlignedBlock scratch;
     564       27952 :         char       *scratchptr = scratch.data;
     565             : 
     566             :         /*
     567             :          * Non-presorted case:  The tuples in the itemidbase array may be in
     568             :          * any order.  So, in order to move these to the end of the page we
     569             :          * must make a temp copy of each tuple that needs to be moved before
     570             :          * we copy them back into the page at the new offset.
     571             :          *
     572             :          * If a large percentage of tuples have been pruned (>75%) then we'll
     573             :          * copy the remaining ones into the temp buffer tuple-by-tuple;
     574             :          * otherwise we'll just do a single memcpy() for all tuples that need
     575             :          * to be moved.  When so many tuples have been removed there's likely
     576             :          * to be a lot of gaps and it's unlikely that many non-movable tuples
     577             :          * remain at the end of the page.
     578             :          */
     579       27952 :         if (nitems < PageGetMaxOffsetNumber(page) / 4)
     580             :         {
     581         696 :             i = 0;
     582             :             do
     583             :             {
     584       15398 :                 itemidptr = &itemidbase[i];
     585       15398 :                 memcpy(scratchptr + itemidptr->itemoff, page + itemidptr->itemoff,
     586       15398 :                        itemidptr->alignedlen);
     587       15398 :                 i++;
     588       15398 :             } while (i < nitems);
     589             : 
     590             :             /* Set things up for the compactification code below */
     591         696 :             i = 0;
     592         696 :             itemidptr = &itemidbase[0];
     593         696 :             upper = phdr->pd_special;
     594             :         }
     595             :         else
     596             :         {
     597       27256 :             upper = phdr->pd_special;
     598             : 
     599             :             /*
     600             :              * Many tuples are likely to already be in the correct location.
     601             :              * There's no need to copy these into the temp buffer.  Instead
     602             :              * we'll just skip forward in the itemidbase array to the position
     603             :              * that we do need to move tuples from so that the code below just
     604             :              * leaves these ones alone.
     605             :              */
     606       27256 :             i = 0;
     607             :             do
     608             :             {
     609      563448 :                 itemidptr = &itemidbase[i];
     610      563448 :                 if (upper != itemidptr->itemoff + itemidptr->alignedlen)
     611       27256 :                     break;
     612      536192 :                 upper -= itemidptr->alignedlen;
     613             : 
     614      536192 :                 i++;
     615      536192 :             } while (i < nitems);
     616             : 
     617             :             /* Copy all tuples that need to be moved into the temp buffer */
     618       54512 :             memcpy(scratchptr + phdr->pd_upper,
     619       27256 :                    page + phdr->pd_upper,
     620       27256 :                    upper - phdr->pd_upper);
     621             :         }
     622             : 
     623             :         /*
     624             :          * Do the tuple compactification.  itemidptr is already pointing to
     625             :          * the first tuple that we're going to move.  Here we collapse the
     626             :          * memcpy calls for adjacent tuples into a single call.  This is done
     627             :          * by delaying the memcpy call until we find a gap that needs to be
     628             :          * closed.
     629             :          */
     630       27952 :         copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
     631     3154548 :         for (; i < nitems; i++)
     632             :         {
     633             :             ItemId      lp;
     634             : 
     635     3126596 :             itemidptr = &itemidbase[i];
     636     3126596 :             lp = PageGetItemId(page, itemidptr->offsetindex + 1);
     637             : 
     638             :             /* copy pending tuples when we detect a gap */
     639     3126596 :             if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
     640             :             {
     641     1122588 :                 memcpy((char *) page + upper,
     642      561294 :                        scratchptr + copy_head,
     643      561294 :                        copy_tail - copy_head);
     644             : 
     645             :                 /*
     646             :                  * We've now copied all tuples already seen, but not the
     647             :                  * current tuple, so we set the copy_tail to the end of this
     648             :                  * tuple.
     649             :                  */
     650      561294 :                 copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
     651             :             }
     652             :             /* shift the target offset down by the length of this tuple */
     653     3126596 :             upper -= itemidptr->alignedlen;
     654             :             /* point the copy_head to the start of this tuple */
     655     3126596 :             copy_head = itemidptr->itemoff;
     656             : 
     657             :             /* update the line pointer to reference the new offset */
     658     3126596 :             lp->lp_off = upper;
     659             : 
     660             :         }
     661             : 
     662             :         /* Copy the remaining chunk */
     663       55904 :         memcpy((char *) page + upper,
     664       27952 :                scratchptr + copy_head,
     665       27952 :                copy_tail - copy_head);
     666             :     }
     667             : 
     668      114668 :     phdr->pd_upper = upper;
     669      114668 : }
     670             : 
     671             : /*
     672             :  * PageRepairFragmentation
     673             :  *
     674             :  * Frees fragmented space on a page.
     675             :  * It doesn't remove unused line pointers! Please don't change this.
     676             :  *
     677             :  * This routine is usable for heap pages only, but see PageIndexMultiDelete.
     678             :  *
     679             :  * As a side effect, the page's PD_HAS_FREE_LINES hint bit is updated.
     680             :  */
     681             : void
     682      106532 : PageRepairFragmentation(Page page)
     683             : {
     684      106532 :     Offset      pd_lower = ((PageHeader) page)->pd_lower;
     685      106532 :     Offset      pd_upper = ((PageHeader) page)->pd_upper;
     686      106532 :     Offset      pd_special = ((PageHeader) page)->pd_special;
     687             :     Offset      last_offset;
     688             :     itemIdCompactData itemidbase[MaxHeapTuplesPerPage];
     689             :     itemIdCompact itemidptr;
     690             :     ItemId      lp;
     691             :     int         nline,
     692             :                 nstorage,
     693             :                 nunused;
     694             :     int         i;
     695             :     Size        totallen;
     696      106532 :     bool        presorted = true;   /* For now */
     697             : 
     698             :     /*
     699             :      * It's worth the trouble to be more paranoid here than in most places,
     700             :      * because we are about to reshuffle data in (what is usually) a shared
     701             :      * disk buffer.  If we aren't careful then corrupted pointers, lengths,
     702             :      * etc. could cause us to clobber adjacent disk buffers, spreading the data
     703             :      * loss further.  So, check everything.
     704             :      */
     705      106532 :     if (pd_lower < SizeOfPageHeaderData ||
     706      106532 :         pd_lower > pd_upper ||
     707      106532 :         pd_upper > pd_special ||
     708      106532 :         pd_special > BLCKSZ ||
     709      106532 :         pd_special != MAXALIGN(pd_special))
     710           0 :         ereport(ERROR,
     711             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     712             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
     713             :                         pd_lower, pd_upper, pd_special)));
     714             : 
     715             :     /*
     716             :      * Run through the line pointer array and collect data about live items.
     717             :      */
     718      106532 :     nline = PageGetMaxOffsetNumber(page);
     719      106532 :     itemidptr = itemidbase;
     720      106532 :     nunused = totallen = 0;
     721      106532 :     last_offset = pd_special;
     722     7848926 :     for (i = FirstOffsetNumber; i <= nline; i++)
     723             :     {
     724     7742394 :         lp = PageGetItemId(page, i);
     725     7742394 :         if (ItemIdIsUsed(lp))
     726             :         {
     727     6206152 :             if (ItemIdHasStorage(lp))
     728             :             {
     729     2971840 :                 itemidptr->offsetindex = i - 1;
     730     2971840 :                 itemidptr->itemoff = ItemIdGetOffset(lp);
     731             : 
     732     2971840 :                 if (last_offset > itemidptr->itemoff)
     733     2858260 :                     last_offset = itemidptr->itemoff;
     734             :                 else
     735      113580 :                     presorted = false;
     736             : 
     737     2971840 :                 if (unlikely(itemidptr->itemoff < (int) pd_upper ||
     738             :                              itemidptr->itemoff >= (int) pd_special))
     739           0 :                     ereport(ERROR,
     740             :                             (errcode(ERRCODE_DATA_CORRUPTED),
     741             :                              errmsg("corrupted line pointer: %u",
     742             :                                     itemidptr->itemoff)));
     743     2971840 :                 itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
     744     2971840 :                 totallen += itemidptr->alignedlen;
     745     2971840 :                 itemidptr++;
     746             :             }
     747             :         }
     748             :         else
     749             :         {
     750             :             /* Unused entries should have lp_len = 0, but make sure */
     751     1536242 :             ItemIdSetUnused(lp);
     752     1536242 :             nunused++;
     753             :         }
     754             :     }
     755             : 
     756      106532 :     nstorage = itemidptr - itemidbase;
     757      106532 :     if (nstorage == 0)
     758             :     {
     759             :         /* Page is completely empty, so just reset it quickly */
     760       19182 :         ((PageHeader) page)->pd_upper = pd_special;
     761             :     }
     762             :     else
     763             :     {
     764             :         /* Need to compact the page the hard way */
     765       87350 :         if (totallen > (Size) (pd_special - pd_lower))
     766           0 :             ereport(ERROR,
     767             :                     (errcode(ERRCODE_DATA_CORRUPTED),
     768             :                      errmsg("corrupted item lengths: total %u, available space %u",
     769             :                             (unsigned int) totallen, pd_special - pd_lower)));
     770             : 
     771       87350 :         compactify_tuples(itemidbase, nstorage, page, presorted);
     772             :     }
     773             : 
     774             :     /* Set hint bit for PageAddItem */
     775      106532 :     if (nunused > 0)
     776       43518 :         PageSetHasFreeLinePointers(page);
     777             :     else
     778       63014 :         PageClearHasFreeLinePointers(page);
     779      106532 : }
     780             : 
     781             : /*
     782             :  * PageGetFreeSpace
     783             :  *      Returns the size of the free (allocatable) space on a page,
     784             :  *      reduced by the space needed for a new line pointer.
     785             :  *
     786             :  * Note: this should usually only be used on index pages.  Use
     787             :  * PageGetHeapFreeSpace on heap pages.
     788             :  */
     789             : Size
     790    69658288 : PageGetFreeSpace(Page page)
     791             : {
     792             :     int         space;
     793             : 
     794             :     /*
     795             :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     796             :      * pd_upper.
     797             :      */
     798   139316576 :     space = (int) ((PageHeader) page)->pd_upper -
     799    69658288 :         (int) ((PageHeader) page)->pd_lower;
     800             : 
     801    69658288 :     if (space < (int) sizeof(ItemIdData))
     802        9262 :         return 0;
     803    69649026 :     space -= sizeof(ItemIdData);
     804             : 
     805    69649026 :     return (Size) space;
     806             : }
     807             : 
     808             : /*
     809             :  * PageGetFreeSpaceForMultipleTuples
     810             :  *      Returns the size of the free (allocatable) space on a page,
     811             :  *      reduced by the space needed for multiple new line pointers.
     812             :  *
     813             :  * Note: this should usually only be used on index pages.  Use
     814             :  * PageGetHeapFreeSpace on heap pages.
     815             :  */
     816             : Size
     817       78176 : PageGetFreeSpaceForMultipleTuples(Page page, int ntups)
     818             : {
     819             :     int         space;
     820             : 
     821             :     /*
     822             :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     823             :      * pd_upper.
     824             :      */
     825      156352 :     space = (int) ((PageHeader) page)->pd_upper -
     826       78176 :         (int) ((PageHeader) page)->pd_lower;
     827             : 
     828       78176 :     if (space < (int) (ntups * sizeof(ItemIdData)))
     829           0 :         return 0;
     830       78176 :     space -= ntups * sizeof(ItemIdData);
     831             : 
     832       78176 :     return (Size) space;
     833             : }
     834             : 
     835             : /*
     836             :  * PageGetExactFreeSpace
     837             :  *      Returns the size of the free (allocatable) space on a page,
     838             :  *      without any consideration for adding/removing line pointers.
     839             :  */
     840             : Size
     841     2008300 : PageGetExactFreeSpace(Page page)
     842             : {
     843             :     int         space;
     844             : 
     845             :     /*
     846             :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     847             :      * pd_upper.
     848             :      */
     849     4016600 :     space = (int) ((PageHeader) page)->pd_upper -
     850     2008300 :         (int) ((PageHeader) page)->pd_lower;
     851             : 
     852     2008300 :     if (space < 0)
     853           0 :         return 0;
     854             : 
     855     2008300 :     return (Size) space;
     856             : }
     857             : 
     858             : 
     859             : /*
     860             :  * PageGetHeapFreeSpace
     861             :  *      Returns the size of the free (allocatable) space on a page,
     862             :  *      reduced by the space needed for a new line pointer.
     863             :  *
     864             :  * The difference between this and PageGetFreeSpace is that this will return
     865             :  * zero if there are already MaxHeapTuplesPerPage line pointers in the page
     866             :  * and none are free.  We use this to enforce that no more than
     867             :  * MaxHeapTuplesPerPage line pointers are created on a heap page.  (Although
     868             :  * no more tuples than that could fit anyway, in the presence of redirected
     869             :  * or dead line pointers it'd be possible to have too many line pointers.
     870             :  * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
     871             :  * on the number of line pointers, we make this extra check.)
     872             :  */
     873             : Size
     874    25349552 : PageGetHeapFreeSpace(Page page)
     875             : {
     876             :     Size        space;
     877             : 
     878    25349552 :     space = PageGetFreeSpace(page);
     879    25349552 :     if (space > 0)
     880             :     {
     881             :         OffsetNumber offnum,
     882             :                     nline;
     883             : 
     884             :         /*
     885             :          * Are there already MaxHeapTuplesPerPage line pointers in the page?
     886             :          */
     887    25308336 :         nline = PageGetMaxOffsetNumber(page);
     888    25308336 :         if (nline >= MaxHeapTuplesPerPage)
     889             :         {
     890      115040 :             if (PageHasFreeLinePointers((PageHeader) page))
     891             :             {
     892             :                 /*
     893             :                  * Since this is just a hint, we must confirm that there is
     894             :                  * indeed a free line pointer
     895             :                  */
     896    11648704 :                 for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
     897             :                 {
     898    11648422 :                     ItemId      lp = PageGetItemId(page, offnum);
     899             : 
     900    11648422 :                     if (!ItemIdIsUsed(lp))
     901      112746 :                         break;
     902             :                 }
     903             : 
     904      113028 :                 if (offnum > nline)
     905             :                 {
     906             :                     /*
     907             :                      * The hint is wrong, but we can't clear it here since we
     908             :                      * don't have the ability to mark the page dirty.
     909             :                      */
     910         282 :                     space = 0;
     911             :                 }
     912             :             }
     913             :             else
     914             :             {
     915             :                 /*
     916             :                  * Although the hint might be wrong, PageAddItem will believe
     917             :                  * it anyway, so we must believe it too.
     918             :                  */
     919        2012 :                 space = 0;
     920             :             }
     921             :         }
     922             :     }
     923    25349552 :     return space;
     924             : }
     925             : 
     926             : 
     927             : /*
     928             :  * PageIndexTupleDelete
     929             :  *
     930             :  * This routine does the work of removing a tuple from an index page.
     931             :  *
     932             :  * Unlike heap pages, we compact out the line pointer for the removed tuple.
     933             :  */
     934             : void
     935      488386 : PageIndexTupleDelete(Page page, OffsetNumber offnum)
     936             : {
     937      488386 :     PageHeader  phdr = (PageHeader) page;
     938             :     char       *addr;
     939             :     ItemId      tup;
     940             :     Size        size;
     941             :     unsigned    offset;
     942             :     int         nbytes;
     943             :     int         offidx;
     944             :     int         nline;
     945             : 
     946             :     /*
     947             :      * As with PageRepairFragmentation, paranoia seems justified.
     948             :      */
     949      488386 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
     950      488386 :         phdr->pd_lower > phdr->pd_upper ||
     951      488386 :         phdr->pd_upper > phdr->pd_special ||
     952      488386 :         phdr->pd_special > BLCKSZ ||
     953      488386 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
     954           0 :         ereport(ERROR,
     955             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     956             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
     957             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
     958             : 
     959      488386 :     nline = PageGetMaxOffsetNumber(page);
     960      488386 :     if ((int) offnum <= 0 || (int) offnum > nline)
     961           0 :         elog(ERROR, "invalid index offnum: %u", offnum);
     962             : 
     963             :     /* change offset number to offset index */
     964      488386 :     offidx = offnum - 1;
     965             : 
     966      488386 :     tup = PageGetItemId(page, offnum);
     967             :     Assert(ItemIdHasStorage(tup));
     968      488386 :     size = ItemIdGetLength(tup);
     969      488386 :     offset = ItemIdGetOffset(tup);
     970             : 
     971      488386 :     if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
     972      488386 :         offset != MAXALIGN(offset))
     973           0 :         ereport(ERROR,
     974             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     975             :                  errmsg("corrupted line pointer: offset = %u, size = %u",
     976             :                         offset, (unsigned int) size)));
     977             : 
     978             :     /* Amount of space to actually be deleted */
     979      488386 :     size = MAXALIGN(size);
     980             : 
     981             :     /*
     982             :      * First, we want to get rid of the pd_linp entry for the index tuple. We
     983             :      * copy all subsequent linp's back one slot in the array. We don't use
     984             :      * PageGetItemId, because we are manipulating the _array_, not individual
     985             :      * linp's.
     986             :      */
     987      976772 :     nbytes = phdr->pd_lower -
     988      488386 :         ((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);
     989             : 
     990      488386 :     if (nbytes > 0)
     991      952828 :         memmove((char *) &(phdr->pd_linp[offidx]),
     992      476414 :                 (char *) &(phdr->pd_linp[offidx + 1]),
     993             :                 nbytes);
     994             : 
     995             :     /*
     996             :      * Now move everything between the old upper bound (beginning of tuple
     997             :      * space) and the beginning of the deleted tuple forward, so that space in
     998             :      * the middle of the page is left free.  If we've just deleted the tuple
     999             :      * at the beginning of tuple space, then there's no need to do the copy.
    1000             :      */
    1001             : 
    1002             :     /* beginning of tuple space */
    1003      488386 :     addr = (char *) page + phdr->pd_upper;
    1004             : 
    1005      488386 :     if (offset > phdr->pd_upper)
    1006      476516 :         memmove(addr + size, addr, offset - phdr->pd_upper);
    1007             : 
    1008             :     /* adjust free space boundary pointers */
    1009      488386 :     phdr->pd_upper += size;
    1010      488386 :     phdr->pd_lower -= sizeof(ItemIdData);
    1011             : 
    1012             :     /*
    1013             :      * Finally, we need to adjust the linp entries that remain.
    1014             :      *
    1015             :      * Anything that used to be before the deleted tuple's data was moved
    1016             :      * forward by the size of the deleted tuple.
    1017             :      */
    1018      488386 :     if (!PageIsEmpty(page))
    1019             :     {
    1020             :         int         i;
    1021             : 
    1022      486970 :         nline--;                /* there's one less than when we started */
    1023    83055314 :         for (i = 1; i <= nline; i++)
    1024             :         {
    1025    82568344 :             ItemId      ii = PageGetItemId(phdr, i);
    1026             : 
    1027             :             Assert(ItemIdHasStorage(ii));
    1028    82568344 :             if (ItemIdGetOffset(ii) <= offset)
    1029    55752734 :                 ii->lp_off += size;
    1030             :         }
    1031             :     }
    1032      488386 : }
    1033             : 
    1034             : 
    1035             : /*
    1036             :  * PageIndexMultiDelete
    1037             :  *
    1038             :  * This routine handles the case of deleting multiple tuples from an
    1039             :  * index page at once.  It is considerably faster than a loop around
    1040             :  * PageIndexTupleDelete ... however, the caller *must* supply the array
    1041             :  * of item numbers to be deleted in item number order!
    1042             :  */
    1043             : void
    1044       31170 : PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
    1045             : {
    1046       31170 :     PageHeader  phdr = (PageHeader) page;
    1047       31170 :     Offset      pd_lower = phdr->pd_lower;
    1048       31170 :     Offset      pd_upper = phdr->pd_upper;
    1049       31170 :     Offset      pd_special = phdr->pd_special;
    1050             :     Offset      last_offset;
    1051             :     itemIdCompactData itemidbase[MaxIndexTuplesPerPage];
    1052             :     ItemIdData  newitemids[MaxIndexTuplesPerPage];
    1053             :     itemIdCompact itemidptr;
    1054             :     ItemId      lp;
    1055             :     int         nline,
    1056             :                 nused;
    1057             :     Size        totallen;
    1058             :     Size        size;
    1059             :     unsigned    offset;
    1060             :     int         nextitm;
    1061             :     OffsetNumber offnum;
    1062       31170 :     bool        presorted = true;   /* For now */
    1063             : 
    1064             :     Assert(nitems <= MaxIndexTuplesPerPage);
    1065             : 
    1066             :     /*
    1067             :      * If there aren't very many items to delete, then retail
    1068             :      * PageIndexTupleDelete is the best way.  Delete the items in reverse
    1069             :      * order so we don't have to think about adjusting item numbers for
    1070             :      * previous deletions.
    1071             :      *
    1072             :      * TODO: tune the magic number here
    1073             :      */
    1074       31170 :     if (nitems <= 2)
    1075             :     {
    1076        9688 :         while (--nitems >= 0)
    1077        5980 :             PageIndexTupleDelete(page, itemnos[nitems]);
    1078        3708 :         return;
    1079             :     }
    1080             : 
    1081             :     /*
    1082             :      * As with PageRepairFragmentation, paranoia seems justified.
    1083             :      */
    1084       27462 :     if (pd_lower < SizeOfPageHeaderData ||
    1085       27462 :         pd_lower > pd_upper ||
    1086       27462 :         pd_upper > pd_special ||
    1087       27462 :         pd_special > BLCKSZ ||
    1088       27462 :         pd_special != MAXALIGN(pd_special))
    1089           0 :         ereport(ERROR,
    1090             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1091             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1092             :                         pd_lower, pd_upper, pd_special)));
    1093             : 
    1094             :     /*
    1095             :      * Scan the line pointer array and build a list of just the ones we are
    1096             :      * going to keep.  Notice we do not modify the page yet, since we are
    1097             :      * still validity-checking.
    1098             :      */
    1099       27462 :     nline = PageGetMaxOffsetNumber(page);
    1100       27462 :     itemidptr = itemidbase;
    1101       27462 :     totallen = 0;
    1102       27462 :     nused = 0;
    1103       27462 :     nextitm = 0;
    1104       27462 :     last_offset = pd_special;
    1105     5969438 :     for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
    1106             :     {
    1107     5941976 :         lp = PageGetItemId(page, offnum);
    1108             :         Assert(ItemIdHasStorage(lp));
    1109     5941976 :         size = ItemIdGetLength(lp);
    1110     5941976 :         offset = ItemIdGetOffset(lp);
    1111     5941976 :         if (offset < pd_upper ||
    1112     5941976 :             (offset + size) > pd_special ||
    1113     5941976 :             offset != MAXALIGN(offset))
    1114           0 :             ereport(ERROR,
    1115             :                     (errcode(ERRCODE_DATA_CORRUPTED),
    1116             :                      errmsg("corrupted line pointer: offset = %u, size = %u",
    1117             :                             offset, (unsigned int) size)));
    1118             : 
    1119     5941976 :         if (nextitm < nitems && offnum == itemnos[nextitm])
    1120             :         {
    1121             :             /* skip item to be deleted */
    1122     2038560 :             nextitm++;
    1123             :         }
    1124             :         else
    1125             :         {
    1126     3903416 :             itemidptr->offsetindex = nused; /* where it will go */
    1127     3903416 :             itemidptr->itemoff = offset;
    1128             : 
    1129     3903416 :             if (last_offset > itemidptr->itemoff)
    1130     1485914 :                 last_offset = itemidptr->itemoff;
    1131             :             else
    1132     2417502 :                 presorted = false;
    1133             : 
    1134     3903416 :             itemidptr->alignedlen = MAXALIGN(size);
    1135     3903416 :             totallen += itemidptr->alignedlen;
    1136     3903416 :             newitemids[nused] = *lp;
    1137     3903416 :             itemidptr++;
    1138     3903416 :             nused++;
    1139             :         }
    1140             :     }
    1141             : 
    1142             :     /* this will catch invalid or out-of-order itemnos[] */
    1143       27462 :     if (nextitm != nitems)
    1144           0 :         elog(ERROR, "incorrect index offsets supplied");
    1145             : 
    1146       27462 :     if (totallen > (Size) (pd_special - pd_lower))
    1147           0 :         ereport(ERROR,
    1148             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1149             :                  errmsg("corrupted item lengths: total %u, available space %u",
    1150             :                         (unsigned int) totallen, pd_special - pd_lower)));
    1151             : 
    1152             :     /*
    1153             :      * Looks good. Overwrite the line pointers with the copy, from which we've
    1154             :      * removed all the unused items.
    1155             :      */
    1156       27462 :     memcpy(phdr->pd_linp, newitemids, nused * sizeof(ItemIdData));
    1157       27462 :     phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
    1158             : 
    1159             :     /* and compactify the tuple data */
    1160       27462 :     if (nused > 0)
    1161       27318 :         compactify_tuples(itemidbase, nused, page, presorted);
    1162             :     else
    1163         144 :         phdr->pd_upper = pd_special;
    1164             : }
    1165             : 
    1166             : 
    1167             : /*
    1168             :  * PageIndexTupleDeleteNoCompact
    1169             :  *
    1170             :  * Remove the specified tuple from an index page, but set its line pointer
    1171             :  * to "unused" instead of compacting it out, except that it can be removed
    1172             :  * if it's the last line pointer on the page.
    1173             :  *
    1174             :  * This is used for index AMs that require that existing TIDs of live tuples
    1175             :  * remain unchanged, and are willing to allow unused line pointers instead.
    1176             :  */
    1177             : void
    1178          12 : PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
    1179             : {
    1180          12 :     PageHeader  phdr = (PageHeader) page;
    1181             :     char       *addr;
    1182             :     ItemId      tup;
    1183             :     Size        size;
    1184             :     unsigned    offset;
    1185             :     int         nline;
    1186             : 
    1187             :     /*
    1188             :      * As with PageRepairFragmentation, paranoia seems justified.
    1189             :      */
    1190          12 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1191          12 :         phdr->pd_lower > phdr->pd_upper ||
    1192          12 :         phdr->pd_upper > phdr->pd_special ||
    1193          12 :         phdr->pd_special > BLCKSZ ||
    1194          12 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1195           0 :         ereport(ERROR,
    1196             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1197             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1198             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1199             : 
    1200          12 :     nline = PageGetMaxOffsetNumber(page);
    1201          12 :     if ((int) offnum <= 0 || (int) offnum > nline)
    1202           0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1203             : 
    1204          12 :     tup = PageGetItemId(page, offnum);
    1205             :     Assert(ItemIdHasStorage(tup));
    1206          12 :     size = ItemIdGetLength(tup);
    1207          12 :     offset = ItemIdGetOffset(tup);
    1208             : 
    1209          12 :     if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
    1210          12 :         offset != MAXALIGN(offset))
    1211           0 :         ereport(ERROR,
    1212             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1213             :                  errmsg("corrupted line pointer: offset = %u, size = %u",
    1214             :                         offset, (unsigned int) size)));
    1215             : 
    1216             :     /* Amount of space to actually be deleted */
    1217          12 :     size = MAXALIGN(size);
    1218             : 
    1219             :     /*
    1220             :      * Either set the line pointer to "unused", or zap it if it's the last
    1221             :      * one.  (Note: it's possible that the next-to-last one(s) are already
    1222             :      * unused, but we do not trouble to try to compact them out if so.)
    1223             :      */
    1224          12 :     if ((int) offnum < nline)
    1225           4 :         ItemIdSetUnused(tup);
    1226             :     else
    1227             :     {
    1228           8 :         phdr->pd_lower -= sizeof(ItemIdData);
    1229           8 :         nline--;                /* there's one less than when we started */
    1230             :     }
    1231             : 
    1232             :     /*
    1233             :      * Now move everything between the old upper bound (beginning of tuple
    1234             :      * space) and the beginning of the deleted tuple forward, so that space in
    1235             :      * the middle of the page is left free.  If we've just deleted the tuple
    1236             :      * at the beginning of tuple space, then there's no need to do the copy.
    1237             :      */
    1238             : 
    1239             :     /* beginning of tuple space */
    1240          12 :     addr = (char *) page + phdr->pd_upper;
    1241             : 
    1242          12 :     if (offset > phdr->pd_upper)
    1243           4 :         memmove(addr + size, addr, offset - phdr->pd_upper);
    1244             : 
    1245             :     /* adjust free space boundary pointer */
    1246          12 :     phdr->pd_upper += size;
    1247             : 
    1248             :     /*
    1249             :      * Finally, we need to adjust the linp entries that remain.
    1250             :      *
    1251             :      * Anything that used to be before the deleted tuple's data was moved
    1252             :      * forward by the size of the deleted tuple.
    1253             :      */
    1254          12 :     if (!PageIsEmpty(page))
    1255             :     {
    1256             :         int         i;
    1257             : 
    1258         136 :         for (i = 1; i <= nline; i++)
    1259             :         {
    1260         124 :             ItemId      ii = PageGetItemId(phdr, i);
    1261             : 
    1262         124 :             if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
    1263          36 :                 ii->lp_off += size;
    1264             :         }
    1265             :     }
    1266          12 : }
    1267             : 
    1268             : 
    1269             : /*
    1270             :  * PageIndexTupleOverwrite
    1271             :  *
    1272             :  * Replace a specified tuple on an index page.
    1273             :  *
    1274             :  * The new tuple is placed exactly where the old one had been, shifting
    1275             :  * other tuples' data up or down as needed to keep the page compacted.
    1276             :  * This is better than deleting and reinserting the tuple, because it
    1277             :  * avoids any data shifting when the tuple size doesn't change; and
    1278             :  * even when it does, we avoid moving the line pointers around.
    1279             :  * This could be used by an index AM that doesn't want to unset the
    1280             :  * LP_DEAD bit when it happens to be set.  It could conceivably also be
    1281             :  * used by an index AM that cares about the physical order of tuples as
    1282             :  * well as their logical/ItemId order.
    1283             :  *
    1284             :  * If there's insufficient space for the new tuple, return false.  Other
    1285             :  * errors represent data-corruption problems, so we just elog.
    1286             :  */
    1287             : bool
    1288      479868 : PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
    1289             :                         Item newtup, Size newsize)
    1290             : {
    1291      479868 :     PageHeader  phdr = (PageHeader) page;
    1292             :     ItemId      tupid;
    1293             :     int         oldsize;
    1294             :     unsigned    offset;
    1295             :     Size        alignednewsize;
    1296             :     int         size_diff;
    1297             :     int         itemcount;
    1298             : 
    1299             :     /*
    1300             :      * As with PageRepairFragmentation, paranoia seems justified.
    1301             :      */
    1302      479868 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1303      479868 :         phdr->pd_lower > phdr->pd_upper ||
    1304      479868 :         phdr->pd_upper > phdr->pd_special ||
    1305      479868 :         phdr->pd_special > BLCKSZ ||
    1306      479868 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1307           0 :         ereport(ERROR,
    1308             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1309             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1310             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1311             : 
    1312      479868 :     itemcount = PageGetMaxOffsetNumber(page);
    1313      479868 :     if ((int) offnum <= 0 || (int) offnum > itemcount)
    1314           0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1315             : 
    1316      479868 :     tupid = PageGetItemId(page, offnum);
    1317             :     Assert(ItemIdHasStorage(tupid));
    1318      479868 :     oldsize = ItemIdGetLength(tupid);
    1319      479868 :     offset = ItemIdGetOffset(tupid);
    1320             : 
    1321      479868 :     if (offset < phdr->pd_upper || (offset + oldsize) > phdr->pd_special ||
    1322      479868 :         offset != MAXALIGN(offset))
    1323           0 :         ereport(ERROR,
    1324             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1325             :                  errmsg("corrupted line pointer: offset = %u, size = %u",
    1326             :                         offset, (unsigned int) oldsize)));
    1327             : 
    1328             :     /*
    1329             :      * Determine actual change in space requirement, check for page overflow.
    1330             :      */
    1331      479868 :     oldsize = MAXALIGN(oldsize);
    1332      479868 :     alignednewsize = MAXALIGN(newsize);
    1333      479868 :     if (alignednewsize > oldsize + (phdr->pd_upper - phdr->pd_lower))
    1334           0 :         return false;
    1335             : 
    1336             :     /*
    1337             :      * Relocate existing data and update line pointers, unless the new tuple
    1338             :      * is the same size as the old (after alignment), in which case there's
    1339             :      * nothing to do.  Notice that what we have to relocate is data before the
    1340             :      * target tuple, not data after, so it's convenient to express size_diff
    1341             :      * as the amount by which the tuple's size is decreasing, making it the
    1342             :      * delta to add to pd_upper and affected line pointers.
    1343             :      */
    1344      479868 :     size_diff = oldsize - (int) alignednewsize;
    1345      479868 :     if (size_diff != 0)
    1346             :     {
    1347       42014 :         char       *addr = (char *) page + phdr->pd_upper;
    1348             :         int         i;
    1349             : 
    1350             :         /* relocate all tuple data before the target tuple */
    1351       42014 :         memmove(addr + size_diff, addr, offset - phdr->pd_upper);
    1352             : 
    1353             :         /* adjust free space boundary pointer */
    1354       42014 :         phdr->pd_upper += size_diff;
    1355             : 
    1356             :         /* adjust affected line pointers too */
    1357     6252502 :         for (i = FirstOffsetNumber; i <= itemcount; i++)
    1358             :         {
    1359     6210488 :             ItemId      ii = PageGetItemId(phdr, i);
    1360             : 
    1361             :             /* Allow items without storage; currently only BRIN needs that */
    1362     6210488 :             if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
    1363      787674 :                 ii->lp_off += size_diff;
    1364             :         }
    1365             :     }
    1366             : 
    1367             :     /* Update the item's tuple length without changing its lp_flags field */
    1368      479868 :     tupid->lp_off = offset + size_diff;
    1369      479868 :     tupid->lp_len = newsize;
    1370             : 
    1371             :     /* Copy new tuple data onto page */
    1372      479868 :     memcpy(PageGetItem(page, tupid), newtup, newsize);
    1373             : 
    1374      479868 :     return true;
    1375             : }
    1376             : 
    1377             : 
    1378             : /*
    1379             :  * Set checksum for a page in shared buffers.
    1380             :  *
    1381             :  * If checksums are disabled, or if the page is not initialized, just return
    1382             :  * the input.  Otherwise, we must make a copy of the page before calculating
    1383             :  * the checksum, to prevent concurrent modifications (e.g. setting hint bits)
    1384             :  * from making the final checksum invalid.  It doesn't matter if we include or
    1385             :  * exclude hints during the copy, as long as we write a valid page and
    1386             :  * associated checksum.
    1387             :  *
    1388             :  * Returns a pointer to the block-sized data that needs to be written. Uses
    1389             :  * statically-allocated memory, so the caller must immediately write the
    1390             :  * returned page and not refer to it again.
    1391             :  */
    1392             : char *
    1393      599558 : PageSetChecksumCopy(Page page, BlockNumber blkno)
    1394             : {
    1395             :     static char *pageCopy = NULL;
    1396             : 
    1397             :     /* If we don't need a checksum, just return the passed-in data */
    1398      599558 :     if (PageIsNew(page) || !DataChecksumsEnabled())
    1399      596532 :         return (char *) page;
    1400             : 
    1401             :     /*
    1402             :      * We allocate the copy space once and use it over on each subsequent
    1403             :      * call.  The point of palloc'ing here, rather than having a static char
    1404             :      * array, is first to ensure adequate alignment for the checksumming code
    1405             :      * and second to avoid wasting space in processes that never call this.
    1406             :      */
    1407        3026 :     if (pageCopy == NULL)
    1408          18 :         pageCopy = MemoryContextAlloc(TopMemoryContext, BLCKSZ);
    1409             : 
    1410        3026 :     memcpy(pageCopy, (char *) page, BLCKSZ);
    1411        3026 :     ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno);
    1412        3026 :     return pageCopy;
    1413             : }
    1414             : 
    1415             : /*
    1416             :  * Set checksum for a page in private memory.
    1417             :  *
    1418             :  * This must only be used when we know that no other process can be modifying
    1419             :  * the page buffer.
    1420             :  */
    1421             : void
    1422      258130 : PageSetChecksumInplace(Page page, BlockNumber blkno)
    1423             : {
    1424             :     /* If we don't need a checksum, just return */
    1425      258130 :     if (PageIsNew(page) || !DataChecksumsEnabled())
    1426      257112 :         return;
    1427             : 
    1428        1018 :     ((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno);
    1429             : }

Generated by: LCOV version 1.13