LCOV - code coverage report
Current view: top level - src/backend/storage/page - bufpage.c
Test: PostgreSQL 18devel
Date: 2024-11-21 08:14:44

                  Hit    Total    Coverage
      Lines:      405      441      91.8 %
      Functions:   20       20     100.0 %

Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * bufpage.c
       4             :  *    POSTGRES standard buffer page code.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/storage/page/bufpage.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/htup_details.h"
      18             : #include "access/itup.h"
      19             : #include "access/xlog.h"
      20             : #include "pgstat.h"
      21             : #include "storage/checksum.h"
      22             : #include "utils/memdebug.h"
      23             : #include "utils/memutils.h"
      24             : 
      25             : 
      26             : /* GUC variable */
      27             : bool        ignore_checksum_failure = false;
      28             : 
      29             : 
      30             : /* ----------------------------------------------------------------
      31             :  *                      Page support functions
      32             :  * ----------------------------------------------------------------
      33             :  */
      34             : 
      35             : /*
      36             :  * PageInit
      37             :  *      Initializes the contents of a page.
      38             :  *      Note that we don't calculate an initial checksum here; that's not done
      39             :  *      until it's time to write.
      40             :  */
      41             : void
      42      647200 : PageInit(Page page, Size pageSize, Size specialSize)
      43             : {
      44      647200 :     PageHeader  p = (PageHeader) page;
      45             : 
      46      647200 :     specialSize = MAXALIGN(specialSize);
      47             : 
      48             :     Assert(pageSize == BLCKSZ);
      49             :     Assert(pageSize > specialSize + SizeOfPageHeaderData);
      50             : 
      51             :     /* Make sure all fields of page are zero, as well as unused space */
      52      647200 :     MemSet(p, 0, pageSize);
      53             : 
      54      647200 :     p->pd_flags = 0;
      55      647200 :     p->pd_lower = SizeOfPageHeaderData;
      56      647200 :     p->pd_upper = pageSize - specialSize;
      57      647200 :     p->pd_special = pageSize - specialSize;
      58      647200 :     PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
      59             :     /* p->pd_prune_xid = InvalidTransactionId;       done by above MemSet */
      60      647200 : }
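
[Illustrative sketch, not part of bufpage.c or its coverage data: roughly how a caller initializes a freshly allocated page. The special-space size is a hypothetical stand-in; real callers pass the size of their access method's opaque struct. Buffer acquisition and locking are omitted.]

    static void
    example_init_page(Page page, Size special_size)
    {
        /* zero the page and set up pd_lower/pd_upper/pd_special */
        PageInit(page, BLCKSZ, special_size);
    }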
      61             : 
      62             : 
      63             : /*
      64             :  * PageIsVerifiedExtended
      65             :  *      Check that the page header and checksum (if any) appear valid.
      66             :  *
      67             :  * This is called when a page has just been read in from disk.  The idea is
      68             :  * to cheaply detect trashed pages before we go nuts following bogus line
      69             :  * pointers, testing invalid transaction identifiers, etc.
      70             :  *
      71             :  * It turns out to be necessary to allow zeroed pages here too.  Even though
      72             :  * this routine is *not* called when deliberately adding a page to a relation,
      73             :  * there are scenarios in which a zeroed page might be found in a table.
      74             :  * (Example: a backend extends a relation, then crashes before it can write
      75             :  * any WAL entry about the new page.  The kernel will already have the
      76             :  * zeroed page in the file, and it will stay that way after restart.)  So we
      77             :  * allow zeroed pages here, and are careful that the page access macros
      78             :  * treat such a page as empty and without free space.  Eventually, VACUUM
      79             :  * will clean up such a page and make it usable.
      80             :  *
      81             :  * If flag PIV_LOG_WARNING is set, a WARNING is logged in the event of
      82             :  * a checksum failure.
      83             :  *
      84             :  * If flag PIV_REPORT_STAT is set, a checksum failure is reported directly
      85             :  * to pgstat.
      86             :  */
      87             : bool
      88     2331884 : PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
      89             : {
      90     2331884 :     PageHeader  p = (PageHeader) page;
      91             :     size_t     *pagebytes;
      92     2331884 :     bool        checksum_failure = false;
      93     2331884 :     bool        header_sane = false;
      94     2331884 :     uint16      checksum = 0;
      95             : 
      96             :     /*
      97             :      * Don't verify page data unless the page passes a basic non-zero test
      98             :      */
      99     2331884 :     if (!PageIsNew(page))
     100             :     {
     101     2324534 :         if (DataChecksumsEnabled())
     102             :         {
     103     2302918 :             checksum = pg_checksum_page((char *) page, blkno);
     104             : 
     105     2302918 :             if (checksum != p->pd_checksum)
     106           0 :                 checksum_failure = true;
     107             :         }
     108             : 
     109             :         /*
     110             :          * The following checks don't prove the header is correct, only that
     111             :          * it looks sane enough to allow into the buffer pool. Later usage of
     112             :          * the block can still reveal problems, which is why we offer the
     113             :          * checksum option.
     114             :          */
     115     2324534 :         if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
     116     2324534 :             p->pd_lower <= p->pd_upper &&
     117     2324534 :             p->pd_upper <= p->pd_special &&
     118     2324534 :             p->pd_special <= BLCKSZ &&
     119     2324534 :             p->pd_special == MAXALIGN(p->pd_special))
     120     2324534 :             header_sane = true;
     121             : 
     122     2324534 :         if (header_sane && !checksum_failure)
     123     2324534 :             return true;
     124             :     }
     125             : 
     126             :     /* Check all-zeroes case */
     127        7350 :     pagebytes = (size_t *) page;
     128             : 
     129        7350 :     if (pg_memory_is_all_zeros(pagebytes, BLCKSZ))
     130        7350 :         return true;
     131             : 
     132             :     /*
     133             :      * Throw a WARNING if the checksum fails, but only after we've checked for
     134             :      * the all-zeroes case.
     135             :      */
     136           0 :     if (checksum_failure)
     137             :     {
     138           0 :         if ((flags & PIV_LOG_WARNING) != 0)
     139           0 :             ereport(WARNING,
     140             :                     (errcode(ERRCODE_DATA_CORRUPTED),
     141             :                      errmsg("page verification failed, calculated checksum %u but expected %u",
     142             :                             checksum, p->pd_checksum)));
     143             : 
     144           0 :         if ((flags & PIV_REPORT_STAT) != 0)
     145           0 :             pgstat_report_checksum_failure();
     146             : 
     147           0 :         if (header_sane && ignore_checksum_failure)
     148           0 :             return true;
     149             :     }
     150             : 
     151           0 :     return false;
     152             : }
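
[Illustrative sketch, not part of bufpage.c or its coverage data: roughly how a read path could react to a verification failure after pulling a block from disk. The error message and surrounding logic are simplified assumptions, not the actual buffer-manager code.]

    static void
    example_verify_page(Page page, BlockNumber blkno)
    {
        /* log a WARNING and bump pgstat counters on checksum failure */
        if (!PageIsVerifiedExtended(page, blkno,
                                    PIV_LOG_WARNING | PIV_REPORT_STAT))
            ereport(ERROR,
                    (errcode(ERRCODE_DATA_CORRUPTED),
                     errmsg("invalid page in block %u", blkno)));
    }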
     153             : 
     154             : 
     155             : /*
     156             :  *  PageAddItemExtended
     157             :  *
     158             :  *  Add an item to a page.  Return value is the offset at which it was
     159             :  *  inserted, or InvalidOffsetNumber if the item is not inserted for any
     160             :  *  reason.  A WARNING is issued indicating the reason for the refusal.
     161             :  *
     162             :  *  offsetNumber must be either InvalidOffsetNumber to specify finding a
     163             :  *  free line pointer, or a value between FirstOffsetNumber and one past
     164             :  *  the last existing item, to specify using that particular line pointer.
     165             :  *
     166             :  *  If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
     167             :  *  the item at the specified offsetNumber, which must be either a
     168             :  *  currently-unused line pointer, or one past the last existing item.
     169             :  *
     170             :  *  If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
     171             :  *  the item at the specified offsetNumber, moving existing items later
     172             :  *  in the array to make room.
     173             :  *
     174             :  *  If offsetNumber is not valid, then assign a slot by finding the first
     175             :  *  one that is both unused and deallocated.
     176             :  *
     177             :  *  If flag PAI_IS_HEAP is set, we enforce that there can't be more than
     178             :  *  MaxHeapTuplesPerPage line pointers on the page.
     179             :  *
     180             :  *  !!! EREPORT(ERROR) IS DISALLOWED HERE !!!
     181             :  */
     182             : OffsetNumber
     183    65406092 : PageAddItemExtended(Page page,
     184             :                     Item item,
     185             :                     Size size,
     186             :                     OffsetNumber offsetNumber,
     187             :                     int flags)
     188             : {
     189    65406092 :     PageHeader  phdr = (PageHeader) page;
     190             :     Size        alignedSize;
     191             :     int         lower;
     192             :     int         upper;
     193             :     ItemId      itemId;
     194             :     OffsetNumber limit;
     195    65406092 :     bool        needshuffle = false;
     196             : 
     197             :     /*
     198             :      * Be wary about corrupted page pointers
     199             :      */
     200    65406092 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
     201    65406092 :         phdr->pd_lower > phdr->pd_upper ||
     202    65406092 :         phdr->pd_upper > phdr->pd_special ||
     203    65406092 :         phdr->pd_special > BLCKSZ)
     204           0 :         ereport(PANIC,
     205             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     206             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
     207             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
     208             : 
     209             :     /*
     210             :      * Select offsetNumber to place the new item at
     211             :      */
     212    65406092 :     limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));
     213             : 
     214             :     /* was offsetNumber passed in? */
     215    65406092 :     if (OffsetNumberIsValid(offsetNumber))
     216             :     {
     217             :         /* yes, check it */
     218    43271950 :         if ((flags & PAI_OVERWRITE) != 0)
     219             :         {
     220     3076276 :             if (offsetNumber < limit)
     221             :             {
     222       22164 :                 itemId = PageGetItemId(page, offsetNumber);
     223       22164 :                 if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
     224             :                 {
     225           0 :                     elog(WARNING, "will not overwrite a used ItemId");
     226           0 :                     return InvalidOffsetNumber;
     227             :                 }
     228             :             }
     229             :         }
     230             :         else
     231             :         {
     232    40195674 :             if (offsetNumber < limit)
     233     5938658 :                 needshuffle = true; /* need to move existing linp's */
     234             :         }
     235             :     }
     236             :     else
     237             :     {
     238             :         /* offsetNumber was not passed in, so find a free slot */
     239             :         /* if no free slot, we'll put it at limit (1st open slot) */
     240    22134142 :         if (PageHasFreeLinePointers(page))
     241             :         {
     242             :             /*
     243             :              * Scan line pointer array to locate a "recyclable" (unused)
     244             :              * ItemId.
     245             :              *
     246             :              * Always use earlier items first.  PageTruncateLinePointerArray
     247             :              * can only truncate unused items when they appear as a contiguous
     248             :              * group at the end of the line pointer array.
     249             :              */
     250    16604106 :             for (offsetNumber = FirstOffsetNumber;
     251             :                  offsetNumber < limit;   /* limit is maxoff+1 */
     252    16371226 :                  offsetNumber++)
     253             :             {
     254    16591306 :                 itemId = PageGetItemId(page, offsetNumber);
     255             : 
     256             :                 /*
     257             :                  * We check for no storage as well, just to be paranoid;
     258             :                  * unused items should never have storage.  Assert() that the
     259             :                  * invariant is respected too.
     260             :                  */
     261             :                 Assert(ItemIdIsUsed(itemId) || !ItemIdHasStorage(itemId));
     262             : 
     263    16591306 :                 if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
     264      220080 :                     break;
     265             :             }
     266      232880 :             if (offsetNumber >= limit)
     267             :             {
     268             :                 /* the hint is wrong, so reset it */
     269       12800 :                 PageClearHasFreeLinePointers(page);
     270             :             }
     271             :         }
     272             :         else
     273             :         {
     274             :             /* don't bother searching if hint says there's no free slot */
     275    21901262 :             offsetNumber = limit;
     276             :         }
     277             :     }
     278             : 
     279             :     /* Reject placing items beyond the first unused line pointer */
     280    65406092 :     if (offsetNumber > limit)
     281             :     {
     282           0 :         elog(WARNING, "specified item offset is too large");
     283           0 :         return InvalidOffsetNumber;
     284             :     }
     285             : 
     286             :     /* Reject placing items beyond heap boundary, if heap */
     287    65406092 :     if ((flags & PAI_IS_HEAP) != 0 && offsetNumber > MaxHeapTuplesPerPage)
     288             :     {
     289           0 :         elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
     290           0 :         return InvalidOffsetNumber;
     291             :     }
     292             : 
     293             :     /*
     294             :      * Compute new lower and upper pointers for page, see if it'll fit.
     295             :      *
     296             :      * Note: do arithmetic as signed ints, to avoid mistakes if, say,
     297             :      * alignedSize > pd_upper.
     298             :      */
     299    65406092 :     if (offsetNumber == limit || needshuffle)
     300    65163848 :         lower = phdr->pd_lower + sizeof(ItemIdData);
     301             :     else
     302      242244 :         lower = phdr->pd_lower;
     303             : 
     304    65406092 :     alignedSize = MAXALIGN(size);
     305             : 
     306    65406092 :     upper = (int) phdr->pd_upper - (int) alignedSize;
     307             : 
     308    65406092 :     if (lower > upper)
     309           0 :         return InvalidOffsetNumber;
     310             : 
     311             :     /*
     312             :      * OK to insert the item.  First, shuffle the existing pointers if needed.
     313             :      */
     314    65406092 :     itemId = PageGetItemId(page, offsetNumber);
     315             : 
     316    65406092 :     if (needshuffle)
     317     5938658 :         memmove(itemId + 1, itemId,
     318     5938658 :                 (limit - offsetNumber) * sizeof(ItemIdData));
     319             : 
     320             :     /* set the line pointer */
     321    65406092 :     ItemIdSetNormal(itemId, upper, size);
     322             : 
     323             :     /*
     324             :      * Items normally contain no uninitialized bytes.  Core bufpage consumers
     325             :      * conform, but this is not a necessary coding rule; a new index AM could
     326             :      * opt to depart from it.  However, data type input functions and other
     327             :      * C-language functions that synthesize datums should initialize all
     328             :      * bytes; datumIsEqual() relies on this.  Testing here, along with the
     329             :      * similar check in printtup(), helps to catch such mistakes.
     330             :      *
     331             :      * Values of the "name" type retrieved via index-only scans may contain
     332             :      * uninitialized bytes; see comment in btrescan().  Valgrind will report
     333             :      * this as an error, but it is safe to ignore.
     334             :      */
     335             :     VALGRIND_CHECK_MEM_IS_DEFINED(item, size);
     336             : 
     337             :     /* copy the item's data onto the page */
     338    65406092 :     memcpy((char *) page + upper, item, size);
     339             : 
     340             :     /* adjust page header */
     341    65406092 :     phdr->pd_lower = (LocationIndex) lower;
     342    65406092 :     phdr->pd_upper = (LocationIndex) upper;
     343             : 
     344    65406092 :     return offsetNumber;
     345             : }
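
[Illustrative sketch, not part of bufpage.c or its coverage data: appending a heap tuple while letting the routine choose a line pointer. 'tuple' is assumed to be a fully built HeapTuple; buffer locking and WAL-logging are omitted.]

    static OffsetNumber
    example_append_tuple(Page page, HeapTuple tuple)
    {
        OffsetNumber offnum;

        /* InvalidOffsetNumber: find a free (or the next) line pointer */
        offnum = PageAddItemExtended(page, (Item) tuple->t_data, tuple->t_len,
                                     InvalidOffsetNumber, PAI_IS_HEAP);
        if (offnum == InvalidOffsetNumber)
            elog(ERROR, "failed to add tuple to page");
        return offnum;
    }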
     346             : 
     347             : 
     348             : /*
     349             :  * PageGetTempPage
     350             :  *      Get a temporary page in local memory for special processing.
     351             :  *      The returned page is not initialized at all; caller must do that.
     352             :  */
     353             : Page
     354       21872 : PageGetTempPage(Page page)
     355             : {
     356             :     Size        pageSize;
     357             :     Page        temp;
     358             : 
     359       21872 :     pageSize = PageGetPageSize(page);
     360       21872 :     temp = (Page) palloc(pageSize);
     361             : 
     362       21872 :     return temp;
     363             : }
     364             : 
     365             : /*
     366             :  * PageGetTempPageCopy
     367             :  *      Get a temporary page in local memory for special processing.
     368             :  *      The page is initialized by copying the contents of the given page.
     369             :  */
     370             : Page
     371       11280 : PageGetTempPageCopy(Page page)
     372             : {
     373             :     Size        pageSize;
     374             :     Page        temp;
     375             : 
     376       11280 :     pageSize = PageGetPageSize(page);
     377       11280 :     temp = (Page) palloc(pageSize);
     378             : 
     379       11280 :     memcpy(temp, page, pageSize);
     380             : 
     381       11280 :     return temp;
     382             : }
     383             : 
     384             : /*
     385             :  * PageGetTempPageCopySpecial
     386             :  *      Get a temporary page in local memory for special processing.
     387             :  *      The page is PageInit'd with the same special-space size as the
     388             :  *      given page, and the special space is copied from the given page.
     389             :  */
     390             : Page
     391       57826 : PageGetTempPageCopySpecial(Page page)
     392             : {
     393             :     Size        pageSize;
     394             :     Page        temp;
     395             : 
     396       57826 :     pageSize = PageGetPageSize(page);
     397       57826 :     temp = (Page) palloc(pageSize);
     398             : 
     399       57826 :     PageInit(temp, pageSize, PageGetSpecialSize(page));
     400       57826 :     memcpy(PageGetSpecialPointer(temp),
     401       57826 :            PageGetSpecialPointer(page),
     402       57826 :            PageGetSpecialSize(page));
     403             : 
     404       57826 :     return temp;
     405             : }
     406             : 
     407             : /*
     408             :  * PageRestoreTempPage
     409             :  *      Copy temporary page back to permanent page after special processing
     410             :  *      and release the temporary page.
     411             :  */
     412             : void
     413       76904 : PageRestoreTempPage(Page tempPage, Page oldPage)
     414             : {
     415             :     Size        pageSize;
     416             : 
     417       76904 :     pageSize = PageGetPageSize(tempPage);
     418       76904 :     memcpy((char *) oldPage, (char *) tempPage, pageSize);
     419             : 
     420       76904 :     pfree(tempPage);
     421       76904 : }
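
[Illustrative sketch, not part of bufpage.c or its coverage data: the usual pairing of the temp-page helpers. An index AM rebuilds a page's contents in local memory and then copies the result back over the original. rebuild_contents() is a hypothetical placeholder for the AM-specific work of re-adding the items to keep.]

    static void
    example_rewrite_page(Page page)
    {
        /* PageInit'd copy with the same special space as 'page' */
        Page        temp = PageGetTempPageCopySpecial(page);

        /* rebuild_contents(temp, page);   hypothetical AM-specific step */

        /* copy the rebuilt contents back and pfree() the scratch copy */
        PageRestoreTempPage(temp, page);
    }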
     422             : 
     423             : /*
     424             :  * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
     425             :  */
     426             : typedef struct itemIdCompactData
     427             : {
     428             :     uint16      offsetindex;    /* linp array index */
     429             :     int16       itemoff;        /* page offset of item data */
     430             :     uint16      alignedlen;     /* MAXALIGN(item data len) */
     431             : } itemIdCompactData;
     432             : typedef itemIdCompactData *itemIdCompact;
     433             : 
     434             : /*
     435             :  * After removing or marking some line pointers unused, move the tuples to
     436             :  * remove the gaps caused by the removed items and reorder them back into
     437             :  * reverse line pointer order in the page.
     438             :  *
     439             :  * This function can often be fairly hot, so it pays to take some measures to
     440             :  * make it as optimal as possible.
     441             :  *
     442             :  * Callers may pass 'presorted' as true if the 'itemidbase' array is sorted in
     443             :  * descending order of itemoff.  When this is true we can just memmove()
     444             :  * tuples towards the end of the page.  This is quite a common case as it's
     445             :  * the order that tuples are initially inserted into pages.  When we call this
     446             :  * function to defragment the tuples in the page then any new line pointers
     447             :  * added to the page will keep that presorted order, so hitting this case is
     448             :  * still very common for tables that are commonly updated.
     449             :  *
     450             :  * When the 'itemidbase' array is not presorted then we're unable to just
     451             :  * memmove() tuples around freely.  Doing so could cause us to overwrite the
     452             :  * memory belonging to a tuple we've not moved yet.  In this case, we copy all
     453             :  * the tuples that need to be moved into a temporary buffer.  We can then
     454             :  * simply memcpy() out of that temp buffer back into the page at the correct
     455             :  * location.  Tuples are copied back into the page in the same order as the
     456             :  * 'itemidbase' array, so we end up reordering the tuples back into reverse
     457             :  * line pointer order.  This will increase the chances of hitting the
     458             :  * presorted case the next time around.
     459             :  *
     460             :  * Callers must ensure that nitems is > 0
     461             :  */
     462             : static void
     463      117708 : compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
     464             : {
     465      117708 :     PageHeader  phdr = (PageHeader) page;
     466             :     Offset      upper;
     467             :     Offset      copy_tail;
     468             :     Offset      copy_head;
     469             :     itemIdCompact itemidptr;
     470             :     int         i;
     471             : 
     472             :     /* Code within will not work correctly if nitems == 0 */
     473             :     Assert(nitems > 0);
     474             : 
     475      117708 :     if (presorted)
     476             :     {
     477             : 
     478             : #ifdef USE_ASSERT_CHECKING
     479             :         {
     480             :             /*
     481             :              * Verify we've not gotten any new callers that are incorrectly
     482             :              * passing a true presorted value.
     483             :              */
     484             :             Offset      lastoff = phdr->pd_special;
     485             : 
     486             :             for (i = 0; i < nitems; i++)
     487             :             {
     488             :                 itemidptr = &itemidbase[i];
     489             : 
     490             :                 Assert(lastoff > itemidptr->itemoff);
     491             : 
     492             :                 lastoff = itemidptr->itemoff;
     493             :             }
     494             :         }
     495             : #endif                          /* USE_ASSERT_CHECKING */
     496             : 
     497             :         /*
     498             :          * 'itemidbase' is already in the optimal order, i.e, lower item
     499             :          * pointers have a higher offset.  This allows us to memmove() the
     500             :          * tuples up to the end of the page without having to worry about
     501             :          * overwriting other tuples that have not been moved yet.
     502             :          *
     503             :          * There's a good chance that there are tuples already right at the
     504             :          * end of the page that we can simply skip over because they're
     505             :          * already in the correct location within the page.  We'll do that
     506             :          * first...
     507             :          */
     508       89064 :         upper = phdr->pd_special;
     509       89064 :         i = 0;
     510             :         do
     511             :         {
     512     1348754 :             itemidptr = &itemidbase[i];
     513     1348754 :             if (upper != itemidptr->itemoff + itemidptr->alignedlen)
     514       80650 :                 break;
     515     1268104 :             upper -= itemidptr->alignedlen;
     516             : 
     517     1268104 :             i++;
     518     1268104 :         } while (i < nitems);
     519             : 
     520             :         /*
     521             :          * Now that we've found the first tuple that needs to be moved, we can
     522             :          * do the tuple compactification.  We try and make the least number of
     523             :          * memmove() calls and only call memmove() when there's a gap.  When
     524             :          * we see a gap we just move all tuples after the gap up until the
     525             :          * point of the last move operation.
     526             :          */
     527       89064 :         copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
     528     1999516 :         for (; i < nitems; i++)
     529             :         {
     530             :             ItemId      lp;
     531             : 
     532     1910452 :             itemidptr = &itemidbase[i];
     533     1910452 :             lp = PageGetItemId(page, itemidptr->offsetindex + 1);
     534             : 
     535     1910452 :             if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
     536             :             {
     537      227916 :                 memmove((char *) page + upper,
     538      227916 :                         page + copy_head,
     539      227916 :                         copy_tail - copy_head);
     540             : 
     541             :                 /*
     542             :                  * We've now moved all tuples already seen, but not the
     543             :                  * current tuple, so we set the copy_tail to the end of this
     544             :                  * tuple so it can be moved in another iteration of the loop.
     545             :                  */
     546      227916 :                 copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
     547             :             }
     548             :             /* shift the target offset down by the length of this tuple */
     549     1910452 :             upper -= itemidptr->alignedlen;
     550             :             /* point the copy_head to the start of this tuple */
     551     1910452 :             copy_head = itemidptr->itemoff;
     552             : 
     553             :             /* update the line pointer to reference the new offset */
     554     1910452 :             lp->lp_off = upper;
     555             :         }
     556             : 
     557             :         /* move the remaining tuples. */
     558       89064 :         memmove((char *) page + upper,
     559       89064 :                 page + copy_head,
     560       89064 :                 copy_tail - copy_head);
     561             :     }
     562             :     else
     563             :     {
     564             :         PGAlignedBlock scratch;
     565       28644 :         char       *scratchptr = scratch.data;
     566             : 
     567             :         /*
     568             :          * Non-presorted case:  The tuples in the itemidbase array may be in
     569             :          * any order.  So, in order to move these to the end of the page we
     570             :          * must make a temp copy of each tuple that needs to be moved before
     571             :          * we copy them back into the page at the new offset.
     572             :          *
     573             :          * If a large percentage of tuples have been pruned (>75%) then we'll
     574             :          * copy these into the temp buffer tuple-by-tuple, otherwise, we'll
     575             :          * just do a single memcpy() for all tuples that need to be moved.
     576             :          * When so many tuples have been removed there's likely to be a lot of
     577             :          * gaps and it's unlikely that many non-movable tuples remain at the
     578             :          * end of the page.
     579             :          */
     580       28644 :         if (nitems < PageGetMaxOffsetNumber(page) / 4)
     581             :         {
     582        1468 :             i = 0;
     583             :             do
     584             :             {
     585       28554 :                 itemidptr = &itemidbase[i];
     586       28554 :                 memcpy(scratchptr + itemidptr->itemoff, page + itemidptr->itemoff,
     587       28554 :                        itemidptr->alignedlen);
     588       28554 :                 i++;
     589       28554 :             } while (i < nitems);
     590             : 
     591             :             /* Set things up for the compactification code below */
     592        1468 :             i = 0;
     593        1468 :             itemidptr = &itemidbase[0];
     594        1468 :             upper = phdr->pd_special;
     595             :         }
     596             :         else
     597             :         {
     598       27176 :             upper = phdr->pd_special;
     599             : 
     600             :             /*
     601             :              * Many tuples are likely to already be in the correct location.
     602             :              * There's no need to copy these into the temp buffer.  Instead
     603             :              * we'll just skip forward in the itemidbase array to the position
     604             :              * that we do need to move tuples from so that the code below just
     605             :              * leaves these ones alone.
     606             :              */
     607       27176 :             i = 0;
     608             :             do
     609             :             {
     610      691096 :                 itemidptr = &itemidbase[i];
     611      691096 :                 if (upper != itemidptr->itemoff + itemidptr->alignedlen)
     612       27176 :                     break;
     613      663920 :                 upper -= itemidptr->alignedlen;
     614             : 
     615      663920 :                 i++;
     616      663920 :             } while (i < nitems);
     617             : 
     618             :             /* Copy all tuples that need to be moved into the temp buffer */
     619       27176 :             memcpy(scratchptr + phdr->pd_upper,
     620       27176 :                    page + phdr->pd_upper,
     621       27176 :                    upper - phdr->pd_upper);
     622             :         }
     623             : 
     624             :         /*
     625             :          * Do the tuple compactification.  itemidptr is already pointing to
     626             :          * the first tuple that we're going to move.  Here we collapse the
     627             :          * memcpy calls for adjacent tuples into a single call.  This is done
     628             :          * by delaying the memcpy call until we find a gap that needs to be
     629             :          * closed.
     630             :          */
     631       28644 :         copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
     632     3194814 :         for (; i < nitems; i++)
     633             :         {
     634             :             ItemId      lp;
     635             : 
     636     3166170 :             itemidptr = &itemidbase[i];
     637     3166170 :             lp = PageGetItemId(page, itemidptr->offsetindex + 1);
     638             : 
     639             :             /* copy pending tuples when we detect a gap */
     640     3166170 :             if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
     641             :             {
     642      838198 :                 memcpy((char *) page + upper,
     643      838198 :                        scratchptr + copy_head,
     644      838198 :                        copy_tail - copy_head);
     645             : 
     646             :                 /*
     647             :                  * We've now copied all tuples already seen, but not the
     648             :                  * current tuple, so we set the copy_tail to the end of this
     649             :                  * tuple.
     650             :                  */
     651      838198 :                 copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
     652             :             }
     653             :             /* shift the target offset down by the length of this tuple */
     654     3166170 :             upper -= itemidptr->alignedlen;
     655             :             /* point the copy_head to the start of this tuple */
     656     3166170 :             copy_head = itemidptr->itemoff;
     657             : 
     658             :             /* update the line pointer to reference the new offset */
     659     3166170 :             lp->lp_off = upper;
     660             :         }
     661             : 
     662             :         /* Copy the remaining chunk */
     663       28644 :         memcpy((char *) page + upper,
     664       28644 :                scratchptr + copy_head,
     665       28644 :                copy_tail - copy_head);
     666             :     }
     667             : 
     668      117708 :     phdr->pd_upper = upper;
     669      117708 : }
     670             : 
     671             : /*
     672             :  * PageRepairFragmentation
     673             :  *
     674             :  * Frees fragmented space on a heap page following pruning.
     675             :  *
     676             :  * This routine is usable for heap pages only, but see PageIndexMultiDelete.
     677             :  *
     678             :  * This routine removes unused line pointers from the end of the line pointer
     679             :  * array.  This is possible when dead heap-only tuples get removed by pruning,
     680             :  * especially when there were HOT chains with several tuples each beforehand.
     681             :  *
     682             :  * Caller had better have a full cleanup lock on page's buffer.  As a side
     683             :  * effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
     684             :  * needed.  Caller might also need to account for a reduction in the length of
     685             :  * the line pointer array following array truncation.
     686             :  */
     687             : void
     688      107034 : PageRepairFragmentation(Page page)
     689             : {
     690      107034 :     Offset      pd_lower = ((PageHeader) page)->pd_lower;
     691      107034 :     Offset      pd_upper = ((PageHeader) page)->pd_upper;
     692      107034 :     Offset      pd_special = ((PageHeader) page)->pd_special;
     693             :     Offset      last_offset;
     694             :     itemIdCompactData itemidbase[MaxHeapTuplesPerPage];
     695             :     itemIdCompact itemidptr;
     696             :     ItemId      lp;
     697             :     int         nline,
     698             :                 nstorage,
     699             :                 nunused;
     700      107034 :     OffsetNumber finalusedlp = InvalidOffsetNumber;
     701             :     int         i;
     702             :     Size        totallen;
     703      107034 :     bool        presorted = true;   /* For now */
     704             : 
     705             :     /*
     706             :      * It's worth the trouble to be more paranoid here than in most places,
     707             :      * because we are about to reshuffle data in (what is usually) a shared
     708             :      * disk buffer.  If we aren't careful then corrupted pointers, lengths,
     709             :      * etc could cause us to clobber adjacent disk buffers, spreading the data
     710             :      * loss further.  So, check everything.
     711             :      */
     712      107034 :     if (pd_lower < SizeOfPageHeaderData ||
     713      107034 :         pd_lower > pd_upper ||
     714      107034 :         pd_upper > pd_special ||
     715      107034 :         pd_special > BLCKSZ ||
     716      107034 :         pd_special != MAXALIGN(pd_special))
     717           0 :         ereport(ERROR,
     718             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     719             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
     720             :                         pd_lower, pd_upper, pd_special)));
     721             : 
     722             :     /*
     723             :      * Run through the line pointer array and collect data about live items.
     724             :      */
     725      107034 :     nline = PageGetMaxOffsetNumber(page);
     726      107034 :     itemidptr = itemidbase;
     727      107034 :     nunused = totallen = 0;
     728      107034 :     last_offset = pd_special;
     729     8898354 :     for (i = FirstOffsetNumber; i <= nline; i++)
     730             :     {
     731     8791320 :         lp = PageGetItemId(page, i);
     732     8791320 :         if (ItemIdIsUsed(lp))
     733             :         {
     734     8514900 :             if (ItemIdHasStorage(lp))
     735             :             {
     736     3154392 :                 itemidptr->offsetindex = i - 1;
     737     3154392 :                 itemidptr->itemoff = ItemIdGetOffset(lp);
     738             : 
     739     3154392 :                 if (last_offset > itemidptr->itemoff)
     740     2667834 :                     last_offset = itemidptr->itemoff;
     741             :                 else
     742      486558 :                     presorted = false;
     743             : 
     744     3154392 :                 if (unlikely(itemidptr->itemoff < (int) pd_upper ||
     745             :                              itemidptr->itemoff >= (int) pd_special))
     746           0 :                     ereport(ERROR,
     747             :                             (errcode(ERRCODE_DATA_CORRUPTED),
     748             :                              errmsg("corrupted line pointer: %u",
     749             :                                     itemidptr->itemoff)));
     750     3154392 :                 itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
     751     3154392 :                 totallen += itemidptr->alignedlen;
     752     3154392 :                 itemidptr++;
     753             :             }
     754             : 
     755     8514900 :             finalusedlp = i;    /* Could be the final non-LP_UNUSED item */
     756             :         }
     757             :         else
     758             :         {
     759             :             /* Unused entries should have lp_len = 0, but make sure */
     760             :             Assert(!ItemIdHasStorage(lp));
     761      276420 :             ItemIdSetUnused(lp);
     762      276420 :             nunused++;
     763             :         }
     764             :     }
     765             : 
     766      107034 :     nstorage = itemidptr - itemidbase;
     767      107034 :     if (nstorage == 0)
     768             :     {
     769             :         /* Page is completely empty, so just reset it quickly */
     770       22328 :         ((PageHeader) page)->pd_upper = pd_special;
     771             :     }
     772             :     else
     773             :     {
     774             :         /* Need to compact the page the hard way */
     775       84706 :         if (totallen > (Size) (pd_special - pd_lower))
     776           0 :             ereport(ERROR,
     777             :                     (errcode(ERRCODE_DATA_CORRUPTED),
     778             :                      errmsg("corrupted item lengths: total %u, available space %u",
     779             :                             (unsigned int) totallen, pd_special - pd_lower)));
     780             : 
     781       84706 :         compactify_tuples(itemidbase, nstorage, page, presorted);
     782             :     }
     783             : 
     784      107034 :     if (finalusedlp != nline)
     785             :     {
     786             :         /* The last line pointer is not the last used line pointer */
     787        3146 :         int         nunusedend = nline - finalusedlp;
     788             : 
     789             :         Assert(nunused >= nunusedend && nunusedend > 0);
     790             : 
     791             :         /* remove trailing unused line pointers from the count */
     792        3146 :         nunused -= nunusedend;
     793             :         /* truncate the line pointer array */
     794        3146 :         ((PageHeader) page)->pd_lower -= (sizeof(ItemIdData) * nunusedend);
     795             :     }
     796             : 
     797             :     /* Set hint bit for PageAddItemExtended */
     798      107034 :     if (nunused > 0)
     799       23428 :         PageSetHasFreeLinePointers(page);
     800             :     else
     801       83606 :         PageClearHasFreeLinePointers(page);
     802      107034 : }
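
[Illustrative sketch, not part of bufpage.c or its coverage data: the rough order of operations for a pruning caller. Which items get marked dead or unused, the WAL-logging, and the locking protocol (a full cleanup lock is required) are all simplified away; the real flow lives in heapam's pruning code.]

    static void
    example_defragment_after_prune(Page page)
    {
        /* pruning has already updated the victim line pointers */
        PageRepairFragmentation(page);
        /* caller would now mark the buffer dirty and emit WAL as needed */
    }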
     803             : 
     804             : /*
     805             :  * PageTruncateLinePointerArray
     806             :  *
     807             :  * Removes unused line pointers at the end of the line pointer array.
     808             :  *
     809             :  * This routine is usable for heap pages only.  It is called by VACUUM during
     810             :  * its second pass over the heap.  We expect at least one LP_UNUSED line
     811             :  * pointer on the page (if VACUUM didn't have an LP_DEAD item on the page that
     812             :  * it just set to LP_UNUSED then it should not call here).
     813             :  *
     814             :  * We avoid truncating the line pointer array to 0 items, if necessary by
     815             :  * leaving behind a single remaining LP_UNUSED item.  This is a little
     816             :  * arbitrary, but it seems like a good idea to avoid leaving a PageIsEmpty()
     817             :  * page behind.
     818             :  *
     819             :  * Caller can have either an exclusive lock or a full cleanup lock on page's
     820             :  * buffer.  The page's PD_HAS_FREE_LINES hint bit will be set or unset based
     821             :  * on whether or not we leave behind any remaining LP_UNUSED items.
     822             :  */
     823             : void
     824       24186 : PageTruncateLinePointerArray(Page page)
     825             : {
     826       24186 :     PageHeader  phdr = (PageHeader) page;
     827       24186 :     bool        countdone = false,
     828       24186 :                 sethint = false;
     829       24186 :     int         nunusedend = 0;
     830             : 
     831             :     /* Scan line pointer array back-to-front */
     832     1603150 :     for (int i = PageGetMaxOffsetNumber(page); i >= FirstOffsetNumber; i--)
     833             :     {
     834     1602382 :         ItemId      lp = PageGetItemId(page, i);
     835             : 
     836     1602382 :         if (!countdone && i > FirstOffsetNumber)
     837             :         {
     838             :             /*
     839             :              * Still determining which line pointers from the end of the array
     840             :              * will be truncated away.  Either count another line pointer as
     841             :              * safe to truncate, or notice that it's not safe to truncate
     842             :              * additional line pointers (stop counting line pointers).
     843             :              */
     844     1448344 :             if (!ItemIdIsUsed(lp))
     845     1436894 :                 nunusedend++;
     846             :             else
     847       11450 :                 countdone = true;
     848             :         }
     849             :         else
     850             :         {
     851             :             /*
     852             :              * Once we've stopped counting we still need to figure out if
     853             :              * there are any remaining LP_UNUSED line pointers somewhere more
     854             :              * towards the front of the array.
     855             :              */
     856      154038 :             if (!ItemIdIsUsed(lp))
     857             :             {
     858             :                 /*
     859             :                  * This is an unused line pointer that we won't be truncating
     860             :                  * away -- so there is at least one.  Set hint on page.
     861             :                  */
     862       23418 :                 sethint = true;
     863       23418 :                 break;
     864             :             }
     865             :         }
     866             :     }
     867             : 
     868       24186 :     if (nunusedend > 0)
     869             :     {
     870       15660 :         phdr->pd_lower -= sizeof(ItemIdData) * nunusedend;
     871             : 
     872             : #ifdef CLOBBER_FREED_MEMORY
     873             :         memset((char *) page + phdr->pd_lower, 0x7F,
     874             :                sizeof(ItemIdData) * nunusedend);
     875             : #endif
     876             :     }
     877             :     else
     878             :         Assert(sethint);
     879             : 
     880             :     /* Set hint bit for PageAddItemExtended */
     881       24186 :     if (sethint)
     882       23418 :         PageSetHasFreeLinePointers(page);
     883             :     else
     884         768 :         PageClearHasFreeLinePointers(page);
     885       24186 : }
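
[Illustrative sketch, not part of bufpage.c or its coverage data: VACUUM's second heap pass marks previously dead items unused and then shrinks the line pointer array. The parameters and loop are simplified assumptions, not the code in vacuumlazy.c.]

    static void
    example_mark_unused_and_truncate(Page page, OffsetNumber *deadoffsets,
                                     int ndead)
    {
        for (int i = 0; i < ndead; i++)
            ItemIdSetUnused(PageGetItemId(page, deadoffsets[i]));

        /* trailing LP_UNUSED items can now be trimmed from the array */
        PageTruncateLinePointerArray(page);
        /* caller would now mark the buffer dirty and emit WAL as needed */
    }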
     886             : 
     887             : /*
     888             :  * PageGetFreeSpace
     889             :  *      Returns the size of the free (allocatable) space on a page,
     890             :  *      reduced by the space needed for a new line pointer.
     891             :  *
     892             :  * Note: this should usually only be used on index pages.  Use
     893             :  * PageGetHeapFreeSpace on heap pages.
     894             :  */
     895             : Size
     896    54137808 : PageGetFreeSpace(Page page)
     897             : {
     898             :     int         space;
     899             : 
     900             :     /*
     901             :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     902             :      * pd_upper.
     903             :      */
     904    54137808 :     space = (int) ((PageHeader) page)->pd_upper -
     905    54137808 :         (int) ((PageHeader) page)->pd_lower;
     906             : 
     907    54137808 :     if (space < (int) sizeof(ItemIdData))
     908       11800 :         return 0;
     909    54126008 :     space -= sizeof(ItemIdData);
     910             : 
     911    54126008 :     return (Size) space;
     912             : }
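
[Worked example, an illustrative note rather than part of bufpage.c: assuming the default BLCKSZ of 8192 and a page with no special space, a freshly PageInit'd page has pd_lower = SizeOfPageHeaderData = 24 and pd_upper = 8192. PageGetExactFreeSpace() therefore reports 8168 bytes, while PageGetFreeSpace() reports 8168 - sizeof(ItemIdData) = 8164 bytes, reserving room for one new line pointer.]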
     913             : 
     914             : /*
     915             :  * PageGetFreeSpaceForMultipleTuples
     916             :  *      Returns the size of the free (allocatable) space on a page,
     917             :  *      reduced by the space needed for multiple new line pointers.
     918             :  *
     919             :  * Note: this should usually only be used on index pages.  Use
     920             :  * PageGetHeapFreeSpace on heap pages.
     921             :  */
     922             : Size
     923      130384 : PageGetFreeSpaceForMultipleTuples(Page page, int ntups)
     924             : {
     925             :     int         space;
     926             : 
     927             :     /*
     928             :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     929             :      * pd_upper.
     930             :      */
     931      130384 :     space = (int) ((PageHeader) page)->pd_upper -
     932      130384 :         (int) ((PageHeader) page)->pd_lower;
     933             : 
     934      130384 :     if (space < (int) (ntups * sizeof(ItemIdData)))
     935           0 :         return 0;
     936      130384 :     space -= ntups * sizeof(ItemIdData);
     937             : 
     938      130384 :     return (Size) space;
     939             : }
     940             : 
     941             : /*
     942             :  * PageGetExactFreeSpace
     943             :  *      Returns the size of the free (allocatable) space on a page,
     944             :  *      without any consideration for adding/removing line pointers.
     945             :  */
     946             : Size
     947     3241928 : PageGetExactFreeSpace(Page page)
     948             : {
     949             :     int         space;
     950             : 
     951             :     /*
     952             :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     953             :      * pd_upper.
     954             :      */
     955     3241928 :     space = (int) ((PageHeader) page)->pd_upper -
     956     3241928 :         (int) ((PageHeader) page)->pd_lower;
     957             : 
     958     3241928 :     if (space < 0)
     959           0 :         return 0;
     960             : 
     961     3241928 :     return (Size) space;
     962             : }
     963             : 
     964             : 
     965             : /*
     966             :  * PageGetHeapFreeSpace
     967             :  *      Returns the size of the free (allocatable) space on a page,
     968             :  *      reduced by the space needed for a new line pointer.
     969             :  *
     970             :  * The difference between this and PageGetFreeSpace is that this will return
     971             :  * zero if there are already MaxHeapTuplesPerPage line pointers in the page
     972             :  * and none are free.  We use this to enforce that no more than
     973             :  * MaxHeapTuplesPerPage line pointers are created on a heap page.  (Although
     974             :  * no more tuples than that could fit anyway, in the presence of redirected
     975             :  * or dead line pointers it'd be possible to have too many line pointers.
     976             :  * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
     977             :  * on the number of line pointers, we make this extra check.)
     978             :  */
     979             : Size
     980    25620368 : PageGetHeapFreeSpace(Page page)
     981             : {
     982             :     Size        space;
     983             : 
     984    25620368 :     space = PageGetFreeSpace(page);
     985    25620368 :     if (space > 0)
     986             :     {
     987             :         OffsetNumber offnum,
     988             :                     nline;
     989             : 
     990             :         /*
     991             :          * Are there already MaxHeapTuplesPerPage line pointers in the page?
     992             :          */
     993    25592178 :         nline = PageGetMaxOffsetNumber(page);
     994    25592178 :         if (nline >= MaxHeapTuplesPerPage)
     995             :         {
     996        6558 :             if (PageHasFreeLinePointers(page))
     997             :             {
     998             :                 /*
     999             :                  * Since this is just a hint, we must confirm that there is
    1000             :                  * indeed a free line pointer
    1001             :                  */
    1002      685116 :                 for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
    1003             :                 {
    1004      684916 :                     ItemId      lp = PageGetItemId(page, offnum);
    1005             : 
    1006      684916 :                     if (!ItemIdIsUsed(lp))
    1007        3356 :                         break;
    1008             :                 }
    1009             : 
    1010        3556 :                 if (offnum > nline)
    1011             :                 {
    1012             :                     /*
    1013             :                      * The hint is wrong, but we can't clear it here since we
    1014             :                      * don't have the ability to mark the page dirty.
    1015             :                      */
    1016         200 :                     space = 0;
    1017             :                 }
    1018             :             }
    1019             :             else
    1020             :             {
    1021             :                 /*
    1022             :                  * Although the hint might be wrong, PageAddItem will believe
    1023             :                  * it anyway, so we must believe it too.
    1024             :                  */
    1025        3002 :                 space = 0;
    1026             :             }
    1027             :         }
    1028             :     }
    1029    25620368 :     return space;
    1030             : }
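[Editor's note] Heap insertion paths consult this routine so that the MaxHeapTuplesPerPage limit is respected without extra bookkeeping in the caller.  A minimal sketch of such a caller, assuming shared-buffer access; heap_page_has_room is a hypothetical helper, not part of bufpage.c:

#include "postgres.h"

#include "storage/bufmgr.h"
#include "storage/bufpage.h"

/*
 * Hypothetical caller: decide whether a heap tuple of 'tupsz' bytes can be
 * placed on the page held in 'buffer'.  Because PageGetHeapFreeSpace()
 * already returns zero once the MaxHeapTuplesPerPage line-pointer limit is
 * reached, a plain size comparison is enough here.
 */
static bool
heap_page_has_room(Buffer buffer, Size tupsz)
{
    Page        page = BufferGetPage(buffer);

    return PageGetHeapFreeSpace(page) >= MAXALIGN(tupsz);
}
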
    1031             : 
    1032             : 
    1033             : /*
    1034             :  * PageIndexTupleDelete
    1035             :  *
    1036             :  * This routine does the work of removing a tuple from an index page.
    1037             :  *
    1038             :  * Unlike heap pages, we compact out the line pointer for the removed tuple.
    1039             :  */
    1040             : void
    1041      813502 : PageIndexTupleDelete(Page page, OffsetNumber offnum)
    1042             : {
    1043      813502 :     PageHeader  phdr = (PageHeader) page;
    1044             :     char       *addr;
    1045             :     ItemId      tup;
    1046             :     Size        size;
    1047             :     unsigned    offset;
    1048             :     int         nbytes;
    1049             :     int         offidx;
    1050             :     int         nline;
    1051             : 
    1052             :     /*
    1053             :      * As with PageRepairFragmentation, paranoia seems justified.
    1054             :      */
    1055      813502 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1056      813502 :         phdr->pd_lower > phdr->pd_upper ||
    1057      813502 :         phdr->pd_upper > phdr->pd_special ||
    1058      813502 :         phdr->pd_special > BLCKSZ ||
    1059      813502 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1060           0 :         ereport(ERROR,
    1061             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1062             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1063             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1064             : 
    1065      813502 :     nline = PageGetMaxOffsetNumber(page);
    1066      813502 :     if ((int) offnum <= 0 || (int) offnum > nline)
    1067           0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1068             : 
    1069             :     /* change offset number to offset index */
    1070      813502 :     offidx = offnum - 1;
    1071             : 
    1072      813502 :     tup = PageGetItemId(page, offnum);
    1073             :     Assert(ItemIdHasStorage(tup));
    1074      813502 :     size = ItemIdGetLength(tup);
    1075      813502 :     offset = ItemIdGetOffset(tup);
    1076             : 
    1077      813502 :     if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
    1078      813502 :         offset != MAXALIGN(offset))
    1079           0 :         ereport(ERROR,
    1080             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1081             :                  errmsg("corrupted line pointer: offset = %u, size = %u",
    1082             :                         offset, (unsigned int) size)));
    1083             : 
    1084             :     /* Amount of space to actually be deleted */
    1085      813502 :     size = MAXALIGN(size);
    1086             : 
    1087             :     /*
    1088             :      * First, we want to get rid of the pd_linp entry for the index tuple. We
    1089             :      * copy all subsequent linp's back one slot in the array. We don't use
    1090             :      * PageGetItemId, because we are manipulating the _array_, not individual
    1091             :      * linp's.
    1092             :      */
    1093      813502 :     nbytes = phdr->pd_lower -
    1094      813502 :         ((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);
    1095             : 
    1096      813502 :     if (nbytes > 0)
    1097      795072 :         memmove((char *) &(phdr->pd_linp[offidx]),
    1098      795072 :                 (char *) &(phdr->pd_linp[offidx + 1]),
    1099             :                 nbytes);
    1100             : 
    1101             :     /*
    1102             :      * Now move everything between the old upper bound (beginning of tuple
    1103             :      * space) and the beginning of the deleted tuple forward, so that space in
    1104             :      * the middle of the page is left free.  If we've just deleted the tuple
    1105             :      * at the beginning of tuple space, then there's no need to do the copy.
    1106             :      */
    1107             : 
    1108             :     /* beginning of tuple space */
    1109      813502 :     addr = (char *) page + phdr->pd_upper;
    1110             : 
    1111      813502 :     if (offset > phdr->pd_upper)
    1112      793832 :         memmove(addr + size, addr, offset - phdr->pd_upper);
    1113             : 
    1114             :     /* adjust free space boundary pointers */
    1115      813502 :     phdr->pd_upper += size;
    1116      813502 :     phdr->pd_lower -= sizeof(ItemIdData);
    1117             : 
    1118             :     /*
    1119             :      * Finally, we need to adjust the linp entries that remain.
    1120             :      *
    1121             :      * Anything that used to be before the deleted tuple's data was moved
    1122             :      * forward by the size of the deleted tuple.
    1123             :      */
    1124      813502 :     if (!PageIsEmpty(page))
    1125             :     {
    1126             :         int         i;
    1127             : 
    1128      811940 :         nline--;                /* there's one less than when we started */
    1129   142875078 :         for (i = 1; i <= nline; i++)
    1130             :         {
    1131   142063138 :             ItemId      ii = PageGetItemId(page, i);
    1132             : 
    1133             :             Assert(ItemIdHasStorage(ii));
    1134   142063138 :             if (ItemIdGetOffset(ii) <= offset)
    1135    93090464 :                 ii->lp_off += size;
    1136             :         }
    1137             :     }
    1138      813502 : }
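[Editor's note] Because the line-pointer array is compacted, deleting several tuples one at a time is only safe when the offsets are processed from highest to lowest; otherwise earlier deletions invalidate the later offsets.  A minimal sketch of that pattern (delete_index_items_retail is a hypothetical helper, not part of bufpage.c), mirroring the retail path used by PageIndexMultiDelete below:

#include "postgres.h"

#include "storage/bufpage.h"

/*
 * Retail deletion of several index tuples.  PageIndexTupleDelete() compacts
 * the line-pointer array, so every offset above the deleted one shifts down
 * by one; walking a sorted offset array back to front keeps the remaining
 * entries valid.
 */
static void
delete_index_items_retail(Page page, OffsetNumber *offnums, int nitems)
{
    while (--nitems >= 0)
        PageIndexTupleDelete(page, offnums[nitems]);
}
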
    1139             : 
    1140             : 
    1141             : /*
    1142             :  * PageIndexMultiDelete
    1143             :  *
    1144             :  * This routine handles the case of deleting multiple tuples from an
    1145             :  * index page at once.  It is considerably faster than a loop around
    1146             :  * PageIndexTupleDelete ... however, the caller *must* supply the array
    1147             :  * of item numbers to be deleted in item number order!
    1148             :  */
    1149             : void
    1150       36468 : PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
    1151             : {
    1152       36468 :     PageHeader  phdr = (PageHeader) page;
    1153       36468 :     Offset      pd_lower = phdr->pd_lower;
    1154       36468 :     Offset      pd_upper = phdr->pd_upper;
    1155       36468 :     Offset      pd_special = phdr->pd_special;
    1156             :     Offset      last_offset;
    1157             :     itemIdCompactData itemidbase[MaxIndexTuplesPerPage];
    1158             :     ItemIdData  newitemids[MaxIndexTuplesPerPage];
    1159             :     itemIdCompact itemidptr;
    1160             :     ItemId      lp;
    1161             :     int         nline,
    1162             :                 nused;
    1163             :     Size        totallen;
    1164             :     Size        size;
    1165             :     unsigned    offset;
    1166             :     int         nextitm;
    1167             :     OffsetNumber offnum;
    1168       36468 :     bool        presorted = true;   /* For now */
    1169             : 
    1170             :     Assert(nitems <= MaxIndexTuplesPerPage);
    1171             : 
    1172             :     /*
    1173             :      * If there aren't very many items to delete, then retail
    1174             :      * PageIndexTupleDelete is the best way.  Delete the items in reverse
    1175             :      * order so we don't have to think about adjusting item numbers for
    1176             :      * previous deletions.
    1177             :      *
    1178             :      * TODO: tune the magic number here
    1179             :      */
    1180       36468 :     if (nitems <= 2)
    1181             :     {
    1182        7314 :         while (--nitems >= 0)
    1183        4134 :             PageIndexTupleDelete(page, itemnos[nitems]);
    1184        3180 :         return;
    1185             :     }
    1186             : 
    1187             :     /*
    1188             :      * As with PageRepairFragmentation, paranoia seems justified.
    1189             :      */
    1190       33288 :     if (pd_lower < SizeOfPageHeaderData ||
    1191       33288 :         pd_lower > pd_upper ||
    1192       33288 :         pd_upper > pd_special ||
    1193       33288 :         pd_special > BLCKSZ ||
    1194       33288 :         pd_special != MAXALIGN(pd_special))
    1195           0 :         ereport(ERROR,
    1196             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1197             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1198             :                         pd_lower, pd_upper, pd_special)));
    1199             : 
    1200             :     /*
    1201             :      * Scan the line pointer array and build a list of just the ones we are
    1202             :      * going to keep.  Notice we do not modify the page yet, since we are
    1203             :      * still validity-checking.
    1204             :      */
    1205       33288 :     nline = PageGetMaxOffsetNumber(page);
    1206       33288 :     itemidptr = itemidbase;
    1207       33288 :     totallen = 0;
    1208       33288 :     nused = 0;
    1209       33288 :     nextitm = 0;
    1210       33288 :     last_offset = pd_special;
    1211     7427308 :     for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
    1212             :     {
    1213     7394020 :         lp = PageGetItemId(page, offnum);
    1214             :         Assert(ItemIdHasStorage(lp));
    1215     7394020 :         size = ItemIdGetLength(lp);
    1216     7394020 :         offset = ItemIdGetOffset(lp);
    1217     7394020 :         if (offset < pd_upper ||
    1218     7394020 :             (offset + size) > pd_special ||
    1219     7394020 :             offset != MAXALIGN(offset))
    1220           0 :             ereport(ERROR,
    1221             :                     (errcode(ERRCODE_DATA_CORRUPTED),
    1222             :                      errmsg("corrupted line pointer: offset = %u, size = %u",
    1223             :                             offset, (unsigned int) size)));
    1224             : 
    1225     7394020 :         if (nextitm < nitems && offnum == itemnos[nextitm])
    1226             :         {
    1227             :             /* skip item to be deleted */
    1228     3539766 :             nextitm++;
    1229             :         }
    1230             :         else
    1231             :         {
    1232     3854254 :             itemidptr->offsetindex = nused; /* where it will go */
    1233     3854254 :             itemidptr->itemoff = offset;
    1234             : 
    1235     3854254 :             if (last_offset > itemidptr->itemoff)
    1236     1905774 :                 last_offset = itemidptr->itemoff;
    1237             :             else
    1238     1948480 :                 presorted = false;
    1239             : 
    1240     3854254 :             itemidptr->alignedlen = MAXALIGN(size);
    1241     3854254 :             totallen += itemidptr->alignedlen;
    1242     3854254 :             newitemids[nused] = *lp;
    1243     3854254 :             itemidptr++;
    1244     3854254 :             nused++;
    1245             :         }
    1246             :     }
    1247             : 
    1248             :     /* this will catch invalid or out-of-order itemnos[] */
    1249       33288 :     if (nextitm != nitems)
    1250           0 :         elog(ERROR, "incorrect index offsets supplied");
    1251             : 
    1252       33288 :     if (totallen > (Size) (pd_special - pd_lower))
    1253           0 :         ereport(ERROR,
    1254             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1255             :                  errmsg("corrupted item lengths: total %u, available space %u",
    1256             :                         (unsigned int) totallen, pd_special - pd_lower)));
    1257             : 
    1258             :     /*
    1259             :      * Looks good. Overwrite the line pointers with the copy, from which we've
    1260             :      * removed all the unused items.
    1261             :      */
    1262       33288 :     memcpy(phdr->pd_linp, newitemids, nused * sizeof(ItemIdData));
    1263       33288 :     phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
    1264             : 
    1265             :     /* and compactify the tuple data */
    1266       33288 :     if (nused > 0)
    1267       33002 :         compactify_tuples(itemidbase, nused, page, presorted);
    1268             :     else
    1269         286 :         phdr->pd_upper = pd_special;
    1270             : }
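[Editor's note] A typical caller builds the required ascending offset array during a single pass over the page, which satisfies the ordering rule stated above.  A minimal sketch (delete_dead_index_items is a hypothetical helper, not part of bufpage.c) that collects LP_DEAD items and deletes them in one batch:

#include "postgres.h"

#include "access/itup.h"
#include "storage/bufpage.h"

/*
 * Collect the offsets of every LP_DEAD item on an index page and delete the
 * whole batch in one call.  Scanning in offset order naturally produces the
 * ascending array that PageIndexMultiDelete() requires from its caller.
 */
static void
delete_dead_index_items(Page page)
{
    OffsetNumber deletable[MaxIndexTuplesPerPage];
    int          ndeletable = 0;
    OffsetNumber offnum;
    OffsetNumber maxoff = PageGetMaxOffsetNumber(page);

    for (offnum = FirstOffsetNumber;
         offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      lp = PageGetItemId(page, offnum);

        if (ItemIdIsDead(lp))
            deletable[ndeletable++] = offnum;
    }

    if (ndeletable > 0)
        PageIndexMultiDelete(page, deletable, ndeletable);
}
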
    1271             : 
    1272             : 
    1273             : /*
    1274             :  * PageIndexTupleDeleteNoCompact
    1275             :  *
    1276             :  * Remove the specified tuple from an index page, but set its line pointer
    1277             :  * to "unused" instead of compacting it out, except that it can be removed
    1278             :  * if it's the last line pointer on the page.
    1279             :  *
    1280             :  * This is used for index AMs that require that existing TIDs of live tuples
    1281             :  * remain unchanged, and are willing to allow unused line pointers instead.
    1282             :  */
    1283             : void
    1284         676 : PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
    1285             : {
    1286         676 :     PageHeader  phdr = (PageHeader) page;
    1287             :     char       *addr;
    1288             :     ItemId      tup;
    1289             :     Size        size;
    1290             :     unsigned    offset;
    1291             :     int         nline;
    1292             : 
    1293             :     /*
    1294             :      * As with PageRepairFragmentation, paranoia seems justified.
    1295             :      */
    1296         676 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1297         676 :         phdr->pd_lower > phdr->pd_upper ||
    1298         676 :         phdr->pd_upper > phdr->pd_special ||
    1299         676 :         phdr->pd_special > BLCKSZ ||
    1300         676 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1301           0 :         ereport(ERROR,
    1302             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1303             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1304             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1305             : 
    1306         676 :     nline = PageGetMaxOffsetNumber(page);
    1307         676 :     if ((int) offnum <= 0 || (int) offnum > nline)
    1308           0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1309             : 
    1310         676 :     tup = PageGetItemId(page, offnum);
    1311             :     Assert(ItemIdHasStorage(tup));
    1312         676 :     size = ItemIdGetLength(tup);
    1313         676 :     offset = ItemIdGetOffset(tup);
    1314             : 
    1315         676 :     if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
    1316         676 :         offset != MAXALIGN(offset))
    1317           0 :         ereport(ERROR,
    1318             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1319             :                  errmsg("corrupted line pointer: offset = %u, size = %u",
    1320             :                         offset, (unsigned int) size)));
    1321             : 
    1322             :     /* Amount of space to actually be deleted */
    1323         676 :     size = MAXALIGN(size);
    1324             : 
    1325             :     /*
    1326             :      * Either set the line pointer to "unused", or zap it if it's the last
    1327             :      * one.  (Note: it's possible that the next-to-last one(s) are already
    1328             :      * unused, but we do not trouble to try to compact them out if so.)
    1329             :      */
    1330         676 :     if ((int) offnum < nline)
    1331         608 :         ItemIdSetUnused(tup);
    1332             :     else
    1333             :     {
    1334          68 :         phdr->pd_lower -= sizeof(ItemIdData);
    1335          68 :         nline--;                /* there's one less than when we started */
    1336             :     }
    1337             : 
    1338             :     /*
    1339             :      * Now move everything between the old upper bound (beginning of tuple
    1340             :      * space) and the beginning of the deleted tuple forward, so that space in
    1341             :      * the middle of the page is left free.  If we've just deleted the tuple
    1342             :      * at the beginning of tuple space, then there's no need to do the copy.
    1343             :      */
    1344             : 
    1345             :     /* beginning of tuple space */
    1346         676 :     addr = (char *) page + phdr->pd_upper;
    1347             : 
    1348         676 :     if (offset > phdr->pd_upper)
    1349         608 :         memmove(addr + size, addr, offset - phdr->pd_upper);
    1350             : 
    1351             :     /* adjust free space boundary pointer */
    1352         676 :     phdr->pd_upper += size;
    1353             : 
    1354             :     /*
    1355             :      * Finally, we need to adjust the linp entries that remain.
    1356             :      *
    1357             :      * Anything that used to be before the deleted tuple's data was moved
    1358             :      * forward by the size of the deleted tuple.
    1359             :      */
    1360         676 :     if (!PageIsEmpty(page))
    1361             :     {
    1362             :         int         i;
    1363             : 
    1364      173014 :         for (i = 1; i <= nline; i++)
    1365             :         {
    1366      172348 :             ItemId      ii = PageGetItemId(page, i);
    1367             : 
    1368      172348 :             if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
    1369       84558 :                 ii->lp_off += size;
    1370             :         }
    1371             :     }
    1372         676 : }
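[Editor's note] Leaving the line pointer in place means the slot can later be recycled for a replacement tuple at the same offset, which is what TID stability requires.  A minimal sketch of that pattern (replace_tuple_keeping_tids is a hypothetical helper, not part of bufpage.c), assuming the replacement fits in the space just freed:

#include "postgres.h"

#include "storage/bufpage.h"

/*
 * Drop the tuple at 'offnum' without disturbing any other tuple's offset
 * number, then recycle the now-unused slot for a replacement tuple.
 * Passing overwrite = true to PageAddItem() lets it reuse an unused line
 * pointer at a specific offset.
 */
static void
replace_tuple_keeping_tids(Page page, OffsetNumber offnum,
                           Item newtup, Size newsize)
{
    PageIndexTupleDeleteNoCompact(page, offnum);

    if (PageAddItem(page, newtup, newsize, offnum,
                    true /* overwrite */, false /* is_heap */) != offnum)
        elog(ERROR, "failed to add tuple at offset %u", offnum);
}
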
    1373             : 
    1374             : 
    1375             : /*
    1376             :  * PageIndexTupleOverwrite
    1377             :  *
    1378             :  * Replace a specified tuple on an index page.
    1379             :  *
    1380             :  * The new tuple is placed exactly where the old one had been, shifting
    1381             :  * other tuples' data up or down as needed to keep the page compacted.
    1382             :  * This is better than deleting and reinserting the tuple, because it
    1383             :  * avoids any data shifting when the tuple size doesn't change; and
    1384             :  * even when it does, we avoid moving the line pointers around.
    1385             :  * This could be used by an index AM that doesn't want to unset the
    1386             :  * LP_DEAD bit when it happens to be set.  It could conceivably also be
    1387             :  * used by an index AM that cares about the physical order of tuples as
    1388             :  * well as their logical/ItemId order.
    1389             :  *
    1390             :  * If there's insufficient space for the new tuple, return false.  Other
    1391             :  * errors represent data-corruption problems, so we just elog.
    1392             :  */
    1393             : bool
    1394      892734 : PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
    1395             :                         Item newtup, Size newsize)
    1396             : {
    1397      892734 :     PageHeader  phdr = (PageHeader) page;
    1398             :     ItemId      tupid;
    1399             :     int         oldsize;
    1400             :     unsigned    offset;
    1401             :     Size        alignednewsize;
    1402             :     int         size_diff;
    1403             :     int         itemcount;
    1404             : 
    1405             :     /*
    1406             :      * As with PageRepairFragmentation, paranoia seems justified.
    1407             :      */
    1408      892734 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1409      892734 :         phdr->pd_lower > phdr->pd_upper ||
    1410      892734 :         phdr->pd_upper > phdr->pd_special ||
    1411      892734 :         phdr->pd_special > BLCKSZ ||
    1412      892734 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1413           0 :         ereport(ERROR,
    1414             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1415             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1416             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1417             : 
    1418      892734 :     itemcount = PageGetMaxOffsetNumber(page);
    1419      892734 :     if ((int) offnum <= 0 || (int) offnum > itemcount)
    1420           0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1421             : 
    1422      892734 :     tupid = PageGetItemId(page, offnum);
    1423             :     Assert(ItemIdHasStorage(tupid));
    1424      892734 :     oldsize = ItemIdGetLength(tupid);
    1425      892734 :     offset = ItemIdGetOffset(tupid);
    1426             : 
    1427      892734 :     if (offset < phdr->pd_upper || (offset + oldsize) > phdr->pd_special ||
    1428      892734 :         offset != MAXALIGN(offset))
    1429           0 :         ereport(ERROR,
    1430             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1431             :                  errmsg("corrupted line pointer: offset = %u, size = %u",
    1432             :                         offset, (unsigned int) oldsize)));
    1433             : 
    1434             :     /*
    1435             :      * Determine actual change in space requirement, check for page overflow.
    1436             :      */
    1437      892734 :     oldsize = MAXALIGN(oldsize);
    1438      892734 :     alignednewsize = MAXALIGN(newsize);
    1439      892734 :     if (alignednewsize > oldsize + (phdr->pd_upper - phdr->pd_lower))
    1440           0 :         return false;
    1441             : 
    1442             :     /*
    1443             :      * Relocate existing data and update line pointers, unless the new tuple
    1444             :      * is the same size as the old (after alignment), in which case there's
    1445             :      * nothing to do.  Notice that what we have to relocate is data before the
    1446             :      * target tuple, not data after, so it's convenient to express size_diff
    1447             :      * as the amount by which the tuple's size is decreasing, making it the
    1448             :      * delta to add to pd_upper and affected line pointers.
    1449             :      */
    1450      892734 :     size_diff = oldsize - (int) alignednewsize;
    1451      892734 :     if (size_diff != 0)
    1452             :     {
    1453       79404 :         char       *addr = (char *) page + phdr->pd_upper;
    1454             :         int         i;
    1455             : 
    1456             :         /* relocate all tuple data before the target tuple */
    1457       79404 :         memmove(addr + size_diff, addr, offset - phdr->pd_upper);
    1458             : 
    1459             :         /* adjust free space boundary pointer */
    1460       79404 :         phdr->pd_upper += size_diff;
    1461             : 
    1462             :         /* adjust affected line pointers too */
    1463    12728252 :         for (i = FirstOffsetNumber; i <= itemcount; i++)
    1464             :         {
    1465    12648848 :             ItemId      ii = PageGetItemId(page, i);
    1466             : 
    1467             :             /* Allow items without storage; currently only BRIN needs that */
    1468    12648848 :             if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
    1469     5915954 :                 ii->lp_off += size_diff;
    1470             :         }
    1471             :     }
    1472             : 
    1473             :     /* Update the item's tuple length without changing its lp_flags field */
    1474      892734 :     tupid->lp_off = offset + size_diff;
    1475      892734 :     tupid->lp_len = newsize;
    1476             : 
    1477             :     /* Copy new tuple data onto page */
    1478      892734 :     memcpy(PageGetItem(page, tupid), newtup, newsize);
    1479             : 
    1480      892734 :     return true;
    1481             : }
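[Editor's note] A minimal sketch of a caller (overwrite_index_tuple is a hypothetical helper, not part of bufpage.c) that swaps in an updated index tuple and treats an out-of-space result as unexpected:

#include "postgres.h"

#include "access/itup.h"
#include "storage/bufpage.h"

/*
 * Replace an index tuple in place.  The offset number and the line
 * pointer's lp_flags (for example an LP_DEAD hint) are preserved; a false
 * return only means the page lacks room for a larger replacement, which
 * this caller treats as a should-not-happen condition.
 */
static void
overwrite_index_tuple(Page page, OffsetNumber offnum, IndexTuple newtup)
{
    if (!PageIndexTupleOverwrite(page, offnum, (Item) newtup,
                                 IndexTupleSize(newtup)))
        elog(ERROR, "failed to overwrite index tuple at offset %u", offnum);
}
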
    1482             : 
    1483             : 
    1484             : /*
    1485             :  * Set checksum for a page in shared buffers.
    1486             :  *
    1487             :  * If checksums are disabled, or if the page is not initialized, just return
    1488             :  * the input.  Otherwise, we must make a copy of the page before calculating
    1489             :  * the checksum, to prevent concurrent modifications (e.g. setting hint bits)
    1490             :  * from making the final checksum invalid.  It doesn't matter if we include or
    1491             :  * exclude hints during the copy, as long as we write a valid page and
    1492             :  * associated checksum.
    1493             :  *
    1494             :  * Returns a pointer to the block-sized data that needs to be written. Uses
    1495             :  * statically-allocated memory, so the caller must immediately write the
    1496             :  * returned page and not refer to it again.
    1497             :  */
    1498             : char *
    1499      942652 : PageSetChecksumCopy(Page page, BlockNumber blkno)
    1500             : {
    1501             :     static char *pageCopy = NULL;
    1502             : 
    1503             :     /* If we don't need a checksum, just return the passed-in data */
    1504      942652 :     if (PageIsNew(page) || !DataChecksumsEnabled())
    1505       21330 :         return (char *) page;
    1506             : 
    1507             :     /*
    1508             :      * We allocate the copy space once and use it over on each subsequent
    1509             :      * call.  The point of palloc'ing here, rather than having a static char
    1510             :      * array, is first to ensure adequate alignment for the checksumming code
    1511             :      * and second to avoid wasting space in processes that never call this.
    1512             :      */
    1513      921322 :     if (pageCopy == NULL)
    1514        4930 :         pageCopy = MemoryContextAllocAligned(TopMemoryContext,
    1515             :                                              BLCKSZ,
    1516             :                                              PG_IO_ALIGN_SIZE,
    1517             :                                              0);
    1518             : 
    1519      921322 :     memcpy(pageCopy, (char *) page, BLCKSZ);
    1520      921322 :     ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno);
    1521      921322 :     return pageCopy;
    1522             : }
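[Editor's note] A minimal sketch of how a caller might use the returned pointer (write_shared_page is a hypothetical helper, not part of bufpage.c); the smgrwrite() call and its five-argument signature are assumptions about the surrounding storage-manager API:

#include "postgres.h"

#include "storage/bufpage.h"
#include "storage/smgr.h"

/*
 * Checksum a shared-buffer page and hand the result straight to the
 * storage manager.  The pointer returned by PageSetChecksumCopy() refers
 * to static, reused memory, so it must be consumed immediately and never
 * saved for later use.
 */
static void
write_shared_page(SMgrRelation reln, ForkNumber forknum,
                  BlockNumber blkno, Page page)
{
    char       *towrite = PageSetChecksumCopy(page, blkno);

    /* assumed smgrwrite() wrapper signature from recent releases */
    smgrwrite(reln, forknum, blkno, towrite, false);
}
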
    1523             : 
    1524             : /*
    1525             :  * Set checksum for a page in private memory.
    1526             :  *
    1527             :  * This must only be used when we know that no other process can be modifying
    1528             :  * the page buffer.
    1529             :  */
    1530             : void
    1531      112990 : PageSetChecksumInplace(Page page, BlockNumber blkno)
    1532             : {
    1533             :     /* If we don't need a checksum, just return */
    1534      112990 :     if (PageIsNew(page) || !DataChecksumsEnabled())
    1535        3656 :         return;
    1536             : 
    1537      109334 :     ((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno);
    1538             : }
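[Editor's note] A minimal sketch of the private-memory case (prepare_local_page is a hypothetical helper, not part of bufpage.c): a page is initialized and filled in backend-local memory, then checksummed in place immediately before being written out:

#include "postgres.h"

#include "storage/bufpage.h"

/*
 * Build a page in backend-local memory and checksum it in place.  Since no
 * other process can see this buffer, the cheaper in-place variant is safe;
 * pages in shared buffers must go through PageSetChecksumCopy() instead.
 */
static void
prepare_local_page(Page page, BlockNumber blkno, Size specialSize)
{
    PageInit(page, BLCKSZ, specialSize);

    /* ... caller would fill in the page contents here ... */

    PageSetChecksumInplace(page, blkno);
}
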

Generated by: LCOV version 1.14