LCOV - code coverage report
Current view: top level - src/backend/storage/page - bufpage.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 93.9 % 444 417
Test Date: 2026-02-27 08:14:49 Functions: 100.0 % 20 20
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * bufpage.c
       4              :  *    POSTGRES standard buffer page code.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/storage/page/bufpage.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/htup_details.h"
      18              : #include "access/itup.h"
      19              : #include "access/xlog.h"
      20              : #include "pgstat.h"
      21              : #include "storage/checksum.h"
      22              : #include "utils/memdebug.h"
      23              : #include "utils/memutils.h"
      24              : 
      25              : 
      26              : /* GUC variable */
      27              : bool        ignore_checksum_failure = false;
      28              : 
      29              : 
      30              : /* ----------------------------------------------------------------
      31              :  *                      Page support functions
      32              :  * ----------------------------------------------------------------
      33              :  */
      34              : 
      35              : /*
      36              :  * PageInit
      37              :  *      Initializes the contents of a page.
      38              :  *      Note that we don't calculate an initial checksum here; that's not done
      39              :  *      until it's time to write.
      40              :  */
      41              : void
      42       351797 : PageInit(Page page, Size pageSize, Size specialSize)
      43              : {
      44       351797 :     PageHeader  p = (PageHeader) page;
      45              : 
      46       351797 :     specialSize = MAXALIGN(specialSize);
      47              : 
      48              :     Assert(pageSize == BLCKSZ);
      49              :     Assert(pageSize > specialSize + SizeOfPageHeaderData);
      50              : 
      51              :     /* Make sure all fields of page are zero, as well as unused space */
      52       351797 :     MemSet(p, 0, pageSize);
      53              : 
      54       351797 :     p->pd_flags = 0;
      55       351797 :     p->pd_lower = SizeOfPageHeaderData;
      56       351797 :     p->pd_upper = pageSize - specialSize;
      57       351797 :     p->pd_special = pageSize - specialSize;
      58       351797 :     PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
      59              :     /* p->pd_prune_xid = InvalidTransactionId;       done by above MemSet */
      60       351797 : }
      61              : 
      62              : 
      63              : /*
      64              :  * PageIsVerified
      65              :  *      Check that the page header and checksum (if any) appear valid.
      66              :  *
      67              :  * This is called when a page has just been read in from disk.  The idea is
      68              :  * to cheaply detect trashed pages before we go nuts following bogus line
      69              :  * pointers, testing invalid transaction identifiers, etc.
      70              :  *
      71              :  * It turns out to be necessary to allow zeroed pages here too.  Even though
      72              :  * this routine is *not* called when deliberately adding a page to a relation,
      73              :  * there are scenarios in which a zeroed page might be found in a table.
      74              :  * (Example: a backend extends a relation, then crashes before it can write
      75              :  * any WAL entry about the new page.  The kernel will already have the
      76              :  * zeroed page in the file, and it will stay that way after restart.)  So we
      77              :  * allow zeroed pages here, and are careful that the page access macros
      78              :  * treat such a page as empty and without free space.  Eventually, VACUUM
      79              :  * will clean up such a page and make it usable.
      80              :  *
      81              :  * If flag PIV_LOG_WARNING/PIV_LOG_LOG is set, a WARNING/LOG message is logged
      82              :  * in the event of a checksum failure.
      83              :  *
      84              :  * If flag PIV_IGNORE_CHECKSUM_FAILURE is set, checksum failures will cause a
      85              :  * message about the failure to be emitted, but will not cause
      86              :  * PageIsVerified() to return false.
      87              :  *
      88              :  * To allow the caller to report statistics about checksum failures,
      89              :  * *checksum_failure_p can be passed in. Note that there may be checksum
      90              :  * failures even if this function returns true, due to
      91              :  * PIV_IGNORE_CHECKSUM_FAILURE.
      92              :  */
      93              : bool
      94      1356294 : PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_failure_p)
      95              : {
      96      1356294 :     const PageHeaderData *p = (const PageHeaderData *) page;
      97              :     size_t     *pagebytes;
      98      1356294 :     bool        checksum_failure = false;
      99      1356294 :     bool        header_sane = false;
     100      1356294 :     uint16      checksum = 0;
     101              : 
     102      1356294 :     if (checksum_failure_p)
     103      1356294 :         *checksum_failure_p = false;
     104              : 
     105              :     /*
     106              :      * Don't verify page data unless the page passes basic non-zero test
     107              :      */
     108      1356294 :     if (!PageIsNew(page))
     109              :     {
     110      1352160 :         if (DataChecksumsEnabled())
     111              :         {
     112      1342006 :             checksum = pg_checksum_page(page, blkno);
     113              : 
     114      1342006 :             if (checksum != p->pd_checksum)
     115              :             {
     116           32 :                 checksum_failure = true;
     117           32 :                 if (checksum_failure_p)
     118           32 :                     *checksum_failure_p = true;
     119              :             }
     120              :         }
     121              : 
     122              :         /*
     123              :          * The following checks don't prove the header is correct, only that
     124              :          * it looks sane enough to allow into the buffer pool. Later usage of
     125              :          * the block can still reveal problems, which is why we offer the
     126              :          * checksum option.
     127              :          */
     128      1352160 :         if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
     129      1352160 :             p->pd_lower <= p->pd_upper &&
     130      1352160 :             p->pd_upper <= p->pd_special &&
     131      1352160 :             p->pd_special <= BLCKSZ &&
     132      1352078 :             p->pd_special == MAXALIGN(p->pd_special))
     133      1352078 :             header_sane = true;
     134              : 
     135      1352160 :         if (header_sane && !checksum_failure)
     136      1352052 :             return true;
     137              :     }
     138              : 
     139              :     /* Check all-zeroes case */
     140         4242 :     pagebytes = (size_t *) page;
     141              : 
     142         4242 :     if (pg_memory_is_all_zeros(pagebytes, BLCKSZ))
     143         4134 :         return true;
     144              : 
     145              :     /*
     146              :      * Throw a WARNING/LOG, as instructed by PIV_LOG_*, if the checksum fails,
     147              :      * but only after we've checked for the all-zeroes case.
     148              :      */
     149          108 :     if (checksum_failure)
     150              :     {
     151           32 :         if ((flags & (PIV_LOG_WARNING | PIV_LOG_LOG)) != 0)
     152           32 :             ereport(flags & PIV_LOG_WARNING ? WARNING : LOG,
     153              :                     (errcode(ERRCODE_DATA_CORRUPTED),
     154              :                      errmsg("page verification failed, calculated checksum %u but expected %u",
     155              :                             checksum, p->pd_checksum)));
     156              : 
     157           32 :         if (header_sane && (flags & PIV_IGNORE_CHECKSUM_FAILURE))
     158           12 :             return true;
     159              :     }
     160              : 
     161           96 :     return false;
     162              : }
     163              : 
     164              : 
     165              : /*
     166              :  *  PageAddItemExtended
     167              :  *
     168              :  *  Add an item to a page.  Return value is the offset at which it was
     169              :  *  inserted, or InvalidOffsetNumber if the item is not inserted for any
     170              :  *  reason.  A WARNING is issued for every refusal except lack of space.
     171              :  *
     172              :  *  offsetNumber must be either InvalidOffsetNumber to specify finding a
     173              :  *  free line pointer, or a value between FirstOffsetNumber and one past
     174              :  *  the last existing item, to specify using that particular line pointer.
     175              :  *
     176              :  *  If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
     177              :  *  the item at the specified offsetNumber, which must be either a
     178              :  *  currently-unused line pointer, or one past the last existing item.
     179              :  *
     180              :  *  If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
     181              :  *  the item at the specified offsetNumber, moving existing items later
     182              :  *  in the array to make room.
     183              :  *
     184              :  *  If offsetNumber is not valid, then assign a slot by finding the first
     185              :  *  one that is both unused and deallocated.
     186              :  *
     187              :  *  If flag PAI_IS_HEAP is set, we enforce that there can't be more than
     188              :  *  MaxHeapTuplesPerPage line pointers on the page.
     189              :  *
     190              :  *  !!! EREPORT(ERROR) IS DISALLOWED HERE !!!
     191              :  */
     192              : OffsetNumber
     193     34904774 : PageAddItemExtended(Page page,
     194              :                     const void *item,
     195              :                     Size size,
     196              :                     OffsetNumber offsetNumber,
     197              :                     int flags)
     198              : {
     199     34904774 :     PageHeader  phdr = (PageHeader) page;
     200              :     Size        alignedSize;
     201              :     int         lower;
     202              :     int         upper;
     203              :     ItemId      itemId;
     204              :     OffsetNumber limit;
     205     34904774 :     bool        needshuffle = false;
     206              : 
     207              :     /*
     208              :      * Be wary about corrupted page pointers
     209              :      */
     210     34904774 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
     211     34904774 :         phdr->pd_lower > phdr->pd_upper ||
     212     34904774 :         phdr->pd_upper > phdr->pd_special ||
     213     34904774 :         phdr->pd_special > BLCKSZ)
     214            0 :         ereport(PANIC,
     215              :                 (errcode(ERRCODE_DATA_CORRUPTED),
     216              :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
     217              :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
     218              : 
     219              :     /*
     220              :      * Select offsetNumber to place the new item at
     221              :      */
     222     34904774 :     limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));
     223              : 
     224              :     /* was offsetNumber passed in? */
     225     34904774 :     if (OffsetNumberIsValid(offsetNumber))
     226              :     {
     227              :         /* yes, check it */
     228     23081389 :         if ((flags & PAI_OVERWRITE) != 0)
     229              :         {
     230      1599989 :             if (offsetNumber < limit)
     231              :             {
     232        27805 :                 itemId = PageGetItemId(page, offsetNumber);
     233        27805 :                 if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
     234              :                 {
     235            0 :                     elog(WARNING, "will not overwrite a used ItemId");
     236            0 :                     return InvalidOffsetNumber;
     237              :                 }
     238              :             }
     239              :         }
     240              :         else
     241              :         {
     242     21481400 :             if (offsetNumber < limit)
     243      3389919 :                 needshuffle = true; /* need to move existing linp's */
     244              :         }
     245              :     }
     246              :     else
     247              :     {
     248              :         /* offsetNumber was not passed in, so find a free slot */
     249              :         /* if no free slot, we'll put it at limit (1st open slot) */
     250     11823385 :         if (PageHasFreeLinePointers(page))
     251              :         {
     252              :             /*
     253              :              * Scan line pointer array to locate a "recyclable" (unused)
     254              :              * ItemId.
     255              :              *
     256              :              * Always use earlier items first.  PageTruncateLinePointerArray
     257              :              * can only truncate unused items when they appear as a contiguous
     258              :              * group at the end of the line pointer array.
     259              :              */
     260       162776 :             for (offsetNumber = FirstOffsetNumber;
     261      9764793 :                  offsetNumber < limit;   /* limit is maxoff+1 */
     262      9602017 :                  offsetNumber++)
     263              :             {
     264      9756054 :                 itemId = PageGetItemId(page, offsetNumber);
     265              : 
     266              :                 /*
     267              :                  * We check for no storage as well, just to be paranoid;
     268              :                  * unused items should never have storage.  Assert() that the
     269              :                  * invariant is respected too.
     270              :                  */
     271              :                 Assert(ItemIdIsUsed(itemId) || !ItemIdHasStorage(itemId));
     272              : 
     273      9756054 :                 if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
     274       154037 :                     break;
     275              :             }
     276       162776 :             if (offsetNumber >= limit)
     277              :             {
     278              :                 /* the hint is wrong, so reset it */
     279         8739 :                 PageClearHasFreeLinePointers(page);
     280              :             }
     281              :         }
     282              :         else
     283              :         {
     284              :             /* don't bother searching if hint says there's no free slot */
     285     11660609 :             offsetNumber = limit;
     286              :         }
     287              :     }
     288              : 
     289              :     /* Reject placing items beyond the first unused line pointer */
     290     34904774 :     if (offsetNumber > limit)
     291              :     {
     292            0 :         elog(WARNING, "specified item offset is too large");
     293            0 :         return InvalidOffsetNumber;
     294              :     }
     295              : 
     296              :     /* Reject placing items beyond heap boundary, if heap */
     297     34904774 :     if ((flags & PAI_IS_HEAP) != 0 && offsetNumber > MaxHeapTuplesPerPage)
     298              :     {
     299            0 :         elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
     300            0 :         return InvalidOffsetNumber;
     301              :     }
     302              : 
     303              :     /*
     304              :      * Compute new lower and upper pointers for page, see if it'll fit.
     305              :      *
     306              :      * Note: do arithmetic as signed ints, to avoid mistakes if, say,
     307              :      * alignedSize > pd_upper.
     308              :      */
     309     34904774 :     if (offsetNumber == limit || needshuffle)
     310     34722932 :         lower = phdr->pd_lower + sizeof(ItemIdData);
     311              :     else
     312       181842 :         lower = phdr->pd_lower;
     313              : 
     314     34904774 :     alignedSize = MAXALIGN(size);
     315              : 
     316     34904774 :     upper = (int) phdr->pd_upper - (int) alignedSize;
     317              : 
     318     34904774 :     if (lower > upper)
     319            0 :         return InvalidOffsetNumber;
     320              : 
     321              :     /*
     322              :      * OK to insert the item.  First, shuffle the existing pointers if needed.
     323              :      */
     324     34904774 :     itemId = PageGetItemId(page, offsetNumber);
     325              : 
     326     34904774 :     if (needshuffle)
     327      3389919 :         memmove(itemId + 1, itemId,
     328      3389919 :                 (limit - offsetNumber) * sizeof(ItemIdData));
     329              : 
     330              :     /* set the line pointer */
     331     34904774 :     ItemIdSetNormal(itemId, upper, size);
     332              : 
     333              :     /*
     334              :      * Items normally contain no uninitialized bytes.  Core bufpage consumers
     335              :      * conform, but this is not a necessary coding rule; a new index AM could
     336              :      * opt to depart from it.  However, data type input functions and other
     337              :      * C-language functions that synthesize datums should initialize all
     338              :      * bytes; datumIsEqual() relies on this.  Testing here, along with the
     339              :      * similar check in printtup(), helps to catch such mistakes.
     340              :      *
     341              :      * Values of the "name" type retrieved via index-only scans may contain
     342              :      * uninitialized bytes; see comment in btrescan().  Valgrind will report
     343              :      * this as an error, but it is safe to ignore.
     344              :      */
     345              :     VALGRIND_CHECK_MEM_IS_DEFINED(item, size);
     346              : 
     347              :     /* copy the item's data onto the page */
     348     34904774 :     memcpy((char *) page + upper, item, size);
     349              : 
     350              :     /* adjust page header */
     351     34904774 :     phdr->pd_lower = (LocationIndex) lower;
     352     34904774 :     phdr->pd_upper = (LocationIndex) upper;
     353              : 
     354     34904774 :     return offsetNumber;
     355              : }
     356              : 
     357              : 
     358              : /*
     359              :  * PageGetTempPage
     360              :  *      Get a temporary page in local memory for special processing.
     361              :  *      The returned page is not initialized at all; caller must do that.
     362              :  */
     363              : Page
     364          122 : PageGetTempPage(const PageData *page)
     365              : {
     366              :     Size        pageSize;
     367              :     Page        temp;
     368              : 
     369          122 :     pageSize = PageGetPageSize(page);
     370          122 :     temp = (Page) palloc(pageSize);
     371              : 
     372          122 :     return temp;
     373              : }
     374              : 
     375              : /*
     376              :  * PageGetTempPageCopy
     377              :  *      Get a temporary page in local memory for special processing.
     378              :  *      The page is initialized by copying the contents of the given page.
     379              :  */
     380              : Page
     381         5878 : PageGetTempPageCopy(const PageData *page)
     382              : {
     383              :     Size        pageSize;
     384              :     Page        temp;
     385              : 
     386         5878 :     pageSize = PageGetPageSize(page);
     387         5878 :     temp = (Page) palloc(pageSize);
     388              : 
     389         5878 :     memcpy(temp, page, pageSize);
     390              : 
     391         5878 :     return temp;
     392              : }
     393              : 
     394              : /*
     395              :  * PageGetTempPageCopySpecial
     396              :  *      Get a temporary page in local memory for special processing.
     397              :  *      The page is PageInit'd with the same special-space size as the
     398              :  *      given page, and the special space is copied from the given page.
     399              :  */
     400              : Page
     401        30550 : PageGetTempPageCopySpecial(const PageData *page)
     402              : {
     403              :     Size        pageSize;
     404              :     Page        temp;
     405              : 
     406        30550 :     pageSize = PageGetPageSize(page);
     407        30550 :     temp = (Page) palloc(pageSize);
     408              : 
     409        30550 :     PageInit(temp, pageSize, PageGetSpecialSize(page));
     410        91650 :     memcpy(PageGetSpecialPointer(temp),
     411        30550 :            PageGetSpecialPointer(page),
     412        30550 :            PageGetSpecialSize(page));
     413              : 
     414        30550 :     return temp;
     415              : }
     416              : 
     417              : /*
     418              :  * PageRestoreTempPage
     419              :  *      Copy temporary page back to permanent page after special processing
     420              :  *      and release the temporary page.
     421              :  */
     422              : void
     423        28965 : PageRestoreTempPage(Page tempPage, Page oldPage)
     424              : {
     425              :     Size        pageSize;
     426              : 
     427        28965 :     pageSize = PageGetPageSize(tempPage);
     428        28965 :     memcpy(oldPage, tempPage, pageSize);
     429              : 
     430        28965 :     pfree(tempPage);
     431        28965 : }
     432              : 
     433              : /*
     434              :  * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
     435              :  */
     436              : typedef struct itemIdCompactData
     437              : {
     438              :     uint16      offsetindex;    /* linp array index */
     439              :     int16       itemoff;        /* page offset of item data */
     440              :     uint16      alignedlen;     /* MAXALIGN(item data len) */
     441              : } itemIdCompactData;
     442              : typedef itemIdCompactData *itemIdCompact;
     443              : 
     444              : /*
     445              :  * After removing or marking some line pointers unused, move the tuples to
     446              :  * remove the gaps caused by the removed items and reorder them back into
     447              :  * reverse line pointer order in the page.
     448              :  *
     449              :  * This function can often be fairly hot, so it pays to take some measures to
     450              :  * make it as efficient as possible.
     451              :  *
     452              :  * Callers may pass 'presorted' as true if the 'itemidbase' array is sorted in
     453              :  * descending order of itemoff.  When this is true we can just memmove()
     454              :  * tuples towards the end of the page.  This is quite a common case as it's
     455              :  * the order that tuples are initially inserted into pages.  When we call this
     456              :  * function to defragment the tuples in the page then any new line pointers
     457              :  * added to the page will keep that presorted order, so hitting this case is
     458              :  * still very common for tables that are commonly updated.
     459              :  *
     460              :  * When the 'itemidbase' array is not presorted then we're unable to just
     461              :  * memmove() tuples around freely.  Doing so could cause us to overwrite the
     462              :  * memory belonging to a tuple we've not moved yet.  In this case, we copy all
     463              :  * the tuples that need to be moved into a temporary buffer.  We can then
     464              :  * simply memcpy() out of that temp buffer back into the page at the correct
     465              :  * location.  Tuples are copied back into the page in the same order as the
     466              :  * 'itemidbase' array, so we end up reordering the tuples back into reverse
     467              :  * line pointer order.  This will increase the chances of hitting the
     468              :  * presorted case the next time around.
     469              :  *
     470              :  * Callers must ensure that nitems is > 0
     471              :  */
     472              : static void
     473        67443 : compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
     474              : {
     475        67443 :     PageHeader  phdr = (PageHeader) page;
     476              :     Offset      upper;
     477              :     Offset      copy_tail;
     478              :     Offset      copy_head;
     479              :     itemIdCompact itemidptr;
     480              :     int         i;
     481              : 
     482              :     /* Code within will not work correctly if nitems == 0 */
     483              :     Assert(nitems > 0);
     484              : 
     485        67443 :     if (presorted)
     486              :     {
     487              : 
     488              : #ifdef USE_ASSERT_CHECKING
     489              :         {
     490              :             /*
     491              :              * Verify we've not gotten any new callers that are incorrectly
     492              :              * passing a true presorted value.
     493              :              */
     494              :             Offset      lastoff = phdr->pd_special;
     495              : 
     496              :             for (i = 0; i < nitems; i++)
     497              :             {
     498              :                 itemidptr = &itemidbase[i];
     499              : 
     500              :                 Assert(lastoff > itemidptr->itemoff);
     501              : 
     502              :                 lastoff = itemidptr->itemoff;
     503              :             }
     504              :         }
     505              : #endif                          /* USE_ASSERT_CHECKING */
     506              : 
     507              :         /*
     508              :          * 'itemidbase' is already in the optimal order, i.e., lower item
     509              :          * pointers have a higher offset.  This allows us to memmove() the
     510              :          * tuples up to the end of the page without having to worry about
     511              :          * overwriting other tuples that have not been moved yet.
     512              :          *
     513              :          * There's a good chance that there are tuples already right at the
     514              :          * end of the page that we can simply skip over because they're
     515              :          * already in the correct location within the page.  We'll do that
     516              :          * first...
     517              :          */
     518        49999 :         upper = phdr->pd_special;
     519        49999 :         i = 0;
     520              :         do
     521              :         {
     522       702137 :             itemidptr = &itemidbase[i];
     523       702137 :             if (upper != itemidptr->itemoff + itemidptr->alignedlen)
     524        44107 :                 break;
     525       658030 :             upper -= itemidptr->alignedlen;
     526              : 
     527       658030 :             i++;
     528       658030 :         } while (i < nitems);
     529              : 
     530              :         /*
     531              :          * Now that we've found the first tuple that needs to be moved, we can
     532              :          * do the tuple compactification.  We try to make as few memmove()
     533              :          * calls as possible, calling memmove() only when there's a gap.  When
     534              :          * we see a gap we just move all tuples after the gap up until the
     535              :          * point of the last move operation.
     536              :          */
     537        49999 :         copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
     538      1149674 :         for (; i < nitems; i++)
     539              :         {
     540              :             ItemId      lp;
     541              : 
     542      1099675 :             itemidptr = &itemidbase[i];
     543      1099675 :             lp = PageGetItemId(page, itemidptr->offsetindex + 1);
     544              : 
     545      1099675 :             if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
     546              :             {
     547       135437 :                 memmove((char *) page + upper,
     548       135437 :                         page + copy_head,
     549       135437 :                         copy_tail - copy_head);
     550              : 
     551              :                 /*
     552              :                  * We've now moved all tuples already seen, but not the
     553              :                  * current tuple, so we set the copy_tail to the end of this
     554              :                  * tuple so it can be moved in another iteration of the loop.
     555              :                  */
     556       135437 :                 copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
     557              :             }
     558              :             /* shift the target offset down by the length of this tuple */
     559      1099675 :             upper -= itemidptr->alignedlen;
     560              :             /* point the copy_head to the start of this tuple */
     561      1099675 :             copy_head = itemidptr->itemoff;
     562              : 
     563              :             /* update the line pointer to reference the new offset */
     564      1099675 :             lp->lp_off = upper;
     565              :         }
     566              : 
     567              :         /* move the remaining tuples. */
     568        49999 :         memmove((char *) page + upper,
     569        49999 :                 page + copy_head,
     570        49999 :                 copy_tail - copy_head);
     571              :     }
     572              :     else
     573              :     {
     574              :         PGAlignedBlock scratch;
     575        17444 :         char       *scratchptr = scratch.data;
     576              : 
     577              :         /*
     578              :          * Non-presorted case:  The tuples in the itemidbase array may be in
     579              :          * any order.  So, in order to move these to the end of the page we
     580              :          * must make a temp copy of each tuple that needs to be moved before
     581              :          * we copy them back into the page at the new offset.
     582              :          *
     583              :          * If a large percentage of tuples have been pruned (>75%) then we'll
     584              :          * copy these into the temp buffer tuple-by-tuple, otherwise, we'll
     585              :          * just do a single memcpy() for all tuples that need to be moved.
     586              :          * When so many tuples have been removed there's likely to be a lot of
     587              :          * gaps and it's unlikely that many non-movable tuples remain at the
     588              :          * end of the page.
     589              :          */
     590        17444 :         if (nitems < PageGetMaxOffsetNumber(page) / 4)
     591              :         {
     592         1023 :             i = 0;
     593              :             do
     594              :             {
     595        19715 :                 itemidptr = &itemidbase[i];
     596        19715 :                 memcpy(scratchptr + itemidptr->itemoff, page + itemidptr->itemoff,
     597        19715 :                        itemidptr->alignedlen);
     598        19715 :                 i++;
     599        19715 :             } while (i < nitems);
     600              : 
     601              :             /* Set things up for the compactification code below */
     602         1023 :             i = 0;
     603         1023 :             itemidptr = &itemidbase[0];
     604         1023 :             upper = phdr->pd_special;
     605              :         }
     606              :         else
     607              :         {
     608        16421 :             upper = phdr->pd_special;
     609              : 
     610              :             /*
     611              :              * Many tuples are likely to already be in the correct location.
     612              :              * There's no need to copy these into the temp buffer.  Instead
     613              :              * we'll just skip forward in the itemidbase array to the position
     614              :              * that we do need to move tuples from so that the code below just
     615              :              * leaves these ones alone.
     616              :              */
     617        16421 :             i = 0;
     618              :             do
     619              :             {
     620       388992 :                 itemidptr = &itemidbase[i];
     621       388992 :                 if (upper != itemidptr->itemoff + itemidptr->alignedlen)
     622        16421 :                     break;
     623       372571 :                 upper -= itemidptr->alignedlen;
     624              : 
     625       372571 :                 i++;
     626       372571 :             } while (i < nitems);
     627              : 
     628              :             /* Copy all tuples that need to be moved into the temp buffer */
     629        16421 :             memcpy(scratchptr + phdr->pd_upper,
     630        16421 :                    page + phdr->pd_upper,
     631        16421 :                    upper - phdr->pd_upper);
     632              :         }
     633              : 
     634              :         /*
     635              :          * Do the tuple compactification.  itemidptr is already pointing to
     636              :          * the first tuple that we're going to move.  Here we collapse the
     637              :          * memcpy calls for adjacent tuples into a single call.  This is done
     638              :          * by delaying the memcpy call until we find a gap that needs to be
     639              :          * closed.
     640              :          */
     641        17444 :         copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
     642      1822457 :         for (; i < nitems; i++)
     643              :         {
     644              :             ItemId      lp;
     645              : 
     646      1805013 :             itemidptr = &itemidbase[i];
     647      1805013 :             lp = PageGetItemId(page, itemidptr->offsetindex + 1);
     648              : 
     649              :             /* copy pending tuples when we detect a gap */
     650      1805013 :             if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
     651              :             {
     652       491650 :                 memcpy((char *) page + upper,
     653       491650 :                        scratchptr + copy_head,
     654       491650 :                        copy_tail - copy_head);
     655              : 
     656              :                 /*
     657              :                  * We've now copied all tuples already seen, but not the
     658              :                  * current tuple, so we set the copy_tail to the end of this
     659              :                  * tuple.
     660              :                  */
     661       491650 :                 copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
     662              :             }
     663              :             /* shift the target offset down by the length of this tuple */
     664      1805013 :             upper -= itemidptr->alignedlen;
     665              :             /* point the copy_head to the start of this tuple */
     666      1805013 :             copy_head = itemidptr->itemoff;
     667              : 
     668              :             /* update the line pointer to reference the new offset */
     669      1805013 :             lp->lp_off = upper;
     670              :         }
     671              : 
     672              :         /* Copy the remaining chunk */
     673        17444 :         memcpy((char *) page + upper,
     674        17444 :                scratchptr + copy_head,
     675        17444 :                copy_tail - copy_head);
     676              :     }
     677              : 
     678        67443 :     phdr->pd_upper = upper;
     679        67443 : }
     680              : 
     681              : /*
     682              :  * PageRepairFragmentation
     683              :  *
     684              :  * Frees fragmented space on a heap page following pruning.
     685              :  *
     686              :  * This routine is usable for heap pages only, but see PageIndexMultiDelete.
     687              :  *
     688              :  * This routine removes unused line pointers from the end of the line pointer
     689              :  * array.  This is possible when dead heap-only tuples get removed by pruning,
     690              :  * especially when there were HOT chains with several tuples each beforehand.
     691              :  *
     692              :  * Caller had better have a full cleanup lock on page's buffer.  As a side
     693              :  * effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
     694              :  * needed.  Caller might also need to account for a reduction in the length of
     695              :  * the line pointer array following array truncation.
                      :  *
                      :  * page: the heap page to compact; it is modified in place.
                      :  *
                      :  * Processing order: validate the header pointers, scan the line pointer
                      :  * array (resetting unused entries and recording every item that has
                      :  * storage), repack the tuple data, truncate trailing unused line pointers
                      :  * by lowering pd_lower, and finally update the free-line-pointer hint.
                      :  *
                      :  * Raises ERROR with ERRCODE_DATA_CORRUPTED if the header pointers, any
                      :  * item offset, or the summed item lengths are inconsistent.
                      :  *
                      :  * NOTE(review): itemidbase holds MaxHeapTuplesPerPage entries and the scan
                      :  * loop appends to it without a bounds check; presumably heap pages can
                      :  * never carry more items with storage than that -- confirm.
     696              :  */
     697              : void
     698        60845 : PageRepairFragmentation(Page page)
     699              : {
     700        60845 :     Offset      pd_lower = ((PageHeader) page)->pd_lower;
     701        60845 :     Offset      pd_upper = ((PageHeader) page)->pd_upper;
     702        60845 :     Offset      pd_special = ((PageHeader) page)->pd_special;
     703              :     Offset      last_offset;
     704              :     itemIdCompactData itemidbase[MaxHeapTuplesPerPage];
     705              :     itemIdCompact itemidptr;
     706              :     ItemId      lp;
     707              :     int         nline,
     708              :                 nstorage,
     709              :                 nunused;
     710        60845 :     OffsetNumber finalusedlp = InvalidOffsetNumber;
     711              :     int         i;
     712              :     Size        totallen;
     713        60845 :     bool        presorted = true;   /* For now; cleared by the scan below */
     714              : 
     715              :     /*
     716              :      * It's worth the trouble to be more paranoid here than in most places,
     717              :      * because we are about to reshuffle data in (what is usually) a shared
     718              :      * disk buffer.  If we aren't careful then corrupted pointers, lengths,
     719              :      * etc could cause us to clobber adjacent disk buffers, spreading the data
     720              :      * loss further.  So, check everything.
     721              :      */
     722        60845 :     if (pd_lower < SizeOfPageHeaderData ||
     723        60845 :         pd_lower > pd_upper ||
     724        60845 :         pd_upper > pd_special ||
     725        60845 :         pd_special > BLCKSZ ||
     726        60845 :         pd_special != MAXALIGN(pd_special))
     727            0 :         ereport(ERROR,
     728              :                 (errcode(ERRCODE_DATA_CORRUPTED),
     729              :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
     730              :                         pd_lower, pd_upper, pd_special)));
     731              : 
     732              :     /*
     733              :      * Run through the line pointer array and collect data about live items.
                      :      *
                      :      * Along the way, note whether the item offsets strictly decrease in
                      :      * line pointer order; that result is handed to compactify_tuples as
                      :      * "presorted".  finalusedlp ends up as the highest used offset number.
     734              :      */
     735        60845 :     nline = PageGetMaxOffsetNumber(page);
     736        60845 :     itemidptr = itemidbase;
     737        60845 :     nunused = totallen = 0;
     738        60845 :     last_offset = pd_special;
     739      4924451 :     for (i = FirstOffsetNumber; i <= nline; i++)
     740              :     {
     741      4863606 :         lp = PageGetItemId(page, i);
     742      4863606 :         if (ItemIdIsUsed(lp))
     743              :         {
     744      4635127 :             if (ItemIdHasStorage(lp))
     745              :             {
     746      1762873 :                 itemidptr->offsetindex = i - 1;
     747      1762873 :                 itemidptr->itemoff = ItemIdGetOffset(lp);
     748              : 
     749      1762873 :                 if (last_offset > itemidptr->itemoff)
     750      1483260 :                     last_offset = itemidptr->itemoff;
     751              :                 else
     752       279613 :                     presorted = false;
     753              : 
     754      1762873 :                 if (unlikely(itemidptr->itemoff < (int) pd_upper ||
     755              :                              itemidptr->itemoff >= (int) pd_special))
     756            0 :                     ereport(ERROR,
     757              :                             (errcode(ERRCODE_DATA_CORRUPTED),
     758              :                              errmsg("corrupted line pointer: %u",
     759              :                                     itemidptr->itemoff)));
     760      1762873 :                 itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
     761      1762873 :                 totallen += itemidptr->alignedlen;
     762      1762873 :                 itemidptr++;
     763              :             }
     764              : 
     765      4635127 :             finalusedlp = i;    /* Could be the final non-LP_UNUSED item */
     766              :         }
     767              :         else
     768              :         {
     769              :             /* Unused entries should have lp_len = 0, but make sure */
     770              :             Assert(!ItemIdHasStorage(lp));
     771       228479 :             ItemIdSetUnused(lp);
     772       228479 :             nunused++;
     773              :         }
     774              :     }
     775              : 
     776        60845 :     nstorage = itemidptr - itemidbase;
     777        60845 :     if (nstorage == 0)
     778              :     {
     779              :         /* No item has storage, so just reset pd_upper quickly */
     780        12229 :         ((PageHeader) page)->pd_upper = pd_special;
     781              :     }
     782              :     else
     783              :     {
     784              :         /* Need to compact the page the hard way */
     785        48616 :         if (totallen > (Size) (pd_special - pd_lower))
     786            0 :             ereport(ERROR,
     787              :                     (errcode(ERRCODE_DATA_CORRUPTED),
     788              :                      errmsg("corrupted item lengths: total %zu, available space %u",
     789              :                             totallen, pd_special - pd_lower)));
     790              : 
     791        48616 :         compactify_tuples(itemidbase, nstorage, page, presorted);
     792              :     }
     793              : 
     794        60845 :     if (finalusedlp != nline)
     795              :     {
     796              :         /* The last line pointer is not the last used line pointer */
     797         2036 :         int         nunusedend = nline - finalusedlp;
     798              : 
     799              :         Assert(nunused >= nunusedend && nunusedend > 0);
     800              : 
     801              :         /* remove trailing unused line pointers from the count */
     802         2036 :         nunused -= nunusedend;
     803              :         /* truncate the line pointer array */
     804         2036 :         ((PageHeader) page)->pd_lower -= (sizeof(ItemIdData) * nunusedend);
     805              :     }
     806              : 
     807              :     /* Set hint bit for PageAddItemExtended */
     808        60845 :     if (nunused > 0)
     809        15311 :         PageSetHasFreeLinePointers(page);
     810              :     else
     811        45534 :         PageClearHasFreeLinePointers(page);
     812        60845 : }
     813              : 
     814              : /*
     815              :  * PageTruncateLinePointerArray
     816              :  *
     817              :  * Removes unused line pointers at the end of the line pointer array.
     818              :  *
     819              :  * This routine is usable for heap pages only.  It is called by VACUUM during
     820              :  * its second pass over the heap.  We expect at least one LP_UNUSED line
     821              :  * pointer on the page (if VACUUM didn't have an LP_DEAD item on the page that
     822              :  * it just set to LP_UNUSED then it should not call here).
     823              :  *
     824              :  * We avoid truncating the line pointer array to 0 items, if necessary by
     825              :  * leaving behind a single remaining LP_UNUSED item.  This is a little
     826              :  * arbitrary, but it seems like a good idea to avoid leaving a PageIsEmpty()
     827              :  * page behind.
     828              :  *
     829              :  * Caller can have either an exclusive lock or a full cleanup lock on page's
     830              :  * buffer.  The page's PD_HAS_FREE_LINES hint bit will be set or unset based
     831              :  * on whether or not we leave behind any remaining LP_UNUSED items.
                      :  *
                      :  * page: the heap page whose line pointer array is truncated in place.
                      :  *
                      :  * The back-to-front scan never counts the first line pointer as
                      :  * truncatable, which is what keeps at least one item on the page.  When
                      :  * CLOBBER_FREED_MEMORY is defined, the truncated part of the array is
                      :  * overwritten with 0x7F bytes.
     832              :  */
     833              : void
     834        16598 : PageTruncateLinePointerArray(Page page)
     835              : {
     836        16598 :     PageHeader  phdr = (PageHeader) page;
     837        16598 :     bool        countdone = false,
     838        16598 :                 sethint = false;
     839        16598 :     int         nunusedend = 0;
     840              : 
     841              :     /* Scan line pointer array back-to-front */
     842       942365 :     for (int i = PageGetMaxOffsetNumber(page); i >= FirstOffsetNumber; i--)
     843              :     {
     844       941682 :         ItemId      lp = PageGetItemId(page, i);
     845              : 
     846       941682 :         if (!countdone && i > FirstOffsetNumber)
     847              :         {
     848              :             /*
     849              :              * Still determining which line pointers from the end of the array
     850              :              * will be truncated away.  Either count another line pointer as
     851              :              * safe to truncate, or notice that it's not safe to truncate
     852              :              * additional line pointers (stop counting line pointers).
     853              :              */
     854       820121 :             if (!ItemIdIsUsed(lp))
     855       811492 :                 nunusedend++;
     856              :             else
     857         8629 :                 countdone = true;
     858              :         }
     859              :         else
     860              :         {
     861              :             /*
     862              :              * Once we've stopped counting we still need to figure out if
     863              :              * there are any remaining LP_UNUSED line pointers somewhere more
     864              :              * towards the front of the array.
                      :              *
                      :              * This branch is also taken for the first line pointer while
                      :              * still counting, so an unused first item sets the hint rather
                      :              * than being truncated.
     865              :              */
     866       121561 :             if (!ItemIdIsUsed(lp))
     867              :             {
     868              :                 /*
     869              :                  * This is an unused line pointer that we won't be truncating
     870              :                  * away -- so there is at least one.  Set hint on page.
     871              :                  */
     872        15915 :                 sethint = true;
     873        15915 :                 break;
     874              :             }
     875              :         }
     876              :     }
     877              : 
     878        16598 :     if (nunusedend > 0)
     879              :     {
     880        10766 :         phdr->pd_lower -= sizeof(ItemIdData) * nunusedend;
     881              : 
     882              : #ifdef CLOBBER_FREED_MEMORY
     883              :         memset((char *) page + phdr->pd_lower, 0x7F,
     884              :                sizeof(ItemIdData) * nunusedend);
     885              : #endif
     886              :     }
     887              :     else
     888              :         Assert(sethint);
     889              : 
     890              :     /* Set hint bit for PageAddItemExtended */
     891        16598 :     if (sethint)
     892        15915 :         PageSetHasFreeLinePointers(page);
     893              :     else
     894          683 :         PageClearHasFreeLinePointers(page);
     895        16598 : }
     896              : 
     897              : /*
     898              :  * PageGetFreeSpace
     899              :  *      Returns the size of the free (allocatable) space on a page,
     900              :  *      reduced by the space needed for a new line pointer.
     901              :  *
     902              :  * Note: this should usually only be used on index pages.  Use
     903              :  * PageGetHeapFreeSpace on heap pages.
                      :  *
                      :  * Returns 0 when the gap between pd_lower and pd_upper is smaller than one
                      :  * line pointer (including when pd_lower > pd_upper).
     904              :  */
     905              : Size
     906     29457741 : PageGetFreeSpace(const PageData *page)
     907              : {
     908     29457741 :     const PageHeaderData *phdr = (const PageHeaderData *) page;
     909              :     int         space;
     910              : 
     911              :     /*
     912              :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     913              :      * pd_upper.
     914              :      */
     915     29457741 :     space = (int) phdr->pd_upper - (int) phdr->pd_lower;
     916              : 
     917     29457741 :     if (space < (int) sizeof(ItemIdData))
     918         8185 :         return 0;
     919     29449556 :     space -= sizeof(ItemIdData);
     920              : 
     921     29449556 :     return (Size) space;
     922              : }
     923              : 
     924              : /*
     925              :  * PageGetFreeSpaceForMultipleTuples
     926              :  *      Returns the size of the free (allocatable) space on a page,
     927              :  *      reduced by the space needed for multiple new line pointers.
     928              :  *
     929              :  * Note: this should usually only be used on index pages.  Use
     930              :  * PageGetHeapFreeSpace on heap pages.
                      :  *
                      :  * ntups is the number of line pointers to reserve.  Returns 0 when the gap
                      :  * between pd_lower and pd_upper cannot hold that many line pointers.
     931              :  */
     932              : Size
     933        64732 : PageGetFreeSpaceForMultipleTuples(const PageData *page, int ntups)
     934              : {
     935        64732 :     const PageHeaderData *phdr = (const PageHeaderData *) page;
     936              :     int         space;
     937              : 
     938              :     /*
     939              :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     940              :      * pd_upper.
     941              :      */
     942        64732 :     space = (int) phdr->pd_upper - (int) phdr->pd_lower;
     943              : 
     944        64732 :     if (space < (int) (ntups * sizeof(ItemIdData)))
     945            0 :         return 0;
     946        64732 :     space -= ntups * sizeof(ItemIdData);
     947              : 
     948        64732 :     return (Size) space;
     949              : }
     950              : 
     951              : /*
     952              :  * PageGetExactFreeSpace
     953              :  *      Returns the size of the free (allocatable) space on a page,
     954              :  *      without any consideration for adding/removing line pointers.
                      :  *
                      :  * Returns 0 if pd_lower > pd_upper.
     955              :  */
     956              : Size
     957      1641613 : PageGetExactFreeSpace(const PageData *page)
     958              : {
     959      1641613 :     const PageHeaderData *phdr = (const PageHeaderData *) page;
     960              :     int         space;
     961              : 
     962              :     /*
     963              :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     964              :      * pd_upper.
     965              :      */
     966      1641613 :     space = (int) phdr->pd_upper - (int) phdr->pd_lower;
     967              : 
     968      1641613 :     if (space < 0)
     969            0 :         return 0;
     970              : 
     971      1641613 :     return (Size) space;
     972              : }
     973              : 
     974              : 
     975              : /*
     976              :  * PageGetHeapFreeSpace
     977              :  *      Returns the size of the free (allocatable) space on a page,
     978              :  *      reduced by the space needed for a new line pointer.
     979              :  *
     980              :  * The difference between this and PageGetFreeSpace is that this will return
     981              :  * zero if there are already MaxHeapTuplesPerPage line pointers in the page
     982              :  * and none are free.  We use this to enforce that no more than
     983              :  * MaxHeapTuplesPerPage line pointers are created on a heap page.  (Although
     984              :  * no more tuples than that could fit anyway, in the presence of redirected
     985              :  * or dead line pointers it'd be possible to have too many line pointers.
     986              :  * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
     987              :  * on the number of line pointers, we make this extra check.)
                      :  *
                      :  * The line pointer array is only scanned when the page already has at
                      :  * least MaxHeapTuplesPerPage line pointers and its free-line-pointers hint
                      :  * is set; if the hint is not set in that case, zero is returned without
                      :  * scanning.
     988              :  */
     989              : Size
     990     14132802 : PageGetHeapFreeSpace(const PageData *page)
     991              : {
     992              :     Size        space;
     993              : 
     994     14132802 :     space = PageGetFreeSpace(page);
     995     14132802 :     if (space > 0)
     996              :     {
     997              :         OffsetNumber offnum,
     998              :                     nline;
     999              : 
    1000              :         /*
    1001              :          * Are there already MaxHeapTuplesPerPage line pointers in the page?
    1002              :          */
    1003     14108980 :         nline = PageGetMaxOffsetNumber(page);
    1004     14108980 :         if (nline >= MaxHeapTuplesPerPage)
    1005              :         {
    1006         4677 :             if (PageHasFreeLinePointers(page))
    1007              :             {
    1008              :                 /*
    1009              :                  * Since this is just a hint, we must confirm that there is
    1010              :                  * indeed a free line pointer
    1011              :                  */
    1012       593587 :                 for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
    1013              :                 {
    1014       593511 :                     ItemId      lp = PageGetItemId(unconstify(PageData *, page), offnum);
    1015              : 
    1016       593511 :                     if (!ItemIdIsUsed(lp))
    1017         3277 :                         break;
    1018              :                 }
    1019              : 
    1020         3353 :                 if (offnum > nline)
    1021              :                 {
    1022              :                     /*
    1023              :                      * The hint is wrong, but we can't clear it here since we
    1024              :                      * don't have the ability to mark the page dirty.
    1025              :                      */
    1026           76 :                     space = 0;
    1027              :                 }
    1028              :             }
    1029              :             else
    1030              :             {
    1031              :                 /*
    1032              :                  * Although the hint might be wrong, PageAddItem will believe
    1033              :                  * it anyway, so we must believe it too.
    1034              :                  */
    1035         1324 :                 space = 0;
    1036              :             }
    1037              :         }
    1038              :     }
    1039     14132802 :     return space;
    1040              : }
    1041              : 
    1042              : 
    1043              : /*
    1044              :  * PageIndexTupleDelete
    1045              :  *
    1046              :  * This routine does the work of removing a tuple from an index page.
    1047              :  *
    1048              :  * Unlike heap pages, we compact out the line pointer for the removed tuple.
    1049              :  */
    1050              : void
    1051       506946 : PageIndexTupleDelete(Page page, OffsetNumber offnum)
    1052              : {
    1053       506946 :     PageHeader  phdr = (PageHeader) page;
    1054              :     char       *addr;
    1055              :     ItemId      tup;
    1056              :     Size        size;
    1057              :     unsigned    offset;
    1058              :     int         nbytes;
    1059              :     int         offidx;
    1060              :     int         nline;
    1061              : 
    1062              :     /*
    1063              :      * As with PageRepairFragmentation, paranoia seems justified.
    1064              :      */
    1065       506946 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1066       506946 :         phdr->pd_lower > phdr->pd_upper ||
    1067       506946 :         phdr->pd_upper > phdr->pd_special ||
    1068       506946 :         phdr->pd_special > BLCKSZ ||
    1069       506946 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1070            0 :         ereport(ERROR,
    1071              :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1072              :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1073              :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1074              : 
    1075       506946 :     nline = PageGetMaxOffsetNumber(page);
    1076       506946 :     if ((int) offnum <= 0 || (int) offnum > nline)
    1077            0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1078              : 
    1079              :     /* change offset number to offset index */
    1080       506946 :     offidx = offnum - 1;
    1081              : 
    1082       506946 :     tup = PageGetItemId(page, offnum);
    1083              :     Assert(ItemIdHasStorage(tup));
    1084       506946 :     size = ItemIdGetLength(tup);
    1085       506946 :     offset = ItemIdGetOffset(tup);
    1086              : 
    1087       506946 :     if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
    1088       506946 :         offset != MAXALIGN(offset))
    1089            0 :         ereport(ERROR,
    1090              :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1091              :                  errmsg("corrupted line pointer: offset = %u, size = %zu",
    1092              :                         offset, size)));
    1093              : 
    1094              :     /* Amount of space to actually be deleted */
    1095       506946 :     size = MAXALIGN(size);
    1096              : 
    1097              :     /*
    1098              :      * First, we want to get rid of the pd_linp entry for the index tuple. We
    1099              :      * copy all subsequent linp's back one slot in the array. We don't use
    1100              :      * PageGetItemId, because we are manipulating the _array_, not individual
    1101              :      * linp's.
    1102              :      */
    1103       506946 :     nbytes = phdr->pd_lower -
    1104       506946 :         ((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);
    1105              : 
    1106       506946 :     if (nbytes > 0)
    1107       493169 :         memmove(&(phdr->pd_linp[offidx]),
    1108       493169 :                 &(phdr->pd_linp[offidx + 1]),
    1109              :                 nbytes);
    1110              : 
    1111              :     /*
    1112              :      * Now move everything between the old upper bound (beginning of tuple
    1113              :      * space) and the beginning of the deleted tuple forward, so that space in
    1114              :      * the middle of the page is left free.  If we've just deleted the tuple
    1115              :      * at the beginning of tuple space, then there's no need to do the copy.
    1116              :      */
    1117              : 
    1118              :     /* beginning of tuple space */
    1119       506946 :     addr = (char *) page + phdr->pd_upper;
    1120              : 
    1121       506946 :     if (offset > phdr->pd_upper)
    1122       493791 :         memmove(addr + size, addr, offset - phdr->pd_upper);
    1123              : 
    1124              :     /* adjust free space boundary pointers */
    1125       506946 :     phdr->pd_upper += size;
    1126       506946 :     phdr->pd_lower -= sizeof(ItemIdData);
    1127              : 
    1128              :     /*
    1129              :      * Finally, we need to adjust the linp entries that remain.
    1130              :      *
    1131              :      * Anything that used to be before the deleted tuple's data was moved
    1132              :      * forward by the size of the deleted tuple.
    1133              :      */
    1134       506946 :     if (!PageIsEmpty(page))
    1135              :     {
    1136              :         int         i;
    1137              : 
    1138       506161 :         nline--;                /* there's one less than when we started */
    1139     75149702 :         for (i = 1; i <= nline; i++)
    1140              :         {
    1141     74643541 :             ItemId      ii = PageGetItemId(page, i);
    1142              : 
    1143              :             Assert(ItemIdHasStorage(ii));
    1144     74643541 :             if (ItemIdGetOffset(ii) <= offset)
    1145     48420649 :                 ii->lp_off += size;
    1146              :         }
    1147              :     }
    1148       506946 : }
    1149              : 
    1150              : 
    1151              : /*
    1152              :  * PageIndexMultiDelete
    1153              :  *
    1154              :  * This routine handles the case of deleting multiple tuples from an
    1155              :  * index page at once.  It is considerably faster than a loop around
    1156              :  * PageIndexTupleDelete ... however, the caller *must* supply the array
    1157              :  * of item numbers to be deleted in item number order!
    1158              :  */
    1159              : void
    1160        21672 : PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
    1161              : {
    1162        21672 :     PageHeader  phdr = (PageHeader) page;
    1163        21672 :     Offset      pd_lower = phdr->pd_lower;
    1164        21672 :     Offset      pd_upper = phdr->pd_upper;
    1165        21672 :     Offset      pd_special = phdr->pd_special;
    1166              :     Offset      last_offset;
    1167              :     itemIdCompactData itemidbase[MaxIndexTuplesPerPage];
    1168              :     ItemIdData  newitemids[MaxIndexTuplesPerPage];
    1169              :     itemIdCompact itemidptr;
    1170              :     ItemId      lp;
    1171              :     int         nline,
    1172              :                 nused;
    1173              :     Size        totallen;
    1174              :     Size        size;
    1175              :     unsigned    offset;
    1176              :     int         nextitm;
    1177              :     OffsetNumber offnum;
    1178        21672 :     bool        presorted = true;   /* For now */
    1179              : 
    1180              :     Assert(nitems <= MaxIndexTuplesPerPage);
    1181              : 
    1182              :     /*
    1183              :      * If there aren't very many items to delete, then retail
    1184              :      * PageIndexTupleDelete is the best way.  Delete the items in reverse
    1185              :      * order so we don't have to think about adjusting item numbers for
    1186              :      * previous deletions.
    1187              :      *
    1188              :      * TODO: tune the magic number here
    1189              :      */
    1190        21672 :     if (nitems <= 2)
    1191              :     {
    1192         5830 :         while (--nitems >= 0)
    1193         3307 :             PageIndexTupleDelete(page, itemnos[nitems]);
    1194         2523 :         return;
    1195              :     }
    1196              : 
    1197              :     /*
    1198              :      * As with PageRepairFragmentation, paranoia seems justified.
    1199              :      */
    1200        19149 :     if (pd_lower < SizeOfPageHeaderData ||
    1201        19149 :         pd_lower > pd_upper ||
    1202        19149 :         pd_upper > pd_special ||
    1203        19149 :         pd_special > BLCKSZ ||
    1204        19149 :         pd_special != MAXALIGN(pd_special))
    1205            0 :         ereport(ERROR,
    1206              :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1207              :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1208              :                         pd_lower, pd_upper, pd_special)));
    1209              : 
    1210              :     /*
    1211              :      * Scan the line pointer array and build a list of just the ones we are
    1212              :      * going to keep.  Notice we do not modify the page yet, since we are
    1213              :      * still validity-checking.
    1214              :      */
    1215        19149 :     nline = PageGetMaxOffsetNumber(page);
    1216        19149 :     itemidptr = itemidbase;
    1217        19149 :     totallen = 0;
    1218        19149 :     nused = 0;
    1219        19149 :     nextitm = 0;
    1220        19149 :     last_offset = pd_special;
    1221      4265531 :     for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
    1222              :     {
    1223      4246382 :         lp = PageGetItemId(page, offnum);
    1224              :         Assert(ItemIdHasStorage(lp));
    1225      4246382 :         size = ItemIdGetLength(lp);
    1226      4246382 :         offset = ItemIdGetOffset(lp);
    1227      4246382 :         if (offset < pd_upper ||
    1228      4246382 :             (offset + size) > pd_special ||
    1229      4246382 :             offset != MAXALIGN(offset))
    1230            0 :             ereport(ERROR,
    1231              :                     (errcode(ERRCODE_DATA_CORRUPTED),
    1232              :                      errmsg("corrupted line pointer: offset = %u, size = %zu",
    1233              :                             offset, size)));
    1234              : 
    1235      4246382 :         if (nextitm < nitems && offnum == itemnos[nextitm])
    1236              :         {
    1237              :             /* skip item to be deleted */
    1238      2073966 :             nextitm++;
    1239              :         }
    1240              :         else
    1241              :         {
    1242      2172416 :             itemidptr->offsetindex = nused; /* where it will go */
    1243      2172416 :             itemidptr->itemoff = offset;
    1244              : 
    1245      2172416 :             if (last_offset > itemidptr->itemoff)
    1246      1106551 :                 last_offset = itemidptr->itemoff;
    1247              :             else
    1248      1065865 :                 presorted = false;
    1249              : 
    1250      2172416 :             itemidptr->alignedlen = MAXALIGN(size);
    1251      2172416 :             totallen += itemidptr->alignedlen;
    1252      2172416 :             newitemids[nused] = *lp;
    1253      2172416 :             itemidptr++;
    1254      2172416 :             nused++;
    1255              :         }
    1256              :     }
    1257              : 
    1258              :     /* this will catch invalid or out-of-order itemnos[] */
    1259        19149 :     if (nextitm != nitems)
    1260            0 :         elog(ERROR, "incorrect index offsets supplied");
    1261              : 
    1262        19149 :     if (totallen > (Size) (pd_special - pd_lower))
    1263            0 :         ereport(ERROR,
    1264              :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1265              :                  errmsg("corrupted item lengths: total %zu, available space %u",
    1266              :                         totallen, pd_special - pd_lower)));
    1267              : 
    1268              :     /*
    1269              :      * Looks good. Overwrite the line pointers with the copy, from which we've
    1270              :      * removed all the unused items.
    1271              :      */
    1272        19149 :     memcpy(phdr->pd_linp, newitemids, nused * sizeof(ItemIdData));
    1273        19149 :     phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
    1274              : 
    1275              :     /* and compactify the tuple data */
    1276        19149 :     if (nused > 0)
    1277        18827 :         compactify_tuples(itemidbase, nused, page, presorted);
    1278              :     else
    1279          322 :         phdr->pd_upper = pd_special;
    1280              : }
    1281              : 
    1282              : 
    1283              : /*
    1284              :  * PageIndexTupleDeleteNoCompact
    1285              :  *
    1286              :  * Remove the specified tuple from an index page, but set its line pointer
    1287              :  * to "unused" instead of compacting it out, except that it can be removed
    1288              :  * if it's the last line pointer on the page.
    1289              :  *
    1290              :  * This is used for index AMs that require that existing TIDs of live tuples
    1291              :  * remain unchanged, and are willing to allow unused line pointers instead.
    1292              :  */
    1293              : void
    1294          339 : PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
    1295              : {
    1296          339 :     PageHeader  phdr = (PageHeader) page;
    1297              :     char       *addr;
    1298              :     ItemId      tup;
    1299              :     Size        size;
    1300              :     unsigned    offset;
    1301              :     int         nline;
    1302              : 
    1303              :     /*
    1304              :      * As with PageRepairFragmentation, paranoia seems justified.
    1305              :      */
    1306          339 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1307          339 :         phdr->pd_lower > phdr->pd_upper ||
    1308          339 :         phdr->pd_upper > phdr->pd_special ||
    1309          339 :         phdr->pd_special > BLCKSZ ||
    1310          339 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1311            0 :         ereport(ERROR,
    1312              :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1313              :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1314              :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1315              : 
    1316          339 :     nline = PageGetMaxOffsetNumber(page);
    1317          339 :     if ((int) offnum <= 0 || (int) offnum > nline)
    1318            0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1319              : 
    1320          339 :     tup = PageGetItemId(page, offnum);
    1321              :     Assert(ItemIdHasStorage(tup));
    1322          339 :     size = ItemIdGetLength(tup);
    1323          339 :     offset = ItemIdGetOffset(tup);
    1324              : 
    1325          339 :     if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
    1326          339 :         offset != MAXALIGN(offset))
    1327            0 :         ereport(ERROR,
    1328              :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1329              :                  errmsg("corrupted line pointer: offset = %u, size = %zu",
    1330              :                         offset, size)));
    1331              : 
    1332              :     /* Amount of space to actually be deleted */
    1333          339 :     size = MAXALIGN(size);
    1334              : 
    1335              :     /*
    1336              :      * Either set the line pointer to "unused", or zap it if it's the last
    1337              :      * one.  (Note: it's possible that the next-to-last one(s) are already
    1338              :      * unused, but we do not trouble to try to compact them out if so.)
    1339              :      */
    1340          339 :     if ((int) offnum < nline)
    1341          305 :         ItemIdSetUnused(tup);
    1342              :     else
    1343              :     {
    1344           34 :         phdr->pd_lower -= sizeof(ItemIdData);
    1345           34 :         nline--;                /* there's one less than when we started */
    1346              :     }
    1347              : 
    1348              :     /*
    1349              :      * Now move everything between the old upper bound (beginning of tuple
    1350              :      * space) and the beginning of the deleted tuple forward, so that space in
    1351              :      * the middle of the page is left free.  If we've just deleted the tuple
    1352              :      * at the beginning of tuple space, then there's no need to do the copy.
    1353              :      */
    1354              : 
    1355              :     /* beginning of tuple space */
    1356          339 :     addr = (char *) page + phdr->pd_upper;
    1357              : 
    1358          339 :     if (offset > phdr->pd_upper)
    1359          305 :         memmove(addr + size, addr, offset - phdr->pd_upper);
    1360              : 
    1361              :     /* adjust free space boundary pointer */
    1362          339 :     phdr->pd_upper += size;
    1363              : 
    1364              :     /*
    1365              :      * Finally, we need to adjust the linp entries that remain.
    1366              :      *
    1367              :      * Anything that used to be before the deleted tuple's data was moved
    1368              :      * forward by the size of the deleted tuple.
    1369              :      */
    1370          339 :     if (!PageIsEmpty(page))
    1371              :     {
    1372              :         int         i;
    1373              : 
    1374        86516 :         for (i = 1; i <= nline; i++)
    1375              :         {
    1376        86182 :             ItemId      ii = PageGetItemId(page, i);
    1377              : 
    1378        86182 :             if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
    1379        42286 :                 ii->lp_off += size;
    1380              :         }
    1381              :     }
    1382          339 : }
    1383              : 
    1384              : 
    1385              : /*
    1386              :  * PageIndexTupleOverwrite
    1387              :  *
    1388              :  * Replace a specified tuple on an index page.
    1389              :  *
    1390              :  * The new tuple is placed exactly where the old one had been, shifting
    1391              :  * other tuples' data up or down as needed to keep the page compacted.
    1392              :  * This is better than deleting and reinserting the tuple, because it
    1393              :  * avoids any data shifting when the tuple size doesn't change; and
    1394              :  * even when it does, we avoid moving the line pointers around.
    1395              :  * This could be used by an index AM that doesn't want to unset the
    1396              :  * LP_DEAD bit when it happens to be set.  It could conceivably also be
    1397              :  * used by an index AM that cares about the physical order of tuples as
    1398              :  * well as their logical/ItemId order.
    1399              :  *
    1400              :  * If there's insufficient space for the new tuple, return false.  Other
    1401              :  * errors represent data-corruption problems, so we just elog.
    1402              :  */
    1403              : bool
    1404       448302 : PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
    1405              :                         const void *newtup, Size newsize)
    1406              : {
    1407       448302 :     PageHeader  phdr = (PageHeader) page;
    1408              :     ItemId      tupid;
    1409              :     int         oldsize;
    1410              :     unsigned    offset;
    1411              :     Size        alignednewsize;
    1412              :     int         size_diff;
    1413              :     int         itemcount;
    1414              : 
    1415              :     /*
    1416              :      * As with PageRepairFragmentation, paranoia seems justified.
    1417              :      */
    1418       448302 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1419       448302 :         phdr->pd_lower > phdr->pd_upper ||
    1420       448302 :         phdr->pd_upper > phdr->pd_special ||
    1421       448302 :         phdr->pd_special > BLCKSZ ||
    1422       448302 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1423            0 :         ereport(ERROR,
    1424              :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1425              :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1426              :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1427              : 
    1428       448302 :     itemcount = PageGetMaxOffsetNumber(page);
    1429       448302 :     if ((int) offnum <= 0 || (int) offnum > itemcount)
    1430            0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1431              : 
    1432       448302 :     tupid = PageGetItemId(page, offnum);
    1433              :     Assert(ItemIdHasStorage(tupid));
    1434       448302 :     oldsize = ItemIdGetLength(tupid);
    1435       448302 :     offset = ItemIdGetOffset(tupid);
    1436              : 
    1437       448302 :     if (offset < phdr->pd_upper || (offset + oldsize) > phdr->pd_special ||
    1438       448302 :         offset != MAXALIGN(offset))
    1439            0 :         ereport(ERROR,
    1440              :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1441              :                  errmsg("corrupted line pointer: offset = %u, size = %d",
    1442              :                         offset, oldsize)));
    1443              : 
    1444              :     /*
    1445              :      * Determine actual change in space requirement, check for page overflow.
    1446              :      */
    1447       448302 :     oldsize = MAXALIGN(oldsize);
    1448       448302 :     alignednewsize = MAXALIGN(newsize);
    1449       448302 :     if (alignednewsize > oldsize + (phdr->pd_upper - phdr->pd_lower))
    1450            0 :         return false;
    1451              : 
    1452              :     /*
    1453              :      * Relocate existing data and update line pointers, unless the new tuple
    1454              :      * is the same size as the old (after alignment), in which case there's
    1455              :      * nothing to do.  Notice that what we have to relocate is data before the
    1456              :      * target tuple, not data after, so it's convenient to express size_diff
    1457              :      * as the amount by which the tuple's size is decreasing, making it the
    1458              :      * delta to add to pd_upper and affected line pointers.
    1459              :      */
    1460       448302 :     size_diff = oldsize - (int) alignednewsize;
    1461       448302 :     if (size_diff != 0)
    1462              :     {
    1463        50146 :         char       *addr = (char *) page + phdr->pd_upper;
    1464              :         int         i;
    1465              : 
    1466              :         /* relocate all tuple data before the target tuple */
    1467        50146 :         memmove(addr + size_diff, addr, offset - phdr->pd_upper);
    1468              : 
    1469              :         /* adjust free space boundary pointer */
    1470        50146 :         phdr->pd_upper += size_diff;
    1471              : 
    1472              :         /* adjust affected line pointers too */
    1473      8316654 :         for (i = FirstOffsetNumber; i <= itemcount; i++)
    1474              :         {
    1475      8266508 :             ItemId      ii = PageGetItemId(page, i);
    1476              : 
    1477              :             /* Allow items without storage; currently only BRIN needs that */
    1478      8266508 :             if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
    1479      3995465 :                 ii->lp_off += size_diff;
    1480              :         }
    1481              :     }
    1482              : 
    1483              :     /* Update the item's tuple length without changing its lp_flags field */
    1484       448302 :     tupid->lp_off = offset + size_diff;
    1485       448302 :     tupid->lp_len = newsize;
    1486              : 
    1487              :     /* Copy new tuple data onto page */
    1488       448302 :     memcpy(PageGetItem(page, tupid), newtup, newsize);
    1489              : 
    1490       448302 :     return true;
    1491              : }
    1492              : 
    1493              : 
    1494              : /*
    1495              :  * Set checksum for a page in shared buffers.
    1496              :  *
    1497              :  * If checksums are disabled, or if the page is not initialized, just return
    1498              :  * the input.  Otherwise, we must make a copy of the page before calculating
    1499              :  * the checksum, to prevent concurrent modifications (e.g. setting hint bits)
    1500              :  * from making the final checksum invalid.  It doesn't matter if we include or
    1501              :  * exclude hints during the copy, as long as we write a valid page and
    1502              :  * associated checksum.
    1503              :  *
    1504              :  * Returns a pointer to the block-sized data that needs to be written. Uses
    1505              :  * statically-allocated memory, so the caller must immediately write the
    1506              :  * returned page and not refer to it again.
    1507              :  */
    1508              : char *
    1509       586442 : PageSetChecksumCopy(Page page, BlockNumber blkno)
    1510              : {
    1511              :     static char *pageCopy = NULL;
    1512              : 
    1513              :     /* If we don't need a checksum, just return the passed-in data */
    1514       586442 :     if (PageIsNew(page) || !DataChecksumsEnabled())
    1515        12116 :         return page;
    1516              : 
    1517              :     /*
    1518              :      * We allocate the copy space once and use it over on each subsequent
    1519              :      * call.  The point of palloc'ing here, rather than having a static char
    1520              :      * array, is first to ensure adequate alignment for the checksumming code
    1521              :      * and second to avoid wasting space in processes that never call this.
    1522              :      */
    1523       574326 :     if (pageCopy == NULL)
    1524         2863 :         pageCopy = MemoryContextAllocAligned(TopMemoryContext,
    1525              :                                              BLCKSZ,
    1526              :                                              PG_IO_ALIGN_SIZE,
    1527              :                                              0);
    1528              : 
    1529       574326 :     memcpy(pageCopy, page, BLCKSZ);
    1530       574326 :     ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno);
    1531       574326 :     return pageCopy;
    1532              : }
    1533              : 
    1534              : /*
    1535              :  * Set checksum for a page in private memory.
    1536              :  *
    1537              :  * This must only be used when we know that no other process can be modifying
    1538              :  * the page buffer.
    1539              :  */
    1540              : void
    1541        64005 : PageSetChecksumInplace(Page page, BlockNumber blkno)
    1542              : {
    1543              :     /* If we don't need a checksum, just return */
    1544        64005 :     if (PageIsNew(page) || !DataChecksumsEnabled())
    1545         2165 :         return;
    1546              : 
    1547        61840 :     ((PageHeader) page)->pd_checksum = pg_checksum_page(page, blkno);
    1548              : }
        

Generated by: LCOV version 2.0-1