LCOV - code coverage report
Current view: top level - src/backend/storage/page - bufpage.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 93.9 % 441 414
Test Date: 2026-04-07 14:16:30 Functions: 100.0 % 19 19
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * bufpage.c
       4              :  *    POSTGRES standard buffer page code.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/storage/page/bufpage.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/htup_details.h"
      18              : #include "access/itup.h"
      19              : #include "access/xlog.h"
      20              : #include "pgstat.h"
      21              : #include "storage/checksum.h"
      22              : #include "utils/memdebug.h"
      23              : #include "utils/memutils.h"
      24              : 
      25              : 
/*
 * GUC variable.
 *
 * NOTE(review): not referenced in this file's visible code; presumably
 * callers consult it when deciding whether to pass
 * PIV_IGNORE_CHECKSUM_FAILURE to PageIsVerified() — confirm against callers.
 */
bool		ignore_checksum_failure = false;
      28              : 
      29              : 
      30              : /* ----------------------------------------------------------------
      31              :  *                      Page support functions
      32              :  * ----------------------------------------------------------------
      33              :  */
      34              : 
      35              : /*
      36              :  * PageInit
      37              :  *      Initializes the contents of a page.
      38              :  *      Note that we don't calculate an initial checksum here; that's not done
      39              :  *      until it's time to write.
      40              :  */
      41              : void
      42       435667 : PageInit(Page page, Size pageSize, Size specialSize)
      43              : {
      44       435667 :     PageHeader  p = (PageHeader) page;
      45              : 
      46       435667 :     specialSize = MAXALIGN(specialSize);
      47              : 
      48              :     Assert(pageSize == BLCKSZ);
      49              :     Assert(pageSize > specialSize + SizeOfPageHeaderData);
      50              : 
      51              :     /* Make sure all fields of page are zero, as well as unused space */
      52       435667 :     MemSet(p, 0, pageSize);
      53              : 
      54       435667 :     p->pd_flags = 0;
      55       435667 :     p->pd_lower = SizeOfPageHeaderData;
      56       435667 :     p->pd_upper = pageSize - specialSize;
      57       435667 :     p->pd_special = pageSize - specialSize;
      58       435667 :     PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
      59              :     /* p->pd_prune_xid = InvalidTransactionId;       done by above MemSet */
      60       435667 : }
      61              : 
      62              : 
/*
 * PageIsVerified
 *		Check that the page header and checksum (if any) appear valid.
 *
 * This is called when a page has just been read in from disk.  The idea is
 * to cheaply detect trashed pages before we go nuts following bogus line
 * pointers, testing invalid transaction identifiers, etc.
 *
 * It turns out to be necessary to allow zeroed pages here too.  Even though
 * this routine is *not* called when deliberately adding a page to a relation,
 * there are scenarios in which a zeroed page might be found in a table.
 * (Example: a backend extends a relation, then crashes before it can write
 * any WAL entry about the new page.  The kernel will already have the
 * zeroed page in the file, and it will stay that way after restart.)  So we
 * allow zeroed pages here, and are careful that the page access macros
 * treat such a page as empty and without free space.  Eventually, VACUUM
 * will clean up such a page and make it usable.
 *
 * If flag PIV_LOG_WARNING/PIV_LOG_LOG is set, a WARNING/LOG message is logged
 * in the event of a checksum failure.
 *
 * If flag PIV_IGNORE_CHECKSUM_FAILURE is set, checksum failures will cause a
 * message about the failure to be emitted, but will not cause
 * PageIsVerified() to return false.
 *
 * To allow the caller to report statistics about checksum failures,
 * *checksum_failure_p can be passed in. Note that there may be checksum
 * failures even if this function returns true, due to
 * PIV_IGNORE_CHECKSUM_FAILURE.
 */
bool
PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_failure_p)
{
	const PageHeaderData *p = (const PageHeaderData *) page;
	size_t	   *pagebytes;
	bool		checksum_failure = false;
	bool		header_sane = false;
	uint16		checksum = 0;

	/* Default to "no failure"; set below if a mismatch is found */
	if (checksum_failure_p)
		*checksum_failure_p = false;

	/*
	 * Don't verify page data unless the page passes basic non-zero test
	 */
	if (!PageIsNew(page))
	{
		/*
		 * There shouldn't be any check for interrupt calls happening in this
		 * codepath, but just to be on the safe side we hold interrupts since
		 * if they did happen the data checksum state could change during
		 * verifying checksums, which could lead to incorrect verification
		 * results.
		 */
		HOLD_INTERRUPTS();
		if (DataChecksumsNeedVerify())
		{
			checksum = pg_checksum_page(page, blkno);

			if (checksum != p->pd_checksum)
			{
				checksum_failure = true;
				if (checksum_failure_p)
					*checksum_failure_p = true;
			}
		}
		RESUME_INTERRUPTS();

		/*
		 * The following checks don't prove the header is correct, only that
		 * it looks sane enough to allow into the buffer pool. Later usage of
		 * the block can still reveal problems, which is why we offer the
		 * checksum option.
		 */
		if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
			p->pd_lower <= p->pd_upper &&
			p->pd_upper <= p->pd_special &&
			p->pd_special <= BLCKSZ &&
			p->pd_special == MAXALIGN(p->pd_special))
			header_sane = true;

		/* Fast path: header looks sane and the checksum (if any) matched */
		if (header_sane && !checksum_failure)
			return true;
	}

	/* Check all-zeroes case */
	pagebytes = (size_t *) page;

	if (pg_memory_is_all_zeros(pagebytes, BLCKSZ))
		return true;

	/*
	 * Throw a WARNING/LOG, as instructed by PIV_LOG_*, if the checksum fails,
	 * but only after we've checked for the all-zeroes case.
	 */
	if (checksum_failure)
	{
		if ((flags & (PIV_LOG_WARNING | PIV_LOG_LOG)) != 0)
			ereport(flags & PIV_LOG_WARNING ? WARNING : LOG,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("page verification failed, calculated checksum %u but expected %u%s",
							checksum, p->pd_checksum,
							(flags & PIV_ZERO_BUFFERS_ON_ERROR ? ", buffer will be zeroed" : ""))));

		/*
		 * Accept the page despite the checksum mismatch when the caller
		 * asked for that, but only if the header itself looked sane.
		 */
		if (header_sane && (flags & PIV_IGNORE_CHECKSUM_FAILURE))
			return true;
	}

	return false;
}
     173              : 
     174              : 
/*
 *	PageAddItemExtended
 *
 *	Add an item to a page.  Return value is the offset at which it was
 *	inserted, or InvalidOffsetNumber if the item is not inserted for any
 *	reason.  A WARNING is issued indicating the reason for the refusal.
 *
 *	offsetNumber must be either InvalidOffsetNumber to specify finding a
 *	free line pointer, or a value between FirstOffsetNumber and one past
 *	the last existing item, to specify using that particular line pointer.
 *
 *	If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
 *	the item at the specified offsetNumber, which must be either a
 *	currently-unused line pointer, or one past the last existing item.
 *
 *	If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
 *	the item at the specified offsetNumber, moving existing items later
 *	in the array to make room.
 *
 *	If offsetNumber is not valid, then assign a slot by finding the first
 *	one that is both unused and deallocated.
 *
 *	If flag PAI_IS_HEAP is set, we enforce that there can't be more than
 *	MaxHeapTuplesPerPage line pointers on the page.
 *
 *	!!! EREPORT(ERROR) IS DISALLOWED HERE !!!
 */
OffsetNumber
PageAddItemExtended(Page page,
					const void *item,
					Size size,
					OffsetNumber offsetNumber,
					int flags)
{
	PageHeader	phdr = (PageHeader) page;
	Size		alignedSize;
	int			lower;
	int			upper;
	ItemId		itemId;
	OffsetNumber limit;
	bool		needshuffle = false;

	/*
	 * Be wary about corrupted page pointers
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ)
		ereport(PANIC,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	/*
	 * Select offsetNumber to place the new item at
	 */
	limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));

	/* was offsetNumber passed in? */
	if (OffsetNumberIsValid(offsetNumber))
	{
		/* yes, check it */
		if ((flags & PAI_OVERWRITE) != 0)
		{
			/* caller wants to reuse this exact slot; it must be free */
			if (offsetNumber < limit)
			{
				itemId = PageGetItemId(page, offsetNumber);
				if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
				{
					elog(WARNING, "will not overwrite a used ItemId");
					return InvalidOffsetNumber;
				}
			}
		}
		else
		{
			if (offsetNumber < limit)
				needshuffle = true; /* need to move existing linp's */
		}
	}
	else
	{
		/* offsetNumber was not passed in, so find a free slot */
		/* if no free slot, we'll put it at limit (1st open slot) */
		if (PageHasFreeLinePointers(page))
		{
			/*
			 * Scan line pointer array to locate a "recyclable" (unused)
			 * ItemId.
			 *
			 * Always use earlier items first.  PageTruncateLinePointerArray
			 * can only truncate unused items when they appear as a contiguous
			 * group at the end of the line pointer array.
			 */
			for (offsetNumber = FirstOffsetNumber;
				 offsetNumber < limit;	/* limit is maxoff+1 */
				 offsetNumber++)
			{
				itemId = PageGetItemId(page, offsetNumber);

				/*
				 * We check for no storage as well, just to be paranoid;
				 * unused items should never have storage.  Assert() that the
				 * invariant is respected too.
				 */
				Assert(ItemIdIsUsed(itemId) || !ItemIdHasStorage(itemId));

				if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
					break;
			}
			if (offsetNumber >= limit)
			{
				/* the hint is wrong, so reset it */
				PageClearHasFreeLinePointers(page);
			}
		}
		else
		{
			/* don't bother searching if hint says there's no free slot */
			offsetNumber = limit;
		}
	}

	/* Reject placing items beyond the first unused line pointer */
	if (offsetNumber > limit)
	{
		elog(WARNING, "specified item offset is too large");
		return InvalidOffsetNumber;
	}

	/* Reject placing items beyond heap boundary, if heap */
	if ((flags & PAI_IS_HEAP) != 0 && offsetNumber > MaxHeapTuplesPerPage)
	{
		elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
		return InvalidOffsetNumber;
	}

	/*
	 * Compute new lower and upper pointers for page, see if it'll fit.
	 *
	 * Note: do arithmetic as signed ints, to avoid mistakes if, say,
	 * alignedSize > pd_upper.
	 */
	if (offsetNumber == limit || needshuffle)
		lower = phdr->pd_lower + sizeof(ItemIdData);
	else
		lower = phdr->pd_lower;

	alignedSize = MAXALIGN(size);

	upper = (int) phdr->pd_upper - (int) alignedSize;

	/* not enough free space between linp array and tuple data */
	if (lower > upper)
		return InvalidOffsetNumber;

	/*
	 * OK to insert the item.  First, shuffle the existing pointers if needed.
	 */
	itemId = PageGetItemId(page, offsetNumber);

	if (needshuffle)
		memmove(itemId + 1, itemId,
				(limit - offsetNumber) * sizeof(ItemIdData));

	/* set the line pointer */
	ItemIdSetNormal(itemId, upper, size);

	/*
	 * Items normally contain no uninitialized bytes.  Core bufpage consumers
	 * conform, but this is not a necessary coding rule; a new index AM could
	 * opt to depart from it.  However, data type input functions and other
	 * C-language functions that synthesize datums should initialize all
	 * bytes; datumIsEqual() relies on this.  Testing here, along with the
	 * similar check in printtup(), helps to catch such mistakes.
	 *
	 * Values of the "name" type retrieved via index-only scans may contain
	 * uninitialized bytes; see comment in btrescan().  Valgrind will report
	 * this as an error, but it is safe to ignore.
	 */
	VALGRIND_CHECK_MEM_IS_DEFINED(item, size);

	/* copy the item's data onto the page */
	memcpy((char *) page + upper, item, size);

	/* adjust page header */
	phdr->pd_lower = (LocationIndex) lower;
	phdr->pd_upper = (LocationIndex) upper;

	return offsetNumber;
}
     366              : 
     367              : 
     368              : /*
     369              :  * PageGetTempPage
     370              :  *      Get a temporary page in local memory for special processing.
     371              :  *      The returned page is not initialized at all; caller must do that.
     372              :  */
     373              : Page
     374          135 : PageGetTempPage(const PageData *page)
     375              : {
     376              :     Size        pageSize;
     377              :     Page        temp;
     378              : 
     379          135 :     pageSize = PageGetPageSize(page);
     380          135 :     temp = (Page) palloc(pageSize);
     381              : 
     382          135 :     return temp;
     383              : }
     384              : 
     385              : /*
     386              :  * PageGetTempPageCopy
     387              :  *      Get a temporary page in local memory for special processing.
     388              :  *      The page is initialized by copying the contents of the given page.
     389              :  */
     390              : Page
     391         7225 : PageGetTempPageCopy(const PageData *page)
     392              : {
     393              :     Size        pageSize;
     394              :     Page        temp;
     395              : 
     396         7225 :     pageSize = PageGetPageSize(page);
     397         7225 :     temp = (Page) palloc(pageSize);
     398              : 
     399         7225 :     memcpy(temp, page, pageSize);
     400              : 
     401         7225 :     return temp;
     402              : }
     403              : 
     404              : /*
     405              :  * PageGetTempPageCopySpecial
     406              :  *      Get a temporary page in local memory for special processing.
     407              :  *      The page is PageInit'd with the same special-space size as the
     408              :  *      given page, and the special space is copied from the given page.
     409              :  */
     410              : Page
     411        38041 : PageGetTempPageCopySpecial(const PageData *page)
     412              : {
     413              :     Size        pageSize;
     414              :     Page        temp;
     415              : 
     416        38041 :     pageSize = PageGetPageSize(page);
     417        38041 :     temp = (Page) palloc(pageSize);
     418              : 
     419        38041 :     PageInit(temp, pageSize, PageGetSpecialSize(page));
     420       114123 :     memcpy(PageGetSpecialPointer(temp),
     421        38041 :            PageGetSpecialPointer(page),
     422        38041 :            PageGetSpecialSize(page));
     423              : 
     424        38041 :     return temp;
     425              : }
     426              : 
     427              : /*
     428              :  * PageRestoreTempPage
     429              :  *      Copy temporary page back to permanent page after special processing
     430              :  *      and release the temporary page.
     431              :  */
     432              : void
     433        35945 : PageRestoreTempPage(Page tempPage, Page oldPage)
     434              : {
     435              :     Size        pageSize;
     436              : 
     437        35945 :     pageSize = PageGetPageSize(tempPage);
     438        35945 :     memcpy(oldPage, tempPage, pageSize);
     439              : 
     440        35945 :     pfree(tempPage);
     441        35945 : }
     442              : 
/*
 * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete.
 *
 * One entry describes one tuple that compactify_tuples() may need to move:
 * which line pointer refers to it, where its data currently starts on the
 * page, and how many (MAXALIGN'd) bytes of data it occupies.
 */
typedef struct itemIdCompactData
{
	uint16		offsetindex;	/* linp array index */
	int16		itemoff;		/* page offset of item data */
	uint16		alignedlen;		/* MAXALIGN(item data len) */
} itemIdCompactData;
typedef itemIdCompactData *itemIdCompact;
     453              : 
     454              : /*
     455              :  * After removing or marking some line pointers unused, move the tuples to
     456              :  * remove the gaps caused by the removed items and reorder them back into
     457              :  * reverse line pointer order in the page.
     458              :  *
     459              :  * This function can often be fairly hot, so it pays to take some measures to
     460              :  * make it as optimal as possible.
     461              :  *
     462              :  * Callers may pass 'presorted' as true if the 'itemidbase' array is sorted in
     463              :  * descending order of itemoff.  When this is true we can just memmove()
     464              :  * tuples towards the end of the page.  This is quite a common case as it's
     465              :  * the order that tuples are initially inserted into pages.  When we call this
     466              :  * function to defragment the tuples in the page then any new line pointers
     467              :  * added to the page will keep that presorted order, so hitting this case is
     468              :  * still very common for tables that are commonly updated.
     469              :  *
     470              :  * When the 'itemidbase' array is not presorted then we're unable to just
     471              :  * memmove() tuples around freely.  Doing so could cause us to overwrite the
     472              :  * memory belonging to a tuple we've not moved yet.  In this case, we copy all
     473              :  * the tuples that need to be moved into a temporary buffer.  We can then
     474              :  * simply memcpy() out of that temp buffer back into the page at the correct
     475              :  * location.  Tuples are copied back into the page in the same order as the
     476              :  * 'itemidbase' array, so we end up reordering the tuples back into reverse
     477              :  * line pointer order.  This will increase the chances of hitting the
     478              :  * presorted case the next time around.
     479              :  *
     480              :  * Callers must ensure that nitems is > 0
     481              :  */
     482              : static void
     483        77107 : compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
     484              : {
     485        77107 :     PageHeader  phdr = (PageHeader) page;
     486              :     Offset      upper;
     487              :     Offset      copy_tail;
     488              :     Offset      copy_head;
     489              :     itemIdCompact itemidptr;
     490              :     int         i;
     491              : 
     492              :     /* Code within will not work correctly if nitems == 0 */
     493              :     Assert(nitems > 0);
     494              : 
     495        77107 :     if (presorted)
     496              :     {
     497              : 
     498              : #ifdef USE_ASSERT_CHECKING
     499              :         {
     500              :             /*
     501              :              * Verify we've not gotten any new callers that are incorrectly
     502              :              * passing a true presorted value.
     503              :              */
     504              :             Offset      lastoff = phdr->pd_special;
     505              : 
     506              :             for (i = 0; i < nitems; i++)
     507              :             {
     508              :                 itemidptr = &itemidbase[i];
     509              : 
     510              :                 Assert(lastoff > itemidptr->itemoff);
     511              : 
     512              :                 lastoff = itemidptr->itemoff;
     513              :             }
     514              :         }
     515              : #endif                          /* USE_ASSERT_CHECKING */
     516              : 
     517              :         /*
     518              :          * 'itemidbase' is already in the optimal order, i.e, lower item
     519              :          * pointers have a higher offset.  This allows us to memmove() the
     520              :          * tuples up to the end of the page without having to worry about
     521              :          * overwriting other tuples that have not been moved yet.
     522              :          *
     523              :          * There's a good chance that there are tuples already right at the
     524              :          * end of the page that we can simply skip over because they're
     525              :          * already in the correct location within the page.  We'll do that
     526              :          * first...
     527              :          */
     528        56992 :         upper = phdr->pd_special;
     529        56992 :         i = 0;
     530              :         do
     531              :         {
     532       863169 :             itemidptr = &itemidbase[i];
     533       863169 :             if (upper != itemidptr->itemoff + itemidptr->alignedlen)
     534        51207 :                 break;
     535       811962 :             upper -= itemidptr->alignedlen;
     536              : 
     537       811962 :             i++;
     538       811962 :         } while (i < nitems);
     539              : 
     540              :         /*
     541              :          * Now that we've found the first tuple that needs to be moved, we can
     542              :          * do the tuple compactification.  We try and make the least number of
     543              :          * memmove() calls and only call memmove() when there's a gap.  When
     544              :          * we see a gap we just move all tuples after the gap up until the
     545              :          * point of the last move operation.
     546              :          */
     547        56992 :         copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
     548      1290702 :         for (; i < nitems; i++)
     549              :         {
     550              :             ItemId      lp;
     551              : 
     552      1233710 :             itemidptr = &itemidbase[i];
     553      1233710 :             lp = PageGetItemId(page, itemidptr->offsetindex + 1);
     554              : 
     555      1233710 :             if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
     556              :             {
     557       138985 :                 memmove((char *) page + upper,
     558       138985 :                         page + copy_head,
     559       138985 :                         copy_tail - copy_head);
     560              : 
     561              :                 /*
     562              :                  * We've now moved all tuples already seen, but not the
     563              :                  * current tuple, so we set the copy_tail to the end of this
     564              :                  * tuple so it can be moved in another iteration of the loop.
     565              :                  */
     566       138985 :                 copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
     567              :             }
     568              :             /* shift the target offset down by the length of this tuple */
     569      1233710 :             upper -= itemidptr->alignedlen;
     570              :             /* point the copy_head to the start of this tuple */
     571      1233710 :             copy_head = itemidptr->itemoff;
     572              : 
     573              :             /* update the line pointer to reference the new offset */
     574      1233710 :             lp->lp_off = upper;
     575              :         }
     576              : 
     577              :         /* move the remaining tuples. */
     578        56992 :         memmove((char *) page + upper,
     579        56992 :                 page + copy_head,
     580        56992 :                 copy_tail - copy_head);
     581              :     }
     582              :     else
     583              :     {
     584              :         PGAlignedBlock scratch;
     585        20115 :         char       *scratchptr = scratch.data;
     586              : 
     587              :         /*
     588              :          * Non-presorted case:  The tuples in the itemidbase array may be in
     589              :          * any order.  So, in order to move these to the end of the page we
     590              :          * must make a temp copy of each tuple that needs to be moved before
     591              :          * we copy them back into the page at the new offset.
     592              :          *
     593              :          * If a large percentage of tuples have been pruned (>75%) then we'll
     594              :          * copy these into the temp buffer tuple-by-tuple, otherwise, we'll
     595              :          * just do a single memcpy() for all tuples that need to be moved.
     596              :          * When so many tuples have been removed there's likely to be a lot of
     597              :          * gaps and it's unlikely that many non-movable tuples remain at the
     598              :          * end of the page.
     599              :          */
     600        20115 :         if (nitems < PageGetMaxOffsetNumber(page) / 4)
     601              :         {
     602         1037 :             i = 0;
     603              :             do
     604              :             {
     605        22321 :                 itemidptr = &itemidbase[i];
     606        22321 :                 memcpy(scratchptr + itemidptr->itemoff, page + itemidptr->itemoff,
     607        22321 :                        itemidptr->alignedlen);
     608        22321 :                 i++;
     609        22321 :             } while (i < nitems);
     610              : 
     611              :             /* Set things up for the compactification code below */
     612         1037 :             i = 0;
     613         1037 :             itemidptr = &itemidbase[0];
     614         1037 :             upper = phdr->pd_special;
     615              :         }
     616              :         else
     617              :         {
     618        19078 :             upper = phdr->pd_special;
     619              : 
     620              :             /*
     621              :              * Many tuples are likely to already be in the correct location.
     622              :              * There's no need to copy these into the temp buffer.  Instead
     623              :              * we'll just skip forward in the itemidbase array to the position
     624              :              * that we do need to move tuples from so that the code below just
     625              :              * leaves these ones alone.
     626              :              */
     627        19078 :             i = 0;
     628              :             do
     629              :             {
     630       526937 :                 itemidptr = &itemidbase[i];
     631       526937 :                 if (upper != itemidptr->itemoff + itemidptr->alignedlen)
     632        19078 :                     break;
     633       507859 :                 upper -= itemidptr->alignedlen;
     634              : 
     635       507859 :                 i++;
     636       507859 :             } while (i < nitems);
     637              : 
     638              :             /* Copy all tuples that need to be moved into the temp buffer */
     639        19078 :             memcpy(scratchptr + phdr->pd_upper,
     640        19078 :                    page + phdr->pd_upper,
     641        19078 :                    upper - phdr->pd_upper);
     642              :         }
     643              : 
     644              :         /*
     645              :          * Do the tuple compactification.  itemidptr is already pointing to
     646              :          * the first tuple that we're going to move.  Here we collapse the
     647              :          * memcpy calls for adjacent tuples into a single call.  This is done
     648              :          * by delaying the memcpy call until we find a gap that needs to be
     649              :          * closed.
     650              :          */
     651        20115 :         copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
     652      2173794 :         for (; i < nitems; i++)
     653              :         {
     654              :             ItemId      lp;
     655              : 
     656      2153679 :             itemidptr = &itemidbase[i];
     657      2153679 :             lp = PageGetItemId(page, itemidptr->offsetindex + 1);
     658              : 
     659              :             /* copy pending tuples when we detect a gap */
     660      2153679 :             if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
     661              :             {
     662       600471 :                 memcpy((char *) page + upper,
     663       600471 :                        scratchptr + copy_head,
     664       600471 :                        copy_tail - copy_head);
     665              : 
     666              :                 /*
     667              :                  * We've now copied all tuples already seen, but not the
     668              :                  * current tuple, so we set the copy_tail to the end of this
     669              :                  * tuple.
     670              :                  */
     671       600471 :                 copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
     672              :             }
     673              :             /* shift the target offset down by the length of this tuple */
     674      2153679 :             upper -= itemidptr->alignedlen;
     675              :             /* point the copy_head to the start of this tuple */
     676      2153679 :             copy_head = itemidptr->itemoff;
     677              : 
     678              :             /* update the line pointer to reference the new offset */
     679      2153679 :             lp->lp_off = upper;
     680              :         }
     681              : 
     682              :         /* Copy the remaining chunk */
     683        20115 :         memcpy((char *) page + upper,
     684        20115 :                scratchptr + copy_head,
     685        20115 :                copy_tail - copy_head);
     686              :     }
     687              : 
     688        77107 :     phdr->pd_upper = upper;
     689        77107 : }
     690              : 
/*
 * PageRepairFragmentation
 *
 * Frees fragmented space on a heap page following pruning.
 *
 * This routine is usable for heap pages only, but see PageIndexMultiDelete.
 *
 * This routine removes unused line pointers from the end of the line pointer
 * array.  This is possible when dead heap-only tuples get removed by pruning,
 * especially when there were HOT chains with several tuples each beforehand.
 *
 * Caller had better have a full cleanup lock on page's buffer.  As a side
 * effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
 * needed.  Caller might also need to account for a reduction in the length of
 * the line pointer array following array truncation.
 */
void
PageRepairFragmentation(Page page)
{
	Offset		pd_lower = ((PageHeader) page)->pd_lower;
	Offset		pd_upper = ((PageHeader) page)->pd_upper;
	Offset		pd_special = ((PageHeader) page)->pd_special;
	Offset		last_offset;
	itemIdCompactData itemidbase[MaxHeapTuplesPerPage];
	itemIdCompact itemidptr;
	ItemId		lp;
	int			nline,
				nstorage,
				nunused;
	OffsetNumber finalusedlp = InvalidOffsetNumber;
	int			i;
	Size		totallen;
	bool		presorted = true;	/* For now */

	/*
	 * It's worth the trouble to be more paranoid here than in most places,
	 * because we are about to reshuffle data in (what is usually) a shared
	 * disk buffer.  If we aren't careful then corrupted pointers, lengths,
	 * etc could cause us to clobber adjacent disk buffers, spreading the data
	 * loss further.  So, check everything.
	 */
	if (pd_lower < SizeOfPageHeaderData ||
		pd_lower > pd_upper ||
		pd_upper > pd_special ||
		pd_special > BLCKSZ ||
		pd_special != MAXALIGN(pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						pd_lower, pd_upper, pd_special)));

	/*
	 * Run through the line pointer array and collect data about live items.
	 * Each item with storage gets an itemIdCompactData entry recording its
	 * line pointer array index, current offset, and MAXALIGN'd length, for
	 * use by compactify_tuples() below.
	 */
	nline = PageGetMaxOffsetNumber(page);
	itemidptr = itemidbase;
	nunused = totallen = 0;
	last_offset = pd_special;
	for (i = FirstOffsetNumber; i <= nline; i++)
	{
		lp = PageGetItemId(page, i);
		if (ItemIdIsUsed(lp))
		{
			if (ItemIdHasStorage(lp))
			{
				itemidptr->offsetindex = i - 1;
				itemidptr->itemoff = ItemIdGetOffset(lp);

				/*
				 * Track whether item offsets strictly decrease as we scan
				 * forward through the line pointer array; compactify_tuples
				 * can use a cheaper, sort-free path when they do.
				 */
				if (last_offset > itemidptr->itemoff)
					last_offset = itemidptr->itemoff;
				else
					presorted = false;

				/* item must lie entirely within the page's tuple space */
				if (unlikely(itemidptr->itemoff < (int) pd_upper ||
							 itemidptr->itemoff >= (int) pd_special))
					ereport(ERROR,
							(errcode(ERRCODE_DATA_CORRUPTED),
							 errmsg("corrupted line pointer: %u",
									itemidptr->itemoff)));
				itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
				totallen += itemidptr->alignedlen;
				itemidptr++;
			}

			finalusedlp = i;	/* Could be the final non-LP_UNUSED item */
		}
		else
		{
			/* Unused entries should have lp_len = 0, but make sure */
			Assert(!ItemIdHasStorage(lp));
			ItemIdSetUnused(lp);
			nunused++;
		}
	}

	nstorage = itemidptr - itemidbase;
	if (nstorage == 0)
	{
		/* Page is completely empty, so just reset it quickly */
		((PageHeader) page)->pd_upper = pd_special;
	}
	else
	{
		/* Need to compact the page the hard way */
		if (totallen > (Size) (pd_special - pd_lower))
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("corrupted item lengths: total %zu, available space %u",
							totallen, pd_special - pd_lower)));

		compactify_tuples(itemidbase, nstorage, page, presorted);
	}

	if (finalusedlp != nline)
	{
		/* The last line pointer is not the last used line pointer */
		int			nunusedend = nline - finalusedlp;

		Assert(nunused >= nunusedend && nunusedend > 0);

		/* remove trailing unused line pointers from the count */
		nunused -= nunusedend;
		/* truncate the line pointer array */
		((PageHeader) page)->pd_lower -= (sizeof(ItemIdData) * nunusedend);
	}

	/* Set hint bit for PageAddItemExtended */
	if (nunused > 0)
		PageSetHasFreeLinePointers(page);
	else
		PageClearHasFreeLinePointers(page);
}
     823              : 
     824              : /*
     825              :  * PageTruncateLinePointerArray
     826              :  *
     827              :  * Removes unused line pointers at the end of the line pointer array.
     828              :  *
     829              :  * This routine is usable for heap pages only.  It is called by VACUUM during
     830              :  * its second pass over the heap.  We expect at least one LP_UNUSED line
     831              :  * pointer on the page (if VACUUM didn't have an LP_DEAD item on the page that
     832              :  * it just set to LP_UNUSED then it should not call here).
     833              :  *
     834              :  * We avoid truncating the line pointer array to 0 items, if necessary by
     835              :  * leaving behind a single remaining LP_UNUSED item.  This is a little
     836              :  * arbitrary, but it seems like a good idea to avoid leaving a PageIsEmpty()
     837              :  * page behind.
     838              :  *
     839              :  * Caller can have either an exclusive lock or a full cleanup lock on page's
     840              :  * buffer.  The page's PD_HAS_FREE_LINES hint bit will be set or unset based
     841              :  * on whether or not we leave behind any remaining LP_UNUSED items.
     842              :  */
     843              : void
     844        16209 : PageTruncateLinePointerArray(Page page)
     845              : {
     846        16209 :     PageHeader  phdr = (PageHeader) page;
     847        16209 :     bool        countdone = false,
     848        16209 :                 sethint = false;
     849        16209 :     int         nunusedend = 0;
     850              : 
     851              :     /* Scan line pointer array back-to-front */
     852      1068454 :     for (int i = PageGetMaxOffsetNumber(page); i >= FirstOffsetNumber; i--)
     853              :     {
     854      1067931 :         ItemId      lp = PageGetItemId(page, i);
     855              : 
     856      1067931 :         if (!countdone && i > FirstOffsetNumber)
     857              :         {
     858              :             /*
     859              :              * Still determining which line pointers from the end of the array
     860              :              * will be truncated away.  Either count another line pointer as
     861              :              * safe to truncate, or notice that it's not safe to truncate
     862              :              * additional line pointers (stop counting line pointers).
     863              :              */
     864       970620 :             if (!ItemIdIsUsed(lp))
     865       963195 :                 nunusedend++;
     866              :             else
     867         7425 :                 countdone = true;
     868              :         }
     869              :         else
     870              :         {
     871              :             /*
     872              :              * Once we've stopped counting we still need to figure out if
     873              :              * there are any remaining LP_UNUSED line pointers somewhere more
     874              :              * towards the front of the array.
     875              :              */
     876        97311 :             if (!ItemIdIsUsed(lp))
     877              :             {
     878              :                 /*
     879              :                  * This is an unused line pointer that we won't be truncating
     880              :                  * away -- so there is at least one.  Set hint on page.
     881              :                  */
     882        15686 :                 sethint = true;
     883        15686 :                 break;
     884              :             }
     885              :         }
     886              :     }
     887              : 
     888        16209 :     if (nunusedend > 0)
     889              :     {
     890        11172 :         phdr->pd_lower -= sizeof(ItemIdData) * nunusedend;
     891              : 
     892              : #ifdef CLOBBER_FREED_MEMORY
     893              :         memset((char *) page + phdr->pd_lower, 0x7F,
     894              :                sizeof(ItemIdData) * nunusedend);
     895              : #endif
     896              :     }
     897              :     else
     898              :         Assert(sethint);
     899              : 
     900              :     /* Set hint bit for PageAddItemExtended */
     901        16209 :     if (sethint)
     902        15686 :         PageSetHasFreeLinePointers(page);
     903              :     else
     904          523 :         PageClearHasFreeLinePointers(page);
     905        16209 : }
     906              : 
     907              : /*
     908              :  * PageGetFreeSpace
     909              :  *      Returns the size of the free (allocatable) space on a page,
     910              :  *      reduced by the space needed for a new line pointer.
     911              :  *
     912              :  * Note: this should usually only be used on index pages.  Use
     913              :  * PageGetHeapFreeSpace on heap pages.
     914              :  */
     915              : Size
     916     42780920 : PageGetFreeSpace(const PageData *page)
     917              : {
     918     42780920 :     const PageHeaderData *phdr = (const PageHeaderData *) page;
     919              :     int         space;
     920              : 
     921              :     /*
     922              :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     923              :      * pd_upper.
     924              :      */
     925     42780920 :     space = (int) phdr->pd_upper - (int) phdr->pd_lower;
     926              : 
     927     42780920 :     if (space < (int) sizeof(ItemIdData))
     928         9864 :         return 0;
     929     42771056 :     space -= sizeof(ItemIdData);
     930              : 
     931     42771056 :     return (Size) space;
     932              : }
     933              : 
     934              : /*
     935              :  * PageGetFreeSpaceForMultipleTuples
     936              :  *      Returns the size of the free (allocatable) space on a page,
     937              :  *      reduced by the space needed for multiple new line pointers.
     938              :  *
     939              :  * Note: this should usually only be used on index pages.  Use
     940              :  * PageGetHeapFreeSpace on heap pages.
     941              :  */
     942              : Size
     943        87607 : PageGetFreeSpaceForMultipleTuples(const PageData *page, int ntups)
     944              : {
     945        87607 :     const PageHeaderData *phdr = (const PageHeaderData *) page;
     946              :     int         space;
     947              : 
     948              :     /*
     949              :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     950              :      * pd_upper.
     951              :      */
     952        87607 :     space = (int) phdr->pd_upper - (int) phdr->pd_lower;
     953              : 
     954        87607 :     if (space < (int) (ntups * sizeof(ItemIdData)))
     955            0 :         return 0;
     956        87607 :     space -= ntups * sizeof(ItemIdData);
     957              : 
     958        87607 :     return (Size) space;
     959              : }
     960              : 
     961              : /*
     962              :  * PageGetExactFreeSpace
     963              :  *      Returns the size of the free (allocatable) space on a page,
     964              :  *      without any consideration for adding/removing line pointers.
     965              :  */
     966              : Size
     967      2147306 : PageGetExactFreeSpace(const PageData *page)
     968              : {
     969      2147306 :     const PageHeaderData *phdr = (const PageHeaderData *) page;
     970              :     int         space;
     971              : 
     972              :     /*
     973              :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     974              :      * pd_upper.
     975              :      */
     976      2147306 :     space = (int) phdr->pd_upper - (int) phdr->pd_lower;
     977              : 
     978      2147306 :     if (space < 0)
     979            0 :         return 0;
     980              : 
     981      2147306 :     return (Size) space;
     982              : }
     983              : 
     984              : 
     985              : /*
     986              :  * PageGetHeapFreeSpace
     987              :  *      Returns the size of the free (allocatable) space on a page,
     988              :  *      reduced by the space needed for a new line pointer.
     989              :  *
     990              :  * The difference between this and PageGetFreeSpace is that this will return
     991              :  * zero if there are already MaxHeapTuplesPerPage line pointers in the page
     992              :  * and none are free.  We use this to enforce that no more than
     993              :  * MaxHeapTuplesPerPage line pointers are created on a heap page.  (Although
     994              :  * no more tuples than that could fit anyway, in the presence of redirected
     995              :  * or dead line pointers it'd be possible to have too many line pointers.
     996              :  * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
     997              :  * on the number of line pointers, we make this extra check.)
     998              :  */
     999              : Size
    1000     23203097 : PageGetHeapFreeSpace(const PageData *page)
    1001              : {
    1002              :     Size        space;
    1003              : 
    1004     23203097 :     space = PageGetFreeSpace(page);
    1005     23203097 :     if (space > 0)
    1006              :     {
    1007              :         OffsetNumber offnum,
    1008              :                     nline;
    1009              : 
    1010              :         /*
    1011              :          * Are there already MaxHeapTuplesPerPage line pointers in the page?
    1012              :          */
    1013     23175918 :         nline = PageGetMaxOffsetNumber(page);
    1014     23175918 :         if (nline >= MaxHeapTuplesPerPage)
    1015              :         {
    1016         4295 :             if (PageHasFreeLinePointers(page))
    1017              :             {
    1018              :                 /*
    1019              :                  * Since this is just a hint, we must confirm that there is
    1020              :                  * indeed a free line pointer
    1021              :                  */
    1022       407156 :                 for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
    1023              :                 {
    1024       407041 :                     ItemId      lp = PageGetItemId(unconstify(PageData *, page), offnum);
    1025              : 
    1026       407041 :                     if (!ItemIdIsUsed(lp))
    1027         1667 :                         break;
    1028              :                 }
    1029              : 
    1030         1782 :                 if (offnum > nline)
    1031              :                 {
    1032              :                     /*
    1033              :                      * The hint is wrong, but we can't clear it here since we
    1034              :                      * don't have the ability to mark the page dirty.
    1035              :                      */
    1036          115 :                     space = 0;
    1037              :                 }
    1038              :             }
    1039              :             else
    1040              :             {
    1041              :                 /*
    1042              :                  * Although the hint might be wrong, PageAddItem will believe
    1043              :                  * it anyway, so we must believe it too.
    1044              :                  */
    1045         2513 :                 space = 0;
    1046              :             }
    1047              :         }
    1048              :     }
    1049     23203097 :     return space;
    1050              : }
    1051              : 
    1052              : 
    1053              : /*
    1054              :  * PageIndexTupleDelete
    1055              :  *
    1056              :  * This routine does the work of removing a tuple from an index page.
    1057              :  *
    1058              :  * Unlike heap pages, we compact out the line pointer for the removed tuple.
    1059              :  */
    1060              : void
    1061       627138 : PageIndexTupleDelete(Page page, OffsetNumber offnum)
    1062              : {
    1063       627138 :     PageHeader  phdr = (PageHeader) page;
    1064              :     char       *addr;
    1065              :     ItemId      tup;
    1066              :     Size        size;
    1067              :     unsigned    offset;
    1068              :     int         nbytes;
    1069              :     int         offidx;
    1070              :     int         nline;
    1071              : 
    1072              :     /*
    1073              :      * As with PageRepairFragmentation, paranoia seems justified.
    1074              :      */
    1075       627138 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1076       627138 :         phdr->pd_lower > phdr->pd_upper ||
    1077       627138 :         phdr->pd_upper > phdr->pd_special ||
    1078       627138 :         phdr->pd_special > BLCKSZ ||
    1079       627138 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1080            0 :         ereport(ERROR,
    1081              :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1082              :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1083              :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1084              : 
    1085       627138 :     nline = PageGetMaxOffsetNumber(page);
    1086       627138 :     if ((int) offnum <= 0 || (int) offnum > nline)
    1087            0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1088              : 
    1089              :     /* change offset number to offset index */
    1090       627138 :     offidx = offnum - 1;
    1091              : 
    1092       627138 :     tup = PageGetItemId(page, offnum);
    1093              :     Assert(ItemIdHasStorage(tup));
    1094       627138 :     size = ItemIdGetLength(tup);
    1095       627138 :     offset = ItemIdGetOffset(tup);
    1096              : 
    1097       627138 :     if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
    1098       627138 :         offset != MAXALIGN(offset))
    1099            0 :         ereport(ERROR,
    1100              :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1101              :                  errmsg("corrupted line pointer: offset = %u, size = %zu",
    1102              :                         offset, size)));
    1103              : 
    1104              :     /* Amount of space to actually be deleted */
    1105       627138 :     size = MAXALIGN(size);
    1106              : 
    1107              :     /*
    1108              :      * First, we want to get rid of the pd_linp entry for the index tuple. We
    1109              :      * copy all subsequent linp's back one slot in the array. We don't use
    1110              :      * PageGetItemId, because we are manipulating the _array_, not individual
    1111              :      * linp's.
    1112              :      */
    1113       627138 :     nbytes = phdr->pd_lower -
    1114       627138 :         ((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);
    1115              : 
    1116       627138 :     if (nbytes > 0)
    1117       611315 :         memmove(&(phdr->pd_linp[offidx]),
    1118       611315 :                 &(phdr->pd_linp[offidx + 1]),
    1119              :                 nbytes);
    1120              : 
    1121              :     /*
    1122              :      * Now move everything between the old upper bound (beginning of tuple
    1123              :      * space) and the beginning of the deleted tuple forward, so that space in
    1124              :      * the middle of the page is left free.  If we've just deleted the tuple
    1125              :      * at the beginning of tuple space, then there's no need to do the copy.
    1126              :      */
    1127              : 
    1128              :     /* beginning of tuple space */
    1129       627138 :     addr = (char *) page + phdr->pd_upper;
    1130              : 
    1131       627138 :     if (offset > phdr->pd_upper)
    1132       611919 :         memmove(addr + size, addr, offset - phdr->pd_upper);
    1133              : 
    1134              :     /* adjust free space boundary pointers */
    1135       627138 :     phdr->pd_upper += size;
    1136       627138 :     phdr->pd_lower -= sizeof(ItemIdData);
    1137              : 
    1138              :     /*
    1139              :      * Finally, we need to adjust the linp entries that remain.
    1140              :      *
    1141              :      * Anything that used to be before the deleted tuple's data was moved
    1142              :      * forward by the size of the deleted tuple.
    1143              :      */
    1144       627138 :     if (!PageIsEmpty(page))
    1145              :     {
    1146              :         int         i;
    1147              : 
    1148       626303 :         nline--;                /* there's one less than when we started */
    1149     96625505 :         for (i = 1; i <= nline; i++)
    1150              :         {
    1151     95999202 :             ItemId      ii = PageGetItemId(page, i);
    1152              : 
    1153              :             Assert(ItemIdHasStorage(ii));
    1154     95999202 :             if (ItemIdGetOffset(ii) <= offset)
    1155     62693424 :                 ii->lp_off += size;
    1156              :         }
    1157              :     }
    1158       627138 : }
    1159              : 
    1160              : 
    1161              : /*
    1162              :  * PageIndexMultiDelete
    1163              :  *
    1164              :  * This routine handles the case of deleting multiple tuples from an
    1165              :  * index page at once.  It is considerably faster than a loop around
    1166              :  * PageIndexTupleDelete ... however, the caller *must* supply the array
    1167              :  * of item numbers to be deleted in item number order!
    1168              :  */
    1169              : void
    1170        25274 : PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
    1171              : {
    1172        25274 :     PageHeader  phdr = (PageHeader) page;
    1173        25274 :     Offset      pd_lower = phdr->pd_lower;
    1174        25274 :     Offset      pd_upper = phdr->pd_upper;
    1175        25274 :     Offset      pd_special = phdr->pd_special;
    1176              :     Offset      last_offset;
    1177              :     itemIdCompactData itemidbase[MaxIndexTuplesPerPage];
    1178              :     ItemIdData  newitemids[MaxIndexTuplesPerPage];
    1179              :     itemIdCompact itemidptr;
    1180              :     ItemId      lp;
    1181              :     int         nline,
    1182              :                 nused;
    1183              :     Size        totallen;
    1184              :     Size        size;
    1185              :     unsigned    offset;
    1186              :     int         nextitm;
    1187              :     OffsetNumber offnum;
    1188        25274 :     bool        presorted = true;   /* For now */
    1189              : 
    1190              :     Assert(nitems <= MaxIndexTuplesPerPage);
    1191              : 
    1192              :     /*
    1193              :      * If there aren't very many items to delete, then retail
    1194              :      * PageIndexTupleDelete is the best way.  Delete the items in reverse
    1195              :      * order so we don't have to think about adjusting item numbers for
    1196              :      * previous deletions.
    1197              :      *
    1198              :      * TODO: tune the magic number here
    1199              :      */
    1200        25274 :     if (nitems <= 2)
    1201              :     {
    1202         5511 :         while (--nitems >= 0)
    1203         3197 :             PageIndexTupleDelete(page, itemnos[nitems]);
    1204         2314 :         return;
    1205              :     }
    1206              : 
    1207              :     /*
    1208              :      * As with PageRepairFragmentation, paranoia seems justified.
    1209              :      */
    1210        22960 :     if (pd_lower < SizeOfPageHeaderData ||
    1211        22960 :         pd_lower > pd_upper ||
    1212        22960 :         pd_upper > pd_special ||
    1213        22960 :         pd_special > BLCKSZ ||
    1214        22960 :         pd_special != MAXALIGN(pd_special))
    1215            0 :         ereport(ERROR,
    1216              :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1217              :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1218              :                         pd_lower, pd_upper, pd_special)));
    1219              : 
    1220              :     /*
    1221              :      * Scan the line pointer array and build a list of just the ones we are
    1222              :      * going to keep.  Notice we do not modify the page yet, since we are
    1223              :      * still validity-checking.
    1224              :      */
    1225        22960 :     nline = PageGetMaxOffsetNumber(page);
    1226        22960 :     itemidptr = itemidbase;
    1227        22960 :     totallen = 0;
    1228        22960 :     nused = 0;
    1229        22960 :     nextitm = 0;
    1230        22960 :     last_offset = pd_special;
    1231      5219579 :     for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
    1232              :     {
    1233      5196619 :         lp = PageGetItemId(page, offnum);
    1234              :         Assert(ItemIdHasStorage(lp));
    1235      5196619 :         size = ItemIdGetLength(lp);
    1236      5196619 :         offset = ItemIdGetOffset(lp);
    1237      5196619 :         if (offset < pd_upper ||
    1238      5196619 :             (offset + size) > pd_special ||
    1239      5196619 :             offset != MAXALIGN(offset))
    1240            0 :             ereport(ERROR,
    1241              :                     (errcode(ERRCODE_DATA_CORRUPTED),
    1242              :                      errmsg("corrupted line pointer: offset = %u, size = %zu",
    1243              :                             offset, size)));
    1244              : 
    1245      5196619 :         if (nextitm < nitems && offnum == itemnos[nextitm])
    1246              :         {
    1247              :             /* skip item to be deleted */
    1248      2406445 :             nextitm++;
    1249              :         }
    1250              :         else
    1251              :         {
    1252      2790174 :             itemidptr->offsetindex = nused; /* where it will go */
    1253      2790174 :             itemidptr->itemoff = offset;
    1254              : 
    1255      2790174 :             if (last_offset > itemidptr->itemoff)
    1256      1445176 :                 last_offset = itemidptr->itemoff;
    1257              :             else
    1258      1344998 :                 presorted = false;
    1259              : 
    1260      2790174 :             itemidptr->alignedlen = MAXALIGN(size);
    1261      2790174 :             totallen += itemidptr->alignedlen;
    1262      2790174 :             newitemids[nused] = *lp;
    1263      2790174 :             itemidptr++;
    1264      2790174 :             nused++;
    1265              :         }
    1266              :     }
    1267              : 
    1268              :     /* this will catch invalid or out-of-order itemnos[] */
    1269        22960 :     if (nextitm != nitems)
    1270            0 :         elog(ERROR, "incorrect index offsets supplied");
    1271              : 
    1272        22960 :     if (totallen > (Size) (pd_special - pd_lower))
    1273            0 :         ereport(ERROR,
    1274              :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1275              :                  errmsg("corrupted item lengths: total %zu, available space %u",
    1276              :                         totallen, pd_special - pd_lower)));
    1277              : 
    1278              :     /*
    1279              :      * Looks good. Overwrite the line pointers with the copy, from which we've
    1280              :      * removed all the unused items.
    1281              :      */
    1282        22960 :     memcpy(phdr->pd_linp, newitemids, nused * sizeof(ItemIdData));
    1283        22960 :     phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
    1284              : 
    1285              :     /* and compactify the tuple data */
    1286        22960 :     if (nused > 0)
    1287        22641 :         compactify_tuples(itemidbase, nused, page, presorted);
    1288              :     else
    1289          319 :         phdr->pd_upper = pd_special;
    1290              : }
    1291              : 
    1292              : 
/*
 * PageIndexTupleDeleteNoCompact
 *
 * Remove the specified tuple from an index page, but set its line pointer
 * to "unused" instead of compacting it out, except that it can be removed
 * if it's the last line pointer on the page.
 *
 * This is used for index AMs that require that existing TIDs of live tuples
 * remain unchanged, and are willing to allow unused line pointers instead.
 *
 * The tuple's data is still squeezed out of tuple space (pd_upper rises);
 * only the line pointer array is left uncompacted.
 */
void
PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
{
	PageHeader	phdr = (PageHeader) page;
	char	   *addr;
	ItemId		tup;
	Size		size;
	unsigned	offset;
	int			nline;

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.  Validate
	 * the header's free-space pointers before using them.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ ||
		phdr->pd_special != MAXALIGN(phdr->pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	/* offnum must identify an existing line pointer */
	nline = PageGetMaxOffsetNumber(page);
	if ((int) offnum <= 0 || (int) offnum > nline)
		elog(ERROR, "invalid index offnum: %u", offnum);

	tup = PageGetItemId(page, offnum);
	Assert(ItemIdHasStorage(tup));
	size = ItemIdGetLength(tup);
	offset = ItemIdGetOffset(tup);

	/* the line pointer must reference a sane chunk of tuple space */
	if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
		offset != MAXALIGN(offset))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted line pointer: offset = %u, size = %zu",
						offset, size)));

	/* Amount of space to actually be deleted */
	size = MAXALIGN(size);

	/*
	 * Either set the line pointer to "unused", or zap it if it's the last
	 * one.  (Note: it's possible that the next-to-last one(s) are already
	 * unused, but we do not trouble to try to compact them out if so.)
	 */
	if ((int) offnum < nline)
		ItemIdSetUnused(tup);
	else
	{
		phdr->pd_lower -= sizeof(ItemIdData);
		nline--;				/* there's one less than when we started */
	}

	/*
	 * Now move everything between the old upper bound (beginning of tuple
	 * space) and the beginning of the deleted tuple forward, so that space in
	 * the middle of the page is left free.  If we've just deleted the tuple
	 * at the beginning of tuple space, then there's no need to do the copy.
	 */

	/* beginning of tuple space */
	addr = (char *) page + phdr->pd_upper;

	/* memmove, not memcpy: source and destination ranges overlap */
	if (offset > phdr->pd_upper)
		memmove(addr + size, addr, offset - phdr->pd_upper);

	/* adjust free space boundary pointer */
	phdr->pd_upper += size;

	/*
	 * Finally, we need to adjust the linp entries that remain.
	 *
	 * Anything that used to be before the deleted tuple's data was moved
	 * forward by the size of the deleted tuple.
	 */
	if (!PageIsEmpty(page))
	{
		int			i;

		for (i = 1; i <= nline; i++)
		{
			ItemId		ii = PageGetItemId(page, i);

			/*
			 * Unlike PageIndexTupleDelete, unused line pointers may exist
			 * here, so check ItemIdHasStorage before touching lp_off.
			 */
			if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
				ii->lp_off += size;
		}
	}
}
    1393              : 
    1394              : 
/*
 * PageIndexTupleOverwrite
 *
 * Replace a specified tuple on an index page.
 *
 * The new tuple is placed exactly where the old one had been, shifting
 * other tuples' data up or down as needed to keep the page compacted.
 * This is better than deleting and reinserting the tuple, because it
 * avoids any data shifting when the tuple size doesn't change; and
 * even when it does, we avoid moving the line pointers around.
 * This could be used by an index AM that doesn't want to unset the
 * LP_DEAD bit when it happens to be set.  It could conceivably also be
 * used by an index AM that cares about the physical order of tuples as
 * well as their logical/ItemId order.
 *
 * If there's insufficient space for the new tuple, return false.  Other
 * errors represent data-corruption problems, so we just elog.
 */
bool
PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
						const void *newtup, Size newsize)
{
	PageHeader	phdr = (PageHeader) page;
	ItemId		tupid;
	int			oldsize;
	unsigned	offset;
	Size		alignednewsize;
	int			size_diff;
	int			itemcount;

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.  Validate
	 * the header's free-space pointers before using them.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ ||
		phdr->pd_special != MAXALIGN(phdr->pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	/* offnum must identify an existing line pointer */
	itemcount = PageGetMaxOffsetNumber(page);
	if ((int) offnum <= 0 || (int) offnum > itemcount)
		elog(ERROR, "invalid index offnum: %u", offnum);

	tupid = PageGetItemId(page, offnum);
	Assert(ItemIdHasStorage(tupid));
	oldsize = ItemIdGetLength(tupid);
	offset = ItemIdGetOffset(tupid);

	/* the target line pointer must reference a sane chunk of tuple space */
	if (offset < phdr->pd_upper || (offset + oldsize) > phdr->pd_special ||
		offset != MAXALIGN(offset))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted line pointer: offset = %u, size = %d",
						offset, oldsize)));

	/*
	 * Determine actual change in space requirement, check for page overflow.
	 */
	oldsize = MAXALIGN(oldsize);
	alignednewsize = MAXALIGN(newsize);
	if (alignednewsize > oldsize + (phdr->pd_upper - phdr->pd_lower))
		return false;

	/*
	 * Relocate existing data and update line pointers, unless the new tuple
	 * is the same size as the old (after alignment), in which case there's
	 * nothing to do.  Notice that what we have to relocate is data before the
	 * target tuple, not data after, so it's convenient to express size_diff
	 * as the amount by which the tuple's size is decreasing, making it the
	 * delta to add to pd_upper and affected line pointers.  (size_diff is
	 * positive when the tuple shrinks, negative when it grows.)
	 */
	size_diff = oldsize - (int) alignednewsize;
	if (size_diff != 0)
	{
		char	   *addr = (char *) page + phdr->pd_upper;
		int			i;

		/* relocate all tuple data before the target tuple (overlap-safe) */
		memmove(addr + size_diff, addr, offset - phdr->pd_upper);

		/* adjust free space boundary pointer */
		phdr->pd_upper += size_diff;

		/* adjust affected line pointers too */
		for (i = FirstOffsetNumber; i <= itemcount; i++)
		{
			ItemId		ii = PageGetItemId(page, i);

			/* Allow items without storage; currently only BRIN needs that */
			if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
				ii->lp_off += size_diff;
		}
	}

	/* Update the item's tuple length without changing its lp_flags field */
	tupid->lp_off = offset + size_diff;
	tupid->lp_len = newsize;

	/* Copy new tuple data onto page */
	memcpy(PageGetItem(page, tupid), newtup, newsize);

	return true;
}
    1502              : 
    1503              : 
    1504              : /*
    1505              :  * Set checksum on a page.
    1506              :  *
    1507              :  * If the page is in shared buffers, it needs to be locked in at least
    1508              :  * share-exclusive mode.
    1509              :  *
    1510              :  * If checksums are disabled, or if the page is not initialized, just
    1511              :  * return. Otherwise compute and set the checksum.
    1512              :  *
    1513              :  * In the past this needed to be done on a copy of the page, due to the
    1514              :  * possibility of e.g., hint bits being set concurrently. However, this is not
    1515              :  * necessary anymore as hint bits won't be set while IO is going on.
    1516              :  */
    1517              : void
    1518       783140 : PageSetChecksum(Page page, BlockNumber blkno)
    1519              : {
    1520       783140 :     HOLD_INTERRUPTS();
    1521              :     /* If we don't need a checksum, just return */
    1522       783140 :     if (PageIsNew(page) || !DataChecksumsNeedWrite())
    1523              :     {
    1524        50632 :         RESUME_INTERRUPTS();
    1525        50632 :         return;
    1526              :     }
    1527              : 
    1528       732508 :     ((PageHeader) page)->pd_checksum = pg_checksum_page(page, blkno);
    1529       732508 :     RESUME_INTERRUPTS();
    1530              : }
        

Generated by: LCOV version 2.0-1