LCOV - code coverage report
Current view: top level - src/backend/storage/page - bufpage.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 407 443 91.9 %
Date: 2025-04-01 14:15:22 Functions: 20 20 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * bufpage.c
       4             :  *    POSTGRES standard buffer page code.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/storage/page/bufpage.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/htup_details.h"
      18             : #include "access/itup.h"
      19             : #include "access/xlog.h"
      20             : #include "pgstat.h"
      21             : #include "storage/checksum.h"
      22             : #include "utils/memdebug.h"
      23             : #include "utils/memutils.h"
      24             : 
      25             : 
      26             : /* GUC variable, false by default.  NOTE(review): no reader of it is visible in this excerpt. */
      27             : bool        ignore_checksum_failure = false;
      28             : 
      29             : 
      30             : /* ----------------------------------------------------------------
      31             :  *                      Page support functions
      32             :  * ----------------------------------------------------------------
      33             :  */
      34             : 
      35             : /*
      36             :  * PageInit
      37             :  *      Initializes the contents of a page: zeroes all pageSize bytes and sets
      38             :  *      up an empty header.  specialSize is rounded up to a MAXALIGN multiple.
      39             :  *      No initial checksum is calculated here; that's left until write time.
      40             :  */
      41             : void
      42      669688 : PageInit(Page page, Size pageSize, Size specialSize)
      43             : {
      44      669688 :     PageHeader  p = (PageHeader) page;
      45             : 
      46      669688 :     specialSize = MAXALIGN(specialSize);
      47             : 
      48             :     Assert(pageSize == BLCKSZ);
      49             :     Assert(pageSize > specialSize + SizeOfPageHeaderData);  /* header and special space must both fit */
      50             : 
      51             :     /* Make sure all fields of page are zero, as well as unused space */
      52      669688 :     MemSet(p, 0, pageSize);
      53             : 
      54      669688 :     p->pd_flags = 0;
      55      669688 :     p->pd_lower = SizeOfPageHeaderData; /* line pointer array is empty */
      56      669688 :     p->pd_upper = pageSize - specialSize;   /* no item data yet, so upper == special */
      57      669688 :     p->pd_special = pageSize - specialSize;
      58      669688 :     PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
      59             :     /* p->pd_prune_xid = InvalidTransactionId;       done by above MemSet */
      60      669688 : }
      61             : 
      62             : 
      63             : /*
      64             :  * PageIsVerified
      65             :  *      Check that the page header and checksum (if any) appear valid.
      66             :  *
      67             :  * This is called when a page has just been read in from disk.  The idea is
      68             :  * to cheaply detect trashed pages before we go nuts following bogus line
      69             :  * pointers, testing invalid transaction identifiers, etc.
      70             :  *
      71             :  * It turns out to be necessary to allow zeroed pages here too.  Even though
      72             :  * this routine is *not* called when deliberately adding a page to a relation,
      73             :  * there are scenarios in which a zeroed page might be found in a table.
      74             :  * (Example: a backend extends a relation, then crashes before it can write
      75             :  * any WAL entry about the new page.  The kernel will already have the
      76             :  * zeroed page in the file, and it will stay that way after restart.)  So we
      77             :  * allow zeroed pages here, and are careful that the page access macros
      78             :  * treat such a page as empty and without free space.  Eventually, VACUUM
      79             :  * will clean up such a page and make it usable.
      80             :  *
      81             :  * If flag PIV_LOG_WARNING/PIV_LOG_LOG is set, a WARNING/LOG message is logged
      82             :  * in the event of a checksum failure.
      83             :  *
      84             :  * If flag PIV_IGNORE_CHECKSUM_FAILURE is set, a checksum failure on its own
      85             :  * does not cause PageIsVerified() to return false; the header sanity checks
      86             :  * must still pass.  Any message is still governed by the PIV_LOG_* flags.
      87             :  *
      88             :  * To allow the caller to report statistics about checksum failures,
      89             :  * *checksum_failure_p can be passed in (it may be NULL). Note that there may
      90             :  * be checksum failures even if this function returns true, due to
      91             :  * PIV_IGNORE_CHECKSUM_FAILURE.
      92             :  */
      93             : bool
      94     2458336 : PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_failure_p)
      95             : {
      96     2458336 :     const PageHeaderData *p = (const PageHeaderData *) page;
      97             :     size_t     *pagebytes;
      98     2458336 :     bool        checksum_failure = false;
      99     2458336 :     bool        header_sane = false;
     100     2458336 :     uint16      checksum = 0;
     101             : 
     102     2458336 :     if (checksum_failure_p)
     103     2458336 :         *checksum_failure_p = false;
     104             : 
     105             :     /*
     106             :      * Don't verify page data unless the page passes basic non-zero test
     107             :      */
     108     2458336 :     if (!PageIsNew(page))
     109             :     {
     110     2450396 :         if (DataChecksumsEnabled())
     111             :         {
     112     2433306 :             checksum = pg_checksum_page(page, blkno);
     113             : 
     114     2433306 :             if (checksum != p->pd_checksum)
     115             :             {
     116           0 :                 checksum_failure = true;
     117           0 :                 if (checksum_failure_p)
     118           0 :                     *checksum_failure_p = true;
     119             :             }
     120             :         }
     121             : 
     122             :         /*
     123             :          * The following checks don't prove the header is correct, only that
     124             :          * it looks sane enough to allow into the buffer pool. Later usage of
     125             :          * the block can still reveal problems, which is why we offer the
     126             :          * checksum option.
     127             :          */
     128     2450396 :         if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
     129     2450396 :             p->pd_lower <= p->pd_upper &&
     130     2450396 :             p->pd_upper <= p->pd_special &&
     131     2450396 :             p->pd_special <= BLCKSZ &&
     132     2450396 :             p->pd_special == MAXALIGN(p->pd_special))
     133     2450396 :             header_sane = true;
     134             : 
     135     2450396 :         if (header_sane && !checksum_failure)
     136     2450396 :             return true;
     137             :     }
     138             : 
     139             :     /* Check all-zeroes case; reached for new pages and for pages that failed a check above */
     140        7940 :     pagebytes = (size_t *) page;
     141             : 
     142        7940 :     if (pg_memory_is_all_zeros(pagebytes, BLCKSZ))
     143        7940 :         return true;
     144             : 
     145             :     /*
     146             :      * Throw a WARNING/LOG, as instructed by PIV_LOG_*, if the checksum fails,
     147             :      * but only after we've checked for the all-zeroes case.
     148             :      */
     149           0 :     if (checksum_failure)
     150             :     {
     151           0 :         if ((flags & (PIV_LOG_WARNING | PIV_LOG_LOG)) != 0)
     152           0 :             ereport(flags & PIV_LOG_WARNING ? WARNING : LOG,
     153             :                     (errcode(ERRCODE_DATA_CORRUPTED),
     154             :                      errmsg("page verification failed, calculated checksum %u but expected %u",
     155             :                             checksum, p->pd_checksum)));
     156             : 
     157           0 :         if (header_sane && (flags & PIV_IGNORE_CHECKSUM_FAILURE))
     158           0 :             return true;    /* checksum failure tolerated; header looked sane */
     159             :     }
     160             : 
     161           0 :     return false;
     162             : }
     163             : 
     164             : 
     165             : /*
     166             :  *  PageAddItemExtended
     167             :  *
     168             :  *  Add an item to a page.  Return value is the offset at which it was
     169             :  *  inserted, or InvalidOffsetNumber if the item is not inserted.  A WARNING
     170             :  *  names the reason, except that lack of free space is refused silently.
     171             :  *
     172             :  *  offsetNumber must be either InvalidOffsetNumber to specify finding a
     173             :  *  free line pointer, or a value between FirstOffsetNumber and one past
     174             :  *  the last existing item, to specify using that particular line pointer.
     175             :  *
     176             :  *  If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
     177             :  *  the item at the specified offsetNumber, which must be either a
     178             :  *  currently-unused line pointer, or one past the last existing item.
     179             :  *
     180             :  *  If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
     181             :  *  the item at the specified offsetNumber, moving existing items later
     182             :  *  in the array to make room.
     183             :  *
     184             :  *  If offsetNumber is not valid, then assign a slot by finding the first
     185             :  *  one that is both unused and deallocated.
     186             :  *
     187             :  *  If flag PAI_IS_HEAP is set, we enforce that there can't be more than
     188             :  *  MaxHeapTuplesPerPage line pointers on the page.
     189             :  *  Corrupted page pointers are reported with ereport(PANIC), not ERROR.
     190             :  *  !!! EREPORT(ERROR) IS DISALLOWED HERE !!!
     191             :  */
     192             : OffsetNumber
     193    66920738 : PageAddItemExtended(Page page,
     194             :                     Item item,
     195             :                     Size size,
     196             :                     OffsetNumber offsetNumber,
     197             :                     int flags)
     198             : {
     199    66920738 :     PageHeader  phdr = (PageHeader) page;
     200             :     Size        alignedSize;
     201             :     int         lower;
     202             :     int         upper;
     203             :     ItemId      itemId;
     204             :     OffsetNumber limit;
     205    66920738 :     bool        needshuffle = false;
     206             : 
     207             :     /*
     208             :      * Be wary about corrupted page pointers
     209             :      */
     210    66920738 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
     211    66920738 :         phdr->pd_lower > phdr->pd_upper ||
     212    66920738 :         phdr->pd_upper > phdr->pd_special ||
     213    66920738 :         phdr->pd_special > BLCKSZ)
     214           0 :         ereport(PANIC,
     215             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     216             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
     217             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
     218             : 
     219             :     /*
     220             :      * Select offsetNumber to place the new item at
     221             :      */
     222    66920738 :     limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));
     223             : 
     224             :     /* was offsetNumber passed in? */
     225    66920738 :     if (OffsetNumberIsValid(offsetNumber))
     226             :     {
     227             :         /* yes, check it */
     228    44358364 :         if ((flags & PAI_OVERWRITE) != 0)
     229             :         {
     230     3147996 :             if (offsetNumber < limit)
     231             :             {
     232       22716 :                 itemId = PageGetItemId(page, offsetNumber);
     233       22716 :                 if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
     234             :                 {
     235           0 :                     elog(WARNING, "will not overwrite a used ItemId");
     236           0 :                     return InvalidOffsetNumber;
     237             :                 }
     238             :             }
     239             :         }
     240             :         else
     241             :         {
     242    41210368 :             if (offsetNumber < limit)
     243     6331756 :                 needshuffle = true; /* need to move existing linp's */
     244             :         }
     245             :     }
     246             :     else
     247             :     {
     248             :         /* offsetNumber was not passed in, so find a free slot */
     249             :         /* if no free slot, we'll put it at limit (1st open slot) */
     250    22562374 :         if (PageHasFreeLinePointers(page))
     251             :         {
     252             :             /*
     253             :              * Scan line pointer array to locate a "recyclable" (unused)
     254             :              * ItemId.
     255             :              *
     256             :              * Always use earlier items first.  PageTruncateLinePointerArray
     257             :              * can only truncate unused items when they appear as a contiguous
     258             :              * group at the end of the line pointer array.
     259             :              */
     260    17601610 :             for (offsetNumber = FirstOffsetNumber;
     261             :                  offsetNumber < limit;   /* limit is maxoff+1 */
     262    17335968 :                  offsetNumber++)
     263             :             {
     264    17587050 :                 itemId = PageGetItemId(page, offsetNumber);
     265             : 
     266             :                 /*
     267             :                  * We check for no storage as well, just to be paranoid;
     268             :                  * unused items should never have storage.  Assert() that the
     269             :                  * invariant is respected too.
     270             :                  */
     271             :                 Assert(ItemIdIsUsed(itemId) || !ItemIdHasStorage(itemId));
     272             : 
     273    17587050 :                 if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
     274      251082 :                     break;
     275             :             }
     276      265642 :             if (offsetNumber >= limit)
     277             :             {
     278             :                 /* the hint is wrong, so reset it; offsetNumber == limit here, i.e. append */
     279       14560 :                 PageClearHasFreeLinePointers(page);
     280             :             }
     281             :         }
     282             :         else
     283             :         {
     284             :             /* don't bother searching if hint says there's no free slot */
     285    22296732 :             offsetNumber = limit;
     286             :         }
     287             :     }
     288             : 
     289             :     /* Reject placing items beyond the first unused line pointer */
     290    66920738 :     if (offsetNumber > limit)
     291             :     {
     292           0 :         elog(WARNING, "specified item offset is too large");
     293           0 :         return InvalidOffsetNumber;
     294             :     }
     295             : 
     296             :     /* Reject placing items beyond heap boundary, if heap */
     297    66920738 :     if ((flags & PAI_IS_HEAP) != 0 && offsetNumber > MaxHeapTuplesPerPage)
     298             :     {
     299           0 :         elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
     300           0 :         return InvalidOffsetNumber;
     301             :     }
     302             : 
     303             :     /*
     304             :      * Compute new lower and upper pointers for page, see if it'll fit.
     305             :      *
     306             :      * Note: do arithmetic as signed ints, to avoid mistakes if, say,
     307             :      * alignedSize > pd_upper.
     308             :      */
     309    66920738 :     if (offsetNumber == limit || needshuffle)
     310    66646940 :         lower = phdr->pd_lower + sizeof(ItemIdData); /* line pointer array grows by one */
     311             :     else
     312      273798 :         lower = phdr->pd_lower; /* reusing an existing line pointer */
     313             : 
     314    66920738 :     alignedSize = MAXALIGN(size);
     315             : 
     316    66920738 :     upper = (int) phdr->pd_upper - (int) alignedSize;
     317             : 
     318    66920738 :     if (lower > upper)
     319           0 :         return InvalidOffsetNumber; /* not enough free space; no WARNING on this path */
     320             : 
     321             :     /*
     322             :      * OK to insert the item.  First, shuffle the existing pointers if needed.
     323             :      */
     324    66920738 :     itemId = PageGetItemId(page, offsetNumber);
     325             : 
     326    66920738 :     if (needshuffle)
     327     6331756 :         memmove(itemId + 1, itemId,
     328     6331756 :                 (limit - offsetNumber) * sizeof(ItemIdData));
     329             : 
     330             :     /* set the line pointer; it records the unaligned size, at the aligned offset */
     331    66920738 :     ItemIdSetNormal(itemId, upper, size);
     332             : 
     333             :     /*
     334             :      * Items normally contain no uninitialized bytes.  Core bufpage consumers
     335             :      * conform, but this is not a necessary coding rule; a new index AM could
     336             :      * opt to depart from it.  However, data type input functions and other
     337             :      * C-language functions that synthesize datums should initialize all
     338             :      * bytes; datumIsEqual() relies on this.  Testing here, along with the
     339             :      * similar check in printtup(), helps to catch such mistakes.
     340             :      *
     341             :      * Values of the "name" type retrieved via index-only scans may contain
     342             :      * uninitialized bytes; see comment in btrescan().  Valgrind will report
     343             :      * this as an error, but it is safe to ignore.
     344             :      */
     345             :     VALGRIND_CHECK_MEM_IS_DEFINED(item, size);
     346             : 
     347             :     /* copy the item's data onto the page */
     348    66920738 :     memcpy((char *) page + upper, item, size);
     349             : 
     350             :     /* adjust page header */
     351    66920738 :     phdr->pd_lower = (LocationIndex) lower;
     352    66920738 :     phdr->pd_upper = (LocationIndex) upper;
     353             : 
     354    66920738 :     return offsetNumber;
     355             : }
     356             : 
     357             : 
     358             : /*
     359             :  * PageGetTempPage
     360             :  *      Get a temporary, palloc'd page of the same size as the given page.
     361             :  *      The returned page is not initialized at all; caller must do that.
     362             :  */
     363             : Page
     364       21928 : PageGetTempPage(const PageData *page)
     365             : {
     366             :     Size        pageSize;
     367             :     Page        temp;
     368             : 
     369       21928 :     pageSize = PageGetPageSize(page);   /* only the size of 'page' is used */
     370       21928 :     temp = (Page) palloc(pageSize);
     371             : 
     372       21928 :     return temp;
     373             : }
     374             : 
     375             : /*
     376             :  * PageGetTempPageCopy
     377             :  *      Get a temporary, palloc'd page in local memory for special processing.
     378             :  *      The page is initialized by copying the whole contents of the given page.
     379             :  */
     380             : Page
     381       11730 : PageGetTempPageCopy(const PageData *page)
     382             : {
     383             :     Size        pageSize;
     384             :     Page        temp;
     385             : 
     386       11730 :     pageSize = PageGetPageSize(page);
     387       11730 :     temp = (Page) palloc(pageSize);
     388             : 
     389       11730 :     memcpy(temp, page, pageSize);   /* byte-for-byte copy, header included */
     390             : 
     391       11730 :     return temp;
     392             : }
     393             : 
     394             : /*
     395             :  * PageGetTempPageCopySpecial
     396             :  *      Get a temporary, palloc'd page in local memory for special processing.
     397             :  *      The page is PageInit'd with the same special-space size as the
     398             :  *      given page, and only the special space is copied from the given page.
     399             :  */
     400             : Page
     401       59370 : PageGetTempPageCopySpecial(const PageData *page)
     402             : {
     403             :     Size        pageSize;
     404             :     Page        temp;
     405             : 
     406       59370 :     pageSize = PageGetPageSize(page);
     407       59370 :     temp = (Page) palloc(pageSize);
     408             : 
     409       59370 :     PageInit(temp, pageSize, PageGetSpecialSize(page)); /* zeroes temp, sets up empty header */
     410      178110 :     memcpy(PageGetSpecialPointer(temp),
     411       59370 :            PageGetSpecialPointer(page),
     412       59370 :            PageGetSpecialSize(page));
     413             : 
     414       59370 :     return temp;
     415             : }
     416             : 
     417             : /*
     418             :  * PageRestoreTempPage
     419             :  *      Copy temporary page back to permanent page after special processing
     420             :  *      and pfree the temporary page; tempPage must not be used afterwards.
     421             :  */
     422             : void
     423       78418 : PageRestoreTempPage(Page tempPage, Page oldPage)
     424             : {
     425             :     Size        pageSize;
     426             : 
     427       78418 :     pageSize = PageGetPageSize(tempPage);   /* size comes from tempPage, not oldPage */
     428       78418 :     memcpy(oldPage, tempPage, pageSize);
     429             : 
     430       78418 :     pfree(tempPage);
     431       78418 : }
     432             : 
     433             : /*
     434             :  * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
     435             :  */
     436             : typedef struct itemIdCompactData
     437             : {
     438             :     uint16      offsetindex;    /* 0-based linp array index (offset number - 1) */
     439             :     int16       itemoff;        /* page offset of item data */
     440             :     uint16      alignedlen;     /* MAXALIGN(item data len); itemoff + alignedlen is the item's end */
     441             : } itemIdCompactData;
     442             : typedef itemIdCompactData *itemIdCompact;   /* compactify_tuples() indexes this as an array */
     443             : 
     444             : /*
     445             :  * After removing or marking some line pointers unused, move the tuples to
     446             :  * remove the gaps caused by the removed items and reorder them back into
     447             :  * reverse line pointer order in the page.
     448             :  *
     449             :  * This function can often be fairly hot, so it pays to take some measures to
     450             :  * make it as optimal as possible.
     451             :  *
     452             :  * Callers may pass 'presorted' as true if the 'itemidbase' array is sorted in
     453             :  * descending order of itemoff.  When this is true we can just memmove()
     454             :  * tuples towards the end of the page.  This is quite a common case as it's
     455             :  * the order that tuples are initially inserted into pages.  When we call this
     456             :  * function to defragment the tuples in the page then any new line pointers
     457             :  * added to the page will keep that presorted order, so hitting this case is
     458             :  * still very common for tables that are commonly updated.
     459             :  *
     460             :  * When the 'itemidbase' array is not presorted then we're unable to just
     461             :  * memmove() tuples around freely.  Doing so could cause us to overwrite the
     462             :  * memory belonging to a tuple we've not moved yet.  In this case, we copy all
     463             :  * the tuples that need to be moved into a temporary buffer.  We can then
     464             :  * simply memcpy() out of that temp buffer back into the page at the correct
     465             :  * location.  Tuples are copied back into the page in the same order as the
     466             :  * 'itemidbase' array, so we end up reordering the tuples back into reverse
     467             :  * line pointer order.  This will increase the chances of hitting the
     468             :  * presorted case the next time around.
     469             :  *
     470             :  * Callers must ensure that nitems is > 0
     471             :  */
     472             : static void
     473      122616 : compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
     474             : {
     475      122616 :     PageHeader  phdr = (PageHeader) page;
     476             :     Offset      upper;
     477             :     Offset      copy_tail;
     478             :     Offset      copy_head;
     479             :     itemIdCompact itemidptr;
     480             :     int         i;
     481             : 
     482             :     /* Code within will not work correctly if nitems == 0 */
     483             :     Assert(nitems > 0);
     484             : 
     485      122616 :     if (presorted)
     486             :     {
     487             : 
     488             : #ifdef USE_ASSERT_CHECKING
     489             :         {
     490             :             /*
     491             :              * Verify we've not gotten any new callers that are incorrectly
     492             :              * passing a true presorted value.
     493             :              */
     494             :             Offset      lastoff = phdr->pd_special;
     495             : 
     496             :             for (i = 0; i < nitems; i++)
     497             :             {
     498             :                 itemidptr = &itemidbase[i];
     499             : 
     500             :                 Assert(lastoff > itemidptr->itemoff);
     501             : 
     502             :                 lastoff = itemidptr->itemoff;
     503             :             }
     504             :         }
     505             : #endif                          /* USE_ASSERT_CHECKING */
     506             : 
     507             :         /*
     508             :          * 'itemidbase' is already in the optimal order, i.e, lower item
     509             :          * pointers have a higher offset.  This allows us to memmove() the
     510             :          * tuples up to the end of the page without having to worry about
     511             :          * overwriting other tuples that have not been moved yet.
     512             :          *
     513             :          * There's a good chance that there are tuples already right at the
     514             :          * end of the page that we can simply skip over because they're
     515             :          * already in the correct location within the page.  We'll do that
     516             :          * first...
     517             :          */
     518       92086 :         upper = phdr->pd_special;
     519       92086 :         i = 0;
     520             :         do
     521             :         {
     522     1361098 :             itemidptr = &itemidbase[i];
     523     1361098 :             if (upper != itemidptr->itemoff + itemidptr->alignedlen)
     524       83298 :                 break;
     525     1277800 :             upper -= itemidptr->alignedlen;
     526             : 
     527     1277800 :             i++;
     528     1277800 :         } while (i < nitems);
     529             : 
     530             :         /*
     531             :          * Now that we've found the first tuple that needs to be moved, we can
     532             :          * do the tuple compactification.  We try and make the least number of
     533             :          * memmove() calls and only call memmove() when there's a gap.  When
     534             :          * we see a gap we just move all tuples after the gap up until the
     535             :          * point of the last move operation.
     536             :          */
     537       92086 :         copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
     538     2074534 :         for (; i < nitems; i++)
     539             :         {
     540             :             ItemId      lp;
     541             : 
     542     1982448 :             itemidptr = &itemidbase[i];
     543     1982448 :             lp = PageGetItemId(page, itemidptr->offsetindex + 1);
     544             : 
     545     1982448 :             if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
     546             :             {
     547      248540 :                 memmove((char *) page + upper,
     548      248540 :                         page + copy_head,
     549      248540 :                         copy_tail - copy_head);
     550             : 
     551             :                 /*
     552             :                  * We've now moved all tuples already seen, but not the
     553             :                  * current tuple, so we set the copy_tail to the end of this
     554             :                  * tuple so it can be moved in another iteration of the loop.
     555             :                  */
     556      248540 :                 copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
     557             :             }
     558             :             /* shift the target offset down by the length of this tuple */
     559     1982448 :             upper -= itemidptr->alignedlen;
     560             :             /* point the copy_head to the start of this tuple */
     561     1982448 :             copy_head = itemidptr->itemoff;
     562             : 
     563             :             /* update the line pointer to reference the new offset */
     564     1982448 :             lp->lp_off = upper;
     565             :         }
     566             : 
     567             :         /* move the remaining tuples. */
     568       92086 :         memmove((char *) page + upper,
     569       92086 :                 page + copy_head,
     570       92086 :                 copy_tail - copy_head);
     571             :     }
     572             :     else
     573             :     {
     574             :         PGAlignedBlock scratch;
     575       30530 :         char       *scratchptr = scratch.data;
     576             : 
     577             :         /*
     578             :          * Non-presorted case:  The tuples in the itemidbase array may be in
     579             :          * any order.  So, in order to move these to the end of the page we
     580             :          * must make a temp copy of each tuple that needs to be moved before
     581             :          * we copy them back into the page at the new offset.
     582             :          *
     583             :          * If a large percentage of tuples have been pruned (>75%) then we'll
     584             :          * copy these into the temp buffer tuple-by-tuple, otherwise, we'll
     585             :          * just do a single memcpy() for all tuples that need to be moved.
     586             :          * When so many tuples have been removed there's likely to be a lot of
     587             :          * gaps and it's unlikely that many non-movable tuples remain at the
     588             :          * end of the page.
     589             :          */
     590       30530 :         if (nitems < PageGetMaxOffsetNumber(page) / 4)
     591             :         {
     592        1700 :             i = 0;
     593             :             do
     594             :             {
     595       31678 :                 itemidptr = &itemidbase[i];
     596       31678 :                 memcpy(scratchptr + itemidptr->itemoff, page + itemidptr->itemoff,
     597       31678 :                        itemidptr->alignedlen);
     598       31678 :                 i++;
     599       31678 :             } while (i < nitems);
     600             : 
     601             :             /* Set things up for the compactification code below */
     602        1700 :             i = 0;
     603        1700 :             itemidptr = &itemidbase[0];
     604        1700 :             upper = phdr->pd_special;
     605             :         }
     606             :         else
     607             :         {
     608       28830 :             upper = phdr->pd_special;
     609             : 
     610             :             /*
     611             :              * Many tuples are likely to already be in the correct location.
     612             :              * There's no need to copy these into the temp buffer.  Instead
     613             :              * we'll just skip forward in the itemidbase array to the position
     614             :              * that we do need to move tuples from so that the code below just
     615             :              * leaves these ones alone.
     616             :              */
     617       28830 :             i = 0;
     618             :             do
     619             :             {
     620      718150 :                 itemidptr = &itemidbase[i];
     621      718150 :                 if (upper != itemidptr->itemoff + itemidptr->alignedlen)
     622       28830 :                     break;
     623      689320 :                 upper -= itemidptr->alignedlen;
     624             : 
     625      689320 :                 i++;
     626      689320 :             } while (i < nitems);
     627             : 
     628             :             /* Copy all tuples that need to be moved into the temp buffer */
     629       28830 :             memcpy(scratchptr + phdr->pd_upper,
     630       28830 :                    page + phdr->pd_upper,
     631       28830 :                    upper - phdr->pd_upper);
     632             :         }
     633             : 
     634             :         /*
     635             :          * Do the tuple compactification.  itemidptr is already pointing to
     636             :          * the first tuple that we're going to move.  Here we collapse the
     637             :          * memcpy calls for adjacent tuples into a single call.  This is done
     638             :          * by delaying the memcpy call until we find a gap that needs to be
     639             :          * closed.
     640             :          */
     641       30530 :         copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
     642     3375402 :         for (; i < nitems; i++)
     643             :         {
     644             :             ItemId      lp;
     645             : 
     646     3344872 :             itemidptr = &itemidbase[i];
     647     3344872 :             lp = PageGetItemId(page, itemidptr->offsetindex + 1);
     648             : 
     649             :             /* copy pending tuples when we detect a gap */
     650     3344872 :             if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
     651             :             {
     652      921822 :                 memcpy((char *) page + upper,
     653      921822 :                        scratchptr + copy_head,
     654      921822 :                        copy_tail - copy_head);
     655             : 
     656             :                 /*
     657             :                  * We've now copied all tuples already seen, but not the
     658             :                  * current tuple, so we set the copy_tail to the end of this
     659             :                  * tuple.
     660             :                  */
     661      921822 :                 copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
     662             :             }
     663             :             /* shift the target offset down by the length of this tuple */
     664     3344872 :             upper -= itemidptr->alignedlen;
     665             :             /* point the copy_head to the start of this tuple */
     666     3344872 :             copy_head = itemidptr->itemoff;
     667             : 
     668             :             /* update the line pointer to reference the new offset */
     669     3344872 :             lp->lp_off = upper;
     670             :         }
     671             : 
     672             :         /* Copy the remaining chunk */
     673       30530 :         memcpy((char *) page + upper,
     674       30530 :                scratchptr + copy_head,
     675       30530 :                copy_tail - copy_head);
     676             :     }
     677             : 
     678      122616 :     phdr->pd_upper = upper;
     679      122616 : }
     680             : 
     681             : /*
     682             :  * PageRepairFragmentation
     683             :  *
     684             :  * Frees fragmented space on a heap page following pruning.
     685             :  *
     686             :  * This routine is usable for heap pages only, but see PageIndexMultiDelete.
     687             :  *
     688             :  * This routine removes unused line pointers from the end of the line pointer
     689             :  * array.  This is possible when dead heap-only tuples get removed by pruning,
     690             :  * especially when there were HOT chains with several tuples each beforehand.
     691             :  *
     692             :  * Caller had better have a full cleanup lock on page's buffer.  As a side
     693             :  * effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
     694             :  * needed.  Caller might also need to account for a reduction in the length of
     695             :  * the line pointer array following array truncation.
                     :  *
                     :  * The page header pointers, the offset of every item that has storage, and
                     :  * the total MAXALIGNed item length are all validated; any inconsistency is
                     :  * reported as ERROR with ERRCODE_DATA_CORRUPTED before any tuple data is
                     :  * moved.
     696             :  */
     697             : void
     698      111404 : PageRepairFragmentation(Page page)
     699             : {
     700      111404 :     Offset      pd_lower = ((PageHeader) page)->pd_lower;
     701      111404 :     Offset      pd_upper = ((PageHeader) page)->pd_upper;
     702      111404 :     Offset      pd_special = ((PageHeader) page)->pd_special;
     703             :     Offset      last_offset;
     704             :     itemIdCompactData itemidbase[MaxHeapTuplesPerPage];
     705             :     itemIdCompact itemidptr;
     706             :     ItemId      lp;
     707             :     int         nline,
     708             :                 nstorage,
     709             :                 nunused;
     710      111404 :     OffsetNumber finalusedlp = InvalidOffsetNumber;
     711             :     int         i;
     712             :     Size        totallen;
     713      111404 :     bool        presorted = true;   /* For now */
     714             : 
     715             :     /*
     716             :      * It's worth the trouble to be more paranoid here than in most places,
     717             :      * because we are about to reshuffle data in (what is usually) a shared
     718             :      * disk buffer.  If we aren't careful then corrupted pointers, lengths,
     719             :      * etc could cause us to clobber adjacent disk buffers, spreading the data
     720             :      * loss further.  So, check everything.
     721             :      */
     722      111404 :     if (pd_lower < SizeOfPageHeaderData ||
     723      111404 :         pd_lower > pd_upper ||
     724      111404 :         pd_upper > pd_special ||
     725      111404 :         pd_special > BLCKSZ ||
     726      111404 :         pd_special != MAXALIGN(pd_special))
     727           0 :         ereport(ERROR,
     728             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     729             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
     730             :                         pd_lower, pd_upper, pd_special)));
     731             : 
     732             :     /*
     733             :      * Run through the line pointer array and collect data about live items.
                     :      *
                     :      * Items with storage are recorded in itemidbase in line pointer order.
                     :      * "presorted" stays true only while each such item's offset is strictly
                     :      * lower than every offset seen before it; the flag is handed to
                     :      * compactify_tuples() below.  finalusedlp tracks the highest offset
                     :      * number whose line pointer is in use, whether or not it has storage.
     734             :      */
     735      111404 :     nline = PageGetMaxOffsetNumber(page);
     736      111404 :     itemidptr = itemidbase;
     737      111404 :     nunused = totallen = 0;
     738      111404 :     last_offset = pd_special;
     739     9174272 :     for (i = FirstOffsetNumber; i <= nline; i++)
     740             :     {
     741     9062868 :         lp = PageGetItemId(page, i);
     742     9062868 :         if (ItemIdIsUsed(lp))
     743             :         {
     744     8731334 :             if (ItemIdHasStorage(lp))
     745             :             {
     746     3281340 :                 itemidptr->offsetindex = i - 1;
     747     3281340 :                 itemidptr->itemoff = ItemIdGetOffset(lp);
     748             : 
     749     3281340 :                 if (last_offset > itemidptr->itemoff)
     750     2770150 :                     last_offset = itemidptr->itemoff;
     751             :                 else
     752      511190 :                     presorted = false;
     753             : 
     754     3281340 :                 if (unlikely(itemidptr->itemoff < (int) pd_upper ||
     755             :                              itemidptr->itemoff >= (int) pd_special))
     756           0 :                     ereport(ERROR,
     757             :                             (errcode(ERRCODE_DATA_CORRUPTED),
     758             :                              errmsg("corrupted line pointer: %u",
     759             :                                     itemidptr->itemoff)));
     760     3281340 :                 itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
     761     3281340 :                 totallen += itemidptr->alignedlen;
     762     3281340 :                 itemidptr++;
     763             :             }
     764             : 
     765     8731334 :             finalusedlp = i;    /* Could be the final non-LP_UNUSED item */
     766             :         }
     767             :         else
     768             :         {
     769             :             /* Unused entries should have lp_len = 0, but make sure */
     770             :             Assert(!ItemIdHasStorage(lp));
     771      331534 :             ItemIdSetUnused(lp);
     772      331534 :             nunused++;
     773             :         }
     774             :     }
     775             : 
     776      111404 :     nstorage = itemidptr - itemidbase;
     777      111404 :     if (nstorage == 0)
     778             :     {
     779             :         /* Page is completely empty, so just reset it quickly */
     780       22252 :         ((PageHeader) page)->pd_upper = pd_special;
     781             :     }
     782             :     else
     783             :     {
     784             :         /* Need to compact the page the hard way */
     785       89152 :         if (totallen > (Size) (pd_special - pd_lower))
     786           0 :             ereport(ERROR,
     787             :                     (errcode(ERRCODE_DATA_CORRUPTED),
     788             :                      errmsg("corrupted item lengths: total %u, available space %u",
     789             :                             (unsigned int) totallen, pd_special - pd_lower)));
     790             : 
     791       89152 :         compactify_tuples(itemidbase, nstorage, page, presorted);
     792             :     }
     793             : 
     794      111404 :     if (finalusedlp != nline)
     795             :     {
     796             :         /* The last line pointer is not the last used line pointer */
     797        3370 :         int         nunusedend = nline - finalusedlp;
     798             : 
     799             :         Assert(nunused >= nunusedend && nunusedend > 0);
     800             : 
     801             :         /* remove trailing unused line pointers from the count */
     802        3370 :         nunused -= nunusedend;
     803             :         /* truncate the line pointer array */
     804        3370 :         ((PageHeader) page)->pd_lower -= (sizeof(ItemIdData) * nunusedend);
     805             :     }
     806             : 
     807             :     /* Set hint bit for PageAddItemExtended */
     808      111404 :     if (nunused > 0)
     809       25466 :         PageSetHasFreeLinePointers(page);
     810             :     else
     811       85938 :         PageClearHasFreeLinePointers(page);
     812      111404 : }
     813             : 
     814             : /*
     815             :  * PageTruncateLinePointerArray
     816             :  *
     817             :  * Removes unused line pointers at the end of the line pointer array.
     818             :  *
     819             :  * This routine is usable for heap pages only.  It is called by VACUUM during
     820             :  * its second pass over the heap.  We expect at least one LP_UNUSED line
     821             :  * pointer on the page (if VACUUM didn't have an LP_DEAD item on the page that
     822             :  * it just set to LP_UNUSED then it should not call here).
     823             :  *
     824             :  * We avoid truncating the line pointer array to 0 items, if necessary by
     825             :  * leaving behind a single remaining LP_UNUSED item.  This is a little
     826             :  * arbitrary, but it seems like a good idea to avoid leaving a PageIsEmpty()
     827             :  * page behind.
     828             :  *
     829             :  * Caller can have either an exclusive lock or a full cleanup lock on page's
     830             :  * buffer.  The page's PD_HAS_FREE_LINES hint bit will be set or unset based
     831             :  * on whether or not we leave behind any remaining LP_UNUSED items.
                     :  *
                     :  * Truncation is done by lowering pd_lower.  When CLOBBER_FREED_MEMORY is
                     :  * defined, the bytes of the truncated line pointers are then overwritten
                     :  * with 0x7F.
     832             :  */
     833             : void
     834       24292 : PageTruncateLinePointerArray(Page page)
     835             : {
     836       24292 :     PageHeader  phdr = (PageHeader) page;
     837       24292 :     bool        countdone = false,
     838       24292 :                 sethint = false;
     839       24292 :     int         nunusedend = 0;
     840             : 
     841             :     /* Scan line pointer array back-to-front */
     842     1600548 :     for (int i = PageGetMaxOffsetNumber(page); i >= FirstOffsetNumber; i--)
     843             :     {
     844     1599706 :         ItemId      lp = PageGetItemId(page, i);
     845             : 
     846     1599706 :         if (!countdone && i > FirstOffsetNumber)
     847             :         {
     848             :             /*
     849             :              * Still determining which line pointers from the end of the array
     850             :              * will be truncated away.  Either count another line pointer as
     851             :              * safe to truncate, or notice that it's not safe to truncate
     852             :              * additional line pointers (stop counting line pointers).
                     :              *
                     :              * The line pointer at FirstOffsetNumber never reaches this
                     :              * branch, so it is never counted and the array cannot be
                     :              * truncated to zero entries.
     853             :              */
     854     1437834 :             if (!ItemIdIsUsed(lp))
     855     1426034 :                 nunusedend++;
     856             :             else
     857       11800 :                 countdone = true;
     858             :         }
     859             :         else
     860             :         {
     861             :             /*
     862             :              * Once we've stopped counting we still need to figure out if
     863             :              * there are any remaining LP_UNUSED line pointers somewhere more
     864             :              * towards the front of the array.
     865             :              */
     866      161872 :             if (!ItemIdIsUsed(lp))
     867             :             {
     868             :                 /*
     869             :                  * This is an unused line pointer that we won't be truncating
     870             :                  * away -- so there is at least one.  Set hint on page.
                     :                  * A single one settles the question, so the scan stops
                     :                  * here.
     871             :                  */
     872       23450 :                 sethint = true;
     873       23450 :                 break;
     874             :             }
     875             :         }
     876             :     }
     877             : 
     878       24292 :     if (nunusedend > 0)
     879             :     {
     880       15312 :         phdr->pd_lower -= sizeof(ItemIdData) * nunusedend;
     881             : 
     882             : #ifdef CLOBBER_FREED_MEMORY
     883             :         memset((char *) page + phdr->pd_lower, 0x7F,
     884             :                sizeof(ItemIdData) * nunusedend);
     885             : #endif
     886             :     }
     887             :     else
     888             :         Assert(sethint);
     889             : 
     890             :     /* Set hint bit for PageAddItemExtended */
     891       24292 :     if (sethint)
     892       23450 :         PageSetHasFreeLinePointers(page);
     893             :     else
     894         842 :         PageClearHasFreeLinePointers(page);
     895       24292 : }
     896             : 
     897             : /*
     898             :  * PageGetFreeSpace
     899             :  *      Returns the size of the free (allocatable) space on a page,
     900             :  *      reduced by the space needed for a new line pointer.
     901             :  *
     902             :  * Note: this should usually only be used on index pages.  Use
     903             :  * PageGetHeapFreeSpace on heap pages.
                     :  *
                     :  * Returns 0 when fewer than sizeof(ItemIdData) bytes separate pd_lower and
                     :  * pd_upper, which includes the case where pd_lower > pd_upper.
     904             :  */
     905             : Size
     906    56124532 : PageGetFreeSpace(const PageData *page)
     907             : {
     908    56124532 :     const PageHeaderData *phdr = (const PageHeaderData *) page;
     909             :     int         space;
     910             : 
     911             :     /*
     912             :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     913             :      * pd_upper.
     914             :      */
     915    56124532 :     space = (int) phdr->pd_upper - (int) phdr->pd_lower;
     916             : 
     917    56124532 :     if (space < (int) sizeof(ItemIdData))
     918       15164 :         return 0;
     919    56109368 :     space -= sizeof(ItemIdData);
     920             : 
     921    56109368 :     return (Size) space;
     922             : }
     923             : 
     924             : /*
     925             :  * PageGetFreeSpaceForMultipleTuples
     926             :  *      Returns the size of the free (allocatable) space on a page,
     927             :  *      reduced by the space needed for multiple new line pointers.
     928             :  *
     929             :  * Note: this should usually only be used on index pages.  Use
     930             :  * PageGetHeapFreeSpace on heap pages.
                     :  *
                     :  * ntups is the number of new line pointers to reserve space for.  Returns 0
                     :  * when the gap between pd_lower and pd_upper is smaller than
                     :  * ntups * sizeof(ItemIdData).
     931             :  */
     932             : Size
     933      130592 : PageGetFreeSpaceForMultipleTuples(const PageData *page, int ntups)
     934             : {
     935      130592 :     const PageHeaderData *phdr = (const PageHeaderData *) page;
     936             :     int         space;
     937             : 
     938             :     /*
     939             :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     940             :      * pd_upper.
     941             :      */
     942      130592 :     space = (int) phdr->pd_upper - (int) phdr->pd_lower;
     943             : 
     944      130592 :     if (space < (int) (ntups * sizeof(ItemIdData)))
     945           0 :         return 0;
     946      130592 :     space -= ntups * sizeof(ItemIdData);
     947             : 
     948      130592 :     return (Size) space;
     949             : }
     950             : 
     951             : /*
     952             :  * PageGetExactFreeSpace
     953             :  *      Returns the size of the free (allocatable) space on a page,
     954             :  *      without any consideration for adding/removing line pointers.
                     :  *
                     :  * The result is pd_upper - pd_lower, or 0 if that difference is negative.
     955             :  */
     956             : Size
     957     3226046 : PageGetExactFreeSpace(const PageData *page)
     958             : {
     959     3226046 :     const PageHeaderData *phdr = (const PageHeaderData *) page;
     960             :     int         space;
     961             : 
     962             :     /*
     963             :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     964             :      * pd_upper.
     965             :      */
     966     3226046 :     space = (int) phdr->pd_upper - (int) phdr->pd_lower;
     967             : 
     968     3226046 :     if (space < 0)
     969           0 :         return 0;
     970             : 
     971     3226046 :     return (Size) space;
     972             : }
     973             : 
     974             : 
     975             : /*
     976             :  * PageGetHeapFreeSpace
     977             :  *      Returns the size of the free (allocatable) space on a page,
     978             :  *      reduced by the space needed for a new line pointer.
     979             :  *
     980             :  * The difference between this and PageGetFreeSpace is that this will return
     981             :  * zero if there are already MaxHeapTuplesPerPage line pointers in the page
     982             :  * and none are free.  We use this to enforce that no more than
     983             :  * MaxHeapTuplesPerPage line pointers are created on a heap page.  (Although
     984             :  * no more tuples than that could fit anyway, in the presence of redirected
     985             :  * or dead line pointers it'd be possible to have too many line pointers.
     986             :  * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
     987             :  * on the number of line pointers, we make this extra check.)
                     :  *
                     :  * The line pointer array is scanned only when PageGetFreeSpace reports a
                     :  * nonzero amount, the page already holds at least MaxHeapTuplesPerPage
                     :  * line pointers, and PageHasFreeLinePointers says one may be free.  The
                     :  * page is only read here, never modified.
     988             :  */
     989             : Size
     990    26809554 : PageGetHeapFreeSpace(const PageData *page)
     991             : {
     992             :     Size        space;
     993             : 
     994    26809554 :     space = PageGetFreeSpace(page);
     995    26809554 :     if (space > 0)
     996             :     {
     997             :         OffsetNumber offnum,
     998             :                     nline;
     999             : 
    1000             :         /*
    1001             :          * Are there already MaxHeapTuplesPerPage line pointers in the page?
    1002             :          */
    1003    26777332 :         nline = PageGetMaxOffsetNumber(page);
    1004    26777332 :         if (nline >= MaxHeapTuplesPerPage)
    1005             :         {
    1006        5632 :             if (PageHasFreeLinePointers(page))
    1007             :             {
    1008             :                 /*
    1009             :                  * Since this is just a hint, we must confirm that there is
    1010             :                  * indeed a free line pointer
                     :                  * (the loop leaves offnum <= nline exactly when one was
                     :                  * found).
    1011             :                  */
    1012      592308 :                 for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
    1013             :                 {
    1014      592140 :                     ItemId      lp = PageGetItemId(unconstify(PageData *, page), offnum);
    1015             : 
    1016      592140 :                     if (!ItemIdIsUsed(lp))
    1017        2706 :                         break;
    1018             :                 }
    1019             : 
    1020        2874 :                 if (offnum > nline)
    1021             :                 {
    1022             :                     /*
    1023             :                      * The hint is wrong, but we can't clear it here since we
    1024             :                      * don't have the ability to mark the page dirty.
    1025             :                      */
    1026         168 :                     space = 0;
    1027             :                 }
    1028             :             }
    1029             :             else
    1030             :             {
    1031             :                 /*
    1032             :                  * Although the hint might be wrong, PageAddItem will believe
    1033             :                  * it anyway, so we must believe it too.
    1034             :                  */
    1035        2758 :                 space = 0;
    1036             :             }
    1037             :         }
    1038             :     }
    1039    26809554 :     return space;
    1040             : }
    1041             : 
    1042             : 
    1043             : /*
    1044             :  * PageIndexTupleDelete
    1045             :  *
    1046             :  * This routine does the work of removing a tuple from an index page.
    1047             :  *
    1048             :  * Unlike heap pages, we compact out the line pointer for the removed tuple.
                     :  *
                     :  * offnum is the 1-based offset number of the tuple to delete.  ERROR is
                     :  * raised if the page header pointers are inconsistent, if offnum is outside
                     :  * 1 .. PageGetMaxOffsetNumber(page), or if the tuple's line pointer starts
                     :  * below pd_upper, extends past pd_special, or has an offset that is not
                     :  * MAXALIGNed.
                     :  *
                     :  * On return pd_lower has shrunk by one ItemIdData and pd_upper has grown by
                     :  * the MAXALIGNed length of the removed tuple.
    1049             :  */
    1050             : void
    1051     1008244 : PageIndexTupleDelete(Page page, OffsetNumber offnum)
    1052             : {
    1053     1008244 :     PageHeader  phdr = (PageHeader) page;
    1054             :     char       *addr;
    1055             :     ItemId      tup;
    1056             :     Size        size;
    1057             :     unsigned    offset;
    1058             :     int         nbytes;
    1059             :     int         offidx;
    1060             :     int         nline;
    1061             : 
    1062             :     /*
    1063             :      * As with PageRepairFragmentation, paranoia seems justified.
    1064             :      */
    1065     1008244 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1066     1008244 :         phdr->pd_lower > phdr->pd_upper ||
    1067     1008244 :         phdr->pd_upper > phdr->pd_special ||
    1068     1008244 :         phdr->pd_special > BLCKSZ ||
    1069     1008244 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1070           0 :         ereport(ERROR,
    1071             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1072             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1073             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1074             : 
    1075     1008244 :     nline = PageGetMaxOffsetNumber(page);
    1076     1008244 :     if ((int) offnum <= 0 || (int) offnum > nline)
    1077           0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1078             : 
    1079             :     /* change offset number to offset index */
    1080     1008244 :     offidx = offnum - 1;
    1081             : 
    1082     1008244 :     tup = PageGetItemId(page, offnum);
    1083             :     Assert(ItemIdHasStorage(tup));
    1084     1008244 :     size = ItemIdGetLength(tup);
    1085     1008244 :     offset = ItemIdGetOffset(tup);
    1086             : 
    1087     1008244 :     if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
    1088     1008244 :         offset != MAXALIGN(offset))
    1089           0 :         ereport(ERROR,
    1090             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1091             :                  errmsg("corrupted line pointer: offset = %u, size = %u",
    1092             :                         offset, (unsigned int) size)));
    1093             : 
    1094             :     /* Amount of space to actually be deleted */
    1095     1008244 :     size = MAXALIGN(size);
    1096             : 
    1097             :     /*
    1098             :      * First, we want to get rid of the pd_linp entry for the index tuple. We
    1099             :      * copy all subsequent linp's back one slot in the array. We don't use
    1100             :      * PageGetItemId, because we are manipulating the _array_, not individual
    1101             :      * linp's.
                     :      *
                     :      * nbytes is the size of the line pointer entries that follow the
                     :      * deleted one; it is zero when the deleted entry was the last one, in
                     :      * which case nothing needs to move.
    1102             :      */
    1103     1008244 :     nbytes = phdr->pd_lower -
    1104     1008244 :         ((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);
    1105             : 
    1106     1008244 :     if (nbytes > 0)
    1107      980886 :         memmove(&(phdr->pd_linp[offidx]),
    1108      980886 :                 &(phdr->pd_linp[offidx + 1]),
    1109             :                 nbytes);
    1110             : 
    1111             :     /*
    1112             :      * Now move everything between the old upper bound (beginning of tuple
    1113             :      * space) and the beginning of the deleted tuple forward, so that space in
    1114             :      * the middle of the page is left free.  If we've just deleted the tuple
    1115             :      * at the beginning of tuple space, then there's no need to do the copy.
    1116             :      */
    1117             : 
    1118             :     /* beginning of tuple space */
    1119     1008244 :     addr = (char *) page + phdr->pd_upper;
    1120             : 
    1121     1008244 :     if (offset > phdr->pd_upper)
    1122      982032 :         memmove(addr + size, addr, offset - phdr->pd_upper);
    1123             : 
    1124             :     /* adjust free space boundary pointers */
    1125     1008244 :     phdr->pd_upper += size;
    1126     1008244 :     phdr->pd_lower -= sizeof(ItemIdData);
    1127             : 
    1128             :     /*
    1129             :      * Finally, we need to adjust the linp entries that remain.
    1130             :      *
    1131             :      * Anything that used to be before the deleted tuple's data was moved
    1132             :      * forward by the size of the deleted tuple.
                     :      *
                     :      * Each remaining entry whose offset is <= the deleted tuple's old
                     :      * offset therefore has its lp_off increased by that (MAXALIGNed) size.
    1133             :      */
    1134     1008244 :     if (!PageIsEmpty(page))
    1135             :     {
    1136             :         int         i;
    1137             : 
    1138     1006672 :         nline--;                /* there's one less than when we started */
    1139   149236712 :         for (i = 1; i <= nline; i++)
    1140             :         {
    1141   148230040 :             ItemId      ii = PageGetItemId(page, i);
    1142             : 
    1143             :             Assert(ItemIdHasStorage(ii));
    1144   148230040 :             if (ItemIdGetOffset(ii) <= offset)
    1145    96121486 :                 ii->lp_off += size;
    1146             :         }
    1147             :     }
    1148     1008244 : }
    1149             : 
    1150             : 
    1151             : /*
    1152             :  * PageIndexMultiDelete
    1153             :  *
    1154             :  * This routine handles the case of deleting multiple tuples from an
    1155             :  * index page at once.  It is considerably faster than a loop around
    1156             :  * PageIndexTupleDelete ... however, the caller *must* supply the array
    1157             :  * of item numbers to be deleted in item number order!  When more than two items are given, the page is modified only after all validity checks pass.
    1158             :  */
    1159             : void
    1160       37490 : PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
    1161             : {
    1162       37490 :     PageHeader  phdr = (PageHeader) page;
    1163       37490 :     Offset      pd_lower = phdr->pd_lower;
    1164       37490 :     Offset      pd_upper = phdr->pd_upper;
    1165       37490 :     Offset      pd_special = phdr->pd_special;
    1166             :     Offset      last_offset;    /* lowest item offset seen so far among kept items */
    1167             :     itemIdCompactData itemidbase[MaxIndexTuplesPerPage];    /* offset/length info of kept items, handed to compactify_tuples */
    1168             :     ItemIdData  newitemids[MaxIndexTuplesPerPage];  /* copies of the line pointers we keep */
    1169             :     itemIdCompact itemidptr;
    1170             :     ItemId      lp;
    1171             :     int         nline,
    1172             :                 nused;
    1173             :     Size        totallen;
    1174             :     Size        size;
    1175             :     unsigned    offset;
    1176             :     int         nextitm;        /* index of the next itemnos[] entry to match */
    1177             :     OffsetNumber offnum;
    1178       37490 :     bool        presorted = true;   /* cleared below if kept items are not in strictly descending offset order */
    1179             : 
    1180             :     Assert(nitems <= MaxIndexTuplesPerPage);
    1181             : 
    1182             :     /*
    1183             :      * If there aren't very many items to delete, then retail
    1184             :      * PageIndexTupleDelete is the best way.  Delete the items in reverse
    1185             :      * order so we don't have to think about adjusting item numbers for
    1186             :      * previous deletions.
    1187             :      *
    1188             :      * TODO: tune the magic number here
    1189             :      */
    1190       37490 :     if (nitems <= 2)
    1191             :     {
    1192        8216 :         while (--nitems >= 0)
    1193        4626 :             PageIndexTupleDelete(page, itemnos[nitems]);
    1194        3590 :         return;
    1195             :     }
    1196             : 
    1197             :     /*
    1198             :      * As with PageRepairFragmentation, paranoia seems justified.
    1199             :      */
    1200       33900 :     if (pd_lower < SizeOfPageHeaderData ||
    1201       33900 :         pd_lower > pd_upper ||
    1202       33900 :         pd_upper > pd_special ||
    1203       33900 :         pd_special > BLCKSZ ||
    1204       33900 :         pd_special != MAXALIGN(pd_special))
    1205           0 :         ereport(ERROR,
    1206             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1207             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1208             :                         pd_lower, pd_upper, pd_special)));
    1209             : 
    1210             :     /*
    1211             :      * Scan the line pointer array and build a list of just the ones we are
    1212             :      * going to keep.  Notice we do not modify the page yet, since we are
    1213             :      * still validity-checking.
    1214             :      */
    1215       33900 :     nline = PageGetMaxOffsetNumber(page);
    1216       33900 :     itemidptr = itemidbase;
    1217       33900 :     totallen = 0;
    1218       33900 :     nused = 0;
    1219       33900 :     nextitm = 0;
    1220       33900 :     last_offset = pd_special;   /* every valid item offset lies below pd_special */
    1221     7556978 :     for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
    1222             :     {
    1223     7523078 :         lp = PageGetItemId(page, offnum);
    1224             :         Assert(ItemIdHasStorage(lp));
    1225     7523078 :         size = ItemIdGetLength(lp);
    1226     7523078 :         offset = ItemIdGetOffset(lp);
    1227     7523078 :         if (offset < pd_upper ||
    1228     7523078 :             (offset + size) > pd_special ||
    1229     7523078 :             offset != MAXALIGN(offset))
    1230           0 :             ereport(ERROR,
    1231             :                     (errcode(ERRCODE_DATA_CORRUPTED),
    1232             :                      errmsg("corrupted line pointer: offset = %u, size = %u",
    1233             :                             offset, (unsigned int) size)));
    1234             : 
    1235     7523078 :         if (nextitm < nitems && offnum == itemnos[nextitm])
    1236             :         {
    1237             :             /* skip item to be deleted */
    1238     3509978 :             nextitm++;
    1239             :         }
    1240             :         else
    1241             :         {
    1242     4013100 :             itemidptr->offsetindex = nused; /* where it will go */
    1243     4013100 :             itemidptr->itemoff = offset;
    1244             : 
    1245     4013100 :             if (last_offset > itemidptr->itemoff)
    1246     1982436 :                 last_offset = itemidptr->itemoff;
    1247             :             else
    1248     2030664 :                 presorted = false;
    1249             : 
    1250     4013100 :             itemidptr->alignedlen = MAXALIGN(size);
    1251     4013100 :             totallen += itemidptr->alignedlen;
    1252     4013100 :             newitemids[nused] = *lp;
    1253     4013100 :             itemidptr++;
    1254     4013100 :             nused++;
    1255             :         }
    1256             :     }
    1257             : 
    1258             :     /* this will catch invalid or out-of-order itemnos[] */
    1259       33900 :     if (nextitm != nitems)
    1260           0 :         elog(ERROR, "incorrect index offsets supplied");
    1261             : 
    1262       33900 :     if (totallen > (Size) (pd_special - pd_lower))
    1263           0 :         ereport(ERROR,
    1264             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1265             :                  errmsg("corrupted item lengths: total %u, available space %u",
    1266             :                         (unsigned int) totallen, pd_special - pd_lower)));
    1267             : 
    1268             :     /*
    1269             :      * Looks good. Overwrite the line pointers with the copy, from which we've
    1270             :      * removed all the unused items.
    1271             :      */
    1272       33900 :     memcpy(phdr->pd_linp, newitemids, nused * sizeof(ItemIdData));
    1273       33900 :     phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
    1274             : 
    1275             :     /* and compactify the tuple data */
    1276       33900 :     if (nused > 0)
    1277       33464 :         compactify_tuples(itemidbase, nused, page, presorted);
    1278             :     else
    1279         436 :         phdr->pd_upper = pd_special;    /* nothing kept: no tuple data remains below pd_special */
    1280             : }
    1281             : 
    1282             : 
    1283             : /*
    1284             :  * PageIndexTupleDeleteNoCompact
    1285             :  *
    1286             :  * Remove the specified tuple from an index page, but set its line pointer
    1287             :  * to "unused" instead of compacting it out, except that it can be removed
    1288             :  * if it's the last line pointer on the page.
    1289             :  *
    1290             :  * This is used for index AMs that require that existing TIDs of live tuples
    1291             :  * remain unchanged, and are willing to allow unused line pointers instead.  Corrupted page or line pointers, or an out-of-range offnum, raise an ERROR.
    1292             :  */
    1293             : void
    1294         672 : PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
    1295             : {
    1296         672 :     PageHeader  phdr = (PageHeader) page;
    1297             :     char       *addr;
    1298             :     ItemId      tup;
    1299             :     Size        size;
    1300             :     unsigned    offset;
    1301             :     int         nline;
    1302             : 
    1303             :     /*
    1304             :      * As with PageRepairFragmentation, paranoia seems justified.
    1305             :      */
    1306         672 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1307         672 :         phdr->pd_lower > phdr->pd_upper ||
    1308         672 :         phdr->pd_upper > phdr->pd_special ||
    1309         672 :         phdr->pd_special > BLCKSZ ||
    1310         672 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1311           0 :         ereport(ERROR,
    1312             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1313             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1314             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1315             : 
    1316         672 :     nline = PageGetMaxOffsetNumber(page);
    1317         672 :     if ((int) offnum <= 0 || (int) offnum > nline)  /* offnum must name an existing line pointer */
    1318           0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1319             : 
    1320         672 :     tup = PageGetItemId(page, offnum);
    1321             :     Assert(ItemIdHasStorage(tup));
    1322         672 :     size = ItemIdGetLength(tup);
    1323         672 :     offset = ItemIdGetOffset(tup);
    1324             : 
    1325         672 :     if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
    1326         672 :         offset != MAXALIGN(offset))
    1327           0 :         ereport(ERROR,
    1328             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1329             :                  errmsg("corrupted line pointer: offset = %u, size = %u",
    1330             :                         offset, (unsigned int) size)));
    1331             : 
    1332             :     /* Amount of space to actually be deleted */
    1333         672 :     size = MAXALIGN(size);
    1334             : 
    1335             :     /*
    1336             :      * Either set the line pointer to "unused", or zap it if it's the last
    1337             :      * one.  (Note: it's possible that the next-to-last one(s) are already
    1338             :      * unused, but we do not trouble to try to compact them out if so.)
    1339             :      */
    1340         672 :     if ((int) offnum < nline)
    1341         608 :         ItemIdSetUnused(tup);
    1342             :     else
    1343             :     {
    1344          64 :         phdr->pd_lower -= sizeof(ItemIdData);
    1345          64 :         nline--;                /* there's one less than when we started */
    1346             :     }
    1347             : 
    1348             :     /*
    1349             :      * Now move everything between the old upper bound (beginning of tuple
    1350             :      * space) and the beginning of the deleted tuple forward, so that space in
    1351             :      * the middle of the page is left free.  If we've just deleted the tuple
    1352             :      * at the beginning of tuple space, then there's no need to do the copy.
    1353             :      */
    1354             : 
    1355             :     /* beginning of tuple space */
    1356         672 :     addr = (char *) page + phdr->pd_upper;
    1357             : 
    1358         672 :     if (offset > phdr->pd_upper)
    1359         608 :         memmove(addr + size, addr, offset - phdr->pd_upper);
    1360             : 
    1361             :     /* adjust free space boundary pointer */
    1362         672 :     phdr->pd_upper += size;
    1363             : 
    1364             :     /*
    1365             :      * Finally, we need to adjust the linp entries that remain.
    1366             :      *
    1367             :      * Anything that used to be before the deleted tuple's data was moved
    1368             :      * forward by the size of the deleted tuple.
    1369             :      */
    1370         672 :     if (!PageIsEmpty(page))
    1371             :     {
    1372             :         int         i;
    1373             : 
    1374      172976 :         for (i = 1; i <= nline; i++)
    1375             :         {
    1376      172314 :             ItemId      ii = PageGetItemId(page, i);
    1377             : 
    1378      172314 :             if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)  /* skip line pointers without storage (presumably including the one just set unused -- verify against ItemIdSetUnused) */
    1379       84558 :                 ii->lp_off += size;
    1380             :         }
    1381             :     }
    1382         672 : }
    1383             : 
    1384             : 
    1385             : /*
    1386             :  * PageIndexTupleOverwrite
    1387             :  *
    1388             :  * Replace a specified tuple on an index page.
    1389             :  *
    1390             :  * The new tuple is placed exactly where the old one had been, shifting
    1391             :  * other tuples' data up or down as needed to keep the page compacted.
    1392             :  * This is better than deleting and reinserting the tuple, because it
    1393             :  * avoids any data shifting when the tuple size doesn't change; and
    1394             :  * even when it does, we avoid moving the line pointers around.
    1395             :  * This could be used by an index AM that doesn't want to unset the
    1396             :  * LP_DEAD bit when it happens to be set.  It could conceivably also be
    1397             :  * used by an index AM that cares about the physical order of tuples as
    1398             :  * well as their logical/ItemId order.
    1399             :  *
    1400             :  * If there's insufficient space for the new tuple, return false.  Other
    1401             :  * errors represent data-corruption problems, so we just raise an ERROR (via ereport or elog).  Returns true once the new tuple has been copied onto the page.
    1402             :  */
    1403             : bool
    1404      903464 : PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
    1405             :                         Item newtup, Size newsize)
    1406             : {
    1407      903464 :     PageHeader  phdr = (PageHeader) page;
    1408             :     ItemId      tupid;
    1409             :     int         oldsize;        /* old tuple length; MAXALIGN'd before the space check */
    1410             :     unsigned    offset;
    1411             :     Size        alignednewsize;
    1412             :     int         size_diff;      /* bytes by which the tuple shrinks; negative if it grows */
    1413             :     int         itemcount;
    1414             : 
    1415             :     /*
    1416             :      * As with PageRepairFragmentation, paranoia seems justified.
    1417             :      */
    1418      903464 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1419      903464 :         phdr->pd_lower > phdr->pd_upper ||
    1420      903464 :         phdr->pd_upper > phdr->pd_special ||
    1421      903464 :         phdr->pd_special > BLCKSZ ||
    1422      903464 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1423           0 :         ereport(ERROR,
    1424             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1425             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1426             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1427             : 
    1428      903464 :     itemcount = PageGetMaxOffsetNumber(page);
    1429      903464 :     if ((int) offnum <= 0 || (int) offnum > itemcount)  /* offnum must name an existing line pointer */
    1430           0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1431             : 
    1432      903464 :     tupid = PageGetItemId(page, offnum);
    1433             :     Assert(ItemIdHasStorage(tupid));
    1434      903464 :     oldsize = ItemIdGetLength(tupid);
    1435      903464 :     offset = ItemIdGetOffset(tupid);
    1436             : 
    1437      903464 :     if (offset < phdr->pd_upper || (offset + oldsize) > phdr->pd_special ||
    1438      903464 :         offset != MAXALIGN(offset))
    1439           0 :         ereport(ERROR,
    1440             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1441             :                  errmsg("corrupted line pointer: offset = %u, size = %u",
    1442             :                         offset, (unsigned int) oldsize)));
    1443             : 
    1444             :     /*
    1445             :      * Determine actual change in space requirement, check for page overflow.
    1446             :      */
    1447      903464 :     oldsize = MAXALIGN(oldsize);
    1448      903464 :     alignednewsize = MAXALIGN(newsize);
    1449      903464 :     if (alignednewsize > oldsize + (phdr->pd_upper - phdr->pd_lower))  /* must fit in the old tuple's space plus the current free space */
    1450           0 :         return false;
    1451             : 
    1452             :     /*
    1453             :      * Relocate existing data and update line pointers, unless the new tuple
    1454             :      * is the same size as the old (after alignment), in which case there's
    1455             :      * nothing to do.  Notice that what we have to relocate is data before the
    1456             :      * target tuple, not data after, so it's convenient to express size_diff
    1457             :      * as the amount by which the tuple's size is decreasing, making it the
    1458             :      * delta to add to pd_upper and affected line pointers.
    1459             :      */
    1460      903464 :     size_diff = oldsize - (int) alignednewsize;
    1461      903464 :     if (size_diff != 0)
    1462             :     {
    1463       88990 :         char       *addr = (char *) page + phdr->pd_upper;
    1464             :         int         i;
    1465             : 
    1466             :         /* relocate all tuple data before the target tuple */
    1467       88990 :         memmove(addr + size_diff, addr, offset - phdr->pd_upper);
    1468             : 
    1469             :         /* adjust free space boundary pointer */
    1470       88990 :         phdr->pd_upper += size_diff;
    1471             : 
    1472             :         /* adjust affected line pointers too */
    1473    14811562 :         for (i = FirstOffsetNumber; i <= itemcount; i++)
    1474             :         {
    1475    14722572 :             ItemId      ii = PageGetItemId(page, i);
    1476             : 
    1477             :             /* Allow items without storage; currently only BRIN needs that */
    1478    14722572 :             if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
    1479     7169822 :                 ii->lp_off += size_diff;
    1480             :         }
    1481             :     }
    1482             : 
    1483             :     /* Update the item's tuple length without changing its lp_flags field; lp_off is reset from the saved offset because the loop above may already have shifted it */
    1484      903464 :     tupid->lp_off = offset + size_diff;
    1485      903464 :     tupid->lp_len = newsize;
    1486             : 
    1487             :     /* Copy new tuple data onto page */
    1488      903464 :     memcpy(PageGetItem(page, tupid), newtup, newsize);
    1489             : 
    1490      903464 :     return true;
    1491             : }
    1492             : 
    1493             : 
    1494             : /*
    1495             :  * Set checksum for a page in shared buffers.
    1496             :  *
    1497             :  * If checksums are disabled, or if the page is not initialized, just return
    1498             :  * the input.  Otherwise, we must make a copy of the page before calculating
    1499             :  * the checksum, to prevent concurrent modifications (e.g. setting hint bits)
    1500             :  * from making the final checksum invalid.  It doesn't matter if we include or
    1501             :  * exclude hints during the copy, as long as we write a valid page and
    1502             :  * associated checksum.
    1503             :  *
    1504             :  * Returns a pointer to the block-sized data that needs to be written. Uses
    1505             :  * statically-allocated memory, so the caller must immediately write the
    1506             :  * returned page and not refer to it again.
    1507             :  */
    1508             : char *
    1509     1009866 : PageSetChecksumCopy(Page page, BlockNumber blkno)
    1510             : {
    1511             :     static char *pageCopy = NULL;   /* scratch buffer, allocated on first use and not freed by this function */
    1512             : 
    1513             :     /* If we don't need a checksum, just return the passed-in data */
    1514     1009866 :     if (PageIsNew(page) || !DataChecksumsEnabled())
    1515       21384 :         return page;
    1516             : 
    1517             :     /*
    1518             :      * We allocate the copy space once and reuse it on each subsequent
    1519             :      * call.  The point of palloc'ing here, rather than having a static char
    1520             :      * array, is first to ensure adequate alignment for the checksumming code
    1521             :      * and second to avoid wasting space in processes that never call this.
    1522             :      */
    1523      988482 :     if (pageCopy == NULL)
    1524        5124 :         pageCopy = MemoryContextAllocAligned(TopMemoryContext,
    1525             :                                              BLCKSZ,
    1526             :                                              PG_IO_ALIGN_SIZE,
    1527             :                                              0);
    1528             : 
    1529      988482 :     memcpy(pageCopy, page, BLCKSZ);
    1530      988482 :     ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno);   /* checksum is stored in the copy; the caller's page is left untouched */
    1531      988482 :     return pageCopy;
    1532             : }
    1533             : 
    1534             : /*
    1535             :  * Set checksum for a page in private memory.
    1536             :  *
    1537             :  * This must only be used when we know that no other process can be modifying
    1538             :  * the page buffer.  Does nothing if the page is new or data checksums are disabled.
    1539             :  */
    1540             : void
    1541      119370 : PageSetChecksumInplace(Page page, BlockNumber blkno)
    1542             : {
    1543             :     /* If we don't need a checksum, just return */
    1544      119370 :     if (PageIsNew(page) || !DataChecksumsEnabled())
    1545        3666 :         return;
    1546             : 
    1547      115704 :     ((PageHeader) page)->pd_checksum = pg_checksum_page(page, blkno);   /* written directly into the caller's page header */
    1548             : }

Generated by: LCOV version 1.14