LCOV - code coverage report
Current view: top level - src/backend/storage/page - bufpage.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 409 447 91.5 %
Date: 2024-02-28 21:11:04 Functions: 20 20 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * bufpage.c
       4             :  *    POSTGRES standard buffer page code.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/storage/page/bufpage.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/htup_details.h"
      18             : #include "access/itup.h"
      19             : #include "access/xlog.h"
      20             : #include "pgstat.h"
      21             : #include "storage/checksum.h"
      22             : #include "utils/memdebug.h"
      23             : #include "utils/memutils.h"
      24             : 
      25             : 
      26             : /* GUC variable */
      27             : bool        ignore_checksum_failure = false;
      28             : 
      29             : 
      30             : /* ----------------------------------------------------------------
      31             :  *                      Page support functions
      32             :  * ----------------------------------------------------------------
      33             :  */
      34             : 
      35             : /*
      36             :  * PageInit
      37             :  *      Initializes the contents of a page.
      38             :  *      Note that we don't calculate an initial checksum here; that's not done
      39             :  *      until it's time to write.
                     :  *
                     :  *      page: buffer to initialize; all pageSize bytes are overwritten.
                     :  *      pageSize: size of the page in bytes; asserted to equal BLCKSZ.
                     :  *      specialSize: bytes to reserve at the end of the page for the
                     :  *          special space; rounded up with MAXALIGN before use.
                     :  *
                     :  *      On return the whole page has been zeroed and then pd_lower is set
                     :  *      to SizeOfPageHeaderData, pd_upper and pd_special are both set to
                     :  *      pageSize - specialSize, and the page size/layout version are
                     :  *      stored via PageSetPageSizeAndVersion.
      40             :  */
      41             : void
      42      596280 : PageInit(Page page, Size pageSize, Size specialSize)
      43             : {
      44      596280 :     PageHeader  p = (PageHeader) page;
      45             : 
      46      596280 :     specialSize = MAXALIGN(specialSize);
      47             : 
      48             :     Assert(pageSize == BLCKSZ);
      49             :     Assert(pageSize > specialSize + SizeOfPageHeaderData);
      50             : 
      51             :     /* Make sure all fields of page are zero, as well as unused space */
      52      596280 :     MemSet(p, 0, pageSize);
      53             : 
      54      596280 :     p->pd_flags = 0;
      55      596280 :     p->pd_lower = SizeOfPageHeaderData;
      56      596280 :     p->pd_upper = pageSize - specialSize;   /* upper == special: item data area starts out empty */
      57      596280 :     p->pd_special = pageSize - specialSize;
      58      596280 :     PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
      59             :     /* p->pd_prune_xid = InvalidTransactionId;       done by above MemSet */
      60      596280 : }
      61             : 
      62             : 
      63             : /*
      64             :  * PageIsVerifiedExtended
      65             :  *      Check that the page header and checksum (if any) appear valid.
      66             :  *
      67             :  * This is called when a page has just been read in from disk.  The idea is
      68             :  * to cheaply detect trashed pages before we go nuts following bogus line
      69             :  * pointers, testing invalid transaction identifiers, etc.
      70             :  *
      71             :  * It turns out to be necessary to allow zeroed pages here too.  Even though
      72             :  * this routine is *not* called when deliberately adding a page to a relation,
      73             :  * there are scenarios in which a zeroed page might be found in a table.
      74             :  * (Example: a backend extends a relation, then crashes before it can write
      75             :  * any WAL entry about the new page.  The kernel will already have the
      76             :  * zeroed page in the file, and it will stay that way after restart.)  So we
      77             :  * allow zeroed pages here, and are careful that the page access macros
      78             :  * treat such a page as empty and without free space.  Eventually, VACUUM
      79             :  * will clean up such a page and make it usable.
      80             :  *
      81             :  * If flag PIV_LOG_WARNING is set, a WARNING is logged in the event of
      82             :  * a checksum failure.
      83             :  *
      84             :  * If flag PIV_REPORT_STAT is set, a checksum failure is reported directly
      85             :  * to pgstat.
                     :  *
                     :  * blkno is passed to pg_checksum_page() when computing the checksum.
                     :  *
                     :  * Returns true if the page is acceptable: either the header passes the
                     :  * sanity checks and the checksum (when checksums are enabled) matches, or
                     :  * the page is entirely zeroes, or the checksum failed but the header is
                     :  * sane and ignore_checksum_failure is set.  Returns false otherwise.
      86             :  */
      87             : bool
      88     2097962 : PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
      89             : {
      90     2097962 :     PageHeader  p = (PageHeader) page;
      91             :     size_t     *pagebytes;
      92             :     int         i;
      93     2097962 :     bool        checksum_failure = false;
      94     2097962 :     bool        header_sane = false;
      95     2097962 :     bool        all_zeroes = false;
      96     2097962 :     uint16      checksum = 0;
      97             : 
      98             :     /*
      99             :      * Don't verify page data unless the page passes basic non-zero test
     100             :      */
     101     2097962 :     if (!PageIsNew(page))
     102             :     {
     103     2092732 :         if (DataChecksumsEnabled())
     104             :         {
     105        3734 :             checksum = pg_checksum_page((char *) page, blkno);
     106             : 
     107        3734 :             if (checksum != p->pd_checksum)
     108           0 :                 checksum_failure = true;
     109             :         }
     110             : 
     111             :         /*
     112             :          * The following checks don't prove the header is correct, only that
     113             :          * it looks sane enough to allow into the buffer pool. Later usage of
     114             :          * the block can still reveal problems, which is why we offer the
     115             :          * checksum option.
     116             :          */
     117     2092732 :         if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
     118     2092732 :             p->pd_lower <= p->pd_upper &&
     119     2092732 :             p->pd_upper <= p->pd_special &&
     120     2092732 :             p->pd_special <= BLCKSZ &&
     121     2092732 :             p->pd_special == MAXALIGN(p->pd_special))
     122     2092732 :             header_sane = true;
     123             : 
     124     2092732 :         if (header_sane && !checksum_failure)
     125     2092732 :             return true;
     126             :     }
     127             : 
     128             :     /* Check all-zeroes case */
     129        5230 :     all_zeroes = true;
     130        5230 :     pagebytes = (size_t *) page;    /* scan the page one size_t word at a time */
     131     5360750 :     for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)
     132             :     {
     133     5355520 :         if (pagebytes[i] != 0)
     134             :         {
     135           0 :             all_zeroes = false;
     136           0 :             break;
     137             :         }
     138             :     }
     139             : 
     140        5230 :     if (all_zeroes)
     141        5230 :         return true;
     142             : 
     143             :     /*
     144             :      * Throw a WARNING if the checksum fails, but only after we've checked for
     145             :      * the all-zeroes case.
     146             :      */
     147           0 :     if (checksum_failure)
     148             :     {
     149           0 :         if ((flags & PIV_LOG_WARNING) != 0)
     150           0 :             ereport(WARNING,
     151             :                     (errcode(ERRCODE_DATA_CORRUPTED),
     152             :                      errmsg("page verification failed, calculated checksum %u but expected %u",
     153             :                             checksum, p->pd_checksum)));
     154             : 
     155           0 :         if ((flags & PIV_REPORT_STAT) != 0)
     156           0 :             pgstat_report_checksum_failure();
     157             : 
     158           0 :         if (header_sane && ignore_checksum_failure)
     159           0 :             return true;    /* bad checksum tolerated only when the header is sane */
     160             :     }
     161             : 
     162           0 :     return false;
     163             : }
     164             : 
     165             : 
     166             : /*
     167             :  *  PageAddItemExtended
     168             :  *
     169             :  *  Add an item to a page.  Return value is the offset at which it was
     170             :  *  inserted, or InvalidOffsetNumber if the item is not inserted for any
     171             :  *  reason.  A WARNING is issued indicating the reason for the refusal.
                     :  *  (Exception: if the item simply does not fit in the page's free space,
                     :  *  InvalidOffsetNumber is returned without any WARNING.)
                     :  *
                     :  *  Page header pointers that fail the sanity check at the top of the
                     :  *  function are reported with ereport(PANIC).
     172             :  *
     173             :  *  offsetNumber must be either InvalidOffsetNumber to specify finding a
     174             :  *  free line pointer, or a value between FirstOffsetNumber and one past
     175             :  *  the last existing item, to specify using that particular line pointer.
     176             :  *
     177             :  *  If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
     178             :  *  the item at the specified offsetNumber, which must be either a
     179             :  *  currently-unused line pointer, or one past the last existing item.
     180             :  *
     181             :  *  If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
     182             :  *  the item at the specified offsetNumber, moving existing items later
     183             :  *  in the array to make room.
     184             :  *
     185             :  *  If offsetNumber is not valid, then assign a slot by finding the first
     186             :  *  one that is both unused and deallocated.
     187             :  *
     188             :  *  If flag PAI_IS_HEAP is set, we enforce that there can't be more than
     189             :  *  MaxHeapTuplesPerPage line pointers on the page.
                     :  *
                     :  *  The item's "size" bytes are copied onto the page; the space consumed
                     :  *  from pd_upper is MAXALIGN(size), while the line pointer records the
                     :  *  unaligned size.
     190             :  *
     191             :  *  !!! EREPORT(ERROR) IS DISALLOWED HERE !!!
     192             :  */
     193             : OffsetNumber
     194    60626004 : PageAddItemExtended(Page page,
     195             :                     Item item,
     196             :                     Size size,
     197             :                     OffsetNumber offsetNumber,
     198             :                     int flags)
     199             : {
     200    60626004 :     PageHeader  phdr = (PageHeader) page;
     201             :     Size        alignedSize;
     202             :     int         lower;
     203             :     int         upper;
     204             :     ItemId      itemId;
     205             :     OffsetNumber limit;
     206    60626004 :     bool        needshuffle = false;
     207             : 
     208             :     /*
     209             :      * Be wary about corrupted page pointers
     210             :      */
     211    60626004 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
     212    60626004 :         phdr->pd_lower > phdr->pd_upper ||
     213    60626004 :         phdr->pd_upper > phdr->pd_special ||
     214    60626004 :         phdr->pd_special > BLCKSZ)
     215           0 :         ereport(PANIC,
     216             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     217             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
     218             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
     219             : 
     220             :     /*
     221             :      * Select offsetNumber to place the new item at
     222             :      */
     223    60626004 :     limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));  /* one past the last existing item */
     224             : 
     225             :     /* was offsetNumber passed in? */
     226    60626004 :     if (OffsetNumberIsValid(offsetNumber))
     227             :     {
     228             :         /* yes, check it */
     229    39770834 :         if ((flags & PAI_OVERWRITE) != 0)
     230             :         {
     231     3070604 :             if (offsetNumber < limit)
     232             :             {
     233       20920 :                 itemId = PageGetItemId(page, offsetNumber);
     234       20920 :                 if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
     235             :                 {
     236           0 :                     elog(WARNING, "will not overwrite a used ItemId");
     237           0 :                     return InvalidOffsetNumber;
     238             :                 }
     239             :             }
     240             :         }
     241             :         else
     242             :         {
     243    36700230 :             if (offsetNumber < limit)
     244     5485156 :                 needshuffle = true; /* need to move existing linp's */
     245             :         }
     246             :     }
     247             :     else
     248             :     {
     249             :         /* offsetNumber was not passed in, so find a free slot */
     250             :         /* if no free slot, we'll put it at limit (1st open slot) */
     251    20855170 :         if (PageHasFreeLinePointers(page))
     252             :         {
     253             :             /*
     254             :              * Scan line pointer array to locate a "recyclable" (unused)
     255             :              * ItemId.
     256             :              *
     257             :              * Always use earlier items first.  PageTruncateLinePointerArray
     258             :              * can only truncate unused items when they appear as a contiguous
     259             :              * group at the end of the line pointer array.
     260             :              */
     261    15883742 :             for (offsetNumber = FirstOffsetNumber;
     262             :                  offsetNumber < limit;   /* limit is maxoff+1 */
     263    15670740 :                  offsetNumber++)
     264             :             {
     265    15873306 :                 itemId = PageGetItemId(page, offsetNumber);
     266             : 
     267             :                 /*
     268             :                  * We check for no storage as well, just to be paranoid;
     269             :                  * unused items should never have storage.  Assert() that the
     270             :                  * invariant is respected too.
     271             :                  */
     272             :                 Assert(ItemIdIsUsed(itemId) || !ItemIdHasStorage(itemId));
     273             : 
     274    15873306 :                 if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
     275      202566 :                     break;
     276             :             }
     277      213002 :             if (offsetNumber >= limit)
     278             :             {
     279             :                 /* the hint is wrong, so reset it */
     280       10436 :                 PageClearHasFreeLinePointers(page);
     281             :             }
     282             :         }
     283             :         else
     284             :         {
     285             :             /* don't bother searching if hint says there's no free slot */
     286    20642168 :             offsetNumber = limit;
     287             :         }
     288             :     }
     289             : 
     290             :     /* Reject placing items beyond the first unused line pointer */
     291    60626004 :     if (offsetNumber > limit)
     292             :     {
     293           0 :         elog(WARNING, "specified item offset is too large");
     294           0 :         return InvalidOffsetNumber;
     295             :     }
     296             : 
     297             :     /* Reject placing items beyond heap boundary, if heap */
     298    60626004 :     if ((flags & PAI_IS_HEAP) != 0 && offsetNumber > MaxHeapTuplesPerPage)
     299             :     {
     300           0 :         elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
     301           0 :         return InvalidOffsetNumber;
     302             :     }
     303             : 
     304             :     /*
     305             :      * Compute new lower and upper pointers for page, see if it'll fit.
     306             :      *
     307             :      * Note: do arithmetic as signed ints, to avoid mistakes if, say,
     308             :      * alignedSize > pd_upper.
     309             :      */
     310    60626004 :     if (offsetNumber == limit || needshuffle)
     311    60402518 :         lower = phdr->pd_lower + sizeof(ItemIdData);    /* line pointer array grows by one entry */
     312             :     else
     313      223486 :         lower = phdr->pd_lower; /* reusing an existing unused line pointer */
     314             : 
     315    60626004 :     alignedSize = MAXALIGN(size);
     316             : 
     317    60626004 :     upper = (int) phdr->pd_upper - (int) alignedSize;
     318             : 
     319    60626004 :     if (lower > upper)
     320           0 :         return InvalidOffsetNumber; /* not enough free space; no WARNING is issued here */
     321             : 
     322             :     /*
     323             :      * OK to insert the item.  First, shuffle the existing pointers if needed.
     324             :      */
     325    60626004 :     itemId = PageGetItemId(page, offsetNumber);
     326             : 
     327    60626004 :     if (needshuffle)
     328     5485156 :         memmove(itemId + 1, itemId,
     329     5485156 :                 (limit - offsetNumber) * sizeof(ItemIdData));
     330             : 
     331             :     /* set the line pointer */
     332    60626004 :     ItemIdSetNormal(itemId, upper, size);
     333             : 
     334             :     /*
     335             :      * Items normally contain no uninitialized bytes.  Core bufpage consumers
     336             :      * conform, but this is not a necessary coding rule; a new index AM could
     337             :      * opt to depart from it.  However, data type input functions and other
     338             :      * C-language functions that synthesize datums should initialize all
     339             :      * bytes; datumIsEqual() relies on this.  Testing here, along with the
     340             :      * similar check in printtup(), helps to catch such mistakes.
     341             :      *
     342             :      * Values of the "name" type retrieved via index-only scans may contain
     343             :      * uninitialized bytes; see comment in btrescan().  Valgrind will report
     344             :      * this as an error, but it is safe to ignore.
     345             :      */
     346             :     VALGRIND_CHECK_MEM_IS_DEFINED(item, size);
     347             : 
     348             :     /* copy the item's data onto the page */
     349    60626004 :     memcpy((char *) page + upper, item, size);
     350             : 
     351             :     /* adjust page header */
     352    60626004 :     phdr->pd_lower = (LocationIndex) lower;
     353    60626004 :     phdr->pd_upper = (LocationIndex) upper;
     354             : 
     355    60626004 :     return offsetNumber;
     356             : }
     357             : 
     358             : 
     359             : /*
     360             :  * PageGetTempPage
     361             :  *      Get a temporary page in local memory for special processing.
     362             :  *      The returned page is not initialized at all; caller must do that.
                     :  *
                     :  *      The given page is only consulted for its page size (via
                     :  *      PageGetPageSize); none of its contents are copied.  The result is
                     :  *      palloc'd.
     363             :  */
     364             : Page
     365       18904 : PageGetTempPage(Page page)
     366             : {
     367             :     Size        pageSize;
     368             :     Page        temp;
     369             : 
     370       18904 :     pageSize = PageGetPageSize(page);
     371       18904 :     temp = (Page) palloc(pageSize);
     372             : 
     373       18904 :     return temp;
     374             : }
     375             : 
     376             : /*
     377             :  * PageGetTempPageCopy
     378             :  *      Get a temporary page in local memory for special processing.
     379             :  *      The page is initialized by copying the contents of the given page.
                     :  *
                     :  *      The number of bytes allocated (with palloc) and copied is the page
                     :  *      size recorded in the given page's header.
     380             :  */
     381             : Page
     382        8818 : PageGetTempPageCopy(Page page)
     383             : {
     384             :     Size        pageSize;
     385             :     Page        temp;
     386             : 
     387        8818 :     pageSize = PageGetPageSize(page);
     388        8818 :     temp = (Page) palloc(pageSize);
     389             : 
     390        8818 :     memcpy(temp, page, pageSize);
     391             : 
     392        8818 :     return temp;
     393             : }
     394             : 
     395             : /*
     396             :  * PageGetTempPageCopySpecial
     397             :  *      Get a temporary page in local memory for special processing.
     398             :  *      The page is PageInit'd with the same special-space size as the
     399             :  *      given page, and the special space is copied from the given page.
                     :  *
                     :  *      Nothing else is copied from the given page: apart from the special
                     :  *      space, the returned (palloc'd) page is exactly as PageInit leaves it.
     400             :  */
     401             : Page
     402       54892 : PageGetTempPageCopySpecial(Page page)
     403             : {
     404             :     Size        pageSize;
     405             :     Page        temp;
     406             : 
     407       54892 :     pageSize = PageGetPageSize(page);
     408       54892 :     temp = (Page) palloc(pageSize);
     409             : 
     410       54892 :     PageInit(temp, pageSize, PageGetSpecialSize(page));
     411       54892 :     memcpy(PageGetSpecialPointer(temp),
     412       54892 :            PageGetSpecialPointer(page),
     413       54892 :            PageGetSpecialSize(page));
     414             : 
     415       54892 :     return temp;
     416             : }
     417             : 
     418             : /*
     419             :  * PageRestoreTempPage
     420             :  *      Copy temporary page back to permanent page after special processing
     421             :  *      and release the temporary page.
                     :  *
                     :  *      The number of bytes copied is the page size recorded in tempPage's
                     :  *      header.  tempPage is pfree'd here, so the caller must not use it
                     :  *      afterwards.
     422             :  */
     423             : void
     424       71668 : PageRestoreTempPage(Page tempPage, Page oldPage)
     425             : {
     426             :     Size        pageSize;
     427             : 
     428       71668 :     pageSize = PageGetPageSize(tempPage);
     429       71668 :     memcpy((char *) oldPage, (char *) tempPage, pageSize);
     430             : 
     431       71668 :     pfree(tempPage);
     432       71668 : }
     433             : 
     434             : /*
     435             :  * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
                     :  *
                     :  * One itemIdCompactData describes one item that compactify_tuples() has to
                     :  * keep: which line pointer refers to it, where its data currently lives in
                     :  * the page, and how many (MAXALIGN'd) bytes it occupies.  itemIdCompact is
                     :  * the pointer type used to pass arrays of these entries around.
                     :  *
                     :  * NOTE(review): itemoff is signed (int16) while the other fields are
                     :  * unsigned; presumably page offsets always fit in 15 bits -- confirm
                     :  * against the maximum supported BLCKSZ.
     436             :  */
     437             : typedef struct itemIdCompactData
     438             : {
     439             :     uint16      offsetindex;    /* linp array index */
     440             :     int16       itemoff;        /* page offset of item data */
     441             :     uint16      alignedlen;     /* MAXALIGN(item data len) */
     442             : } itemIdCompactData;
     443             : typedef itemIdCompactData *itemIdCompact;
     444             : 
     445             : /*
     446             :  * After removing or marking some line pointers unused, move the tuples to
     447             :  * remove the gaps caused by the removed items and reorder them back into
     448             :  * reverse line pointer order in the page.
     449             :  *
     450             :  * This function can often be fairly hot, so it pays to take some measures to
     451             :  * make it as optimal as possible.
     452             :  *
     453             :  * Callers may pass 'presorted' as true if the 'itemidbase' array is sorted in
     454             :  * descending order of itemoff.  When this is true we can just memmove()
     455             :  * tuples towards the end of the page.  This is quite a common case as it's
     456             :  * the order that tuples are initially inserted into pages.  When we call this
     457             :  * function to defragment the tuples in the page then any new line pointers
     458             :  * added to the page will keep that presorted order, so hitting this case is
     459             :  * still very common for tables that are commonly updated.
     460             :  *
     461             :  * When the 'itemidbase' array is not presorted then we're unable to just
     462             :  * memmove() tuples around freely.  Doing so could cause us to overwrite the
     463             :  * memory belonging to a tuple we've not moved yet.  In this case, we copy all
     464             :  * the tuples that need to be moved into a temporary buffer.  We can then
     465             :  * simply memcpy() out of that temp buffer back into the page at the correct
     466             :  * location.  Tuples are copied back into the page in the same order as the
     467             :  * 'itemidbase' array, so we end up reordering the tuples back into reverse
     468             :  * line pointer order.  This will increase the chances of hitting the
     469             :  * presorted case the next time around.
     470             :  *
     471             :  * Callers must ensure that nitems is > 0
     472             :  */
     473             : static void
     474       99832 : compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
     475             : {
     476       99832 :     PageHeader  phdr = (PageHeader) page;
     477             :     Offset      upper;
     478             :     Offset      copy_tail;
     479             :     Offset      copy_head;
     480             :     itemIdCompact itemidptr;
     481             :     int         i;
     482             : 
     483             :     /* Code within will not work correctly if nitems == 0 */
     484             :     Assert(nitems > 0);
     485             : 
     486       99832 :     if (presorted)
     487             :     {
     488             : 
     489             : #ifdef USE_ASSERT_CHECKING
     490             :         {
     491             :             /*
     492             :              * Verify we've not gotten any new callers that are incorrectly
     493             :              * passing a true presorted value.
     494             :              */
     495             :             Offset      lastoff = phdr->pd_special;
     496             : 
     497             :             for (i = 0; i < nitems; i++)
     498             :             {
     499             :                 itemidptr = &itemidbase[i];
     500             : 
     501             :                 Assert(lastoff > itemidptr->itemoff);
     502             : 
     503             :                 lastoff = itemidptr->itemoff;
     504             :             }
     505             :         }
     506             : #endif                          /* USE_ASSERT_CHECKING */
     507             : 
     508             :         /*
     509             :          * 'itemidbase' is already in the optimal order, i.e, lower item
     510             :          * pointers have a higher offset.  This allows us to memmove() the
     511             :          * tuples up to the end of the page without having to worry about
     512             :          * overwriting other tuples that have not been moved yet.
     513             :          *
     514             :          * There's a good chance that there are tuples already right at the
     515             :          * end of the page that we can simply skip over because they're
     516             :          * already in the correct location within the page.  We'll do that
     517             :          * first...
     518             :          */
     519       74404 :         upper = phdr->pd_special;
     520       74404 :         i = 0;
     521             :         do
     522             :         {
     523     1164646 :             itemidptr = &itemidbase[i];
     524     1164646 :             if (upper != itemidptr->itemoff + itemidptr->alignedlen)
     525       67594 :                 break;
     526     1097052 :             upper -= itemidptr->alignedlen;
     527             : 
     528     1097052 :             i++;
     529     1097052 :         } while (i < nitems);
     530             : 
     531             :         /*
     532             :          * Now that we've found the first tuple that needs to be moved, we can
     533             :          * do the tuple compactification.  We try and make the least number of
     534             :          * memmove() calls and only call memmove() when there's a gap.  When
     535             :          * we see a gap we just move all tuples after the gap up until the
     536             :          * point of the last move operation.
     537             :          */
     538       74404 :         copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
     539     1742390 :         for (; i < nitems; i++)
     540             :         {
     541             :             ItemId      lp;
     542             : 
     543     1667986 :             itemidptr = &itemidbase[i];
     544     1667986 :             lp = PageGetItemId(page, itemidptr->offsetindex + 1);
     545             : 
     546     1667986 :             if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
     547             :             {
     548      176702 :                 memmove((char *) page + upper,
     549      176702 :                         page + copy_head,
     550      176702 :                         copy_tail - copy_head);
     551             : 
     552             :                 /*
     553             :                  * We've now moved all tuples already seen, but not the
     554             :                  * current tuple, so we set the copy_tail to the end of this
     555             :                  * tuple so it can be moved in another iteration of the loop.
     556             :                  */
     557      176702 :                 copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
     558             :             }
     559             :             /* shift the target offset down by the length of this tuple */
     560     1667986 :             upper -= itemidptr->alignedlen;
     561             :             /* point the copy_head to the start of this tuple */
     562     1667986 :             copy_head = itemidptr->itemoff;
     563             : 
     564             :             /* update the line pointer to reference the new offset */
     565     1667986 :             lp->lp_off = upper;
     566             :         }
     567             : 
     568             :         /* move the remaining tuples. */
     569       74404 :         memmove((char *) page + upper,
     570       74404 :                 page + copy_head,
     571       74404 :                 copy_tail - copy_head);
     572             :     }
     573             :     else
     574             :     {
     575             :         PGAlignedBlock scratch;
     576       25428 :         char       *scratchptr = scratch.data;
     577             : 
     578             :         /*
     579             :          * Non-presorted case:  The tuples in the itemidbase array may be in
     580             :          * any order.  So, in order to move these to the end of the page we
     581             :          * must make a temp copy of each tuple that needs to be moved before
     582             :          * we copy them back into the page at the new offset.
     583             :          *
     584             :          * If a large percentage of tuples have been pruned (>75%) then we'll
     585             :          * copy these into the temp buffer tuple-by-tuple, otherwise, we'll
     586             :          * just do a single memcpy() for all tuples that need to be moved.
     587             :          * When so many tuples have been removed there's likely to be a lot of
     588             :          * gaps and it's unlikely that many non-movable tuples remain at the
     589             :          * end of the page.
     590             :          */
     591       25428 :         if (nitems < PageGetMaxOffsetNumber(page) / 4)
     592             :         {
     593        1078 :             i = 0;
     594             :             do
     595             :             {
     596       20622 :                 itemidptr = &itemidbase[i];
     597       20622 :                 memcpy(scratchptr + itemidptr->itemoff, page + itemidptr->itemoff,
     598       20622 :                        itemidptr->alignedlen);
     599       20622 :                 i++;
     600       20622 :             } while (i < nitems);
     601             : 
     602             :             /* Set things up for the compactification code below */
     603        1078 :             i = 0;
     604        1078 :             itemidptr = &itemidbase[0];
     605        1078 :             upper = phdr->pd_special;
     606             :         }
     607             :         else
     608             :         {
     609       24350 :             upper = phdr->pd_special;
     610             : 
     611             :             /*
     612             :              * Many tuples are likely to already be in the correct location.
     613             :              * There's no need to copy these into the temp buffer.  Instead
     614             :              * we'll just skip forward in the itemidbase array to the position
     615             :              * that we do need to move tuples from so that the code below just
     616             :              * leaves these ones alone.
     617             :              */
     618       24350 :             i = 0;
     619             :             do
     620             :             {
     621      622406 :                 itemidptr = &itemidbase[i];
     622      622406 :                 if (upper != itemidptr->itemoff + itemidptr->alignedlen)
     623       24350 :                     break;
     624      598056 :                 upper -= itemidptr->alignedlen;
     625             : 
     626      598056 :                 i++;
     627      598056 :             } while (i < nitems);
     628             : 
     629             :             /* Copy all tuples that need to be moved into the temp buffer */
     630       24350 :             memcpy(scratchptr + phdr->pd_upper,
     631       24350 :                    page + phdr->pd_upper,
     632       24350 :                    upper - phdr->pd_upper);
     633             :         }
     634             : 
     635             :         /*
     636             :          * Do the tuple compactification.  itemidptr is already pointing to
     637             :          * the first tuple that we're going to move.  Here we collapse the
     638             :          * memcpy calls for adjacent tuples into a single call.  This is done
     639             :          * by delaying the memcpy call until we find a gap that needs to be
     640             :          * closed.
     641             :          */
     642       25428 :         copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
     643     2896458 :         for (; i < nitems; i++)
     644             :         {
     645             :             ItemId      lp;
     646             : 
     647     2871030 :             itemidptr = &itemidbase[i];
     648     2871030 :             lp = PageGetItemId(page, itemidptr->offsetindex + 1);
     649             : 
     650             :             /* copy pending tuples when we detect a gap */
     651     2871030 :             if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
     652             :             {
     653      788232 :                 memcpy((char *) page + upper,
     654      788232 :                        scratchptr + copy_head,
     655      788232 :                        copy_tail - copy_head);
     656             : 
     657             :                 /*
     658             :                  * We've now copied all tuples already seen, but not the
     659             :                  * current tuple, so we set the copy_tail to the end of this
     660             :                  * tuple.
     661             :                  */
     662      788232 :                 copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
     663             :             }
     664             :             /* shift the target offset down by the length of this tuple */
     665     2871030 :             upper -= itemidptr->alignedlen;
     666             :             /* point the copy_head to the start of this tuple */
     667     2871030 :             copy_head = itemidptr->itemoff;
     668             : 
     669             :             /* update the line pointer to reference the new offset */
     670     2871030 :             lp->lp_off = upper;
     671             :         }
     672             : 
     673             :         /* Copy the remaining chunk */
     674       25428 :         memcpy((char *) page + upper,
     675       25428 :                scratchptr + copy_head,
     676       25428 :                copy_tail - copy_head);
     677             :     }
     678             : 
     679       99832 :     phdr->pd_upper = upper;
     680       99832 : }
     681             : 
     682             : /*
     683             :  * PageRepairFragmentation
     684             :  *
     685             :  * Frees fragmented space on a heap page following pruning.
     686             :  *
     687             :  * This routine is usable for heap pages only, but see PageIndexMultiDelete.
     688             :  *
     689             :  * This routine removes unused line pointers from the end of the line pointer
     690             :  * array.  This is possible when dead heap-only tuples get removed by pruning,
     691             :  * especially when there were HOT chains with several tuples each beforehand.
     692             :  *
     693             :  * Caller had better have a full cleanup lock on page's buffer.  As a side
     694             :  * effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
     695             :  * needed.  Caller might also need to account for a reduction in the length of
     696             :  * the line pointer array following array truncation.
                     :  *
                     :  * page: the page to compact; its header, line pointers and item data
                     :  * are modified in place.
                     :  *
                     :  * Reports ERROR with ERRCODE_DATA_CORRUPTED if the page header pointers,
                     :  * a stored item's offset, or the summed item lengths fail the sanity
                     :  * checks below.
     697             :  */
     698             : void
     699       89902 : PageRepairFragmentation(Page page)
     700             : {
     701       89902 :     Offset      pd_lower = ((PageHeader) page)->pd_lower;
     702       89902 :     Offset      pd_upper = ((PageHeader) page)->pd_upper;
     703       89902 :     Offset      pd_special = ((PageHeader) page)->pd_special;
     704             :     Offset      last_offset;
     705             :     itemIdCompactData itemidbase[MaxHeapTuplesPerPage];
     706             :     itemIdCompact itemidptr;
     707             :     ItemId      lp;
     708             :     int         nline,
     709             :                 nstorage,
     710             :                 nunused;
     711       89902 :     OffsetNumber finalusedlp = InvalidOffsetNumber;
     712             :     int         i;
     713             :     Size        totallen;
     714       89902 :     bool        presorted = true;   /* For now */
     715             : 
     716             :     /*
     717             :      * It's worth the trouble to be more paranoid here than in most places,
     718             :      * because we are about to reshuffle data in (what is usually) a shared
     719             :      * disk buffer.  If we aren't careful then corrupted pointers, lengths,
     720             :      * etc could cause us to clobber adjacent disk buffers, spreading the data
     721             :      * loss further.  So, check everything.
     722             :      */
     723       89902 :     if (pd_lower < SizeOfPageHeaderData ||
     724       89902 :         pd_lower > pd_upper ||
     725       89902 :         pd_upper > pd_special ||
     726       89902 :         pd_special > BLCKSZ ||
     727       89902 :         pd_special != MAXALIGN(pd_special))
     728           0 :         ereport(ERROR,
     729             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     730             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
     731             :                         pd_lower, pd_upper, pd_special)));
     732             : 
     733             :     /*
     734             :      * Run through the line pointer array and collect data about live items.
                     :      * Only items with storage get an itemidbase entry.  presorted stays
                     :      * true only while each such item's offset is strictly lower than the
                     :      * previous one's.
     735             :      */
     736       89902 :     nline = PageGetMaxOffsetNumber(page);
     737       89902 :     itemidptr = itemidbase;
     738       89902 :     nunused = totallen = 0;
     739       89902 :     last_offset = pd_special;   /* above any offset accepted by the check below */
     740     7810414 :     for (i = FirstOffsetNumber; i <= nline; i++)
     741             :     {
     742     7720512 :         lp = PageGetItemId(page, i);
     743     7720512 :         if (ItemIdIsUsed(lp))
     744             :         {
     745     7467904 :             if (ItemIdHasStorage(lp))
     746             :             {
     747     2747000 :                 itemidptr->offsetindex = i - 1; /* zero-based line pointer index */
     748     2747000 :                 itemidptr->itemoff = ItemIdGetOffset(lp);
     749             : 
     750     2747000 :                 if (last_offset > itemidptr->itemoff)   /* still strictly descending? */
     751     2254678 :                     last_offset = itemidptr->itemoff;
     752             :                 else
     753      492322 :                     presorted = false;
     754             : 
     755     2747000 :                 if (unlikely(itemidptr->itemoff < (int) pd_upper ||
     756             :                              itemidptr->itemoff >= (int) pd_special))
     757           0 :                     ereport(ERROR,
     758             :                             (errcode(ERRCODE_DATA_CORRUPTED),
     759             :                              errmsg("corrupted line pointer: %u",
     760             :                                     itemidptr->itemoff)));
     761     2747000 :                 itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
     762     2747000 :                 totallen += itemidptr->alignedlen;
     763     2747000 :                 itemidptr++;
     764             :             }
     765             : 
     766     7467904 :             finalusedlp = i;    /* Could be the final non-LP_UNUSED item */
     767             :         }
     768             :         else
     769             :         {
     770             :             /* Unused entries should have lp_len = 0, but make sure */
     771             :             Assert(!ItemIdHasStorage(lp));
     772      252608 :             ItemIdSetUnused(lp);
     773      252608 :             nunused++;
     774             :         }
     775             :     }
     776             : 
     777       89902 :     nstorage = itemidptr - itemidbase;  /* number of itemidbase entries filled in */
     778       89902 :     if (nstorage == 0)
     779             :     {
     780             :         /* Page is completely empty, so just reset it quickly */
     781       19484 :         ((PageHeader) page)->pd_upper = pd_special;
     782             :     }
     783             :     else
     784             :     {
     785             :         /* Need to compact the page the hard way */
     786       70418 :         if (totallen > (Size) (pd_special - pd_lower))
     787           0 :             ereport(ERROR,
     788             :                     (errcode(ERRCODE_DATA_CORRUPTED),
     789             :                      errmsg("corrupted item lengths: total %u, available space %u",
     790             :                             (unsigned int) totallen, pd_special - pd_lower)));
     791             : 
     792       70418 :         compactify_tuples(itemidbase, nstorage, page, presorted);
     793             :     }
     794             : 
     795       89902 :     if (finalusedlp != nline)
     796             :     {
     797             :         /* The last line pointer is not the last used line pointer */
     798        2738 :         int         nunusedend = nline - finalusedlp;
     799             : 
     800             :         Assert(nunused >= nunusedend && nunusedend > 0);
     801             : 
     802             :         /* remove trailing unused line pointers from the count */
     803        2738 :         nunused -= nunusedend;
     804             :         /* truncate the line pointer array */
     805        2738 :         ((PageHeader) page)->pd_lower -= (sizeof(ItemIdData) * nunusedend);
     806             :     }
     807             : 
     808             :     /* Set hint bit for PageAddItemExtended */
     809       89902 :     if (nunused > 0)    /* counts only unused items that remain after truncation */
     810       21120 :         PageSetHasFreeLinePointers(page);
     811             :     else
     812       68782 :         PageClearHasFreeLinePointers(page);
     813       89902 : }
     814             : 
     815             : /*
     816             :  * PageTruncateLinePointerArray
     817             :  *
     818             :  * Removes unused line pointers at the end of the line pointer array.
     819             :  *
     820             :  * This routine is usable for heap pages only.  It is called by VACUUM during
     821             :  * its second pass over the heap.  We expect at least one LP_UNUSED line
     822             :  * pointer on the page (if VACUUM didn't have an LP_DEAD item on the page that
     823             :  * it just set to LP_UNUSED then it should not call here).
     824             :  *
     825             :  * We avoid truncating the line pointer array to 0 items, if necessary by
     826             :  * leaving behind a single remaining LP_UNUSED item.  This is a little
     827             :  * arbitrary, but it seems like a good idea to avoid leaving a PageIsEmpty()
     828             :  * page behind.
     829             :  *
     830             :  * Caller can have either an exclusive lock or a full cleanup lock on page's
     831             :  * buffer.  The page's PD_HAS_FREE_LINES hint bit will be set or unset based
     832             :  * on whether or not we leave behind any remaining LP_UNUSED items.
                     :  *
                     :  * page: the page whose line pointer array is truncated in place; only
                     :  * pd_lower and the hint bit are changed (plus the clobber fill when
                     :  * CLOBBER_FREED_MEMORY is defined).
     833             :  */
     834             : void
     835       19760 : PageTruncateLinePointerArray(Page page)
     836             : {
     837       19760 :     PageHeader  phdr = (PageHeader) page;
     838       19760 :     bool        countdone = false,
     839       19760 :                 sethint = false;
     840       19760 :     int         nunusedend = 0; /* trailing unused line pointers to drop */
     841             : 
     842             :     /* Scan line pointer array back-to-front */
     843     1378610 :     for (int i = PageGetMaxOffsetNumber(page); i >= FirstOffsetNumber; i--)
     844             :     {
     845     1377924 :         ItemId      lp = PageGetItemId(page, i);
     846             : 
     847     1377924 :         if (!countdone && i > FirstOffsetNumber)    /* first item is never counted, so one always survives */
     848             :         {
     849             :             /*
     850             :              * Still determining which line pointers from the end of the array
     851             :              * will be truncated away.  Either count another line pointer as
     852             :              * safe to truncate, or notice that it's not safe to truncate
     853             :              * additional line pointers (stop counting line pointers).
     854             :              */
     855     1266228 :             if (!ItemIdIsUsed(lp))
     856     1257328 :                 nunusedend++;
     857             :             else
     858        8900 :                 countdone = true;
     859             :         }
     860             :         else
     861             :         {
     862             :             /*
     863             :              * Once we've stopped counting we still need to figure out if
     864             :              * there are any remaining LP_UNUSED line pointers somewhere more
     865             :              * towards the front of the array.
     866             :              */
     867      111696 :             if (!ItemIdIsUsed(lp))
     868             :             {
     869             :                 /*
     870             :                  * This is an unused line pointer that we won't be truncating
     871             :                  * away -- so there is at least one.  Set hint on page.
     872             :                  */
     873       19074 :                 sethint = true;
     874       19074 :                 break;
     875             :             }
     876             :         }
     877             :     }
     878             : 
     879       19760 :     if (nunusedend > 0)
     880             :     {
     881       13028 :         phdr->pd_lower -= sizeof(ItemIdData) * nunusedend;  /* drop the counted entries */
     882             : 
     883             : #ifdef CLOBBER_FREED_MEMORY
     884             :         memset((char *) page + phdr->pd_lower, 0x7F,
     885             :                sizeof(ItemIdData) * nunusedend);    /* fill the just-truncated entries, which start at the new pd_lower */
     886             : #endif
     887             :     }
     888             :     else
     889             :         Assert(sethint);    /* nothing truncated, so an unused item must have been left behind */
     890             : 
     891             :     /* Set hint bit for PageAddItemExtended */
     892       19760 :     if (sethint)
     893       19074 :         PageSetHasFreeLinePointers(page);
     894             :     else
     895         686 :         PageClearHasFreeLinePointers(page);
     896       19760 : }
     897             : 
     898             : /*
     899             :  * PageGetFreeSpace
     900             :  *      Returns the size of the free (allocatable) space on a page,
     901             :  *      reduced by the space needed for a new line pointer.
     902             :  *
     903             :  * Note: this should usually only be used on index pages.  Use
     904             :  * PageGetHeapFreeSpace on heap pages.
                     :  *
                     :  * Returns 0 when the gap between pd_lower and pd_upper is smaller than
                     :  * one ItemIdData (this includes a negative gap).  The page is only read.
     905             :  */
     906             : Size
     907    49575960 : PageGetFreeSpace(Page page)
     908             : {
     909             :     int         space;
     910             : 
     911             :     /*
     912             :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     913             :      * pd_upper.
     914             :      */
     915    49575960 :     space = (int) ((PageHeader) page)->pd_upper -
     916    49575960 :         (int) ((PageHeader) page)->pd_lower;
     917             : 
     918    49575960 :     if (space < (int) sizeof(ItemIdData))   /* not even room for the line pointer */
     919        9094 :         return 0;
     920    49566866 :     space -= sizeof(ItemIdData);
     921             : 
     922    49566866 :     return (Size) space;
     923             : }
     924             : 
     925             : /*
     926             :  * PageGetFreeSpaceForMultipleTuples
     927             :  *      Returns the size of the free (allocatable) space on a page,
     928             :  *      reduced by the space needed for multiple new line pointers.
     929             :  *
     930             :  * Note: this should usually only be used on index pages.  Use
     931             :  * PageGetHeapFreeSpace on heap pages.
                     :  *
                     :  * ntups: number of new line pointers to reserve room for.
                     :  *
                     :  * Returns 0 when the gap between pd_lower and pd_upper is smaller than
                     :  * ntups * sizeof(ItemIdData).  The page is only read.
                     :  *
                     :  * NOTE(review): ntups * sizeof(ItemIdData) is evaluated in size_t and then
                     :  * narrowed to int; presumably callers pass a small non-negative ntups --
                     :  * confirm against callers.
     932             :  */
     933             : Size
     934      127266 : PageGetFreeSpaceForMultipleTuples(Page page, int ntups)
     935             : {
     936             :     int         space;
     937             : 
     938             :     /*
     939             :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     940             :      * pd_upper.
     941             :      */
     942      127266 :     space = (int) ((PageHeader) page)->pd_upper -
     943      127266 :         (int) ((PageHeader) page)->pd_lower;
     944             : 
     945      127266 :     if (space < (int) (ntups * sizeof(ItemIdData))) /* line pointers alone would not fit */
     946           0 :         return 0;
     947      127266 :     space -= ntups * sizeof(ItemIdData);
     948             : 
     949      127266 :     return (Size) space;
     950             : }
     951             : 
     952             : /*
     953             :  * PageGetExactFreeSpace
     954             :  *      Returns the size of the free (allocatable) space on a page,
     955             :  *      without any consideration for adding/removing line pointers.
                     :  *
                     :  * Returns pd_upper - pd_lower, or 0 if that difference is negative.  The
                     :  * page is only read.
     956             :  */
     957             : Size
     958     3235394 : PageGetExactFreeSpace(Page page)
     959             : {
     960             :     int         space;
     961             : 
     962             :     /*
     963             :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     964             :      * pd_upper.
     965             :      */
     966     3235394 :     space = (int) ((PageHeader) page)->pd_upper -
     967     3235394 :         (int) ((PageHeader) page)->pd_lower;
     968             : 
     969     3235394 :     if (space < 0)  /* pd_lower > pd_upper: report no space rather than a huge Size */
     970           0 :         return 0;
     971             : 
     972     3235394 :     return (Size) space;
     973             : }
     974             : 
     975             : 
     976             : /*
     977             :  * PageGetHeapFreeSpace
     978             :  *      Returns the size of the free (allocatable) space on a page,
     979             :  *      reduced by the space needed for a new line pointer.
     980             :  *
     981             :  * The difference between this and PageGetFreeSpace is that this will return
     982             :  * zero if there are already MaxHeapTuplesPerPage line pointers in the page
     983             :  * and none are free.  We use this to enforce that no more than
     984             :  * MaxHeapTuplesPerPage line pointers are created on a heap page.  (Although
     985             :  * no more tuples than that could fit anyway, in the presence of redirected
     986             :  * or dead line pointers it'd be possible to have too many line pointers.
     987             :  * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
     988             :  * on the number of line pointers, we make this extra check.)
                     :  *
                     :  * Returns the PageGetFreeSpace() result unchanged unless the page already
                     :  * holds at least MaxHeapTuplesPerPage line pointers; in that case returns
                     :  * 0 unless the free-line-pointers hint is set and the scan below actually
                     :  * finds an unused line pointer.  The page is only read.
     989             :  */
     990             : Size
     991    23992788 : PageGetHeapFreeSpace(Page page)
     992             : {
     993             :     Size        space;
     994             : 
     995    23992788 :     space = PageGetFreeSpace(page);
     996    23992788 :     if (space > 0)  /* already 0 otherwise; line pointer count cannot change the answer */
     997             :     {
     998             :         OffsetNumber offnum,
     999             :                     nline;
    1000             : 
    1001             :         /*
    1002             :          * Are there already MaxHeapTuplesPerPage line pointers in the page?
    1003             :          */
    1004    23973090 :         nline = PageGetMaxOffsetNumber(page);
    1005    23973090 :         if (nline >= MaxHeapTuplesPerPage)
    1006             :         {
    1007        5540 :             if (PageHasFreeLinePointers(page))
    1008             :             {
    1009             :                 /*
    1010             :                  * Since this is just a hint, we must confirm that there is
    1011             :                  * indeed a free line pointer
    1012             :                  */
    1013      533388 :                 for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
    1014             :                 {
    1015      533224 :                     ItemId      lp = PageGetItemId(page, offnum);
    1016             : 
    1017      533224 :                     if (!ItemIdIsUsed(lp))
    1018        2534 :                         break;
    1019             :                 }
    1020             : 
    1021        2698 :                 if (offnum > nline) /* loop ran to completion: no unused line pointer exists */
    1022             :                 {
    1023             :                     /*
    1024             :                      * The hint is wrong, but we can't clear it here since we
    1025             :                      * don't have the ability to mark the page dirty.
    1026             :                      */
    1027         164 :                     space = 0;
    1028             :                 }
    1029             :             }
    1030             :             else
    1031             :             {
    1032             :                 /*
    1033             :                  * Although the hint might be wrong, PageAddItem will believe
    1034             :                  * it anyway, so we must believe it too.
    1035             :                  */
    1036        2842 :                 space = 0;
    1037             :             }
    1038             :         }
    1039             :     }
    1040    23992788 :     return space;
    1041             : }
    1042             : 
    1043             : 
    1044             : /*
    1045             :  * PageIndexTupleDelete
    1046             :  *
    1047             :  * This routine does the work of removing a tuple from an index page.
    1048             :  *
    1049             :  * Unlike heap pages, we compact out the line pointer for the removed tuple.
                     :  *
                     :  * page: the page to modify in place.
                     :  * offnum: offset number (counted from 1) of the item to remove; a value
                     :  * outside 1..PageGetMaxOffsetNumber(page) is reported with elog(ERROR).
                     :  *
                     :  * Reports ERROR with ERRCODE_DATA_CORRUPTED if the page header pointers
                     :  * or the item's line pointer fail the sanity checks below.
    1050             :  */
    1051             : void
    1052      807528 : PageIndexTupleDelete(Page page, OffsetNumber offnum)
    1053             : {
    1054      807528 :     PageHeader  phdr = (PageHeader) page;
    1055             :     char       *addr;
    1056             :     ItemId      tup;
    1057             :     Size        size;
    1058             :     unsigned    offset;
    1059             :     int         nbytes;
    1060             :     int         offidx;
    1061             :     int         nline;
    1062             : 
    1063             :     /*
    1064             :      * As with PageRepairFragmentation, paranoia seems justified.
    1065             :      */
    1066      807528 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1067      807528 :         phdr->pd_lower > phdr->pd_upper ||
    1068      807528 :         phdr->pd_upper > phdr->pd_special ||
    1069      807528 :         phdr->pd_special > BLCKSZ ||
    1070      807528 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1071           0 :         ereport(ERROR,
    1072             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1073             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1074             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1075             : 
    1076      807528 :     nline = PageGetMaxOffsetNumber(page);
    1077      807528 :     if ((int) offnum <= 0 || (int) offnum > nline)
    1078           0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1079             : 
    1080             :     /* change offset number to offset index */
    1081      807528 :     offidx = offnum - 1;
    1082             : 
    1083      807528 :     tup = PageGetItemId(page, offnum);
    1084             :     Assert(ItemIdHasStorage(tup));
    1085      807528 :     size = ItemIdGetLength(tup);
    1086      807528 :     offset = ItemIdGetOffset(tup);
    1087             : 
    1088      807528 :     if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
    1089      807528 :         offset != MAXALIGN(offset))
    1090           0 :         ereport(ERROR,
    1091             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1092             :                  errmsg("corrupted line pointer: offset = %u, size = %u",
    1093             :                         offset, (unsigned int) size)));
    1094             : 
    1095             :     /* Amount of space to actually be deleted */
    1096      807528 :     size = MAXALIGN(size);
    1097             : 
    1098             :     /*
    1099             :      * First, we want to get rid of the pd_linp entry for the index tuple. We
    1100             :      * copy all subsequent linp's back one slot in the array. We don't use
    1101             :      * PageGetItemId, because we are manipulating the _array_, not individual
    1102             :      * linp's.
    1103             :      */
    1104      807528 :     nbytes = phdr->pd_lower -
    1105      807528 :         ((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);  /* bytes of line pointers after the deleted one */
    1106             : 
    1107      807528 :     if (nbytes > 0) /* nothing to shift when the deleted entry was the last one */
    1108      789282 :         memmove((char *) &(phdr->pd_linp[offidx]),
    1109      789282 :                 (char *) &(phdr->pd_linp[offidx + 1]),
    1110             :                 nbytes);
    1111             : 
    1112             :     /*
    1113             :      * Now move everything between the old upper bound (beginning of tuple
    1114             :      * space) and the beginning of the deleted tuple forward, so that space in
    1115             :      * the middle of the page is left free.  If we've just deleted the tuple
    1116             :      * at the beginning of tuple space, then there's no need to do the copy.
    1117             :      */
    1118             : 
    1119             :     /* beginning of tuple space */
    1120      807528 :     addr = (char *) page + phdr->pd_upper;
    1121             : 
    1122      807528 :     if (offset > phdr->pd_upper)
    1123      787990 :         memmove(addr + size, addr, offset - phdr->pd_upper);    /* source and destination can overlap */
    1124             : 
    1125             :     /* adjust free space boundary pointers */
    1126      807528 :     phdr->pd_upper += size;
    1127      807528 :     phdr->pd_lower -= sizeof(ItemIdData);
    1128             : 
    1129             :     /*
    1130             :      * Finally, we need to adjust the linp entries that remain.
    1131             :      *
    1132             :      * Anything that used to be before the deleted tuple's data was moved
    1133             :      * forward by the size of the deleted tuple.
    1134             :      */
    1135      807528 :     if (!PageIsEmpty(page))
    1136             :     {
    1137             :         int         i;
    1138             : 
    1139      805898 :         nline--;                /* there's one less than when we started */
    1140   142015064 :         for (i = 1; i <= nline; i++)
    1141             :         {
    1142   141209166 :             ItemId      ii = PageGetItemId(page, i);
    1143             : 
    1144             :             Assert(ItemIdHasStorage(ii));
    1145   141209166 :             if (ItemIdGetOffset(ii) <= offset)  /* compared against the deleted tuple's old offset */
    1146    92612754 :                 ii->lp_off += size;
    1147             :         }
    1148             :     }
    1149      807528 : }
    1150             : 
    1151             : 
    1152             : /*
    1153             :  * PageIndexMultiDelete
    1154             :  *
    1155             :  * This routine handles the case of deleting multiple tuples from an
    1156             :  * index page at once.  It is considerably faster than a loop around
    1157             :  * PageIndexTupleDelete ... however, the caller *must* supply the array
    1158             :  * of item numbers to be deleted in item number order!
                     :  *
                     :  * page is the index page to modify.  itemnos[] lists the nitems offset
                     :  * numbers to remove.  When nitems <= 2 the work is handed to
                     :  * PageIndexTupleDelete, last entry first.  Otherwise the page header
                     :  * pointers and every line pointer are validated before anything is
                     :  * changed; inconsistencies are reported with ereport(ERROR) using
                     :  * ERRCODE_DATA_CORRUPTED, and an itemnos[] array that the scan cannot
                     :  * fully match raises elog(ERROR).
    1159             :  */
    1160             : void
    1161       32672 : PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
    1162             : {
    1163       32672 :     PageHeader  phdr = (PageHeader) page;
    1164       32672 :     Offset      pd_lower = phdr->pd_lower;
    1165       32672 :     Offset      pd_upper = phdr->pd_upper;
    1166       32672 :     Offset      pd_special = phdr->pd_special;
    1167             :     Offset      last_offset;
    1168             :     itemIdCompactData itemidbase[MaxIndexTuplesPerPage];
    1169             :     ItemIdData  newitemids[MaxIndexTuplesPerPage];
    1170             :     itemIdCompact itemidptr;
    1171             :     ItemId      lp;
    1172             :     int         nline,
    1173             :                 nused;
    1174             :     Size        totallen;
    1175             :     Size        size;
    1176             :     unsigned    offset;
    1177             :     int         nextitm;
    1178             :     OffsetNumber offnum;
    1179       32672 :     bool        presorted = true;   /* For now */
    1180             : 
    1181             :     Assert(nitems <= MaxIndexTuplesPerPage);
    1182             : 
    1183             :     /*
    1184             :      * If there aren't very many items to delete, then retail
    1185             :      * PageIndexTupleDelete is the best way.  Delete the items in reverse
    1186             :      * order so we don't have to think about adjusting item numbers for
    1187             :      * previous deletions.
    1188             :      *
    1189             :      * TODO: tune the magic number here
    1190             :      */
    1191       32672 :     if (nitems <= 2)
    1192             :     {
    1193        6730 :         while (--nitems >= 0)
    1194        3824 :             PageIndexTupleDelete(page, itemnos[nitems]);
    1195        2906 :         return;
    1196             :     }
    1197             : 
    1198             :     /*
    1199             :      * As with PageRepairFragmentation, paranoia seems justified.
                     :      *
                     :      * The checks use the local copies of pd_lower, pd_upper and pd_special
                     :      * taken from the page header at function entry.
    1200             :      */
    1201       29766 :     if (pd_lower < SizeOfPageHeaderData ||
    1202       29766 :         pd_lower > pd_upper ||
    1203       29766 :         pd_upper > pd_special ||
    1204       29766 :         pd_special > BLCKSZ ||
    1205       29766 :         pd_special != MAXALIGN(pd_special))
    1206           0 :         ereport(ERROR,
    1207             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1208             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1209             :                         pd_lower, pd_upper, pd_special)));
    1210             : 
    1211             :     /*
    1212             :      * Scan the line pointer array and build a list of just the ones we are
    1213             :      * going to keep.  Notice we do not modify the page yet, since we are
    1214             :      * still validity-checking.
                     :      *
                     :      * For each kept item we record its new line pointer index, its current
                     :      * offset and its MAXALIGN'd length in itemidbase[], and copy its line
                     :      * pointer into newitemids[].  presorted is cleared as soon as a kept
                     :      * item's offset is not strictly below the previous kept item's offset
                     :      * (last_offset starts out at pd_special).
    1215             :      */
    1216       29766 :     nline = PageGetMaxOffsetNumber(page);
    1217       29766 :     itemidptr = itemidbase;
    1218       29766 :     totallen = 0;
    1219       29766 :     nused = 0;
    1220       29766 :     nextitm = 0;
    1221       29766 :     last_offset = pd_special;
    1222     6749246 :     for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
    1223             :     {
    1224     6719480 :         lp = PageGetItemId(page, offnum);
    1225             :         Assert(ItemIdHasStorage(lp));
    1226     6719480 :         size = ItemIdGetLength(lp);
    1227     6719480 :         offset = ItemIdGetOffset(lp);
    1228     6719480 :         if (offset < pd_upper ||
    1229     6719480 :             (offset + size) > pd_special ||
    1230     6719480 :             offset != MAXALIGN(offset))
    1231           0 :             ereport(ERROR,
    1232             :                     (errcode(ERRCODE_DATA_CORRUPTED),
    1233             :                      errmsg("corrupted line pointer: offset = %u, size = %u",
    1234             :                             offset, (unsigned int) size)));
    1235             : 
    1236     6719480 :         if (nextitm < nitems && offnum == itemnos[nextitm])
    1237             :         {
    1238             :             /* skip item to be deleted */
    1239     3232356 :             nextitm++;
    1240             :         }
    1241             :         else
    1242             :         {
    1243     3487124 :             itemidptr->offsetindex = nused; /* where it will go */
    1244     3487124 :             itemidptr->itemoff = offset;
    1245             : 
    1246     3487124 :             if (last_offset > itemidptr->itemoff)
    1247     1802972 :                 last_offset = itemidptr->itemoff;
    1248             :             else
    1249     1684152 :                 presorted = false;
    1250             : 
    1251     3487124 :             itemidptr->alignedlen = MAXALIGN(size);
    1252     3487124 :             totallen += itemidptr->alignedlen;
    1253     3487124 :             newitemids[nused] = *lp;
    1254     3487124 :             itemidptr++;
    1255     3487124 :             nused++;
    1256             :         }
    1257             :     }
    1258             : 
    1259             :     /* this will catch invalid or out-of-order itemnos[]: nextitm only advances when the scanned offnum equals itemnos[nextitm] */
    1260       29766 :     if (nextitm != nitems)
    1261           0 :         elog(ERROR, "incorrect index offsets supplied");
    1262             : 
    1263       29766 :     if (totallen > (Size) (pd_special - pd_lower))
    1264           0 :         ereport(ERROR,
    1265             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1266             :                  errmsg("corrupted item lengths: total %u, available space %u",
    1267             :                         (unsigned int) totallen, pd_special - pd_lower)));
    1268             : 
    1269             :     /*
    1270             :      * Looks good. Overwrite the line pointers with the copy, from which we've
    1271             :      * removed all the unused items.
                     :      *
                     :      * pd_lower is then reset to the end of the nused remaining line
                     :      * pointers.
    1272             :      */
    1273       29766 :     memcpy(phdr->pd_linp, newitemids, nused * sizeof(ItemIdData));
    1274       29766 :     phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
    1275             : 
    1276             :     /* and compactify the tuple data; with nothing left, all tuple space is free, so pd_upper becomes pd_special */
    1277       29766 :     if (nused > 0)
    1278       29414 :         compactify_tuples(itemidbase, nused, page, presorted);
    1279             :     else
    1280         352 :         phdr->pd_upper = pd_special;
    1281             : }
    1282             : 
    1283             : 
    1284             : /*
    1285             :  * PageIndexTupleDeleteNoCompact
    1286             :  *
    1287             :  * Remove the specified tuple from an index page, but set its line pointer
    1288             :  * to "unused" instead of compacting it out, except that it can be removed
    1289             :  * if it's the last line pointer on the page.
    1290             :  *
    1291             :  * This is used for index AMs that require that existing TIDs of live tuples
    1292             :  * remain unchanged, and are willing to allow unused line pointers instead.
                     :  *
                     :  * page is the index page to modify and offnum the offset number of the
                     :  * tuple to remove.  offnum must lie between 1 and the page's current
                     :  * maximum offset number, else elog(ERROR) is raised.  Inconsistent page
                     :  * header pointers or an inconsistent line pointer for the target tuple
                     :  * are reported with ereport(ERROR) using ERRCODE_DATA_CORRUPTED.
    1293             :  */
    1294             : void
    1295         678 : PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
    1296             : {
    1297         678 :     PageHeader  phdr = (PageHeader) page;
    1298             :     char       *addr;
    1299             :     ItemId      tup;
    1300             :     Size        size;
    1301             :     unsigned    offset;
    1302             :     int         nline;
    1303             : 
    1304             :     /*
    1305             :      * As with PageRepairFragmentation, paranoia seems justified.
    1306             :      */
    1307         678 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1308         678 :         phdr->pd_lower > phdr->pd_upper ||
    1309         678 :         phdr->pd_upper > phdr->pd_special ||
    1310         678 :         phdr->pd_special > BLCKSZ ||
    1311         678 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1312           0 :         ereport(ERROR,
    1313             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1314             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1315             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1316             : 
    1317         678 :     nline = PageGetMaxOffsetNumber(page);
    1318         678 :     if ((int) offnum <= 0 || (int) offnum > nline)
    1319           0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1320             : 
    1321         678 :     tup = PageGetItemId(page, offnum);
    1322             :     Assert(ItemIdHasStorage(tup));
    1323         678 :     size = ItemIdGetLength(tup);
    1324         678 :     offset = ItemIdGetOffset(tup);
    1325             : 
    1326         678 :     if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
    1327         678 :         offset != MAXALIGN(offset))
    1328           0 :         ereport(ERROR,
    1329             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1330             :                  errmsg("corrupted line pointer: offset = %u, size = %u",
    1331             :                         offset, (unsigned int) size)));
    1332             : 
    1333             :     /* Amount of space to actually be deleted (the tuple length rounded up by MAXALIGN) */
    1334         678 :     size = MAXALIGN(size);
    1335             : 
    1336             :     /*
    1337             :      * Either set the line pointer to "unused", or zap it if it's the last
    1338             :      * one.  (Note: it's possible that the next-to-last one(s) are already
    1339             :      * unused, but we do not trouble to try to compact them out if so.)
                     :      *
                     :      * In the "last one" case pd_lower shrinks by one ItemIdData and nline
                     :      * is decremented so the fix-up loop at the end does not visit the
                     :      * removed slot.
    1340             :      */
    1341         678 :     if ((int) offnum < nline)
    1342         610 :         ItemIdSetUnused(tup);
    1343             :     else
    1344             :     {
    1345          68 :         phdr->pd_lower -= sizeof(ItemIdData);
    1346          68 :         nline--;                /* there's one less than when we started */
    1347             :     }
    1348             : 
    1349             :     /*
    1350             :      * Now move everything between the old upper bound (beginning of tuple
    1351             :      * space) and the beginning of the deleted tuple forward, so that space in
    1352             :      * the middle of the page is left free.  If we've just deleted the tuple
    1353             :      * at the beginning of tuple space, then there's no need to do the copy.
    1354             :      */
    1355             : 
    1356             :     /* beginning of tuple space */
    1357         678 :     addr = (char *) page + phdr->pd_upper;
    1358             : 
    1359         678 :     if (offset > phdr->pd_upper)
    1360         610 :         memmove(addr + size, addr, offset - phdr->pd_upper);
    1361             : 
    1362             :     /* adjust free space boundary pointer */
    1363         678 :     phdr->pd_upper += size;
    1364             : 
    1365             :     /*
    1366             :      * Finally, we need to adjust the linp entries that remain.
    1367             :      *
    1368             :      * Anything that used to be before the deleted tuple's data was moved
    1369             :      * forward by the size of the deleted tuple.
                     :      *
                     :      * Line pointers for which ItemIdHasStorage() is false are skipped, and
                     :      * only those whose offset is <= the deleted tuple's old offset are
                     :      * bumped by size.
    1370             :      */
    1371         678 :     if (!PageIsEmpty(page))
    1372             :     {
    1373             :         int         i;
    1374             : 
    1375      173032 :         for (i = 1; i <= nline; i++)
    1376             :         {
    1377      172364 :             ItemId      ii = PageGetItemId(page, i);
    1378             : 
    1379      172364 :             if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
    1380       84572 :                 ii->lp_off += size;
    1381             :         }
    1382             :     }
    1383         678 : }
    1384             : 
    1385             : 
    1386             : /*
    1387             :  * PageIndexTupleOverwrite
    1388             :  *
    1389             :  * Replace a specified tuple on an index page.
    1390             :  *
    1391             :  * The new tuple is placed exactly where the old one had been, shifting
    1392             :  * other tuples' data up or down as needed to keep the page compacted.
    1393             :  * This is better than deleting and reinserting the tuple, because it
    1394             :  * avoids any data shifting when the tuple size doesn't change; and
    1395             :  * even when it does, we avoid moving the line pointers around.
    1396             :  * This could be used by an index AM that doesn't want to unset the
    1397             :  * LP_DEAD bit when it happens to be set.  It could conceivably also be
    1398             :  * used by an index AM that cares about the physical order of tuples as
    1399             :  * well as their logical/ItemId order.
    1400             :  *
    1401             :  * If there's insufficient space for the new tuple, return false.  Other
    1402             :  * errors represent data-corruption problems, so we just elog.
                     :  *
                     :  * page is the index page to modify, offnum the offset number of the
                     :  * tuple to replace, newtup the replacement tuple data and newsize its
                     :  * length in bytes (stored unaligned in lp_len; the space accounting uses
                     :  * MAXALIGN(newsize)).  Returns true once the new data has been copied
                     :  * onto the page; when false is returned the page has not been modified.
    1403             :  */
    1404             : bool
    1405      875668 : PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
    1406             :                         Item newtup, Size newsize)
    1407             : {
    1408      875668 :     PageHeader  phdr = (PageHeader) page;
    1409             :     ItemId      tupid;
    1410             :     int         oldsize;
    1411             :     unsigned    offset;
    1412             :     Size        alignednewsize;
    1413             :     int         size_diff;
    1414             :     int         itemcount;
    1415             : 
    1416             :     /*
    1417             :      * As with PageRepairFragmentation, paranoia seems justified.
    1418             :      */
    1419      875668 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1420      875668 :         phdr->pd_lower > phdr->pd_upper ||
    1421      875668 :         phdr->pd_upper > phdr->pd_special ||
    1422      875668 :         phdr->pd_special > BLCKSZ ||
    1423      875668 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1424           0 :         ereport(ERROR,
    1425             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1426             :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1427             :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1428             : 
    1429      875668 :     itemcount = PageGetMaxOffsetNumber(page);
    1430      875668 :     if ((int) offnum <= 0 || (int) offnum > itemcount)
    1431           0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1432             : 
    1433      875668 :     tupid = PageGetItemId(page, offnum);
    1434             :     Assert(ItemIdHasStorage(tupid));
    1435      875668 :     oldsize = ItemIdGetLength(tupid);
    1436      875668 :     offset = ItemIdGetOffset(tupid);
    1437             : 
    1438      875668 :     if (offset < phdr->pd_upper || (offset + oldsize) > phdr->pd_special ||
    1439      875668 :         offset != MAXALIGN(offset))
    1440           0 :         ereport(ERROR,
    1441             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1442             :                  errmsg("corrupted line pointer: offset = %u, size = %u",
    1443             :                         offset, (unsigned int) oldsize)));
    1444             : 
    1445             :     /*
    1446             :      * Determine actual change in space requirement, check for page overflow.
                     :      *
                     :      * The space available to the new tuple is the old tuple's aligned size
                     :      * plus the free space between pd_lower and pd_upper.
    1447             :      */
    1448      875668 :     oldsize = MAXALIGN(oldsize);
    1449      875668 :     alignednewsize = MAXALIGN(newsize);
    1450      875668 :     if (alignednewsize > oldsize + (phdr->pd_upper - phdr->pd_lower))
    1451           0 :         return false;
    1452             : 
    1453             :     /*
    1454             :      * Relocate existing data and update line pointers, unless the new tuple
    1455             :      * is the same size as the old (after alignment), in which case there's
    1456             :      * nothing to do.  Notice that what we have to relocate is data before the
    1457             :      * target tuple, not data after, so it's convenient to express size_diff
    1458             :      * as the amount by which the tuple's size is decreasing, making it the
    1459             :      * delta to add to pd_upper and affected line pointers.
                     :      *
                     :      * size_diff is negative when the tuple grows, in which case the data
                     :      * and pd_upper move toward lower addresses.
    1460             :      */
    1461      875668 :     size_diff = oldsize - (int) alignednewsize;
    1462      875668 :     if (size_diff != 0)
    1463             :     {
    1464       65672 :         char       *addr = (char *) page + phdr->pd_upper;
    1465             :         int         i;
    1466             : 
    1467             :         /* relocate all tuple data before the target tuple */
    1468       65672 :         memmove(addr + size_diff, addr, offset - phdr->pd_upper);
    1469             : 
    1470             :         /* adjust free space boundary pointer */
    1471       65672 :         phdr->pd_upper += size_diff;
    1472             : 
    1473             :         /* adjust affected line pointers too (this loop also visits the target tuple's own line pointer) */
    1474     9410100 :         for (i = FirstOffsetNumber; i <= itemcount; i++)
    1475             :         {
    1476     9344428 :             ItemId      ii = PageGetItemId(page, i);
    1477             : 
    1478             :             /* Allow items without storage; currently only BRIN needs that */
    1479     9344428 :             if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
    1480     4137818 :                 ii->lp_off += size_diff;
    1481             :         }
    1482             :     }
    1483             : 
    1484             :     /* Update the item's tuple length without changing its lp_flags field; lp_off is set explicitly from the saved old offset */
    1485      875668 :     tupid->lp_off = offset + size_diff;
    1486      875668 :     tupid->lp_len = newsize;
    1487             : 
    1488             :     /* Copy new tuple data onto page */
    1489      875668 :     memcpy(PageGetItem(page, tupid), newtup, newsize);
    1490             : 
    1491      875668 :     return true;
    1492             : }
    1493             : 
    1494             : 
    1495             : /*
    1496             :  * Set checksum for a page in shared buffers.
    1497             :  *
    1498             :  * If checksums are disabled, or if the page is not initialized, just return
    1499             :  * the input.  Otherwise, we must make a copy of the page before calculating
    1500             :  * the checksum, to prevent concurrent modifications (e.g. setting hint bits)
    1501             :  * from making the final checksum invalid.  It doesn't matter if we include or
    1502             :  * exclude hints during the copy, as long as we write a valid page and
    1503             :  * associated checksum.
    1504             :  *
    1505             :  * Returns a pointer to the block-sized data that needs to be written. Uses
    1506             :  * statically-allocated memory, so the caller must immediately write the
    1507             :  * returned page and not refer to it again.
                     :  *
                     :  * page is the page to be written and blkno is the block number handed to
                     :  * pg_checksum_page.  The checksum is stored in the pd_checksum field of
                     :  * the copy only; the caller's page is used solely as the memcpy source.
    1508             :  */
    1509             : char *
    1510      853248 : PageSetChecksumCopy(Page page, BlockNumber blkno)
    1511             : {
    1512             :     static char *pageCopy = NULL;
    1513             : 
    1514             :     /* If we don't need a checksum, just return the passed-in data */
    1515      853248 :     if (PageIsNew(page) || !DataChecksumsEnabled())
    1516      850516 :         return (char *) page;
    1517             : 
    1518             :     /*
    1519             :      * We allocate the copy space once and use it over on each subsequent
    1520             :      * call.  The point of palloc'ing here, rather than having a static char
    1521             :      * array, is first to ensure adequate alignment for the checksumming code
    1522             :      * and second to avoid wasting space in processes that never call this.
                     :      *
                     :      * The buffer is BLCKSZ bytes, requested with PG_IO_ALIGN_SIZE alignment
                     :      * from TopMemoryContext, and is never freed here.
    1523             :      */
    1524        2732 :     if (pageCopy == NULL)
    1525          20 :         pageCopy = MemoryContextAllocAligned(TopMemoryContext,
    1526             :                                              BLCKSZ,
    1527             :                                              PG_IO_ALIGN_SIZE,
    1528             :                                              0);
    1529             : 
    1530        2732 :     memcpy(pageCopy, (char *) page, BLCKSZ);
    1531        2732 :     ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno);
    1532        2732 :     return pageCopy;
    1533             : }
    1534             : 
    1535             : /*
    1536             :  * Set checksum for a page in private memory.
    1537             :  *
    1538             :  * This must only be used when we know that no other process can be modifying
    1539             :  * the page buffer.
                     :  *
                     :  * The value computed by pg_checksum_page for blkno is stored directly in
                     :  * the page's pd_checksum field.  Nothing is done for a page that
                     :  * PageIsNew() reports as new, or when DataChecksumsEnabled() is false.
    1540             :  */
    1541             : void
    1542       99328 : PageSetChecksumInplace(Page page, BlockNumber blkno)
    1543             : {
    1544             :     /* If we don't need a checksum, just return */
    1545       99328 :     if (PageIsNew(page) || !DataChecksumsEnabled())
    1546       98614 :         return;
    1547             : 
    1548         714 :     ((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno);
    1549             : }

Generated by: LCOV version 1.14