LCOV - code coverage report
Current view: top level - src/backend/access/brin - brin_pageops.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 224 287 78.0 %
Date: 2025-01-18 04:15:08 Functions: 10 11 90.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * brin_pageops.c
       3             :  *      Page-handling routines for BRIN indexes
       4             :  *
       5             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       6             :  * Portions Copyright (c) 1994, Regents of the University of California
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    src/backend/access/brin/brin_pageops.c
      10             :  */
      11             : #include "postgres.h"
      12             : 
      13             : #include "access/brin_page.h"
      14             : #include "access/brin_pageops.h"
      15             : #include "access/brin_revmap.h"
      16             : #include "access/brin_xlog.h"
      17             : #include "access/xloginsert.h"
      18             : #include "miscadmin.h"
      19             : #include "storage/bufmgr.h"
      20             : #include "storage/freespace.h"
      21             : #include "storage/lmgr.h"
      22             : #include "utils/rel.h"
      23             : 
      24             : /*
      25             :  * Maximum size of an entry in a BRIN_PAGETYPE_REGULAR page.  We can tolerate
      26             :  * a single item per page, unlike other index AMs.
      27             :  */
      28             : #define BrinMaxItemSize \
      29             :     MAXALIGN_DOWN(BLCKSZ - \
      30             :                   (MAXALIGN(SizeOfPageHeaderData + \
      31             :                             sizeof(ItemIdData)) + \
      32             :                    MAXALIGN(sizeof(BrinSpecialSpace))))
      33             : 
      34             : static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
      35             :                                    bool *extended);
      36             : static Size br_page_get_freespace(Page page);
      37             : static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer);
      38             : 
      39             : 
      40             : /*
      41             :  * Update tuple origtup (size origsz), located in offset oldoff of buffer
      42             :  * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
      43             :  * at heapBlk.  oldbuf must not be locked on entry, and is not locked at exit.
      44             :  *
      45             :  * If samepage is true, attempt to put the new tuple in the same page, but if
      46             :  * there's no room, use some other one.
      47             :  *
      48             :  * If the update is successful, return true; the revmap is updated to point to
      49             :  * the new tuple.  If the update is not done for whatever reason, return false.
      50             :  * Caller may retry the update if this happens.
      51             :  */
      52             : bool
      53       27428 : brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
      54             :               BrinRevmap *revmap, BlockNumber heapBlk,
      55             :               Buffer oldbuf, OffsetNumber oldoff,
      56             :               const BrinTuple *origtup, Size origsz,
      57             :               const BrinTuple *newtup, Size newsz,
      58             :               bool samepage)
      59             : {
      60             :     Page        oldpage;
      61             :     ItemId      oldlp;
      62             :     BrinTuple  *oldtup;
      63             :     Size        oldsz;
      64             :     Buffer      newbuf;
      65       27428 :     BlockNumber newblk = InvalidBlockNumber;
      66             :     bool        extended;
      67             : 
      68             :     Assert(newsz == MAXALIGN(newsz));
      69             : 
      70             :     /* If the item is oversized, don't bother. */
      71       27428 :     if (newsz > BrinMaxItemSize)
      72             :     {
      73           0 :         ereport(ERROR,
      74             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
      75             :                  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
      76             :                         newsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
      77             :         return false;           /* keep compiler quiet */
      78             :     }
      79             : 
      80             :     /* make sure the revmap is long enough to contain the entry we need */
      81       27428 :     brinRevmapExtend(revmap, heapBlk);
      82             : 
      83       27428 :     if (!samepage)
      84             :     {
      85             :         /* need a page on which to put the item */
      86         614 :         newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended);
      87         614 :         if (!BufferIsValid(newbuf))
      88             :         {
      89             :             Assert(!extended);
      90           0 :             return false;
      91             :         }
      92             : 
      93             :         /*
      94             :          * Note: it's possible (though unlikely) that the returned newbuf is
      95             :          * the same as oldbuf, if brin_getinsertbuffer determined that the old
      96             :          * buffer does in fact have enough space.
      97             :          */
      98         614 :         if (newbuf == oldbuf)
      99             :         {
     100             :             Assert(!extended);
     101           0 :             newbuf = InvalidBuffer;
     102             :         }
     103             :         else
     104         614 :             newblk = BufferGetBlockNumber(newbuf);
     105             :     }
     106             :     else
     107             :     {
     108       26814 :         LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
     109       26814 :         newbuf = InvalidBuffer;
     110       26814 :         extended = false;
     111             :     }
     112       27428 :     oldpage = BufferGetPage(oldbuf);
     113       27428 :     oldlp = PageGetItemId(oldpage, oldoff);
     114             : 
     115             :     /*
     116             :      * Check that the old tuple wasn't updated concurrently: it might have
     117             :      * moved someplace else entirely, and for that matter the whole page
     118             :      * might've become a revmap page.  Note that in the first two cases
     119             :      * checked here, the "oldlp" we just calculated is garbage; but
     120             :      * PageGetItemId() is simple enough that it was safe to do that
     121             :      * calculation anyway.
     122             :      */
     123       54856 :     if (!BRIN_IS_REGULAR_PAGE(oldpage) ||
     124       27428 :         oldoff > PageGetMaxOffsetNumber(oldpage) ||
     125       27428 :         !ItemIdIsNormal(oldlp))
     126             :     {
     127           0 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     128             : 
     129             :         /*
     130             :          * If this happens, and the new buffer was obtained by extending the
     131             :          * relation, then we need to ensure we don't leave it uninitialized or
     132             :          * forget about it.
     133             :          */
     134           0 :         if (BufferIsValid(newbuf))
     135             :         {
     136           0 :             if (extended)
     137           0 :                 brin_initialize_empty_new_buffer(idxrel, newbuf);
     138           0 :             UnlockReleaseBuffer(newbuf);
     139           0 :             if (extended)
     140           0 :                 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     141             :         }
     142           0 :         return false;
     143             :     }
     144             : 
     145       27428 :     oldsz = ItemIdGetLength(oldlp);
     146       27428 :     oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp);
     147             : 
     148             :     /*
     149             :      * ... or it might have been updated in place to different contents.
     150             :      */
     151       27428 :     if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz))
     152             :     {
     153           0 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     154           0 :         if (BufferIsValid(newbuf))
     155             :         {
     156             :             /* As above, initialize and record new page if we got one */
     157           0 :             if (extended)
     158           0 :                 brin_initialize_empty_new_buffer(idxrel, newbuf);
     159           0 :             UnlockReleaseBuffer(newbuf);
     160           0 :             if (extended)
     161           0 :                 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     162             :         }
     163           0 :         return false;
     164             :     }
     165             : 
     166             :     /*
     167             :      * Great, the old tuple is intact.  We can proceed with the update.
     168             :      *
     169             :      * If there's enough room in the old page for the new tuple, replace it.
     170             :      *
     171             :      * Note that there might now be enough space on the page even though the
     172             :      * caller told us there isn't, if a concurrent update moved another tuple
     173             :      * elsewhere or replaced a tuple with a smaller one.
     174             :      */
     175       54274 :     if (((BrinPageFlags(oldpage) & BRIN_EVACUATE_PAGE) == 0) &&
     176       26846 :         brin_can_do_samepage_update(oldbuf, origsz, newsz))
     177             :     {
     178       26814 :         START_CRIT_SECTION();
     179       26814 :         if (!PageIndexTupleOverwrite(oldpage, oldoff, (Item) unconstify(BrinTuple *, newtup), newsz))
     180           0 :             elog(ERROR, "failed to replace BRIN tuple");
     181       26814 :         MarkBufferDirty(oldbuf);
     182             : 
     183             :         /* XLOG stuff */
     184       26814 :         if (RelationNeedsWAL(idxrel))
     185             :         {
     186             :             xl_brin_samepage_update xlrec;
     187             :             XLogRecPtr  recptr;
     188       26808 :             uint8       info = XLOG_BRIN_SAMEPAGE_UPDATE;
     189             : 
     190       26808 :             xlrec.offnum = oldoff;
     191             : 
     192       26808 :             XLogBeginInsert();
     193       26808 :             XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
     194             : 
     195       26808 :             XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
     196       26808 :             XLogRegisterBufData(0, (const char *) newtup, newsz);
     197             : 
     198       26808 :             recptr = XLogInsert(RM_BRIN_ID, info);
     199             : 
     200       26808 :             PageSetLSN(oldpage, recptr);
     201             :         }
     202             : 
     203       26814 :         END_CRIT_SECTION();
     204             : 
     205       26814 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     206             : 
     207       26814 :         if (BufferIsValid(newbuf))
     208             :         {
     209             :             /* As above, initialize and record new page if we got one */
     210           0 :             if (extended)
     211           0 :                 brin_initialize_empty_new_buffer(idxrel, newbuf);
     212           0 :             UnlockReleaseBuffer(newbuf);
     213           0 :             if (extended)
     214           0 :                 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     215             :         }
     216             : 
     217       26814 :         return true;
     218             :     }
     219         614 :     else if (newbuf == InvalidBuffer)
     220             :     {
     221             :         /*
     222             :          * Not enough space, but caller said that there was. Tell them to
     223             :          * start over.
     224             :          */
     225           0 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     226           0 :         return false;
     227             :     }
     228             :     else
     229             :     {
     230             :         /*
     231             :          * Not enough free space on the oldpage. Put the new tuple on the new
     232             :          * page, and update the revmap.
     233             :          */
     234         614 :         Page        newpage = BufferGetPage(newbuf);
     235             :         Buffer      revmapbuf;
     236             :         ItemPointerData newtid;
     237             :         OffsetNumber newoff;
     238         614 :         Size        freespace = 0;
     239             : 
     240         614 :         revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
     241             : 
     242         614 :         START_CRIT_SECTION();
     243             : 
     244             :         /*
     245             :          * We need to initialize the page if it's newly obtained.  Note we
     246             :          * will WAL-log the initialization as part of the update, so we don't
     247             :          * need to do that here.
     248             :          */
     249         614 :         if (extended)
     250          22 :             brin_page_init(newpage, BRIN_PAGETYPE_REGULAR);
     251             : 
     252         614 :         PageIndexTupleDeleteNoCompact(oldpage, oldoff);
     253         614 :         newoff = PageAddItem(newpage, (Item) unconstify(BrinTuple *, newtup), newsz,
     254             :                              InvalidOffsetNumber, false, false);
     255         614 :         if (newoff == InvalidOffsetNumber)
     256           0 :             elog(ERROR, "failed to add BRIN tuple to new page");
     257         614 :         MarkBufferDirty(oldbuf);
     258         614 :         MarkBufferDirty(newbuf);
     259             : 
     260             :         /* needed to update FSM below */
     261         614 :         if (extended)
     262          22 :             freespace = br_page_get_freespace(newpage);
     263             : 
     264         614 :         ItemPointerSet(&newtid, newblk, newoff);
     265         614 :         brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
     266         614 :         MarkBufferDirty(revmapbuf);
     267             : 
     268             :         /* XLOG stuff */
     269         614 :         if (RelationNeedsWAL(idxrel))
     270             :         {
     271             :             xl_brin_update xlrec;
     272             :             XLogRecPtr  recptr;
     273             :             uint8       info;
     274             : 
     275         614 :             info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
     276             : 
     277         614 :             xlrec.insert.offnum = newoff;
     278         614 :             xlrec.insert.heapBlk = heapBlk;
     279         614 :             xlrec.insert.pagesPerRange = pagesPerRange;
     280         614 :             xlrec.oldOffnum = oldoff;
     281             : 
     282         614 :             XLogBeginInsert();
     283             : 
     284             :             /* new page */
     285         614 :             XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
     286             : 
     287         614 :             XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
     288         614 :             XLogRegisterBufData(0, (const char *) newtup, newsz);
     289             : 
     290             :             /* revmap page */
     291         614 :             XLogRegisterBuffer(1, revmapbuf, 0);
     292             : 
     293             :             /* old page */
     294         614 :             XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);
     295             : 
     296         614 :             recptr = XLogInsert(RM_BRIN_ID, info);
     297             : 
     298         614 :             PageSetLSN(oldpage, recptr);
     299         614 :             PageSetLSN(newpage, recptr);
     300         614 :             PageSetLSN(BufferGetPage(revmapbuf), recptr);
     301             :         }
     302             : 
     303         614 :         END_CRIT_SECTION();
     304             : 
     305         614 :         LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
     306         614 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     307         614 :         UnlockReleaseBuffer(newbuf);
     308             : 
     309         614 :         if (extended)
     310             :         {
     311          22 :             RecordPageWithFreeSpace(idxrel, newblk, freespace);
     312          22 :             FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     313             :         }
     314             : 
     315         614 :         return true;
     316             :     }
     317             : }
     318             : 
     319             : /*
     320             :  * Return whether brin_doupdate can do a samepage update.
     321             :  */
     322             : bool
     323       53692 : brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
     324             : {
     325             :     return
     326       61576 :         ((newsz <= origsz) ||
     327        7884 :          PageGetExactFreeSpace(BufferGetPage(buffer)) >= (newsz - origsz));
     328             : }
     329             : 
     330             : /*
     331             :  * Insert an index tuple into the index relation.  The revmap is updated to
     332             :  * mark the range containing the given page as pointing to the inserted entry.
     333             :  * A WAL record is written.
     334             :  *
     335             :  * The buffer, if valid, is first checked for free space to insert the new
     336             :  * entry; if there isn't enough, a new buffer is obtained and pinned.  No
     337             :  * buffer lock must be held on entry, no buffer lock is held on exit.
     338             :  *
     339             :  * Return value is the offset number where the tuple was inserted.
     340             :  */
     341             : OffsetNumber
     342        5632 : brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
     343             :               BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk,
     344             :               BrinTuple *tup, Size itemsz)
     345             : {
     346             :     Page        page;
     347             :     BlockNumber blk;
     348             :     OffsetNumber off;
     349        5632 :     Size        freespace = 0;
     350             :     Buffer      revmapbuf;
     351             :     ItemPointerData tid;
     352             :     bool        extended;
     353             : 
     354             :     Assert(itemsz == MAXALIGN(itemsz));
     355             : 
     356             :     /* If the item is oversized, don't even bother. */
     357        5632 :     if (itemsz > BrinMaxItemSize)
     358             :     {
     359           0 :         ereport(ERROR,
     360             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     361             :                  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
     362             :                         itemsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
     363             :         return InvalidOffsetNumber; /* keep compiler quiet */
     364             :     }
     365             : 
     366             :     /* Make sure the revmap is long enough to contain the entry we need */
     367        5632 :     brinRevmapExtend(revmap, heapBlk);
     368             : 
     369             :     /*
     370             :      * Acquire lock on buffer supplied by caller, if any.  If it doesn't have
     371             :      * enough space, unpin it to obtain a new one below.
     372             :      */
     373        5632 :     if (BufferIsValid(*buffer))
     374             :     {
     375             :         /*
     376             :          * It's possible that another backend (or ourselves!) extended the
     377             :          * revmap over the page we held a pin on, so we cannot assume that
     378             :          * it's still a regular page.
     379             :          */
     380        2348 :         LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
     381        2348 :         if (br_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
     382             :         {
     383         120 :             UnlockReleaseBuffer(*buffer);
     384         120 :             *buffer = InvalidBuffer;
     385             :         }
     386             :     }
     387             : 
     388             :     /*
     389             :      * If we still don't have a usable buffer, have brin_getinsertbuffer
     390             :      * obtain one for us.
     391             :      */
     392        5632 :     if (!BufferIsValid(*buffer))
     393             :     {
     394             :         do
     395        3404 :             *buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended);
     396        3404 :         while (!BufferIsValid(*buffer));
     397             :     }
     398             :     else
     399        2228 :         extended = false;
     400             : 
     401             :     /* Now obtain lock on revmap buffer */
     402        5632 :     revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
     403             : 
     404        5632 :     page = BufferGetPage(*buffer);
     405        5632 :     blk = BufferGetBlockNumber(*buffer);
     406             : 
     407             :     /* Execute the actual insertion */
     408        5632 :     START_CRIT_SECTION();
     409        5632 :     if (extended)
     410         466 :         brin_page_init(page, BRIN_PAGETYPE_REGULAR);
     411        5632 :     off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
     412             :                       false, false);
     413        5632 :     if (off == InvalidOffsetNumber)
     414           0 :         elog(ERROR, "failed to add BRIN tuple to new page");
     415        5632 :     MarkBufferDirty(*buffer);
     416             : 
     417             :     /* needed to update FSM below */
     418        5632 :     if (extended)
     419         466 :         freespace = br_page_get_freespace(page);
     420             : 
     421        5632 :     ItemPointerSet(&tid, blk, off);
     422        5632 :     brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
     423        5632 :     MarkBufferDirty(revmapbuf);
     424             : 
     425             :     /* XLOG stuff */
     426        5632 :     if (RelationNeedsWAL(idxrel))
     427             :     {
     428             :         xl_brin_insert xlrec;
     429             :         XLogRecPtr  recptr;
     430             :         uint8       info;
     431             : 
     432        4732 :         info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
     433        4732 :         xlrec.heapBlk = heapBlk;
     434        4732 :         xlrec.pagesPerRange = pagesPerRange;
     435        4732 :         xlrec.offnum = off;
     436             : 
     437        4732 :         XLogBeginInsert();
     438        4732 :         XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
     439             : 
     440        4732 :         XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
     441        4732 :         XLogRegisterBufData(0, (char *) tup, itemsz);
     442             : 
     443        4732 :         XLogRegisterBuffer(1, revmapbuf, 0);
     444             : 
     445        4732 :         recptr = XLogInsert(RM_BRIN_ID, info);
     446             : 
     447        4732 :         PageSetLSN(page, recptr);
     448        4732 :         PageSetLSN(BufferGetPage(revmapbuf), recptr);
     449             :     }
     450             : 
     451        5632 :     END_CRIT_SECTION();
     452             : 
     453             :     /* Tuple is firmly on buffer; we can release our locks */
     454        5632 :     LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
     455        5632 :     LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
     456             : 
     457             :     BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
     458             :                blk, off, heapBlk));
     459             : 
     460        5632 :     if (extended)
     461             :     {
     462         466 :         RecordPageWithFreeSpace(idxrel, blk, freespace);
     463         466 :         FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
     464             :     }
     465             : 
     466        5632 :     return off;
     467             : }
     468             : 
     469             : /*
     470             :  * Initialize a page with the given type.
     471             :  *
     472             :  * Caller is responsible for marking it dirty, as appropriate.
     473             :  */
     474             : void
     475        1458 : brin_page_init(Page page, uint16 type)
     476             : {
     477        1458 :     PageInit(page, BLCKSZ, sizeof(BrinSpecialSpace));
     478             : 
     479        1458 :     BrinPageType(page) = type;
     480        1458 : }
     481             : 
     482             : /*
     483             :  * Initialize a new BRIN index's metapage.
     484             :  */
     485             : void
     486         428 : brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
     487             : {
     488             :     BrinMetaPageData *metadata;
     489             : 
     490         428 :     brin_page_init(page, BRIN_PAGETYPE_META);
     491             : 
     492         428 :     metadata = (BrinMetaPageData *) PageGetContents(page);
     493             : 
     494         428 :     metadata->brinMagic = BRIN_META_MAGIC;
     495         428 :     metadata->brinVersion = version;
     496         428 :     metadata->pagesPerRange = pagesPerRange;
     497             : 
     498             :     /*
     499             :      * Note we cheat here a little.  0 is not a valid revmap block number
     500             :      * (because it's the metapage buffer), but doing this enables the first
     501             :      * revmap page to be created when the index is.
     502             :      */
     503         428 :     metadata->lastRevmapPage = 0;
     504             : 
     505             :     /*
     506             :      * Set pd_lower just past the end of the metadata.  This is essential,
     507             :      * because without doing so, metadata will be lost if xlog.c compresses
     508             :      * the page.
     509             :      */
     510         428 :     ((PageHeader) page)->pd_lower =
     511         428 :         ((char *) metadata + sizeof(BrinMetaPageData)) - (char *) page;
     512         428 : }
     513             : 
     514             : /*
     515             :  * Initiate page evacuation protocol.
     516             :  *
     517             :  * The page must be locked in exclusive mode by the caller.
     518             :  *
     519             :  * If the page is not yet initialized or empty, return false without doing
     520             :  * anything; it can be used for revmap without any further changes.  If it
     521             :  * contains tuples, mark it for evacuation and return true.
     522             :  */
     523             : bool
     524         350 : brin_start_evacuating_page(Relation idxRel, Buffer buf)
     525             : {
     526             :     OffsetNumber off;
     527             :     OffsetNumber maxoff;
     528             :     Page        page;
     529             : 
     530         350 :     page = BufferGetPage(buf);
     531             : 
     532         350 :     if (PageIsNew(page))
     533         346 :         return false;
     534             : 
     535           4 :     maxoff = PageGetMaxOffsetNumber(page);
     536         584 :     for (off = FirstOffsetNumber; off <= maxoff; off++)
     537             :     {
     538             :         ItemId      lp;
     539             : 
     540         582 :         lp = PageGetItemId(page, off);
     541         582 :         if (ItemIdIsUsed(lp))
     542             :         {
     543             :             /*
     544             :              * Prevent other backends from adding more stuff to this page:
     545             :              * BRIN_EVACUATE_PAGE informs br_page_get_freespace that this page
     546             :              * can no longer be used to add new tuples.  Note that this flag
     547             :              * is not WAL-logged, except accidentally.
     548             :              */
     549           2 :             BrinPageFlags(page) |= BRIN_EVACUATE_PAGE;
     550           2 :             MarkBufferDirtyHint(buf, true);
     551             : 
     552           2 :             return true;
     553             :         }
     554             :     }
     555           2 :     return false;
     556             : }
     557             : 
     558             : /*
     559             :  * Move all tuples out of a page.
     560             :  *
     561             :  * The caller must hold lock on the page. The lock and pin are released.
     562             :  */
     563             : void
     564           2 : brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
     565             :                    BrinRevmap *revmap, Buffer buf)
     566             : {
     567             :     OffsetNumber off;
     568             :     OffsetNumber maxoff;
     569             :     Page        page;
     570           2 :     BrinTuple  *btup = NULL;
     571           2 :     Size        btupsz = 0;
     572             : 
     573           2 :     page = BufferGetPage(buf);
     574             : 
     575             :     Assert(BrinPageFlags(page) & BRIN_EVACUATE_PAGE);
     576             : 
     577           2 :     maxoff = PageGetMaxOffsetNumber(page);
     578         584 :     for (off = FirstOffsetNumber; off <= maxoff; off++)
     579             :     {
     580             :         BrinTuple  *tup;
     581             :         Size        sz;
     582             :         ItemId      lp;
     583             : 
     584         582 :         CHECK_FOR_INTERRUPTS();
     585             : 
     586         582 :         lp = PageGetItemId(page, off);
     587         582 :         if (ItemIdIsUsed(lp))
     588             :         {
     589         582 :             sz = ItemIdGetLength(lp);
     590         582 :             tup = (BrinTuple *) PageGetItem(page, lp);
     591         582 :             tup = brin_copy_tuple(tup, sz, btup, &btupsz);
     592             : 
     593         582 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     594             : 
     595         582 :             if (!brin_doupdate(idxRel, pagesPerRange, revmap, tup->bt_blkno,
     596             :                                buf, off, tup, sz, tup, sz, false))
     597           0 :                 off--;          /* retry */
     598             : 
     599         582 :             LockBuffer(buf, BUFFER_LOCK_SHARE);
     600             : 
     601             :             /* It's possible that someone extended the revmap over this page */
     602         582 :             if (!BRIN_IS_REGULAR_PAGE(page))
     603           0 :                 break;
     604             :         }
     605             :     }
     606             : 
     607           2 :     UnlockReleaseBuffer(buf);
     608           2 : }
     609             : 
     610             : /*
     611             :  * Given a BRIN index page, initialize it if necessary, and record its
     612             :  * current free space in the FSM.
     613             :  *
     614             :  * The main use for this is when, during vacuuming, an uninitialized page is
     615             :  * found, which could be the result of relation extension followed by a crash
     616             :  * before the page can be used.
     617             :  *
     618             :  * Here, we don't bother to update upper FSM pages, instead expecting that our
     619             :  * caller (brin_vacuum_scan) will fix them at the end of the scan.  Elsewhere
     620             :  * in this file, it's generally a good idea to propagate additions of free
     621             :  * space into the upper FSM pages immediately.
     622             :  */
     623             : void
     624         442 : brin_page_cleanup(Relation idxrel, Buffer buf)
     625             : {
     626         442 :     Page        page = BufferGetPage(buf);
     627             : 
     628             :     /*
     629             :      * If a page was left uninitialized, initialize it now; also record it in
     630             :      * FSM.
     631             :      *
     632             :      * Somebody else might be extending the relation concurrently.  To avoid
     633             :      * re-initializing the page before they can grab the buffer lock, we
     634             :      * acquire the extension lock momentarily.  Since they hold the extension
     635             :      * lock from before getting the page and after it's been initialized, we're
     636             :      * sure to see their initialization.
     637             :      */
     638         442 :     if (PageIsNew(page))
     639             :     {
     640           0 :         LockRelationForExtension(idxrel, ShareLock);
     641           0 :         UnlockRelationForExtension(idxrel, ShareLock);
     642             : 
     643           0 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     644           0 :         if (PageIsNew(page))
     645             :         {
     646           0 :             brin_initialize_empty_new_buffer(idxrel, buf);
     647           0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     648           0 :             return;
     649             :         }
     650           0 :         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     651             :     }
     652             : 
     653             :     /* Nothing to be done for non-regular index pages */
     654         442 :     if (BRIN_IS_META_PAGE(BufferGetPage(buf)) ||
     655         340 :         BRIN_IS_REVMAP_PAGE(BufferGetPage(buf)))
     656         204 :         return;
     657             : 
     658             :     /* Measure free space and record it */
     659         238 :     RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf),
     660             :                             br_page_get_freespace(page));
     661             : }
     662             : 
     663             : /*
     664             :  * Return a pinned and exclusively locked buffer which can be used to insert an
     665             :  * index item of size itemsz (caller must ensure not to request sizes
     666             :  * impossible to fulfill).  If oldbuf is a valid buffer, it is also locked (in
     667             :  * an order determined to avoid deadlocks).
     668             :  *
     669             :  * If we find that the old page is no longer a regular index page (because
     670             :  * of a revmap extension), the old buffer is unlocked and we return
     671             :  * InvalidBuffer.
     672             :  *
     673             :  * If there's no existing page with enough free space to accommodate the new
     674             :  * item, the relation is extended.  If this happens, *extended is set to true,
     675             :  * and it is the caller's responsibility to initialize the page (and WAL-log
     676             :  * that fact) prior to use.  The caller should also update the FSM with the
     677             :  * page's remaining free space after the insertion.
     678             :  *
     679             :  * Note that the caller is not expected to update FSM unless *extended is set
     680             :  * true.  This policy means that we'll update FSM when a page is created, and
     681             :  * when it's found to have too little space for a desired tuple insertion,
     682             :  * but not every single time we add a tuple to the page.
     683             :  *
     684             :  * Note that in some corner cases it is possible for this routine to extend
     685             :  * the relation and then not return the new page.  It is this routine's
     686             :  * responsibility to WAL-log the page initialization and to record the page in
     687             :  * FSM if that happens, since the caller certainly can't do it.
     688             :  */
     689             : static Buffer
     690        4018 : brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
     691             :                      bool *extended)
     692             : {
     693             :     BlockNumber oldblk;
     694             :     BlockNumber newblk;
     695             :     Page        page;
     696             :     Size        freespace;
     697             : 
     698             :     /* callers must have checked */
     699             :     Assert(itemsz <= BrinMaxItemSize);
     700             : 
     701        4018 :     if (BufferIsValid(oldbuf))
     702         614 :         oldblk = BufferGetBlockNumber(oldbuf);
     703             :     else
     704        3404 :         oldblk = InvalidBlockNumber;
     705             : 
     706             :     /* Choose initial target page, re-using existing target if known */
     707        4018 :     newblk = RelationGetTargetBlock(irel);
     708        4018 :     if (newblk == InvalidBlockNumber)
     709         412 :         newblk = GetPageWithFreeSpace(irel, itemsz);
     710             : 
     711             :     /*
     712             :      * Loop until we find a page with sufficient free space.  By the time we
     713             :      * return to caller out of this loop, both buffers are valid and locked;
     714             :      * if we have to restart here, neither page is locked and newblk isn't
     715             :      * pinned (if it's even valid).
     716             :      */
     717             :     for (;;)
     718         154 :     {
     719             :         Buffer      buf;
     720        4172 :         bool        extensionLockHeld = false;
     721             : 
     722        4172 :         CHECK_FOR_INTERRUPTS();
     723             : 
     724        4172 :         *extended = false;
     725             : 
     726        4172 :         if (newblk == InvalidBlockNumber)
     727             :         {
     728             :             /*
     729             :              * There's not enough free space in any existing index page,
     730             :              * according to the FSM: extend the relation to obtain a shiny new
     731             :              * page.
     732             :              *
     733             :              * XXX: It's likely possible to use RBM_ZERO_AND_LOCK here,
     734             :              * which'd avoid the need to hold the extension lock during buffer
     735             :              * reclaim.
     736             :              */
     737         488 :             if (!RELATION_IS_LOCAL(irel))
     738             :             {
     739          46 :                 LockRelationForExtension(irel, ExclusiveLock);
     740          46 :                 extensionLockHeld = true;
     741             :             }
     742         488 :             buf = ReadBuffer(irel, P_NEW);
     743         488 :             newblk = BufferGetBlockNumber(buf);
     744         488 :             *extended = true;
     745             : 
     746             :             BRIN_elog((DEBUG2, "brin_getinsertbuffer: extending to page %u",
     747             :                        BufferGetBlockNumber(buf)));
     748             :         }
     749        3684 :         else if (newblk == oldblk)
     750             :         {
     751             :             /*
     752             :              * There's an odd corner-case here where the FSM is out-of-date,
     753             :              * and gave us the old page.
     754             :              */
     755          26 :             buf = oldbuf;
     756             :         }
     757             :         else
     758             :         {
     759        3658 :             buf = ReadBuffer(irel, newblk);
     760             :         }
     761             : 
     762             :         /*
     763             :          * We lock the old buffer first, if it's earlier than the new one; but
     764             :          * then we need to check that it hasn't been turned into a revmap page
     765             :          * concurrently.  If we detect that that happened, give up and tell
     766             :          * caller to start over.
     767             :          */
     768        4172 :         if (BufferIsValid(oldbuf) && oldblk < newblk)
     769             :         {
     770         622 :             LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
     771         622 :             if (!BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
     772             :             {
     773           0 :                 LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     774             : 
     775             :                 /*
     776             :                  * It is possible that the new page was obtained from
     777             :                  * extending the relation.  In that case, we must be sure to
     778             :                  * record it in the FSM before leaving, because otherwise the
     779             :                  * space would be lost forever.  However, we cannot let an
     780             :                  * uninitialized page get in the FSM, so we need to initialize
     781             :                  * it first.
     782             :                  */
     783           0 :                 if (*extended)
     784           0 :                     brin_initialize_empty_new_buffer(irel, buf);
     785             : 
     786           0 :                 if (extensionLockHeld)
     787           0 :                     UnlockRelationForExtension(irel, ExclusiveLock);
     788             : 
     789           0 :                 ReleaseBuffer(buf);
     790             : 
     791           0 :                 if (*extended)
     792             :                 {
     793           0 :                     FreeSpaceMapVacuumRange(irel, newblk, newblk + 1);
     794             :                     /* shouldn't matter, but don't confuse caller */
     795           0 :                     *extended = false;
     796             :                 }
     797             : 
     798           0 :                 return InvalidBuffer;
     799             :             }
     800             :         }
     801             : 
     802        4172 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     803             : 
     804        4172 :         if (extensionLockHeld)
     805          46 :             UnlockRelationForExtension(irel, ExclusiveLock);
     806             : 
     807        4172 :         page = BufferGetPage(buf);
     808             : 
     809             :         /*
     810             :          * We have a new buffer to insert into.  Check that the new page has
     811             :          * enough free space, and return it if it does; otherwise start over.
     812             :          * (br_page_get_freespace also checks that the FSM didn't hand us a
     813             :          * page that has since been repurposed for the revmap.)
     814             :          */
     815        8344 :         freespace = *extended ?
     816        4172 :             BrinMaxItemSize : br_page_get_freespace(page);
     817        4172 :         if (freespace >= itemsz)
     818             :         {
     819        4018 :             RelationSetTargetBlock(irel, newblk);
     820             : 
     821             :             /*
     822             :              * Lock the old buffer if not locked already.  Note that in this
     823             :              * case we know for sure it's a regular page: it's later than the
     824             :              * new page we just got, which is not a revmap page, and revmap
     825             :              * pages are always consecutive.
     826             :              */
     827        4018 :             if (BufferIsValid(oldbuf) && oldblk > newblk)
     828             :             {
     829           0 :                 LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
     830             :                 Assert(BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)));
     831             :             }
     832             : 
     833        4018 :             return buf;
     834             :         }
     835             : 
     836             :         /* This page is no good. */
     837             : 
     838             :         /*
     839             :          * If an entirely new page does not contain enough free space for the
     840             :          * new item, then surely that item is oversized.  Complain loudly; but
     841             :          * first make sure we initialize the page and record it as free, for
     842             :          * next time.
     843             :          */
     844         154 :         if (*extended)
     845             :         {
     846           0 :             brin_initialize_empty_new_buffer(irel, buf);
     847             :             /* since this should not happen, skip FreeSpaceMapVacuum */
     848             : 
     849           0 :             ereport(ERROR,
     850             :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     851             :                      errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
     852             :                             itemsz, freespace, RelationGetRelationName(irel))));
     853             :             return InvalidBuffer;   /* keep compiler quiet */
     854             :         }
     855             : 
     856         154 :         if (newblk != oldblk)
     857         128 :             UnlockReleaseBuffer(buf);
     858         154 :         if (BufferIsValid(oldbuf) && oldblk <= newblk)
     859          34 :             LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     860             : 
     861             :         /*
     862             :          * Update the FSM with the new, presumably smaller, freespace value
     863             :          * for this page, then search for a new target page.
     864             :          */
     865         154 :         newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
     866             :     }
     867             : }
     868             : 
     869             : /*
     870             :  * Initialize a page as an empty regular BRIN page, WAL-log this, and record
     871             :  * the page in FSM.
     872             :  *
     873             :  * There are several corner situations in which we extend the relation to
     874             :  * obtain a new page and later find that we cannot use it immediately.  When
     875             :  * that happens, we don't want to let the page go unrecorded in FSM, because
     876             :  * there is no mechanism to get the space back and the index would bloat.
     877             :  * Also, because we would not WAL-log the action that would initialize the
     878             :  * page, the page would go uninitialized in a standby (or after recovery).
     879             :  *
     880             :  * While we record the page in FSM here, caller is responsible for doing FSM
     881             :  * upper-page update if that seems appropriate.
     882             :  */
     883             : static void
     884           0 : brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
     885             : {
     886             :     Page        page;
     887             : 
     888             :     BRIN_elog((DEBUG2,
     889             :                "brin_initialize_empty_new_buffer: initializing blank page %u",
     890             :                BufferGetBlockNumber(buffer)));
     891             : 
     892           0 :     START_CRIT_SECTION();
     893           0 :     page = BufferGetPage(buffer);
     894           0 :     brin_page_init(page, BRIN_PAGETYPE_REGULAR);
     895           0 :     MarkBufferDirty(buffer);
     896           0 :     log_newpage_buffer(buffer, true);
     897           0 :     END_CRIT_SECTION();
     898             : 
     899             :     /*
     900             :      * We update the FSM for this page, but this is not WAL-logged.  This is
     901             :      * acceptable because VACUUM will scan the index and update the FSM with
     902             :      * pages whose FSM records were forgotten in a crash.
     903             :      */
     904           0 :     RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
     905             :                             br_page_get_freespace(page));
     906           0 : }
     907             : 
     908             : 
     909             : /*
     910             :  * Return the amount of free space on a regular BRIN index page.
     911             :  *
     912             :  * If the page is not a regular page, or has been marked with the
     913             :  * BRIN_EVACUATE_PAGE flag, returns 0.
     914             :  */
     915             : static Size
     916        6758 : br_page_get_freespace(Page page)
     917             : {
     918        6758 :     if (!BRIN_IS_REGULAR_PAGE(page) ||
     919        6758 :         (BrinPageFlags(page) & BRIN_EVACUATE_PAGE) != 0)
     920           0 :         return 0;
     921             :     else
     922        6758 :         return PageGetFreeSpace(page);
     923             : }

Generated by: LCOV version 1.14