LCOV - code coverage report
Current view: top level - src/backend/access/brin - brin_pageops.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 77.8 % 288 224
Test Date: 2026-03-02 00:15:17 Functions: 90.9 % 11 10
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*
       2              :  * brin_pageops.c
       3              :  *      Page-handling routines for BRIN indexes
       4              :  *
       5              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       6              :  * Portions Copyright (c) 1994, Regents of the University of California
       7              :  *
       8              :  * IDENTIFICATION
       9              :  *    src/backend/access/brin/brin_pageops.c
      10              :  */
      11              : #include "postgres.h"
      12              : 
      13              : #include "access/brin_page.h"
      14              : #include "access/brin_pageops.h"
      15              : #include "access/brin_revmap.h"
      16              : #include "access/brin_xlog.h"
      17              : #include "access/xloginsert.h"
      18              : #include "miscadmin.h"
      19              : #include "storage/bufmgr.h"
      20              : #include "storage/freespace.h"
      21              : #include "storage/lmgr.h"
      22              : #include "utils/rel.h"
      23              : 
      24              : /*
      25              :  * Maximum size of an entry in a BRIN_PAGETYPE_REGULAR page.  We can tolerate
      26              :  * a single item per page, unlike other index AMs.
      27              :  */
      28              : #define BrinMaxItemSize \
      29              :     MAXALIGN_DOWN(BLCKSZ - \
      30              :                   (MAXALIGN(SizeOfPageHeaderData + \
      31              :                             sizeof(ItemIdData)) + \
      32              :                    MAXALIGN(sizeof(BrinSpecialSpace))))
      33              : 
      34              : static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
      35              :                                    bool *extended);
      36              : static Size br_page_get_freespace(Page page);
      37              : static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer);
      38              : 
      39              : 
      40              : /*
      41              :  * Update tuple origtup (size origsz), located in offset oldoff of buffer
      42              :  * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
      43              :  * at heapBlk.  oldbuf must not be locked on entry, and is not locked at exit.
      44              :  *
      45              :  * If samepage is true, put the new tuple in the same page, or return false if
      46              :  * it has no room; if false, use another page unless the old one has room.
      47              :  *
      48              :  * If the update is successful, return true; the revmap is updated to point to
      49              :  * the new tuple.  If the update is not done for whatever reason, return false.
      50              :  * Caller may retry the update if this happens.
      51              :  */
      52              : bool
      53        13729 : brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
      54              :               BrinRevmap *revmap, BlockNumber heapBlk,
      55              :               Buffer oldbuf, OffsetNumber oldoff,
      56              :               const BrinTuple *origtup, Size origsz,
      57              :               const BrinTuple *newtup, Size newsz,
      58              :               bool samepage)
      59              : {
      60              :     Page        oldpage;
      61              :     ItemId      oldlp;
      62              :     BrinTuple  *oldtup;
      63              :     Size        oldsz;
      64              :     Buffer      newbuf;
      65        13729 :     BlockNumber newblk = InvalidBlockNumber;
      66              :     bool        extended;
      67              : 
      68              :     Assert(newsz == MAXALIGN(newsz));
      69              : 
      70              :     /* If the item is oversized, don't bother. */
      71        13729 :     if (newsz > BrinMaxItemSize)
      72              :     {
      73            0 :         ereport(ERROR,
      74              :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
      75              :                  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
      76              :                         newsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
      77              :         return false;           /* keep compiler quiet */
      78              :     }
      79              : 
      80              :     /* make sure the revmap is long enough to contain the entry we need */
      81        13729 :     brinRevmapExtend(revmap, heapBlk);
      82              : 
      83        13729 :     if (!samepage)
      84              :     {
      85              :         /* need a page on which to put the item */
      86          307 :         newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended);
      87          307 :         if (!BufferIsValid(newbuf))
      88              :         {
      89              :             Assert(!extended);
      90            0 :             return false;
      91              :         }
      92              : 
      93              :         /*
      94              :          * Note: it's possible (though unlikely) that the returned newbuf is
      95              :          * the same as oldbuf, if brin_getinsertbuffer determined that the old
      96              :          * buffer does in fact have enough space.
      97              :          */
      98          307 :         if (newbuf == oldbuf)
      99              :         {
     100              :             Assert(!extended);
     101            0 :             newbuf = InvalidBuffer;
     102              :         }
     103              :         else
     104          307 :             newblk = BufferGetBlockNumber(newbuf);
     105              :     }
     106              :     else
     107              :     {
     108        13422 :         LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
     109        13422 :         newbuf = InvalidBuffer;
     110        13422 :         extended = false;
     111              :     }
     112        13729 :     oldpage = BufferGetPage(oldbuf);
     113        13729 :     oldlp = PageGetItemId(oldpage, oldoff);
     114              : 
     115              :     /*
     116              :      * Check that the old tuple wasn't updated concurrently: it might have
     117              :      * moved someplace else entirely, and for that matter the whole page
     118              :      * might've become a revmap page.  Note that in the first two cases
     119              :      * checked here, the "oldlp" we just calculated is garbage; but
     120              :      * PageGetItemId() is simple enough that it was safe to do that
     121              :      * calculation anyway.
     122              :      */
     123        27458 :     if (!BRIN_IS_REGULAR_PAGE(oldpage) ||
     124        13729 :         oldoff > PageGetMaxOffsetNumber(oldpage) ||
     125        13729 :         !ItemIdIsNormal(oldlp))
     126              :     {
     127            0 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     128              : 
     129              :         /*
     130              :          * If this happens, and the new buffer was obtained by extending the
     131              :          * relation, then we need to ensure we don't leave it uninitialized or
     132              :          * forget about it.
     133              :          */
     134            0 :         if (BufferIsValid(newbuf))
     135              :         {
     136            0 :             if (extended)
     137            0 :                 brin_initialize_empty_new_buffer(idxrel, newbuf);
     138            0 :             UnlockReleaseBuffer(newbuf);
     139            0 :             if (extended)
     140            0 :                 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     141              :         }
     142            0 :         return false;
     143              :     }
     144              : 
     145        13729 :     oldsz = ItemIdGetLength(oldlp);
     146        13729 :     oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp);
     147              : 
     148              :     /*
     149              :      * ... or it might have been updated in place to different contents.
     150              :      */
     151        13729 :     if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz))
     152              :     {
     153            0 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     154            0 :         if (BufferIsValid(newbuf))
     155              :         {
     156              :             /* As above, initialize and record new page if we got one */
     157            0 :             if (extended)
     158            0 :                 brin_initialize_empty_new_buffer(idxrel, newbuf);
     159            0 :             UnlockReleaseBuffer(newbuf);
     160            0 :             if (extended)
     161            0 :                 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     162              :         }
     163            0 :         return false;
     164              :     }
     165              : 
     166              :     /*
     167              :      * Great, the old tuple is intact.  We can proceed with the update.
     168              :      *
     169              :      * If there's enough room in the old page for the new tuple, replace it.
     170              :      *
     171              :      * Note that there might now be enough space on the page even though the
     172              :      * caller told us there isn't, if a concurrent update moved another tuple
     173              :      * elsewhere or replaced a tuple with a smaller one.
     174              :      */
     175        27167 :     if (((BrinPageFlags(oldpage) & BRIN_EVACUATE_PAGE) == 0) &&
     176        13438 :         brin_can_do_samepage_update(oldbuf, origsz, newsz))
     177              :     {
     178        13422 :         START_CRIT_SECTION();
     179        13422 :         if (!PageIndexTupleOverwrite(oldpage, oldoff, newtup, newsz))
     180            0 :             elog(ERROR, "failed to replace BRIN tuple");
     181        13422 :         MarkBufferDirty(oldbuf);
     182              : 
     183              :         /* XLOG stuff */
     184        13422 :         if (RelationNeedsWAL(idxrel))
     185              :         {
     186              :             xl_brin_samepage_update xlrec;
     187              :             XLogRecPtr  recptr;
     188        13419 :             uint8       info = XLOG_BRIN_SAMEPAGE_UPDATE;
     189              : 
     190        13419 :             xlrec.offnum = oldoff;
     191              : 
     192        13419 :             XLogBeginInsert();
     193        13419 :             XLogRegisterData(&xlrec, SizeOfBrinSamepageUpdate);
     194              : 
     195        13419 :             XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
     196        13419 :             XLogRegisterBufData(0, newtup, newsz);
     197              : 
     198        13419 :             recptr = XLogInsert(RM_BRIN_ID, info);
     199              : 
     200        13419 :             PageSetLSN(oldpage, recptr);
     201              :         }
     202              : 
     203        13422 :         END_CRIT_SECTION();
     204              : 
     205        13422 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     206              : 
     207        13422 :         if (BufferIsValid(newbuf))
     208              :         {
     209              :             /* As above, initialize and record new page if we got one */
     210            0 :             if (extended)
     211            0 :                 brin_initialize_empty_new_buffer(idxrel, newbuf);
     212            0 :             UnlockReleaseBuffer(newbuf);
     213            0 :             if (extended)
     214            0 :                 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     215              :         }
     216              : 
     217        13422 :         return true;
     218              :     }
     219          307 :     else if (newbuf == InvalidBuffer)
     220              :     {
     221              :         /*
     222              :          * Not enough space, but caller said that there was. Tell them to
     223              :          * start over.
     224              :          */
     225            0 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     226            0 :         return false;
     227              :     }
     228              :     else
     229              :     {
     230              :         /*
     231              :          * Not enough free space on the oldpage. Put the new tuple on the new
     232              :          * page, and update the revmap.
     233              :          */
     234          307 :         Page        newpage = BufferGetPage(newbuf);
     235              :         Buffer      revmapbuf;
     236              :         ItemPointerData newtid;
     237              :         OffsetNumber newoff;
     238          307 :         Size        freespace = 0;
     239              : 
     240          307 :         revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
     241              : 
     242          307 :         START_CRIT_SECTION();
     243              : 
     244              :         /*
     245              :          * We need to initialize the page if it's newly obtained.  Note we
     246              :          * will WAL-log the initialization as part of the update, so we don't
     247              :          * need to do that here.
     248              :          */
     249          307 :         if (extended)
     250           11 :             brin_page_init(newpage, BRIN_PAGETYPE_REGULAR);
     251              : 
     252          307 :         PageIndexTupleDeleteNoCompact(oldpage, oldoff);
     253          307 :         newoff = PageAddItem(newpage, newtup, newsz, InvalidOffsetNumber, false, false);
     254          307 :         if (newoff == InvalidOffsetNumber)
     255            0 :             elog(ERROR, "failed to add BRIN tuple to new page");
     256          307 :         MarkBufferDirty(oldbuf);
     257          307 :         MarkBufferDirty(newbuf);
     258              : 
     259              :         /* needed to update FSM below */
     260          307 :         if (extended)
     261           11 :             freespace = br_page_get_freespace(newpage);
     262              : 
     263          307 :         ItemPointerSet(&newtid, newblk, newoff);
     264          307 :         brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
     265          307 :         MarkBufferDirty(revmapbuf);
     266              : 
     267              :         /* XLOG stuff */
     268          307 :         if (RelationNeedsWAL(idxrel))
     269              :         {
     270              :             xl_brin_update xlrec;
     271              :             XLogRecPtr  recptr;
     272              :             uint8       info;
     273              : 
     274          307 :             info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
     275              : 
     276          307 :             xlrec.insert.offnum = newoff;
     277          307 :             xlrec.insert.heapBlk = heapBlk;
     278          307 :             xlrec.insert.pagesPerRange = pagesPerRange;
     279          307 :             xlrec.oldOffnum = oldoff;
     280              : 
     281          307 :             XLogBeginInsert();
     282              : 
     283              :             /* new page */
     284          307 :             XLogRegisterData(&xlrec, SizeOfBrinUpdate);
     285              : 
     286          307 :             XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
     287          307 :             XLogRegisterBufData(0, newtup, newsz);
     288              : 
     289              :             /* revmap page */
     290          307 :             XLogRegisterBuffer(1, revmapbuf, 0);
     291              : 
     292              :             /* old page */
     293          307 :             XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);
     294              : 
     295          307 :             recptr = XLogInsert(RM_BRIN_ID, info);
     296              : 
     297          307 :             PageSetLSN(oldpage, recptr);
     298          307 :             PageSetLSN(newpage, recptr);
     299          307 :             PageSetLSN(BufferGetPage(revmapbuf), recptr);
     300              :         }
     301              : 
     302          307 :         END_CRIT_SECTION();
     303              : 
     304          307 :         LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
     305          307 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     306          307 :         UnlockReleaseBuffer(newbuf);
     307              : 
     308          307 :         if (extended)
     309              :         {
     310           11 :             RecordPageWithFreeSpace(idxrel, newblk, freespace);
     311           11 :             FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     312              :         }
     313              : 
     314          307 :         return true;
     315              :     }
     316              : }
     317              : 
     318              : /*
     319              :  * Return whether brin_doupdate can do a samepage update.
     320              :  */
     321              : bool
     322        26876 : brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
     323              : {
     324              :     return
     325        30844 :         ((newsz <= origsz) ||
     326         3968 :          PageGetExactFreeSpace(BufferGetPage(buffer)) >= (newsz - origsz));
     327              : }
     328              : 
     329              : /*
     330              :  * Insert an index tuple into the index relation.  The revmap is updated to
     331              :  * mark the range containing the given page as pointing to the inserted entry.
     332              :  * A WAL record is written.
     333              :  *
     334              :  * The buffer, if valid, is first checked for free space to insert the new
     335              :  * entry; if there isn't enough, a new buffer is obtained and pinned.  No
     336              :  * buffer lock must be held on entry, no buffer lock is held on exit.
     337              :  *
     338              :  * Return value is the offset number where the tuple was inserted.
     339              :  */
     340              : OffsetNumber
     341         2832 : brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
     342              :               BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk,
     343              :               const BrinTuple *tup, Size itemsz)
     344              : {
     345              :     Page        page;
     346              :     BlockNumber blk;
     347              :     OffsetNumber off;
     348         2832 :     Size        freespace = 0;
     349              :     Buffer      revmapbuf;
     350              :     ItemPointerData tid;
     351              :     bool        extended;
     352              : 
     353              :     Assert(itemsz == MAXALIGN(itemsz));
     354              : 
     355              :     /* If the item is oversized, don't even bother. */
     356         2832 :     if (itemsz > BrinMaxItemSize)
     357              :     {
     358            0 :         ereport(ERROR,
     359              :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     360              :                  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
     361              :                         itemsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
     362              :         return InvalidOffsetNumber; /* keep compiler quiet */
     363              :     }
     364              : 
     365              :     /* Make sure the revmap is long enough to contain the entry we need */
     366         2832 :     brinRevmapExtend(revmap, heapBlk);
     367              : 
     368              :     /*
     369              :      * Acquire lock on buffer supplied by caller, if any.  If it doesn't have
     370              :      * enough space, unpin it to obtain a new one below.
     371              :      */
     372         2832 :     if (BufferIsValid(*buffer))
     373              :     {
     374              :         /*
     375              :          * It's possible that another backend (or ourselves!) extended the
     376              :          * revmap over the page we held a pin on, so we cannot assume that
     377              :          * it's still a regular page.
     378              :          */
     379         1174 :         LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
     380         1174 :         if (br_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
     381              :         {
     382           60 :             UnlockReleaseBuffer(*buffer);
     383           60 :             *buffer = InvalidBuffer;
     384              :         }
     385              :     }
     386              : 
     387              :     /*
     388              :      * If we still don't have a usable buffer, have brin_getinsertbuffer
     389              :      * obtain one for us.
     390              :      */
     391         2832 :     if (!BufferIsValid(*buffer))
     392              :     {
     393              :         do
     394         1718 :             *buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended);
     395         1718 :         while (!BufferIsValid(*buffer));
     396              :     }
     397              :     else
     398         1114 :         extended = false;
     399              : 
     400              :     /* Now obtain lock on revmap buffer */
     401         2832 :     revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
     402              : 
     403         2832 :     page = BufferGetPage(*buffer);
     404         2832 :     blk = BufferGetBlockNumber(*buffer);
     405              : 
     406              :     /* Execute the actual insertion */
     407         2832 :     START_CRIT_SECTION();
     408         2832 :     if (extended)
     409          244 :         brin_page_init(page, BRIN_PAGETYPE_REGULAR);
     410         2832 :     off = PageAddItem(page, tup, itemsz, InvalidOffsetNumber, false, false);
     411         2832 :     if (off == InvalidOffsetNumber)
     412            0 :         elog(ERROR, "failed to add BRIN tuple to new page");
     413         2832 :     MarkBufferDirty(*buffer);
     414              : 
     415              :     /* needed to update FSM below */
     416         2832 :     if (extended)
     417          244 :         freespace = br_page_get_freespace(page);
     418              : 
     419         2832 :     ItemPointerSet(&tid, blk, off);
     420         2832 :     brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
     421         2832 :     MarkBufferDirty(revmapbuf);
     422              : 
     423              :     /* XLOG stuff */
     424         2832 :     if (RelationNeedsWAL(idxrel))
     425              :     {
     426              :         xl_brin_insert xlrec;
     427              :         XLogRecPtr  recptr;
     428              :         uint8       info;
     429              : 
     430         2775 :         info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
     431         2775 :         xlrec.heapBlk = heapBlk;
     432         2775 :         xlrec.pagesPerRange = pagesPerRange;
     433         2775 :         xlrec.offnum = off;
     434              : 
     435         2775 :         XLogBeginInsert();
     436         2775 :         XLogRegisterData(&xlrec, SizeOfBrinInsert);
     437              : 
     438         2775 :         XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
     439         2775 :         XLogRegisterBufData(0, tup, itemsz);
     440              : 
     441         2775 :         XLogRegisterBuffer(1, revmapbuf, 0);
     442              : 
     443         2775 :         recptr = XLogInsert(RM_BRIN_ID, info);
     444              : 
     445         2775 :         PageSetLSN(page, recptr);
     446         2775 :         PageSetLSN(BufferGetPage(revmapbuf), recptr);
     447              :     }
     448              : 
     449         2832 :     END_CRIT_SECTION();
     450              : 
     451              :     /* Tuple is firmly on buffer; we can release our locks */
     452         2832 :     LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
     453         2832 :     LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
     454              : 
     455              :     BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
     456              :                blk, off, heapBlk));
     457              : 
     458         2832 :     if (extended)
     459              :     {
     460          244 :         RecordPageWithFreeSpace(idxrel, blk, freespace);
     461          244 :         FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
     462              :     }
     463              : 
     464         2832 :     return off;
     465              : }
     466              : 
     467              : /*
     468              :  * Initialize a page with the given type.
     469              :  *
     470              :  * Caller is responsible for marking it dirty, as appropriate.
     471              :  */
     472              : void
     473          762 : brin_page_init(Page page, uint16 type)
     474              : {
     475          762 :     PageInit(page, BLCKSZ, sizeof(BrinSpecialSpace));
     476              : 
     477          762 :     BrinPageType(page) = type;
     478          762 : }
     479              : 
     480              : /*
     481              :  * Initialize a new BRIN index's metapage.
     482              :  */
     483              : void
     484          225 : brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
     485              : {
     486              :     BrinMetaPageData *metadata;
     487              : 
     488          225 :     brin_page_init(page, BRIN_PAGETYPE_META);
     489              : 
     490          225 :     metadata = (BrinMetaPageData *) PageGetContents(page);
     491              : 
     492          225 :     metadata->brinMagic = BRIN_META_MAGIC;
     493          225 :     metadata->brinVersion = version;
     494          225 :     metadata->pagesPerRange = pagesPerRange;
     495              : 
     496              :     /*
     497              :      * Note we cheat here a little.  0 is not a valid revmap block number
     498              :      * (because block 0 is the metapage), but doing this enables the first
     499              :      * revmap page to be created when the index itself is created.
     500              :      */
     501          225 :     metadata->lastRevmapPage = 0;
     502              : 
     503              :     /*
     504              :      * Set pd_lower just past the end of the metadata.  This is essential,
     505              :      * because without doing so, metadata will be lost if xlog.c compresses
     506              :      * the page.
     507              :      */
     508          225 :     ((PageHeader) page)->pd_lower =
     509          225 :         ((char *) metadata + sizeof(BrinMetaPageData)) - (char *) page;
     510          225 : }
     511              : 
     512              : /*
     513              :  * Initiate page evacuation protocol.
     514              :  *
     515              :  * The page must be locked in exclusive mode by the caller.
     516              :  *
     517              :  * If the page is not yet initialized or empty, return false without doing
     518              :  * anything; it can be used for revmap without any further changes.  If it
     519              :  * contains tuples, mark it for evacuation and return true.
     520              :  */
     521              : bool
     522          186 : brin_start_evacuating_page(Relation idxRel, Buffer buf)
     523              : {
     524              :     OffsetNumber off;
     525              :     OffsetNumber maxoff;
     526              :     Page        page;
     527              : 
     528          186 :     page = BufferGetPage(buf);
     529              : 
     530          186 :     if (PageIsNew(page))
     531          184 :         return false;
     532              : 
     533            2 :     maxoff = PageGetMaxOffsetNumber(page);
     534          292 :     for (off = FirstOffsetNumber; off <= maxoff; off++)
     535              :     {
     536              :         ItemId      lp;
     537              : 
     538          291 :         lp = PageGetItemId(page, off);
     539          291 :         if (ItemIdIsUsed(lp))
     540              :         {
     541              :             /*
     542              :              * Prevent other backends from adding more stuff to this page:
     543              :              * BRIN_EVACUATE_PAGE informs br_page_get_freespace that this page
     544              :              * can no longer be used to add new tuples.  Note that this flag
     545              :              * is not WAL-logged, except accidentally.
     546              :              */
     547            1 :             BrinPageFlags(page) |= BRIN_EVACUATE_PAGE;
     548            1 :             MarkBufferDirtyHint(buf, true);
     549              : 
     550            1 :             return true;
     551              :         }
     552              :     }
     553            1 :     return false;
     554              : }
     555              : 
     556              : /*
     557              :  * Move all tuples out of a page.
     558              :  *
     559              :  * The caller must hold a lock on the page; the lock and pin are released on return.
     560              :  */
     561              : void
     562            1 : brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
     563              :                    BrinRevmap *revmap, Buffer buf)
     564              : {
     565              :     OffsetNumber off;
     566              :     OffsetNumber maxoff;
     567              :     Page        page;
     568            1 :     BrinTuple  *btup = NULL;
     569            1 :     Size        btupsz = 0;
     570              : 
     571            1 :     page = BufferGetPage(buf);
     572              : 
     573              :     Assert(BrinPageFlags(page) & BRIN_EVACUATE_PAGE);
     574              : 
     575            1 :     maxoff = PageGetMaxOffsetNumber(page);
     576          292 :     for (off = FirstOffsetNumber; off <= maxoff; off++)
     577              :     {
     578              :         BrinTuple  *tup;
     579              :         Size        sz;
     580              :         ItemId      lp;
     581              : 
     582          291 :         CHECK_FOR_INTERRUPTS();
     583              : 
     584          291 :         lp = PageGetItemId(page, off);
     585          291 :         if (ItemIdIsUsed(lp))
     586              :         {
     587          291 :             sz = ItemIdGetLength(lp);
     588          291 :             tup = (BrinTuple *) PageGetItem(page, lp);
     589          291 :             tup = brin_copy_tuple(tup, sz, btup, &btupsz);
     590              : 
     591          291 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     592              : 
     593          291 :             if (!brin_doupdate(idxRel, pagesPerRange, revmap, tup->bt_blkno,
     594              :                                buf, off, tup, sz, tup, sz, false))
     595            0 :                 off--;          /* retry */
     596              : 
     597          291 :             LockBuffer(buf, BUFFER_LOCK_SHARE);
     598              : 
     599              :             /* It's possible that someone extended the revmap over this page */
     600          291 :             if (!BRIN_IS_REGULAR_PAGE(page))
     601            0 :                 break;
     602              :         }
     603              :     }
     604              : 
     605            1 :     UnlockReleaseBuffer(buf);
     606            1 : }
     607              : 
     608              : /*
     609              :  * Given a BRIN index page, initialize it if necessary, and record its
     610              :  * current free space in the FSM.
     611              :  *
     612              :  * The main use for this is when, during vacuuming, an uninitialized page is
     613              :  * found, which could be the result of relation extension followed by a crash
     614              :  * before the page can be used.
     615              :  *
     616              :  * Here, we don't bother to update upper FSM pages, instead expecting that our
     617              :  * caller (brin_vacuum_scan) will fix them at the end of the scan.  Elsewhere
     618              :  * in this file, it's generally a good idea to propagate additions of free
     619              :  * space into the upper FSM pages immediately.
                      :  *
                      :  * idxrel is the index relation; buf is one of its buffers.  This function
                      :  * never pins or unpins buf.  The buffer content lock is taken here only on
                      :  * the uninitialized-page path, and is always dropped again before
                      :  * returning.
                      :  *
                      :  * Metapages and revmap pages are left alone and are not recorded in FSM.
     620              :  */
     621              : void
     622          238 : brin_page_cleanup(Relation idxrel, Buffer buf)
     623              : {
     624          238 :     Page        page = BufferGetPage(buf);
     625              : 
     626              :     /*
     627              :      * If a page was left uninitialized, initialize it now; also record it in
     628              :      * FSM.
     629              :      *
     630              :      * Somebody else might be extending the relation concurrently.  To avoid
     631              :      * re-initializing the page before they can grab the buffer lock, we
     632              :      * acquire the extension lock momentarily.  Since they hold the extension
     633              :      * lock from before getting the page and after it's been initialized, we're
     634              :      * sure to see their initialization.
                      :      *
                      :      * PageIsNew is tested a second time once the exclusive buffer lock is
                      :      * held.  Only if the page is still new do we initialize it; that helper
                      :      * also records the page in FSM, so we return right after unlocking.
     635              :      */
     636          238 :     if (PageIsNew(page))
     637              :     {
     638            0 :         LockRelationForExtension(idxrel, ShareLock);
     639            0 :         UnlockRelationForExtension(idxrel, ShareLock);
     640              : 
     641            0 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     642            0 :         if (PageIsNew(page))
     643              :         {
     644            0 :             brin_initialize_empty_new_buffer(idxrel, buf);
     645            0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     646            0 :             return;
     647              :         }
     648            0 :         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     649              :     }
     650              : 
     651              :     /* Nothing to be done for non-regular index pages */
     652          238 :     if (BRIN_IS_META_PAGE(BufferGetPage(buf)) ||
     653          185 :         BRIN_IS_REVMAP_PAGE(BufferGetPage(buf)))
     654          106 :         return;
     655              : 
     656              :     /* Measure free space and record it (no buffer lock is taken for this) */
     657          132 :     RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf),
     658              :                             br_page_get_freespace(page));
     659              : }
     660              : 
     661              : /*
     662              :  * Return a pinned and exclusively locked buffer which can be used to insert an
     663              :  * index item of size itemsz (caller must ensure not to request sizes
     664              :  * impossible to fulfill).  If oldbuf is a valid buffer, it is also locked (in
     665              :  * an order determined to avoid deadlocks).
     666              :  *
     667              :  * If we find that the old page is no longer a regular index page (because
     668              :  * of a revmap extension), the old buffer is unlocked and we return
     669              :  * InvalidBuffer.
     670              :  *
     671              :  * If there's no existing page with enough free space to accommodate the new
     672              :  * item, the relation is extended.  If this happens, *extended is set to true,
     673              :  * and it is the caller's responsibility to initialize the page (and WAL-log
     674              :  * that fact) prior to use.  The caller should also update the FSM with the
     675              :  * page's remaining free space after the insertion.
     676              :  *
     677              :  * Note that the caller is not expected to update FSM unless *extended is set
     678              :  * true.  This policy means that we'll update FSM when a page is created, and
     679              :  * when it's found to have too little space for a desired tuple insertion,
     680              :  * but not every single time we add a tuple to the page.
     681              :  *
     682              :  * Note that in some corner cases it is possible for this routine to extend
     683              :  * the relation and then not return the new page.  It is this routine's
     684              :  * responsibility to WAL-log the page initialization and to record the page in
     685              :  * FSM if that happens, since the caller certainly can't do it.
                      :  *
                      :  * Summary of the possible outcomes:
                      :  *
                      :  * - A valid buffer is returned: it is pinned and exclusively locked, oldbuf
                      :  *   (if valid) is exclusively locked as well, and the relation's target
                      :  *   block has been set to the returned page.  The returned buffer may be
                      :  *   oldbuf itself.
                      :  *
                      :  * - InvalidBuffer is returned: oldbuf has been unlocked but is still
                      :  *   pinned, the candidate page's pin has been released, and *extended is
                      :  *   false.
                      :  *
                      :  * - A page obtained by extension cannot hold itemsz: the page is
                      :  *   initialized and recorded in FSM, then an ERROR is raised.
     686              :  */
     687              : static Buffer
     688         2025 : brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
     689              :                      bool *extended)
     690              : {
     691              :     BlockNumber oldblk;
     692              :     BlockNumber newblk;
     693              :     Page        page;
     694              :     Size        freespace;
     695              : 
     696              :     /* callers must have checked */
     697              :     Assert(itemsz <= BrinMaxItemSize);
     698              : 
     699         2025 :     if (BufferIsValid(oldbuf))
     700          307 :         oldblk = BufferGetBlockNumber(oldbuf);
     701              :     else
     702         1718 :         oldblk = InvalidBlockNumber;
     703              : 
     704              :     /* Choose initial target page, re-using existing target if known */
     705         2025 :     newblk = RelationGetTargetBlock(irel);
     706         2025 :     if (newblk == InvalidBlockNumber)
     707          220 :         newblk = GetPageWithFreeSpace(irel, itemsz);
     708              : 
     709              :     /*
     710              :      * Loop until we find a page with sufficient free space.  By the time we
     711              :      * return a buffer out of this loop, it and oldbuf (if valid) are locked;
     712              :      * if we have to restart here, neither page is locked and newblk isn't
     713              :      * pinned (if it's even valid).
     714              :      */
     715              :     for (;;)
     716           77 :     {
     717              :         Buffer      buf;
     718         2102 :         bool        extensionLockHeld = false;
     719              : 
     720         2102 :         CHECK_FOR_INTERRUPTS();
     721              : 
     722         2102 :         *extended = false;
     723              : 
     724         2102 :         if (newblk == InvalidBlockNumber)
     725              :         {
     726              :             /*
     727              :              * There's not enough free space in any existing index page,
     728              :              * according to the FSM: extend the relation to obtain a shiny new
     729              :              * page.
     730              :              *
     731              :              * XXX: It's likely possible to use RBM_ZERO_AND_LOCK here,
     732              :              * which'd avoid the need to hold the extension lock during buffer
     733              :              * reclaim.
     734              :              */
     735          255 :             if (!RELATION_IS_LOCAL(irel))
     736              :             {
     737           29 :                 LockRelationForExtension(irel, ExclusiveLock);
     738           29 :                 extensionLockHeld = true;
     739              :             }
     740          255 :             buf = ReadBuffer(irel, P_NEW);
     741          255 :             newblk = BufferGetBlockNumber(buf);
     742          255 :             *extended = true;
     743              : 
     744              :             BRIN_elog((DEBUG2, "brin_getinsertbuffer: extending to page %u",
     745              :                        BufferGetBlockNumber(buf)));
     746              :         }
     747         1847 :         else if (newblk == oldblk)
     748              :         {
     749              :             /*
     750              :              * There's an odd corner-case here where the FSM is out-of-date,
     751              :              * and gave us the old page.
                      :              * Reuse the caller's buffer rather than reading it again.
     752              :              */
     753           13 :             buf = oldbuf;
     754              :         }
     755              :         else
     756              :         {
     757         1834 :             buf = ReadBuffer(irel, newblk);
     758              :         }
     759              : 
     760              :         /*
     761              :          * We lock the old buffer first, if it's earlier than the new one; but
     762              :          * then we need to check that it hasn't been turned into a revmap page
     763              :          * concurrently.  If we detect that that happened, give up and tell
     764              :          * caller to start over.
     765              :          */
     766         2102 :         if (BufferIsValid(oldbuf) && oldblk < newblk)
     767              :         {
     768          311 :             LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
     769          311 :             if (!BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
     770              :             {
     771            0 :                 LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     772              : 
     773              :                 /*
     774              :                  * It is possible that the new page was obtained from
     775              :                  * extending the relation.  In that case, we must be sure to
     776              :                  * record it in the FSM before leaving, because otherwise the
     777              :                  * space would be lost forever.  However, we cannot let an
     778              :                  * uninitialized page get in the FSM, so we need to initialize
     779              :                  * it first.
                      :                  *
                      :                  * Note that buf has not been locked by us at this point.
     780              :                  */
     781            0 :                 if (*extended)
     782            0 :                     brin_initialize_empty_new_buffer(irel, buf);
     783              : 
     784            0 :                 if (extensionLockHeld)
     785            0 :                     UnlockRelationForExtension(irel, ExclusiveLock);
     786              : 
     787            0 :                 ReleaseBuffer(buf);
     788              : 
     789            0 :                 if (*extended)
     790              :                 {
     791            0 :                     FreeSpaceMapVacuumRange(irel, newblk, newblk + 1);
     792              :                     /* shouldn't matter, but don't confuse caller */
     793            0 :                     *extended = false;
     794              :                 }
     795              : 
     796            0 :                 return InvalidBuffer;
     797              :             }
     798              :         }
     799              : 
     800         2102 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     801              : 
     802         2102 :         if (extensionLockHeld)
     803           29 :             UnlockRelationForExtension(irel, ExclusiveLock);
     804              : 
     805         2102 :         page = BufferGetPage(buf);
     806              : 
     807              :         /*
     808              :          * We have a new buffer to insert into.  Check that the new page has
     809              :          * enough free space, and return it if it does; otherwise start over.
     810              :          * (br_page_get_freespace also checks that the FSM didn't hand us a
     811              :          * page that has since been repurposed for the revmap.)
                      :          *
                      :          * A page we just extended onto is not measured: this function has
                      :          * not initialized it, so it is credited with BrinMaxItemSize.
     812              :          */
     813         4204 :         freespace = *extended ?
     814         2102 :             BrinMaxItemSize : br_page_get_freespace(page);
     815         2102 :         if (freespace >= itemsz)
     816              :         {
     817         2025 :             RelationSetTargetBlock(irel, newblk);
     818              : 
     819              :             /*
     820              :              * Lock the old buffer if not locked already.  Note that in this
     821              :              * case we know for sure it's a regular page: it's later than the
     822              :              * new page we just got, which is not a revmap page, and revmap
     823              :              * pages are always consecutive.
     824              :              */
     825         2025 :             if (BufferIsValid(oldbuf) && oldblk > newblk)
     826              :             {
     827            0 :                 LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
     828              :                 Assert(BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)));
     829              :             }
     830              : 
     831         2025 :             return buf;
     832              :         }
     833              : 
     834              :         /* This page is no good. */
     835              : 
     836              :         /*
     837              :          * If an entirely new page does not contain enough free space for the
     838              :          * new item, then surely that item is oversized.  Complain loudly; but
     839              :          * first make sure we initialize the page and record it as free, for
     840              :          * next time.
     841              :          */
     842           77 :         if (*extended)
     843              :         {
     844            0 :             brin_initialize_empty_new_buffer(irel, buf);
     845              :             /* since this should not happen, skip FreeSpaceMapVacuum */
     846              : 
     847            0 :             ereport(ERROR,
     848              :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     849              :                      errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
     850              :                             itemsz, freespace, RelationGetRelationName(irel))));
     851              :             return InvalidBuffer;   /* keep compiler quiet */
     852              :         }
     853              : 
     854           77 :         if (newblk != oldblk)
     855           64 :             UnlockReleaseBuffer(buf);
     856           77 :         if (BufferIsValid(oldbuf) && oldblk <= newblk)
     857           17 :             LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     858              : 
     859              :         /*
     860              :          * Update the FSM with the new, presumably smaller, freespace value
     861              :          * for this page, then search for a new target page.
     862              :          */
     863           77 :         newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
     864              :     }
     865              : }
     866              : 
     867              : /*
     868              :  * Initialize a page as an empty regular BRIN page, WAL-log this, and record
     869              :  * the page in FSM.
     870              :  *
     871              :  * There are several corner situations in which we extend the relation to
     872              :  * obtain a new page and later find that we cannot use it immediately.  When
     873              :  * that happens, we don't want to let the page go unrecorded in FSM, because
     874              :  * there is no mechanism to get the space back and the index would bloat.
     875              :  * Also, because we would not WAL-log the action that would initialize the
     876              :  * page, the page would go uninitialized in a standby (or after recovery).
     877              :  *
     878              :  * While we record the page in FSM here, caller is responsible for doing FSM
     879              :  * upper-page update if that seems appropriate.
                      :  *
                      :  * idxrel is the index relation and buffer the page to initialize.  No
                      :  * buffer lock is acquired or released in here.  The page initialization,
                      :  * the dirty marking and (when the relation needs WAL) the new-page WAL
                      :  * record all happen inside one critical section; the FSM update follows
                      :  * after it.
     880              :  */
     881              : static void
     882            0 : brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
     883              : {
     884              :     Page        page;
     885              : 
     886              :     BRIN_elog((DEBUG2,
     887              :                "brin_initialize_empty_new_buffer: initializing blank page %u",
     888              :                BufferGetBlockNumber(buffer)));
     889              : 
     890            0 :     START_CRIT_SECTION();
     891            0 :     page = BufferGetPage(buffer);
     892            0 :     brin_page_init(page, BRIN_PAGETYPE_REGULAR);
     893            0 :     MarkBufferDirty(buffer);
     894              : 
     895              :     /* XLOG stuff: log the freshly initialized page, if WAL is needed */
     896            0 :     if (RelationNeedsWAL(idxrel))
     897            0 :         log_newpage_buffer(buffer, true);
     898              : 
     899            0 :     END_CRIT_SECTION();
     900              : 
     901              :     /*
     902              :      * We update the FSM for this page, but this is not WAL-logged.  This is
     903              :      * acceptable because VACUUM will scan the index and update the FSM with
     904              :      * pages whose FSM records were forgotten in a crash.
     905              :      */
     906            0 :     RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
     907              :                             br_page_get_freespace(page));
     908            0 : }
     909              : 
     910              : 
     911              : /*
     912              :  * Return the amount of free space on a regular BRIN index page.
     913              :  *
     914              :  * If the page is not a regular page, or has been marked with the
     915              :  * BRIN_EVACUATE_PAGE flag, returns 0.
                      :  * Otherwise the result is whatever PageGetFreeSpace reports for the page.
     916              :  */
     917              : static Size
     918         3408 : br_page_get_freespace(Page page)
     919              : {
     920         3408 :     if (!BRIN_IS_REGULAR_PAGE(page) ||
     921         3408 :         (BrinPageFlags(page) & BRIN_EVACUATE_PAGE) != 0)
     922            0 :         return 0;
     923              :     else
     924         3408 :         return PageGetFreeSpace(page);
     925              : }
        

Generated by: LCOV version 2.0-1