LCOV - code coverage report
Current view: top level - src/backend/access/brin - brin_pageops.c (source / functions)
Test: PostgreSQL 13devel
Date: 2019-09-22 08:06:49
Lines: 198 hit of 287 total (69.0 % coverage)
Functions: 9 hit of 11 total (81.8 % coverage)
Legend: lines are marked as hit or not hit

          Line data    Source code
       1             : /*
       2             :  * brin_pageops.c
       3             :  *      Page-handling routines for BRIN indexes
       4             :  *
       5             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       6             :  * Portions Copyright (c) 1994, Regents of the University of California
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    src/backend/access/brin/brin_pageops.c
      10             :  */
      11             : #include "postgres.h"
      12             : 
      13             : #include "access/brin_pageops.h"
      14             : #include "access/brin_page.h"
      15             : #include "access/brin_revmap.h"
      16             : #include "access/brin_xlog.h"
      17             : #include "access/xloginsert.h"
      18             : #include "miscadmin.h"
      19             : #include "storage/bufmgr.h"
      20             : #include "storage/freespace.h"
      21             : #include "storage/lmgr.h"
      22             : #include "storage/smgr.h"
      23             : #include "utils/rel.h"
      24             : 
      25             : 
      26             : /*
      27             :  * Maximum size of an entry in a BRIN_PAGETYPE_REGULAR page.  We can tolerate
      28             :  * a single item per page, unlike other index AMs.
                     :  *
                     :  * The limit is what remains of a block (BLCKSZ) after subtracting the
                     :  * MAXALIGN'd page header plus one ItemIdData, and the MAXALIGN'd
                     :  * BrinSpecialSpace; the result is rounded down with MAXALIGN_DOWN.
      29             :  */
      30             : #define BrinMaxItemSize \
      31             :     MAXALIGN_DOWN(BLCKSZ - \
      32             :                   (MAXALIGN(SizeOfPageHeaderData + \
      33             :                             sizeof(ItemIdData)) + \
      34             :                    MAXALIGN(sizeof(BrinSpecialSpace))))
      35             : 
      36             : static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
      37             :                                    bool *extended);
      38             : static Size br_page_get_freespace(Page page);
      39             : static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer);
      40             : 
      41             : 
      42             : /*
      43             :  * Update tuple origtup (size origsz), located at offset oldoff of buffer
      44             :  * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
      45             :  * at heapBlk.  oldbuf must not be locked on entry, and is not locked at exit.
                     :  * newsz must be MAXALIGN'd; a newtup larger than BrinMaxItemSize raises an
                     :  * error.
      46             :  *
      47             :  * If samepage is true, only the same page is considered: no other buffer is
      48             :  * obtained, and false is returned if the new tuple turns out not to fit.
                     :  * If samepage is false, another buffer is requested from
                     :  * brin_getinsertbuffer first; the tuple is still replaced in place when the
                     :  * old page has room and is not flagged BRIN_EVACUATE_PAGE, and is put on the
                     :  * other buffer otherwise.
      49             :  *
      50             :  * If the update is successful, return true; if the tuple was put on another
      51             :  * page, the revmap is updated to point to it.  If the update is not done for
      52             :  * whatever reason, return false.  Caller may retry the update if this happens.
      53             :  */
      54             : bool
      55         934 : brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
      56             :               BrinRevmap *revmap, BlockNumber heapBlk,
      57             :               Buffer oldbuf, OffsetNumber oldoff,
      58             :               const BrinTuple *origtup, Size origsz,
      59             :               const BrinTuple *newtup, Size newsz,
      60             :               bool samepage)
      61             : {
      62             :     Page        oldpage;
      63             :     ItemId      oldlp;
      64             :     BrinTuple  *oldtup;
      65             :     Size        oldsz;
      66             :     Buffer      newbuf;
      67         934 :     BlockNumber newblk = InvalidBlockNumber;  /* set only when a separate new buffer is used */
      68             :     bool        extended;
      69             : 
      70             :     Assert(newsz == MAXALIGN(newsz));
      71             : 
      72             :     /* If the item is oversized, don't bother. */
      73         934 :     if (newsz > BrinMaxItemSize)
      74             :     {
      75           0 :         ereport(ERROR,
      76             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
      77             :                  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
      78             :                         newsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
      79             :         return false;           /* keep compiler quiet */
      80             :     }
      81             : 
      82             :     /* make sure the revmap is long enough to contain the entry we need */
      83         934 :     brinRevmapExtend(revmap, heapBlk);
      84             : 
      85         934 :     if (!samepage)
      86             :     {
      87             :         /* need a page on which to put the item; oldbuf is unlocked on every
                     :          * path below, so this call presumably returns with oldbuf locked
                     :          * too -- TODO confirm in brin_getinsertbuffer */
      88           4 :         newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended);
      89           4 :         if (!BufferIsValid(newbuf))
      90             :         {
      91             :             Assert(!extended);
      92           0 :             return false;
      93             :         }
      94             : 
      95             :         /*
      96             :          * Note: it's possible (though unlikely) that the returned newbuf is
      97             :          * the same as oldbuf, if brin_getinsertbuffer determined that the old
      98             :          * buffer does in fact have enough space.
                     :          * In that case newbuf is reset to InvalidBuffer, so the code below
                     :          * behaves as if no other buffer had been obtained.
      99             :          */
     100           4 :         if (newbuf == oldbuf)
     101             :         {
     102             :             Assert(!extended);
     103           0 :             newbuf = InvalidBuffer;
     104             :         }
     105             :         else
     106           4 :             newblk = BufferGetBlockNumber(newbuf);
     107             :     }
     108             :     else
     109             :     {
     110         930 :         LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
     111         930 :         newbuf = InvalidBuffer;
     112         930 :         extended = false;
     113             :     }
     114         934 :     oldpage = BufferGetPage(oldbuf);
     115         934 :     oldlp = PageGetItemId(oldpage, oldoff);
     116             : 
     117             :     /*
     118             :      * Check that the old tuple wasn't updated concurrently: it might have
     119             :      * moved someplace else entirely, and for that matter the whole page
     120             :      * might've become a revmap page.  Note that in the first two cases
     121             :      * checked here, the "oldlp" we just calculated is garbage; but
     122             :      * PageGetItemId() is simple enough that it was safe to do that
     123             :      * calculation anyway.
     124             :      */
     125        1868 :     if (!BRIN_IS_REGULAR_PAGE(oldpage) ||
     126        1868 :         oldoff > PageGetMaxOffsetNumber(oldpage) ||
     127         934 :         !ItemIdIsNormal(oldlp))
     128             :     {
     129           0 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     130             : 
     131             :         /*
     132             :          * If this happens, and the new buffer was obtained by extending the
     133             :          * relation, then we need to ensure we don't leave it uninitialized or
     134             :          * forget about it.
     135             :          */
     136           0 :         if (BufferIsValid(newbuf))
     137             :         {
     138           0 :             if (extended)
     139           0 :                 brin_initialize_empty_new_buffer(idxrel, newbuf);
     140           0 :             UnlockReleaseBuffer(newbuf);
     141           0 :             if (extended)
     142           0 :                 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     143             :         }
     144           0 :         return false;
     145             :     }
     146             : 
     147         934 :     oldsz = ItemIdGetLength(oldlp);
     148         934 :     oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp);
     149             : 
     150             :     /*
     151             :      * ... or it might have been updated in place to different contents.
     152             :      */
     153         934 :     if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz))
     154             :     {
     155           0 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     156           0 :         if (BufferIsValid(newbuf))
     157             :         {
     158             :             /* As above, initialize and record new page if we got one */
     159           0 :             if (extended)
     160           0 :                 brin_initialize_empty_new_buffer(idxrel, newbuf);
     161           0 :             UnlockReleaseBuffer(newbuf);
     162           0 :             if (extended)
     163           0 :                 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     164             :         }
     165           0 :         return false;
     166             :     }
     167             : 
     168             :     /*
     169             :      * Great, the old tuple is intact.  We can proceed with the update.
     170             :      *
     171             :      * If there's enough room in the old page for the new tuple, replace it.
                     :      * This is never done on a page flagged BRIN_EVACUATE_PAGE.
     172             :      *
     173             :      * Note that there might now be enough space on the page even though the
     174             :      * caller told us there isn't, if a concurrent update moved another tuple
     175             :      * elsewhere or replaced a tuple with a smaller one.
     176             :      */
     177        1868 :     if (((BrinPageFlags(oldpage) & BRIN_EVACUATE_PAGE) == 0) &&
     178         934 :         brin_can_do_samepage_update(oldbuf, origsz, newsz))
     179             :     {
     180         930 :         START_CRIT_SECTION();
     181         930 :         if (!PageIndexTupleOverwrite(oldpage, oldoff, (Item) unconstify(BrinTuple *, newtup), newsz))
     182           0 :             elog(ERROR, "failed to replace BRIN tuple");
     183         930 :         MarkBufferDirty(oldbuf);
     184             : 
     185             :         /* XLOG stuff */
     186         930 :         if (RelationNeedsWAL(idxrel))
     187             :         {
     188             :             xl_brin_samepage_update xlrec;
     189             :             XLogRecPtr  recptr;
     190         930 :             uint8       info = XLOG_BRIN_SAMEPAGE_UPDATE;
     191             : 
     192         930 :             xlrec.offnum = oldoff;
     193             : 
     194         930 :             XLogBeginInsert();
     195         930 :             XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
     196             : 
     197         930 :             XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
     198         930 :             XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup), newsz);
     199             : 
     200         930 :             recptr = XLogInsert(RM_BRIN_ID, info);
     201             : 
     202         930 :             PageSetLSN(oldpage, recptr);
     203             :         }
     204             : 
     205         930 :         END_CRIT_SECTION();
     206             : 
     207         930 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     208             : 
     209         930 :         if (BufferIsValid(newbuf))
     210             :         {
     211             :             /* As above, initialize and record new page if we got one */
     212           0 :             if (extended)
     213           0 :                 brin_initialize_empty_new_buffer(idxrel, newbuf);
     214           0 :             UnlockReleaseBuffer(newbuf);
     215           0 :             if (extended)
     216           0 :                 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     217             :         }
     218             : 
     219         930 :         return true;
     220             :     }
     221           4 :     else if (newbuf == InvalidBuffer)
     222             :     {
     223             :         /*
     224             :          * Not enough space, but caller said that there was. Tell them to
     225             :          * start over.
                     :          * This branch is also taken when the page is flagged
                     :          * BRIN_EVACUATE_PAGE, or when brin_getinsertbuffer returned oldbuf
                     :          * itself (newbuf was reset to InvalidBuffer above).
     226             :          */
     227           0 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     228           0 :         return false;
     229             :     }
     230             :     else
     231             :     {
     232             :         /*
     233             :          * Not enough free space on the oldpage. Put the new tuple on the new
     234             :          * page, and update the revmap.
     235             :          */
     236           4 :         Page        newpage = BufferGetPage(newbuf);
     237             :         Buffer      revmapbuf;
     238             :         ItemPointerData newtid;
     239             :         OffsetNumber newoff;
     240           4 :         Size        freespace = 0;  /* computed only when extended */
     241             : 
     242           4 :         revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
     243             : 
     244           4 :         START_CRIT_SECTION();
     245             : 
     246             :         /*
     247             :          * We need to initialize the page if it's newly obtained.  Note we
     248             :          * will WAL-log the initialization as part of the update, so we don't
     249             :          * need to do that here.
     250             :          */
     251           4 :         if (extended)
     252           4 :             brin_page_init(newpage, BRIN_PAGETYPE_REGULAR);
     253             : 
     254           4 :         PageIndexTupleDeleteNoCompact(oldpage, oldoff);
     255           4 :         newoff = PageAddItem(newpage, (Item) unconstify(BrinTuple *, newtup), newsz,
     256             :                              InvalidOffsetNumber, false, false);
     257           4 :         if (newoff == InvalidOffsetNumber)
     258           0 :             elog(ERROR, "failed to add BRIN tuple to new page");
     259           4 :         MarkBufferDirty(oldbuf);
     260           4 :         MarkBufferDirty(newbuf);
     261             : 
     262             :         /* needed to update FSM below */
     263           4 :         if (extended)
     264           4 :             freespace = br_page_get_freespace(newpage);
     265             : 
     266           4 :         ItemPointerSet(&newtid, newblk, newoff);
     267           4 :         brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
     268           4 :         MarkBufferDirty(revmapbuf);
     269             : 
     270             :         /* XLOG stuff */
     271           4 :         if (RelationNeedsWAL(idxrel))
     272             :         {
     273             :             xl_brin_update xlrec;
     274             :             XLogRecPtr  recptr;
     275             :             uint8       info;
     276             : 
     277           4 :             info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
     278             : 
     279           4 :             xlrec.insert.offnum = newoff;
     280           4 :             xlrec.insert.heapBlk = heapBlk;
     281           4 :             xlrec.insert.pagesPerRange = pagesPerRange;
     282           4 :             xlrec.oldOffnum = oldoff;
     283             : 
     284           4 :             XLogBeginInsert();
     285             : 
     286             :             /* new page */
     287           4 :             XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
     288             : 
     289           4 :             XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
     290           4 :             XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup), newsz);
     291             : 
     292             :             /* revmap page */
     293           4 :             XLogRegisterBuffer(1, revmapbuf, 0);
     294             : 
     295             :             /* old page */
     296           4 :             XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);
     297             : 
     298           4 :             recptr = XLogInsert(RM_BRIN_ID, info);
     299             : 
     300           4 :             PageSetLSN(oldpage, recptr);
     301           4 :             PageSetLSN(newpage, recptr);
     302           4 :             PageSetLSN(BufferGetPage(revmapbuf), recptr);
     303             :         }
     304             : 
     305           4 :         END_CRIT_SECTION();
     306             : 
     307           4 :         LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
     308           4 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     309           4 :         UnlockReleaseBuffer(newbuf);
     310             : 
     311           4 :         if (extended)
     312             :         {
     313           4 :             RecordPageWithFreeSpace(idxrel, newblk, freespace);
     314           4 :             FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     315             :         }
     316             : 
     317           4 :         return true;
     318             :     }
     319             : }
     320             : 
     321             : /*
     322             :  * Return whether brin_doupdate can do a samepage update.
                     :  *
                     :  * That is the case when the new tuple is no larger than the original one
                     :  * (newsz <= origsz), or when the page held in "buffer" has at least
                     :  * (newsz - origsz) bytes of exact free space.
     323             :  */
     324             : bool
     325        1868 : brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
     326             : {
     327             :     return
     328        1972 :         ((newsz <= origsz) ||
     329         104 :          PageGetExactFreeSpace(BufferGetPage(buffer)) >= (newsz - origsz));
     330             : }
     331             : 
     332             : /*
     333             :  * Insert an index tuple into the index relation.  The revmap is updated to
     334             :  * mark the range containing the given page as pointing to the inserted entry.
     335             :  * A WAL record is written if the index relation needs WAL.
     336             :  *
     337             :  * The buffer, if valid, is first checked for free space to insert the new
     338             :  * entry; if there isn't enough, a new buffer is obtained and pinned.  No
     339             :  * buffer lock must be held on entry, no buffer lock is held on exit.
                     :  *
                     :  * If the supplied buffer lacks space, it is unlocked and released and
                     :  * *buffer is replaced.  On return, *buffer is the buffer that received the
                     :  * tuple; it is unlocked but not released here.
                     :  *
                     :  * itemsz must be MAXALIGN'd; a tuple larger than BrinMaxItemSize raises an
                     :  * error.
     340             :  *
     341             :  * Return value is the offset number where the tuple was inserted.
     342             :  */
     343             : OffsetNumber
     344         654 : brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
     345             :               BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk,
     346             :               BrinTuple *tup, Size itemsz)
     347             : {
     348             :     Page        page;
     349             :     BlockNumber blk;
     350             :     OffsetNumber off;
     351         654 :     Size        freespace = 0;  /* computed only when extended */
     352             :     Buffer      revmapbuf;
     353             :     ItemPointerData tid;
     354             :     bool        extended;
     355             : 
     356             :     Assert(itemsz == MAXALIGN(itemsz));
     357             : 
     358             :     /* If the item is oversized, don't even bother. */
     359         654 :     if (itemsz > BrinMaxItemSize)
     360             :     {
     361           0 :         ereport(ERROR,
     362             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     363             :                  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
     364             :                         itemsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
     365             :         return InvalidOffsetNumber; /* keep compiler quiet */
     366             :     }
     367             : 
     368             :     /* Make sure the revmap is long enough to contain the entry we need */
     369         654 :     brinRevmapExtend(revmap, heapBlk);
     370             : 
     371             :     /*
     372             :      * Acquire lock on buffer supplied by caller, if any.  If it doesn't have
     373             :      * enough space, unpin it to obtain a new one below.
     374             :      */
     375         654 :     if (BufferIsValid(*buffer))
     376             :     {
     377             :         /*
     378             :          * It's possible that another backend (or ourselves!) extended the
     379             :          * revmap over the page we held a pin on, so we cannot assume that
     380             :          * it's still a regular page.
     381             :          */
     382         578 :         LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
     383         578 :         if (br_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
     384             :         {
     385          36 :             UnlockReleaseBuffer(*buffer);
     386          36 :             *buffer = InvalidBuffer;
     387             :         }
     388             :     }
     389             : 
     390             :     /*
     391             :      * If we still don't have a usable buffer, have brin_getinsertbuffer
     392             :      * obtain one for us, calling it again until it returns a valid buffer.
     393             :      */
     394         654 :     if (!BufferIsValid(*buffer))
     395             :     {
     396             :         do
     397         112 :             *buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended);
     398         112 :         while (!BufferIsValid(*buffer));
     399             :     }
     400             :     else
     401         542 :         extended = false;
     402             : 
     403             :     /* Now obtain lock on revmap buffer */
     404         654 :     revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
     405             : 
     406         654 :     page = BufferGetPage(*buffer);
     407         654 :     blk = BufferGetBlockNumber(*buffer);
     408             : 
     409             :     /* Execute the actual insertion */
     410         654 :     START_CRIT_SECTION();
     411         654 :     if (extended)
     412          70 :         brin_page_init(page, BRIN_PAGETYPE_REGULAR);
     413         654 :     off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
     414             :                       false, false);
     415         654 :     if (off == InvalidOffsetNumber)
     416           0 :         elog(ERROR, "failed to add BRIN tuple to new page");
     417         654 :     MarkBufferDirty(*buffer);
     418             : 
     419             :     /* needed to update FSM below */
     420         654 :     if (extended)
     421          70 :         freespace = br_page_get_freespace(page);
     422             : 
     423         654 :     ItemPointerSet(&tid, blk, off);
     424         654 :     brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
     425         654 :     MarkBufferDirty(revmapbuf);
     426             : 
     427             :     /* XLOG stuff */
     428         654 :     if (RelationNeedsWAL(idxrel))
     429             :     {
     430             :         xl_brin_insert xlrec;
     431             :         XLogRecPtr  recptr;
     432             :         uint8       info;
     433             : 
     434         654 :         info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
     435         654 :         xlrec.heapBlk = heapBlk;
     436         654 :         xlrec.pagesPerRange = pagesPerRange;
     437         654 :         xlrec.offnum = off;
     438             : 
     439         654 :         XLogBeginInsert();
     440         654 :         XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
     441             : 
     442         654 :         XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
     443         654 :         XLogRegisterBufData(0, (char *) tup, itemsz);
     444             : 
     445         654 :         XLogRegisterBuffer(1, revmapbuf, 0);
     446             : 
     447         654 :         recptr = XLogInsert(RM_BRIN_ID, info);
     448             : 
     449         654 :         PageSetLSN(page, recptr);
     450         654 :         PageSetLSN(BufferGetPage(revmapbuf), recptr);
     451             :     }
     452             : 
     453         654 :     END_CRIT_SECTION();
     454             : 
     455             :     /* Tuple is firmly on buffer; we can release our locks */
     456         654 :     LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
     457         654 :     LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
     458             : 
     459             :     BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
     460             :                blk, off, heapBlk));
     461             : 
     462         654 :     if (extended)
     463             :     {
     464          70 :         RecordPageWithFreeSpace(idxrel, blk, freespace);
     465          70 :         FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
     466             :     }
     467             : 
     468         654 :     return off;
     469             : }
     470             : 
     471             : /*
     472             :  * Initialize a page with the given type.
                     :  *
                     :  * The page is set up through PageInit as a BLCKSZ-sized page with a special
                     :  * space of sizeof(BrinSpecialSpace), and "type" is stored in BrinPageType.
     473             :  *
     474             :  * Caller is responsible for marking it dirty, as appropriate.
     475             :  */
     476             : void
     477         142 : brin_page_init(Page page, uint16 type)
     478             : {
     479         142 :     PageInit(page, BLCKSZ, sizeof(BrinSpecialSpace));
     480             : 
     481         142 :     BrinPageType(page) = type;
     482         142 : }
     483             : 
     484             : /*
     485             :  * Initialize a new BRIN index's metapage.
                     :  *
                     :  * The page is initialized as BRIN_PAGETYPE_META, and its contents record
                     :  * BRIN_META_MAGIC, the given version and pagesPerRange, and a
                     :  * lastRevmapPage of 0.  pd_lower is then moved past the metadata.
     486             :  */
     487             : void
     488          34 : brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
     489             : {
     490             :     BrinMetaPageData *metadata;
     491             : 
     492          34 :     brin_page_init(page, BRIN_PAGETYPE_META);
     493             : 
     494          34 :     metadata = (BrinMetaPageData *) PageGetContents(page);
     495             : 
     496          34 :     metadata->brinMagic = BRIN_META_MAGIC;
     497          34 :     metadata->brinVersion = version;
     498          34 :     metadata->pagesPerRange = pagesPerRange;
     499             : 
     500             :     /*
     501             :      * Note we cheat here a little.  0 is not a valid revmap block number
     502             :      * (because it's the metapage itself), but doing this enables the first
     503             :      * revmap page to be created when the index is.
     504             :      */
     505          34 :     metadata->lastRevmapPage = 0;
     506             : 
     507             :     /*
     508             :      * Set pd_lower just past the end of the metadata.  This is essential,
     509             :      * because without doing so, metadata will be lost if xlog.c compresses
     510             :      * the page.
     511             :      */
     512          34 :     ((PageHeader) page)->pd_lower =
     513          34 :         ((char *) metadata + sizeof(BrinMetaPageData)) - (char *) page;
     514          34 : }
     515             : 
     516             : /*
     517             :  * Initiate page evacuation protocol.
     518             :  *
     519             :  * The page must be locked in exclusive mode by the caller.
     520             :  *
     521             :  * If the page is not yet initialized or empty, return false without doing
     522             :  * anything; it can be used for revmap without any further changes.  If it
     523             :  * contains tuples, mark it for evacuation and return true.
                     :  *
                     :  * "Empty" here means that no line pointer on the page is in use.  Marking
                     :  * sets BRIN_EVACUATE_PAGE in the page flags and dirties the buffer through
                     :  * MarkBufferDirtyHint.  idxRel is not used by this function.
     524             :  */
     525             : bool
     526          34 : brin_start_evacuating_page(Relation idxRel, Buffer buf)
     527             : {
     528             :     OffsetNumber off;
     529             :     OffsetNumber maxoff;
     530             :     Page        page;
     531             : 
     532          34 :     page = BufferGetPage(buf);
     533             : 
     534          34 :     if (PageIsNew(page))
     535          34 :         return false;
     536             : 
     537           0 :     maxoff = PageGetMaxOffsetNumber(page);
     538           0 :     for (off = FirstOffsetNumber; off <= maxoff; off++)
     539             :     {
     540             :         ItemId      lp;
     541             : 
     542           0 :         lp = PageGetItemId(page, off);
     543           0 :         if (ItemIdIsUsed(lp))
     544             :         {
     545             :             /* prevent other backends from adding more stuff to this page */
     546           0 :             BrinPageFlags(page) |= BRIN_EVACUATE_PAGE;
     547           0 :             MarkBufferDirtyHint(buf, true);
     548             : 
     549           0 :             return true;
     550             :         }
     551             :     }
     552           0 :     return false;
     553             : }
     554             : 
     555             : /*
     556             :  * Move all tuples out of a page.
                     :  *
                     :  * Each used item is copied, the page lock is dropped, and brin_doupdate is
                     :  * called with samepage = false and the copy as both old and new tuple; if
                     :  * that returns false, the same offset is tried again.  The loop stops early
                     :  * if the page is found to be no longer a regular page.
     557             :  *
     558             :  * The caller must hold lock on the page. The lock and pin are released.
     559             :  */
     560             : void
     561           0 : brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
     562             :                    BrinRevmap *revmap, Buffer buf)
     563             : {
     564             :     OffsetNumber off;
     565             :     OffsetNumber maxoff;
     566             :     Page        page;
     567           0 :     BrinTuple  *btup = NULL;  /* NOTE(review): never reassigned, so NULL on every brin_copy_tuple call */
     568           0 :     Size        btupsz = 0;
     569             : 
     570           0 :     page = BufferGetPage(buf);
     571             : 
     572             :     Assert(BrinPageFlags(page) & BRIN_EVACUATE_PAGE);
     573             : 
     574           0 :     maxoff = PageGetMaxOffsetNumber(page);
     575           0 :     for (off = FirstOffsetNumber; off <= maxoff; off++)
     576             :     {
     577             :         BrinTuple  *tup;
     578             :         Size        sz;
     579             :         ItemId      lp;
     580             : 
     581           0 :         CHECK_FOR_INTERRUPTS();
     582             : 
     583           0 :         lp = PageGetItemId(page, off);
     584           0 :         if (ItemIdIsUsed(lp))
     585             :         {
     586           0 :             sz = ItemIdGetLength(lp);
     587           0 :             tup = (BrinTuple *) PageGetItem(page, lp);
     588           0 :             tup = brin_copy_tuple(tup, sz, btup, &btupsz);  /* use a copy: the page lock is dropped next */
     589             : 
     590           0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);  /* brin_doupdate requires oldbuf to be unlocked on entry */
     591             : 
     592           0 :             if (!brin_doupdate(idxRel, pagesPerRange, revmap, tup->bt_blkno,
     593             :                                buf, off, tup, sz, tup, sz, false))
     594           0 :                 off--;          /* retry */
     595             : 
     596           0 :             LockBuffer(buf, BUFFER_LOCK_SHARE);  /* brin_doupdate leaves buf unlocked at exit */
     597             : 
     598             :             /* It's possible that someone extended the revmap over this page */
     599           0 :             if (!BRIN_IS_REGULAR_PAGE(page))
     600           0 :                 break;
     601             :         }
     602             :     }
     603             : 
     604           0 :     UnlockReleaseBuffer(buf);
     605           0 : }
     606             : 
     607             : /*
     608             :  * Given a BRIN index page, initialize it if necessary, and record its
     609             :  * current free space in the FSM.
     610             :  *
     611             :  * The main use for this is when, during vacuuming, an uninitialized page is
     612             :  * found, which could be the result of relation extension followed by a crash
     613             :  * before the page can be used.
     614             :  *
     615             :  * Here, we don't bother to update upper FSM pages, instead expecting that our
     616             :  * caller (brin_vacuum_scan) will fix them at the end of the scan.  Elsewhere
     617             :  * in this file, it's generally a good idea to propagate additions of free
     618             :  * space into the upper FSM pages immediately.
                     :  *
                     :  * idxrel is the index relation; buf is the buffer holding the page to
                     :  * examine.  Metapages and revmap pages are left alone.
                     :  *
                     :  * NOTE(review): apart from the new-page branch, this function takes no
                     :  * buffer lock itself; confirm what the caller is expected to hold while
                     :  * the free space is measured.
     619             :  */
     620             : void
     621         156 : brin_page_cleanup(Relation idxrel, Buffer buf)
     622             : {
     623         156 :     Page        page = BufferGetPage(buf);
     624             : 
     625             :     /*
     626             :      * If a page was left uninitialized, initialize it now; also record it in
     627             :      * FSM.
     628             :      *
     629             :      * Somebody else might be extending the relation concurrently.  To avoid
     630             :      * re-initializing the page before they can grab the buffer lock, we
     631             :      * acquire the extension lock momentarily.  Since they hold the extension
     632             :      * lock from before getting the page until after it has been initialized,
     633             :      * we are sure to see their initialization.
     634             :      */
     635         156 :     if (PageIsNew(page))
     636             :     {
     637           0 :         LockRelationForExtension(idxrel, ShareLock);
     638           0 :         UnlockRelationForExtension(idxrel, ShareLock);
     639             : 
     640           0 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     641           0 :         if (PageIsNew(page))    /* recheck now that the buffer is locked */
     642             :         {
     643           0 :             brin_initialize_empty_new_buffer(idxrel, buf);
     644           0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     645           0 :             return;
     646             :         }
     647           0 :         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     648             :     }
     649             : 
     650             :     /* Nothing to be done for non-regular index pages */
     651         292 :     if (BRIN_IS_META_PAGE(BufferGetPage(buf)) ||
     652         136 :         BRIN_IS_REVMAP_PAGE(BufferGetPage(buf)))
     653          40 :         return;
     654             : 
     655             :     /* Measure free space and record it */
     656         116 :     RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf),
     657             :                             br_page_get_freespace(page));
     658             : }
     659             : 
     660             : /*
     661             :  * Return a pinned and exclusively locked buffer which can be used to insert an
     662             :  * index item of size itemsz (caller must ensure not to request sizes
     663             :  * impossible to fulfill).  If oldbuf is a valid buffer, it is also locked (in
     664             :  * an order determined to avoid deadlocks).
     665             :  *
     666             :  * If we find that the old page is no longer a regular index page (because
     667             :  * of a revmap extension), the old buffer is unlocked and we return
     668             :  * InvalidBuffer.
     669             :  *
     670             :  * If there's no existing page with enough free space to accommodate the new
     671             :  * item, the relation is extended.  If this happens, *extended is set to true,
     672             :  * and it is the caller's responsibility to initialize the page (and WAL-log
     673             :  * that fact) prior to use.  The caller should also update the FSM with the
     674             :  * page's remaining free space after the insertion.
     675             :  *
     676             :  * Note that the caller is not expected to update FSM unless *extended is set
     677             :  * true.  This policy means that we'll update FSM when a page is created, and
     678             :  * when it's found to have too little space for a desired tuple insertion,
     679             :  * but not every single time we add a tuple to the page.
     680             :  *
     681             :  * Note that in some corner cases it is possible for this routine to extend
     682             :  * the relation and then not return the new page.  It is this routine's
     683             :  * responsibility to WAL-log the page initialization and to record the page in
     684             :  * FSM if that happens, since the caller certainly can't do it.
                     :  *
                     :  * irel is the index relation, oldbuf an optional buffer to be locked
                     :  * along with the result, itemsz the size of the item to be inserted, and
                     :  * *extended an output flag as described above.  On a successful return
                     :  * the chosen block is also remembered as the relation's target block.
     685             :  */
     686             : static Buffer
     687         116 : brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
     688             :                      bool *extended)
     689             : {
     690             :     BlockNumber oldblk;
     691             :     BlockNumber newblk;
     692             :     Page        page;
     693             :     Size        freespace;
     694             : 
     695             :     /* callers must have checked */
     696             :     Assert(itemsz <= BrinMaxItemSize);
     697             : 
     698         116 :     if (BufferIsValid(oldbuf))
     699           4 :         oldblk = BufferGetBlockNumber(oldbuf);
     700             :     else
     701         112 :         oldblk = InvalidBlockNumber;
     702             : 
     703             :     /* Choose initial target page, re-using existing target if known */
     704         116 :     newblk = RelationGetTargetBlock(irel);
     705         116 :     if (newblk == InvalidBlockNumber)
     706          52 :         newblk = GetPageWithFreeSpace(irel, itemsz);
     707             : 
     708             :     /*
     709             :      * Loop until we find a page with sufficient free space.  By the time we
     710             :      * return to caller out of this loop, both buffers are valid and locked;
     711             :      * if we have to restart here, neither page is locked and newblk isn't
     712             :      * pinned (if it's even valid).
     713             :      */
     714             :     for (;;)
     715          40 :     {
     716             :         Buffer      buf;
     717         156 :         bool        extensionLockHeld = false;
     718             : 
     719         156 :         CHECK_FOR_INTERRUPTS();
     720             : 
     721         156 :         *extended = false;
     722             : 
     723         156 :         if (newblk == InvalidBlockNumber)
     724             :         {
     725             :             /*
     726             :              * There's not enough free space in any existing index page,
     727             :              * according to the FSM: extend the relation to obtain a shiny new
     728             :              * page.
     729             :              */
     730          74 :             if (!RELATION_IS_LOCAL(irel))
     731             :             {
     732           4 :                 LockRelationForExtension(irel, ExclusiveLock);
     733           4 :                 extensionLockHeld = true;
     734             :             }
     735          74 :             buf = ReadBuffer(irel, P_NEW);
     736          74 :             newblk = BufferGetBlockNumber(buf);
     737          74 :             *extended = true;
     738             : 
     739             :             BRIN_elog((DEBUG2, "brin_getinsertbuffer: extending to page %u",
     740             :                        BufferGetBlockNumber(buf)));
     741             :         }
     742          82 :         else if (newblk == oldblk)
     743             :         {
     744             :             /*
     745             :              * There's an odd corner-case here where the FSM is out-of-date,
     746             :              * and gave us the old page.
     747             :              */
     748           4 :             buf = oldbuf;
     749             :         }
     750             :         else
     751             :         {
     752          78 :             buf = ReadBuffer(irel, newblk);
     753             :         }
     754             : 
     755             :         /*
     756             :          * We lock the old buffer first, if it's earlier than the new one; but
     757             :          * then we need to check that it hasn't been turned into a revmap page
     758             :          * concurrently.  If we detect that that happened, give up and tell
     759             :          * caller to start over.
     760             :          */
     761         156 :         if (BufferIsValid(oldbuf) && oldblk < newblk)
     762             :         {
     763           4 :             LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
     764           4 :             if (!BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
     765             :             {
     766           0 :                 LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     767             : 
     768             :                 /*
     769             :                  * It is possible that the new page was obtained from
     770             :                  * extending the relation.  In that case, we must be sure to
     771             :                  * record it in the FSM before leaving, because otherwise the
     772             :                  * space would be lost forever.  However, we cannot let an
     773             :                  * uninitialized page get in the FSM, so we need to initialize
     774             :                  * it first.
     775             :                  */
     776           0 :                 if (*extended)
     777           0 :                     brin_initialize_empty_new_buffer(irel, buf);
     778             : 
     779           0 :                 if (extensionLockHeld)
     780           0 :                     UnlockRelationForExtension(irel, ExclusiveLock);
     781             : 
     782           0 :                 ReleaseBuffer(buf); /* buf was pinned but never locked here */
     783             : 
     784           0 :                 if (*extended)
     785             :                 {
     786           0 :                     FreeSpaceMapVacuumRange(irel, newblk, newblk + 1);
     787             :                     /* shouldn't matter, but don't confuse caller */
     788           0 :                     *extended = false;
     789             :                 }
     790             : 
     791           0 :                 return InvalidBuffer;
     792             :             }
     793             :         }
     794             : 
     795         156 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     796             : 
     797         156 :         if (extensionLockHeld)
     798           4 :             UnlockRelationForExtension(irel, ExclusiveLock);
     799             : 
     800         156 :         page = BufferGetPage(buf);
     801             : 
     802             :         /*
     803             :          * We have a new buffer to insert into.  Check that the new page has
     804             :          * enough free space, and return it if it does; otherwise start over.
     805             :          * (br_page_get_freespace also checks that the FSM didn't hand us a
     806             :          * page that has since been repurposed for the revmap.)
                     :          *
                     :          * A page just obtained by extension is not measured; it is taken to
                     :          * have BrinMaxItemSize bytes available.
     807             :          */
     808         312 :         freespace = *extended ?
     809         156 :             BrinMaxItemSize : br_page_get_freespace(page);
     810         156 :         if (freespace >= itemsz)
     811             :         {
     812         116 :             RelationSetTargetBlock(irel, newblk);
     813             : 
     814             :             /*
     815             :              * Lock the old buffer if not locked already.  Note that in this
     816             :              * case we know for sure it's a regular page: it's later than the
     817             :              * new page we just got, which is not a revmap page, and revmap
     818             :              * pages are always consecutive.
     819             :              */
     820         116 :             if (BufferIsValid(oldbuf) && oldblk > newblk)
     821             :             {
     822           0 :                 LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
     823             :                 Assert(BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)));
     824             :             }
     825             : 
     826         116 :             return buf;
     827             :         }
     828             : 
     829             :         /* This page is no good. */
     830             : 
     831             :         /*
     832             :          * If an entirely new page does not contain enough free space for the
     833             :          * new item, then surely that item is oversized.  Complain loudly; but
     834             :          * first make sure we initialize the page and record it as free, for
     835             :          * next time.
     836             :          */
     837          40 :         if (*extended)
     838             :         {
     839           0 :             brin_initialize_empty_new_buffer(irel, buf);
     840             :             /* since this should not happen, skip FreeSpaceMapVacuum */
     841             : 
     842           0 :             ereport(ERROR,
     843             :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     844             :                      errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
     845             :                             itemsz, freespace, RelationGetRelationName(irel))));
     846             :             return InvalidBuffer;   /* keep compiler quiet */
     847             :         }
     848             : 
     849          40 :         if (newblk != oldblk)   /* buf is distinct from oldbuf: drop lock and pin */
     850          36 :             UnlockReleaseBuffer(buf);
     851          40 :         if (BufferIsValid(oldbuf) && oldblk <= newblk)  /* oldbuf is locked */
     852           4 :             LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     853             : 
     854             :         /*
     855             :          * Update the FSM with the new, presumably smaller, freespace value
     856             :          * for this page, then search for a new target page.
     857             :          */
     858          40 :         newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
     859             :     }
     860             : }
     861             : 
     862             : /*
     863             :  * Initialize a page as an empty regular BRIN page, WAL-log this, and record
     864             :  * the page in FSM.
     865             :  *
     866             :  * There are several corner situations in which we extend the relation to
     867             :  * obtain a new page and later find that we cannot use it immediately.  When
     868             :  * that happens, we don't want to let the page go unrecorded in FSM, because
     869             :  * there is no mechanism to get the space back and the index would bloat.
     870             :  * Also, because we would not WAL-log the action that would initialize the
     871             :  * page, the page would go uninitialized in a standby (or after recovery).
     872             :  *
     873             :  * While we record the page in FSM here, caller is responsible for doing FSM
     874             :  * upper-page update if that seems appropriate.
                     :  *
                     :  * idxrel is the index relation; buffer is the buffer holding the new page.
                     :  * No buffer lock is taken or released here.
     875             :  */
     876             : static void
     877           0 : brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
     878             : {
     879             :     Page        page;
     880             : 
     881             :     BRIN_elog((DEBUG2,
     882             :                "brin_initialize_empty_new_buffer: initializing blank page %u",
     883             :                BufferGetBlockNumber(buffer)));
     884             : 
     885           0 :     START_CRIT_SECTION();   /* init, dirty-marking and WAL-logging go together */
     886           0 :     page = BufferGetPage(buffer);
     887           0 :     brin_page_init(page, BRIN_PAGETYPE_REGULAR);
     888           0 :     MarkBufferDirty(buffer);
     889           0 :     log_newpage_buffer(buffer, true);
     890           0 :     END_CRIT_SECTION();
     891             : 
     892             :     /*
     893             :      * We update the FSM for this page, but this is not WAL-logged.  This is
     894             :      * acceptable because VACUUM will scan the index and update the FSM with
     895             :      * pages whose FSM records were forgotten in a crash.
     896             :      */
     897           0 :     RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
     898             :                             br_page_get_freespace(page));
     899           0 : }
     900             : 
     901             : 
     902             : /*
     903             :  * Return the amount of free space on a regular BRIN index page.
     904             :  *
     905             :  * If the page is not a regular page, or has been marked with the
     906             :  * BRIN_EVACUATE_PAGE flag, returns 0.  Otherwise the result is whatever
                     :  * PageGetFreeSpace reports for the page.
                     :  *
                     :  * page is the page to inspect.
     907             :  */
     908             : static Size
     909         850 : br_page_get_freespace(Page page)
     910             : {
     911        1700 :     if (!BRIN_IS_REGULAR_PAGE(page) ||
     912         850 :         (BrinPageFlags(page) & BRIN_EVACUATE_PAGE) != 0)
     913           0 :         return 0;               /* page must not receive new tuples */
     914             :     else
     915         850 :         return PageGetFreeSpace(page);
     916             : }

Generated by: LCOV version 1.13