LCOV - code coverage report
Current view: top level - src/backend/storage/freespace - freespace.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 196 208 94.2 %
Date: 2023-12-11 15:11:28 Functions: 21 21 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * freespace.c
       4             :  *    POSTGRES free space map for quickly finding free space in relations
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/storage/freespace/freespace.c
      12             :  *
      13             :  *
      14             :  * NOTES:
      15             :  *
      16             :  *  Free Space Map keeps track of the amount of free space on pages, and
      17             :  *  allows quickly searching for a page with enough free space. The FSM is
      18             :  *  stored in a dedicated relation fork of all heap relations, and those
      19             :  *  index access methods that need it (see also indexfsm.c). See README for
      20             :  *  more information.
      21             :  *
      22             :  *-------------------------------------------------------------------------
      23             :  */
      24             : #include "postgres.h"
      25             : 
      26             : #include "access/htup_details.h"
      27             : #include "access/xloginsert.h"
      28             : #include "access/xlogutils.h"
      29             : #include "miscadmin.h"
      30             : #include "storage/freespace.h"
      31             : #include "storage/fsm_internals.h"
      32             : #include "storage/lmgr.h"
      33             : #include "storage/smgr.h"
      34             : 
      35             : 
      36             : /*
      37             :  * We use just one byte to store the amount of free space on a page, so we
      38             :  * divide the amount of free space a page can have into 256 different
      39             :  * categories. The highest category, 255, represents a page with at least
      40             :  * MaxFSMRequestSize bytes of free space, and the second highest category
      41             :  * represents the range from 254 * FSM_CAT_STEP, inclusive, to
      42             :  * MaxFSMRequestSize, exclusive.
      43             :  *
      44             :  * MaxFSMRequestSize depends on the architecture and BLCKSZ, but assuming
      45             :  * default 8k BLCKSZ, and that MaxFSMRequestSize is 8164 bytes, the
      46             :  * categories look like this:
      47             :  *
      48             :  *
      49             :  * Range     Category
      50             :  * 0    - 31   0
      51             :  * 32   - 63   1
      52             :  * ...    ...  ...
      53             :  * 8096 - 8127 253
      54             :  * 8128 - 8163 254
      55             :  * 8164 - 8192 255
      56             :  *
      57             :  * The reason that MaxFSMRequestSize is special is that if MaxFSMRequestSize
      58             :  * isn't equal to a range boundary, a page with exactly MaxFSMRequestSize
      59             :  * bytes of free space wouldn't satisfy a request for MaxFSMRequestSize
      60             :  * bytes. If there isn't more than MaxFSMRequestSize bytes of free space on a
      61             :  * completely empty page, that would mean that we could never satisfy a
      62             :  * request of exactly MaxFSMRequestSize bytes.
      63             :  */
      64             : #define FSM_CATEGORIES  256
      65             : #define FSM_CAT_STEP    (BLCKSZ / FSM_CATEGORIES)
      66             : #define MaxFSMRequestSize   MaxHeapTupleSize
      67             : 
      68             : /*
      69             :  * Depth of the on-disk tree. We need to be able to address 2^32-1 blocks,
      70             :  * and 1626 is the smallest number that satisfies X^3 >= 2^32-1. Likewise,
      71             :  * 256 is the smallest number that satisfies X^4 >= 2^32-1. In practice,
      72             :  * this means that 4096 bytes is the smallest BLCKSZ that we can get away
      73             :  * with a 3-level tree, and 512 is the smallest we support.
      74             :  */
      75             : #define FSM_TREE_DEPTH  ((SlotsPerFSMPage >= 1626) ? 3 : 4)
      76             : 
      77             : #define FSM_ROOT_LEVEL  (FSM_TREE_DEPTH - 1)
      78             : #define FSM_BOTTOM_LEVEL 0
      79             : 
      80             : /*
      81             :  * The internal FSM routines work on a logical addressing scheme. Each
      82             :  * level of the tree can be thought of as a separately addressable file.
      83             :  */
      84             : typedef struct
      85             : {
      86             :     int         level;          /* level */
      87             :     int         logpageno;      /* page number within the level */
      88             : } FSMAddress;
      89             : 
      90             : /* Address of the root page. */
      91             : static const FSMAddress FSM_ROOT_ADDRESS = {FSM_ROOT_LEVEL, 0};
      92             : 
      93             : /* functions to navigate the tree */
      94             : static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot);
      95             : static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot);
      96             : static FSMAddress fsm_get_location(BlockNumber heapblk, uint16 *slot);
      97             : static BlockNumber fsm_get_heap_blk(FSMAddress addr, uint16 slot);
      98             : static BlockNumber fsm_logical_to_physical(FSMAddress addr);
      99             : 
     100             : static Buffer fsm_readbuf(Relation rel, FSMAddress addr, bool extend);
     101             : static Buffer fsm_extend(Relation rel, BlockNumber fsm_nblocks);
     102             : 
     103             : /* functions to convert amount of free space to a FSM category */
     104             : static uint8 fsm_space_avail_to_cat(Size avail);
     105             : static uint8 fsm_space_needed_to_cat(Size needed);
     106             : static Size fsm_space_cat_to_avail(uint8 cat);
     107             : 
     108             : /* workhorse functions for various operations */
     109             : static int  fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
     110             :                                uint8 newValue, uint8 minValue);
     111             : static BlockNumber fsm_search(Relation rel, uint8 min_cat);
     112             : static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
     113             :                              BlockNumber start, BlockNumber end,
     114             :                              bool *eof_p);
     115             : 
     116             : 
     117             : /******** Public API ********/
     118             : 
     119             : /*
     120             :  * GetPageWithFreeSpace - try to find a page in the given relation with
     121             :  *      at least the specified amount of free space.
     122             :  *
     123             :  * If successful, return the block number; if not, return InvalidBlockNumber.
     124             :  *
     125             :  * The caller must be prepared for the possibility that the returned page
     126             :  * will turn out to have too little space available by the time the caller
     127             :  * gets a lock on it.  In that case, the caller should report the actual
     128             :  * amount of free space available on that page and then try again (see
     129             :  * RecordAndGetPageWithFreeSpace).  If InvalidBlockNumber is returned,
     130             :  * extend the relation.
     131             :  */
     132             : BlockNumber
     133      145598 : GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
     134             : {
     135      145598 :     uint8       min_cat = fsm_space_needed_to_cat(spaceNeeded);
     136             : 
     137      145598 :     return fsm_search(rel, min_cat);
     138             : }
     139             : 
     140             : /*
     141             :  * RecordAndGetPageWithFreeSpace - update info about a page and try again.
     142             :  *
     143             :  * We provide this combo form to save some locking overhead, compared to
     144             :  * separate RecordPageWithFreeSpace + GetPageWithFreeSpace calls. There's
     145             :  * also some effort to return a page close to the old page; if there's a
     146             :  * page with enough free space on the same FSM page where the old one page
     147             :  * is located, it is preferred.
     148             :  */
     149             : BlockNumber
     150      172970 : RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
     151             :                               Size oldSpaceAvail, Size spaceNeeded)
     152             : {
     153      172970 :     int         old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
     154      172970 :     int         search_cat = fsm_space_needed_to_cat(spaceNeeded);
     155             :     FSMAddress  addr;
     156             :     uint16      slot;
     157             :     int         search_slot;
     158             : 
     159             :     /* Get the location of the FSM byte representing the heap block */
     160      172970 :     addr = fsm_get_location(oldPage, &slot);
     161             : 
     162      172970 :     search_slot = fsm_set_and_search(rel, addr, slot, old_cat, search_cat);
     163             : 
     164             :     /*
     165             :      * If fsm_set_and_search found a suitable new block, return that.
     166             :      * Otherwise, search as usual.
     167             :      */
     168      172970 :     if (search_slot != -1)
     169       24314 :         return fsm_get_heap_blk(addr, search_slot);
     170             :     else
     171      148656 :         return fsm_search(rel, search_cat);
     172             : }
     173             : 
     174             : /*
     175             :  * RecordPageWithFreeSpace - update info about a page.
     176             :  *
     177             :  * Note that if the new spaceAvail value is higher than the old value stored
     178             :  * in the FSM, the space might not become visible to searchers until the next
     179             :  * FreeSpaceMapVacuum call, which updates the upper level pages.
     180             :  */
     181             : void
     182      117740 : RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
     183             : {
     184      117740 :     int         new_cat = fsm_space_avail_to_cat(spaceAvail);
     185             :     FSMAddress  addr;
     186             :     uint16      slot;
     187             : 
     188             :     /* Get the location of the FSM byte representing the heap block */
     189      117740 :     addr = fsm_get_location(heapBlk, &slot);
     190             : 
     191      117740 :     fsm_set_and_search(rel, addr, slot, new_cat, 0);
     192      117740 : }
     193             : 
     194             : /*
     195             :  * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
     196             :  *      WAL replay
     197             :  */
     198             : void
     199      560156 : XLogRecordPageWithFreeSpace(RelFileLocator rlocator, BlockNumber heapBlk,
     200             :                             Size spaceAvail)
     201             : {
     202      560156 :     int         new_cat = fsm_space_avail_to_cat(spaceAvail);
     203             :     FSMAddress  addr;
     204             :     uint16      slot;
     205             :     BlockNumber blkno;
     206             :     Buffer      buf;
     207             :     Page        page;
     208             : 
     209             :     /* Get the location of the FSM byte representing the heap block */
     210      560156 :     addr = fsm_get_location(heapBlk, &slot);
     211      560156 :     blkno = fsm_logical_to_physical(addr);
     212             : 
     213             :     /* If the page doesn't exist already, extend */
     214      560156 :     buf = XLogReadBufferExtended(rlocator, FSM_FORKNUM, blkno,
     215             :                                  RBM_ZERO_ON_ERROR, InvalidBuffer);
     216      560156 :     LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     217             : 
     218      560156 :     page = BufferGetPage(buf);
     219      560156 :     if (PageIsNew(page))
     220         830 :         PageInit(page, BLCKSZ, 0);
     221             : 
     222      560156 :     if (fsm_set_avail(page, slot, new_cat))
     223      552606 :         MarkBufferDirtyHint(buf, false);
     224      560156 :     UnlockReleaseBuffer(buf);
     225      560156 : }
     226             : 
     227             : /*
     228             :  * GetRecordedFreeSpace - return the amount of free space on a particular page,
     229             :  *      according to the FSM.
     230             :  */
     231             : Size
     232        2694 : GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
     233             : {
     234             :     FSMAddress  addr;
     235             :     uint16      slot;
     236             :     Buffer      buf;
     237             :     uint8       cat;
     238             : 
     239             :     /* Get the location of the FSM byte representing the heap block */
     240        2694 :     addr = fsm_get_location(heapBlk, &slot);
     241             : 
     242        2694 :     buf = fsm_readbuf(rel, addr, false);
     243        2694 :     if (!BufferIsValid(buf))
     244          72 :         return 0;
     245        2622 :     cat = fsm_get_avail(BufferGetPage(buf), slot);
     246        2622 :     ReleaseBuffer(buf);
     247             : 
     248        2622 :     return fsm_space_cat_to_avail(cat);
     249             : }
     250             : 
     251             : /*
     252             :  * FreeSpaceMapPrepareTruncateRel - prepare for truncation of a relation.
     253             :  *
     254             :  * nblocks is the new size of the heap.
     255             :  *
     256             :  * Return the number of blocks of new FSM.
     257             :  * If it's InvalidBlockNumber, there is nothing to truncate;
     258             :  * otherwise the caller is responsible for calling smgrtruncate()
     259             :  * to truncate the FSM pages, and FreeSpaceMapVacuumRange()
     260             :  * to update upper-level pages in the FSM.
     261             :  */
     262             : BlockNumber
     263         362 : FreeSpaceMapPrepareTruncateRel(Relation rel, BlockNumber nblocks)
     264             : {
     265             :     BlockNumber new_nfsmblocks;
     266             :     FSMAddress  first_removed_address;
     267             :     uint16      first_removed_slot;
     268             :     Buffer      buf;
     269             : 
     270             :     /*
     271             :      * If no FSM has been created yet for this relation, there's nothing to
     272             :      * truncate.
     273             :      */
     274         362 :     if (!smgrexists(RelationGetSmgr(rel), FSM_FORKNUM))
     275           0 :         return InvalidBlockNumber;
     276             : 
     277             :     /* Get the location in the FSM of the first removed heap block */
     278         362 :     first_removed_address = fsm_get_location(nblocks, &first_removed_slot);
     279             : 
     280             :     /*
     281             :      * Zero out the tail of the last remaining FSM page. If the slot
     282             :      * representing the first removed heap block is at a page boundary, as the
     283             :      * first slot on the FSM page that first_removed_address points to, we can
     284             :      * just truncate that page altogether.
     285             :      */
     286         362 :     if (first_removed_slot > 0)
     287             :     {
     288         244 :         buf = fsm_readbuf(rel, first_removed_address, false);
     289         244 :         if (!BufferIsValid(buf))
     290           0 :             return InvalidBlockNumber;  /* nothing to do; the FSM was already
     291             :                                          * smaller */
     292         244 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     293             : 
     294             :         /* NO EREPORT(ERROR) from here till changes are logged */
     295         244 :         START_CRIT_SECTION();
     296             : 
     297         244 :         fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);
     298             : 
     299             :         /*
     300             :          * Truncation of a relation is WAL-logged at a higher-level, and we
     301             :          * will be called at WAL replay. But if checksums are enabled, we need
     302             :          * to still write a WAL record to protect against a torn page, if the
     303             :          * page is flushed to disk before the truncation WAL record. We cannot
     304             :          * use MarkBufferDirtyHint here, because that will not dirty the page
     305             :          * during recovery.
     306             :          */
     307         244 :         MarkBufferDirty(buf);
     308         244 :         if (!InRecovery && RelationNeedsWAL(rel) && XLogHintBitIsNeeded())
     309          36 :             log_newpage_buffer(buf, false);
     310             : 
     311         244 :         END_CRIT_SECTION();
     312             : 
     313         244 :         UnlockReleaseBuffer(buf);
     314             : 
     315         244 :         new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
     316             :     }
     317             :     else
     318             :     {
     319         118 :         new_nfsmblocks = fsm_logical_to_physical(first_removed_address);
     320         118 :         if (smgrnblocks(RelationGetSmgr(rel), FSM_FORKNUM) <= new_nfsmblocks)
     321           0 :             return InvalidBlockNumber;  /* nothing to do; the FSM was already
     322             :                                          * smaller */
     323             :     }
     324             : 
     325         362 :     return new_nfsmblocks;
     326             : }
     327             : 
     328             : /*
     329             :  * FreeSpaceMapVacuum - update upper-level pages in the rel's FSM
     330             :  *
     331             :  * We assume that the bottom-level pages have already been updated with
     332             :  * new free-space information.
     333             :  */
     334             : void
     335         294 : FreeSpaceMapVacuum(Relation rel)
     336             : {
     337             :     bool        dummy;
     338             : 
     339             :     /* Recursively scan the tree, starting at the root */
     340         294 :     (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS,
     341             :                            (BlockNumber) 0, InvalidBlockNumber,
     342             :                            &dummy);
     343         294 : }
     344             : 
     345             : /*
     346             :  * FreeSpaceMapVacuumRange - update upper-level pages in the rel's FSM
     347             :  *
     348             :  * As above, but assume that only heap pages between start and end-1 inclusive
     349             :  * have new free-space information, so update only the upper-level slots
     350             :  * covering that block range.  end == InvalidBlockNumber is equivalent to
     351             :  * "all the rest of the relation".
     352             :  */
     353             : void
     354        8876 : FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
     355             : {
     356             :     bool        dummy;
     357             : 
     358             :     /* Recursively scan the tree, starting at the root */
     359        8876 :     if (end > start)
     360        8550 :         (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, start, end, &dummy);
     361        8876 : }
     362             : 
     363             : /******** Internal routines ********/
     364             : 
     365             : /*
     366             :  * Return category corresponding x bytes of free space
     367             :  */
     368             : static uint8
     369      850866 : fsm_space_avail_to_cat(Size avail)
     370             : {
     371             :     int         cat;
     372             : 
     373             :     Assert(avail < BLCKSZ);
     374             : 
     375      850866 :     if (avail >= MaxFSMRequestSize)
     376       37214 :         return 255;
     377             : 
     378      813652 :     cat = avail / FSM_CAT_STEP;
     379             : 
     380             :     /*
     381             :      * The highest category, 255, is reserved for MaxFSMRequestSize bytes or
     382             :      * more.
     383             :      */
     384      813652 :     if (cat > 254)
     385           0 :         cat = 254;
     386             : 
     387      813652 :     return (uint8) cat;
     388             : }
     389             : 
     390             : /*
     391             :  * Return the lower bound of the range of free space represented by given
     392             :  * category.
     393             :  */
     394             : static Size
     395        2622 : fsm_space_cat_to_avail(uint8 cat)
     396             : {
     397             :     /* The highest category represents exactly MaxFSMRequestSize bytes. */
     398        2622 :     if (cat == 255)
     399        1600 :         return MaxFSMRequestSize;
     400             :     else
     401        1022 :         return cat * FSM_CAT_STEP;
     402             : }
     403             : 
     404             : /*
     405             :  * Which category does a page need to have, to accommodate x bytes of data?
     406             :  * While fsm_space_avail_to_cat() rounds down, this needs to round up.
     407             :  */
     408             : static uint8
     409      318568 : fsm_space_needed_to_cat(Size needed)
     410             : {
     411             :     int         cat;
     412             : 
     413             :     /* Can't ask for more space than the highest category represents */
     414      318568 :     if (needed > MaxFSMRequestSize)
     415           0 :         elog(ERROR, "invalid FSM request size %zu", needed);
     416             : 
     417      318568 :     if (needed == 0)
     418           0 :         return 1;
     419             : 
     420      318568 :     cat = (needed + FSM_CAT_STEP - 1) / FSM_CAT_STEP;
     421             : 
     422      318568 :     if (cat > 255)
     423           0 :         cat = 255;
     424             : 
     425      318568 :     return (uint8) cat;
     426             : }
     427             : 
     428             : /*
     429             :  * Returns the physical block number of a FSM page
     430             :  */
     431             : static BlockNumber
     432     1225856 : fsm_logical_to_physical(FSMAddress addr)
     433             : {
     434             :     BlockNumber pages;
     435             :     int         leafno;
     436             :     int         l;
     437             : 
     438             :     /*
     439             :      * Calculate the logical page number of the first leaf page below the
     440             :      * given page.
     441             :      */
     442     1225856 :     leafno = addr.logpageno;
     443     1875724 :     for (l = 0; l < addr.level; l++)
     444      649868 :         leafno *= SlotsPerFSMPage;
     445             : 
     446             :     /* Count upper level nodes required to address the leaf page */
     447     1225856 :     pages = 0;
     448     4903424 :     for (l = 0; l < FSM_TREE_DEPTH; l++)
     449             :     {
     450     3677568 :         pages += leafno + 1;
     451     3677568 :         leafno /= SlotsPerFSMPage;
     452             :     }
     453             : 
     454             :     /*
     455             :      * If the page we were asked for wasn't at the bottom level, subtract the
     456             :      * additional lower level pages we counted above.
     457             :      */
     458     1225856 :     pages -= addr.level;
     459             : 
     460             :     /* Turn the page count into 0-based block number */
     461     1225856 :     return pages - 1;
     462             : }
     463             : 
     464             : /*
     465             :  * Return the FSM location corresponding to given heap block.
     466             :  */
     467             : static FSMAddress
     468      889146 : fsm_get_location(BlockNumber heapblk, uint16 *slot)
     469             : {
     470             :     FSMAddress  addr;
     471             : 
     472      889146 :     addr.level = FSM_BOTTOM_LEVEL;
     473      889146 :     addr.logpageno = heapblk / SlotsPerFSMPage;
     474      889146 :     *slot = heapblk % SlotsPerFSMPage;
     475             : 
     476      889146 :     return addr;
     477             : }
     478             : 
     479             : /*
     480             :  * Return the heap block number corresponding to given location in the FSM.
     481             :  */
     482             : static BlockNumber
     483       43316 : fsm_get_heap_blk(FSMAddress addr, uint16 slot)
     484             : {
     485             :     Assert(addr.level == FSM_BOTTOM_LEVEL);
     486       43316 :     return ((unsigned int) addr.logpageno) * SlotsPerFSMPage + slot;
     487             : }
     488             : 
     489             : /*
     490             :  * Given a logical address of a child page, get the logical page number of
     491             :  * the parent, and the slot within the parent corresponding to the child.
     492             :  */
     493             : static FSMAddress
     494       56224 : fsm_get_parent(FSMAddress child, uint16 *slot)
     495             : {
     496             :     FSMAddress  parent;
     497             : 
     498             :     Assert(child.level < FSM_ROOT_LEVEL);
     499             : 
     500       56224 :     parent.level = child.level + 1;
     501       56224 :     parent.logpageno = child.logpageno / SlotsPerFSMPage;
     502       56224 :     *slot = child.logpageno % SlotsPerFSMPage;
     503             : 
     504       56224 :     return parent;
     505             : }
     506             : 
     507             : /*
     508             :  * Given a logical address of a parent page and a slot number, get the
     509             :  * logical address of the corresponding child page.
     510             :  */
     511             : static FSMAddress
     512       61816 : fsm_get_child(FSMAddress parent, uint16 slot)
     513             : {
     514             :     FSMAddress  child;
     515             : 
     516             :     Assert(parent.level > FSM_BOTTOM_LEVEL);
     517             : 
     518       61816 :     child.level = parent.level - 1;
     519       61816 :     child.logpageno = parent.logpageno * SlotsPerFSMPage + slot;
     520             : 
     521       61816 :     return child;
     522             : }
     523             : 
     524             : /*
     525             :  * Read a FSM page.
     526             :  *
     527             :  * If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is
     528             :  * true, the FSM file is extended.
     529             :  */
     530             : static Buffer
     531      665338 : fsm_readbuf(Relation rel, FSMAddress addr, bool extend)
     532             : {
     533      665338 :     BlockNumber blkno = fsm_logical_to_physical(addr);
     534             :     Buffer      buf;
     535             :     SMgrRelation reln;
     536             : 
     537             :     /*
     538             :      * Caution: re-using this smgr pointer could fail if the relcache entry
     539             :      * gets closed.  It's safe as long as we only do smgr-level operations
     540             :      * between here and the last use of the pointer.
     541             :      */
     542      665338 :     reln = RelationGetSmgr(rel);
     543             : 
     544             :     /*
     545             :      * If we haven't cached the size of the FSM yet, check it first.  Also
     546             :      * recheck if the requested block seems to be past end, since our cached
     547             :      * value might be stale.  (We send smgr inval messages on truncation, but
     548             :      * not on extension.)
     549             :      */
     550      665338 :     if (reln->smgr_cached_nblocks[FSM_FORKNUM] == InvalidBlockNumber ||
     551      566132 :         blkno >= reln->smgr_cached_nblocks[FSM_FORKNUM])
     552             :     {
     553             :         /* Invalidate the cache so smgrnblocks asks the kernel. */
     554      156968 :         reln->smgr_cached_nblocks[FSM_FORKNUM] = InvalidBlockNumber;
     555      156968 :         if (smgrexists(reln, FSM_FORKNUM))
     556       51462 :             smgrnblocks(reln, FSM_FORKNUM);
     557             :         else
     558      105506 :             reln->smgr_cached_nblocks[FSM_FORKNUM] = 0;
     559             :     }
     560             : 
     561             :     /*
     562             :      * For reading we use ZERO_ON_ERROR mode, and initialize the page if
     563             :      * necessary.  The FSM information is not accurate anyway, so it's better
     564             :      * to clear corrupt pages than error out. Since the FSM changes are not
     565             :      * WAL-logged, the so-called torn page problem on crash can lead to pages
     566             :      * with corrupt headers, for example.
     567             :      *
     568             :      * We use the same path below to initialize pages when extending the
     569             :      * relation, as a concurrent extension can end up with vm_extend()
     570             :      * returning an already-initialized page.
     571             :      */
     572      665338 :     if (blkno >= reln->smgr_cached_nblocks[FSM_FORKNUM])
     573             :     {
     574      106764 :         if (extend)
     575        6088 :             buf = fsm_extend(rel, blkno + 1);
     576             :         else
     577      100676 :             return InvalidBuffer;
     578             :     }
     579             :     else
     580      558574 :         buf = ReadBufferExtended(rel, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR, NULL);
     581             : 
     582             :     /*
     583             :      * Initializing the page when needed is trickier than it looks, because of
     584             :      * the possibility of multiple backends doing this concurrently, and our
     585             :      * desire to not uselessly take the buffer lock in the normal path where
     586             :      * the page is OK.  We must take the lock to initialize the page, so
     587             :      * recheck page newness after we have the lock, in case someone else
     588             :      * already did it.  Also, because we initially check PageIsNew with no
     589             :      * lock, it's possible to fall through and return the buffer while someone
     590             :      * else is still initializing the page (i.e., we might see pd_upper as set
     591             :      * but other page header fields are still zeroes).  This is harmless for
     592             :      * callers that will take a buffer lock themselves, but some callers
     593             :      * inspect the page without any lock at all.  The latter is OK only so
     594             :      * long as it doesn't depend on the page header having correct contents.
     595             :      * Current usage is safe because PageGetContents() does not require that.
     596             :      */
     597      564662 :     if (PageIsNew(BufferGetPage(buf)))
     598             :     {
     599       19548 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     600       19548 :         if (PageIsNew(BufferGetPage(buf)))
     601       19548 :             PageInit(BufferGetPage(buf), BLCKSZ, 0);
     602       19548 :         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     603             :     }
     604      564662 :     return buf;
     605             : }
     606             : 
     607             : /*
     608             :  * Ensure that the FSM fork is at least fsm_nblocks long, extending
     609             :  * it if necessary with empty pages. And by empty, I mean pages filled
     610             :  * with zeros, meaning there's no free space.
     611             :  */
     612             : static Buffer
     613        6088 : fsm_extend(Relation rel, BlockNumber fsm_nblocks)
     614             : {
     615        6088 :     return ExtendBufferedRelTo(BMR_REL(rel), FSM_FORKNUM, NULL,
     616             :                                EB_CREATE_FORK_IF_NEEDED |
     617             :                                EB_CLEAR_SIZE_CACHE,
     618             :                                fsm_nblocks,
     619             :                                RBM_ZERO_ON_ERROR);
     620             : }
     621             : 
     622             : /*
     623             :  * Set value in given FSM page and slot.
     624             :  *
     625             :  * If minValue > 0, the updated page is also searched for a page with at
     626             :  * least minValue of free space. If one is found, its slot number is
     627             :  * returned, -1 otherwise.
     628             :  */
     629             : static int
     630      294098 : fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
     631             :                    uint8 newValue, uint8 minValue)
     632             : {
     633             :     Buffer      buf;
     634             :     Page        page;
     635      294098 :     int         newslot = -1;
     636             : 
     637      294098 :     buf = fsm_readbuf(rel, addr, true);
     638      294098 :     LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     639             : 
     640      294098 :     page = BufferGetPage(buf);
     641             : 
     642      294098 :     if (fsm_set_avail(page, slot, newValue))
     643      160848 :         MarkBufferDirtyHint(buf, false);
     644             : 
     645      294098 :     if (minValue != 0)
     646             :     {
     647             :         /* Search while we still hold the lock */
     648      172970 :         newslot = fsm_search_avail(buf, minValue,
     649      172970 :                                    addr.level == FSM_BOTTOM_LEVEL,
     650             :                                    true);
     651             :     }
     652             : 
     653      294098 :     UnlockReleaseBuffer(buf);
     654             : 
     655      294098 :     return newslot;
     656             : }
     657             : 
     658             : /*
     659             :  * Search the tree for a heap page with at least min_cat of free space
     660             :  */
     661             : static BlockNumber
     662      294254 : fsm_search(Relation rel, uint8 min_cat)
     663             : {
     664      294254 :     int         restarts = 0;
     665      294254 :     FSMAddress  addr = FSM_ROOT_ADDRESS;
     666             : 
     667             :     for (;;)
     668       46474 :     {
     669             :         int         slot;
     670             :         Buffer      buf;
     671      340728 :         uint8       max_avail = 0;
     672             : 
     673             :         /* Read the FSM page. */
     674      340728 :         buf = fsm_readbuf(rel, addr, false);
     675             : 
     676             :         /* Search within the page */
     677      340728 :         if (BufferIsValid(buf))
     678             :         {
     679      241398 :             LockBuffer(buf, BUFFER_LOCK_SHARE);
     680      241398 :             slot = fsm_search_avail(buf, min_cat,
     681      241398 :                                     (addr.level == FSM_BOTTOM_LEVEL),
     682             :                                     false);
     683      241398 :             if (slot == -1)
     684      179310 :                 max_avail = fsm_get_max_avail(BufferGetPage(buf));
     685      241398 :             UnlockReleaseBuffer(buf);
     686             :         }
     687             :         else
     688       99330 :             slot = -1;
     689             : 
     690      340728 :         if (slot != -1)
     691             :         {
     692             :             /*
     693             :              * Descend the tree, or return the found block if we're at the
     694             :              * bottom.
     695             :              */
     696       62088 :             if (addr.level == FSM_BOTTOM_LEVEL)
     697       19002 :                 return fsm_get_heap_blk(addr, slot);
     698             : 
     699       43086 :             addr = fsm_get_child(addr, slot);
     700             :         }
     701      278640 :         else if (addr.level == FSM_ROOT_LEVEL)
     702             :         {
     703             :             /*
     704             :              * At the root, failure means there's no page with enough free
     705             :              * space in the FSM. Give up.
     706             :              */
     707      275252 :             return InvalidBlockNumber;
     708             :         }
     709             :         else
     710             :         {
     711             :             uint16      parentslot;
     712             :             FSMAddress  parent;
     713             : 
     714             :             /*
     715             :              * At lower level, failure can happen if the value in the upper-
     716             :              * level node didn't reflect the value on the lower page. Update
     717             :              * the upper node, to avoid falling into the same trap again, and
     718             :              * start over.
     719             :              *
     720             :              * There's a race condition here, if another backend updates this
     721             :              * page right after we release it, and gets the lock on the parent
     722             :              * page before us. We'll then update the parent page with the now
     723             :              * stale information we had. It's OK, because it should happen
     724             :              * rarely, and will be fixed by the next vacuum.
     725             :              */
     726        3388 :             parent = fsm_get_parent(addr, &parentslot);
     727        3388 :             fsm_set_and_search(rel, parent, parentslot, max_avail, 0);
     728             : 
     729             :             /*
     730             :              * If the upper pages are badly out of date, we might need to loop
     731             :              * quite a few times, updating them as we go. Any inconsistencies
     732             :              * should eventually be corrected and the loop should end. Looping
     733             :              * indefinitely is nevertheless scary, so provide an emergency
     734             :              * valve.
     735             :              */
     736        3388 :             if (restarts++ > 10000)
     737           0 :                 return InvalidBlockNumber;
     738             : 
     739             :             /* Start search all over from the root */
     740        3388 :             addr = FSM_ROOT_ADDRESS;
     741             :         }
     742             :     }
     743             : }
     744             : 
     745             : 
     746             : /*
     747             :  * Recursive guts of FreeSpaceMapVacuum
     748             :  *
     749             :  * Examine the FSM page indicated by addr, as well as its children, updating
     750             :  * upper-level nodes that cover the heap block range from start to end-1.
     751             :  * (It's okay if end is beyond the actual end of the map.)
     752             :  * Return the maximum freespace value on this page.
     753             :  *
     754             :  * If addr is past the end of the FSM, set *eof_p to true and return 0.
     755             :  *
     756             :  * This traverses the tree in depth-first order.  The tree is stored
     757             :  * physically in depth-first order, so this should be pretty I/O efficient.
     758             :  */
     759             : static uint8
     760       27574 : fsm_vacuum_page(Relation rel, FSMAddress addr,
     761             :                 BlockNumber start, BlockNumber end,
     762             :                 bool *eof_p)
     763             : {
     764             :     Buffer      buf;
     765             :     Page        page;
     766             :     uint8       max_avail;
     767             : 
     768             :     /* Read the page if it exists, or return EOF */
     769       27574 :     buf = fsm_readbuf(rel, addr, false);
     770       27574 :     if (!BufferIsValid(buf))
     771             :     {
     772        1274 :         *eof_p = true;
     773        1274 :         return 0;
     774             :     }
     775             :     else
     776       26300 :         *eof_p = false;
     777             : 
     778       26300 :     page = BufferGetPage(buf);
     779             : 
     780             :     /*
     781             :      * If we're above the bottom level, recurse into children, and fix the
     782             :      * information stored about them at this level.
     783             :      */
     784       26300 :     if (addr.level > FSM_BOTTOM_LEVEL)
     785             :     {
     786             :         FSMAddress  fsm_start,
     787             :                     fsm_end;
     788             :         uint16      fsm_start_slot,
     789             :                     fsm_end_slot;
     790             :         int         slot,
     791             :                     start_slot,
     792             :                     end_slot;
     793       17612 :         bool        eof = false;
     794             : 
     795             :         /*
     796             :          * Compute the range of slots we need to update on this page, given
     797             :          * the requested range of heap blocks to consider.  The first slot to
     798             :          * update is the one covering the "start" block, and the last slot is
     799             :          * the one covering "end - 1".  (Some of this work will be duplicated
     800             :          * in each recursive call, but it's cheap enough to not worry about.)
     801             :          */
     802       17612 :         fsm_start = fsm_get_location(start, &fsm_start_slot);
     803       17612 :         fsm_end = fsm_get_location(end - 1, &fsm_end_slot);
     804             : 
     805       44030 :         while (fsm_start.level < addr.level)
     806             :         {
     807       26418 :             fsm_start = fsm_get_parent(fsm_start, &fsm_start_slot);
     808       26418 :             fsm_end = fsm_get_parent(fsm_end, &fsm_end_slot);
     809             :         }
     810             :         Assert(fsm_start.level == addr.level);
     811             : 
     812       17612 :         if (fsm_start.logpageno == addr.logpageno)
     813       17612 :             start_slot = fsm_start_slot;
     814           0 :         else if (fsm_start.logpageno > addr.logpageno)
     815           0 :             start_slot = SlotsPerFSMPage;   /* shouldn't get here... */
     816             :         else
     817           0 :             start_slot = 0;
     818             : 
     819       17612 :         if (fsm_end.logpageno == addr.logpageno)
     820       16994 :             end_slot = fsm_end_slot;
     821         618 :         else if (fsm_end.logpageno > addr.logpageno)
     822         618 :             end_slot = SlotsPerFSMPage - 1;
     823             :         else
     824           0 :             end_slot = -1;      /* shouldn't get here... */
     825             : 
     826     2709310 :         for (slot = start_slot; slot <= end_slot; slot++)
     827             :         {
     828             :             int         child_avail;
     829             : 
     830     2691698 :             CHECK_FOR_INTERRUPTS();
     831             : 
     832             :             /* After we hit end-of-file, just clear the rest of the slots */
     833     2691698 :             if (!eof)
     834       18730 :                 child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot),
     835             :                                               start, end,
     836             :                                               &eof);
     837             :             else
     838     2672968 :                 child_avail = 0;
     839             : 
     840             :             /* Update information about the child */
     841     2691698 :             if (fsm_get_avail(page, slot) != child_avail)
     842             :             {
     843       11324 :                 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     844       11324 :                 fsm_set_avail(page, slot, child_avail);
     845       11324 :                 MarkBufferDirtyHint(buf, false);
     846       11324 :                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     847             :             }
     848             :         }
     849             :     }
     850             : 
     851             :     /* Now get the maximum value on the page, to return to caller */
     852       26300 :     max_avail = fsm_get_max_avail(page);
     853             : 
     854             :     /*
     855             :      * Reset the next slot pointer. This encourages the use of low-numbered
     856             :      * pages, increasing the chances that a later vacuum can truncate the
     857             :      * relation.  We don't bother with a lock here, nor with marking the page
     858             :      * dirty if it wasn't already, since this is just a hint.
     859             :      */
     860       26300 :     ((FSMPage) PageGetContents(page))->fp_next_slot = 0;
     861             : 
     862       26300 :     ReleaseBuffer(buf);
     863             : 
     864       26300 :     return max_avail;
     865             : }

Generated by: LCOV version 1.14