LCOV - code coverage report
Current view: top level - src/backend/storage/freespace - fsmpage.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 85 96 88.5 %
Date: 2025-01-18 04:15:08 Functions: 7 7 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * fsmpage.c
       4             :  *    routines to search and manipulate one FSM page.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/storage/freespace/fsmpage.c
      12             :  *
      13             :  * NOTES:
      14             :  *
      15             :  *  The public functions in this file form an API that hides the internal
      16             :  *  structure of a FSM page. This allows freespace.c to treat each FSM page
      17             :  *  as a black box with SlotsPerPage "slots". fsm_set_avail() and
      18             :  *  fsm_get_avail() let you get/set the value of a slot, and
      19             :  *  fsm_search_avail() lets you search for a slot with value >= X.
      20             :  *
      21             :  *-------------------------------------------------------------------------
      22             :  */
      23             : #include "postgres.h"
      24             : 
      25             : #include "storage/bufmgr.h"
      26             : #include "storage/fsm_internals.h"
      27             : 
      28             : /* Macros to navigate the tree within a page. Root has index zero. */
      29             : #define leftchild(x)    (2 * (x) + 1)
      30             : #define rightchild(x)   (2 * (x) + 2)
      31             : #define parentof(x)     (((x) - 1) / 2)
      32             : 
      33             : /*
      34             :  * Find right neighbor of x, wrapping around within the level
      35             :  */
      36             : static int
      37      131132 : rightneighbor(int x)
      38             : {
      39             :     /*
      40             :      * Move right. This might wrap around, stepping to the leftmost node at
      41             :      * the next level.
      42             :      */
      43      131132 :     x++;
      44             : 
      45             :     /*
      46             :      * Check if we stepped to the leftmost node at next level, and correct if
      47             :      * so. The leftmost nodes at each level are numbered x = 2^level - 1, so
      48             :      * check if (x + 1) is a power of two, using a standard
      49             :      * twos-complement-arithmetic trick.
      50             :      */
      51      131132 :     if (((x + 1) & x) == 0)
      52        7326 :         x = parentof(x);
      53             : 
      54      131132 :     return x;
      55             : }
      56             : 
      57             : /*
      58             :  * Sets the value of a slot on page. Returns true if the page was modified.
      59             :  *
      60             :  * The caller must hold an exclusive lock on the page.
      61             :  */
      62             : bool
      63     1554580 : fsm_set_avail(Page page, int slot, uint8 value)
      64             : {
      65     1554580 :     int         nodeno = NonLeafNodesPerPage + slot;
      66     1554580 :     FSMPage     fsmpage = (FSMPage) PageGetContents(page);
      67             :     uint8       oldvalue;
      68             : 
      69             :     Assert(slot < LeafNodesPerPage);
      70             : 
      71     1554580 :     oldvalue = fsmpage->fp_nodes[nodeno];
      72             : 
      73             :     /* If the value hasn't changed, we don't need to do anything */
      74     1554580 :     if (oldvalue == value && value <= fsmpage->fp_nodes[0])
      75      784552 :         return false;
      76             : 
      77      770028 :     fsmpage->fp_nodes[nodeno] = value;
      78             : 
      79             :     /*
      80             :      * Propagate up, until we hit the root or a node that doesn't need to be
      81             :      * updated.
      82             :      */
      83             :     do
      84             :     {
      85     7019158 :         uint8       newvalue = 0;
      86             :         int         lchild;
      87             :         int         rchild;
      88             : 
      89     7019158 :         nodeno = parentof(nodeno);
      90     7019158 :         lchild = leftchild(nodeno);
      91     7019158 :         rchild = lchild + 1;
      92             : 
      93     7019158 :         newvalue = fsmpage->fp_nodes[lchild];
      94     7019158 :         if (rchild < NodesPerPage)
      95     7019154 :             newvalue = Max(newvalue,
      96             :                            fsmpage->fp_nodes[rchild]);
      97             : 
      98     7019158 :         oldvalue = fsmpage->fp_nodes[nodeno];
      99     7019158 :         if (oldvalue == newvalue)
     100      236404 :             break;
     101             : 
     102     6782754 :         fsmpage->fp_nodes[nodeno] = newvalue;
     103     6782754 :     } while (nodeno > 0);
     104             : 
     105             :     /*
     106             :      * sanity check: if the new value is (still) higher than the value at the
     107             :      * top, the tree is corrupt.  If so, rebuild.
     108             :      */
     109      770028 :     if (value > fsmpage->fp_nodes[0])
     110           0 :         fsm_rebuild_page(page);
     111             : 
     112      770028 :     return true;
     113             : }
     114             : 
     115             : /*
     116             :  * Returns the value of given slot on page.
     117             :  *
     118             :  * Since this is just a read-only access of a single byte, the page doesn't
     119             :  * need to be locked.
     120             :  */
     121             : uint8
     122     2685438 : fsm_get_avail(Page page, int slot)
     123             : {
     124     2685438 :     FSMPage     fsmpage = (FSMPage) PageGetContents(page);
     125             : 
     126             :     Assert(slot < LeafNodesPerPage);
     127             : 
     128     2685438 :     return fsmpage->fp_nodes[NonLeafNodesPerPage + slot];
     129             : }
     130             : 
     131             : /*
     132             :  * Returns the value at the root of a page.
     133             :  *
     134             :  * Since this is just a read-only access of a single byte, the page doesn't
     135             :  * need to be locked.
     136             :  */
     137             : uint8
     138      347942 : fsm_get_max_avail(Page page)
     139             : {
     140      347942 :     FSMPage     fsmpage = (FSMPage) PageGetContents(page);
     141             : 
     142      347942 :     return fsmpage->fp_nodes[0];
     143             : }
     144             : 
     145             : /*
     146             :  * Searches for a slot with category at least minvalue.
     147             :  * Returns slot number, or -1 if none found.
     148             :  *
     149             :  * The caller must hold at least a shared lock on the page, and this
     150             :  * function can unlock and lock the page again in exclusive mode if it
     151             :  * needs to be updated. exclusive_lock_held should be set to true if the
     152             :  * caller is already holding an exclusive lock, to avoid extra work.
     153             :  *
     154             :  * If advancenext is false, fp_next_slot is set to point to the returned
     155             :  * slot, and if it's true, to the slot after the returned slot.
     156             :  */
     157             : int
     158      462424 : fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext,
     159             :                  bool exclusive_lock_held)
     160             : {
     161      462424 :     Page        page = BufferGetPage(buf);
     162      462424 :     FSMPage     fsmpage = (FSMPage) PageGetContents(page);
     163             :     int         nodeno;
     164             :     int         target;
     165             :     uint16      slot;
     166             : 
     167      462424 : restart:
     168             : 
     169             :     /*
     170             :      * Check the root first, and exit quickly if there's no leaf with enough
     171             :      * free space
     172             :      */
     173      462424 :     if (fsmpage->fp_nodes[0] < minvalue)
     174      370274 :         return -1;
     175             : 
     176             :     /*
     177             :      * Start search using fp_next_slot.  It's just a hint, so check that it's
     178             :      * sane.  (This also handles wrapping around when the prior call returned
     179             :      * the last slot on the page.)
     180             :      */
     181       92150 :     target = fsmpage->fp_next_slot;
     182       92150 :     if (target < 0 || target >= LeafNodesPerPage)
     183           0 :         target = 0;
     184       92150 :     target += NonLeafNodesPerPage;
     185             : 
     186             :     /*----------
     187             :      * Start the search from the target slot.  At every step, move one
     188             :      * node to the right, then climb up to the parent.  Stop when we reach
     189             :      * a node with enough free space (as we must, since the root has enough
     190             :      * space).
     191             :      *
     192             :      * The idea is to gradually expand our "search triangle", that is, all
     193             :      * nodes covered by the current node, and to be sure we search to the
     194             :      * right from the start point.  At the first step, only the target slot
     195             :      * is examined.  When we move up from a left child to its parent, we are
     196             :      * adding the right-hand subtree of that parent to the search triangle.
     197             :      * When we move right then up from a right child, we are dropping the
     198             :      * current search triangle (which we know doesn't contain any suitable
     199             :      * page) and instead looking at the next-larger-size triangle to its
     200             :      * right.  So we never look left from our original start point, and at
     201             :      * each step the size of the search triangle doubles, ensuring it takes
     202             :      * only log2(N) work to search N pages.
     203             :      *
     204             :      * The "move right" operation will wrap around if it hits the right edge
     205             :      * of the tree, so the behavior is still good if we start near the right.
     206             :      * Note also that the move-and-climb behavior ensures that we can't end
     207             :      * up on one of the missing nodes at the right of the leaf level.
     208             :      *
     209             :      * For example, consider this tree:
     210             :      *
     211             :      *         7
     212             :      *     7       6
     213             :      *   5   7   6   5
     214             :      *  4 5 5 7 2 6 5 2
     215             :      *              T
     216             :      *
     217             :      * Assume that the target node is the node indicated by the letter T,
     218             :      * and we're searching for a node with value of 6 or higher. The search
     219             :      * begins at T. At the first iteration, we move to the right, then to the
     220             :      * parent, arriving at the rightmost 5. At the second iteration, we move
     221             :      * to the right, wrapping around, then climb up, arriving at the 7 on the
     222             :      * third level.  7 satisfies our search, so we descend down to the bottom,
     223             :      * following the path of sevens.  This is in fact the first suitable page
     224             :      * to the right of (allowing for wraparound) our start point.
     225             :      *----------
     226             :      */
     227       92150 :     nodeno = target;
     228      223282 :     while (nodeno > 0)
     229             :     {
     230      215956 :         if (fsmpage->fp_nodes[nodeno] >= minvalue)
     231       84824 :             break;
     232             : 
     233             :         /*
     234             :          * Move to the right, wrapping around on same level if necessary, then
     235             :          * climb up.
     236             :          */
     237      131132 :         nodeno = parentof(rightneighbor(nodeno));
     238             :     }
     239             : 
     240             :     /*
     241             :      * We're now at a node with enough free space, somewhere in the middle of
     242             :      * the tree. Descend to the bottom, following a path with enough free
     243             :      * space, preferring to move left if there's a choice.
     244             :      */
     245      223282 :     while (nodeno < NonLeafNodesPerPage)
     246             :     {
     247      131132 :         int         childnodeno = leftchild(nodeno);
     248             : 
     249      131132 :         if (childnodeno < NodesPerPage &&
     250      131132 :             fsmpage->fp_nodes[childnodeno] >= minvalue)
     251             :         {
     252       95370 :             nodeno = childnodeno;
     253       95370 :             continue;
     254             :         }
     255       35762 :         childnodeno++;          /* point to right child */
     256       35762 :         if (childnodeno < NodesPerPage &&
     257       35762 :             fsmpage->fp_nodes[childnodeno] >= minvalue)
     258             :         {
     259       35762 :             nodeno = childnodeno;
     260             :         }
     261             :         else
     262             :         {
     263             :             /*
     264             :              * Oops. The parent node promised that either left or right child
     265             :              * has enough space, but neither actually did. This can happen in
     266             :              * case of a "torn page", IOW if we crashed earlier while writing
     267             :              * the page to disk, and only part of the page made it to disk.
     268             :              *
     269             :              * Fix the corruption and restart.
     270             :              */
     271             :             RelFileLocator rlocator;
     272             :             ForkNumber  forknum;
     273             :             BlockNumber blknum;
     274             : 
     275           0 :             BufferGetTag(buf, &rlocator, &forknum, &blknum);
     276           0 :             elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/%u",
     277             :                  blknum, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber);
     278             : 
     279             :             /* make sure we hold an exclusive lock */
     280           0 :             if (!exclusive_lock_held)
     281             :             {
     282           0 :                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     283           0 :                 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     284           0 :                 exclusive_lock_held = true;
     285             :             }
     286           0 :             fsm_rebuild_page(page);
     287           0 :             MarkBufferDirtyHint(buf, false);
     288           0 :             goto restart;
     289             :         }
     290             :     }
     291             : 
     292             :     /* We're now at the bottom level, at a node with enough space. */
     293       92150 :     slot = nodeno - NonLeafNodesPerPage;
     294             : 
     295             :     /*
     296             :      * Update the next-target pointer. Note that we do this even if we're only
     297             :      * holding a shared lock, on the grounds that it's better to use a shared
     298             :      * lock and get a garbled next pointer every now and then, than take the
     299             :      * concurrency hit of an exclusive lock.
     300             :      *
     301             :      * Wrap-around is handled at the beginning of this function.
     302             :      */
     303       92150 :     fsmpage->fp_next_slot = slot + (advancenext ? 1 : 0);
     304             : 
     305       92150 :     return slot;
     306             : }
     307             : 
     308             : /*
     309             :  * Sets the available space to zero for all slots numbered >= nslots.
     310             :  * Returns true if the page was modified.
     311             :  */
     312             : bool
     313         210 : fsm_truncate_avail(Page page, int nslots)
     314             : {
     315         210 :     FSMPage     fsmpage = (FSMPage) PageGetContents(page);
     316             :     uint8      *ptr;
     317         210 :     bool        changed = false;
     318             : 
     319             :     Assert(nslots >= 0 && nslots < LeafNodesPerPage);
     320             : 
     321             :     /* Clear all truncated leaf nodes */
     322         210 :     ptr = &fsmpage->fp_nodes[NonLeafNodesPerPage + nslots];
     323      843760 :     for (; ptr < &fsmpage->fp_nodes[NodesPerPage]; ptr++)
     324             :     {
     325      843550 :         if (*ptr != 0)
     326        4288 :             changed = true;
     327      843550 :         *ptr = 0;
     328             :     }
     329             : 
     330             :     /* Fix upper nodes. */
     331         210 :     if (changed)
     332         182 :         fsm_rebuild_page(page);
     333             : 
     334         210 :     return changed;
     335             : }
     336             : 
     337             : /*
     338             :  * Reconstructs the upper levels of a page. Returns true if the page
     339             :  * was modified.
     340             :  */
     341             : bool
     342         182 : fsm_rebuild_page(Page page)
     343             : {
     344         182 :     FSMPage     fsmpage = (FSMPage) PageGetContents(page);
     345         182 :     bool        changed = false;
     346             :     int         nodeno;
     347             : 
     348             :     /*
     349             :      * Start from the lowest non-leaf level, at last node, working our way
     350             :      * backwards, through all non-leaf nodes at all levels, up to the root.
     351             :      */
     352      745472 :     for (nodeno = NonLeafNodesPerPage - 1; nodeno >= 0; nodeno--)
     353             :     {
     354      745290 :         int         lchild = leftchild(nodeno);
     355      745290 :         int         rchild = lchild + 1;
     356      745290 :         uint8       newvalue = 0;
     357             : 
     358             :         /* The first few nodes we examine might have zero or one child. */
     359      745290 :         if (lchild < NodesPerPage)
     360      742924 :             newvalue = fsmpage->fp_nodes[lchild];
     361             : 
     362      745290 :         if (rchild < NodesPerPage)
     363      742742 :             newvalue = Max(newvalue,
     364             :                            fsmpage->fp_nodes[rchild]);
     365             : 
     366      745290 :         if (fsmpage->fp_nodes[nodeno] != newvalue)
     367             :         {
     368        5762 :             fsmpage->fp_nodes[nodeno] = newvalue;
     369        5762 :             changed = true;
     370             :         }
     371             :     }
     372             : 
     373         182 :     return changed;
     374             : }

Generated by: LCOV version 1.14