LCOV - code coverage report
Current view: top level - src/backend/storage/freespace - fsmpage.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 88.9 % 99 88
Test Date: 2026-03-12 16:14:50 Functions: 100.0 % 7 7
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * fsmpage.c
       4              :  *    routines to search and manipulate one FSM page.
       5              :  *
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  * Portions Copyright (c) 1994, Regents of the University of California
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/storage/freespace/fsmpage.c
      12              :  *
      13              :  * NOTES:
      14              :  *
      15              :  *  The public functions in this file form an API that hides the internal
      16              :  *  structure of a FSM page. This allows freespace.c to treat each FSM page
      17              :  *  as a black box with SlotsPerPage "slots". fsm_set_avail() and
      18              :  *  fsm_get_avail() let you get/set the value of a slot, and
      19              :  *  fsm_search_avail() lets you search for a slot with value >= X.
      20              :  *
      21              :  *-------------------------------------------------------------------------
      22              :  */
      23              : #include "postgres.h"
      24              : 
      25              : #include "storage/bufmgr.h"
      26              : #include "storage/fsm_internals.h"
      27              : 
      28              : /* Macros to navigate the tree within a page. Root has index zero. */
      29              : #define leftchild(x)    (2 * (x) + 1)
      30              : #define rightchild(x)   (2 * (x) + 2)
      31              : #define parentof(x)     (((x) - 1) / 2)
      32              : 
      33              : /*
      34              :  * Find right neighbor of x, wrapping around within the level
      35              :  */
      36              : static int
      37        69052 : rightneighbor(int x)
      38              : {
      39              :     /*
      40              :      * Move right. This might wrap around, stepping to the leftmost node at
      41              :      * the next level.
      42              :      */
      43        69052 :     x++;
      44              : 
      45              :     /*
      46              :      * Check if we stepped to the leftmost node at next level, and correct if
      47              :      * so. The leftmost nodes at each level are numbered x = 2^level - 1, so
      48              :      * check if (x + 1) is a power of two, using a standard
      49              :      * twos-complement-arithmetic trick.
      50              :      */
      51        69052 :     if (((x + 1) & x) == 0)
      52         3731 :         x = parentof(x);
      53              : 
      54        69052 :     return x;
      55              : }
      56              : 
      57              : /*
      58              :  * Sets the value of a slot on page. Returns true if the page was modified.
      59              :  *
      60              :  * The caller must hold an exclusive lock on the page.
      61              :  */
      62              : bool
      63       924666 : fsm_set_avail(Page page, int slot, uint8 value)
      64              : {
      65       924666 :     int         nodeno = NonLeafNodesPerPage + slot;
      66       924666 :     FSMPage     fsmpage = (FSMPage) PageGetContents(page);
      67              :     uint8       oldvalue;
      68              : 
      69              :     Assert(slot < LeafNodesPerPage);
      70              : 
      71       924666 :     oldvalue = fsmpage->fp_nodes[nodeno];
      72              : 
      73              :     /* If the value hasn't changed, we don't need to do anything */
      74       924666 :     if (oldvalue == value && value <= fsmpage->fp_nodes[0])
      75       512173 :         return false;
      76              : 
      77       412493 :     fsmpage->fp_nodes[nodeno] = value;
      78              : 
      79              :     /*
      80              :      * Propagate up, until we hit the root or a node that doesn't need to be
      81              :      * updated.
      82              :      */
      83              :     do
      84              :     {
      85      3642374 :         uint8       newvalue = 0;
      86              :         int         lchild;
      87              :         int         rchild;
      88              : 
      89      3642374 :         nodeno = parentof(nodeno);
      90      3642374 :         lchild = leftchild(nodeno);
      91      3642374 :         rchild = lchild + 1;
      92              : 
      93      3642374 :         newvalue = fsmpage->fp_nodes[lchild];
      94      3642374 :         if (rchild < NodesPerPage)
      95      3642372 :             newvalue = Max(newvalue,
      96              :                            fsmpage->fp_nodes[rchild]);
      97              : 
      98      3642374 :         oldvalue = fsmpage->fp_nodes[nodeno];
      99      3642374 :         if (oldvalue == newvalue)
     100       137886 :             break;
     101              : 
     102      3504488 :         fsmpage->fp_nodes[nodeno] = newvalue;
     103      3504488 :     } while (nodeno > 0);
     104              : 
     105              :     /*
     106              :      * sanity check: if the new value is (still) higher than the value at the
     107              :      * top, the tree is corrupt.  If so, rebuild.
     108              :      */
     109       412493 :     if (value > fsmpage->fp_nodes[0])
     110            0 :         fsm_rebuild_page(page);
     111              : 
     112       412493 :     return true;
     113              : }
     114              : 
     115              : /*
     116              :  * Returns the value of given slot on page.
     117              :  *
     118              :  * Since this is just a read-only access of a single byte, the page doesn't
     119              :  * need to be locked.
     120              :  */
     121              : uint8
     122      1480632 : fsm_get_avail(Page page, int slot)
     123              : {
     124      1480632 :     FSMPage     fsmpage = (FSMPage) PageGetContents(page);
     125              : 
     126              :     Assert(slot < LeafNodesPerPage);
     127              : 
     128      1480632 :     return fsmpage->fp_nodes[NonLeafNodesPerPage + slot];
     129              : }
     130              : 
     131              : /*
     132              :  * Returns the value at the root of a page.
     133              :  *
     134              :  * Since this is just a read-only access of a single byte, the page doesn't
     135              :  * need to be locked.
     136              :  */
     137              : uint8
     138       263818 : fsm_get_max_avail(Page page)
     139              : {
     140       263818 :     FSMPage     fsmpage = (FSMPage) PageGetContents(page);
     141              : 
     142       263818 :     return fsmpage->fp_nodes[0];
     143              : }
     144              : 
     145              : /*
     146              :  * Searches for a slot with category at least minvalue.
     147              :  * Returns slot number, or -1 if none found.
     148              :  *
     149              :  * The caller must hold at least a shared lock on the page, and this
     150              :  * function can unlock and lock the page again in exclusive mode if it
     151              :  * needs to be updated. exclusive_lock_held should be set to true if the
     152              :  * caller is already holding an exclusive lock, to avoid extra work.
     153              :  *
     154              :  * If advancenext is false, fp_next_slot is set to point to the returned
     155              :  * slot, and if it's true, to the slot after the returned slot.
     156              :  */
     157              : int
     158       268443 : fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext,
     159              :                  bool exclusive_lock_held)
     160              : {
     161       268443 :     Page        page = BufferGetPage(buf);
     162       268443 :     FSMPage     fsmpage = (FSMPage) PageGetContents(page);
     163              :     int         nodeno;
     164              :     int         target;
     165              :     uint16      slot;
     166              : 
     167       268443 : restart:
     168              : 
     169              :     /*
     170              :      * Check the root first, and exit quickly if there's no leaf with enough
     171              :      * free space
     172              :      */
     173       268443 :     if (fsmpage->fp_nodes[0] < minvalue)
     174       218451 :         return -1;
     175              : 
     176              :     /*
     177              :      * Start search using fp_next_slot.  It's just a hint, so check that it's
     178              :      * sane.  (This also handles wrapping around when the prior call returned
     179              :      * the last slot on the page.)
     180              :      */
     181        49992 :     target = fsmpage->fp_next_slot;
     182        49992 :     if (target < 0 || target >= LeafNodesPerPage)
     183            0 :         target = 0;
     184        49992 :     target += NonLeafNodesPerPage;
     185              : 
     186              :     /*----------
     187              :      * Start the search from the target slot.  At every step, move one
     188              :      * node to the right, then climb up to the parent.  Stop when we reach
     189              :      * a node with enough free space (as we must, since the root has enough
     190              :      * space).
     191              :      *
     192              :      * The idea is to gradually expand our "search triangle", that is, all
     193              :      * nodes covered by the current node, and to be sure we search to the
     194              :      * right from the start point.  At the first step, only the target slot
     195              :      * is examined.  When we move up from a left child to its parent, we are
     196              :      * adding the right-hand subtree of that parent to the search triangle.
     197              :      * When we move right then up from a right child, we are dropping the
     198              :      * current search triangle (which we know doesn't contain any suitable
     199              :      * page) and instead looking at the next-larger-size triangle to its
     200              :      * right.  So we never look left from our original start point, and at
     201              :      * each step the size of the search triangle doubles, ensuring it takes
     202              :      * only log2(N) work to search N pages.
     203              :      *
     204              :      * The "move right" operation will wrap around if it hits the right edge
     205              :      * of the tree, so the behavior is still good if we start near the right.
     206              :      * Note also that the move-and-climb behavior ensures that we can't end
     207              :      * up on one of the missing nodes at the right of the leaf level.
     208              :      *
     209              :      * For example, consider this tree:
     210              :      *
     211              :      *         7
     212              :      *     7       6
     213              :      *   5   7   6   5
     214              :      *  4 5 5 7 2 6 5 2
     215              :      *              T
     216              :      *
     217              :      * Assume that the target node is the node indicated by the letter T,
     218              :      * and we're searching for a node with value of 6 or higher. The search
     219              :      * begins at T. At the first iteration, we move to the right, then to the
     220              :      * parent, arriving at the rightmost 5. At the second iteration, we move
     221              :      * to the right, wrapping around, then climb up, arriving at the 7 on the
     222              :      * third level.  7 satisfies our search, so we descend down to the bottom,
     223              :      * following the path of sevens.  This is in fact the first suitable page
     224              :      * to the right of (allowing for wraparound) our start point.
     225              :      *----------
     226              :      */
     227        49992 :     nodeno = target;
     228       119044 :     while (nodeno > 0)
     229              :     {
     230       115313 :         if (fsmpage->fp_nodes[nodeno] >= minvalue)
     231        46261 :             break;
     232              : 
     233              :         /*
     234              :          * Move to the right, wrapping around on same level if necessary, then
     235              :          * climb up.
     236              :          */
     237        69052 :         nodeno = parentof(rightneighbor(nodeno));
     238              :     }
     239              : 
     240              :     /*
     241              :      * We're now at a node with enough free space, somewhere in the middle of
     242              :      * the tree. Descend to the bottom, following a path with enough free
     243              :      * space, preferring to move left if there's a choice.
     244              :      */
     245       119044 :     while (nodeno < NonLeafNodesPerPage)
     246              :     {
     247        69052 :         int         childnodeno = leftchild(nodeno);
     248              : 
     249        69052 :         if (childnodeno < NodesPerPage &&
     250        69052 :             fsmpage->fp_nodes[childnodeno] >= minvalue)
     251              :         {
     252        50066 :             nodeno = childnodeno;
     253        50066 :             continue;
     254              :         }
     255        18986 :         childnodeno++;          /* point to right child */
     256        18986 :         if (childnodeno < NodesPerPage &&
     257        18986 :             fsmpage->fp_nodes[childnodeno] >= minvalue)
     258              :         {
     259        18986 :             nodeno = childnodeno;
     260              :         }
     261              :         else
     262              :         {
     263              :             /*
     264              :              * Oops. The parent node promised that either left or right child
     265              :              * has enough space, but neither actually did. This can happen in
     266              :              * case of a "torn page", IOW if we crashed earlier while writing
     267              :              * the page to disk, and only part of the page made it to disk.
     268              :              *
     269              :              * Fix the corruption and restart.
     270              :              */
     271              :             RelFileLocator rlocator;
     272              :             ForkNumber  forknum;
     273              :             BlockNumber blknum;
     274              : 
     275            0 :             BufferGetTag(buf, &rlocator, &forknum, &blknum);
     276            0 :             elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/%u",
     277              :                  blknum, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber);
     278              : 
     279              :             /* make sure we hold an exclusive lock */
     280            0 :             if (!exclusive_lock_held)
     281              :             {
     282            0 :                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     283            0 :                 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     284            0 :                 exclusive_lock_held = true;
     285              :             }
     286            0 :             fsm_rebuild_page(page);
     287            0 :             MarkBufferDirtyHint(buf, false);
     288            0 :             goto restart;
     289              :         }
     290              :     }
     291              : 
     292              :     /* We're now at the bottom level, at a node with enough space. */
     293        49992 :     slot = nodeno - NonLeafNodesPerPage;
     294              : 
     295              :     /*
     296              :      * Update the next-target pointer. Note that we do this even if we're only
     297              :      * holding a shared lock, on the grounds that it's better to use a shared
     298              :      * lock and get a garbled next pointer every now and then, than take the
     299              :      * concurrency hit of an exclusive lock.
     300              :      *
     301              :      * Without an exclusive lock, we need to use the hint bit infrastructure
     302              :      * to be allowed to modify the page.
     303              :      *
     304              :      * Wrap-around is handled at the beginning of this function.
     305              :      */
     306        49992 :     if (exclusive_lock_held || BufferBeginSetHintBits(buf))
     307              :     {
     308        49986 :         fsmpage->fp_next_slot = slot + (advancenext ? 1 : 0);
     309              : 
     310        49986 :         if (!exclusive_lock_held)
     311        36259 :             BufferFinishSetHintBits(buf, false, false);
     312              :     }
     313              : 
     314        49992 :     return slot;
     315              : }
     316              : 
     317              : /*
     318              :  * Sets the available space to zero for all slots numbered >= nslots.
     319              :  * Returns true if the page was modified.
     320              :  */
     321              : bool
     322          107 : fsm_truncate_avail(Page page, int nslots)
     323              : {
     324          107 :     FSMPage     fsmpage = (FSMPage) PageGetContents(page);
     325              :     uint8      *ptr;
     326          107 :     bool        changed = false;
     327              : 
     328              :     Assert(nslots >= 0 && nslots < LeafNodesPerPage);
     329              : 
     330              :     /* Clear all truncated leaf nodes */
     331          107 :     ptr = &fsmpage->fp_nodes[NonLeafNodesPerPage + nslots];
     332       429997 :     for (; ptr < &fsmpage->fp_nodes[NodesPerPage]; ptr++)
     333              :     {
     334       429890 :         if (*ptr != 0)
     335         1824 :             changed = true;
     336       429890 :         *ptr = 0;
     337              :     }
     338              : 
     339              :     /* Fix upper nodes. */
     340          107 :     if (changed)
     341           84 :         fsm_rebuild_page(page);
     342              : 
     343          107 :     return changed;
     344              : }
     345              : 
     346              : /*
     347              :  * Reconstructs the upper levels of a page. Returns true if the page
     348              :  * was modified.
     349              :  */
     350              : bool
     351           84 : fsm_rebuild_page(Page page)
     352              : {
     353           84 :     FSMPage     fsmpage = (FSMPage) PageGetContents(page);
     354           84 :     bool        changed = false;
     355              :     int         nodeno;
     356              : 
     357              :     /*
     358              :      * Start from the lowest non-leaf level, at last node, working our way
     359              :      * backwards, through all non-leaf nodes at all levels, up to the root.
     360              :      */
     361       344064 :     for (nodeno = NonLeafNodesPerPage - 1; nodeno >= 0; nodeno--)
     362              :     {
     363       343980 :         int         lchild = leftchild(nodeno);
     364       343980 :         int         rchild = lchild + 1;
     365       343980 :         uint8       newvalue = 0;
     366              : 
     367              :         /* The first few nodes we examine might have zero or one child. */
     368       343980 :         if (lchild < NodesPerPage)
     369       342888 :             newvalue = fsmpage->fp_nodes[lchild];
     370              : 
     371       343980 :         if (rchild < NodesPerPage)
     372       342804 :             newvalue = Max(newvalue,
     373              :                            fsmpage->fp_nodes[rchild]);
     374              : 
     375       343980 :         if (fsmpage->fp_nodes[nodeno] != newvalue)
     376              :         {
     377         2473 :             fsmpage->fp_nodes[nodeno] = newvalue;
     378         2473 :             changed = true;
     379              :         }
     380              :     }
     381              : 
     382           84 :     return changed;
     383              : }
        

Generated by: LCOV version 2.0-1