LCOV - code coverage report
Current view: top level - src/backend/utils/mmgr - freepage.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 56.1 % 606 340
Test Date: 2026-02-26 22:14:50 Functions: 72.4 % 29 21
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * freepage.c
       4              :  *    Management of free memory pages.
       5              :  *
       6              :  * The intention of this code is to provide infrastructure for memory
       7              :  * allocators written specifically for PostgreSQL.  At least in the case
       8              :  * of dynamic shared memory, we can't simply use malloc() or even
       9              :  * relatively thin wrappers like palloc() which sit on top of it, because
      10              :  * no allocator built into the operating system will deal with relative
      11              :  * pointers.  In the future, we may find other cases in which greater
      12              :  * control over our own memory management seems desirable.
      13              :  *
      14              :  * A FreePageManager keeps track of which 4kB pages of memory are currently
      15              :  * unused from the point of view of some higher-level memory allocator.
      16              :  * Unlike a user-facing allocator such as palloc(), a FreePageManager can
      17              :  * only allocate and free in units of whole pages, and freeing an
      18              :  * allocation can only be done given knowledge of its length in pages.
      19              :  *
      20              :  * Since a free page manager has only a fixed amount of dedicated memory,
      21              :  * and since there is no underlying allocator, it uses the free pages
      22              :  * it is given to manage to store its bookkeeping data.  It keeps multiple
      23              :  * freelists of runs of pages, sorted by the size of the run; the head of
      24              :  * each freelist is stored in the FreePageManager itself, and the first
      25              :  * page of each run contains a relative pointer to the next run. See
      26              :  * FreePageManagerGetInternal for more details on how the freelists are
      27              :  * managed.
      28              :  *
      29              :  * To avoid memory fragmentation, it's important to consolidate adjacent
      30              :  * spans of pages whenever possible; otherwise, large allocation requests
      31              :  * might not be satisfied even when sufficient contiguous space is
      32              :  * available.  Therefore, in addition to the freelists, we maintain an
      33              :  * in-memory btree of free page ranges ordered by page number.  If a
      34              :  * range being freed precedes or follows a range that is already free,
      35              :  * the existing range is extended; if it exactly bridges the gap between
      36              :  * free ranges, then the two existing ranges are consolidated with the
      37              :  * newly-freed range to form one great big range of free pages.
      38              :  *
      39              :  * When there is only one range of free pages, the btree is trivial and
      40              :  * is stored within the FreePageManager proper; otherwise, pages are
      41              :  * allocated from the area under management as needed.  Even in cases
      42              :  * where memory fragmentation is very severe, only a tiny fraction of
      43              :  * the pages under management are consumed by this btree.
      44              :  *
      45              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      46              :  * Portions Copyright (c) 1994, Regents of the University of California
      47              :  *
      48              :  * IDENTIFICATION
      49              :  *    src/backend/utils/mmgr/freepage.c
      50              :  *
      51              :  *-------------------------------------------------------------------------
      52              :  */
      53              : 
      54              : #include "postgres.h"
      55              : #include "lib/stringinfo.h"
      56              : #include "miscadmin.h"
      57              : 
      58              : #include "utils/freepage.h"
      59              : #include "utils/relptr.h"
      60              : 
      61              : 
      62              : /* Magic numbers to identify various page types */
      63              : #define FREE_PAGE_SPAN_LEADER_MAGIC     0xea4020f0
      64              : #define FREE_PAGE_LEAF_MAGIC            0x98eae728
      65              : #define FREE_PAGE_INTERNAL_MAGIC        0x19aa32c9
      66              : 
      67              : /* Doubly linked list of spans of free pages; stored in first page of span. */
      68              : struct FreePageSpanLeader
      69              : {
      70              :     int         magic;          /* always FREE_PAGE_SPAN_LEADER_MAGIC */
      71              :     Size        npages;         /* number of pages in span */
      72              :     RelptrFreePageSpanLeader prev;
      73              :     RelptrFreePageSpanLeader next;
      74              : };
      75              : 
      76              : /* Common header for btree leaf and internal pages. */
      77              : typedef struct FreePageBtreeHeader
      78              : {
      79              :     int         magic;          /* FREE_PAGE_LEAF_MAGIC or
      80              :                                  * FREE_PAGE_INTERNAL_MAGIC */
      81              :     Size        nused;          /* number of items used */
      82              :     RelptrFreePageBtree parent; /* uplink */
      83              : } FreePageBtreeHeader;
      84              : 
      85              : /* Internal key; points to next level of btree. */
      86              : typedef struct FreePageBtreeInternalKey
      87              : {
      88              :     Size        first_page;     /* low bound for keys on child page */
      89              :     RelptrFreePageBtree child;  /* downlink */
      90              : } FreePageBtreeInternalKey;
      91              : 
      92              : /* Leaf key; no payload data. */
      93              : typedef struct FreePageBtreeLeafKey
      94              : {
      95              :     Size        first_page;     /* first page in span */
      96              :     Size        npages;         /* number of pages in span */
      97              : } FreePageBtreeLeafKey;
      98              : 
      99              : /* Work out how many keys will fit on a page. */
     100              : #define FPM_ITEMS_PER_INTERNAL_PAGE \
     101              :     ((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
     102              :         sizeof(FreePageBtreeInternalKey))
     103              : #define FPM_ITEMS_PER_LEAF_PAGE \
     104              :     ((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
     105              :         sizeof(FreePageBtreeLeafKey))
     106              : 
     107              : /* A btree page of either sort */
     108              : struct FreePageBtree
     109              : {
     110              :     FreePageBtreeHeader hdr;
     111              :     union
     112              :     {
     113              :         FreePageBtreeInternalKey internal_key[FPM_ITEMS_PER_INTERNAL_PAGE];
     114              :         FreePageBtreeLeafKey leaf_key[FPM_ITEMS_PER_LEAF_PAGE];
     115              :     }           u;
     116              : };
     117              : 
     118              : /* Results of a btree search */
     119              : typedef struct FreePageBtreeSearchResult
     120              : {
     121              :     FreePageBtree *page;
     122              :     Size        index;
     123              :     bool        found;
     124              :     unsigned    split_pages;
     125              : } FreePageBtreeSearchResult;
     126              : 
     127              : /* Helper functions */
     128              : static void FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm,
     129              :                                             FreePageBtree *btp);
     130              : static Size FreePageBtreeCleanup(FreePageManager *fpm);
     131              : static FreePageBtree *FreePageBtreeFindLeftSibling(char *base,
     132              :                                                    FreePageBtree *btp);
     133              : static FreePageBtree *FreePageBtreeFindRightSibling(char *base,
     134              :                                                     FreePageBtree *btp);
     135              : static Size FreePageBtreeFirstKey(FreePageBtree *btp);
     136              : static FreePageBtree *FreePageBtreeGetRecycled(FreePageManager *fpm);
     137              : static void FreePageBtreeInsertInternal(char *base, FreePageBtree *btp,
     138              :                                         Size index, Size first_page, FreePageBtree *child);
     139              : static void FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index,
     140              :                                     Size first_page, Size npages);
     141              : static void FreePageBtreeRecycle(FreePageManager *fpm, Size pageno);
     142              : static void FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp,
     143              :                                 Size index);
     144              : static void FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp);
     145              : static void FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
     146              :                                 FreePageBtreeSearchResult *result);
     147              : static Size FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page);
     148              : static Size FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page);
     149              : static FreePageBtree *FreePageBtreeSplitPage(FreePageManager *fpm,
     150              :                                              FreePageBtree *btp);
     151              : static void FreePageBtreeUpdateParentPointers(char *base, FreePageBtree *btp);
     152              : static void FreePageManagerDumpBtree(FreePageManager *fpm, FreePageBtree *btp,
     153              :                                      FreePageBtree *parent, int level, StringInfo buf);
     154              : static void FreePageManagerDumpSpans(FreePageManager *fpm,
     155              :                                      FreePageSpanLeader *span, Size expected_pages,
     156              :                                      StringInfo buf);
     157              : static bool FreePageManagerGetInternal(FreePageManager *fpm, Size npages,
     158              :                                        Size *first_page);
     159              : static Size FreePageManagerPutInternal(FreePageManager *fpm, Size first_page,
     160              :                                        Size npages, bool soft);
     161              : static void FreePagePopSpanLeader(FreePageManager *fpm, Size pageno);
     162              : static void FreePagePushSpanLeader(FreePageManager *fpm, Size first_page,
     163              :                                    Size npages);
     164              : static Size FreePageManagerLargestContiguous(FreePageManager *fpm);
     165              : static void FreePageManagerUpdateLargest(FreePageManager *fpm);
     166              : 
     167              : #ifdef FPM_EXTRA_ASSERTS
     168              : static Size sum_free_pages(FreePageManager *fpm);
     169              : #endif
     170              : 
     171              : /*
     172              :  * Initialize a new, empty free page manager.
     173              :  *
     174              :  * 'fpm' should reference caller-provided memory large enough to contain a
     175              :  * FreePageManager.  We'll initialize it here.
     176              :  *
     177              :  * 'base' is the address to which all pointers are relative.  When managing
     178              :  * a dynamic shared memory segment, it should normally be the base of the
     179              :  * segment.  When managing backend-private memory, it can be either NULL or,
     180              :  * if managing a single contiguous extent of memory, the start of that extent.
     181              :  */
     182              : void
     183         2841 : FreePageManagerInitialize(FreePageManager *fpm, char *base)
     184              : {
     185              :     Size        f;
     186              : 
     187         2841 :     relptr_store(base, fpm->self, fpm);
     188         2841 :     relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
     189         2841 :     relptr_store(base, fpm->btree_recycle, (FreePageSpanLeader *) NULL);
     190         2841 :     fpm->btree_depth = 0;
     191         2841 :     fpm->btree_recycle_count = 0;
     192         2841 :     fpm->singleton_first_page = 0;
     193         2841 :     fpm->singleton_npages = 0;
     194         2841 :     fpm->contiguous_pages = 0;
     195         2841 :     fpm->contiguous_pages_dirty = true;
     196              : #ifdef FPM_EXTRA_ASSERTS
     197              :     fpm->free_pages = 0;
     198              : #endif
     199              : 
     200       369330 :     for (f = 0; f < FPM_NUM_FREELISTS; f++)
     201       366489 :         relptr_store(base, fpm->freelist[f], (FreePageSpanLeader *) NULL);
     202         2841 : }
     203              : 
     204              : /*
     205              :  * Allocate a run of pages of the given length from the free page manager.
     206              :  * The return value indicates whether we were able to satisfy the request;
     207              :  * if true, the first page of the allocation is stored in *first_page.
     208              :  */
     209              : bool
     210        14095 : FreePageManagerGet(FreePageManager *fpm, Size npages, Size *first_page)
     211              : {
     212              :     bool        result;
     213              :     Size        contiguous_pages;
     214              : 
     215        14095 :     result = FreePageManagerGetInternal(fpm, npages, first_page);
     216              : 
     217              :     /*
     218              :      * It's a bit counterintuitive, but allocating pages can actually create
     219              :      * opportunities for cleanup that create larger ranges.  We might pull a
     220              :      * key out of the btree that enables the item at the head of the btree
     221              :      * recycle list to be inserted; and then if there are more items behind it
     222              :      * one of those might cause two currently-separated ranges to merge,
     223              :      * creating a single range of contiguous pages larger than any that
     224              :      * existed previously.  It might be worth trying to improve the cleanup
     225              :      * algorithm to avoid such corner cases, but for now we just notice the
     226              :      * condition and do the appropriate reporting.
     227              :      */
     228        14095 :     contiguous_pages = FreePageBtreeCleanup(fpm);
     229        14095 :     if (fpm->contiguous_pages < contiguous_pages)
     230           58 :         fpm->contiguous_pages = contiguous_pages;
     231              : 
     232              :     /*
     233              :      * FreePageManagerGetInternal may have set contiguous_pages_dirty.
     234              :      * Recompute contiguous_pages if so.
     235              :      */
     236        14095 :     FreePageManagerUpdateLargest(fpm);
     237              : 
     238              : #ifdef FPM_EXTRA_ASSERTS
     239              :     if (result)
     240              :     {
     241              :         Assert(fpm->free_pages >= npages);
     242              :         fpm->free_pages -= npages;
     243              :     }
     244              :     Assert(fpm->free_pages == sum_free_pages(fpm));
     245              :     Assert(fpm->contiguous_pages == FreePageManagerLargestContiguous(fpm));
     246              : #endif
     247        14095 :     return result;
     248              : }
     249              : 
     250              : #ifdef FPM_EXTRA_ASSERTS
     251              : static void
     252              : sum_free_pages_recurse(FreePageManager *fpm, FreePageBtree *btp, Size *sum)
     253              : {
     254              :     char       *base = fpm_segment_base(fpm);
     255              : 
     256              :     Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC ||
     257              :            btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
     258              :     ++*sum;
     259              :     if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
     260              :     {
     261              :         Size        index;
     262              : 
     263              : 
     264              :         for (index = 0; index < btp->hdr.nused; ++index)
     265              :         {
     266              :             FreePageBtree *child;
     267              : 
     268              :             child = relptr_access(base, btp->u.internal_key[index].child);
     269              :             sum_free_pages_recurse(fpm, child, sum);
     270              :         }
     271              :     }
     272              : }
     273              : static Size
     274              : sum_free_pages(FreePageManager *fpm)
     275              : {
     276              :     FreePageSpanLeader *recycle;
     277              :     char       *base = fpm_segment_base(fpm);
     278              :     Size        sum = 0;
     279              :     int         list;
     280              : 
     281              :     /* Count the spans by scanning the freelists. */
     282              :     for (list = 0; list < FPM_NUM_FREELISTS; ++list)
     283              :     {
     284              : 
     285              :         if (!relptr_is_null(fpm->freelist[list]))
     286              :         {
     287              :             FreePageSpanLeader *candidate =
     288              :                 relptr_access(base, fpm->freelist[list]);
     289              : 
     290              :             do
     291              :             {
     292              :                 sum += candidate->npages;
     293              :                 candidate = relptr_access(base, candidate->next);
     294              :             } while (candidate != NULL);
     295              :         }
     296              :     }
     297              : 
     298              :     /* Count btree internal pages. */
     299              :     if (fpm->btree_depth > 0)
     300              :     {
     301              :         FreePageBtree *root = relptr_access(base, fpm->btree_root);
     302              : 
     303              :         sum_free_pages_recurse(fpm, root, &sum);
     304              :     }
     305              : 
     306              :     /* Count the recycle list. */
     307              :     for (recycle = relptr_access(base, fpm->btree_recycle);
     308              :          recycle != NULL;
     309              :          recycle = relptr_access(base, recycle->next))
     310              :     {
     311              :         Assert(recycle->npages == 1);
     312              :         ++sum;
     313              :     }
     314              : 
     315              :     return sum;
     316              : }
     317              : #endif
     318              : 
     319              : /*
     320              :  * Compute the size of the largest run of pages that the user could
     321              :  * successfully get.
     322              :  */
     323              : static Size
     324        15983 : FreePageManagerLargestContiguous(FreePageManager *fpm)
     325              : {
     326              :     char       *base;
     327              :     Size        largest;
     328              : 
     329        15983 :     base = fpm_segment_base(fpm);
     330        15983 :     largest = 0;
     331        15983 :     if (!relptr_is_null(fpm->freelist[FPM_NUM_FREELISTS - 1]))
     332              :     {
     333              :         FreePageSpanLeader *candidate;
     334              : 
     335         8619 :         candidate = relptr_access(base, fpm->freelist[FPM_NUM_FREELISTS - 1]);
     336              :         do
     337              :         {
     338         8619 :             if (candidate->npages > largest)
     339         8619 :                 largest = candidate->npages;
     340         8619 :             candidate = relptr_access(base, candidate->next);
     341         8619 :         } while (candidate != NULL);
     342              :     }
     343              :     else
     344              :     {
     345         7364 :         Size        f = FPM_NUM_FREELISTS - 1;
     346              : 
     347              :         do
     348              :         {
     349       638843 :             --f;
     350       638843 :             if (!relptr_is_null(fpm->freelist[f]))
     351              :             {
     352         7364 :                 largest = f + 1;
     353         7364 :                 break;
     354              :             }
     355       631479 :         } while (f > 0);
     356              :     }
     357              : 
     358        15983 :     return largest;
     359              : }
     360              : 
     361              : /*
     362              :  * Recompute the size of the largest run of pages that the user could
     363              :  * successfully get, if it has been marked dirty.
     364              :  */
     365              : static void
     366        18753 : FreePageManagerUpdateLargest(FreePageManager *fpm)
     367              : {
     368        18753 :     if (!fpm->contiguous_pages_dirty)
     369         2770 :         return;
     370              : 
     371        15983 :     fpm->contiguous_pages = FreePageManagerLargestContiguous(fpm);
     372        15983 :     fpm->contiguous_pages_dirty = false;
     373              : }
     374              : 
     375              : /*
     376              :  * Transfer a run of pages to the free page manager.
     377              :  */
     378              : void
     379         4658 : FreePageManagerPut(FreePageManager *fpm, Size first_page, Size npages)
     380              : {
     381              :     Size        contiguous_pages;
     382              : 
     383              :     Assert(npages > 0);
     384              : 
     385              :     /* Record the new pages. */
     386              :     contiguous_pages =
     387         4658 :         FreePageManagerPutInternal(fpm, first_page, npages, false);
     388              : 
     389              :     /*
     390              :      * If the new range we inserted into the page manager was contiguous with
     391              :      * an existing range, it may have opened up cleanup opportunities.
     392              :      */
     393         4658 :     if (contiguous_pages > npages)
     394              :     {
     395              :         Size        cleanup_contiguous_pages;
     396              : 
     397         1735 :         cleanup_contiguous_pages = FreePageBtreeCleanup(fpm);
     398         1735 :         if (cleanup_contiguous_pages > contiguous_pages)
     399           47 :             contiguous_pages = cleanup_contiguous_pages;
     400              :     }
     401              : 
     402              :     /* See if we now have a new largest chunk. */
     403         4658 :     if (fpm->contiguous_pages < contiguous_pages)
     404         3555 :         fpm->contiguous_pages = contiguous_pages;
     405              : 
     406              :     /*
     407              :      * The earlier call to FreePageManagerPutInternal may have set
     408              :      * contiguous_pages_dirty if it needed to allocate internal pages, so
     409              :      * recompute contiguous_pages if necessary.
     410              :      */
     411         4658 :     FreePageManagerUpdateLargest(fpm);
     412              : 
     413              : #ifdef FPM_EXTRA_ASSERTS
     414              :     fpm->free_pages += npages;
     415              :     Assert(fpm->free_pages == sum_free_pages(fpm));
     416              :     Assert(fpm->contiguous_pages == FreePageManagerLargestContiguous(fpm));
     417              : #endif
     418         4658 : }
     419              : 
     420              : /*
     421              :  * Produce a debugging dump of the state of a free page manager.
     422              :  */
     423              : char *
     424            0 : FreePageManagerDump(FreePageManager *fpm)
     425              : {
     426            0 :     char       *base = fpm_segment_base(fpm);
     427              :     StringInfoData buf;
     428              :     FreePageSpanLeader *recycle;
     429            0 :     bool        dumped_any_freelist = false;
     430              :     Size        f;
     431              : 
     432              :     /* Initialize output buffer. */
     433            0 :     initStringInfo(&buf);
     434              : 
     435              :     /* Dump general stuff. */
     436            0 :     appendStringInfo(&buf, "metadata: self %zu max contiguous pages = %zu\n",
     437            0 :                      relptr_offset(fpm->self), fpm->contiguous_pages);
     438              : 
     439              :     /* Dump btree. */
     440            0 :     if (fpm->btree_depth > 0)
     441              :     {
     442              :         FreePageBtree *root;
     443              : 
     444            0 :         appendStringInfo(&buf, "btree depth %u:\n", fpm->btree_depth);
     445            0 :         root = relptr_access(base, fpm->btree_root);
     446            0 :         FreePageManagerDumpBtree(fpm, root, NULL, 0, &buf);
     447              :     }
     448            0 :     else if (fpm->singleton_npages > 0)
     449              :     {
     450            0 :         appendStringInfo(&buf, "singleton: %zu(%zu)\n",
     451              :                          fpm->singleton_first_page, fpm->singleton_npages);
     452              :     }
     453              : 
     454              :     /* Dump btree recycle list. */
     455            0 :     recycle = relptr_access(base, fpm->btree_recycle);
     456            0 :     if (recycle != NULL)
     457              :     {
     458            0 :         appendStringInfoString(&buf, "btree recycle:");
     459            0 :         FreePageManagerDumpSpans(fpm, recycle, 1, &buf);
     460              :     }
     461              : 
     462              :     /* Dump free lists. */
     463            0 :     for (f = 0; f < FPM_NUM_FREELISTS; ++f)
     464              :     {
     465              :         FreePageSpanLeader *span;
     466              : 
     467            0 :         if (relptr_is_null(fpm->freelist[f]))
     468            0 :             continue;
     469            0 :         if (!dumped_any_freelist)
     470              :         {
     471            0 :             appendStringInfoString(&buf, "freelists:\n");
     472            0 :             dumped_any_freelist = true;
     473              :         }
     474            0 :         appendStringInfo(&buf, "  %zu:", f + 1);
     475            0 :         span = relptr_access(base, fpm->freelist[f]);
     476            0 :         FreePageManagerDumpSpans(fpm, span, f + 1, &buf);
     477              :     }
     478              : 
     479              :     /* And return result to caller. */
     480            0 :     return buf.data;
     481              : }
     482              : 
     483              : 
     484              : /*
     485              :  * The first_page value stored at index zero in any non-root page must match
     486              :  * the first_page value stored in its parent at the index which points to that
     487              :  * page.  So when the value stored at index zero in a btree page changes, we've
     488              :  * got to walk up the tree adjusting ancestor keys until we reach an ancestor
     489              :  * where that key isn't index zero.  This function should be called after
     490              :  * updating the first key on the target page; it will propagate the change
     491              :  * upward as far as needed.
     492              :  *
     493              :  * We assume here that the first key on the page has not changed enough to
     494              :  * require changes in the ordering of keys on its ancestor pages.  Thus,
     495              :  * if we search the parent page for the first key greater than or equal to
     496              :  * the first key on the current page, the downlink to this page will be either
     497              :  * the exact index returned by the search (if the first key decreased)
     498              :  * or one less (if the first key increased).
     499              :  */
     500              : static void
     501          923 : FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm, FreePageBtree *btp)
     502              : {
     503          923 :     char       *base = fpm_segment_base(fpm);
     504              :     Size        first_page;
     505              :     FreePageBtree *parent;
     506              :     FreePageBtree *child;
     507              : 
     508              :     /* This might be either a leaf or an internal page. */
     509              :     Assert(btp->hdr.nused > 0);
     510          923 :     if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     511              :     {
     512              :         Assert(btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
     513          923 :         first_page = btp->u.leaf_key[0].first_page;
     514              :     }
     515              :     else
     516              :     {
     517              :         Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     518              :         Assert(btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE);
     519            0 :         first_page = btp->u.internal_key[0].first_page;
     520              :     }
     521          923 :     child = btp;
     522              : 
     523              :     /* Loop until we find an ancestor that does not require adjustment. */
     524              :     for (;;)
     525            0 :     {
     526              :         Size        s;
     527              : 
     528          923 :         parent = relptr_access(base, child->hdr.parent);
     529          923 :         if (parent == NULL)
     530          923 :             break;
     531            0 :         s = FreePageBtreeSearchInternal(parent, first_page);
     532              : 
     533              :         /* Key is either at index s or index s-1; figure out which. */
     534            0 :         if (s >= parent->hdr.nused)
     535              :         {
     536              :             Assert(s == parent->hdr.nused);
     537            0 :             --s;
     538              :         }
     539              :         else
     540              :         {
     541              :             FreePageBtree *check;
     542              : 
     543            0 :             check = relptr_access(base, parent->u.internal_key[s].child);
     544            0 :             if (check != child)
     545              :             {
     546              :                 Assert(s > 0);
     547            0 :                 --s;
     548              :             }
     549              :         }
     550              : 
     551              : #ifdef USE_ASSERT_CHECKING
     552              :         /* Debugging double-check. */
     553              :         {
     554              :             FreePageBtree *check;
     555              : 
     556              :             check = relptr_access(base, parent->u.internal_key[s].child);
     557              :             Assert(s < parent->hdr.nused);
     558              :             Assert(child == check);
     559              :         }
     560              : #endif
     561              : 
     562              :         /* Update the parent key. */
     563            0 :         parent->u.internal_key[s].first_page = first_page;
     564              : 
     565              :         /*
     566              :          * If this is the first key in the parent, go up another level; else
     567              :          * done.
     568              :          */
     569            0 :         if (s > 0)
     570            0 :             break;
     571            0 :         child = parent;
     572              :     }
     573          923 : }
     574              : 
     575              : /*
     576              :  * Attempt to reclaim space from the free-page btree.  The return value is
     577              :  * the largest range of contiguous pages created by the cleanup operation.
     578              :  */
     579              : static Size
     580        15830 : FreePageBtreeCleanup(FreePageManager *fpm)
     581              : {
     582        15830 :     char       *base = fpm_segment_base(fpm);
     583        15830 :     Size        max_contiguous_pages = 0;
     584              : 
     585              :     /* Attempt to shrink the depth of the btree. */
     586        15920 :     while (!relptr_is_null(fpm->btree_root))
     587              :     {
     588         1752 :         FreePageBtree *root = relptr_access(base, fpm->btree_root);
     589              : 
     590              :         /* If the root contains only one key, reduce depth by one. */
     591         1752 :         if (root->hdr.nused == 1)
     592              :         {
     593              :             /* Shrink depth of tree by one. */
     594              :             Assert(fpm->btree_depth > 0);
     595           90 :             --fpm->btree_depth;
     596           90 :             if (root->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     597              :             {
     598              :                 /* If root is a leaf, convert only entry to singleton range. */
     599           90 :                 relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
     600           90 :                 fpm->singleton_first_page = root->u.leaf_key[0].first_page;
     601           90 :                 fpm->singleton_npages = root->u.leaf_key[0].npages;
     602              :             }
     603              :             else
     604              :             {
     605              :                 FreePageBtree *newroot;
     606              : 
     607              :                 /* If root is an internal page, make only child the root. */
     608              :                 Assert(root->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     609            0 :                 relptr_copy(fpm->btree_root, root->u.internal_key[0].child);
     610            0 :                 newroot = relptr_access(base, fpm->btree_root);
     611            0 :                 relptr_store(base, newroot->hdr.parent, (FreePageBtree *) NULL);
     612              :             }
     613           90 :             FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, root));
     614              :         }
     615         1662 :         else if (root->hdr.nused == 2 &&
     616          882 :                  root->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     617              :         {
     618              :             Size        end_of_first;
     619              :             Size        start_of_second;
     620              : 
     621          882 :             end_of_first = root->u.leaf_key[0].first_page +
     622          882 :                 root->u.leaf_key[0].npages;
     623          882 :             start_of_second = root->u.leaf_key[1].first_page;
     624              : 
     625          882 :             if (end_of_first + 1 == start_of_second)
     626              :             {
     627           42 :                 Size        root_page = fpm_pointer_to_page(base, root);
     628              : 
     629           42 :                 if (end_of_first == root_page)
     630              :                 {
     631           42 :                     FreePagePopSpanLeader(fpm, root->u.leaf_key[0].first_page);
     632           42 :                     FreePagePopSpanLeader(fpm, root->u.leaf_key[1].first_page);
     633           42 :                     fpm->singleton_first_page = root->u.leaf_key[0].first_page;
     634           42 :                     fpm->singleton_npages = root->u.leaf_key[0].npages +
     635           42 :                         root->u.leaf_key[1].npages + 1;
     636           42 :                     fpm->btree_depth = 0;
     637           42 :                     relptr_store(base, fpm->btree_root,
     638              :                                  (FreePageBtree *) NULL);
     639           42 :                     FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
     640              :                                            fpm->singleton_npages);
     641              :                     Assert(max_contiguous_pages == 0);
     642           42 :                     max_contiguous_pages = fpm->singleton_npages;
     643              :                 }
     644              :             }
     645              : 
     646              :             /* Whether it worked or not, it's time to stop. */
     647          882 :             break;
     648              :         }
     649              :         else
     650              :         {
     651              :             /* Nothing more to do.  Stop. */
     652              :             break;
     653              :         }
     654              :     }
     655              : 
     656              :     /*
     657              :      * Attempt to free recycled btree pages.  We skip this if releasing the
     658              :      * recycled page would require a btree page split, because the page we're
     659              :      * trying to recycle would be consumed by the split, which would be
     660              :      * counterproductive.
     661              :      *
     662              :      * We also currently only ever attempt to recycle the first page on the
     663              :      * list; that could be made more aggressive, but it's not clear that the
     664              :      * complexity would be worthwhile.
     665              :      */
     666        15893 :     while (fpm->btree_recycle_count > 0)
     667              :     {
     668              :         FreePageBtree *btp;
     669              :         Size        first_page;
     670              :         Size        contiguous_pages;
     671              : 
     672           90 :         btp = FreePageBtreeGetRecycled(fpm);
     673           90 :         first_page = fpm_pointer_to_page(base, btp);
     674           90 :         contiguous_pages = FreePageManagerPutInternal(fpm, first_page, 1, true);
     675           90 :         if (contiguous_pages == 0)
     676              :         {
     677           27 :             FreePageBtreeRecycle(fpm, first_page);
     678           27 :             break;
     679              :         }
     680              :         else
     681              :         {
     682           63 :             if (contiguous_pages > max_contiguous_pages)
     683           63 :                 max_contiguous_pages = contiguous_pages;
     684              :         }
     685              :     }
     686              : 
     687        15830 :     return max_contiguous_pages;
     688              : }
     689              : 
     690              : /*
     691              :  * Consider consolidating the given page with its left or right sibling,
     692              :  * if it's fairly empty.
     693              :  */
     694              : static void
     695          359 : FreePageBtreeConsolidate(FreePageManager *fpm, FreePageBtree *btp)
     696              : {
     697          359 :     char       *base = fpm_segment_base(fpm);
     698              :     FreePageBtree *np;
     699              :     Size        max;
     700              : 
     701              :     /*
     702              :      * We only try to consolidate pages that are less than a third full. We
     703              :      * could be more aggressive about this, but that might risk performing
     704              :      * consolidation only to end up splitting again shortly thereafter.  Since
     705              :      * the btree should be very small compared to the space under management,
     706              :      * our goal isn't so much to ensure that it always occupies the absolutely
     707              :      * smallest possible number of pages as to reclaim pages before things get
     708              :      * too egregiously out of hand.
     709              :      */
     710          359 :     if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     711          359 :         max = FPM_ITEMS_PER_LEAF_PAGE;
     712              :     else
     713              :     {
     714              :         Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     715            0 :         max = FPM_ITEMS_PER_INTERNAL_PAGE;
     716              :     }
     717          359 :     if (btp->hdr.nused >= max / 3)
     718            0 :         return;
     719              : 
     720              :     /*
     721              :      * If we can fit our right sibling's keys onto this page, consolidate.
     722              :      */
     723          359 :     np = FreePageBtreeFindRightSibling(base, btp);
     724          359 :     if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
     725              :     {
     726            0 :         if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     727              :         {
     728            0 :             memcpy(&btp->u.leaf_key[btp->hdr.nused], &np->u.leaf_key[0],
     729            0 :                    sizeof(FreePageBtreeLeafKey) * np->hdr.nused);
     730            0 :             btp->hdr.nused += np->hdr.nused;
     731              :         }
     732              :         else
     733              :         {
     734            0 :             memcpy(&btp->u.internal_key[btp->hdr.nused], &np->u.internal_key[0],
     735            0 :                    sizeof(FreePageBtreeInternalKey) * np->hdr.nused);
     736            0 :             btp->hdr.nused += np->hdr.nused;
     737            0 :             FreePageBtreeUpdateParentPointers(base, btp);
     738              :         }
     739            0 :         FreePageBtreeRemovePage(fpm, np);
     740            0 :         return;
     741              :     }
     742              : 
     743              :     /*
     744              :      * If we can fit our keys onto our left sibling's page, consolidate. In
     745              :      * this case, we move our keys onto the other page rather than vice versa,
     746              :      * to avoid having to adjust ancestor keys.
     747              :      */
     748          359 :     np = FreePageBtreeFindLeftSibling(base, btp);
     749          359 :     if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
     750              :     {
     751            0 :         if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     752              :         {
     753            0 :             memcpy(&np->u.leaf_key[np->hdr.nused], &btp->u.leaf_key[0],
     754            0 :                    sizeof(FreePageBtreeLeafKey) * btp->hdr.nused);
     755            0 :             np->hdr.nused += btp->hdr.nused;
     756              :         }
     757              :         else
     758              :         {
     759            0 :             memcpy(&np->u.internal_key[np->hdr.nused], &btp->u.internal_key[0],
     760            0 :                    sizeof(FreePageBtreeInternalKey) * btp->hdr.nused);
     761            0 :             np->hdr.nused += btp->hdr.nused;
     762            0 :             FreePageBtreeUpdateParentPointers(base, np);
     763              :         }
     764            0 :         FreePageBtreeRemovePage(fpm, btp);
     765            0 :         return;
     766              :     }
     767              : }
     768              : 
     769              : /*
     770              :  * Find the passed page's left sibling; that is, the page at the same level
     771              :  * of the tree whose keyspace immediately precedes ours.
     772              :  */
     773              : static FreePageBtree *
     774          359 : FreePageBtreeFindLeftSibling(char *base, FreePageBtree *btp)
     775              : {
     776          359 :     FreePageBtree *p = btp;
     777          359 :     int         levels = 0;
     778              : 
     779              :     /* Move up until we can move left. */
     780              :     for (;;)
     781            0 :     {
     782              :         Size        first_page;
     783              :         Size        index;
     784              : 
     785          359 :         first_page = FreePageBtreeFirstKey(p);
     786          359 :         p = relptr_access(base, p->hdr.parent);
     787              : 
     788          359 :         if (p == NULL)
     789          359 :             return NULL;        /* we were passed the rightmost page */
     790              : 
     791            0 :         index = FreePageBtreeSearchInternal(p, first_page);
     792            0 :         if (index > 0)
     793              :         {
     794              :             Assert(p->u.internal_key[index].first_page == first_page);
     795            0 :             p = relptr_access(base, p->u.internal_key[index - 1].child);
     796            0 :             break;
     797              :         }
     798              :         Assert(index == 0);
     799            0 :         ++levels;
     800              :     }
     801              : 
     802              :     /* Descend left. */
     803            0 :     while (levels > 0)
     804              :     {
     805              :         Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     806            0 :         p = relptr_access(base, p->u.internal_key[p->hdr.nused - 1].child);
     807            0 :         --levels;
     808              :     }
     809              :     Assert(p->hdr.magic == btp->hdr.magic);
     810              : 
     811            0 :     return p;
     812              : }
     813              : 
     814              : /*
     815              :  * Find the passed page's right sibling; that is, the page at the same level
     816              :  * of the tree whose keyspace immediately follows ours.
     817              :  */
     818              : static FreePageBtree *
     819          366 : FreePageBtreeFindRightSibling(char *base, FreePageBtree *btp)
     820              : {
     821          366 :     FreePageBtree *p = btp;
     822          366 :     int         levels = 0;
     823              : 
     824              :     /* Move up until we can move right. */
     825              :     for (;;)
     826            0 :     {
     827              :         Size        first_page;
     828              :         Size        index;
     829              : 
     830          366 :         first_page = FreePageBtreeFirstKey(p);
     831          366 :         p = relptr_access(base, p->hdr.parent);
     832              : 
     833          366 :         if (p == NULL)
     834          366 :             return NULL;        /* we were passed the rightmost page */
     835              : 
     836            0 :         index = FreePageBtreeSearchInternal(p, first_page);
     837            0 :         if (index < p->hdr.nused - 1)
     838              :         {
     839              :             Assert(p->u.internal_key[index].first_page == first_page);
     840            0 :             p = relptr_access(base, p->u.internal_key[index + 1].child);
     841            0 :             break;
     842              :         }
     843              :         Assert(index == p->hdr.nused - 1);
     844            0 :         ++levels;
     845              :     }
     846              : 
     847              :     /* Descend left. */
     848            0 :     while (levels > 0)
     849              :     {
     850              :         Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     851            0 :         p = relptr_access(base, p->u.internal_key[0].child);
     852            0 :         --levels;
     853              :     }
     854              :     Assert(p->hdr.magic == btp->hdr.magic);
     855              : 
     856            0 :     return p;
     857              : }
     858              : 
     859              : /*
     860              :  * Get the first key on a btree page.
     861              :  */
     862              : static Size
     863          725 : FreePageBtreeFirstKey(FreePageBtree *btp)
     864              : {
     865              :     Assert(btp->hdr.nused > 0);
     866              : 
     867          725 :     if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     868          725 :         return btp->u.leaf_key[0].first_page;
     869              :     else
     870              :     {
     871              :         Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     872            0 :         return btp->u.internal_key[0].first_page;
     873              :     }
     874              : }
     875              : 
     876              : /*
     877              :  * Get a page from the btree recycle list for use as a btree page.
     878              :  */
     879              : static FreePageBtree *
     880          117 : FreePageBtreeGetRecycled(FreePageManager *fpm)
     881              : {
     882          117 :     char       *base = fpm_segment_base(fpm);
     883          117 :     FreePageSpanLeader *victim = relptr_access(base, fpm->btree_recycle);
     884              :     FreePageSpanLeader *newhead;
     885              : 
     886              :     Assert(victim != NULL);
     887          117 :     newhead = relptr_access(base, victim->next);
     888          117 :     if (newhead != NULL)
     889            0 :         relptr_copy(newhead->prev, victim->prev);
     890          117 :     relptr_store(base, fpm->btree_recycle, newhead);
     891              :     Assert(fpm_pointer_is_page_aligned(base, victim));
     892          117 :     fpm->btree_recycle_count--;
     893          117 :     return (FreePageBtree *) victim;
     894              : }
     895              : 
     896              : /*
     897              :  * Insert an item into an internal page (there must be room).
     898              :  */
     899              : static void
     900            0 : FreePageBtreeInsertInternal(char *base, FreePageBtree *btp, Size index,
     901              :                             Size first_page, FreePageBtree *child)
     902              : {
     903              :     Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     904              :     Assert(btp->hdr.nused < FPM_ITEMS_PER_INTERNAL_PAGE);
     905              :     Assert(index <= btp->hdr.nused);
     906            0 :     memmove(&btp->u.internal_key[index + 1], &btp->u.internal_key[index],
     907            0 :             sizeof(FreePageBtreeInternalKey) * (btp->hdr.nused - index));
     908            0 :     btp->u.internal_key[index].first_page = first_page;
     909            0 :     relptr_store(base, btp->u.internal_key[index].child, child);
     910            0 :     ++btp->hdr.nused;
     911            0 : }
     912              : 
     913              : /*
     914              :  * Insert an item into a leaf page (there must be room).
     915              :  */
     916              : static void
     917          446 : FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index, Size first_page,
     918              :                         Size npages)
     919              : {
     920              :     Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
     921              :     Assert(btp->hdr.nused < FPM_ITEMS_PER_LEAF_PAGE);
     922              :     Assert(index <= btp->hdr.nused);
     923          446 :     memmove(&btp->u.leaf_key[index + 1], &btp->u.leaf_key[index],
     924          446 :             sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
     925          446 :     btp->u.leaf_key[index].first_page = first_page;
     926          446 :     btp->u.leaf_key[index].npages = npages;
     927          446 :     ++btp->hdr.nused;
     928          446 : }
     929              : 
     930              : /*
     931              :  * Put a page on the btree recycle list.
     932              :  */
     933              : static void
     934          117 : FreePageBtreeRecycle(FreePageManager *fpm, Size pageno)
     935              : {
     936          117 :     char       *base = fpm_segment_base(fpm);
     937          117 :     FreePageSpanLeader *head = relptr_access(base, fpm->btree_recycle);
     938              :     FreePageSpanLeader *span;
     939              : 
     940          117 :     span = (FreePageSpanLeader *) fpm_page_to_pointer(base, pageno);
     941          117 :     span->magic = FREE_PAGE_SPAN_LEADER_MAGIC;
     942          117 :     span->npages = 1;
     943          117 :     relptr_store(base, span->next, head);
     944          117 :     relptr_store(base, span->prev, (FreePageSpanLeader *) NULL);
     945          117 :     if (head != NULL)
     946            0 :         relptr_store(base, head->prev, span);
     947          117 :     relptr_store(base, fpm->btree_recycle, span);
     948          117 :     fpm->btree_recycle_count++;
     949          117 : }
     950              : 
     951              : /*
     952              :  * Remove an item from the btree at the given position on the given page.
     953              :  */
     954              : static void
     955          359 : FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp, Size index)
     956              : {
     957              :     Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
     958              :     Assert(index < btp->hdr.nused);
     959              : 
     960              :     /* When last item is removed, extirpate entire page from btree. */
     961          359 :     if (btp->hdr.nused == 1)
     962              :     {
     963            0 :         FreePageBtreeRemovePage(fpm, btp);
     964            0 :         return;
     965              :     }
     966              : 
     967              :     /* Physically remove the key from the page. */
     968          359 :     --btp->hdr.nused;
     969          359 :     if (index < btp->hdr.nused)
     970          359 :         memmove(&btp->u.leaf_key[index], &btp->u.leaf_key[index + 1],
     971          359 :                 sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
     972              : 
     973              :     /* If we just removed the first key, adjust ancestor keys. */
     974          359 :     if (index == 0)
     975          117 :         FreePageBtreeAdjustAncestorKeys(fpm, btp);
     976              : 
     977              :     /* Consider whether to consolidate this page with a sibling. */
     978          359 :     FreePageBtreeConsolidate(fpm, btp);
     979              : }
     980              : 
     981              : /*
     982              :  * Remove a page from the btree.  Caller is responsible for having relocated
     983              :  * any keys from this page that are still wanted.  The page is placed on the
     984              :  * recycled list.
     985              :  */
     986              : static void
     987            0 : FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp)
     988              : {
     989            0 :     char       *base = fpm_segment_base(fpm);
     990              :     FreePageBtree *parent;
     991              :     Size        index;
     992              :     Size        first_page;
     993              : 
     994              :     for (;;)
     995              :     {
     996              :         /* Find parent page. */
     997            0 :         parent = relptr_access(base, btp->hdr.parent);
     998            0 :         if (parent == NULL)
     999              :         {
    1000              :             /* We are removing the root page. */
    1001            0 :             relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
    1002            0 :             fpm->btree_depth = 0;
    1003              :             Assert(fpm->singleton_first_page == 0);
    1004              :             Assert(fpm->singleton_npages == 0);
    1005            0 :             return;
    1006              :         }
    1007              : 
    1008              :         /*
    1009              :          * If the parent contains only one item, we need to remove it as well.
    1010              :          */
    1011            0 :         if (parent->hdr.nused > 1)
    1012            0 :             break;
    1013            0 :         FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, btp));
    1014            0 :         btp = parent;
    1015              :     }
    1016              : 
    1017              :     /* Find and remove the downlink. */
    1018            0 :     first_page = FreePageBtreeFirstKey(btp);
    1019            0 :     if (parent->hdr.magic == FREE_PAGE_LEAF_MAGIC)
    1020              :     {
    1021            0 :         index = FreePageBtreeSearchLeaf(parent, first_page);
    1022              :         Assert(index < parent->hdr.nused);
    1023            0 :         if (index < parent->hdr.nused - 1)
    1024            0 :             memmove(&parent->u.leaf_key[index],
    1025            0 :                     &parent->u.leaf_key[index + 1],
    1026              :                     sizeof(FreePageBtreeLeafKey)
    1027            0 :                     * (parent->hdr.nused - index - 1));
    1028              :     }
    1029              :     else
    1030              :     {
    1031            0 :         index = FreePageBtreeSearchInternal(parent, first_page);
    1032              :         Assert(index < parent->hdr.nused);
    1033            0 :         if (index < parent->hdr.nused - 1)
    1034            0 :             memmove(&parent->u.internal_key[index],
    1035            0 :                     &parent->u.internal_key[index + 1],
    1036              :                     sizeof(FreePageBtreeInternalKey)
    1037            0 :                     * (parent->hdr.nused - index - 1));
    1038              :     }
    1039            0 :     parent->hdr.nused--;
    1040              :     Assert(parent->hdr.nused > 0);
    1041              : 
    1042              :     /* Recycle the page. */
    1043            0 :     FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, btp));
    1044              : 
    1045              :     /* Adjust ancestor keys if needed. */
    1046            0 :     if (index == 0)
    1047            0 :         FreePageBtreeAdjustAncestorKeys(fpm, parent);
    1048              : 
    1049              :     /* Consider whether to consolidate the parent with a sibling. */
    1050            0 :     FreePageBtreeConsolidate(fpm, parent);
    1051              : }
    1052              : 
    1053              : /*
    1054              :  * Search the btree for an entry for the given first page and initialize
    1055              :  * *result with the results of the search.  result->page and result->index
    1056              :  * indicate either the position of an exact match or the position at which
    1057              :  * the new key should be inserted.  result->found is true for an exact match,
    1058              :  * otherwise false.  result->split_pages will contain the number of additional
    1059              :  * btree pages that will be needed when performing a split to insert a key.
    1060              :  * Except as described above, the contents of fields in the result object are
    1061              :  * undefined on return.
    1062              :  */
    1063              : static void
    1064         2198 : FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
    1065              :                     FreePageBtreeSearchResult *result)
    1066              : {
    1067         2198 :     char       *base = fpm_segment_base(fpm);
    1068         2198 :     FreePageBtree *btp = relptr_access(base, fpm->btree_root);
    1069              :     Size        index;
    1070              : 
    1071         2198 :     result->split_pages = 1;
    1072              : 
    1073              :     /* If the btree is empty, there's nothing to find. */
    1074         2198 :     if (btp == NULL)
    1075              :     {
    1076            0 :         result->page = NULL;
    1077            0 :         result->found = false;
    1078            0 :         return;
    1079              :     }
    1080              : 
    1081              :     /* Descend until we hit a leaf. */
    1082         2198 :     while (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
    1083              :     {
    1084              :         FreePageBtree *child;
    1085              :         bool        found_exact;
    1086              : 
    1087            0 :         index = FreePageBtreeSearchInternal(btp, first_page);
    1088            0 :         found_exact = index < btp->hdr.nused &&
    1089            0 :             btp->u.internal_key[index].first_page == first_page;
    1090              : 
    1091              :         /*
    1092              :          * If we found an exact match we descend directly.  Otherwise, we
    1093              :          * descend into the child to the left if possible so that we can find
    1094              :          * the insertion point at that child's high end.
    1095              :          */
    1096            0 :         if (!found_exact && index > 0)
    1097            0 :             --index;
    1098              : 
    1099              :         /* Track required split depth for leaf insert. */
    1100            0 :         if (btp->hdr.nused >= FPM_ITEMS_PER_INTERNAL_PAGE)
    1101              :         {
    1102              :             Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE);
    1103            0 :             result->split_pages++;
    1104              :         }
    1105              :         else
    1106            0 :             result->split_pages = 0;
    1107              : 
    1108              :         /* Descend to appropriate child page. */
    1109              :         Assert(index < btp->hdr.nused);
    1110            0 :         child = relptr_access(base, btp->u.internal_key[index].child);
    1111              :         Assert(relptr_access(base, child->hdr.parent) == btp);
    1112            0 :         btp = child;
    1113              :     }
    1114              : 
    1115              :     /* Track required split depth for leaf insert. */
    1116         2198 :     if (btp->hdr.nused >= FPM_ITEMS_PER_LEAF_PAGE)
    1117              :     {
    1118              :         Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE);
    1119            0 :         result->split_pages++;
    1120              :     }
    1121              :     else
    1122         2198 :         result->split_pages = 0;
    1123              : 
    1124              :     /* Search leaf page. */
    1125         2198 :     index = FreePageBtreeSearchLeaf(btp, first_page);
    1126              : 
    1127              :     /* Assemble results. */
    1128         2198 :     result->page = btp;
    1129         2198 :     result->index = index;
    1130         4389 :     result->found = index < btp->hdr.nused &&
    1131         4389 :         first_page == btp->u.leaf_key[index].first_page;
    1132              : }
    1133              : 
    1134              : /*
    1135              :  * Search an internal page for the first key greater than or equal to a given
    1136              :  * page number.  Returns the index of that key, or one greater than the number
    1137              :  * of keys on the page if none.
    1138              :  */
    1139              : static Size
    1140            0 : FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page)
    1141              : {
    1142            0 :     Size        low = 0;
    1143            0 :     Size        high = btp->hdr.nused;
    1144              : 
    1145              :     Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
    1146              :     Assert(high > 0 && high <= FPM_ITEMS_PER_INTERNAL_PAGE);
    1147              : 
    1148            0 :     while (low < high)
    1149              :     {
    1150            0 :         Size        mid = (low + high) / 2;
    1151            0 :         Size        val = btp->u.internal_key[mid].first_page;
    1152              : 
    1153            0 :         if (first_page == val)
    1154            0 :             return mid;
    1155            0 :         else if (first_page < val)
    1156            0 :             high = mid;
    1157              :         else
    1158            0 :             low = mid + 1;
    1159              :     }
    1160              : 
    1161            0 :     return low;
    1162              : }
    1163              : 
    1164              : /*
    1165              :  * Search a leaf page for the first key greater than or equal to a given
    1166              :  * page number.  Returns the index of that key, or one greater than the number
    1167              :  * of keys on the page if none.
    1168              :  */
    1169              : static Size
    1170         2198 : FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page)
    1171              : {
    1172         2198 :     Size        low = 0;
    1173         2198 :     Size        high = btp->hdr.nused;
    1174              : 
    1175              :     Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
    1176              :     Assert(high > 0 && high <= FPM_ITEMS_PER_LEAF_PAGE);
    1177              : 
    1178         5242 :     while (low < high)
    1179              :     {
    1180         3984 :         Size        mid = (low + high) / 2;
    1181         3984 :         Size        val = btp->u.leaf_key[mid].first_page;
    1182              : 
    1183         3984 :         if (first_page == val)
    1184          940 :             return mid;
    1185         3044 :         else if (first_page < val)
    1186         2264 :             high = mid;
    1187              :         else
    1188          780 :             low = mid + 1;
    1189              :     }
    1190              : 
    1191         1258 :     return low;
    1192              : }
    1193              : 
    1194              : /*
    1195              :  * Allocate a new btree page and move half the keys from the provided page
    1196              :  * to the new page.  Caller is responsible for making sure that there's a
    1197              :  * page available from fpm->btree_recycle.  Returns a pointer to the new page,
    1198              :  * to which caller must add a downlink.
    1199              :  */
    1200              : static FreePageBtree *
    1201            0 : FreePageBtreeSplitPage(FreePageManager *fpm, FreePageBtree *btp)
    1202              : {
    1203              :     FreePageBtree *newsibling;
    1204              : 
    1205            0 :     newsibling = FreePageBtreeGetRecycled(fpm);
    1206            0 :     newsibling->hdr.magic = btp->hdr.magic;
    1207            0 :     newsibling->hdr.nused = btp->hdr.nused / 2;
    1208            0 :     relptr_copy(newsibling->hdr.parent, btp->hdr.parent);
    1209            0 :     btp->hdr.nused -= newsibling->hdr.nused;
    1210              : 
    1211            0 :     if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
    1212            0 :         memcpy(&newsibling->u.leaf_key,
    1213            0 :                &btp->u.leaf_key[btp->hdr.nused],
    1214            0 :                sizeof(FreePageBtreeLeafKey) * newsibling->hdr.nused);
    1215              :     else
    1216              :     {
    1217              :         Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
    1218            0 :         memcpy(&newsibling->u.internal_key,
    1219            0 :                &btp->u.internal_key[btp->hdr.nused],
    1220            0 :                sizeof(FreePageBtreeInternalKey) * newsibling->hdr.nused);
    1221            0 :         FreePageBtreeUpdateParentPointers(fpm_segment_base(fpm), newsibling);
    1222              :     }
    1223              : 
    1224            0 :     return newsibling;
    1225              : }
    1226              : 
    1227              : /*
    1228              :  * When internal pages are split or merged, the parent pointers of their
    1229              :  * children must be updated.
    1230              :  */
    1231              : static void
    1232            0 : FreePageBtreeUpdateParentPointers(char *base, FreePageBtree *btp)
    1233              : {
    1234              :     Size        i;
    1235              : 
    1236              :     Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
    1237            0 :     for (i = 0; i < btp->hdr.nused; ++i)
    1238              :     {
    1239              :         FreePageBtree *child;
    1240              : 
    1241            0 :         child = relptr_access(base, btp->u.internal_key[i].child);
    1242            0 :         relptr_store(base, child->hdr.parent, btp);
    1243              :     }
    1244            0 : }
    1245              : 
    1246              : /*
    1247              :  * Debugging dump of btree data.
    1248              :  */
    1249              : static void
    1250            0 : FreePageManagerDumpBtree(FreePageManager *fpm, FreePageBtree *btp,
    1251              :                          FreePageBtree *parent, int level, StringInfo buf)
    1252              : {
    1253            0 :     char       *base = fpm_segment_base(fpm);
    1254            0 :     Size        pageno = fpm_pointer_to_page(base, btp);
    1255              :     Size        index;
    1256              :     FreePageBtree *check_parent;
    1257              : 
    1258            0 :     check_stack_depth();
    1259            0 :     check_parent = relptr_access(base, btp->hdr.parent);
    1260            0 :     appendStringInfo(buf, "  %zu@%d %c", pageno, level,
    1261            0 :                      btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC ? 'i' : 'l');
    1262            0 :     if (parent != check_parent)
    1263            0 :         appendStringInfo(buf, " [actual parent %zu, expected %zu]",
    1264            0 :                          fpm_pointer_to_page(base, check_parent),
    1265            0 :                          fpm_pointer_to_page(base, parent));
    1266            0 :     appendStringInfoChar(buf, ':');
    1267            0 :     for (index = 0; index < btp->hdr.nused; ++index)
    1268              :     {
    1269            0 :         if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
    1270            0 :             appendStringInfo(buf, " %zu->%zu",
    1271              :                              btp->u.internal_key[index].first_page,
    1272            0 :                              relptr_offset(btp->u.internal_key[index].child) / FPM_PAGE_SIZE);
    1273              :         else
    1274            0 :             appendStringInfo(buf, " %zu(%zu)",
    1275              :                              btp->u.leaf_key[index].first_page,
    1276              :                              btp->u.leaf_key[index].npages);
    1277              :     }
    1278            0 :     appendStringInfoChar(buf, '\n');
    1279              : 
    1280            0 :     if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
    1281              :     {
    1282            0 :         for (index = 0; index < btp->hdr.nused; ++index)
    1283              :         {
    1284              :             FreePageBtree *child;
    1285              : 
    1286            0 :             child = relptr_access(base, btp->u.internal_key[index].child);
    1287            0 :             FreePageManagerDumpBtree(fpm, child, btp, level + 1, buf);
    1288              :         }
    1289              :     }
    1290            0 : }
    1291              : 
    1292              : /*
    1293              :  * Debugging dump of free-span data.
    1294              :  */
    1295              : static void
    1296            0 : FreePageManagerDumpSpans(FreePageManager *fpm, FreePageSpanLeader *span,
    1297              :                          Size expected_pages, StringInfo buf)
    1298              : {
    1299            0 :     char       *base = fpm_segment_base(fpm);
    1300              : 
    1301            0 :     while (span != NULL)
    1302              :     {
    1303            0 :         if (span->npages != expected_pages)
    1304            0 :             appendStringInfo(buf, " %zu(%zu)", fpm_pointer_to_page(base, span),
    1305              :                              span->npages);
    1306              :         else
    1307            0 :             appendStringInfo(buf, " %zu", fpm_pointer_to_page(base, span));
    1308            0 :         span = relptr_access(base, span->next);
    1309              :     }
    1310              : 
    1311            0 :     appendStringInfoChar(buf, '\n');
    1312            0 : }
    1313              : 
    1314              : /*
    1315              :  * This function allocates a run of pages of the given length from the free
    1316              :  * page manager.
    1317              :  */
    1318              : static bool
    1319        14241 : FreePageManagerGetInternal(FreePageManager *fpm, Size npages, Size *first_page)
    1320              : {
    1321        14241 :     char       *base = fpm_segment_base(fpm);
    1322        14241 :     FreePageSpanLeader *victim = NULL;
    1323              :     FreePageSpanLeader *prev;
    1324              :     FreePageSpanLeader *next;
    1325              :     FreePageBtreeSearchResult result;
    1326        14241 :     Size        victim_page = 0;    /* placate compiler */
    1327              :     Size        f;
    1328              : 
    1329              :     /*
    1330              :      * Search for a free span.
    1331              :      *
    1332              :      * Right now, we use a simple best-fit policy here, but it's possible for
    1333              :      * this to result in memory fragmentation if we're repeatedly asked to
    1334              :      * allocate chunks just a little smaller than what we have available.
    1335              :      * Hopefully, this is unlikely, because we expect most requests to be
    1336              :      * single pages or superblock-sized chunks -- but no policy can be optimal
    1337              :      * under all circumstances unless it has knowledge of future allocation
    1338              :      * patterns.
    1339              :      */
    1340      1103648 :     for (f = Min(npages, FPM_NUM_FREELISTS) - 1; f < FPM_NUM_FREELISTS; ++f)
    1341              :     {
    1342              :         /* Skip empty freelists. */
    1343      1103648 :         if (relptr_is_null(fpm->freelist[f]))
    1344      1089407 :             continue;
    1345              : 
    1346              :         /*
    1347              :          * All of the freelists except the last one contain only items of a
    1348              :          * single size, so we just take the first one.  But the final free
    1349              :          * list contains everything too big for any of the other lists, so we
    1350              :          * need to search the list.
    1351              :          */
    1352        14241 :         if (f < FPM_NUM_FREELISTS - 1)
    1353         6734 :             victim = relptr_access(base, fpm->freelist[f]);
    1354              :         else
    1355              :         {
    1356              :             FreePageSpanLeader *candidate;
    1357              : 
    1358         7507 :             candidate = relptr_access(base, fpm->freelist[f]);
    1359              :             do
    1360              :             {
    1361         7507 :                 if (candidate->npages >= npages && (victim == NULL ||
    1362            0 :                                                     victim->npages > candidate->npages))
    1363              :                 {
    1364         7507 :                     victim = candidate;
    1365         7507 :                     if (victim->npages == npages)
    1366            0 :                         break;
    1367              :                 }
    1368         7507 :                 candidate = relptr_access(base, candidate->next);
    1369         7507 :             } while (candidate != NULL);
    1370              :         }
    1371        14241 :         break;
    1372              :     }
    1373              : 
    1374              :     /* If we didn't find an allocatable span, return failure. */
    1375        14241 :     if (victim == NULL)
    1376            0 :         return false;
    1377              : 
    1378              :     /* Remove span from free list. */
    1379              :     Assert(victim->magic == FREE_PAGE_SPAN_LEADER_MAGIC);
    1380        14241 :     prev = relptr_access(base, victim->prev);
    1381        14241 :     next = relptr_access(base, victim->next);
    1382        14241 :     if (prev != NULL)
    1383            0 :         relptr_copy(prev->next, victim->next);
    1384              :     else
    1385        14241 :         relptr_copy(fpm->freelist[f], victim->next);
    1386        14241 :     if (next != NULL)
    1387           57 :         relptr_copy(next->prev, victim->prev);
    1388        14241 :     victim_page = fpm_pointer_to_page(base, victim);
    1389              : 
    1390              :     /* Decide whether we might be invalidating contiguous_pages. */
    1391        14241 :     if (f == FPM_NUM_FREELISTS - 1 &&
    1392         7507 :         victim->npages == fpm->contiguous_pages)
    1393              :     {
    1394              :         /*
    1395              :          * The victim span came from the oversized freelist, and had the same
    1396              :          * size as the longest span.  There may or may not be another one of
    1397              :          * the same size, so contiguous_pages must be recomputed just to be
    1398              :          * safe.
    1399              :          */
    1400         7507 :         fpm->contiguous_pages_dirty = true;
    1401              :     }
    1402         6734 :     else if (f + 1 == fpm->contiguous_pages &&
    1403         6018 :              relptr_is_null(fpm->freelist[f]))
    1404              :     {
    1405              :         /*
    1406              :          * The victim span came from a fixed sized freelist, and it was the
    1407              :          * list for spans of the same size as the current longest span, and
    1408              :          * the list is now empty after removing the victim.  So
    1409              :          * contiguous_pages must be recomputed without a doubt.
    1410              :          */
    1411         6018 :         fpm->contiguous_pages_dirty = true;
    1412              :     }
    1413              : 
    1414              :     /*
    1415              :      * If we haven't initialized the btree yet, the victim must be the single
    1416              :      * span stored within the FreePageManager itself.  Otherwise, we need to
    1417              :      * update the btree.
    1418              :      */
    1419        14241 :     if (relptr_is_null(fpm->btree_root))
    1420              :     {
    1421              :         Assert(victim_page == fpm->singleton_first_page);
    1422              :         Assert(victim->npages == fpm->singleton_npages);
    1423              :         Assert(victim->npages >= npages);
    1424        13301 :         fpm->singleton_first_page += npages;
    1425        13301 :         fpm->singleton_npages -= npages;
    1426        13301 :         if (fpm->singleton_npages > 0)
    1427        13282 :             FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
    1428              :                                    fpm->singleton_npages);
    1429              :     }
    1430              :     else
    1431              :     {
    1432              :         /*
    1433              :          * If the span we found is exactly the right size, remove it from the
    1434              :          * btree completely.  Otherwise, adjust the btree entry to reflect the
    1435              :          * still-unallocated portion of the span, and put that portion on the
    1436              :          * appropriate free list.
    1437              :          */
    1438          940 :         FreePageBtreeSearch(fpm, victim_page, &result);
    1439              :         Assert(result.found);
    1440          940 :         if (victim->npages == npages)
    1441          254 :             FreePageBtreeRemove(fpm, result.page, result.index);
    1442              :         else
    1443              :         {
    1444              :             FreePageBtreeLeafKey *key;
    1445              : 
    1446              :             /* Adjust btree to reflect remaining pages. */
    1447              :             Assert(victim->npages > npages);
    1448          686 :             key = &result.page->u.leaf_key[result.index];
    1449              :             Assert(key->npages == victim->npages);
    1450          686 :             key->first_page += npages;
    1451          686 :             key->npages -= npages;
    1452          686 :             if (result.index == 0)
    1453          284 :                 FreePageBtreeAdjustAncestorKeys(fpm, result.page);
    1454              : 
    1455              :             /* Put the unallocated pages back on the appropriate free list. */
    1456          686 :             FreePagePushSpanLeader(fpm, victim_page + npages,
    1457          686 :                                    victim->npages - npages);
    1458              :         }
    1459              :     }
    1460              : 
    1461              :     /* Return results to caller. */
    1462        14241 :     *first_page = fpm_pointer_to_page(base, victim);
    1463        14241 :     return true;
    1464              : }
    1465              : 
    1466              : /*
    1467              :  * Put a range of pages into the btree and freelists, consolidating it with
    1468              :  * existing free spans just before and/or after it.  If 'soft' is true,
    1469              :  * only perform the insertion if it can be done without allocating new btree
    1470              :  * pages; if false, do it always.  Returns 0 if the soft flag caused the
    1471              :  * insertion to be skipped, or otherwise the size of the contiguous span
    1472              :  * created by the insertion.  This may be larger than npages if we're able
    1473              :  * to consolidate with an adjacent range.
    1474              :  */
    1475              : static Size
    1476         4748 : FreePageManagerPutInternal(FreePageManager *fpm, Size first_page, Size npages,
    1477              :                            bool soft)
    1478              : {
    1479         4748 :     char       *base = fpm_segment_base(fpm);
    1480              :     FreePageBtreeSearchResult result;
    1481         4748 :     FreePageBtreeLeafKey *prevkey = NULL;
    1482         4748 :     FreePageBtreeLeafKey *nextkey = NULL;
    1483              :     FreePageBtree *np;
    1484              :     Size        nindex;
    1485              : 
    1486              :     Assert(npages > 0);
    1487              : 
    1488              :     /* We can store a single free span without initializing the btree. */
    1489         4748 :     if (fpm->btree_depth == 0)
    1490              :     {
    1491         3644 :         if (fpm->singleton_npages == 0)
    1492              :         {
    1493              :             /* Don't have a span yet; store this one. */
    1494         2458 :             fpm->singleton_first_page = first_page;
    1495         2458 :             fpm->singleton_npages = npages;
    1496         2458 :             FreePagePushSpanLeader(fpm, first_page, npages);
    1497         2458 :             return fpm->singleton_npages;
    1498              :         }
    1499         1186 :         else if (fpm->singleton_first_page + fpm->singleton_npages ==
    1500              :                  first_page)
    1501              :         {
    1502              :             /* New span immediately follows sole existing span. */
    1503            7 :             fpm->singleton_npages += npages;
    1504            7 :             FreePagePopSpanLeader(fpm, fpm->singleton_first_page);
    1505            7 :             FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
    1506              :                                    fpm->singleton_npages);
    1507            7 :             return fpm->singleton_npages;
    1508              :         }
    1509         1179 :         else if (first_page + npages == fpm->singleton_first_page)
    1510              :         {
    1511              :             /* New span immediately precedes sole existing span. */
    1512          979 :             FreePagePopSpanLeader(fpm, fpm->singleton_first_page);
    1513          979 :             fpm->singleton_first_page = first_page;
    1514          979 :             fpm->singleton_npages += npages;
    1515          979 :             FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
    1516              :                                    fpm->singleton_npages);
    1517          979 :             return fpm->singleton_npages;
    1518              :         }
    1519              :         else
    1520              :         {
    1521              :             /* Not contiguous; we need to initialize the btree. */
    1522              :             Size        root_page;
    1523              :             FreePageBtree *root;
    1524              : 
    1525          200 :             if (!relptr_is_null(fpm->btree_recycle))
    1526           27 :                 root = FreePageBtreeGetRecycled(fpm);
    1527          173 :             else if (soft)
    1528           46 :                 return 0;       /* Should not allocate if soft. */
    1529          146 :             else if (FreePageManagerGetInternal(fpm, 1, &root_page))
    1530          146 :                 root = (FreePageBtree *) fpm_page_to_pointer(base, root_page);
    1531              :             else
    1532              :             {
    1533              :                 /* We'd better be able to get a page from the existing range. */
    1534            0 :                 elog(FATAL, "free page manager btree is corrupt");
    1535              :             }
    1536              : 
    1537              :             /* Create the btree and move the preexisting range into it. */
    1538          173 :             root->hdr.magic = FREE_PAGE_LEAF_MAGIC;
    1539          173 :             root->hdr.nused = 1;
    1540          173 :             relptr_store(base, root->hdr.parent, (FreePageBtree *) NULL);
    1541          173 :             root->u.leaf_key[0].first_page = fpm->singleton_first_page;
    1542          173 :             root->u.leaf_key[0].npages = fpm->singleton_npages;
    1543          173 :             relptr_store(base, fpm->btree_root, root);
    1544          173 :             fpm->singleton_first_page = 0;
    1545          173 :             fpm->singleton_npages = 0;
    1546          173 :             fpm->btree_depth = 1;
    1547              : 
    1548              :             /*
    1549              :              * Corner case: it may be that the btree root took the very last
    1550              :              * free page.  In that case, the sole btree entry covers a zero
    1551              :              * page run, which is invalid.  Overwrite it with the entry we're
    1552              :              * trying to insert and get out.
    1553              :              */
    1554          173 :             if (root->u.leaf_key[0].npages == 0)
    1555              :             {
    1556           19 :                 root->u.leaf_key[0].first_page = first_page;
    1557           19 :                 root->u.leaf_key[0].npages = npages;
    1558           19 :                 FreePagePushSpanLeader(fpm, first_page, npages);
    1559           19 :                 return npages;
    1560              :             }
    1561              : 
    1562              :             /* Fall through to insert the new key. */
    1563              :         }
    1564              :     }
    1565              : 
    1566              :     /* Search the btree. */
    1567         1258 :     FreePageBtreeSearch(fpm, first_page, &result);
    1568              :     Assert(!result.found);
    1569         1258 :     if (result.index > 0)
    1570          736 :         prevkey = &result.page->u.leaf_key[result.index - 1];
    1571         1258 :     if (result.index < result.page->hdr.nused)
    1572              :     {
    1573         1251 :         np = result.page;
    1574         1251 :         nindex = result.index;
    1575         1251 :         nextkey = &result.page->u.leaf_key[result.index];
    1576              :     }
    1577              :     else
    1578              :     {
    1579            7 :         np = FreePageBtreeFindRightSibling(base, result.page);
    1580            7 :         nindex = 0;
    1581            7 :         if (np != NULL)
    1582            0 :             nextkey = &np->u.leaf_key[0];
    1583              :     }
    1584              : 
    1585              :     /* Consolidate with the previous entry if possible. */
    1586         1258 :     if (prevkey != NULL && prevkey->first_page + prevkey->npages >= first_page)
    1587              :     {
    1588          211 :         bool        remove_next = false;
    1589              :         Size        result;
    1590              : 
    1591              :         Assert(prevkey->first_page + prevkey->npages == first_page);
    1592          211 :         prevkey->npages = (first_page - prevkey->first_page) + npages;
    1593              : 
    1594              :         /* Check whether we can *also* consolidate with the following entry. */
    1595          211 :         if (nextkey != NULL &&
    1596          204 :             prevkey->first_page + prevkey->npages >= nextkey->first_page)
    1597              :         {
    1598              :             Assert(prevkey->first_page + prevkey->npages ==
    1599              :                    nextkey->first_page);
    1600          105 :             prevkey->npages = (nextkey->first_page - prevkey->first_page)
    1601          105 :                 + nextkey->npages;
    1602          105 :             FreePagePopSpanLeader(fpm, nextkey->first_page);
    1603          105 :             remove_next = true;
    1604              :         }
    1605              : 
    1606              :         /* Put the span on the correct freelist and save size. */
    1607          211 :         FreePagePopSpanLeader(fpm, prevkey->first_page);
    1608          211 :         FreePagePushSpanLeader(fpm, prevkey->first_page, prevkey->npages);
    1609          211 :         result = prevkey->npages;
    1610              : 
    1611              :         /*
    1612              :          * If we consolidated with both the preceding and following entries,
    1613              :          * we must remove the following entry.  We do this last, because
    1614              :          * removing an element from the btree may invalidate pointers we hold
    1615              :          * into the current data structure.
    1616              :          *
    1617              :          * NB: The btree is technically in an invalid state a this point
    1618              :          * because we've already updated prevkey to cover the same key space
    1619              :          * as nextkey.  FreePageBtreeRemove() shouldn't notice that, though.
    1620              :          */
    1621          211 :         if (remove_next)
    1622          105 :             FreePageBtreeRemove(fpm, np, nindex);
    1623              : 
    1624          211 :         return result;
    1625              :     }
    1626              : 
    1627              :     /* Consolidate with the next entry if possible. */
    1628         1047 :     if (nextkey != NULL && first_page + npages >= nextkey->first_page)
    1629              :     {
    1630              :         Size        newpages;
    1631              : 
    1632              :         /* Compute new size for span. */
    1633              :         Assert(first_page + npages == nextkey->first_page);
    1634          601 :         newpages = (nextkey->first_page - first_page) + nextkey->npages;
    1635              : 
    1636              :         /* Put span on correct free list. */
    1637          601 :         FreePagePopSpanLeader(fpm, nextkey->first_page);
    1638          601 :         FreePagePushSpanLeader(fpm, first_page, newpages);
    1639              : 
    1640              :         /* Update key in place. */
    1641          601 :         nextkey->first_page = first_page;
    1642          601 :         nextkey->npages = newpages;
    1643              : 
    1644              :         /* If reducing first key on page, ancestors might need adjustment. */
    1645          601 :         if (nindex == 0)
    1646          295 :             FreePageBtreeAdjustAncestorKeys(fpm, np);
    1647              : 
    1648          601 :         return nextkey->npages;
    1649              :     }
    1650              : 
    1651              :     /* Split leaf page and as many of its ancestors as necessary. */
    1652          446 :     if (result.split_pages > 0)
    1653              :     {
    1654              :         /*
    1655              :          * NB: We could consider various coping strategies here to avoid a
    1656              :          * split; most obviously, if np != result.page, we could target that
    1657              :          * page instead.   More complicated shuffling strategies could be
    1658              :          * possible as well; basically, unless every single leaf page is 100%
    1659              :          * full, we can jam this key in there if we try hard enough.  It's
    1660              :          * unlikely that trying that hard is worthwhile, but it's possible we
    1661              :          * might need to make more than no effort.  For now, we just do the
    1662              :          * easy thing, which is nothing.
    1663              :          */
    1664              : 
    1665              :         /* If this is a soft insert, it's time to give up. */
    1666            0 :         if (soft)
    1667            0 :             return 0;
    1668              : 
    1669              :         /* Check whether we need to allocate more btree pages to split. */
    1670            0 :         if (result.split_pages > fpm->btree_recycle_count)
    1671              :         {
    1672              :             Size        pages_needed;
    1673              :             Size        recycle_page;
    1674              :             Size        i;
    1675              : 
    1676              :             /*
    1677              :              * Allocate the required number of pages and split each one in
    1678              :              * turn.  This should never fail, because if we've got enough
    1679              :              * spans of free pages kicking around that we need additional
    1680              :              * storage space just to remember them all, then we should
    1681              :              * certainly have enough to expand the btree, which should only
    1682              :              * ever use a tiny number of pages compared to the number under
    1683              :              * management.  If it does, something's badly screwed up.
    1684              :              */
    1685            0 :             pages_needed = result.split_pages - fpm->btree_recycle_count;
    1686            0 :             for (i = 0; i < pages_needed; ++i)
    1687              :             {
    1688            0 :                 if (!FreePageManagerGetInternal(fpm, 1, &recycle_page))
    1689            0 :                     elog(FATAL, "free page manager btree is corrupt");
    1690            0 :                 FreePageBtreeRecycle(fpm, recycle_page);
    1691              :             }
    1692              : 
    1693              :             /*
    1694              :              * The act of allocating pages to recycle may have invalidated the
    1695              :              * results of our previous btree research, so repeat it. (We could
    1696              :              * recheck whether any of our split-avoidance strategies that were
    1697              :              * not viable before now are, but it hardly seems worthwhile, so
    1698              :              * we don't bother. Consolidation can't be possible now if it
    1699              :              * wasn't previously.)
    1700              :              */
    1701            0 :             FreePageBtreeSearch(fpm, first_page, &result);
    1702              : 
    1703              :             /*
    1704              :              * The act of allocating pages for use in constructing our btree
    1705              :              * should never cause any page to become more full, so the new
    1706              :              * split depth should be no greater than the old one, and perhaps
    1707              :              * less if we fortuitously allocated a chunk that freed up a slot
    1708              :              * on the page we need to update.
    1709              :              */
    1710              :             Assert(result.split_pages <= fpm->btree_recycle_count);
    1711              :         }
    1712              : 
    1713              :         /* If we still need to perform a split, do it. */
    1714            0 :         if (result.split_pages > 0)
    1715              :         {
    1716            0 :             FreePageBtree *split_target = result.page;
    1717            0 :             FreePageBtree *child = NULL;
    1718            0 :             Size        key = first_page;
    1719              : 
    1720              :             for (;;)
    1721            0 :             {
    1722              :                 FreePageBtree *newsibling;
    1723              :                 FreePageBtree *parent;
    1724              : 
    1725              :                 /* Identify parent page, which must receive downlink. */
    1726            0 :                 parent = relptr_access(base, split_target->hdr.parent);
    1727              : 
    1728              :                 /* Split the page - downlink not added yet. */
    1729            0 :                 newsibling = FreePageBtreeSplitPage(fpm, split_target);
    1730              : 
    1731              :                 /*
    1732              :                  * At this point in the loop, we're always carrying a pending
    1733              :                  * insertion.  On the first pass, it's the actual key we're
    1734              :                  * trying to insert; on subsequent passes, it's the downlink
    1735              :                  * that needs to be added as a result of the split performed
    1736              :                  * during the previous loop iteration.  Since we've just split
    1737              :                  * the page, there's definitely room on one of the two
    1738              :                  * resulting pages.
    1739              :                  */
    1740            0 :                 if (child == NULL)
    1741              :                 {
    1742              :                     Size        index;
    1743              :                     FreePageBtree *insert_into;
    1744              : 
    1745            0 :                     insert_into = key < newsibling->u.leaf_key[0].first_page ?
    1746            0 :                         split_target : newsibling;
    1747            0 :                     index = FreePageBtreeSearchLeaf(insert_into, key);
    1748            0 :                     FreePageBtreeInsertLeaf(insert_into, index, key, npages);
    1749            0 :                     if (index == 0 && insert_into == split_target)
    1750            0 :                         FreePageBtreeAdjustAncestorKeys(fpm, split_target);
    1751              :                 }
    1752              :                 else
    1753              :                 {
    1754              :                     Size        index;
    1755              :                     FreePageBtree *insert_into;
    1756              : 
    1757            0 :                     insert_into =
    1758            0 :                         key < newsibling->u.internal_key[0].first_page ?
    1759            0 :                         split_target : newsibling;
    1760            0 :                     index = FreePageBtreeSearchInternal(insert_into, key);
    1761            0 :                     FreePageBtreeInsertInternal(base, insert_into, index,
    1762              :                                                 key, child);
    1763            0 :                     relptr_store(base, child->hdr.parent, insert_into);
    1764            0 :                     if (index == 0 && insert_into == split_target)
    1765            0 :                         FreePageBtreeAdjustAncestorKeys(fpm, split_target);
    1766              :                 }
    1767              : 
    1768              :                 /* If the page we just split has no parent, split the root. */
    1769            0 :                 if (parent == NULL)
    1770              :                 {
    1771              :                     FreePageBtree *newroot;
    1772              : 
    1773            0 :                     newroot = FreePageBtreeGetRecycled(fpm);
    1774            0 :                     newroot->hdr.magic = FREE_PAGE_INTERNAL_MAGIC;
    1775            0 :                     newroot->hdr.nused = 2;
    1776            0 :                     relptr_store(base, newroot->hdr.parent,
    1777              :                                  (FreePageBtree *) NULL);
    1778            0 :                     newroot->u.internal_key[0].first_page =
    1779            0 :                         FreePageBtreeFirstKey(split_target);
    1780            0 :                     relptr_store(base, newroot->u.internal_key[0].child,
    1781              :                                  split_target);
    1782            0 :                     relptr_store(base, split_target->hdr.parent, newroot);
    1783            0 :                     newroot->u.internal_key[1].first_page =
    1784            0 :                         FreePageBtreeFirstKey(newsibling);
    1785            0 :                     relptr_store(base, newroot->u.internal_key[1].child,
    1786              :                                  newsibling);
    1787            0 :                     relptr_store(base, newsibling->hdr.parent, newroot);
    1788            0 :                     relptr_store(base, fpm->btree_root, newroot);
    1789            0 :                     fpm->btree_depth++;
    1790              : 
    1791            0 :                     break;
    1792              :                 }
    1793              : 
    1794              :                 /* If the parent page isn't full, insert the downlink. */
    1795            0 :                 key = newsibling->u.internal_key[0].first_page;
    1796            0 :                 if (parent->hdr.nused < FPM_ITEMS_PER_INTERNAL_PAGE)
    1797              :                 {
    1798              :                     Size        index;
    1799              : 
    1800            0 :                     index = FreePageBtreeSearchInternal(parent, key);
    1801            0 :                     FreePageBtreeInsertInternal(base, parent, index,
    1802              :                                                 key, newsibling);
    1803            0 :                     relptr_store(base, newsibling->hdr.parent, parent);
    1804            0 :                     if (index == 0)
    1805            0 :                         FreePageBtreeAdjustAncestorKeys(fpm, parent);
    1806            0 :                     break;
    1807              :                 }
    1808              : 
    1809              :                 /* The parent also needs to be split, so loop around. */
    1810            0 :                 child = newsibling;
    1811            0 :                 split_target = parent;
    1812              :             }
    1813              : 
    1814              :             /*
    1815              :              * The loop above did the insert, so just need to update the free
    1816              :              * list, and we're done.
    1817              :              */
    1818            0 :             FreePagePushSpanLeader(fpm, first_page, npages);
    1819              : 
    1820            0 :             return npages;
    1821              :         }
    1822              :     }
    1823              : 
    1824              :     /* Physically add the key to the page. */
    1825              :     Assert(result.page->hdr.nused < FPM_ITEMS_PER_LEAF_PAGE);
    1826          446 :     FreePageBtreeInsertLeaf(result.page, result.index, first_page, npages);
    1827              : 
    1828              :     /* If new first key on page, ancestors might need adjustment. */
    1829          446 :     if (result.index == 0)
    1830          227 :         FreePageBtreeAdjustAncestorKeys(fpm, result.page);
    1831              : 
    1832              :     /* Put it on the free list. */
    1833          446 :     FreePagePushSpanLeader(fpm, first_page, npages);
    1834              : 
    1835          446 :     return npages;
    1836              : }
    1837              : 
    1838              : /*
    1839              :  * Remove a FreePageSpanLeader from the linked-list that contains it, either
    1840              :  * because we're changing the size of the span, or because we're allocating it.
    1841              :  */
    1842              : static void
    1843         1987 : FreePagePopSpanLeader(FreePageManager *fpm, Size pageno)
    1844              : {
    1845         1987 :     char       *base = fpm_segment_base(fpm);
    1846              :     FreePageSpanLeader *span;
    1847              :     FreePageSpanLeader *next;
    1848              :     FreePageSpanLeader *prev;
    1849              : 
    1850         1987 :     span = (FreePageSpanLeader *) fpm_page_to_pointer(base, pageno);
    1851              : 
    1852         1987 :     next = relptr_access(base, span->next);
    1853         1987 :     prev = relptr_access(base, span->prev);
    1854         1987 :     if (next != NULL)
    1855           88 :         relptr_copy(next->prev, span->prev);
    1856         1987 :     if (prev != NULL)
    1857           42 :         relptr_copy(prev->next, span->next);
    1858              :     else
    1859              :     {
    1860         1945 :         Size        f = Min(span->npages, FPM_NUM_FREELISTS) - 1;
    1861              : 
    1862              :         Assert(relptr_offset(fpm->freelist[f]) == pageno * FPM_PAGE_SIZE);
    1863         1945 :         relptr_copy(fpm->freelist[f], span->next);
    1864              :     }
    1865         1987 : }
    1866              : 
    1867              : /*
    1868              :  * Initialize a new FreePageSpanLeader and put it on the appropriate free list.
    1869              :  */
    1870              : static void
    1871        18731 : FreePagePushSpanLeader(FreePageManager *fpm, Size first_page, Size npages)
    1872              : {
    1873        18731 :     char       *base = fpm_segment_base(fpm);
    1874        18731 :     Size        f = Min(npages, FPM_NUM_FREELISTS) - 1;
    1875        18731 :     FreePageSpanLeader *head = relptr_access(base, fpm->freelist[f]);
    1876              :     FreePageSpanLeader *span;
    1877              : 
    1878        18731 :     span = (FreePageSpanLeader *) fpm_page_to_pointer(base, first_page);
    1879        18731 :     span->magic = FREE_PAGE_SPAN_LEADER_MAGIC;
    1880        18731 :     span->npages = npages;
    1881        18731 :     relptr_store(base, span->next, head);
    1882        18731 :     relptr_store(base, span->prev, (FreePageSpanLeader *) NULL);
    1883        18731 :     if (head != NULL)
    1884          181 :         relptr_store(base, head->prev, span);
    1885        18731 :     relptr_store(base, fpm->freelist[f], span);
    1886        18731 : }
        

Generated by: LCOV version 2.0-1