LCOV - code coverage report
Current view: top level - src/backend/utils/mmgr - freepage.c (source / functions) Hit Total Coverage
Test: PostgreSQL 12beta2 Lines: 329 605 54.4 %
Date: 2019-06-18 07:06:57 Functions: 21 29 72.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * freepage.c
       4             :  *    Management of free memory pages.
       5             :  *
       6             :  * The intention of this code is to provide infrastructure for memory
       7             :  * allocators written specifically for PostgreSQL.  At least in the case
       8             :  * of dynamic shared memory, we can't simply use malloc() or even
       9             :  * relatively thin wrappers like palloc() which sit on top of it, because
      10             :  * no allocator built into the operating system will deal with relative
      11             :  * pointers.  In the future, we may find other cases in which greater
      12             :  * control over our own memory management seems desirable.
      13             :  *
      14             :  * A FreePageManager keeps track of which 4kB pages of memory are currently
      15             :  * unused from the point of view of some higher-level memory allocator.
      16             :  * Unlike a user-facing allocator such as palloc(), a FreePageManager can
      17             :  * only allocate and free in units of whole pages, and freeing an
      18             :  * allocation can only be done given knowledge of its length in pages.
      19             :  *
      20             :  * Since a free page manager has only a fixed amount of dedicated memory,
      21             :  * and since there is no underlying allocator, it uses the free pages
      22             :  * it is given to manage to store its bookkeeping data.  It keeps multiple
      23             :  * freelists of runs of pages, sorted by the size of the run; the head of
      24             :  * each freelist is stored in the FreePageManager itself, and the first
      25             :  * page of each run contains a relative pointer to the next run. See
      26             :  * FreePageManagerGetInternal for more details on how the freelists are
      27             :  * managed.
      28             :  *
      29             :  * To avoid memory fragmentation, it's important to consolidate adjacent
      30             :  * spans of pages whenever possible; otherwise, large allocation requests
      31             :  * might not be satisfied even when sufficient contiguous space is
      32             :  * available.  Therefore, in addition to the freelists, we maintain an
      33             :  * in-memory btree of free page ranges ordered by page number.  If a
      34             :  * range being freed precedes or follows a range that is already free,
      35             :  * the existing range is extended; if it exactly bridges the gap between
      36             :  * free ranges, then the two existing ranges are consolidated with the
      37             :  * newly-freed range to form one great big range of free pages.
      38             :  *
      39             :  * When there is only one range of free pages, the btree is trivial and
      40             :  * is stored within the FreePageManager proper; otherwise, pages are
      41             :  * allocated from the area under management as needed.  Even in cases
      42             :  * where memory fragmentation is very severe, only a tiny fraction of
      43             :  * the pages under management are consumed by this btree.
      44             :  *
      45             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
      46             :  * Portions Copyright (c) 1994, Regents of the University of California
      47             :  *
      48             :  * IDENTIFICATION
      49             :  *    src/backend/utils/mmgr/freepage.c
      50             :  *
      51             :  *-------------------------------------------------------------------------
      52             :  */
      53             : 
      54             : #include "postgres.h"
      55             : #include "lib/stringinfo.h"
      56             : #include "miscadmin.h"
      57             : 
      58             : #include "utils/freepage.h"
      59             : #include "utils/relptr.h"
      60             : 
      61             : 
      62             : /* Magic numbers to identify various page types */
      63             : #define FREE_PAGE_SPAN_LEADER_MAGIC     0xea4020f0
      64             : #define FREE_PAGE_LEAF_MAGIC            0x98eae728
      65             : #define FREE_PAGE_INTERNAL_MAGIC        0x19aa32c9
      66             : 
      67             : /* Doubly linked list of spans of free pages; stored in first page of span. */
      68             : struct FreePageSpanLeader
      69             : {
      70             :     int         magic;          /* always FREE_PAGE_SPAN_LEADER_MAGIC */
      71             :     Size        npages;         /* number of pages in span */
      72             :     RelptrFreePageSpanLeader prev;
      73             :     RelptrFreePageSpanLeader next;
      74             : };
      75             : 
      76             : /* Common header for btree leaf and internal pages. */
      77             : typedef struct FreePageBtreeHeader
      78             : {
      79             :     int         magic;          /* FREE_PAGE_LEAF_MAGIC or
      80             :                                  * FREE_PAGE_INTERNAL_MAGIC */
      81             :     Size        nused;          /* number of items used */
      82             :     RelptrFreePageBtree parent; /* uplink */
      83             : } FreePageBtreeHeader;
      84             : 
      85             : /* Internal key; points to next level of btree. */
      86             : typedef struct FreePageBtreeInternalKey
      87             : {
      88             :     Size        first_page;     /* low bound for keys on child page */
      89             :     RelptrFreePageBtree child;  /* downlink */
      90             : } FreePageBtreeInternalKey;
      91             : 
      92             : /* Leaf key; no payload data. */
      93             : typedef struct FreePageBtreeLeafKey
      94             : {
      95             :     Size        first_page;     /* first page in span */
      96             :     Size        npages;         /* number of pages in span */
      97             : } FreePageBtreeLeafKey;
      98             : 
      99             : /* Work out how many keys will fit on a page. */
     100             : #define FPM_ITEMS_PER_INTERNAL_PAGE \
     101             :     ((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
     102             :         sizeof(FreePageBtreeInternalKey))
     103             : #define FPM_ITEMS_PER_LEAF_PAGE \
     104             :     ((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
     105             :         sizeof(FreePageBtreeLeafKey))
     106             : 
     107             : /* A btree page of either sort */
     108             : struct FreePageBtree
     109             : {
     110             :     FreePageBtreeHeader hdr;
     111             :     union
     112             :     {
     113             :         FreePageBtreeInternalKey internal_key[FPM_ITEMS_PER_INTERNAL_PAGE];
     114             :         FreePageBtreeLeafKey leaf_key[FPM_ITEMS_PER_LEAF_PAGE];
     115             :     }           u;
     116             : };
     117             : 
     118             : /* Results of a btree search */
     119             : typedef struct FreePageBtreeSearchResult
     120             : {
     121             :     FreePageBtree *page;
     122             :     Size        index;
     123             :     bool        found;
     124             :     unsigned    split_pages;
     125             : } FreePageBtreeSearchResult;
     126             : 
     127             : /* Helper functions */
     128             : static void FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm,
     129             :                                             FreePageBtree *btp);
     130             : static Size FreePageBtreeCleanup(FreePageManager *fpm);
     131             : static FreePageBtree *FreePageBtreeFindLeftSibling(char *base,
     132             :                                                    FreePageBtree *btp);
     133             : static FreePageBtree *FreePageBtreeFindRightSibling(char *base,
     134             :                                                     FreePageBtree *btp);
     135             : static Size FreePageBtreeFirstKey(FreePageBtree *btp);
     136             : static FreePageBtree *FreePageBtreeGetRecycled(FreePageManager *fpm);
     137             : static void FreePageBtreeInsertInternal(char *base, FreePageBtree *btp,
     138             :                                         Size index, Size first_page, FreePageBtree *child);
     139             : static void FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index,
     140             :                                     Size first_page, Size npages);
     141             : static void FreePageBtreeRecycle(FreePageManager *fpm, Size pageno);
     142             : static void FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp,
     143             :                                 Size index);
     144             : static void FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp);
     145             : static void FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
     146             :                                 FreePageBtreeSearchResult *result);
     147             : static Size FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page);
     148             : static Size FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page);
     149             : static FreePageBtree *FreePageBtreeSplitPage(FreePageManager *fpm,
     150             :                                              FreePageBtree *btp);
     151             : static void FreePageBtreeUpdateParentPointers(char *base, FreePageBtree *btp);
     152             : static void FreePageManagerDumpBtree(FreePageManager *fpm, FreePageBtree *btp,
     153             :                                      FreePageBtree *parent, int level, StringInfo buf);
     154             : static void FreePageManagerDumpSpans(FreePageManager *fpm,
     155             :                                      FreePageSpanLeader *span, Size expected_pages,
     156             :                                      StringInfo buf);
     157             : static bool FreePageManagerGetInternal(FreePageManager *fpm, Size npages,
     158             :                                        Size *first_page);
     159             : static Size FreePageManagerPutInternal(FreePageManager *fpm, Size first_page,
     160             :                                        Size npages, bool soft);
     161             : static void FreePagePopSpanLeader(FreePageManager *fpm, Size pageno);
     162             : static void FreePagePushSpanLeader(FreePageManager *fpm, Size first_page,
     163             :                                    Size npages);
     164             : static Size FreePageManagerLargestContiguous(FreePageManager *fpm);
     165             : static void FreePageManagerUpdateLargest(FreePageManager *fpm);
     166             : 
     167             : #if FPM_EXTRA_ASSERTS
     168             : static Size sum_free_pages(FreePageManager *fpm);
     169             : #endif
     170             : 
     171             : /*
     172             :  * Initialize a new, empty free page manager.
     173             :  *
     174             :  * 'fpm' should reference caller-provided memory large enough to contain a
     175             :  * FreePageManager.  We'll initialize it here.
     176             :  *
     177             :  * 'base' is the address to which all pointers are relative.  When managing
     178             :  * a dynamic shared memory segment, it should normally be the base of the
     179             :  * segment.  When managing backend-private memory, it can be either NULL or,
     180             :  * if managing a single contiguous extent of memory, the start of that extent.
     181             :  */
     182             : void
     183         550 : FreePageManagerInitialize(FreePageManager *fpm, char *base)
     184             : {
     185             :     Size        f;
     186             : 
     187         550 :     relptr_store(base, fpm->self, fpm);
     188         550 :     relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
     189         550 :     relptr_store(base, fpm->btree_recycle, (FreePageSpanLeader *) NULL);
     190         550 :     fpm->btree_depth = 0;
     191         550 :     fpm->btree_recycle_count = 0;
     192         550 :     fpm->singleton_first_page = 0;
     193         550 :     fpm->singleton_npages = 0;
     194         550 :     fpm->contiguous_pages = 0;
     195         550 :     fpm->contiguous_pages_dirty = true;
     196             : #ifdef FPM_EXTRA_ASSERTS
     197             :     fpm->free_pages = 0;
     198             : #endif
     199             : 
     200       71500 :     for (f = 0; f < FPM_NUM_FREELISTS; f++)
     201       70950 :         relptr_store(base, fpm->freelist[f], (FreePageSpanLeader *) NULL);
     202         550 : }
     203             : 
     204             : /*
     205             :  * Allocate a run of pages of the given length from the free page manager.
     206             :  * The return value indicates whether we were able to satisfy the request;
     207             :  * if true, the first page of the allocation is stored in *first_page.
     208             :  */
     209             : bool
     210        2894 : FreePageManagerGet(FreePageManager *fpm, Size npages, Size *first_page)
     211             : {
     212             :     bool        result;
     213             :     Size        contiguous_pages;
     214             : 
     215        2894 :     result = FreePageManagerGetInternal(fpm, npages, first_page);
     216             : 
     217             :     /*
     218             :      * It's a bit counterintuitive, but allocating pages can actually create
     219             :      * opportunities for cleanup that create larger ranges.  We might pull a
     220             :      * key out of the btree that enables the item at the head of the btree
     221             :      * recycle list to be inserted; and then if there are more items behind it
     222             :      * one of those might cause two currently-separated ranges to merge,
     223             :      * creating a single range of contiguous pages larger than any that
     224             :      * existed previously.  It might be worth trying to improve the cleanup
     225             :      * algorithm to avoid such corner cases, but for now we just notice the
     226             :      * condition and do the appropriate reporting.
     227             :      */
     228        2894 :     contiguous_pages = FreePageBtreeCleanup(fpm);
     229        2894 :     if (fpm->contiguous_pages < contiguous_pages)
     230          44 :         fpm->contiguous_pages = contiguous_pages;
     231             : 
     232             :     /*
     233             :      * FreePageManagerGetInternal may have set contiguous_pages_dirty.
     234             :      * Recompute contigous_pages if so.
     235             :      */
     236        2894 :     FreePageManagerUpdateLargest(fpm);
     237             : 
     238             : #ifdef FPM_EXTRA_ASSERTS
     239             :     if (result)
     240             :     {
     241             :         Assert(fpm->free_pages >= npages);
     242             :         fpm->free_pages -= npages;
     243             :     }
     244             :     Assert(fpm->free_pages == sum_free_pages(fpm));
     245             :     Assert(fpm->contiguous_pages == FreePageManagerLargestContiguous(fpm));
     246             : #endif
     247        2894 :     return result;
     248             : }
     249             : 
     250             : #ifdef FPM_EXTRA_ASSERTS
     251             : static void
     252             : sum_free_pages_recurse(FreePageManager *fpm, FreePageBtree *btp, Size *sum)
     253             : {
     254             :     char       *base = fpm_segment_base(fpm);
     255             : 
     256             :     Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC ||
     257             :            btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
     258             :     ++*sum;
     259             :     if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
     260             :     {
     261             :         Size        index;
     262             : 
     263             : 
     264             :         for (index = 0; index < btp->hdr.nused; ++index)
     265             :         {
     266             :             FreePageBtree *child;
     267             : 
     268             :             child = relptr_access(base, btp->u.internal_key[index].child);
     269             :             sum_free_pages_recurse(fpm, child, sum);
     270             :         }
     271             :     }
     272             : }
     273             : static Size
     274             : sum_free_pages(FreePageManager *fpm)
     275             : {
     276             :     FreePageSpanLeader *recycle;
     277             :     char       *base = fpm_segment_base(fpm);
     278             :     Size        sum = 0;
     279             :     int         list;
     280             : 
     281             :     /* Count the spans by scanning the freelists. */
     282             :     for (list = 0; list < FPM_NUM_FREELISTS; ++list)
     283             :     {
     284             : 
     285             :         if (!relptr_is_null(fpm->freelist[list]))
     286             :         {
     287             :             FreePageSpanLeader *candidate =
     288             :             relptr_access(base, fpm->freelist[list]);
     289             : 
     290             :             do
     291             :             {
     292             :                 sum += candidate->npages;
     293             :                 candidate = relptr_access(base, candidate->next);
     294             :             } while (candidate != NULL);
     295             :         }
     296             :     }
     297             : 
     298             :     /* Count btree internal pages. */
     299             :     if (fpm->btree_depth > 0)
     300             :     {
     301             :         FreePageBtree *root = relptr_access(base, fpm->btree_root);
     302             : 
     303             :         sum_free_pages_recurse(fpm, root, &sum);
     304             :     }
     305             : 
     306             :     /* Count the recycle list. */
     307             :     for (recycle = relptr_access(base, fpm->btree_recycle);
     308             :          recycle != NULL;
     309             :          recycle = relptr_access(base, recycle->next))
     310             :     {
     311             :         Assert(recycle->npages == 1);
     312             :         ++sum;
     313             :     }
     314             : 
     315             :     return sum;
     316             : }
     317             : #endif
     318             : 
     319             : /*
     320             :  * Compute the size of the largest run of pages that the user could
     321             :  * successfully get.
     322             :  */
     323             : static Size
     324        2294 : FreePageManagerLargestContiguous(FreePageManager *fpm)
     325             : {
     326             :     char       *base;
     327             :     Size        largest;
     328             : 
     329        2294 :     base = fpm_segment_base(fpm);
     330        2294 :     largest = 0;
     331        2294 :     if (!relptr_is_null(fpm->freelist[FPM_NUM_FREELISTS - 1]))
     332             :     {
     333             :         FreePageSpanLeader *candidate;
     334             : 
     335        1772 :         candidate = relptr_access(base, fpm->freelist[FPM_NUM_FREELISTS - 1]);
     336             :         do
     337             :         {
     338        1772 :             if (candidate->npages > largest)
     339        1772 :                 largest = candidate->npages;
     340        1772 :             candidate = relptr_access(base, candidate->next);
     341        1772 :         } while (candidate != NULL);
     342             :     }
     343             :     else
     344             :     {
     345         522 :         Size        f = FPM_NUM_FREELISTS - 1;
     346             : 
     347             :         do
     348             :         {
     349       35630 :             --f;
     350       35630 :             if (!relptr_is_null(fpm->freelist[f]))
     351             :             {
     352         522 :                 largest = f + 1;
     353         522 :                 break;
     354             :             }
     355       35108 :         } while (f > 0);
     356             :     }
     357             : 
     358        2294 :     return largest;
     359             : }
     360             : 
     361             : /*
     362             :  * Recompute the size of the largest run of pages that the user could
     363             :  * successfully get, if it has been marked dirty.
     364             :  */
     365             : static void
     366        5544 : FreePageManagerUpdateLargest(FreePageManager *fpm)
     367             : {
     368        5544 :     if (!fpm->contiguous_pages_dirty)
     369        3250 :         return;
     370             : 
     371        2294 :     fpm->contiguous_pages = FreePageManagerLargestContiguous(fpm);
     372        2294 :     fpm->contiguous_pages_dirty = false;
     373             : }
     374             : 
     375             : /*
     376             :  * Transfer a run of pages to the free page manager.
     377             :  */
     378             : void
     379        2650 : FreePageManagerPut(FreePageManager *fpm, Size first_page, Size npages)
     380             : {
     381             :     Size        contiguous_pages;
     382             : 
     383             :     Assert(npages > 0);
     384             : 
     385             :     /* Record the new pages. */
     386        2650 :     contiguous_pages =
     387             :         FreePageManagerPutInternal(fpm, first_page, npages, false);
     388             : 
     389             :     /*
     390             :      * If the new range we inserted into the page manager was contiguous with
     391             :      * an existing range, it may have opened up cleanup opportunities.
     392             :      */
     393        2650 :     if (contiguous_pages > npages)
     394             :     {
     395             :         Size        cleanup_contiguous_pages;
     396             : 
     397        2034 :         cleanup_contiguous_pages = FreePageBtreeCleanup(fpm);
     398        2034 :         if (cleanup_contiguous_pages > contiguous_pages)
     399          66 :             contiguous_pages = cleanup_contiguous_pages;
     400             :     }
     401             : 
     402             :     /* See if we now have a new largest chunk. */
     403        2650 :     if (fpm->contiguous_pages < contiguous_pages)
     404        1072 :         fpm->contiguous_pages = contiguous_pages;
     405             : 
     406             :     /*
     407             :      * The earlier call to FreePageManagerPutInternal may have set
     408             :      * contiguous_pages_dirty if it needed to allocate internal pages, so
     409             :      * recompute contiguous_pages if necessary.
     410             :      */
     411        2650 :     FreePageManagerUpdateLargest(fpm);
     412             : 
     413             : #ifdef FPM_EXTRA_ASSERTS
     414             :     fpm->free_pages += npages;
     415             :     Assert(fpm->free_pages == sum_free_pages(fpm));
     416             :     Assert(fpm->contiguous_pages == FreePageManagerLargestContiguous(fpm));
     417             : #endif
     418        2650 : }
     419             : 
     420             : /*
     421             :  * Produce a debugging dump of the state of a free page manager.
     422             :  */
     423             : char *
     424           0 : FreePageManagerDump(FreePageManager *fpm)
     425             : {
     426           0 :     char       *base = fpm_segment_base(fpm);
     427             :     StringInfoData buf;
     428             :     FreePageSpanLeader *recycle;
     429           0 :     bool        dumped_any_freelist = false;
     430             :     Size        f;
     431             : 
     432             :     /* Initialize output buffer. */
     433           0 :     initStringInfo(&buf);
     434             : 
     435             :     /* Dump general stuff. */
     436           0 :     appendStringInfo(&buf, "metadata: self %zu max contiguous pages = %zu\n",
     437             :                      fpm->self.relptr_off, fpm->contiguous_pages);
     438             : 
     439             :     /* Dump btree. */
     440           0 :     if (fpm->btree_depth > 0)
     441             :     {
     442             :         FreePageBtree *root;
     443             : 
     444           0 :         appendStringInfo(&buf, "btree depth %u:\n", fpm->btree_depth);
     445           0 :         root = relptr_access(base, fpm->btree_root);
     446           0 :         FreePageManagerDumpBtree(fpm, root, NULL, 0, &buf);
     447             :     }
     448           0 :     else if (fpm->singleton_npages > 0)
     449             :     {
     450           0 :         appendStringInfo(&buf, "singleton: %zu(%zu)\n",
     451             :                          fpm->singleton_first_page, fpm->singleton_npages);
     452             :     }
     453             : 
     454             :     /* Dump btree recycle list. */
     455           0 :     recycle = relptr_access(base, fpm->btree_recycle);
     456           0 :     if (recycle != NULL)
     457             :     {
     458           0 :         appendStringInfoString(&buf, "btree recycle:");
     459           0 :         FreePageManagerDumpSpans(fpm, recycle, 1, &buf);
     460             :     }
     461             : 
     462             :     /* Dump free lists. */
     463           0 :     for (f = 0; f < FPM_NUM_FREELISTS; ++f)
     464             :     {
     465             :         FreePageSpanLeader *span;
     466             : 
     467           0 :         if (relptr_is_null(fpm->freelist[f]))
     468           0 :             continue;
     469           0 :         if (!dumped_any_freelist)
     470             :         {
     471           0 :             appendStringInfoString(&buf, "freelists:\n");
     472           0 :             dumped_any_freelist = true;
     473             :         }
     474           0 :         appendStringInfo(&buf, "  %zu:", f + 1);
     475           0 :         span = relptr_access(base, fpm->freelist[f]);
     476           0 :         FreePageManagerDumpSpans(fpm, span, f + 1, &buf);
     477             :     }
     478             : 
     479             :     /* And return result to caller. */
     480           0 :     return buf.data;
     481             : }
     482             : 
     483             : 
     484             : /*
     485             :  * The first_page value stored at index zero in any non-root page must match
     486             :  * the first_page value stored in its parent at the index which points to that
     487             :  * page.  So when the value stored at index zero in a btree page changes, we've
     488             :  * got to walk up the tree adjusting ancestor keys until we reach an ancestor
     489             :  * where that key isn't index zero.  This function should be called after
     490             :  * updating the first key on the target page; it will propagate the change
     491             :  * upward as far as needed.
     492             :  *
     493             :  * We assume here that the first key on the page has not changed enough to
     494             :  * require changes in the ordering of keys on its ancestor pages.  Thus,
     495             :  * if we search the parent page for the first key greater than or equal to
     496             :  * the first key on the current page, the downlink to this page will be either
     497             :  * the exact index returned by the search (if the first key decreased)
     498             :  * or one less (if the first key increased).
     499             :  */
     500             : static void
     501        1348 : FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm, FreePageBtree *btp)
     502             : {
     503        1348 :     char       *base = fpm_segment_base(fpm);
     504             :     Size        first_page;
     505             :     FreePageBtree *parent;
     506             :     FreePageBtree *child;
     507             : 
     508             :     /* This might be either a leaf or an internal page. */
     509             :     Assert(btp->hdr.nused > 0);
     510        1348 :     if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     511             :     {
     512             :         Assert(btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
     513        1348 :         first_page = btp->u.leaf_key[0].first_page;
     514             :     }
     515             :     else
     516             :     {
     517             :         Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     518             :         Assert(btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE);
     519           0 :         first_page = btp->u.internal_key[0].first_page;
     520             :     }
     521        1348 :     child = btp;
     522             : 
     523             :     /* Loop until we find an ancestor that does not require adjustment. */
     524             :     for (;;)
     525           0 :     {
     526             :         Size        s;
     527             : 
     528        1348 :         parent = relptr_access(base, child->hdr.parent);
     529        1348 :         if (parent == NULL)
     530        1348 :             break;
     531           0 :         s = FreePageBtreeSearchInternal(parent, first_page);
     532             : 
     533             :         /* Key is either at index s or index s-1; figure out which. */
     534           0 :         if (s >= parent->hdr.nused)
     535             :         {
     536             :             Assert(s == parent->hdr.nused);
     537           0 :             --s;
     538             :         }
     539             :         else
     540             :         {
     541             :             FreePageBtree *check;
     542             : 
     543           0 :             check = relptr_access(base, parent->u.internal_key[s].child);
     544           0 :             if (check != child)
     545             :             {
     546             :                 Assert(s > 0);
     547           0 :                 --s;
     548             :             }
     549             :         }
     550             : 
     551             : #ifdef USE_ASSERT_CHECKING
     552             :         /* Debugging double-check. */
     553             :         {
     554             :             FreePageBtree *check;
     555             : 
     556             :             check = relptr_access(base, parent->u.internal_key[s].child);
     557             :             Assert(s < parent->hdr.nused);
     558             :             Assert(child == check);
     559             :         }
     560             : #endif
     561             : 
     562             :         /* Update the parent key. */
     563           0 :         parent->u.internal_key[s].first_page = first_page;
     564             : 
     565             :         /*
     566             :          * If this is the first key in the parent, go up another level; else
     567             :          * done.
     568             :          */
     569           0 :         if (s > 0)
     570           0 :             break;
     571           0 :         child = parent;
     572             :     }
     573        1348 : }
     574             : 
     575             : /*
     576             :  * Attempt to reclaim space from the free-page btree.  The return value is
     577             :  * the largest range of contiguous pages created by the cleanup operation.
     578             :  */
     579             : static Size
     580        4928 : FreePageBtreeCleanup(FreePageManager *fpm)
     581             : {
     582        4928 :     char       *base = fpm_segment_base(fpm);
     583        4928 :     Size        max_contiguous_pages = 0;
     584             : 
     585             :     /* Attempt to shrink the depth of the btree. */
     586        9960 :     while (!relptr_is_null(fpm->btree_root))
     587             :     {
     588        2518 :         FreePageBtree *root = relptr_access(base, fpm->btree_root);
     589             : 
     590             :         /* If the root contains only one key, reduce depth by one. */
     591        2518 :         if (root->hdr.nused == 1)
     592             :         {
     593             :             /* Shrink depth of tree by one. */
     594             :             Assert(fpm->btree_depth > 0);
     595         104 :             --fpm->btree_depth;
     596         104 :             if (root->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     597             :             {
     598             :                 /* If root is a leaf, convert only entry to singleton range. */
     599         104 :                 relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
     600         104 :                 fpm->singleton_first_page = root->u.leaf_key[0].first_page;
     601         104 :                 fpm->singleton_npages = root->u.leaf_key[0].npages;
     602             :             }
     603             :             else
     604             :             {
     605             :                 FreePageBtree *newroot;
     606             : 
     607             :                 /* If root is an internal page, make only child the root. */
     608             :                 Assert(root->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     609           0 :                 relptr_copy(fpm->btree_root, root->u.internal_key[0].child);
     610           0 :                 newroot = relptr_access(base, fpm->btree_root);
     611           0 :                 relptr_store(base, newroot->hdr.parent, (FreePageBtree *) NULL);
     612             :             }
     613         104 :             FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, root));
     614             :         }
     615        3642 :         else if (root->hdr.nused == 2 &&
     616        1228 :                  root->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     617             :         {
     618             :             Size        end_of_first;
     619             :             Size        start_of_second;
     620             : 
     621        2456 :             end_of_first = root->u.leaf_key[0].first_page +
     622        1228 :                 root->u.leaf_key[0].npages;
     623        1228 :             start_of_second = root->u.leaf_key[1].first_page;
     624             : 
     625        1228 :             if (end_of_first + 1 == start_of_second)
     626             :             {
     627          66 :                 Size        root_page = fpm_pointer_to_page(base, root);
     628             : 
     629          66 :                 if (end_of_first == root_page)
     630             :                 {
     631          66 :                     FreePagePopSpanLeader(fpm, root->u.leaf_key[0].first_page);
     632          66 :                     FreePagePopSpanLeader(fpm, root->u.leaf_key[1].first_page);
     633          66 :                     fpm->singleton_first_page = root->u.leaf_key[0].first_page;
     634         198 :                     fpm->singleton_npages = root->u.leaf_key[0].npages +
     635         132 :                         root->u.leaf_key[1].npages + 1;
     636          66 :                     fpm->btree_depth = 0;
     637          66 :                     relptr_store(base, fpm->btree_root,
     638             :                                  (FreePageBtree *) NULL);
     639          66 :                     FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
     640             :                                            fpm->singleton_npages);
     641             :                     Assert(max_contiguous_pages == 0);
     642          66 :                     max_contiguous_pages = fpm->singleton_npages;
     643             :                 }
     644             :             }
     645             : 
     646             :             /* Whether it worked or not, it's time to stop. */
     647        1228 :             break;
     648             :         }
     649             :         else
     650             :         {
     651             :             /* Nothing more to do.  Stop. */
     652             :             break;
     653             :         }
     654             :     }
     655             : 
     656             :     /*
     657             :      * Attempt to free recycled btree pages.  We skip this if releasing the
     658             :      * recycled page would require a btree page split, because the page we're
     659             :      * trying to recycle would be consumed by the split, which would be
     660             :      * counterproductive.
     661             :      *
     662             :      * We also currently only ever attempt to recycle the first page on the
     663             :      * list; that could be made more aggressive, but it's not clear that the
     664             :      * complexity would be worthwhile.
     665             :      */
     666        9900 :     while (fpm->btree_recycle_count > 0)
     667             :     {
     668             :         FreePageBtree *btp;
     669             :         Size        first_page;
     670             :         Size        contiguous_pages;
     671             : 
     672         104 :         btp = FreePageBtreeGetRecycled(fpm);
     673         104 :         first_page = fpm_pointer_to_page(base, btp);
     674         104 :         contiguous_pages = FreePageManagerPutInternal(fpm, first_page, 1, true);
     675         104 :         if (contiguous_pages == 0)
     676             :         {
     677          60 :             FreePageBtreeRecycle(fpm, first_page);
     678          60 :             break;
     679             :         }
     680             :         else
     681             :         {
     682          44 :             if (contiguous_pages > max_contiguous_pages)
     683          44 :                 max_contiguous_pages = contiguous_pages;
     684             :         }
     685             :     }
     686             : 
     687        4928 :     return max_contiguous_pages;
     688             : }
     689             : 
     690             : /*
     691             :  * Consider consolidating the given page with its left or right sibling,
     692             :  * if it's fairly empty.
     693             :  */
     694             : static void
     695         354 : FreePageBtreeConsolidate(FreePageManager *fpm, FreePageBtree *btp)
     696             : {
     697         354 :     char       *base = fpm_segment_base(fpm);
     698             :     FreePageBtree *np;
     699             :     Size        max;
     700             : 
     701             :     /*
     702             :      * We only try to consolidate pages that are less than a third full. We
     703             :      * could be more aggressive about this, but that might risk performing
     704             :      * consolidation only to end up splitting again shortly thereafter.  Since
     705             :      * the btree should be very small compared to the space under management,
     706             :      * our goal isn't so much to ensure that it always occupies the absolutely
     707             :      * smallest possible number of pages as to reclaim pages before things get
     708             :      * too egregiously out of hand.
     709             :      */
     710         354 :     if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     711         354 :         max = FPM_ITEMS_PER_LEAF_PAGE;
     712             :     else
     713             :     {
     714             :         Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     715           0 :         max = FPM_ITEMS_PER_INTERNAL_PAGE;
     716             :     }
     717         354 :     if (btp->hdr.nused >= max / 3)
     718           0 :         return;
     719             : 
     720             :     /*
     721             :      * If we can fit our right sibling's keys onto this page, consolidate.
     722             :      */
     723         354 :     np = FreePageBtreeFindRightSibling(base, btp);
     724         354 :     if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
     725             :     {
     726           0 :         if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     727             :         {
     728           0 :             memcpy(&btp->u.leaf_key[btp->hdr.nused], &np->u.leaf_key[0],
     729           0 :                    sizeof(FreePageBtreeLeafKey) * np->hdr.nused);
     730           0 :             btp->hdr.nused += np->hdr.nused;
     731             :         }
     732             :         else
     733             :         {
     734           0 :             memcpy(&btp->u.internal_key[btp->hdr.nused], &np->u.internal_key[0],
     735           0 :                    sizeof(FreePageBtreeInternalKey) * np->hdr.nused);
     736           0 :             btp->hdr.nused += np->hdr.nused;
     737           0 :             FreePageBtreeUpdateParentPointers(base, btp);
     738             :         }
     739           0 :         FreePageBtreeRemovePage(fpm, np);
     740           0 :         return;
     741             :     }
     742             : 
     743             :     /*
     744             :      * If we can fit our keys onto our left sibling's page, consolidate. In
     745             :      * this case, we move our keys onto the other page rather than visca
     746             :      * versa, to avoid having to adjust ancestor keys.
     747             :      */
     748         354 :     np = FreePageBtreeFindLeftSibling(base, btp);
     749         354 :     if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
     750             :     {
     751           0 :         if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     752             :         {
     753           0 :             memcpy(&np->u.leaf_key[np->hdr.nused], &btp->u.leaf_key[0],
     754           0 :                    sizeof(FreePageBtreeLeafKey) * btp->hdr.nused);
     755           0 :             np->hdr.nused += btp->hdr.nused;
     756             :         }
     757             :         else
     758             :         {
     759           0 :             memcpy(&np->u.internal_key[np->hdr.nused], &btp->u.internal_key[0],
     760           0 :                    sizeof(FreePageBtreeInternalKey) * btp->hdr.nused);
     761           0 :             np->hdr.nused += btp->hdr.nused;
     762           0 :             FreePageBtreeUpdateParentPointers(base, np);
     763             :         }
     764           0 :         FreePageBtreeRemovePage(fpm, btp);
     765           0 :         return;
     766             :     }
     767             : }
     768             : 
     769             : /*
     770             :  * Find the passed page's left sibling; that is, the page at the same level
     771             :  * of the tree whose keyspace immediately precedes ours.
     772             :  */
     773             : static FreePageBtree *
     774         354 : FreePageBtreeFindLeftSibling(char *base, FreePageBtree *btp)
     775             : {
     776         354 :     FreePageBtree *p = btp;
     777         354 :     int         levels = 0;
     778             : 
     779             :     /* Move up until we can move left. */
     780             :     for (;;)
     781           0 :     {
     782             :         Size        first_page;
     783             :         Size        index;
     784             : 
     785         354 :         first_page = FreePageBtreeFirstKey(p);
     786         354 :         p = relptr_access(base, p->hdr.parent);
     787             : 
     788         354 :         if (p == NULL)
     789         354 :             return NULL;        /* we were passed the rightmost page */
     790             : 
     791           0 :         index = FreePageBtreeSearchInternal(p, first_page);
     792           0 :         if (index > 0)
     793             :         {
     794             :             Assert(p->u.internal_key[index].first_page == first_page);
     795           0 :             p = relptr_access(base, p->u.internal_key[index - 1].child);
     796           0 :             break;
     797             :         }
     798             :         Assert(index == 0);
     799           0 :         ++levels;
     800             :     }
     801             : 
     802             :     /* Descend left. */
     803           0 :     while (levels > 0)
     804             :     {
     805             :         Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     806           0 :         p = relptr_access(base, p->u.internal_key[p->hdr.nused - 1].child);
     807           0 :         --levels;
     808             :     }
     809             :     Assert(p->hdr.magic == btp->hdr.magic);
     810             : 
     811           0 :     return p;
     812             : }
     813             : 
     814             : /*
     815             :  * Find the passed page's right sibling; that is, the page at the same level
     816             :  * of the tree whose keyspace immediately follows ours.
     817             :  */
     818             : static FreePageBtree *
     819         354 : FreePageBtreeFindRightSibling(char *base, FreePageBtree *btp)
     820             : {
     821         354 :     FreePageBtree *p = btp;
     822         354 :     int         levels = 0;
     823             : 
     824             :     /* Move up until we can move right. */
     825             :     for (;;)
     826           0 :     {
     827             :         Size        first_page;
     828             :         Size        index;
     829             : 
     830         354 :         first_page = FreePageBtreeFirstKey(p);
     831         354 :         p = relptr_access(base, p->hdr.parent);
     832             : 
     833         354 :         if (p == NULL)
     834         354 :             return NULL;        /* we were passed the rightmost page */
     835             : 
     836           0 :         index = FreePageBtreeSearchInternal(p, first_page);
     837           0 :         if (index < p->hdr.nused - 1)
     838             :         {
     839             :             Assert(p->u.internal_key[index].first_page == first_page);
     840           0 :             p = relptr_access(base, p->u.internal_key[index + 1].child);
     841           0 :             break;
     842             :         }
     843             :         Assert(index == p->hdr.nused - 1);
     844           0 :         ++levels;
     845             :     }
     846             : 
     847             :     /* Descend left. */
     848           0 :     while (levels > 0)
     849             :     {
     850             :         Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     851           0 :         p = relptr_access(base, p->u.internal_key[0].child);
     852           0 :         --levels;
     853             :     }
     854             :     Assert(p->hdr.magic == btp->hdr.magic);
     855             : 
     856           0 :     return p;
     857             : }
     858             : 
     859             : /*
     860             :  * Get the first key on a btree page.
     861             :  */
     862             : static Size
     863         708 : FreePageBtreeFirstKey(FreePageBtree *btp)
     864             : {
     865             :     Assert(btp->hdr.nused > 0);
     866             : 
     867         708 :     if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     868         708 :         return btp->u.leaf_key[0].first_page;
     869             :     else
     870             :     {
     871             :         Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     872           0 :         return btp->u.internal_key[0].first_page;
     873             :     }
     874             : }
     875             : 
     876             : /*
     877             :  * Get a page from the btree recycle list for use as a btree page.
     878             :  */
     879             : static FreePageBtree *
     880         164 : FreePageBtreeGetRecycled(FreePageManager *fpm)
     881             : {
     882         164 :     char       *base = fpm_segment_base(fpm);
     883         164 :     FreePageSpanLeader *victim = relptr_access(base, fpm->btree_recycle);
     884             :     FreePageSpanLeader *newhead;
     885             : 
     886             :     Assert(victim != NULL);
     887         164 :     newhead = relptr_access(base, victim->next);
     888         164 :     if (newhead != NULL)
     889           0 :         relptr_copy(newhead->prev, victim->prev);
     890         164 :     relptr_store(base, fpm->btree_recycle, newhead);
     891             :     Assert(fpm_pointer_is_page_aligned(base, victim));
     892         164 :     fpm->btree_recycle_count--;
     893         164 :     return (FreePageBtree *) victim;
     894             : }
     895             : 
     896             : /*
     897             :  * Insert an item into an internal page.
     898             :  */
     899             : static void
     900           0 : FreePageBtreeInsertInternal(char *base, FreePageBtree *btp, Size index,
     901             :                             Size first_page, FreePageBtree *child)
     902             : {
     903             :     Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     904             :     Assert(btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE);
     905             :     Assert(index <= btp->hdr.nused);
     906           0 :     memmove(&btp->u.internal_key[index + 1], &btp->u.internal_key[index],
     907           0 :             sizeof(FreePageBtreeInternalKey) * (btp->hdr.nused - index));
     908           0 :     btp->u.internal_key[index].first_page = first_page;
     909           0 :     relptr_store(base, btp->u.internal_key[index].child, child);
     910           0 :     ++btp->hdr.nused;
     911           0 : }
     912             : 
     913             : /*
     914             :  * Insert an item into a leaf page.
     915             :  */
     916             : static void
     917         472 : FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index, Size first_page,
     918             :                         Size npages)
     919             : {
     920             :     Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
     921             :     Assert(btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
     922             :     Assert(index <= btp->hdr.nused);
     923         472 :     memmove(&btp->u.leaf_key[index + 1], &btp->u.leaf_key[index],
     924         472 :             sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
     925         472 :     btp->u.leaf_key[index].first_page = first_page;
     926         472 :     btp->u.leaf_key[index].npages = npages;
     927         472 :     ++btp->hdr.nused;
     928         472 : }
     929             : 
     930             : /*
     931             :  * Put a page on the btree recycle list.
     932             :  */
     933             : static void
     934         164 : FreePageBtreeRecycle(FreePageManager *fpm, Size pageno)
     935             : {
     936         164 :     char       *base = fpm_segment_base(fpm);
     937         164 :     FreePageSpanLeader *head = relptr_access(base, fpm->btree_recycle);
     938             :     FreePageSpanLeader *span;
     939             : 
     940         164 :     span = (FreePageSpanLeader *) fpm_page_to_pointer(base, pageno);
     941         164 :     span->magic = FREE_PAGE_SPAN_LEADER_MAGIC;
     942         164 :     span->npages = 1;
     943         164 :     relptr_store(base, span->next, head);
     944         164 :     relptr_store(base, span->prev, (FreePageSpanLeader *) NULL);
     945         164 :     if (head != NULL)
     946           0 :         relptr_store(base, head->prev, span);
     947         164 :     relptr_store(base, fpm->btree_recycle, span);
     948         164 :     fpm->btree_recycle_count++;
     949         164 : }
     950             : 
     951             : /*
     952             :  * Remove an item from the btree at the given position on the given page.
     953             :  */
     954             : static void
     955         354 : FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp, Size index)
     956             : {
     957             :     Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
     958             :     Assert(index < btp->hdr.nused);
     959             : 
     960             :     /* When last item is removed, extirpate entire page from btree. */
     961         354 :     if (btp->hdr.nused == 1)
     962             :     {
     963           0 :         FreePageBtreeRemovePage(fpm, btp);
     964           0 :         return;
     965             :     }
     966             : 
     967             :     /* Physically remove the key from the page. */
     968         354 :     --btp->hdr.nused;
     969         354 :     if (index < btp->hdr.nused)
     970         354 :         memmove(&btp->u.leaf_key[index], &btp->u.leaf_key[index + 1],
     971         354 :                 sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
     972             : 
     973             :     /* If we just removed the first key, adjust ancestor keys. */
     974         354 :     if (index == 0)
     975         140 :         FreePageBtreeAdjustAncestorKeys(fpm, btp);
     976             : 
     977             :     /* Consider whether to consolidate this page with a sibling. */
     978         354 :     FreePageBtreeConsolidate(fpm, btp);
     979             : }
     980             : 
     981             : /*
     982             :  * Remove a page from the btree.  Caller is responsible for having relocated
     983             :  * any keys from this page that are still wanted.  The page is placed on the
     984             :  * recycled list.
     985             :  */
     986             : static void
     987           0 : FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp)
     988             : {
     989           0 :     char       *base = fpm_segment_base(fpm);
     990             :     FreePageBtree *parent;
     991             :     Size        index;
     992             :     Size        first_page;
     993             : 
     994             :     for (;;)
     995             :     {
     996             :         /* Find parent page. */
     997           0 :         parent = relptr_access(base, btp->hdr.parent);
     998           0 :         if (parent == NULL)
     999             :         {
    1000             :             /* We are removing the root page. */
    1001           0 :             relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
    1002           0 :             fpm->btree_depth = 0;
    1003             :             Assert(fpm->singleton_first_page == 0);
    1004             :             Assert(fpm->singleton_npages == 0);
    1005           0 :             return;
    1006             :         }
    1007             : 
    1008             :         /*
    1009             :          * If the parent contains only one item, we need to remove it as well.
    1010             :          */
    1011           0 :         if (parent->hdr.nused > 1)
    1012           0 :             break;
    1013           0 :         FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, btp));
    1014           0 :         btp = parent;
    1015             :     }
    1016             : 
    1017             :     /* Find and remove the downlink. */
    1018           0 :     first_page = FreePageBtreeFirstKey(btp);
    1019           0 :     if (parent->hdr.magic == FREE_PAGE_LEAF_MAGIC)
    1020             :     {
    1021           0 :         index = FreePageBtreeSearchLeaf(parent, first_page);
    1022             :         Assert(index < parent->hdr.nused);
    1023           0 :         if (index < parent->hdr.nused - 1)
    1024           0 :             memmove(&parent->u.leaf_key[index],
    1025           0 :                     &parent->u.leaf_key[index + 1],
    1026             :                     sizeof(FreePageBtreeLeafKey)
    1027           0 :                     * (parent->hdr.nused - index - 1));
    1028             :     }
    1029             :     else
    1030             :     {
    1031           0 :         index = FreePageBtreeSearchInternal(parent, first_page);
    1032             :         Assert(index < parent->hdr.nused);
    1033           0 :         if (index < parent->hdr.nused - 1)
    1034           0 :             memmove(&parent->u.internal_key[index],
    1035           0 :                     &parent->u.internal_key[index + 1],
    1036             :                     sizeof(FreePageBtreeInternalKey)
    1037           0 :                     * (parent->hdr.nused - index - 1));
    1038             :     }
    1039           0 :     parent->hdr.nused--;
    1040             :     Assert(parent->hdr.nused > 0);
    1041             : 
    1042             :     /* Recycle the page. */
    1043           0 :     FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, btp));
    1044             : 
    1045             :     /* Adjust ancestor keys if needed. */
    1046           0 :     if (index == 0)
    1047           0 :         FreePageBtreeAdjustAncestorKeys(fpm, parent);
    1048             : 
    1049             :     /* Consider whether to consolidate the parent with a sibling. */
    1050           0 :     FreePageBtreeConsolidate(fpm, parent);
    1051             : }
    1052             : 
    1053             : /*
    1054             :  * Search the btree for an entry for the given first page and initialize
    1055             :  * *result with the results of the search.  result->page and result->index
    1056             :  * indicate either the position of an exact match or the position at which
    1057             :  * the new key should be inserted.  result->found is true for an exact match,
    1058             :  * otherwise false.  result->split_pages will contain the number of additional
    1059             :  * btree pages that will be needed when performing a split to insert a key.
    1060             :  * Except as described above, the contents of fields in the result object are
    1061             :  * undefined on return.
    1062             :  */
    1063             : static void
    1064        2990 : FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
    1065             :                     FreePageBtreeSearchResult *result)
    1066             : {
    1067        2990 :     char       *base = fpm_segment_base(fpm);
    1068        2990 :     FreePageBtree *btp = relptr_access(base, fpm->btree_root);
    1069             :     Size        index;
    1070             : 
    1071        2990 :     result->split_pages = 1;
    1072             : 
    1073             :     /* If the btree is empty, there's nothing to find. */
    1074        2990 :     if (btp == NULL)
    1075             :     {
    1076           0 :         result->page = NULL;
    1077           0 :         result->found = false;
    1078           0 :         return;
    1079             :     }
    1080             : 
    1081             :     /* Descend until we hit a leaf. */
    1082        5980 :     while (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
    1083             :     {
    1084             :         FreePageBtree *child;
    1085             :         bool        found_exact;
    1086             : 
    1087           0 :         index = FreePageBtreeSearchInternal(btp, first_page);
    1088           0 :         found_exact = index < btp->hdr.nused &&
    1089           0 :             btp->u.internal_key[index].first_page == first_page;
    1090             : 
    1091             :         /*
    1092             :          * If we found an exact match we descend directly.  Otherwise, we
    1093             :          * descend into the child to the left if possible so that we can find
    1094             :          * the insertion point at that child's high end.
    1095             :          */
    1096           0 :         if (!found_exact && index > 0)
    1097           0 :             --index;
    1098             : 
    1099             :         /* Track required split depth for leaf insert. */
    1100           0 :         if (btp->hdr.nused >= FPM_ITEMS_PER_INTERNAL_PAGE)
    1101             :         {
    1102             :             Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE);
    1103           0 :             result->split_pages++;
    1104             :         }
    1105             :         else
    1106           0 :             result->split_pages = 0;
    1107             : 
    1108             :         /* Descend to appropriate child page. */
    1109             :         Assert(index < btp->hdr.nused);
    1110           0 :         child = relptr_access(base, btp->u.internal_key[index].child);
    1111             :         Assert(relptr_access(base, child->hdr.parent) == btp);
    1112           0 :         btp = child;
    1113             :     }
    1114             : 
    1115             :     /* Track required split depth for leaf insert. */
    1116        2990 :     if (btp->hdr.nused >= FPM_ITEMS_PER_LEAF_PAGE)
    1117             :     {
    1118             :         Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE);
    1119           0 :         result->split_pages++;
    1120             :     }
    1121             :     else
    1122        2990 :         result->split_pages = 0;
    1123             : 
    1124             :     /* Search leaf page. */
    1125        2990 :     index = FreePageBtreeSearchLeaf(btp, first_page);
    1126             : 
    1127             :     /* Assemble results. */
    1128        2990 :     result->page = btp;
    1129        2990 :     result->index = index;
    1130        5980 :     result->found = index < btp->hdr.nused &&
    1131        2990 :         first_page == btp->u.leaf_key[index].first_page;
    1132             : }
    1133             : 
    1134             : /*
    1135             :  * Search an internal page for the first key greater than or equal to a given
    1136             :  * page number.  Returns the index of that key, or one greater than the number
    1137             :  * of keys on the page if none.
    1138             :  */
    1139             : static Size
    1140           0 : FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page)
    1141             : {
    1142           0 :     Size        low = 0;
    1143           0 :     Size        high = btp->hdr.nused;
    1144             : 
    1145             :     Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
    1146             :     Assert(high > 0 && high <= FPM_ITEMS_PER_INTERNAL_PAGE);
    1147             : 
    1148           0 :     while (low < high)
    1149             :     {
    1150           0 :         Size        mid = (low + high) / 2;
    1151           0 :         Size        val = btp->u.internal_key[mid].first_page;
    1152             : 
    1153           0 :         if (first_page == val)
    1154           0 :             return mid;
    1155           0 :         else if (first_page < val)
    1156           0 :             high = mid;
    1157             :         else
    1158           0 :             low = mid + 1;
    1159             :     }
    1160             : 
    1161           0 :     return low;
    1162             : }
    1163             : 
    1164             : /*
    1165             :  * Search a leaf page for the first key greater than or equal to a given
    1166             :  * page number.  Returns the index of that key, or one greater than the number
    1167             :  * of keys on the page if none.
    1168             :  */
    1169             : static Size
    1170        2990 : FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page)
    1171             : {
    1172        2990 :     Size        low = 0;
    1173        2990 :     Size        high = btp->hdr.nused;
    1174             : 
    1175             :     Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
    1176             :     Assert(high > 0 && high <= FPM_ITEMS_PER_LEAF_PAGE);
    1177             : 
    1178       10212 :     while (low < high)
    1179             :     {
    1180        5426 :         Size        mid = (low + high) / 2;
    1181        5426 :         Size        val = btp->u.leaf_key[mid].first_page;
    1182             : 
    1183        5426 :         if (first_page == val)
    1184        1194 :             return mid;
    1185        4232 :         else if (first_page < val)
    1186        3158 :             high = mid;
    1187             :         else
    1188        1074 :             low = mid + 1;
    1189             :     }
    1190             : 
    1191        1796 :     return low;
    1192             : }
    1193             : 
    1194             : /*
    1195             :  * Allocate a new btree page and move half the keys from the provided page
    1196             :  * to the new page.  Caller is responsible for making sure that there's a
    1197             :  * page available from fpm->btree_recycle.  Returns a pointer to the new page,
    1198             :  * to which caller must add a downlink.
    1199             :  */
    1200             : static FreePageBtree *
    1201           0 : FreePageBtreeSplitPage(FreePageManager *fpm, FreePageBtree *btp)
    1202             : {
    1203             :     FreePageBtree *newsibling;
    1204             : 
    1205           0 :     newsibling = FreePageBtreeGetRecycled(fpm);
    1206           0 :     newsibling->hdr.magic = btp->hdr.magic;
    1207           0 :     newsibling->hdr.nused = btp->hdr.nused / 2;
    1208           0 :     relptr_copy(newsibling->hdr.parent, btp->hdr.parent);
    1209           0 :     btp->hdr.nused -= newsibling->hdr.nused;
    1210             : 
    1211           0 :     if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
    1212           0 :         memcpy(&newsibling->u.leaf_key,
    1213           0 :                &btp->u.leaf_key[btp->hdr.nused],
    1214           0 :                sizeof(FreePageBtreeLeafKey) * newsibling->hdr.nused);
    1215             :     else
    1216             :     {
    1217             :         Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
    1218           0 :         memcpy(&newsibling->u.internal_key,
    1219           0 :                &btp->u.internal_key[btp->hdr.nused],
    1220           0 :                sizeof(FreePageBtreeInternalKey) * newsibling->hdr.nused);
    1221           0 :         FreePageBtreeUpdateParentPointers(fpm_segment_base(fpm), newsibling);
    1222             :     }
    1223             : 
    1224           0 :     return newsibling;
    1225             : }
    1226             : 
    1227             : /*
    1228             :  * When internal pages are split or merged, the parent pointers of their
    1229             :  * children must be updated.
    1230             :  */
    1231             : static void
    1232           0 : FreePageBtreeUpdateParentPointers(char *base, FreePageBtree *btp)
    1233             : {
    1234             :     Size        i;
    1235             : 
    1236             :     Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
    1237           0 :     for (i = 0; i < btp->hdr.nused; ++i)
    1238             :     {
    1239             :         FreePageBtree *child;
    1240             : 
    1241           0 :         child = relptr_access(base, btp->u.internal_key[i].child);
    1242           0 :         relptr_store(base, child->hdr.parent, btp);
    1243             :     }
    1244           0 : }
    1245             : 
    1246             : /*
    1247             :  * Debugging dump of btree data.
    1248             :  */
    1249             : static void
    1250           0 : FreePageManagerDumpBtree(FreePageManager *fpm, FreePageBtree *btp,
    1251             :                          FreePageBtree *parent, int level, StringInfo buf)
    1252             : {
    1253           0 :     char       *base = fpm_segment_base(fpm);
    1254           0 :     Size        pageno = fpm_pointer_to_page(base, btp);
    1255             :     Size        index;
    1256             :     FreePageBtree *check_parent;
    1257             : 
    1258           0 :     check_stack_depth();
    1259           0 :     check_parent = relptr_access(base, btp->hdr.parent);
    1260           0 :     appendStringInfo(buf, "  %zu@%d %c", pageno, level,
    1261           0 :                      btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC ? 'i' : 'l');
    1262           0 :     if (parent != check_parent)
    1263           0 :         appendStringInfo(buf, " [actual parent %zu, expected %zu]",
    1264           0 :                          fpm_pointer_to_page(base, check_parent),
    1265           0 :                          fpm_pointer_to_page(base, parent));
    1266           0 :     appendStringInfoChar(buf, ':');
    1267           0 :     for (index = 0; index < btp->hdr.nused; ++index)
    1268             :     {
    1269           0 :         if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
    1270           0 :             appendStringInfo(buf, " %zu->%zu",
    1271             :                              btp->u.internal_key[index].first_page,
    1272           0 :                              btp->u.internal_key[index].child.relptr_off / FPM_PAGE_SIZE);
    1273             :         else
    1274           0 :             appendStringInfo(buf, " %zu(%zu)",
    1275             :                              btp->u.leaf_key[index].first_page,
    1276             :                              btp->u.leaf_key[index].npages);
    1277             :     }
    1278           0 :     appendStringInfoChar(buf, '\n');
    1279             : 
    1280           0 :     if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
    1281             :     {
    1282           0 :         for (index = 0; index < btp->hdr.nused; ++index)
    1283             :         {
    1284             :             FreePageBtree *child;
    1285             : 
    1286           0 :             child = relptr_access(base, btp->u.internal_key[index].child);
    1287           0 :             FreePageManagerDumpBtree(fpm, child, btp, level + 1, buf);
    1288             :         }
    1289             :     }
    1290           0 : }
    1291             : 
    1292             : /*
    1293             :  * Debugging dump of free-span data.
    1294             :  */
    1295             : static void
    1296           0 : FreePageManagerDumpSpans(FreePageManager *fpm, FreePageSpanLeader *span,
    1297             :                          Size expected_pages, StringInfo buf)
    1298             : {
    1299           0 :     char       *base = fpm_segment_base(fpm);
    1300             : 
    1301           0 :     while (span != NULL)
    1302             :     {
    1303           0 :         if (span->npages != expected_pages)
    1304           0 :             appendStringInfo(buf, " %zu(%zu)", fpm_pointer_to_page(base, span),
    1305             :                              span->npages);
    1306             :         else
    1307           0 :             appendStringInfo(buf, " %zu", fpm_pointer_to_page(base, span));
    1308           0 :         span = relptr_access(base, span->next);
    1309             :     }
    1310             : 
    1311           0 :     appendStringInfoChar(buf, '\n');
    1312           0 : }
    1313             : 
    1314             : /*
    1315             :  * This function allocates a run of pages of the given length from the free
    1316             :  * page manager.
    1317             :  */
    1318             : static bool
    1319        3036 : FreePageManagerGetInternal(FreePageManager *fpm, Size npages, Size *first_page)
    1320             : {
    1321        3036 :     char       *base = fpm_segment_base(fpm);
    1322        3036 :     FreePageSpanLeader *victim = NULL;
    1323             :     FreePageSpanLeader *prev;
    1324             :     FreePageSpanLeader *next;
    1325             :     FreePageBtreeSearchResult result;
    1326        3036 :     Size        victim_page = 0;    /* placate compiler */
    1327             :     Size        f;
    1328             : 
    1329             :     /*
    1330             :      * Search for a free span.
    1331             :      *
    1332             :      * Right now, we use a simple best-fit policy here, but it's possible for
    1333             :      * this to result in memory fragmentation if we're repeatedly asked to
    1334             :      * allocate chunks just a little smaller than what we have available.
    1335             :      * Hopefully, this is unlikely, because we expect most requests to be
    1336             :      * single pages or superblock-sized chunks -- but no policy can be optimal
    1337             :      * under all circumstances unless it has knowledge of future allocation
    1338             :      * patterns.
    1339             :      */
    1340      488724 :     for (f = Min(npages, FPM_NUM_FREELISTS) - 1; f < FPM_NUM_FREELISTS; ++f)
    1341             :     {
    1342             :         /* Skip empty freelists. */
    1343      244362 :         if (relptr_is_null(fpm->freelist[f]))
    1344      241326 :             continue;
    1345             : 
    1346             :         /*
    1347             :          * All of the freelists except the last one contain only items of a
    1348             :          * single size, so we just take the first one.  But the final free
    1349             :          * list contains everything too big for any of the other lists, so we
    1350             :          * need to search the list.
    1351             :          */
    1352        3036 :         if (f < FPM_NUM_FREELISTS - 1)
    1353        1310 :             victim = relptr_access(base, fpm->freelist[f]);
    1354             :         else
    1355             :         {
    1356             :             FreePageSpanLeader *candidate;
    1357             : 
    1358        1726 :             candidate = relptr_access(base, fpm->freelist[f]);
    1359             :             do
    1360             :             {
    1361        1726 :                 if (candidate->npages >= npages && (victim == NULL ||
    1362           0 :                                                     victim->npages > candidate->npages))
    1363             :                 {
    1364        1726 :                     victim = candidate;
    1365        1726 :                     if (victim->npages == npages)
    1366           0 :                         break;
    1367             :                 }
    1368        1726 :                 candidate = relptr_access(base, candidate->next);
    1369        1726 :             } while (candidate != NULL);
    1370             :         }
    1371        3036 :         break;
    1372             :     }
    1373             : 
    1374             :     /* If we didn't find an allocatable span, return failure. */
    1375        3036 :     if (victim == NULL)
    1376           0 :         return false;
    1377             : 
    1378             :     /* Remove span from free list. */
    1379             :     Assert(victim->magic == FREE_PAGE_SPAN_LEADER_MAGIC);
    1380        3036 :     prev = relptr_access(base, victim->prev);
    1381        3036 :     next = relptr_access(base, victim->next);
    1382        3036 :     if (prev != NULL)
    1383           0 :         relptr_copy(prev->next, victim->next);
    1384             :     else
    1385        3036 :         relptr_copy(fpm->freelist[f], victim->next);
    1386        3036 :     if (next != NULL)
    1387          36 :         relptr_copy(next->prev, victim->prev);
    1388        3036 :     victim_page = fpm_pointer_to_page(base, victim);
    1389             : 
    1390             :     /* Decide whether we might be invalidating contiguous_pages. */
    1391        4762 :     if (f == FPM_NUM_FREELISTS - 1 &&
    1392        1726 :         victim->npages == fpm->contiguous_pages)
    1393             :     {
    1394             :         /*
    1395             :          * The victim span came from the oversized freelist, and had the same
    1396             :          * size as the longest span.  There may or may not be another one of
    1397             :          * the same size, so contiguous_pages must be recomputed just to be
    1398             :          * safe.
    1399             :          */
    1400        1726 :         fpm->contiguous_pages_dirty = true;
    1401             :     }
    1402        1734 :     else if (f + 1 == fpm->contiguous_pages &&
    1403         424 :              relptr_is_null(fpm->freelist[f]))
    1404             :     {
    1405             :         /*
    1406             :          * The victim span came from a fixed sized freelist, and it was the
    1407             :          * list for spans of the same size as the current longest span, and
    1408             :          * the list is now empty after removing the victim.  So
    1409             :          * contiguous_pages must be recomputed without a doubt.
    1410             :          */
    1411         424 :         fpm->contiguous_pages_dirty = true;
    1412             :     }
    1413             : 
    1414             :     /*
    1415             :      * If we haven't initialized the btree yet, the victim must be the single
    1416             :      * span stored within the FreePageManager itself.  Otherwise, we need to
    1417             :      * update the btree.
    1418             :      */
    1419        3036 :     if (relptr_is_null(fpm->btree_root))
    1420             :     {
    1421             :         Assert(victim_page == fpm->singleton_first_page);
    1422             :         Assert(victim->npages == fpm->singleton_npages);
    1423             :         Assert(victim->npages >= npages);
    1424        1842 :         fpm->singleton_first_page += npages;
    1425        1842 :         fpm->singleton_npages -= npages;
    1426        1842 :         if (fpm->singleton_npages > 0)
    1427        1842 :             FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
    1428             :                                    fpm->singleton_npages);
    1429             :     }
    1430             :     else
    1431             :     {
    1432             :         /*
    1433             :          * If the span we found is exactly the right size, remove it from the
    1434             :          * btree completely.  Otherwise, adjust the btree entry to reflect the
    1435             :          * still-unallocated portion of the span, and put that portion on the
    1436             :          * appropriate free list.
    1437             :          */
    1438        1194 :         FreePageBtreeSearch(fpm, victim_page, &result);
    1439             :         Assert(result.found);
    1440        1194 :         if (victim->npages == npages)
    1441         232 :             FreePageBtreeRemove(fpm, result.page, result.index);
    1442             :         else
    1443             :         {
    1444             :             FreePageBtreeLeafKey *key;
    1445             : 
    1446             :             /* Adjust btree to reflect remaining pages. */
    1447             :             Assert(victim->npages > npages);
    1448         962 :             key = &result.page->u.leaf_key[result.index];
    1449             :             Assert(key->npages == victim->npages);
    1450         962 :             key->first_page += npages;
    1451         962 :             key->npages -= npages;
    1452         962 :             if (result.index == 0)
    1453         456 :                 FreePageBtreeAdjustAncestorKeys(fpm, result.page);
    1454             : 
    1455             :             /* Put the unallocated pages back on the appropriate free list. */
    1456         962 :             FreePagePushSpanLeader(fpm, victim_page + npages,
    1457         962 :                                    victim->npages - npages);
    1458             :         }
    1459             :     }
    1460             : 
    1461             :     /* Return results to caller. */
    1462        3036 :     *first_page = fpm_pointer_to_page(base, victim);
    1463        3036 :     return true;
    1464             : }
    1465             : 
    1466             : /*
    1467             :  * Put a range of pages into the btree and freelists, consolidating it with
    1468             :  * existing free spans just before and/or after it.  If 'soft' is true,
    1469             :  * only perform the insertion if it can be done without allocating new btree
    1470             :  * pages; if false, do it always.  Returns 0 if the soft flag caused the
    1471             :  * insertion to be skipped, or otherwise the size of the contiguous span
    1472             :  * created by the insertion.  This may be larger than npages if we're able
    1473             :  * to consolidate with an adjacent range.
    1474             :  */
    1475             : static Size
    1476        2754 : FreePageManagerPutInternal(FreePageManager *fpm, Size first_page, Size npages,
    1477             :                            bool soft)
    1478             : {
    1479        2754 :     char       *base = fpm_segment_base(fpm);
    1480             :     FreePageBtreeSearchResult result;
    1481        2754 :     FreePageBtreeLeafKey *prevkey = NULL;
    1482        2754 :     FreePageBtreeLeafKey *nextkey = NULL;
    1483             :     FreePageBtree *np;
    1484             :     Size        nindex;
    1485             : 
    1486             :     Assert(npages > 0);
    1487             : 
    1488             :     /* We can store a single free span without initializing the btree. */
    1489        2754 :     if (fpm->btree_depth == 0)
    1490             :     {
    1491        1160 :         if (fpm->singleton_npages == 0)
    1492             :         {
    1493             :             /* Don't have a span yet; store this one. */
    1494         144 :             fpm->singleton_first_page = first_page;
    1495         144 :             fpm->singleton_npages = npages;
    1496         144 :             FreePagePushSpanLeader(fpm, first_page, npages);
    1497         144 :             return fpm->singleton_npages;
    1498             :         }
    1499        1016 :         else if (fpm->singleton_first_page + fpm->singleton_npages ==
    1500             :                  first_page)
    1501             :         {
    1502             :             /* New span immediately follows sole existing span. */
    1503           0 :             fpm->singleton_npages += npages;
    1504           0 :             FreePagePopSpanLeader(fpm, fpm->singleton_first_page);
    1505           0 :             FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
    1506             :                                    fpm->singleton_npages);
    1507           0 :             return fpm->singleton_npages;
    1508             :         }
    1509        1016 :         else if (first_page + npages == fpm->singleton_first_page)
    1510             :         {
    1511             :             /* New span immediately precedes sole existing span. */
    1512         754 :             FreePagePopSpanLeader(fpm, fpm->singleton_first_page);
    1513         754 :             fpm->singleton_first_page = first_page;
    1514         754 :             fpm->singleton_npages += npages;
    1515         754 :             FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
    1516             :                                    fpm->singleton_npages);
    1517         754 :             return fpm->singleton_npages;
    1518             :         }
    1519             :         else
    1520             :         {
    1521             :             /* Not contiguous; we need to initialize the btree. */
    1522             :             Size        root_page;
    1523             :             FreePageBtree *root;
    1524             : 
    1525         262 :             if (!relptr_is_null(fpm->btree_recycle))
    1526          60 :                 root = FreePageBtreeGetRecycled(fpm);
    1527         202 :             else if (soft)
    1528         120 :                 return 0;       /* Should not allocate if soft. */
    1529         142 :             else if (FreePageManagerGetInternal(fpm, 1, &root_page))
    1530         142 :                 root = (FreePageBtree *) fpm_page_to_pointer(base, root_page);
    1531             :             else
    1532             :             {
    1533             :                 /* We'd better be able to get a page from the existing range. */
    1534           0 :                 elog(FATAL, "free page manager btree is corrupt");
    1535             :             }
    1536             : 
    1537             :             /* Create the btree and move the preexisting range into it. */
    1538         202 :             root->hdr.magic = FREE_PAGE_LEAF_MAGIC;
    1539         202 :             root->hdr.nused = 1;
    1540         202 :             relptr_store(base, root->hdr.parent, (FreePageBtree *) NULL);
    1541         202 :             root->u.leaf_key[0].first_page = fpm->singleton_first_page;
    1542         202 :             root->u.leaf_key[0].npages = fpm->singleton_npages;
    1543         202 :             relptr_store(base, fpm->btree_root, root);
    1544         202 :             fpm->singleton_first_page = 0;
    1545         202 :             fpm->singleton_npages = 0;
    1546         202 :             fpm->btree_depth = 1;
    1547             : 
    1548             :             /*
    1549             :              * Corner case: it may be that the btree root took the very last
    1550             :              * free page.  In that case, the sole btree entry covers a zero
    1551             :              * page run, which is invalid.  Overwrite it with the entry we're
    1552             :              * trying to insert and get out.
    1553             :              */
    1554         202 :             if (root->u.leaf_key[0].npages == 0)
    1555             :             {
    1556           0 :                 root->u.leaf_key[0].first_page = first_page;
    1557           0 :                 root->u.leaf_key[0].npages = npages;
    1558           0 :                 FreePagePushSpanLeader(fpm, first_page, npages);
    1559           0 :                 return npages;
    1560             :             }
    1561             : 
    1562             :             /* Fall through to insert the new key. */
    1563             :         }
    1564             :     }
    1565             : 
    1566             :     /* Search the btree. */
    1567        1796 :     FreePageBtreeSearch(fpm, first_page, &result);
    1568             :     Assert(!result.found);
    1569        1796 :     if (result.index > 0)
    1570        1044 :         prevkey = &result.page->u.leaf_key[result.index - 1];
    1571        1796 :     if (result.index < result.page->hdr.nused)
    1572             :     {
    1573        1796 :         np = result.page;
    1574        1796 :         nindex = result.index;
    1575        1796 :         nextkey = &result.page->u.leaf_key[result.index];
    1576             :     }
    1577             :     else
    1578             :     {
    1579           0 :         np = FreePageBtreeFindRightSibling(base, result.page);
    1580           0 :         nindex = 0;
    1581           0 :         if (np != NULL)
    1582           0 :             nextkey = &np->u.leaf_key[0];
    1583             :     }
    1584             : 
    1585             :     /* Consolidate with the previous entry if possible. */
    1586        1796 :     if (prevkey != NULL && prevkey->first_page + prevkey->npages >= first_page)
    1587             :     {
    1588         202 :         bool        remove_next = false;
    1589             :         Size        result;
    1590             : 
    1591             :         Assert(prevkey->first_page + prevkey->npages == first_page);
    1592         202 :         prevkey->npages = (first_page - prevkey->first_page) + npages;
    1593             : 
    1594             :         /* Check whether we can *also* consolidate with the following entry. */
    1595         404 :         if (nextkey != NULL &&
    1596         202 :             prevkey->first_page + prevkey->npages >= nextkey->first_page)
    1597             :         {
    1598             :             Assert(prevkey->first_page + prevkey->npages ==
    1599             :                    nextkey->first_page);
    1600         244 :             prevkey->npages = (nextkey->first_page - prevkey->first_page)
    1601         122 :                 + nextkey->npages;
    1602         122 :             FreePagePopSpanLeader(fpm, nextkey->first_page);
    1603         122 :             remove_next = true;
    1604             :         }
    1605             : 
    1606             :         /* Put the span on the correct freelist and save size. */
    1607         202 :         FreePagePopSpanLeader(fpm, prevkey->first_page);
    1608         202 :         FreePagePushSpanLeader(fpm, prevkey->first_page, prevkey->npages);
    1609         202 :         result = prevkey->npages;
    1610             : 
    1611             :         /*
    1612             :          * If we consolidated with both the preceding and following entries,
    1613             :          * we must remove the following entry.  We do this last, because
    1614             :          * removing an element from the btree may invalidate pointers we hold
    1615             :          * into the current data structure.
    1616             :          *
    1617             :          * NB: The btree is technically in an invalid state a this point
    1618             :          * because we've already updated prevkey to cover the same key space
    1619             :          * as nextkey.  FreePageBtreeRemove() shouldn't notice that, though.
    1620             :          */
    1621         202 :         if (remove_next)
    1622         122 :             FreePageBtreeRemove(fpm, np, nindex);
    1623             : 
    1624         202 :         return result;
    1625             :     }
    1626             : 
    1627             :     /* Consolidate with the next entry if possible. */
    1628        1594 :     if (nextkey != NULL && first_page + npages >= nextkey->first_page)
    1629             :     {
    1630             :         Size        newpages;
    1631             : 
    1632             :         /* Compute new size for span. */
    1633             :         Assert(first_page + npages == nextkey->first_page);
    1634        1122 :         newpages = (nextkey->first_page - first_page) + nextkey->npages;
    1635             : 
    1636             :         /* Put span on correct free list. */
    1637        1122 :         FreePagePopSpanLeader(fpm, nextkey->first_page);
    1638        1122 :         FreePagePushSpanLeader(fpm, first_page, newpages);
    1639             : 
    1640             :         /* Update key in place. */
    1641        1122 :         nextkey->first_page = first_page;
    1642        1122 :         nextkey->npages = newpages;
    1643             : 
    1644             :         /* If reducing first key on page, ancestors might need adjustment. */
    1645        1122 :         if (nindex == 0)
    1646         494 :             FreePageBtreeAdjustAncestorKeys(fpm, np);
    1647             : 
    1648        1122 :         return nextkey->npages;
    1649             :     }
    1650             : 
    1651             :     /* Split leaf page and as many of its ancestors as necessary. */
    1652         472 :     if (result.split_pages > 0)
    1653             :     {
    1654             :         /*
    1655             :          * NB: We could consider various coping strategies here to avoid a
    1656             :          * split; most obviously, if np != result.page, we could target that
    1657             :          * page instead.   More complicated shuffling strategies could be
    1658             :          * possible as well; basically, unless every single leaf page is 100%
    1659             :          * full, we can jam this key in there if we try hard enough.  It's
    1660             :          * unlikely that trying that hard is worthwhile, but it's possible we
    1661             :          * might need to make more than no effort.  For now, we just do the
    1662             :          * easy thing, which is nothing.
    1663             :          */
    1664             : 
    1665             :         /* If this is a soft insert, it's time to give up. */
    1666           0 :         if (soft)
    1667           0 :             return 0;
    1668             : 
    1669             :         /* Check whether we need to allocate more btree pages to split. */
    1670           0 :         if (result.split_pages > fpm->btree_recycle_count)
    1671             :         {
    1672             :             Size        pages_needed;
    1673             :             Size        recycle_page;
    1674             :             Size        i;
    1675             : 
    1676             :             /*
    1677             :              * Allocate the required number of pages and split each one in
    1678             :              * turn.  This should never fail, because if we've got enough
    1679             :              * spans of free pages kicking around that we need additional
    1680             :              * storage space just to remember them all, then we should
    1681             :              * certainly have enough to expand the btree, which should only
    1682             :              * ever use a tiny number of pages compared to the number under
    1683             :              * management.  If it does, something's badly screwed up.
    1684             :              */
    1685           0 :             pages_needed = result.split_pages - fpm->btree_recycle_count;
    1686           0 :             for (i = 0; i < pages_needed; ++i)
    1687             :             {
    1688           0 :                 if (!FreePageManagerGetInternal(fpm, 1, &recycle_page))
    1689           0 :                     elog(FATAL, "free page manager btree is corrupt");
    1690           0 :                 FreePageBtreeRecycle(fpm, recycle_page);
    1691             :             }
    1692             : 
    1693             :             /*
    1694             :              * The act of allocating pages to recycle may have invalidated the
    1695             :              * results of our previous btree reserch, so repeat it. (We could
    1696             :              * recheck whether any of our split-avoidance strategies that were
    1697             :              * not viable before now are, but it hardly seems worthwhile, so
    1698             :              * we don't bother. Consolidation can't be possible now if it
    1699             :              * wasn't previously.)
    1700             :              */
    1701           0 :             FreePageBtreeSearch(fpm, first_page, &result);
    1702             : 
    1703             :             /*
    1704             :              * The act of allocating pages for use in constructing our btree
    1705             :              * should never cause any page to become more full, so the new
    1706             :              * split depth should be no greater than the old one, and perhaps
    1707             :              * less if we fortuitously allocated a chunk that freed up a slot
    1708             :              * on the page we need to update.
    1709             :              */
    1710             :             Assert(result.split_pages <= fpm->btree_recycle_count);
    1711             :         }
    1712             : 
    1713             :         /* If we still need to perform a split, do it. */
    1714           0 :         if (result.split_pages > 0)
    1715             :         {
    1716           0 :             FreePageBtree *split_target = result.page;
    1717           0 :             FreePageBtree *child = NULL;
    1718           0 :             Size        key = first_page;
    1719             : 
    1720             :             for (;;)
    1721           0 :             {
    1722             :                 FreePageBtree *newsibling;
    1723             :                 FreePageBtree *parent;
    1724             : 
    1725             :                 /* Identify parent page, which must receive downlink. */
    1726           0 :                 parent = relptr_access(base, split_target->hdr.parent);
    1727             : 
    1728             :                 /* Split the page - downlink not added yet. */
    1729           0 :                 newsibling = FreePageBtreeSplitPage(fpm, split_target);
    1730             : 
    1731             :                 /*
    1732             :                  * At this point in the loop, we're always carrying a pending
    1733             :                  * insertion.  On the first pass, it's the actual key we're
    1734             :                  * trying to insert; on subsequent passes, it's the downlink
    1735             :                  * that needs to be added as a result of the split performed
    1736             :                  * during the previous loop iteration.  Since we've just split
    1737             :                  * the page, there's definitely room on one of the two
    1738             :                  * resulting pages.
    1739             :                  */
    1740           0 :                 if (child == NULL)
    1741             :                 {
    1742             :                     Size        index;
    1743             :                     FreePageBtree *insert_into;
    1744             : 
    1745           0 :                     insert_into = key < newsibling->u.leaf_key[0].first_page ?
    1746           0 :                         split_target : newsibling;
    1747           0 :                     index = FreePageBtreeSearchLeaf(insert_into, key);
    1748           0 :                     FreePageBtreeInsertLeaf(insert_into, index, key, npages);
    1749           0 :                     if (index == 0 && insert_into == split_target)
    1750           0 :                         FreePageBtreeAdjustAncestorKeys(fpm, split_target);
    1751             :                 }
    1752             :                 else
    1753             :                 {
    1754             :                     Size        index;
    1755             :                     FreePageBtree *insert_into;
    1756             : 
    1757           0 :                     insert_into =
    1758           0 :                         key < newsibling->u.internal_key[0].first_page ?
    1759           0 :                         split_target : newsibling;
    1760           0 :                     index = FreePageBtreeSearchInternal(insert_into, key);
    1761           0 :                     FreePageBtreeInsertInternal(base, insert_into, index,
    1762             :                                                 key, child);
    1763           0 :                     relptr_store(base, child->hdr.parent, insert_into);
    1764           0 :                     if (index == 0 && insert_into == split_target)
    1765           0 :                         FreePageBtreeAdjustAncestorKeys(fpm, split_target);
    1766             :                 }
    1767             : 
    1768             :                 /* If the page we just split has no parent, split the root. */
    1769           0 :                 if (parent == NULL)
    1770             :                 {
    1771             :                     FreePageBtree *newroot;
    1772             : 
    1773           0 :                     newroot = FreePageBtreeGetRecycled(fpm);
    1774           0 :                     newroot->hdr.magic = FREE_PAGE_INTERNAL_MAGIC;
    1775           0 :                     newroot->hdr.nused = 2;
    1776           0 :                     relptr_store(base, newroot->hdr.parent,
    1777             :                                  (FreePageBtree *) NULL);
    1778           0 :                     newroot->u.internal_key[0].first_page =
    1779           0 :                         FreePageBtreeFirstKey(split_target);
    1780           0 :                     relptr_store(base, newroot->u.internal_key[0].child,
    1781             :                                  split_target);
    1782           0 :                     relptr_store(base, split_target->hdr.parent, newroot);
    1783           0 :                     newroot->u.internal_key[1].first_page =
    1784           0 :                         FreePageBtreeFirstKey(newsibling);
    1785           0 :                     relptr_store(base, newroot->u.internal_key[1].child,
    1786             :                                  newsibling);
    1787           0 :                     relptr_store(base, newsibling->hdr.parent, newroot);
    1788           0 :                     relptr_store(base, fpm->btree_root, newroot);
    1789           0 :                     fpm->btree_depth++;
    1790             : 
    1791           0 :                     break;
    1792             :                 }
    1793             : 
    1794             :                 /* If the parent page isn't full, insert the downlink. */
    1795           0 :                 key = newsibling->u.internal_key[0].first_page;
    1796           0 :                 if (parent->hdr.nused < FPM_ITEMS_PER_INTERNAL_PAGE)
    1797             :                 {
    1798             :                     Size        index;
    1799             : 
    1800           0 :                     index = FreePageBtreeSearchInternal(parent, key);
    1801           0 :                     FreePageBtreeInsertInternal(base, parent, index,
    1802             :                                                 key, newsibling);
    1803           0 :                     relptr_store(base, newsibling->hdr.parent, parent);
    1804           0 :                     if (index == 0)
    1805           0 :                         FreePageBtreeAdjustAncestorKeys(fpm, parent);
    1806           0 :                     break;
    1807             :                 }
    1808             : 
    1809             :                 /* The parent also needs to be split, so loop around. */
    1810           0 :                 child = newsibling;
    1811           0 :                 split_target = parent;
    1812             :             }
    1813             : 
    1814             :             /*
    1815             :              * The loop above did the insert, so just need to update the free
    1816             :              * list, and we're done.
    1817             :              */
    1818           0 :             FreePagePushSpanLeader(fpm, first_page, npages);
    1819             : 
    1820           0 :             return npages;
    1821             :         }
    1822             :     }
    1823             : 
    1824             :     /* Physically add the key to the page. */
    1825             :     Assert(result.page->hdr.nused < FPM_ITEMS_PER_LEAF_PAGE);
    1826         472 :     FreePageBtreeInsertLeaf(result.page, result.index, first_page, npages);
    1827             : 
    1828             :     /* If new first key on page, ancestors might need adjustment. */
    1829         472 :     if (result.index == 0)
    1830         258 :         FreePageBtreeAdjustAncestorKeys(fpm, result.page);
    1831             : 
    1832             :     /* Put it on the free list. */
    1833         472 :     FreePagePushSpanLeader(fpm, first_page, npages);
    1834             : 
    1835         472 :     return npages;
    1836             : }
    1837             : 
    1838             : /*
    1839             :  * Remove a FreePageSpanLeader from the linked-list that contains it, either
    1840             :  * because we're changing the size of the span, or because we're allocating it.
    1841             :  */
    1842             : static void
    1843        2332 : FreePagePopSpanLeader(FreePageManager *fpm, Size pageno)
    1844             : {
    1845        2332 :     char       *base = fpm_segment_base(fpm);
    1846             :     FreePageSpanLeader *span;
    1847             :     FreePageSpanLeader *next;
    1848             :     FreePageSpanLeader *prev;
    1849             : 
    1850        2332 :     span = (FreePageSpanLeader *) fpm_page_to_pointer(base, pageno);
    1851             : 
    1852        2332 :     next = relptr_access(base, span->next);
    1853        2332 :     prev = relptr_access(base, span->prev);
    1854        2332 :     if (next != NULL)
    1855          98 :         relptr_copy(next->prev, span->prev);
    1856        2332 :     if (prev != NULL)
    1857           4 :         relptr_copy(prev->next, span->next);
    1858             :     else
    1859             :     {
    1860        2328 :         Size        f = Min(span->npages, FPM_NUM_FREELISTS) - 1;
    1861             : 
    1862             :         Assert(fpm->freelist[f].relptr_off == pageno * FPM_PAGE_SIZE);
    1863        2328 :         relptr_copy(fpm->freelist[f], span->next);
    1864             :     }
    1865        2332 : }
    1866             : 
    1867             : /*
    1868             :  * Initialize a new FreePageSpanLeader and put it on the appropriate free list.
    1869             :  */
    1870             : static void
    1871        5564 : FreePagePushSpanLeader(FreePageManager *fpm, Size first_page, Size npages)
    1872             : {
    1873        5564 :     char       *base = fpm_segment_base(fpm);
    1874        5564 :     Size        f = Min(npages, FPM_NUM_FREELISTS) - 1;
    1875        5564 :     FreePageSpanLeader *head = relptr_access(base, fpm->freelist[f]);
    1876             :     FreePageSpanLeader *span;
    1877             : 
    1878        5564 :     span = (FreePageSpanLeader *) fpm_page_to_pointer(base, first_page);
    1879        5564 :     span->magic = FREE_PAGE_SPAN_LEADER_MAGIC;
    1880        5564 :     span->npages = npages;
    1881        5564 :     relptr_store(base, span->next, head);
    1882        5564 :     relptr_store(base, span->prev, (FreePageSpanLeader *) NULL);
    1883        5564 :     if (head != NULL)
    1884         138 :         relptr_store(base, head->prev, span);
    1885        5564 :     relptr_store(base, fpm->freelist[f], span);
    1886        5564 : }

Generated by: LCOV version 1.13