Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * freepage.c
4 : * Management of free memory pages.
5 : *
6 : * The intention of this code is to provide infrastructure for memory
7 : * allocators written specifically for PostgreSQL. At least in the case
8 : * of dynamic shared memory, we can't simply use malloc() or even
9 : * relatively thin wrappers like palloc() which sit on top of it, because
10 : * no allocator built into the operating system will deal with relative
11 : * pointers. In the future, we may find other cases in which greater
12 : * control over our own memory management seems desirable.
13 : *
14 : * A FreePageManager keeps track of which 4kB pages of memory are currently
15 : * unused from the point of view of some higher-level memory allocator.
16 : * Unlike a user-facing allocator such as palloc(), a FreePageManager can
17 : * only allocate and free in units of whole pages, and freeing an
18 : * allocation can only be done given knowledge of its length in pages.
19 : *
20 : * Since a free page manager has only a fixed amount of dedicated memory,
21 : * and since there is no underlying allocator, it uses the free pages
22 : * it is given to manage to store its bookkeeping data. It keeps multiple
23 : * freelists of runs of pages, sorted by the size of the run; the head of
24 : * each freelist is stored in the FreePageManager itself, and the first
25 : * page of each run contains a relative pointer to the next run. See
26 : * FreePageManagerGetInternal for more details on how the freelists are
27 : * managed.
28 : *
29 : * To avoid memory fragmentation, it's important to consolidate adjacent
30 : * spans of pages whenever possible; otherwise, large allocation requests
31 : * might not be satisfied even when sufficient contiguous space is
32 : * available. Therefore, in addition to the freelists, we maintain an
33 : * in-memory btree of free page ranges ordered by page number. If a
34 : * range being freed precedes or follows a range that is already free,
35 : * the existing range is extended; if it exactly bridges the gap between
36 : * free ranges, then the two existing ranges are consolidated with the
37 : * newly-freed range to form one great big range of free pages.
38 : *
39 : * When there is only one range of free pages, the btree is trivial and
40 : * is stored within the FreePageManager proper; otherwise, pages are
41 : * allocated from the area under management as needed. Even in cases
42 : * where memory fragmentation is very severe, only a tiny fraction of
43 : * the pages under management are consumed by this btree.
44 : *
45 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
46 : * Portions Copyright (c) 1994, Regents of the University of California
47 : *
48 : * IDENTIFICATION
49 : * src/backend/utils/mmgr/freepage.c
50 : *
51 : *-------------------------------------------------------------------------
52 : */
53 :
54 : #include "postgres.h"
55 : #include "lib/stringinfo.h"
56 : #include "miscadmin.h"
57 :
58 : #include "utils/freepage.h"
59 : #include "utils/relptr.h"
60 :
61 :
62 : /* Magic numbers to identify various page types */
63 : #define FREE_PAGE_SPAN_LEADER_MAGIC 0xea4020f0
64 : #define FREE_PAGE_LEAF_MAGIC 0x98eae728
65 : #define FREE_PAGE_INTERNAL_MAGIC 0x19aa32c9
66 :
67 : /* Doubly linked list of spans of free pages; stored in first page of span. */
68 : struct FreePageSpanLeader
69 : {
70 : int magic; /* always FREE_PAGE_SPAN_LEADER_MAGIC */
71 : Size npages; /* number of pages in span */
72 : RelptrFreePageSpanLeader prev;
73 : RelptrFreePageSpanLeader next;
74 : };
75 :
76 : /* Common header for btree leaf and internal pages. */
77 : typedef struct FreePageBtreeHeader
78 : {
79 : int magic; /* FREE_PAGE_LEAF_MAGIC or
80 : * FREE_PAGE_INTERNAL_MAGIC */
81 : Size nused; /* number of items used */
82 : RelptrFreePageBtree parent; /* uplink */
83 : } FreePageBtreeHeader;
84 :
85 : /* Internal key; points to next level of btree. */
86 : typedef struct FreePageBtreeInternalKey
87 : {
88 : Size first_page; /* low bound for keys on child page */
89 : RelptrFreePageBtree child; /* downlink */
90 : } FreePageBtreeInternalKey;
91 :
92 : /* Leaf key; no payload data. */
93 : typedef struct FreePageBtreeLeafKey
94 : {
95 : Size first_page; /* first page in span */
96 : Size npages; /* number of pages in span */
97 : } FreePageBtreeLeafKey;
98 :
99 : /* Work out how many keys will fit on a page. */
100 : #define FPM_ITEMS_PER_INTERNAL_PAGE \
101 : ((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
102 : sizeof(FreePageBtreeInternalKey))
103 : #define FPM_ITEMS_PER_LEAF_PAGE \
104 : ((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
105 : sizeof(FreePageBtreeLeafKey))
106 :
107 : /* A btree page of either sort */
108 : struct FreePageBtree
109 : {
110 : FreePageBtreeHeader hdr;
111 : union
112 : {
113 : FreePageBtreeInternalKey internal_key[FPM_ITEMS_PER_INTERNAL_PAGE];
114 : FreePageBtreeLeafKey leaf_key[FPM_ITEMS_PER_LEAF_PAGE];
115 : } u;
116 : };
117 :
118 : /* Results of a btree search */
119 : typedef struct FreePageBtreeSearchResult
120 : {
121 : FreePageBtree *page;
122 : Size index;
123 : bool found;
124 : unsigned split_pages;
125 : } FreePageBtreeSearchResult;
126 :
127 : /* Helper functions */
128 : static void FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm,
129 : FreePageBtree *btp);
130 : static Size FreePageBtreeCleanup(FreePageManager *fpm);
131 : static FreePageBtree *FreePageBtreeFindLeftSibling(char *base,
132 : FreePageBtree *btp);
133 : static FreePageBtree *FreePageBtreeFindRightSibling(char *base,
134 : FreePageBtree *btp);
135 : static Size FreePageBtreeFirstKey(FreePageBtree *btp);
136 : static FreePageBtree *FreePageBtreeGetRecycled(FreePageManager *fpm);
137 : static void FreePageBtreeInsertInternal(char *base, FreePageBtree *btp,
138 : Size index, Size first_page, FreePageBtree *child);
139 : static void FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index,
140 : Size first_page, Size npages);
141 : static void FreePageBtreeRecycle(FreePageManager *fpm, Size pageno);
142 : static void FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp,
143 : Size index);
144 : static void FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp);
145 : static void FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
146 : FreePageBtreeSearchResult *result);
147 : static Size FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page);
148 : static Size FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page);
149 : static FreePageBtree *FreePageBtreeSplitPage(FreePageManager *fpm,
150 : FreePageBtree *btp);
151 : static void FreePageBtreeUpdateParentPointers(char *base, FreePageBtree *btp);
152 : static void FreePageManagerDumpBtree(FreePageManager *fpm, FreePageBtree *btp,
153 : FreePageBtree *parent, int level, StringInfo buf);
154 : static void FreePageManagerDumpSpans(FreePageManager *fpm,
155 : FreePageSpanLeader *span, Size expected_pages,
156 : StringInfo buf);
157 : static bool FreePageManagerGetInternal(FreePageManager *fpm, Size npages,
158 : Size *first_page);
159 : static Size FreePageManagerPutInternal(FreePageManager *fpm, Size first_page,
160 : Size npages, bool soft);
161 : static void FreePagePopSpanLeader(FreePageManager *fpm, Size pageno);
162 : static void FreePagePushSpanLeader(FreePageManager *fpm, Size first_page,
163 : Size npages);
164 : static Size FreePageManagerLargestContiguous(FreePageManager *fpm);
165 : static void FreePageManagerUpdateLargest(FreePageManager *fpm);
166 :
167 : #ifdef FPM_EXTRA_ASSERTS
168 : static Size sum_free_pages(FreePageManager *fpm);
169 : #endif
170 :
171 : /*
172 : * Initialize a new, empty free page manager.
173 : *
174 : * 'fpm' should reference caller-provided memory large enough to contain a
175 : * FreePageManager. We'll initialize it here.
176 : *
177 : * 'base' is the address to which all pointers are relative. When managing
178 : * a dynamic shared memory segment, it should normally be the base of the
179 : * segment. When managing backend-private memory, it can be either NULL or,
180 : * if managing a single contiguous extent of memory, the start of that extent.
181 : */
182 : void
183 4664 : FreePageManagerInitialize(FreePageManager *fpm, char *base)
184 : {
185 : Size f;
186 :
187 4664 : relptr_store(base, fpm->self, fpm);
188 4664 : relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
189 4664 : relptr_store(base, fpm->btree_recycle, (FreePageSpanLeader *) NULL);
190 4664 : fpm->btree_depth = 0;
191 4664 : fpm->btree_recycle_count = 0;
192 4664 : fpm->singleton_first_page = 0;
193 4664 : fpm->singleton_npages = 0;
194 4664 : fpm->contiguous_pages = 0;
195 4664 : fpm->contiguous_pages_dirty = true;
196 : #ifdef FPM_EXTRA_ASSERTS
197 : fpm->free_pages = 0;
198 : #endif
199 :
200 606320 : for (f = 0; f < FPM_NUM_FREELISTS; f++)
201 601656 : relptr_store(base, fpm->freelist[f], (FreePageSpanLeader *) NULL);
202 4664 : }
203 :
204 : /*
205 : * Allocate a run of pages of the given length from the free page manager.
206 : * The return value indicates whether we were able to satisfy the request;
207 : * if true, the first page of the allocation is stored in *first_page.
208 : */
209 : bool
210 24408 : FreePageManagerGet(FreePageManager *fpm, Size npages, Size *first_page)
211 : {
212 : bool result;
213 : Size contiguous_pages;
214 :
215 24408 : result = FreePageManagerGetInternal(fpm, npages, first_page);
216 :
217 : /*
218 : * It's a bit counterintuitive, but allocating pages can actually create
219 : * opportunities for cleanup that create larger ranges. We might pull a
220 : * key out of the btree that enables the item at the head of the btree
221 : * recycle list to be inserted; and then if there are more items behind it
222 : * one of those might cause two currently-separated ranges to merge,
223 : * creating a single range of contiguous pages larger than any that
224 : * existed previously. It might be worth trying to improve the cleanup
225 : * algorithm to avoid such corner cases, but for now we just notice the
226 : * condition and do the appropriate reporting.
227 : */
228 24408 : contiguous_pages = FreePageBtreeCleanup(fpm);
229 24408 : if (fpm->contiguous_pages < contiguous_pages)
230 86 : fpm->contiguous_pages = contiguous_pages;
231 :
232 : /*
233 : * FreePageManagerGetInternal may have set contiguous_pages_dirty.
234 : * Recompute contiguous_pages if so.
235 : */
236 24408 : FreePageManagerUpdateLargest(fpm);
237 :
238 : #ifdef FPM_EXTRA_ASSERTS
239 : if (result)
240 : {
241 : Assert(fpm->free_pages >= npages);
242 : fpm->free_pages -= npages;
243 : }
244 : Assert(fpm->free_pages == sum_free_pages(fpm));
245 : Assert(fpm->contiguous_pages == FreePageManagerLargestContiguous(fpm));
246 : #endif
247 24408 : return result;
248 : }
249 :
250 : #ifdef FPM_EXTRA_ASSERTS
251 : static void
252 : sum_free_pages_recurse(FreePageManager *fpm, FreePageBtree *btp, Size *sum)
253 : {
254 : char *base = fpm_segment_base(fpm);
255 :
256 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC ||
257 : btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
258 : ++*sum;
259 : if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
260 : {
261 : Size index;
262 :
263 :
264 : for (index = 0; index < btp->hdr.nused; ++index)
265 : {
266 : FreePageBtree *child;
267 :
268 : child = relptr_access(base, btp->u.internal_key[index].child);
269 : sum_free_pages_recurse(fpm, child, sum);
270 : }
271 : }
272 : }
273 : static Size
274 : sum_free_pages(FreePageManager *fpm)
275 : {
276 : FreePageSpanLeader *recycle;
277 : char *base = fpm_segment_base(fpm);
278 : Size sum = 0;
279 : int list;
280 :
281 : /* Count the spans by scanning the freelists. */
282 : for (list = 0; list < FPM_NUM_FREELISTS; ++list)
283 : {
284 :
285 : if (!relptr_is_null(fpm->freelist[list]))
286 : {
287 : FreePageSpanLeader *candidate =
288 : relptr_access(base, fpm->freelist[list]);
289 :
290 : do
291 : {
292 : sum += candidate->npages;
293 : candidate = relptr_access(base, candidate->next);
294 : } while (candidate != NULL);
295 : }
296 : }
297 :
298 : /* Count btree internal pages. */
299 : if (fpm->btree_depth > 0)
300 : {
301 : FreePageBtree *root = relptr_access(base, fpm->btree_root);
302 :
303 : sum_free_pages_recurse(fpm, root, &sum);
304 : }
305 :
306 : /* Count the recycle list. */
307 : for (recycle = relptr_access(base, fpm->btree_recycle);
308 : recycle != NULL;
309 : recycle = relptr_access(base, recycle->next))
310 : {
311 : Assert(recycle->npages == 1);
312 : ++sum;
313 : }
314 :
315 : return sum;
316 : }
317 : #endif
318 :
319 : /*
320 : * Compute the size of the largest run of pages that the user could
321 : * successfully get.
322 : */
323 : static Size
324 27330 : FreePageManagerLargestContiguous(FreePageManager *fpm)
325 : {
326 : char *base;
327 : Size largest;
328 :
329 27330 : base = fpm_segment_base(fpm);
330 27330 : largest = 0;
331 27330 : if (!relptr_is_null(fpm->freelist[FPM_NUM_FREELISTS - 1]))
332 : {
333 : FreePageSpanLeader *candidate;
334 :
335 15202 : candidate = relptr_access(base, fpm->freelist[FPM_NUM_FREELISTS - 1]);
336 : do
337 : {
338 15202 : if (candidate->npages > largest)
339 15202 : largest = candidate->npages;
340 15202 : candidate = relptr_access(base, candidate->next);
341 15202 : } while (candidate != NULL);
342 : }
343 : else
344 : {
345 12128 : Size f = FPM_NUM_FREELISTS - 1;
346 :
347 : do
348 : {
349 1033376 : --f;
350 1033376 : if (!relptr_is_null(fpm->freelist[f]))
351 : {
352 12128 : largest = f + 1;
353 12128 : break;
354 : }
355 1021248 : } while (f > 0);
356 : }
357 :
358 27330 : return largest;
359 : }
360 :
361 : /*
362 : * Recompute the size of the largest run of pages that the user could
363 : * successfully get, if it has been marked dirty.
364 : */
365 : static void
366 32750 : FreePageManagerUpdateLargest(FreePageManager *fpm)
367 : {
368 32750 : if (!fpm->contiguous_pages_dirty)
369 5420 : return;
370 :
371 27330 : fpm->contiguous_pages = FreePageManagerLargestContiguous(fpm);
372 27330 : fpm->contiguous_pages_dirty = false;
373 : }
374 :
375 : /*
376 : * Transfer a run of pages to the free page manager.
377 : */
378 : void
379 8342 : FreePageManagerPut(FreePageManager *fpm, Size first_page, Size npages)
380 : {
381 : Size contiguous_pages;
382 :
383 : Assert(npages > 0);
384 :
385 : /* Record the new pages. */
386 : contiguous_pages =
387 8342 : FreePageManagerPutInternal(fpm, first_page, npages, false);
388 :
389 : /*
390 : * If the new range we inserted into the page manager was contiguous with
391 : * an existing range, it may have opened up cleanup opportunities.
392 : */
393 8342 : if (contiguous_pages > npages)
394 : {
395 : Size cleanup_contiguous_pages;
396 :
397 3512 : cleanup_contiguous_pages = FreePageBtreeCleanup(fpm);
398 3512 : if (cleanup_contiguous_pages > contiguous_pages)
399 120 : contiguous_pages = cleanup_contiguous_pages;
400 : }
401 :
402 : /* See if we now have a new largest chunk. */
403 8342 : if (fpm->contiguous_pages < contiguous_pages)
404 6016 : fpm->contiguous_pages = contiguous_pages;
405 :
406 : /*
407 : * The earlier call to FreePageManagerPutInternal may have set
408 : * contiguous_pages_dirty if it needed to allocate internal pages, so
409 : * recompute contiguous_pages if necessary.
410 : */
411 8342 : FreePageManagerUpdateLargest(fpm);
412 :
413 : #ifdef FPM_EXTRA_ASSERTS
414 : fpm->free_pages += npages;
415 : Assert(fpm->free_pages == sum_free_pages(fpm));
416 : Assert(fpm->contiguous_pages == FreePageManagerLargestContiguous(fpm));
417 : #endif
418 8342 : }
419 :
420 : /*
421 : * Produce a debugging dump of the state of a free page manager.
422 : */
423 : char *
424 0 : FreePageManagerDump(FreePageManager *fpm)
425 : {
426 0 : char *base = fpm_segment_base(fpm);
427 : StringInfoData buf;
428 : FreePageSpanLeader *recycle;
429 0 : bool dumped_any_freelist = false;
430 : Size f;
431 :
432 : /* Initialize output buffer. */
433 0 : initStringInfo(&buf);
434 :
435 : /* Dump general stuff. */
436 0 : appendStringInfo(&buf, "metadata: self %zu max contiguous pages = %zu\n",
437 0 : relptr_offset(fpm->self), fpm->contiguous_pages);
438 :
439 : /* Dump btree. */
440 0 : if (fpm->btree_depth > 0)
441 : {
442 : FreePageBtree *root;
443 :
444 0 : appendStringInfo(&buf, "btree depth %u:\n", fpm->btree_depth);
445 0 : root = relptr_access(base, fpm->btree_root);
446 0 : FreePageManagerDumpBtree(fpm, root, NULL, 0, &buf);
447 : }
448 0 : else if (fpm->singleton_npages > 0)
449 : {
450 0 : appendStringInfo(&buf, "singleton: %zu(%zu)\n",
451 : fpm->singleton_first_page, fpm->singleton_npages);
452 : }
453 :
454 : /* Dump btree recycle list. */
455 0 : recycle = relptr_access(base, fpm->btree_recycle);
456 0 : if (recycle != NULL)
457 : {
458 0 : appendStringInfoString(&buf, "btree recycle:");
459 0 : FreePageManagerDumpSpans(fpm, recycle, 1, &buf);
460 : }
461 :
462 : /* Dump free lists. */
463 0 : for (f = 0; f < FPM_NUM_FREELISTS; ++f)
464 : {
465 : FreePageSpanLeader *span;
466 :
467 0 : if (relptr_is_null(fpm->freelist[f]))
468 0 : continue;
469 0 : if (!dumped_any_freelist)
470 : {
471 0 : appendStringInfoString(&buf, "freelists:\n");
472 0 : dumped_any_freelist = true;
473 : }
474 0 : appendStringInfo(&buf, " %zu:", f + 1);
475 0 : span = relptr_access(base, fpm->freelist[f]);
476 0 : FreePageManagerDumpSpans(fpm, span, f + 1, &buf);
477 : }
478 :
479 : /* And return result to caller. */
480 0 : return buf.data;
481 : }
482 :
483 :
484 : /*
485 : * The first_page value stored at index zero in any non-root page must match
486 : * the first_page value stored in its parent at the index which points to that
487 : * page. So when the value stored at index zero in a btree page changes, we've
488 : * got to walk up the tree adjusting ancestor keys until we reach an ancestor
489 : * where that key isn't index zero. This function should be called after
490 : * updating the first key on the target page; it will propagate the change
491 : * upward as far as needed.
492 : *
493 : * We assume here that the first key on the page has not changed enough to
494 : * require changes in the ordering of keys on its ancestor pages. Thus,
495 : * if we search the parent page for the first key greater than or equal to
496 : * the first key on the current page, the downlink to this page will be either
497 : * the exact index returned by the search (if the first key decreased)
498 : * or one less (if the first key increased).
499 : */
500 : static void
501 1882 : FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm, FreePageBtree *btp)
502 : {
503 1882 : char *base = fpm_segment_base(fpm);
504 : Size first_page;
505 : FreePageBtree *parent;
506 : FreePageBtree *child;
507 :
508 : /* This might be either a leaf or an internal page. */
509 : Assert(btp->hdr.nused > 0);
510 1882 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
511 : {
512 : Assert(btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
513 1882 : first_page = btp->u.leaf_key[0].first_page;
514 : }
515 : else
516 : {
517 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
518 : Assert(btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE);
519 0 : first_page = btp->u.internal_key[0].first_page;
520 : }
521 1882 : child = btp;
522 :
523 : /* Loop until we find an ancestor that does not require adjustment. */
524 : for (;;)
525 0 : {
526 : Size s;
527 :
528 1882 : parent = relptr_access(base, child->hdr.parent);
529 1882 : if (parent == NULL)
530 1882 : break;
531 0 : s = FreePageBtreeSearchInternal(parent, first_page);
532 :
533 : /* Key is either at index s or index s-1; figure out which. */
534 0 : if (s >= parent->hdr.nused)
535 : {
536 : Assert(s == parent->hdr.nused);
537 0 : --s;
538 : }
539 : else
540 : {
541 : FreePageBtree *check;
542 :
543 0 : check = relptr_access(base, parent->u.internal_key[s].child);
544 0 : if (check != child)
545 : {
546 : Assert(s > 0);
547 0 : --s;
548 : }
549 : }
550 :
551 : #ifdef USE_ASSERT_CHECKING
552 : /* Debugging double-check. */
553 : {
554 : FreePageBtree *check;
555 :
556 : check = relptr_access(base, parent->u.internal_key[s].child);
557 : Assert(s < parent->hdr.nused);
558 : Assert(child == check);
559 : }
560 : #endif
561 :
562 : /* Update the parent key. */
563 0 : parent->u.internal_key[s].first_page = first_page;
564 :
565 : /*
566 : * If this is the first key in the parent, go up another level; else
567 : * done.
568 : */
569 0 : if (s > 0)
570 0 : break;
571 0 : child = parent;
572 : }
573 1882 : }
574 :
575 : /*
576 : * Attempt to reclaim space from the free-page btree. The return value is
577 : * the largest range of contiguous pages created by the cleanup operation.
578 : */
579 : static Size
580 27920 : FreePageBtreeCleanup(FreePageManager *fpm)
581 : {
582 27920 : char *base = fpm_segment_base(fpm);
583 27920 : Size max_contiguous_pages = 0;
584 :
585 : /* Attempt to shrink the depth of the btree. */
586 28106 : while (!relptr_is_null(fpm->btree_root))
587 : {
588 3634 : FreePageBtree *root = relptr_access(base, fpm->btree_root);
589 :
590 : /* If the root contains only one key, reduce depth by one. */
591 3634 : if (root->hdr.nused == 1)
592 : {
593 : /* Shrink depth of tree by one. */
594 : Assert(fpm->btree_depth > 0);
595 186 : --fpm->btree_depth;
596 186 : if (root->hdr.magic == FREE_PAGE_LEAF_MAGIC)
597 : {
598 : /* If root is a leaf, convert only entry to singleton range. */
599 186 : relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
600 186 : fpm->singleton_first_page = root->u.leaf_key[0].first_page;
601 186 : fpm->singleton_npages = root->u.leaf_key[0].npages;
602 : }
603 : else
604 : {
605 : FreePageBtree *newroot;
606 :
607 : /* If root is an internal page, make only child the root. */
608 : Assert(root->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
609 0 : relptr_copy(fpm->btree_root, root->u.internal_key[0].child);
610 0 : newroot = relptr_access(base, fpm->btree_root);
611 0 : relptr_store(base, newroot->hdr.parent, (FreePageBtree *) NULL);
612 : }
613 186 : FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, root));
614 : }
615 3448 : else if (root->hdr.nused == 2 &&
616 1790 : root->hdr.magic == FREE_PAGE_LEAF_MAGIC)
617 : {
618 : Size end_of_first;
619 : Size start_of_second;
620 :
621 1790 : end_of_first = root->u.leaf_key[0].first_page +
622 1790 : root->u.leaf_key[0].npages;
623 1790 : start_of_second = root->u.leaf_key[1].first_page;
624 :
625 1790 : if (end_of_first + 1 == start_of_second)
626 : {
627 108 : Size root_page = fpm_pointer_to_page(base, root);
628 :
629 108 : if (end_of_first == root_page)
630 : {
631 108 : FreePagePopSpanLeader(fpm, root->u.leaf_key[0].first_page);
632 108 : FreePagePopSpanLeader(fpm, root->u.leaf_key[1].first_page);
633 108 : fpm->singleton_first_page = root->u.leaf_key[0].first_page;
634 108 : fpm->singleton_npages = root->u.leaf_key[0].npages +
635 108 : root->u.leaf_key[1].npages + 1;
636 108 : fpm->btree_depth = 0;
637 108 : relptr_store(base, fpm->btree_root,
638 : (FreePageBtree *) NULL);
639 108 : FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
640 : fpm->singleton_npages);
641 : Assert(max_contiguous_pages == 0);
642 108 : max_contiguous_pages = fpm->singleton_npages;
643 : }
644 : }
645 :
646 : /* Whether it worked or not, it's time to stop. */
647 1790 : break;
648 : }
649 : else
650 : {
651 : /* Nothing more to do. Stop. */
652 : break;
653 : }
654 : }
655 :
656 : /*
657 : * Attempt to free recycled btree pages. We skip this if releasing the
658 : * recycled page would require a btree page split, because the page we're
659 : * trying to recycle would be consumed by the split, which would be
660 : * counterproductive.
661 : *
662 : * We also currently only ever attempt to recycle the first page on the
663 : * list; that could be made more aggressive, but it's not clear that the
664 : * complexity would be worthwhile.
665 : */
666 28018 : while (fpm->btree_recycle_count > 0)
667 : {
668 : FreePageBtree *btp;
669 : Size first_page;
670 : Size contiguous_pages;
671 :
672 186 : btp = FreePageBtreeGetRecycled(fpm);
673 186 : first_page = fpm_pointer_to_page(base, btp);
674 186 : contiguous_pages = FreePageManagerPutInternal(fpm, first_page, 1, true);
675 186 : if (contiguous_pages == 0)
676 : {
677 88 : FreePageBtreeRecycle(fpm, first_page);
678 88 : break;
679 : }
680 : else
681 : {
682 98 : if (contiguous_pages > max_contiguous_pages)
683 98 : max_contiguous_pages = contiguous_pages;
684 : }
685 : }
686 :
687 27920 : return max_contiguous_pages;
688 : }
689 :
690 : /*
691 : * Consider consolidating the given page with its left or right sibling,
692 : * if it's fairly empty.
693 : */
694 : static void
695 648 : FreePageBtreeConsolidate(FreePageManager *fpm, FreePageBtree *btp)
696 : {
697 648 : char *base = fpm_segment_base(fpm);
698 : FreePageBtree *np;
699 : Size max;
700 :
701 : /*
702 : * We only try to consolidate pages that are less than a third full. We
703 : * could be more aggressive about this, but that might risk performing
704 : * consolidation only to end up splitting again shortly thereafter. Since
705 : * the btree should be very small compared to the space under management,
706 : * our goal isn't so much to ensure that it always occupies the absolutely
707 : * smallest possible number of pages as to reclaim pages before things get
708 : * too egregiously out of hand.
709 : */
710 648 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
711 648 : max = FPM_ITEMS_PER_LEAF_PAGE;
712 : else
713 : {
714 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
715 0 : max = FPM_ITEMS_PER_INTERNAL_PAGE;
716 : }
717 648 : if (btp->hdr.nused >= max / 3)
718 0 : return;
719 :
720 : /*
721 : * If we can fit our right sibling's keys onto this page, consolidate.
722 : */
723 648 : np = FreePageBtreeFindRightSibling(base, btp);
724 648 : if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
725 : {
726 0 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
727 : {
728 0 : memcpy(&btp->u.leaf_key[btp->hdr.nused], &np->u.leaf_key[0],
729 0 : sizeof(FreePageBtreeLeafKey) * np->hdr.nused);
730 0 : btp->hdr.nused += np->hdr.nused;
731 : }
732 : else
733 : {
734 0 : memcpy(&btp->u.internal_key[btp->hdr.nused], &np->u.internal_key[0],
735 0 : sizeof(FreePageBtreeInternalKey) * np->hdr.nused);
736 0 : btp->hdr.nused += np->hdr.nused;
737 0 : FreePageBtreeUpdateParentPointers(base, btp);
738 : }
739 0 : FreePageBtreeRemovePage(fpm, np);
740 0 : return;
741 : }
742 :
743 : /*
744 : * If we can fit our keys onto our left sibling's page, consolidate. In
745 : * this case, we move our keys onto the other page rather than vice versa,
746 : * to avoid having to adjust ancestor keys.
747 : */
748 648 : np = FreePageBtreeFindLeftSibling(base, btp);
749 648 : if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
750 : {
751 0 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
752 : {
753 0 : memcpy(&np->u.leaf_key[np->hdr.nused], &btp->u.leaf_key[0],
754 0 : sizeof(FreePageBtreeLeafKey) * btp->hdr.nused);
755 0 : np->hdr.nused += btp->hdr.nused;
756 : }
757 : else
758 : {
759 0 : memcpy(&np->u.internal_key[np->hdr.nused], &btp->u.internal_key[0],
760 0 : sizeof(FreePageBtreeInternalKey) * btp->hdr.nused);
761 0 : np->hdr.nused += btp->hdr.nused;
762 0 : FreePageBtreeUpdateParentPointers(base, np);
763 : }
764 0 : FreePageBtreeRemovePage(fpm, btp);
765 0 : return;
766 : }
767 : }
768 :
769 : /*
770 : * Find the passed page's left sibling; that is, the page at the same level
771 : * of the tree whose keyspace immediately precedes ours.
772 : */
773 : static FreePageBtree *
774 648 : FreePageBtreeFindLeftSibling(char *base, FreePageBtree *btp)
775 : {
776 648 : FreePageBtree *p = btp;
777 648 : int levels = 0;
778 :
779 : /* Move up until we can move left. */
780 : for (;;)
781 0 : {
782 : Size first_page;
783 : Size index;
784 :
785 648 : first_page = FreePageBtreeFirstKey(p);
786 648 : p = relptr_access(base, p->hdr.parent);
787 :
788 648 : if (p == NULL)
789 648 : return NULL; /* we were passed the rightmost page */
790 :
791 0 : index = FreePageBtreeSearchInternal(p, first_page);
792 0 : if (index > 0)
793 : {
794 : Assert(p->u.internal_key[index].first_page == first_page);
795 0 : p = relptr_access(base, p->u.internal_key[index - 1].child);
796 0 : break;
797 : }
798 : Assert(index == 0);
799 0 : ++levels;
800 : }
801 :
802 : /* Descend left. */
803 0 : while (levels > 0)
804 : {
805 : Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
806 0 : p = relptr_access(base, p->u.internal_key[p->hdr.nused - 1].child);
807 0 : --levels;
808 : }
809 : Assert(p->hdr.magic == btp->hdr.magic);
810 :
811 0 : return p;
812 : }
813 :
814 : /*
815 : * Find the passed page's right sibling; that is, the page at the same level
816 : * of the tree whose keyspace immediately follows ours.
817 : */
818 : static FreePageBtree *
819 660 : FreePageBtreeFindRightSibling(char *base, FreePageBtree *btp)
820 : {
821 660 : FreePageBtree *p = btp;
822 660 : int levels = 0;
823 :
824 : /* Move up until we can move right. */
825 : for (;;)
826 0 : {
827 : Size first_page;
828 : Size index;
829 :
830 660 : first_page = FreePageBtreeFirstKey(p);
831 660 : p = relptr_access(base, p->hdr.parent);
832 :
833 660 : if (p == NULL)
834 660 : return NULL; /* we were passed the rightmost page */
835 :
836 0 : index = FreePageBtreeSearchInternal(p, first_page);
837 0 : if (index < p->hdr.nused - 1)
838 : {
839 : Assert(p->u.internal_key[index].first_page == first_page);
840 0 : p = relptr_access(base, p->u.internal_key[index + 1].child);
841 0 : break;
842 : }
843 : Assert(index == p->hdr.nused - 1);
844 0 : ++levels;
845 : }
846 :
847 : /* Descend left. */
848 0 : while (levels > 0)
849 : {
850 : Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
851 0 : p = relptr_access(base, p->u.internal_key[0].child);
852 0 : --levels;
853 : }
854 : Assert(p->hdr.magic == btp->hdr.magic);
855 :
856 0 : return p;
857 : }
858 :
859 : /*
860 : * Get the first key on a btree page.
861 : */
862 : static Size
863 1308 : FreePageBtreeFirstKey(FreePageBtree *btp)
864 : {
865 : Assert(btp->hdr.nused > 0);
866 :
867 1308 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
868 1308 : return btp->u.leaf_key[0].first_page;
869 : else
870 : {
871 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
872 0 : return btp->u.internal_key[0].first_page;
873 : }
874 : }
875 :
876 : /*
877 : * Get a page from the btree recycle list for use as a btree page.
878 : */
879 : static FreePageBtree *
880 274 : FreePageBtreeGetRecycled(FreePageManager *fpm)
881 : {
882 274 : char *base = fpm_segment_base(fpm);
883 274 : FreePageSpanLeader *victim = relptr_access(base, fpm->btree_recycle);
884 : FreePageSpanLeader *newhead;
885 :
886 : Assert(victim != NULL);
887 274 : newhead = relptr_access(base, victim->next);
888 274 : if (newhead != NULL)
889 0 : relptr_copy(newhead->prev, victim->prev);
890 274 : relptr_store(base, fpm->btree_recycle, newhead);
891 : Assert(fpm_pointer_is_page_aligned(base, victim));
892 274 : fpm->btree_recycle_count--;
893 274 : return (FreePageBtree *) victim;
894 : }
895 :
896 : /*
897 : * Insert an item into an internal page.
898 : */
899 : static void
900 0 : FreePageBtreeInsertInternal(char *base, FreePageBtree *btp, Size index,
901 : Size first_page, FreePageBtree *child)
902 : {
903 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
904 : Assert(btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE);
905 : Assert(index <= btp->hdr.nused);
906 0 : memmove(&btp->u.internal_key[index + 1], &btp->u.internal_key[index],
907 0 : sizeof(FreePageBtreeInternalKey) * (btp->hdr.nused - index));
908 0 : btp->u.internal_key[index].first_page = first_page;
909 0 : relptr_store(base, btp->u.internal_key[index].child, child);
910 0 : ++btp->hdr.nused;
911 0 : }
912 :
913 : /*
914 : * Insert an item into a leaf page.
915 : */
916 : static void
917 844 : FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index, Size first_page,
918 : Size npages)
919 : {
920 : Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
921 : Assert(btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
922 : Assert(index <= btp->hdr.nused);
923 844 : memmove(&btp->u.leaf_key[index + 1], &btp->u.leaf_key[index],
924 844 : sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
925 844 : btp->u.leaf_key[index].first_page = first_page;
926 844 : btp->u.leaf_key[index].npages = npages;
927 844 : ++btp->hdr.nused;
928 844 : }
929 :
930 : /*
931 : * Put a page on the btree recycle list.
932 : */
933 : static void
934 274 : FreePageBtreeRecycle(FreePageManager *fpm, Size pageno)
935 : {
936 274 : char *base = fpm_segment_base(fpm);
937 274 : FreePageSpanLeader *head = relptr_access(base, fpm->btree_recycle);
938 : FreePageSpanLeader *span;
939 :
940 274 : span = (FreePageSpanLeader *) fpm_page_to_pointer(base, pageno);
941 274 : span->magic = FREE_PAGE_SPAN_LEADER_MAGIC;
942 274 : span->npages = 1;
943 274 : relptr_store(base, span->next, head);
944 274 : relptr_store(base, span->prev, (FreePageSpanLeader *) NULL);
945 274 : if (head != NULL)
946 0 : relptr_store(base, head->prev, span);
947 274 : relptr_store(base, fpm->btree_recycle, span);
948 274 : fpm->btree_recycle_count++;
949 274 : }
950 :
951 : /*
952 : * Remove an item from the btree at the given position on the given page.
953 : */
954 : static void
955 648 : FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp, Size index)
956 : {
957 : Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
958 : Assert(index < btp->hdr.nused);
959 :
960 : /* When last item is removed, extirpate entire page from btree. */
961 648 : if (btp->hdr.nused == 1)
962 : {
963 0 : FreePageBtreeRemovePage(fpm, btp);
964 0 : return;
965 : }
966 :
967 : /* Physically remove the key from the page. */
968 648 : --btp->hdr.nused;
969 648 : if (index < btp->hdr.nused)
970 648 : memmove(&btp->u.leaf_key[index], &btp->u.leaf_key[index + 1],
971 648 : sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
972 :
973 : /* If we just removed the first key, adjust ancestor keys. */
974 648 : if (index == 0)
975 252 : FreePageBtreeAdjustAncestorKeys(fpm, btp);
976 :
977 : /* Consider whether to consolidate this page with a sibling. */
978 648 : FreePageBtreeConsolidate(fpm, btp);
979 : }
980 :
981 : /*
982 : * Remove a page from the btree. Caller is responsible for having relocated
983 : * any keys from this page that are still wanted. The page is placed on the
984 : * recycled list.
985 : */
986 : static void
987 0 : FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp)
988 : {
989 0 : char *base = fpm_segment_base(fpm);
990 : FreePageBtree *parent;
991 : Size index;
992 : Size first_page;
993 :
994 : for (;;)
995 : {
996 : /* Find parent page. */
997 0 : parent = relptr_access(base, btp->hdr.parent);
998 0 : if (parent == NULL)
999 : {
1000 : /* We are removing the root page. */
1001 0 : relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
1002 0 : fpm->btree_depth = 0;
1003 : Assert(fpm->singleton_first_page == 0);
1004 : Assert(fpm->singleton_npages == 0);
1005 0 : return;
1006 : }
1007 :
1008 : /*
1009 : * If the parent contains only one item, we need to remove it as well.
1010 : */
1011 0 : if (parent->hdr.nused > 1)
1012 0 : break;
1013 0 : FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, btp));
1014 0 : btp = parent;
1015 : }
1016 :
1017 : /* Find and remove the downlink. */
1018 0 : first_page = FreePageBtreeFirstKey(btp);
1019 0 : if (parent->hdr.magic == FREE_PAGE_LEAF_MAGIC)
1020 : {
1021 0 : index = FreePageBtreeSearchLeaf(parent, first_page);
1022 : Assert(index < parent->hdr.nused);
1023 0 : if (index < parent->hdr.nused - 1)
1024 0 : memmove(&parent->u.leaf_key[index],
1025 0 : &parent->u.leaf_key[index + 1],
1026 : sizeof(FreePageBtreeLeafKey)
1027 0 : * (parent->hdr.nused - index - 1));
1028 : }
1029 : else
1030 : {
1031 0 : index = FreePageBtreeSearchInternal(parent, first_page);
1032 : Assert(index < parent->hdr.nused);
1033 0 : if (index < parent->hdr.nused - 1)
1034 0 : memmove(&parent->u.internal_key[index],
1035 0 : &parent->u.internal_key[index + 1],
1036 : sizeof(FreePageBtreeInternalKey)
1037 0 : * (parent->hdr.nused - index - 1));
1038 : }
1039 0 : parent->hdr.nused--;
1040 : Assert(parent->hdr.nused > 0);
1041 :
1042 : /* Recycle the page. */
1043 0 : FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, btp));
1044 :
1045 : /* Adjust ancestor keys if needed. */
1046 0 : if (index == 0)
1047 0 : FreePageBtreeAdjustAncestorKeys(fpm, parent);
1048 :
1049 : /* Consider whether to consolidate the parent with a sibling. */
1050 0 : FreePageBtreeConsolidate(fpm, parent);
1051 : }
1052 :
1053 : /*
1054 : * Search the btree for an entry for the given first page and initialize
1055 : * *result with the results of the search. result->page and result->index
1056 : * indicate either the position of an exact match or the position at which
1057 : * the new key should be inserted. result->found is true for an exact match,
1058 : * otherwise false. result->split_pages will contain the number of additional
1059 : * btree pages that will be needed when performing a split to insert a key.
1060 : * Except as described above, the contents of fields in the result object are
1061 : * undefined on return.
1062 : */
1063 : static void
1064 4478 : FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
1065 : FreePageBtreeSearchResult *result)
1066 : {
1067 4478 : char *base = fpm_segment_base(fpm);
1068 4478 : FreePageBtree *btp = relptr_access(base, fpm->btree_root);
1069 : Size index;
1070 :
1071 4478 : result->split_pages = 1;
1072 :
1073 : /* If the btree is empty, there's nothing to find. */
1074 4478 : if (btp == NULL)
1075 : {
1076 0 : result->page = NULL;
1077 0 : result->found = false;
1078 0 : return;
1079 : }
1080 :
1081 : /* Descend until we hit a leaf. */
1082 4478 : while (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
1083 : {
1084 : FreePageBtree *child;
1085 : bool found_exact;
1086 :
1087 0 : index = FreePageBtreeSearchInternal(btp, first_page);
1088 0 : found_exact = index < btp->hdr.nused &&
1089 0 : btp->u.internal_key[index].first_page == first_page;
1090 :
1091 : /*
1092 : * If we found an exact match we descend directly. Otherwise, we
1093 : * descend into the child to the left if possible so that we can find
1094 : * the insertion point at that child's high end.
1095 : */
1096 0 : if (!found_exact && index > 0)
1097 0 : --index;
1098 :
1099 : /* Track required split depth for leaf insert. */
1100 0 : if (btp->hdr.nused >= FPM_ITEMS_PER_INTERNAL_PAGE)
1101 : {
1102 : Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE);
1103 0 : result->split_pages++;
1104 : }
1105 : else
1106 0 : result->split_pages = 0;
1107 :
1108 : /* Descend to appropriate child page. */
1109 : Assert(index < btp->hdr.nused);
1110 0 : child = relptr_access(base, btp->u.internal_key[index].child);
1111 : Assert(relptr_access(base, child->hdr.parent) == btp);
1112 0 : btp = child;
1113 : }
1114 :
1115 : /* Track required split depth for leaf insert. */
1116 4478 : if (btp->hdr.nused >= FPM_ITEMS_PER_LEAF_PAGE)
1117 : {
1118 : Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE);
1119 0 : result->split_pages++;
1120 : }
1121 : else
1122 4478 : result->split_pages = 0;
1123 :
1124 : /* Search leaf page. */
1125 4478 : index = FreePageBtreeSearchLeaf(btp, first_page);
1126 :
1127 : /* Assemble results. */
1128 4478 : result->page = btp;
1129 4478 : result->index = index;
1130 8944 : result->found = index < btp->hdr.nused &&
1131 4466 : first_page == btp->u.leaf_key[index].first_page;
1132 : }
1133 :
1134 : /*
1135 : * Search an internal page for the first key greater than or equal to a given
1136 : * page number. Returns the index of that key, or one greater than the number
1137 : * of keys on the page if none.
1138 : */
1139 : static Size
1140 0 : FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page)
1141 : {
1142 0 : Size low = 0;
1143 0 : Size high = btp->hdr.nused;
1144 :
1145 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
1146 : Assert(high > 0 && high <= FPM_ITEMS_PER_INTERNAL_PAGE);
1147 :
1148 0 : while (low < high)
1149 : {
1150 0 : Size mid = (low + high) / 2;
1151 0 : Size val = btp->u.internal_key[mid].first_page;
1152 :
1153 0 : if (first_page == val)
1154 0 : return mid;
1155 0 : else if (first_page < val)
1156 0 : high = mid;
1157 : else
1158 0 : low = mid + 1;
1159 : }
1160 :
1161 0 : return low;
1162 : }
1163 :
1164 : /*
1165 : * Search a leaf page for the first key greater than or equal to a given
1166 : * page number. Returns the index of that key, or one greater than the number
1167 : * of keys on the page if none.
1168 : */
1169 : static Size
1170 4478 : FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page)
1171 : {
1172 4478 : Size low = 0;
1173 4478 : Size high = btp->hdr.nused;
1174 :
1175 : Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
1176 : Assert(high > 0 && high <= FPM_ITEMS_PER_LEAF_PAGE);
1177 :
1178 10846 : while (low < high)
1179 : {
1180 8158 : Size mid = (low + high) / 2;
1181 8158 : Size val = btp->u.leaf_key[mid].first_page;
1182 :
1183 8158 : if (first_page == val)
1184 1790 : return mid;
1185 6368 : else if (first_page < val)
1186 4668 : high = mid;
1187 : else
1188 1700 : low = mid + 1;
1189 : }
1190 :
1191 2688 : return low;
1192 : }
1193 :
1194 : /*
1195 : * Allocate a new btree page and move half the keys from the provided page
1196 : * to the new page. Caller is responsible for making sure that there's a
1197 : * page available from fpm->btree_recycle. Returns a pointer to the new page,
1198 : * to which caller must add a downlink.
1199 : */
1200 : static FreePageBtree *
1201 0 : FreePageBtreeSplitPage(FreePageManager *fpm, FreePageBtree *btp)
1202 : {
1203 : FreePageBtree *newsibling;
1204 :
1205 0 : newsibling = FreePageBtreeGetRecycled(fpm);
1206 0 : newsibling->hdr.magic = btp->hdr.magic;
1207 0 : newsibling->hdr.nused = btp->hdr.nused / 2;
1208 0 : relptr_copy(newsibling->hdr.parent, btp->hdr.parent);
1209 0 : btp->hdr.nused -= newsibling->hdr.nused;
1210 :
1211 0 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
1212 0 : memcpy(&newsibling->u.leaf_key,
1213 0 : &btp->u.leaf_key[btp->hdr.nused],
1214 0 : sizeof(FreePageBtreeLeafKey) * newsibling->hdr.nused);
1215 : else
1216 : {
1217 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
1218 0 : memcpy(&newsibling->u.internal_key,
1219 0 : &btp->u.internal_key[btp->hdr.nused],
1220 0 : sizeof(FreePageBtreeInternalKey) * newsibling->hdr.nused);
1221 0 : FreePageBtreeUpdateParentPointers(fpm_segment_base(fpm), newsibling);
1222 : }
1223 :
1224 0 : return newsibling;
1225 : }
1226 :
1227 : /*
1228 : * When internal pages are split or merged, the parent pointers of their
1229 : * children must be updated.
1230 : */
1231 : static void
1232 0 : FreePageBtreeUpdateParentPointers(char *base, FreePageBtree *btp)
1233 : {
1234 : Size i;
1235 :
1236 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
1237 0 : for (i = 0; i < btp->hdr.nused; ++i)
1238 : {
1239 : FreePageBtree *child;
1240 :
1241 0 : child = relptr_access(base, btp->u.internal_key[i].child);
1242 0 : relptr_store(base, child->hdr.parent, btp);
1243 : }
1244 0 : }
1245 :
1246 : /*
1247 : * Debugging dump of btree data.
1248 : */
1249 : static void
1250 0 : FreePageManagerDumpBtree(FreePageManager *fpm, FreePageBtree *btp,
1251 : FreePageBtree *parent, int level, StringInfo buf)
1252 : {
1253 0 : char *base = fpm_segment_base(fpm);
1254 0 : Size pageno = fpm_pointer_to_page(base, btp);
1255 : Size index;
1256 : FreePageBtree *check_parent;
1257 :
1258 0 : check_stack_depth();
1259 0 : check_parent = relptr_access(base, btp->hdr.parent);
1260 0 : appendStringInfo(buf, " %zu@%d %c", pageno, level,
1261 0 : btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC ? 'i' : 'l');
1262 0 : if (parent != check_parent)
1263 0 : appendStringInfo(buf, " [actual parent %zu, expected %zu]",
1264 0 : fpm_pointer_to_page(base, check_parent),
1265 0 : fpm_pointer_to_page(base, parent));
1266 0 : appendStringInfoChar(buf, ':');
1267 0 : for (index = 0; index < btp->hdr.nused; ++index)
1268 : {
1269 0 : if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
1270 0 : appendStringInfo(buf, " %zu->%zu",
1271 : btp->u.internal_key[index].first_page,
1272 0 : relptr_offset(btp->u.internal_key[index].child) / FPM_PAGE_SIZE);
1273 : else
1274 0 : appendStringInfo(buf, " %zu(%zu)",
1275 : btp->u.leaf_key[index].first_page,
1276 : btp->u.leaf_key[index].npages);
1277 : }
1278 0 : appendStringInfoChar(buf, '\n');
1279 :
1280 0 : if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
1281 : {
1282 0 : for (index = 0; index < btp->hdr.nused; ++index)
1283 : {
1284 : FreePageBtree *child;
1285 :
1286 0 : child = relptr_access(base, btp->u.internal_key[index].child);
1287 0 : FreePageManagerDumpBtree(fpm, child, btp, level + 1, buf);
1288 : }
1289 : }
1290 0 : }
1291 :
1292 : /*
1293 : * Debugging dump of free-span data.
1294 : */
1295 : static void
1296 0 : FreePageManagerDumpSpans(FreePageManager *fpm, FreePageSpanLeader *span,
1297 : Size expected_pages, StringInfo buf)
1298 : {
1299 0 : char *base = fpm_segment_base(fpm);
1300 :
1301 0 : while (span != NULL)
1302 : {
1303 0 : if (span->npages != expected_pages)
1304 0 : appendStringInfo(buf, " %zu(%zu)", fpm_pointer_to_page(base, span),
1305 : span->npages);
1306 : else
1307 0 : appendStringInfo(buf, " %zu", fpm_pointer_to_page(base, span));
1308 0 : span = relptr_access(base, span->next);
1309 : }
1310 :
1311 0 : appendStringInfoChar(buf, '\n');
1312 0 : }
1313 :
1314 : /*
1315 : * This function allocates a run of pages of the given length from the free
1316 : * page manager.
1317 : */
1318 : static bool
1319 24694 : FreePageManagerGetInternal(FreePageManager *fpm, Size npages, Size *first_page)
1320 : {
1321 24694 : char *base = fpm_segment_base(fpm);
1322 24694 : FreePageSpanLeader *victim = NULL;
1323 : FreePageSpanLeader *prev;
1324 : FreePageSpanLeader *next;
1325 : FreePageBtreeSearchResult result;
1326 24694 : Size victim_page = 0; /* placate compiler */
1327 : Size f;
1328 :
1329 : /*
1330 : * Search for a free span.
1331 : *
1332 : * Right now, we use a simple best-fit policy here, but it's possible for
1333 : * this to result in memory fragmentation if we're repeatedly asked to
1334 : * allocate chunks just a little smaller than what we have available.
1335 : * Hopefully, this is unlikely, because we expect most requests to be
1336 : * single pages or superblock-sized chunks -- but no policy can be optimal
1337 : * under all circumstances unless it has knowledge of future allocation
1338 : * patterns.
1339 : */
1340 1965742 : for (f = Min(npages, FPM_NUM_FREELISTS) - 1; f < FPM_NUM_FREELISTS; ++f)
1341 : {
1342 : /* Skip empty freelists. */
1343 1965742 : if (relptr_is_null(fpm->freelist[f]))
1344 1941048 : continue;
1345 :
1346 : /*
1347 : * All of the freelists except the last one contain only items of a
1348 : * single size, so we just take the first one. But the final free
1349 : * list contains everything too big for any of the other lists, so we
1350 : * need to search the list.
1351 : */
1352 24694 : if (f < FPM_NUM_FREELISTS - 1)
1353 10978 : victim = relptr_access(base, fpm->freelist[f]);
1354 : else
1355 : {
1356 : FreePageSpanLeader *candidate;
1357 :
1358 13716 : candidate = relptr_access(base, fpm->freelist[f]);
1359 : do
1360 : {
1361 13716 : if (candidate->npages >= npages && (victim == NULL ||
1362 0 : victim->npages > candidate->npages))
1363 : {
1364 13716 : victim = candidate;
1365 13716 : if (victim->npages == npages)
1366 0 : break;
1367 : }
1368 13716 : candidate = relptr_access(base, candidate->next);
1369 13716 : } while (candidate != NULL);
1370 : }
1371 24694 : break;
1372 : }
1373 :
1374 : /* If we didn't find an allocatable span, return failure. */
1375 24694 : if (victim == NULL)
1376 0 : return false;
1377 :
1378 : /* Remove span from free list. */
1379 : Assert(victim->magic == FREE_PAGE_SPAN_LEADER_MAGIC);
1380 24694 : prev = relptr_access(base, victim->prev);
1381 24694 : next = relptr_access(base, victim->next);
1382 24694 : if (prev != NULL)
1383 0 : relptr_copy(prev->next, victim->next);
1384 : else
1385 24694 : relptr_copy(fpm->freelist[f], victim->next);
1386 24694 : if (next != NULL)
1387 76 : relptr_copy(next->prev, victim->prev);
1388 24694 : victim_page = fpm_pointer_to_page(base, victim);
1389 :
1390 : /* Decide whether we might be invalidating contiguous_pages. */
1391 24694 : if (f == FPM_NUM_FREELISTS - 1 &&
1392 13716 : victim->npages == fpm->contiguous_pages)
1393 : {
1394 : /*
1395 : * The victim span came from the oversized freelist, and had the same
1396 : * size as the longest span. There may or may not be another one of
1397 : * the same size, so contiguous_pages must be recomputed just to be
1398 : * safe.
1399 : */
1400 13716 : fpm->contiguous_pages_dirty = true;
1401 : }
1402 10978 : else if (f + 1 == fpm->contiguous_pages &&
1403 9662 : relptr_is_null(fpm->freelist[f]))
1404 : {
1405 : /*
1406 : * The victim span came from a fixed sized freelist, and it was the
1407 : * list for spans of the same size as the current longest span, and
1408 : * the list is now empty after removing the victim. So
1409 : * contiguous_pages must be recomputed without a doubt.
1410 : */
1411 9662 : fpm->contiguous_pages_dirty = true;
1412 : }
1413 :
1414 : /*
1415 : * If we haven't initialized the btree yet, the victim must be the single
1416 : * span stored within the FreePageManager itself. Otherwise, we need to
1417 : * update the btree.
1418 : */
1419 24694 : if (relptr_is_null(fpm->btree_root))
1420 : {
1421 : Assert(victim_page == fpm->singleton_first_page);
1422 : Assert(victim->npages == fpm->singleton_npages);
1423 : Assert(victim->npages >= npages);
1424 22904 : fpm->singleton_first_page += npages;
1425 22904 : fpm->singleton_npages -= npages;
1426 22904 : if (fpm->singleton_npages > 0)
1427 22870 : FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
1428 : fpm->singleton_npages);
1429 : }
1430 : else
1431 : {
1432 : /*
1433 : * If the span we found is exactly the right size, remove it from the
1434 : * btree completely. Otherwise, adjust the btree entry to reflect the
1435 : * still-unallocated portion of the span, and put that portion on the
1436 : * appropriate free list.
1437 : */
1438 1790 : FreePageBtreeSearch(fpm, victim_page, &result);
1439 : Assert(result.found);
1440 1790 : if (victim->npages == npages)
1441 448 : FreePageBtreeRemove(fpm, result.page, result.index);
1442 : else
1443 : {
1444 : FreePageBtreeLeafKey *key;
1445 :
1446 : /* Adjust btree to reflect remaining pages. */
1447 : Assert(victim->npages > npages);
1448 1342 : key = &result.page->u.leaf_key[result.index];
1449 : Assert(key->npages == victim->npages);
1450 1342 : key->first_page += npages;
1451 1342 : key->npages -= npages;
1452 1342 : if (result.index == 0)
1453 588 : FreePageBtreeAdjustAncestorKeys(fpm, result.page);
1454 :
1455 : /* Put the unallocated pages back on the appropriate free list. */
1456 1342 : FreePagePushSpanLeader(fpm, victim_page + npages,
1457 1342 : victim->npages - npages);
1458 : }
1459 : }
1460 :
1461 : /* Return results to caller. */
1462 24694 : *first_page = fpm_pointer_to_page(base, victim);
1463 24694 : return true;
1464 : }
1465 :
1466 : /*
1467 : * Put a range of pages into the btree and freelists, consolidating it with
1468 : * existing free spans just before and/or after it. If 'soft' is true,
1469 : * only perform the insertion if it can be done without allocating new btree
1470 : * pages; if false, do it always. Returns 0 if the soft flag caused the
1471 : * insertion to be skipped, or otherwise the size of the contiguous span
1472 : * created by the insertion. This may be larger than npages if we're able
1473 : * to consolidate with an adjacent range.
1474 : */
1475 : static Size
1476 8528 : FreePageManagerPutInternal(FreePageManager *fpm, Size first_page, Size npages,
1477 : bool soft)
1478 : {
1479 8528 : char *base = fpm_segment_base(fpm);
1480 : FreePageBtreeSearchResult result;
1481 8528 : FreePageBtreeLeafKey *prevkey = NULL;
1482 8528 : FreePageBtreeLeafKey *nextkey = NULL;
1483 : FreePageBtree *np;
1484 : Size nindex;
1485 :
1486 : Assert(npages > 0);
1487 :
1488 : /* We can store a single free span without initializing the btree. */
1489 8528 : if (fpm->btree_depth == 0)
1490 : {
1491 6180 : if (fpm->singleton_npages == 0)
1492 : {
1493 : /* Don't have a span yet; store this one. */
1494 3952 : fpm->singleton_first_page = first_page;
1495 3952 : fpm->singleton_npages = npages;
1496 3952 : FreePagePushSpanLeader(fpm, first_page, npages);
1497 3952 : return fpm->singleton_npages;
1498 : }
1499 2228 : else if (fpm->singleton_first_page + fpm->singleton_npages ==
1500 : first_page)
1501 : {
1502 : /* New span immediately follows sole existing span. */
1503 12 : fpm->singleton_npages += npages;
1504 12 : FreePagePopSpanLeader(fpm, fpm->singleton_first_page);
1505 12 : FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
1506 : fpm->singleton_npages);
1507 12 : return fpm->singleton_npages;
1508 : }
1509 2216 : else if (first_page + npages == fpm->singleton_first_page)
1510 : {
1511 : /* New span immediately precedes sole existing span. */
1512 1754 : FreePagePopSpanLeader(fpm, fpm->singleton_first_page);
1513 1754 : fpm->singleton_first_page = first_page;
1514 1754 : fpm->singleton_npages += npages;
1515 1754 : FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
1516 : fpm->singleton_npages);
1517 1754 : return fpm->singleton_npages;
1518 : }
1519 : else
1520 : {
1521 : /* Not contiguous; we need to initialize the btree. */
1522 : Size root_page;
1523 : FreePageBtree *root;
1524 :
1525 462 : if (!relptr_is_null(fpm->btree_recycle))
1526 88 : root = FreePageBtreeGetRecycled(fpm);
1527 374 : else if (soft)
1528 122 : return 0; /* Should not allocate if soft. */
1529 286 : else if (FreePageManagerGetInternal(fpm, 1, &root_page))
1530 286 : root = (FreePageBtree *) fpm_page_to_pointer(base, root_page);
1531 : else
1532 : {
1533 : /* We'd better be able to get a page from the existing range. */
1534 0 : elog(FATAL, "free page manager btree is corrupt");
1535 : }
1536 :
1537 : /* Create the btree and move the preexisting range into it. */
1538 374 : root->hdr.magic = FREE_PAGE_LEAF_MAGIC;
1539 374 : root->hdr.nused = 1;
1540 374 : relptr_store(base, root->hdr.parent, (FreePageBtree *) NULL);
1541 374 : root->u.leaf_key[0].first_page = fpm->singleton_first_page;
1542 374 : root->u.leaf_key[0].npages = fpm->singleton_npages;
1543 374 : relptr_store(base, fpm->btree_root, root);
1544 374 : fpm->singleton_first_page = 0;
1545 374 : fpm->singleton_npages = 0;
1546 374 : fpm->btree_depth = 1;
1547 :
1548 : /*
1549 : * Corner case: it may be that the btree root took the very last
1550 : * free page. In that case, the sole btree entry covers a zero
1551 : * page run, which is invalid. Overwrite it with the entry we're
1552 : * trying to insert and get out.
1553 : */
1554 374 : if (root->u.leaf_key[0].npages == 0)
1555 : {
1556 34 : root->u.leaf_key[0].first_page = first_page;
1557 34 : root->u.leaf_key[0].npages = npages;
1558 34 : FreePagePushSpanLeader(fpm, first_page, npages);
1559 34 : return npages;
1560 : }
1561 :
1562 : /* Fall through to insert the new key. */
1563 : }
1564 : }
1565 :
1566 : /* Search the btree. */
1567 2688 : FreePageBtreeSearch(fpm, first_page, &result);
1568 : Assert(!result.found);
1569 2688 : if (result.index > 0)
1570 1646 : prevkey = &result.page->u.leaf_key[result.index - 1];
1571 2688 : if (result.index < result.page->hdr.nused)
1572 : {
1573 2676 : np = result.page;
1574 2676 : nindex = result.index;
1575 2676 : nextkey = &result.page->u.leaf_key[result.index];
1576 : }
1577 : else
1578 : {
1579 12 : np = FreePageBtreeFindRightSibling(base, result.page);
1580 12 : nindex = 0;
1581 12 : if (np != NULL)
1582 0 : nextkey = &np->u.leaf_key[0];
1583 : }
1584 :
1585 : /* Consolidate with the previous entry if possible. */
1586 2688 : if (prevkey != NULL && prevkey->first_page + prevkey->npages >= first_page)
1587 : {
1588 372 : bool remove_next = false;
1589 : Size result;
1590 :
1591 : Assert(prevkey->first_page + prevkey->npages == first_page);
1592 372 : prevkey->npages = (first_page - prevkey->first_page) + npages;
1593 :
1594 : /* Check whether we can *also* consolidate with the following entry. */
1595 372 : if (nextkey != NULL &&
1596 360 : prevkey->first_page + prevkey->npages >= nextkey->first_page)
1597 : {
1598 : Assert(prevkey->first_page + prevkey->npages ==
1599 : nextkey->first_page);
1600 200 : prevkey->npages = (nextkey->first_page - prevkey->first_page)
1601 200 : + nextkey->npages;
1602 200 : FreePagePopSpanLeader(fpm, nextkey->first_page);
1603 200 : remove_next = true;
1604 : }
1605 :
1606 : /* Put the span on the correct freelist and save size. */
1607 372 : FreePagePopSpanLeader(fpm, prevkey->first_page);
1608 372 : FreePagePushSpanLeader(fpm, prevkey->first_page, prevkey->npages);
1609 372 : result = prevkey->npages;
1610 :
1611 : /*
1612 : * If we consolidated with both the preceding and following entries,
1613 : * we must remove the following entry. We do this last, because
1614 : * removing an element from the btree may invalidate pointers we hold
1615 : * into the current data structure.
1616 : *
1617 : * NB: The btree is technically in an invalid state a this point
1618 : * because we've already updated prevkey to cover the same key space
1619 : * as nextkey. FreePageBtreeRemove() shouldn't notice that, though.
1620 : */
1621 372 : if (remove_next)
1622 200 : FreePageBtreeRemove(fpm, np, nindex);
1623 :
1624 372 : return result;
1625 : }
1626 :
1627 : /* Consolidate with the next entry if possible. */
1628 2316 : if (nextkey != NULL && first_page + npages >= nextkey->first_page)
1629 : {
1630 : Size newpages;
1631 :
1632 : /* Compute new size for span. */
1633 : Assert(first_page + npages == nextkey->first_page);
1634 1472 : newpages = (nextkey->first_page - first_page) + nextkey->npages;
1635 :
1636 : /* Put span on correct free list. */
1637 1472 : FreePagePopSpanLeader(fpm, nextkey->first_page);
1638 1472 : FreePagePushSpanLeader(fpm, first_page, newpages);
1639 :
1640 : /* Update key in place. */
1641 1472 : nextkey->first_page = first_page;
1642 1472 : nextkey->npages = newpages;
1643 :
1644 : /* If reducing first key on page, ancestors might need adjustment. */
1645 1472 : if (nindex == 0)
1646 566 : FreePageBtreeAdjustAncestorKeys(fpm, np);
1647 :
1648 1472 : return nextkey->npages;
1649 : }
1650 :
1651 : /* Split leaf page and as many of its ancestors as necessary. */
1652 844 : if (result.split_pages > 0)
1653 : {
1654 : /*
1655 : * NB: We could consider various coping strategies here to avoid a
1656 : * split; most obviously, if np != result.page, we could target that
1657 : * page instead. More complicated shuffling strategies could be
1658 : * possible as well; basically, unless every single leaf page is 100%
1659 : * full, we can jam this key in there if we try hard enough. It's
1660 : * unlikely that trying that hard is worthwhile, but it's possible we
1661 : * might need to make more than no effort. For now, we just do the
1662 : * easy thing, which is nothing.
1663 : */
1664 :
1665 : /* If this is a soft insert, it's time to give up. */
1666 0 : if (soft)
1667 0 : return 0;
1668 :
1669 : /* Check whether we need to allocate more btree pages to split. */
1670 0 : if (result.split_pages > fpm->btree_recycle_count)
1671 : {
1672 : Size pages_needed;
1673 : Size recycle_page;
1674 : Size i;
1675 :
1676 : /*
1677 : * Allocate the required number of pages and split each one in
1678 : * turn. This should never fail, because if we've got enough
1679 : * spans of free pages kicking around that we need additional
1680 : * storage space just to remember them all, then we should
1681 : * certainly have enough to expand the btree, which should only
1682 : * ever use a tiny number of pages compared to the number under
1683 : * management. If it does, something's badly screwed up.
1684 : */
1685 0 : pages_needed = result.split_pages - fpm->btree_recycle_count;
1686 0 : for (i = 0; i < pages_needed; ++i)
1687 : {
1688 0 : if (!FreePageManagerGetInternal(fpm, 1, &recycle_page))
1689 0 : elog(FATAL, "free page manager btree is corrupt");
1690 0 : FreePageBtreeRecycle(fpm, recycle_page);
1691 : }
1692 :
1693 : /*
1694 : * The act of allocating pages to recycle may have invalidated the
1695 : * results of our previous btree research, so repeat it. (We could
1696 : * recheck whether any of our split-avoidance strategies that were
1697 : * not viable before now are, but it hardly seems worthwhile, so
1698 : * we don't bother. Consolidation can't be possible now if it
1699 : * wasn't previously.)
1700 : */
1701 0 : FreePageBtreeSearch(fpm, first_page, &result);
1702 :
1703 : /*
1704 : * The act of allocating pages for use in constructing our btree
1705 : * should never cause any page to become more full, so the new
1706 : * split depth should be no greater than the old one, and perhaps
1707 : * less if we fortuitously allocated a chunk that freed up a slot
1708 : * on the page we need to update.
1709 : */
1710 : Assert(result.split_pages <= fpm->btree_recycle_count);
1711 : }
1712 :
1713 : /* If we still need to perform a split, do it. */
1714 0 : if (result.split_pages > 0)
1715 : {
1716 0 : FreePageBtree *split_target = result.page;
1717 0 : FreePageBtree *child = NULL;
1718 0 : Size key = first_page;
1719 :
1720 : for (;;)
1721 0 : {
1722 : FreePageBtree *newsibling;
1723 : FreePageBtree *parent;
1724 :
1725 : /* Identify parent page, which must receive downlink. */
1726 0 : parent = relptr_access(base, split_target->hdr.parent);
1727 :
1728 : /* Split the page - downlink not added yet. */
1729 0 : newsibling = FreePageBtreeSplitPage(fpm, split_target);
1730 :
1731 : /*
1732 : * At this point in the loop, we're always carrying a pending
1733 : * insertion. On the first pass, it's the actual key we're
1734 : * trying to insert; on subsequent passes, it's the downlink
1735 : * that needs to be added as a result of the split performed
1736 : * during the previous loop iteration. Since we've just split
1737 : * the page, there's definitely room on one of the two
1738 : * resulting pages.
1739 : */
1740 0 : if (child == NULL)
1741 : {
1742 : Size index;
1743 : FreePageBtree *insert_into;
1744 :
1745 0 : insert_into = key < newsibling->u.leaf_key[0].first_page ?
1746 0 : split_target : newsibling;
1747 0 : index = FreePageBtreeSearchLeaf(insert_into, key);
1748 0 : FreePageBtreeInsertLeaf(insert_into, index, key, npages);
1749 0 : if (index == 0 && insert_into == split_target)
1750 0 : FreePageBtreeAdjustAncestorKeys(fpm, split_target);
1751 : }
1752 : else
1753 : {
1754 : Size index;
1755 : FreePageBtree *insert_into;
1756 :
1757 0 : insert_into =
1758 0 : key < newsibling->u.internal_key[0].first_page ?
1759 0 : split_target : newsibling;
1760 0 : index = FreePageBtreeSearchInternal(insert_into, key);
1761 0 : FreePageBtreeInsertInternal(base, insert_into, index,
1762 : key, child);
1763 0 : relptr_store(base, child->hdr.parent, insert_into);
1764 0 : if (index == 0 && insert_into == split_target)
1765 0 : FreePageBtreeAdjustAncestorKeys(fpm, split_target);
1766 : }
1767 :
1768 : /* If the page we just split has no parent, split the root. */
1769 0 : if (parent == NULL)
1770 : {
1771 : FreePageBtree *newroot;
1772 :
1773 0 : newroot = FreePageBtreeGetRecycled(fpm);
1774 0 : newroot->hdr.magic = FREE_PAGE_INTERNAL_MAGIC;
1775 0 : newroot->hdr.nused = 2;
1776 0 : relptr_store(base, newroot->hdr.parent,
1777 : (FreePageBtree *) NULL);
1778 0 : newroot->u.internal_key[0].first_page =
1779 0 : FreePageBtreeFirstKey(split_target);
1780 0 : relptr_store(base, newroot->u.internal_key[0].child,
1781 : split_target);
1782 0 : relptr_store(base, split_target->hdr.parent, newroot);
1783 0 : newroot->u.internal_key[1].first_page =
1784 0 : FreePageBtreeFirstKey(newsibling);
1785 0 : relptr_store(base, newroot->u.internal_key[1].child,
1786 : newsibling);
1787 0 : relptr_store(base, newsibling->hdr.parent, newroot);
1788 0 : relptr_store(base, fpm->btree_root, newroot);
1789 0 : fpm->btree_depth++;
1790 :
1791 0 : break;
1792 : }
1793 :
1794 : /* If the parent page isn't full, insert the downlink. */
1795 0 : key = newsibling->u.internal_key[0].first_page;
1796 0 : if (parent->hdr.nused < FPM_ITEMS_PER_INTERNAL_PAGE)
1797 : {
1798 : Size index;
1799 :
1800 0 : index = FreePageBtreeSearchInternal(parent, key);
1801 0 : FreePageBtreeInsertInternal(base, parent, index,
1802 : key, newsibling);
1803 0 : relptr_store(base, newsibling->hdr.parent, parent);
1804 0 : if (index == 0)
1805 0 : FreePageBtreeAdjustAncestorKeys(fpm, parent);
1806 0 : break;
1807 : }
1808 :
1809 : /* The parent also needs to be split, so loop around. */
1810 0 : child = newsibling;
1811 0 : split_target = parent;
1812 : }
1813 :
1814 : /*
1815 : * The loop above did the insert, so just need to update the free
1816 : * list, and we're done.
1817 : */
1818 0 : FreePagePushSpanLeader(fpm, first_page, npages);
1819 :
1820 0 : return npages;
1821 : }
1822 : }
1823 :
1824 : /* Physically add the key to the page. */
1825 : Assert(result.page->hdr.nused < FPM_ITEMS_PER_LEAF_PAGE);
1826 844 : FreePageBtreeInsertLeaf(result.page, result.index, first_page, npages);
1827 :
1828 : /* If new first key on page, ancestors might need adjustment. */
1829 844 : if (result.index == 0)
1830 476 : FreePageBtreeAdjustAncestorKeys(fpm, result.page);
1831 :
1832 : /* Put it on the free list. */
1833 844 : FreePagePushSpanLeader(fpm, first_page, npages);
1834 :
1835 844 : return npages;
1836 : }
1837 :
1838 : /*
1839 : * Remove a FreePageSpanLeader from the linked-list that contains it, either
1840 : * because we're changing the size of the span, or because we're allocating it.
1841 : */
1842 : static void
1843 4026 : FreePagePopSpanLeader(FreePageManager *fpm, Size pageno)
1844 : {
1845 4026 : char *base = fpm_segment_base(fpm);
1846 : FreePageSpanLeader *span;
1847 : FreePageSpanLeader *next;
1848 : FreePageSpanLeader *prev;
1849 :
1850 4026 : span = (FreePageSpanLeader *) fpm_page_to_pointer(base, pageno);
1851 :
1852 4026 : next = relptr_access(base, span->next);
1853 4026 : prev = relptr_access(base, span->prev);
1854 4026 : if (next != NULL)
1855 178 : relptr_copy(next->prev, span->prev);
1856 4026 : if (prev != NULL)
1857 30 : relptr_copy(prev->next, span->next);
1858 : else
1859 : {
1860 3996 : Size f = Min(span->npages, FPM_NUM_FREELISTS) - 1;
1861 :
1862 : Assert(relptr_offset(fpm->freelist[f]) == pageno * FPM_PAGE_SIZE);
1863 3996 : relptr_copy(fpm->freelist[f], span->next);
1864 : }
1865 4026 : }
1866 :
1867 : /*
1868 : * Initialize a new FreePageSpanLeader and put it on the appropriate free list.
1869 : */
1870 : static void
1871 32760 : FreePagePushSpanLeader(FreePageManager *fpm, Size first_page, Size npages)
1872 : {
1873 32760 : char *base = fpm_segment_base(fpm);
1874 32760 : Size f = Min(npages, FPM_NUM_FREELISTS) - 1;
1875 32760 : FreePageSpanLeader *head = relptr_access(base, fpm->freelist[f]);
1876 : FreePageSpanLeader *span;
1877 :
1878 32760 : span = (FreePageSpanLeader *) fpm_page_to_pointer(base, first_page);
1879 32760 : span->magic = FREE_PAGE_SPAN_LEADER_MAGIC;
1880 32760 : span->npages = npages;
1881 32760 : relptr_store(base, span->next, head);
1882 32760 : relptr_store(base, span->prev, (FreePageSpanLeader *) NULL);
1883 32760 : if (head != NULL)
1884 282 : relptr_store(base, head->prev, span);
1885 32760 : relptr_store(base, fpm->freelist[f], span);
1886 32760 : }
|