Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * ginbtree.c
4 : * page utilities routines for the postgres inverted index access method.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/ginbtree.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gin_private.h"
18 : #include "access/ginxlog.h"
19 : #include "access/xloginsert.h"
20 : #include "miscadmin.h"
21 : #include "storage/predicate.h"
22 : #include "utils/injection_point.h"
23 : #include "utils/memutils.h"
24 : #include "utils/rel.h"
25 :
26 : static void ginFindParents(GinBtree btree, GinBtreeStack *stack);
27 : static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
28 : void *insertdata, BlockNumber updateblkno,
29 : Buffer childbuf, GinStatsData *buildStats);
30 : static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
31 : bool freestack, GinStatsData *buildStats);
32 : static void ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack,
33 : GinStatsData *buildStats, int access);
34 :
35 : /*
36 : * Lock buffer by needed method for search.
37 : */
38 : int
39 1622760 : ginTraverseLock(Buffer buffer, bool searchMode)
40 : {
41 : Page page;
42 1622760 : int access = GIN_SHARE;
43 :
44 1622760 : LockBuffer(buffer, GIN_SHARE);
45 1622760 : page = BufferGetPage(buffer);
46 1622760 : if (GinPageIsLeaf(page))
47 : {
48 827836 : if (searchMode == false)
49 : {
50 : /* we should relock our page */
51 821062 : LockBuffer(buffer, GIN_UNLOCK);
52 821062 : LockBuffer(buffer, GIN_EXCLUSIVE);
53 :
54 : /* But root can become non-leaf during relock */
55 821062 : if (!GinPageIsLeaf(page))
56 : {
57 : /* restore old lock type (very rare) */
58 0 : LockBuffer(buffer, GIN_UNLOCK);
59 0 : LockBuffer(buffer, GIN_SHARE);
60 : }
61 : else
62 821062 : access = GIN_EXCLUSIVE;
63 : }
64 : }
65 :
66 1622760 : return access;
67 : }
68 :
69 : /*
70 : * Descend the tree to the leaf page that contains or would contain the key
71 : * we're searching for. The key should already be filled in 'btree', in
72 : * tree-type specific manner. If btree->fullScan is true, descends to the
73 : * leftmost leaf page.
74 : *
75 : * If 'searchmode' is false, on return stack->buffer is exclusively locked,
76 : * and the stack represents the full path to the root. Otherwise stack->buffer
77 : * is share-locked, and stack->parent is NULL.
78 : *
79 : * If 'rootConflictCheck' is true, tree root is checked for serialization
80 : * conflict.
81 : */
82 : GinBtreeStack *
83 827840 : ginFindLeafPage(GinBtree btree, bool searchMode,
84 : bool rootConflictCheck)
85 : {
86 : GinBtreeStack *stack;
87 :
88 827840 : stack = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
89 827840 : stack->blkno = btree->rootBlkno;
90 827840 : stack->buffer = ReadBuffer(btree->index, btree->rootBlkno);
91 827840 : stack->parent = NULL;
92 827840 : stack->predictNumber = 1;
93 :
94 827840 : if (rootConflictCheck)
95 49560 : CheckForSerializableConflictIn(btree->index, NULL, btree->rootBlkno);
96 :
97 : for (;;)
98 794924 : {
99 : Page page;
100 : BlockNumber child;
101 : int access;
102 :
103 1622760 : stack->off = InvalidOffsetNumber;
104 :
105 1622760 : page = BufferGetPage(stack->buffer);
106 :
107 1622760 : access = ginTraverseLock(stack->buffer, searchMode);
108 :
109 : /*
110 : * If we're going to modify the tree, finish any incomplete splits we
111 : * encounter on the way.
112 : */
113 1622760 : if (!searchMode && GinPageIsIncompleteSplit(page))
114 4 : ginFinishOldSplit(btree, stack, NULL, access);
115 :
116 : /*
117 : * ok, page is correctly locked, we should check to move right ..,
118 : * root never has a right link, so small optimization
119 : */
120 2419792 : while (btree->fullScan == false && stack->blkno != btree->rootBlkno &&
121 795940 : btree->isMoveRight(btree, page))
122 : {
123 1092 : BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
124 :
125 1092 : if (rightlink == InvalidBlockNumber)
126 : /* rightmost page */
127 0 : break;
128 :
129 1092 : stack->buffer = ginStepRight(stack->buffer, btree->index, access);
130 1092 : stack->blkno = rightlink;
131 1092 : page = BufferGetPage(stack->buffer);
132 :
133 1092 : if (!searchMode && GinPageIsIncompleteSplit(page))
134 0 : ginFinishOldSplit(btree, stack, NULL, access);
135 : }
136 :
137 1622760 : if (GinPageIsLeaf(page)) /* we found, return locked page */
138 827836 : return stack;
139 :
140 : /* now we have correct buffer, try to find child */
141 794924 : child = btree->findChildPage(btree, stack);
142 :
143 794924 : LockBuffer(stack->buffer, GIN_UNLOCK);
144 : Assert(child != InvalidBlockNumber);
145 : Assert(stack->blkno != child);
146 :
147 794924 : if (searchMode)
148 : {
149 : /* in search mode we may forget path to leaf */
150 6794 : stack->blkno = child;
151 6794 : stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
152 : }
153 : else
154 : {
155 788130 : GinBtreeStack *ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
156 :
157 788130 : ptr->parent = stack;
158 788130 : stack = ptr;
159 788130 : stack->blkno = child;
160 788130 : stack->buffer = ReadBuffer(btree->index, stack->blkno);
161 788130 : stack->predictNumber = 1;
162 : }
163 : }
164 : }
165 :
166 : /*
167 : * Step right from current page.
168 : *
169 : * The next page is locked first, before releasing the current page. This is
170 : * crucial to prevent concurrent VACUUM from deleting a page that we are about
171 : * to step to. (The lock-coupling isn't strictly necessary when we are
172 : * traversing the tree to find an insert location, because page deletion grabs
173 : * a cleanup lock on the root to prevent any concurrent inserts. See Page
174 : * deletion section in the README. But there's no harm in doing it always.)
175 : */
176 : Buffer
177 4628 : ginStepRight(Buffer buffer, Relation index, int lockmode)
178 : {
179 : Buffer nextbuffer;
180 4628 : Page page = BufferGetPage(buffer);
181 4628 : bool isLeaf = GinPageIsLeaf(page);
182 4628 : bool isData = GinPageIsData(page);
183 4628 : BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
184 :
185 4628 : nextbuffer = ReadBuffer(index, blkno);
186 4628 : LockBuffer(nextbuffer, lockmode);
187 4628 : UnlockReleaseBuffer(buffer);
188 :
189 : /* Sanity check that the page we stepped to is of similar kind. */
190 4628 : page = BufferGetPage(nextbuffer);
191 4628 : if (isLeaf != GinPageIsLeaf(page) || isData != GinPageIsData(page))
192 0 : elog(ERROR, "right sibling of GIN page is of different type");
193 :
194 4628 : return nextbuffer;
195 : }
196 :
197 : void
198 827820 : freeGinBtreeStack(GinBtreeStack *stack)
199 : {
200 2439060 : while (stack)
201 : {
202 1611240 : GinBtreeStack *tmp = stack->parent;
203 :
204 1611240 : if (stack->buffer != InvalidBuffer)
205 1611240 : ReleaseBuffer(stack->buffer);
206 :
207 1611240 : pfree(stack);
208 1611240 : stack = tmp;
209 : }
210 827820 : }
211 :
212 : /*
213 : * Try to find parent for current stack position. Returns correct parent and
214 : * child's offset in stack->parent. The root page is never released, to
215 : * prevent conflict with vacuum process.
216 : */
217 : static void
218 0 : ginFindParents(GinBtree btree, GinBtreeStack *stack)
219 : {
220 : Page page;
221 : Buffer buffer;
222 : BlockNumber blkno,
223 : leftmostBlkno;
224 : OffsetNumber offset;
225 : GinBtreeStack *root;
226 : GinBtreeStack *ptr;
227 :
228 : /*
229 : * Unwind the stack all the way up to the root, leaving only the root
230 : * item.
231 : *
232 : * Be careful not to release the pin on the root page! The pin on root
233 : * page is required to lock out concurrent vacuums on the tree.
234 : */
235 0 : root = stack->parent;
236 0 : while (root->parent)
237 : {
238 0 : ReleaseBuffer(root->buffer);
239 0 : root = root->parent;
240 : }
241 :
242 : Assert(root->blkno == btree->rootBlkno);
243 : Assert(BufferGetBlockNumber(root->buffer) == btree->rootBlkno);
244 0 : root->off = InvalidOffsetNumber;
245 :
246 0 : blkno = root->blkno;
247 0 : buffer = root->buffer;
248 :
249 0 : ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
250 :
251 : for (;;)
252 : {
253 0 : LockBuffer(buffer, GIN_EXCLUSIVE);
254 0 : page = BufferGetPage(buffer);
255 0 : if (GinPageIsLeaf(page))
256 0 : elog(ERROR, "Lost path");
257 :
258 0 : if (GinPageIsIncompleteSplit(page))
259 : {
260 : Assert(blkno != btree->rootBlkno);
261 0 : ptr->blkno = blkno;
262 0 : ptr->buffer = buffer;
263 :
264 : /*
265 : * parent may be wrong, but if so, the ginFinishSplit call will
266 : * recurse to call ginFindParents again to fix it.
267 : */
268 0 : ptr->parent = root;
269 0 : ptr->off = InvalidOffsetNumber;
270 :
271 0 : ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE);
272 : }
273 :
274 0 : leftmostBlkno = btree->getLeftMostChild(btree, page);
275 :
276 0 : while ((offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) == InvalidOffsetNumber)
277 : {
278 0 : blkno = GinPageGetOpaque(page)->rightlink;
279 0 : if (blkno == InvalidBlockNumber)
280 : {
281 : /* Link not present in this level */
282 0 : LockBuffer(buffer, GIN_UNLOCK);
283 : /* Do not release pin on the root buffer */
284 0 : if (buffer != root->buffer)
285 0 : ReleaseBuffer(buffer);
286 0 : break;
287 : }
288 0 : buffer = ginStepRight(buffer, btree->index, GIN_EXCLUSIVE);
289 0 : page = BufferGetPage(buffer);
290 :
291 : /* finish any incomplete splits, as above */
292 0 : if (GinPageIsIncompleteSplit(page))
293 : {
294 : Assert(blkno != btree->rootBlkno);
295 0 : ptr->blkno = blkno;
296 0 : ptr->buffer = buffer;
297 0 : ptr->parent = root;
298 0 : ptr->off = InvalidOffsetNumber;
299 :
300 0 : ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE);
301 : }
302 : }
303 :
304 0 : if (blkno != InvalidBlockNumber)
305 : {
306 0 : ptr->blkno = blkno;
307 0 : ptr->buffer = buffer;
308 0 : ptr->parent = root; /* it may be wrong, but in next call we will
309 : * correct */
310 0 : ptr->off = offset;
311 0 : stack->parent = ptr;
312 0 : return;
313 : }
314 :
315 : /* Descend down to next level */
316 0 : blkno = leftmostBlkno;
317 0 : buffer = ReadBuffer(btree->index, blkno);
318 : }
319 : }
320 :
321 : /*
322 : * Insert a new item to a page.
323 : *
324 : * Returns true if the insertion was finished. On false, the page was split and
325 : * the parent needs to be updated. (A root split returns true as it doesn't
326 : * need any further action by the caller to complete.)
327 : *
328 : * When inserting a downlink to an internal page, 'childbuf' contains the
329 : * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared
330 : * atomically with the insert. Also, the existing item at offset stack->off
331 : * in the target page is updated to point to updateblkno.
332 : *
333 : * stack->buffer is locked on entry, and is kept locked.
334 : * Likewise for childbuf, if given.
335 : */
336 : static bool
337 776356 : ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
338 : void *insertdata, BlockNumber updateblkno,
339 : Buffer childbuf, GinStatsData *buildStats)
340 : {
341 776356 : Page page = BufferGetPage(stack->buffer);
342 : bool result;
343 : GinPlaceToPageRC rc;
344 776356 : uint16 xlflags = 0;
345 776356 : Page childpage = NULL;
346 776356 : Page newlpage = NULL,
347 776356 : newrpage = NULL;
348 776356 : void *ptp_workspace = NULL;
349 : MemoryContext tmpCxt;
350 : MemoryContext oldCxt;
351 :
352 : /*
353 : * We do all the work of this function and its subfunctions in a temporary
354 : * memory context. This avoids leakages and simplifies APIs, since some
355 : * subfunctions allocate storage that has to survive until we've finished
356 : * the WAL insertion.
357 : */
358 776356 : tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
359 : "ginPlaceToPage temporary context",
360 : ALLOCSET_DEFAULT_SIZES);
361 776356 : oldCxt = MemoryContextSwitchTo(tmpCxt);
362 :
363 776356 : if (GinPageIsData(page))
364 49602 : xlflags |= GIN_INSERT_ISDATA;
365 776356 : if (GinPageIsLeaf(page))
366 : {
367 771658 : xlflags |= GIN_INSERT_ISLEAF;
368 : Assert(!BufferIsValid(childbuf));
369 : Assert(updateblkno == InvalidBlockNumber);
370 : }
371 : else
372 : {
373 : Assert(BufferIsValid(childbuf));
374 : Assert(updateblkno != InvalidBlockNumber);
375 4698 : childpage = BufferGetPage(childbuf);
376 : }
377 :
378 : /*
379 : * See if the incoming tuple will fit on the page. beginPlaceToPage will
380 : * decide if the page needs to be split, and will compute the split
381 : * contents if so. See comments for beginPlaceToPage and execPlaceToPage
382 : * functions for more details of the API here.
383 : */
384 776356 : rc = btree->beginPlaceToPage(btree, stack->buffer, stack,
385 : insertdata, updateblkno,
386 : &ptp_workspace,
387 : &newlpage, &newrpage);
388 :
389 776356 : if (rc == GPTP_NO_WORK)
390 : {
391 : /* Nothing to do */
392 0 : result = true;
393 : }
394 776356 : else if (rc == GPTP_INSERT)
395 : {
396 : /* It will fit, perform the insertion */
397 771430 : START_CRIT_SECTION();
398 :
399 771430 : if (RelationNeedsWAL(btree->index) && !btree->isBuild)
400 382334 : XLogBeginInsert();
401 :
402 : /*
403 : * Perform the page update, dirty and register stack->buffer, and
404 : * register any extra WAL data.
405 : */
406 771430 : btree->execPlaceToPage(btree, stack->buffer, stack,
407 : insertdata, updateblkno, ptp_workspace);
408 :
409 : /* An insert to an internal page finishes the split of the child. */
410 771430 : if (BufferIsValid(childbuf))
411 : {
412 4694 : GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
413 4694 : MarkBufferDirty(childbuf);
414 4694 : if (RelationNeedsWAL(btree->index) && !btree->isBuild)
415 2054 : XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
416 : }
417 :
418 771430 : if (RelationNeedsWAL(btree->index) && !btree->isBuild)
419 : {
420 : XLogRecPtr recptr;
421 : ginxlogInsert xlrec;
422 : BlockIdData childblknos[2];
423 :
424 382334 : xlrec.flags = xlflags;
425 :
426 382334 : XLogRegisterData((char *) &xlrec, sizeof(ginxlogInsert));
427 :
428 : /*
429 : * Log information about child if this was an insertion of a
430 : * downlink.
431 : */
432 382334 : if (BufferIsValid(childbuf))
433 : {
434 2054 : BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
435 2054 : BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);
436 2054 : XLogRegisterData((char *) childblknos,
437 : sizeof(BlockIdData) * 2);
438 : }
439 :
440 382334 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT);
441 382334 : PageSetLSN(page, recptr);
442 382334 : if (BufferIsValid(childbuf))
443 2054 : PageSetLSN(childpage, recptr);
444 : }
445 :
446 771430 : END_CRIT_SECTION();
447 :
448 : /* Insertion is complete. */
449 771430 : result = true;
450 : }
451 4926 : else if (rc == GPTP_SPLIT)
452 : {
453 : /*
454 : * Didn't fit, need to split. The split has been computed in newlpage
455 : * and newrpage, which are pointers to palloc'd pages, not associated
456 : * with buffers. stack->buffer is not touched yet.
457 : */
458 : Buffer rbuffer;
459 : BlockNumber savedRightLink;
460 : ginxlogSplit data;
461 4926 : Buffer lbuffer = InvalidBuffer;
462 4926 : Page newrootpg = NULL;
463 :
464 : /* Get a new index page to become the right page */
465 4926 : rbuffer = GinNewBuffer(btree->index);
466 :
467 : /* During index build, count the new page */
468 4926 : if (buildStats)
469 : {
470 1102 : if (btree->isData)
471 92 : buildStats->nDataPages++;
472 : else
473 1010 : buildStats->nEntryPages++;
474 : }
475 :
476 4926 : savedRightLink = GinPageGetOpaque(page)->rightlink;
477 :
478 : /* Begin setting up WAL record */
479 4926 : data.locator = btree->index->rd_locator;
480 4926 : data.flags = xlflags;
481 4926 : if (BufferIsValid(childbuf))
482 : {
483 4 : data.leftChildBlkno = BufferGetBlockNumber(childbuf);
484 4 : data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink;
485 : }
486 : else
487 4922 : data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;
488 :
489 4926 : if (stack->parent == NULL)
490 : {
491 : /*
492 : * splitting the root, so we need to allocate new left page and
493 : * place pointers to left and right page on root page.
494 : */
495 228 : lbuffer = GinNewBuffer(btree->index);
496 :
497 : /* During index build, count the new left page */
498 228 : if (buildStats)
499 : {
500 184 : if (btree->isData)
501 76 : buildStats->nDataPages++;
502 : else
503 108 : buildStats->nEntryPages++;
504 : }
505 :
506 228 : data.rrlink = InvalidBlockNumber;
507 228 : data.flags |= GIN_SPLIT_ROOT;
508 :
509 228 : GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber;
510 228 : GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
511 :
512 : /*
513 : * Construct a new root page containing downlinks to the new left
514 : * and right pages. (Do this in a temporary copy rather than
515 : * overwriting the original page directly, since we're not in the
516 : * critical section yet.)
517 : */
518 228 : newrootpg = PageGetTempPage(newrpage);
519 228 : GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~(GIN_LEAF | GIN_COMPRESSED), BLCKSZ);
520 :
521 228 : btree->fillRoot(btree, newrootpg,
522 : BufferGetBlockNumber(lbuffer), newlpage,
523 : BufferGetBlockNumber(rbuffer), newrpage);
524 :
525 228 : if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
526 : {
527 :
528 226 : PredicateLockPageSplit(btree->index,
529 : BufferGetBlockNumber(stack->buffer),
530 : BufferGetBlockNumber(lbuffer));
531 :
532 226 : PredicateLockPageSplit(btree->index,
533 : BufferGetBlockNumber(stack->buffer),
534 : BufferGetBlockNumber(rbuffer));
535 : }
536 : }
537 : else
538 : {
539 : /* splitting a non-root page */
540 4698 : data.rrlink = savedRightLink;
541 :
542 4698 : GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
543 4698 : GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
544 4698 : GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
545 :
546 4698 : if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
547 : {
548 :
549 4696 : PredicateLockPageSplit(btree->index,
550 : BufferGetBlockNumber(stack->buffer),
551 : BufferGetBlockNumber(rbuffer));
552 : }
553 : }
554 :
555 : /*
556 : * OK, we have the new contents of the left page in a temporary copy
557 : * now (newlpage), and likewise for the new contents of the
558 : * newly-allocated right block. The original page is still unchanged.
559 : *
560 : * If this is a root split, we also have a temporary page containing
561 : * the new contents of the root.
562 : */
563 :
564 4926 : START_CRIT_SECTION();
565 :
566 4926 : MarkBufferDirty(rbuffer);
567 4926 : MarkBufferDirty(stack->buffer);
568 :
569 : /*
570 : * Restore the temporary copies over the real buffers.
571 : */
572 4926 : if (stack->parent == NULL)
573 : {
574 : /* Splitting the root, three pages to update */
575 228 : MarkBufferDirty(lbuffer);
576 228 : memcpy(page, newrootpg, BLCKSZ);
577 228 : memcpy(BufferGetPage(lbuffer), newlpage, BLCKSZ);
578 228 : memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
579 : }
580 : else
581 : {
582 : /* Normal split, only two pages to update */
583 4698 : memcpy(page, newlpage, BLCKSZ);
584 4698 : memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
585 : }
586 :
587 : /* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */
588 4926 : if (BufferIsValid(childbuf))
589 : {
590 4 : GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
591 4 : MarkBufferDirty(childbuf);
592 : }
593 :
594 : /* write WAL record */
595 4926 : if (RelationNeedsWAL(btree->index) && !btree->isBuild)
596 : {
597 : XLogRecPtr recptr;
598 :
599 2084 : XLogBeginInsert();
600 :
601 : /*
602 : * We just take full page images of all the split pages. Splits
603 : * are uncommon enough that it's not worth complicating the code
604 : * to be more efficient.
605 : */
606 2084 : if (stack->parent == NULL)
607 : {
608 26 : XLogRegisterBuffer(0, lbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
609 26 : XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
610 26 : XLogRegisterBuffer(2, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
611 : }
612 : else
613 : {
614 2058 : XLogRegisterBuffer(0, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
615 2058 : XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
616 : }
617 2084 : if (BufferIsValid(childbuf))
618 4 : XLogRegisterBuffer(3, childbuf, REGBUF_STANDARD);
619 :
620 2084 : XLogRegisterData((char *) &data, sizeof(ginxlogSplit));
621 :
622 2084 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT);
623 :
624 2084 : PageSetLSN(page, recptr);
625 2084 : PageSetLSN(BufferGetPage(rbuffer), recptr);
626 2084 : if (stack->parent == NULL)
627 26 : PageSetLSN(BufferGetPage(lbuffer), recptr);
628 2084 : if (BufferIsValid(childbuf))
629 4 : PageSetLSN(childpage, recptr);
630 : }
631 4926 : END_CRIT_SECTION();
632 :
633 : /*
634 : * We can release the locks/pins on the new pages now, but keep
635 : * stack->buffer locked. childbuf doesn't get unlocked either.
636 : */
637 4926 : UnlockReleaseBuffer(rbuffer);
638 4926 : if (stack->parent == NULL)
639 228 : UnlockReleaseBuffer(lbuffer);
640 :
641 : /*
642 : * If we split the root, we're done. Otherwise the split is not
643 : * complete until the downlink for the new page has been inserted to
644 : * the parent.
645 : */
646 4926 : result = (stack->parent == NULL);
647 : }
648 : else
649 : {
650 0 : elog(ERROR, "invalid return code from GIN beginPlaceToPage method: %d", rc);
651 : result = false; /* keep compiler quiet */
652 : }
653 :
654 : /* Clean up temp context */
655 776356 : MemoryContextSwitchTo(oldCxt);
656 776356 : MemoryContextDelete(tmpCxt);
657 :
658 776356 : return result;
659 : }
660 :
661 : /*
662 : * Finish a split by inserting the downlink for the new page to parent.
663 : *
664 : * On entry, stack->buffer is exclusively locked.
665 : *
666 : * If freestack is true, all the buffers are released and unlocked as we
667 : * crawl up the tree, and 'stack' is freed. Otherwise stack->buffer is kept
668 : * locked, and stack is unmodified, except for possibly moving right to find
669 : * the correct parent of page.
670 : */
671 : static void
672 4700 : ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
673 : GinStatsData *buildStats)
674 : {
675 : Page page;
676 : bool done;
677 4700 : bool first = true;
678 :
679 : /* this loop crawls up the stack until the insertion is complete */
680 : do
681 : {
682 4702 : GinBtreeStack *parent = stack->parent;
683 : void *insertdata;
684 : BlockNumber updateblkno;
685 :
686 : #ifdef USE_INJECTION_POINTS
687 4702 : if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
688 4698 : INJECTION_POINT("gin-leave-leaf-split-incomplete");
689 : else
690 4 : INJECTION_POINT("gin-leave-internal-split-incomplete");
691 : #endif
692 :
693 : /* search parent to lock */
694 4698 : LockBuffer(parent->buffer, GIN_EXCLUSIVE);
695 :
696 : /*
697 : * If the parent page was incompletely split, finish that split first,
698 : * then continue with the current one.
699 : *
700 : * Note: we have to finish *all* incomplete splits we encounter, even
701 : * if we have to move right. Otherwise we might choose as the target a
702 : * page that has no downlink in the parent, and splitting it further
703 : * would fail.
704 : */
705 4698 : if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
706 0 : ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE);
707 :
708 : /* move right if it's needed */
709 4698 : page = BufferGetPage(parent->buffer);
710 4698 : while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
711 : {
712 0 : if (GinPageRightMost(page))
713 : {
714 : /*
715 : * rightmost page, but we don't find parent, we should use
716 : * plain search...
717 : */
718 0 : LockBuffer(parent->buffer, GIN_UNLOCK);
719 0 : ginFindParents(btree, stack);
720 0 : parent = stack->parent;
721 : Assert(parent != NULL);
722 0 : break;
723 : }
724 :
725 0 : parent->buffer = ginStepRight(parent->buffer, btree->index, GIN_EXCLUSIVE);
726 0 : parent->blkno = BufferGetBlockNumber(parent->buffer);
727 0 : page = BufferGetPage(parent->buffer);
728 :
729 0 : if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
730 0 : ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE);
731 : }
732 :
733 : /* insert the downlink */
734 4698 : insertdata = btree->prepareDownlink(btree, stack->buffer);
735 4698 : updateblkno = GinPageGetOpaque(BufferGetPage(stack->buffer))->rightlink;
736 4698 : done = ginPlaceToPage(btree, parent,
737 : insertdata, updateblkno,
738 : stack->buffer, buildStats);
739 4698 : pfree(insertdata);
740 :
741 : /*
742 : * If the caller requested to free the stack, unlock and release the
743 : * child buffer now. Otherwise keep it pinned and locked, but if we
744 : * have to recurse up the tree, we can unlock the upper pages, only
745 : * keeping the page at the bottom of the stack locked.
746 : */
747 4698 : if (!first || freestack)
748 4694 : LockBuffer(stack->buffer, GIN_UNLOCK);
749 4698 : if (freestack)
750 : {
751 4694 : ReleaseBuffer(stack->buffer);
752 4694 : pfree(stack);
753 : }
754 4698 : stack = parent;
755 :
756 4698 : first = false;
757 4698 : } while (!done);
758 :
759 : /* unlock the parent */
760 4696 : LockBuffer(stack->buffer, GIN_UNLOCK);
761 :
762 4696 : if (freestack)
763 4692 : freeGinBtreeStack(stack);
764 4696 : }
765 :
766 : /*
767 : * An entry point to ginFinishSplit() that is used when we stumble upon an
768 : * existing incompletely split page in the tree, as opposed to completing a
769 : * split that we just made ourselves. The difference is that stack->buffer may
770 : * be merely share-locked on entry, and will be upgraded to exclusive mode.
771 : *
772 : * Note: Upgrading the lock momentarily releases it. Doing that in a scan
773 : * would not be OK, because a concurrent VACUUM might delete the page while
774 : * we're not holding the lock. It's OK in an insert, though, because VACUUM
775 : * has a different mechanism that prevents it from running concurrently with
776 : * inserts. (Namely, it holds a cleanup lock on the root.)
777 : */
778 : static void
779 4 : ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats, int access)
780 : {
781 4 : INJECTION_POINT("gin-finish-incomplete-split");
782 4 : elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"",
783 : stack->blkno, RelationGetRelationName(btree->index));
784 :
785 4 : if (access == GIN_SHARE)
786 : {
787 2 : LockBuffer(stack->buffer, GIN_UNLOCK);
788 2 : LockBuffer(stack->buffer, GIN_EXCLUSIVE);
789 :
790 2 : if (!GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
791 : {
792 : /*
793 : * Someone else already completed the split while we were not
794 : * holding the lock.
795 : */
796 0 : return;
797 : }
798 : }
799 :
800 4 : ginFinishSplit(btree, stack, false, buildStats);
801 : }
802 :
803 : /*
804 : * Insert a value to tree described by stack.
805 : *
806 : * The value to be inserted is given in 'insertdata'. Its format depends
807 : * on whether this is an entry or data tree, ginInsertValue just passes it
808 : * through to the tree-specific callback function.
809 : *
810 : * During an index build, buildStats is non-null and the counters it contains
811 : * are incremented as needed.
812 : *
813 : * NB: the passed-in stack is freed, as though by freeGinBtreeStack.
814 : */
815 : void
816 771658 : ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata,
817 : GinStatsData *buildStats)
818 : {
819 : bool done;
820 :
821 : /* If the leaf page was incompletely split, finish the split first */
822 771658 : if (GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
823 0 : ginFinishOldSplit(btree, stack, buildStats, GIN_EXCLUSIVE);
824 :
825 771658 : done = ginPlaceToPage(btree, stack,
826 : insertdata, InvalidBlockNumber,
827 : InvalidBuffer, buildStats);
828 771658 : if (done)
829 : {
830 766962 : LockBuffer(stack->buffer, GIN_UNLOCK);
831 766962 : freeGinBtreeStack(stack);
832 : }
833 : else
834 4696 : ginFinishSplit(btree, stack, true, buildStats);
835 771654 : }
|