Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * ginbtree.c
4 : * page utilities routines for the postgres inverted index access method.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/ginbtree.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gin_private.h"
18 : #include "access/ginxlog.h"
19 : #include "access/xloginsert.h"
20 : #include "miscadmin.h"
21 : #include "storage/predicate.h"
22 : #include "utils/memutils.h"
23 : #include "utils/rel.h"
24 :
25 : static void ginFindParents(GinBtree btree, GinBtreeStack *stack);
26 : static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
27 : void *insertdata, BlockNumber updateblkno,
28 : Buffer childbuf, GinStatsData *buildStats);
29 : static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
30 : bool freestack, GinStatsData *buildStats);
31 :
32 : /*
33 : * Lock buffer by needed method for search.
34 : */
35 : int
36 1195338 : ginTraverseLock(Buffer buffer, bool searchMode)
37 : {
38 : Page page;
39 1195338 : int access = GIN_SHARE;
40 :
41 1195338 : LockBuffer(buffer, GIN_SHARE);
42 1195338 : page = BufferGetPage(buffer);
43 1195338 : if (GinPageIsLeaf(page))
44 : {
45 644570 : if (searchMode == false)
46 : {
47 : /* we should relock our page */
48 640796 : LockBuffer(buffer, GIN_UNLOCK);
49 640796 : LockBuffer(buffer, GIN_EXCLUSIVE);
50 :
51 : /* But root can become non-leaf during relock */
52 640796 : if (!GinPageIsLeaf(page))
53 : {
54 : /* restore old lock type (very rare) */
55 0 : LockBuffer(buffer, GIN_UNLOCK);
56 0 : LockBuffer(buffer, GIN_SHARE);
57 : }
58 : else
59 640796 : access = GIN_EXCLUSIVE;
60 : }
61 : }
62 :
63 1195338 : return access;
64 : }
65 :
66 : /*
67 : * Descend the tree to the leaf page that contains or would contain the key
68 : * we're searching for. The key should already be filled in 'btree', in
69 : * tree-type specific manner. If btree->fullScan is true, descends to the
70 : * leftmost leaf page.
71 : *
72 : * If 'searchmode' is false, on return stack->buffer is exclusively locked,
73 : * and the stack represents the full path to the root. Otherwise stack->buffer
74 : * is share-locked, and stack->parent is NULL.
75 : *
76 : * If 'rootConflictCheck' is true, tree root is checked for serialization
77 : * conflict.
78 : */
79 : GinBtreeStack *
80 644574 : ginFindLeafPage(GinBtree btree, bool searchMode,
81 : bool rootConflictCheck, Snapshot snapshot)
82 : {
83 : GinBtreeStack *stack;
84 :
85 644574 : stack = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
86 644574 : stack->blkno = btree->rootBlkno;
87 644574 : stack->buffer = ReadBuffer(btree->index, btree->rootBlkno);
88 644574 : stack->parent = NULL;
89 644574 : stack->predictNumber = 1;
90 :
91 644574 : if (rootConflictCheck)
92 49560 : CheckForSerializableConflictIn(btree->index, NULL, btree->rootBlkno);
93 :
94 : for (;;)
95 550768 : {
96 : Page page;
97 : BlockNumber child;
98 : int access;
99 :
100 1195338 : stack->off = InvalidOffsetNumber;
101 :
102 1195338 : page = BufferGetPage(stack->buffer);
103 1195338 : TestForOldSnapshot(snapshot, btree->index, page);
104 :
105 1195338 : access = ginTraverseLock(stack->buffer, searchMode);
106 :
107 : /*
108 : * If we're going to modify the tree, finish any incomplete splits we
109 : * encounter on the way.
110 : */
111 1195338 : if (!searchMode && GinPageIsIncompleteSplit(page))
112 0 : ginFinishSplit(btree, stack, false, NULL);
113 :
114 : /*
115 : * ok, page is correctly locked, we should check to move right ..,
116 : * root never has a right link, so small optimization
117 : */
118 1746030 : while (btree->fullScan == false && stack->blkno != btree->rootBlkno &&
119 550692 : btree->isMoveRight(btree, page))
120 : {
121 0 : BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
122 :
123 0 : if (rightlink == InvalidBlockNumber)
124 : /* rightmost page */
125 0 : break;
126 :
127 0 : stack->buffer = ginStepRight(stack->buffer, btree->index, access);
128 0 : stack->blkno = rightlink;
129 0 : page = BufferGetPage(stack->buffer);
130 0 : TestForOldSnapshot(snapshot, btree->index, page);
131 :
132 0 : if (!searchMode && GinPageIsIncompleteSplit(page))
133 0 : ginFinishSplit(btree, stack, false, NULL);
134 : }
135 :
136 1195338 : if (GinPageIsLeaf(page)) /* we found, return locked page */
137 644570 : return stack;
138 :
139 : /* now we have correct buffer, try to find child */
140 550768 : child = btree->findChildPage(btree, stack);
141 :
142 550768 : LockBuffer(stack->buffer, GIN_UNLOCK);
143 : Assert(child != InvalidBlockNumber);
144 : Assert(stack->blkno != child);
145 :
146 550768 : if (searchMode)
147 : {
148 : /* in search mode we may forget path to leaf */
149 2594 : stack->blkno = child;
150 2594 : stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
151 : }
152 : else
153 : {
154 548174 : GinBtreeStack *ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
155 :
156 548174 : ptr->parent = stack;
157 548174 : stack = ptr;
158 548174 : stack->blkno = child;
159 548174 : stack->buffer = ReadBuffer(btree->index, stack->blkno);
160 548174 : stack->predictNumber = 1;
161 : }
162 : }
163 : }
164 :
165 : /*
166 : * Step right from current page.
167 : *
168 : * The next page is locked first, before releasing the current page. This is
169 : * crucial to protect from concurrent page deletion (see comment in
170 : * ginDeletePage).
171 : */
172 : Buffer
173 3536 : ginStepRight(Buffer buffer, Relation index, int lockmode)
174 : {
175 : Buffer nextbuffer;
176 3536 : Page page = BufferGetPage(buffer);
177 3536 : bool isLeaf = GinPageIsLeaf(page);
178 3536 : bool isData = GinPageIsData(page);
179 3536 : BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
180 :
181 3536 : nextbuffer = ReadBuffer(index, blkno);
182 3536 : LockBuffer(nextbuffer, lockmode);
183 3536 : UnlockReleaseBuffer(buffer);
184 :
185 : /* Sanity check that the page we stepped to is of similar kind. */
186 3536 : page = BufferGetPage(nextbuffer);
187 3536 : if (isLeaf != GinPageIsLeaf(page) || isData != GinPageIsData(page))
188 0 : elog(ERROR, "right sibling of GIN page is of different type");
189 :
190 3536 : return nextbuffer;
191 : }
192 :
193 : void
194 644558 : freeGinBtreeStack(GinBtreeStack *stack)
195 : {
196 1833804 : while (stack)
197 : {
198 1189246 : GinBtreeStack *tmp = stack->parent;
199 :
200 1189246 : if (stack->buffer != InvalidBuffer)
201 1189246 : ReleaseBuffer(stack->buffer);
202 :
203 1189246 : pfree(stack);
204 1189246 : stack = tmp;
205 : }
206 644558 : }
207 :
208 : /*
209 : * Try to find parent for current stack position. Returns correct parent and
210 : * child's offset in stack->parent. The root page is never released, to
211 : * prevent conflict with vacuum process.
212 : */
213 : static void
214 0 : ginFindParents(GinBtree btree, GinBtreeStack *stack)
215 : {
216 : Page page;
217 : Buffer buffer;
218 : BlockNumber blkno,
219 : leftmostBlkno;
220 : OffsetNumber offset;
221 : GinBtreeStack *root;
222 : GinBtreeStack *ptr;
223 :
224 : /*
225 : * Unwind the stack all the way up to the root, leaving only the root
226 : * item.
227 : *
228 : * Be careful not to release the pin on the root page! The pin on root
229 : * page is required to lock out concurrent vacuums on the tree.
230 : */
231 0 : root = stack->parent;
232 0 : while (root->parent)
233 : {
234 0 : ReleaseBuffer(root->buffer);
235 0 : root = root->parent;
236 : }
237 :
238 : Assert(root->blkno == btree->rootBlkno);
239 : Assert(BufferGetBlockNumber(root->buffer) == btree->rootBlkno);
240 0 : root->off = InvalidOffsetNumber;
241 :
242 0 : blkno = root->blkno;
243 0 : buffer = root->buffer;
244 :
245 0 : ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
246 :
247 : for (;;)
248 : {
249 0 : LockBuffer(buffer, GIN_EXCLUSIVE);
250 0 : page = BufferGetPage(buffer);
251 0 : if (GinPageIsLeaf(page))
252 0 : elog(ERROR, "Lost path");
253 :
254 0 : if (GinPageIsIncompleteSplit(page))
255 : {
256 : Assert(blkno != btree->rootBlkno);
257 0 : ptr->blkno = blkno;
258 0 : ptr->buffer = buffer;
259 :
260 : /*
261 : * parent may be wrong, but if so, the ginFinishSplit call will
262 : * recurse to call ginFindParents again to fix it.
263 : */
264 0 : ptr->parent = root;
265 0 : ptr->off = InvalidOffsetNumber;
266 :
267 0 : ginFinishSplit(btree, ptr, false, NULL);
268 : }
269 :
270 0 : leftmostBlkno = btree->getLeftMostChild(btree, page);
271 :
272 0 : while ((offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) == InvalidOffsetNumber)
273 : {
274 0 : blkno = GinPageGetOpaque(page)->rightlink;
275 0 : if (blkno == InvalidBlockNumber)
276 : {
277 0 : UnlockReleaseBuffer(buffer);
278 0 : break;
279 : }
280 0 : buffer = ginStepRight(buffer, btree->index, GIN_EXCLUSIVE);
281 0 : page = BufferGetPage(buffer);
282 :
283 : /* finish any incomplete splits, as above */
284 0 : if (GinPageIsIncompleteSplit(page))
285 : {
286 : Assert(blkno != btree->rootBlkno);
287 0 : ptr->blkno = blkno;
288 0 : ptr->buffer = buffer;
289 0 : ptr->parent = root;
290 0 : ptr->off = InvalidOffsetNumber;
291 :
292 0 : ginFinishSplit(btree, ptr, false, NULL);
293 : }
294 : }
295 :
296 0 : if (blkno != InvalidBlockNumber)
297 : {
298 0 : ptr->blkno = blkno;
299 0 : ptr->buffer = buffer;
300 0 : ptr->parent = root; /* it may be wrong, but in next call we will
301 : * correct */
302 0 : ptr->off = offset;
303 0 : stack->parent = ptr;
304 0 : return;
305 : }
306 :
307 : /* Descend down to next level */
308 0 : blkno = leftmostBlkno;
309 0 : buffer = ReadBuffer(btree->index, blkno);
310 : }
311 : }
312 :
313 : /*
314 : * Insert a new item to a page.
315 : *
316 : * Returns true if the insertion was finished. On false, the page was split and
317 : * the parent needs to be updated. (A root split returns true as it doesn't
318 : * need any further action by the caller to complete.)
319 : *
320 : * When inserting a downlink to an internal page, 'childbuf' contains the
321 : * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared
322 : * atomically with the insert. Also, the existing item at offset stack->off
323 : * in the target page is updated to point to updateblkno.
324 : *
325 : * stack->buffer is locked on entry, and is kept locked.
326 : * Likewise for childbuf, if given.
327 : */
328 : static bool
329 594866 : ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
330 : void *insertdata, BlockNumber updateblkno,
331 : Buffer childbuf, GinStatsData *buildStats)
332 : {
333 594866 : Page page = BufferGetPage(stack->buffer);
334 : bool result;
335 : GinPlaceToPageRC rc;
336 594866 : uint16 xlflags = 0;
337 594866 : Page childpage = NULL;
338 594866 : Page newlpage = NULL,
339 594866 : newrpage = NULL;
340 594866 : void *ptp_workspace = NULL;
341 : MemoryContext tmpCxt;
342 : MemoryContext oldCxt;
343 :
344 : /*
345 : * We do all the work of this function and its subfunctions in a temporary
346 : * memory context. This avoids leakages and simplifies APIs, since some
347 : * subfunctions allocate storage that has to survive until we've finished
348 : * the WAL insertion.
349 : */
350 594866 : tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
351 : "ginPlaceToPage temporary context",
352 : ALLOCSET_DEFAULT_SIZES);
353 594866 : oldCxt = MemoryContextSwitchTo(tmpCxt);
354 :
355 594866 : if (GinPageIsData(page))
356 49602 : xlflags |= GIN_INSERT_ISDATA;
357 594866 : if (GinPageIsLeaf(page))
358 : {
359 591392 : xlflags |= GIN_INSERT_ISLEAF;
360 : Assert(!BufferIsValid(childbuf));
361 : Assert(updateblkno == InvalidBlockNumber);
362 : }
363 : else
364 : {
365 : Assert(BufferIsValid(childbuf));
366 : Assert(updateblkno != InvalidBlockNumber);
367 3474 : childpage = BufferGetPage(childbuf);
368 : }
369 :
370 : /*
371 : * See if the incoming tuple will fit on the page. beginPlaceToPage will
372 : * decide if the page needs to be split, and will compute the split
373 : * contents if so. See comments for beginPlaceToPage and execPlaceToPage
374 : * functions for more details of the API here.
375 : */
376 594866 : rc = btree->beginPlaceToPage(btree, stack->buffer, stack,
377 : insertdata, updateblkno,
378 : &ptp_workspace,
379 : &newlpage, &newrpage);
380 :
381 594866 : if (rc == GPTP_NO_WORK)
382 : {
383 : /* Nothing to do */
384 0 : result = true;
385 : }
386 594866 : else if (rc == GPTP_INSERT)
387 : {
388 : /* It will fit, perform the insertion */
389 591168 : START_CRIT_SECTION();
390 :
391 591168 : if (RelationNeedsWAL(btree->index) && !btree->isBuild)
392 : {
393 202078 : XLogBeginInsert();
394 202078 : XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
395 202078 : if (BufferIsValid(childbuf))
396 834 : XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
397 : }
398 :
399 : /* Perform the page update, and register any extra WAL data */
400 591168 : btree->execPlaceToPage(btree, stack->buffer, stack,
401 : insertdata, updateblkno, ptp_workspace);
402 :
403 591168 : MarkBufferDirty(stack->buffer);
404 :
405 : /* An insert to an internal page finishes the split of the child. */
406 591168 : if (BufferIsValid(childbuf))
407 : {
408 3474 : GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
409 3474 : MarkBufferDirty(childbuf);
410 : }
411 :
412 591168 : if (RelationNeedsWAL(btree->index) && !btree->isBuild)
413 : {
414 : XLogRecPtr recptr;
415 : ginxlogInsert xlrec;
416 : BlockIdData childblknos[2];
417 :
418 202078 : xlrec.flags = xlflags;
419 :
420 202078 : XLogRegisterData((char *) &xlrec, sizeof(ginxlogInsert));
421 :
422 : /*
423 : * Log information about child if this was an insertion of a
424 : * downlink.
425 : */
426 202078 : if (BufferIsValid(childbuf))
427 : {
428 834 : BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
429 834 : BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);
430 834 : XLogRegisterData((char *) childblknos,
431 : sizeof(BlockIdData) * 2);
432 : }
433 :
434 202078 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT);
435 202078 : PageSetLSN(page, recptr);
436 202078 : if (BufferIsValid(childbuf))
437 834 : PageSetLSN(childpage, recptr);
438 : }
439 :
440 591168 : END_CRIT_SECTION();
441 :
442 : /* Insertion is complete. */
443 591168 : result = true;
444 : }
445 3698 : else if (rc == GPTP_SPLIT)
446 : {
447 : /*
448 : * Didn't fit, need to split. The split has been computed in newlpage
449 : * and newrpage, which are pointers to palloc'd pages, not associated
450 : * with buffers. stack->buffer is not touched yet.
451 : */
452 : Buffer rbuffer;
453 : BlockNumber savedRightLink;
454 : ginxlogSplit data;
455 3698 : Buffer lbuffer = InvalidBuffer;
456 3698 : Page newrootpg = NULL;
457 :
458 : /* Get a new index page to become the right page */
459 3698 : rbuffer = GinNewBuffer(btree->index);
460 :
461 : /* During index build, count the new page */
462 3698 : if (buildStats)
463 : {
464 1102 : if (btree->isData)
465 92 : buildStats->nDataPages++;
466 : else
467 1010 : buildStats->nEntryPages++;
468 : }
469 :
470 3698 : savedRightLink = GinPageGetOpaque(page)->rightlink;
471 :
472 : /* Begin setting up WAL record */
473 3698 : data.locator = btree->index->rd_locator;
474 3698 : data.flags = xlflags;
475 3698 : if (BufferIsValid(childbuf))
476 : {
477 0 : data.leftChildBlkno = BufferGetBlockNumber(childbuf);
478 0 : data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink;
479 : }
480 : else
481 3698 : data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;
482 :
483 3698 : if (stack->parent == NULL)
484 : {
485 : /*
486 : * splitting the root, so we need to allocate new left page and
487 : * place pointers to left and right page on root page.
488 : */
489 224 : lbuffer = GinNewBuffer(btree->index);
490 :
491 : /* During index build, count the new left page */
492 224 : if (buildStats)
493 : {
494 184 : if (btree->isData)
495 76 : buildStats->nDataPages++;
496 : else
497 108 : buildStats->nEntryPages++;
498 : }
499 :
500 224 : data.rrlink = InvalidBlockNumber;
501 224 : data.flags |= GIN_SPLIT_ROOT;
502 :
503 224 : GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber;
504 224 : GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
505 :
506 : /*
507 : * Construct a new root page containing downlinks to the new left
508 : * and right pages. (Do this in a temporary copy rather than
509 : * overwriting the original page directly, since we're not in the
510 : * critical section yet.)
511 : */
512 224 : newrootpg = PageGetTempPage(newrpage);
513 224 : GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~(GIN_LEAF | GIN_COMPRESSED), BLCKSZ);
514 :
515 224 : btree->fillRoot(btree, newrootpg,
516 : BufferGetBlockNumber(lbuffer), newlpage,
517 : BufferGetBlockNumber(rbuffer), newrpage);
518 :
519 224 : if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
520 : {
521 :
522 224 : PredicateLockPageSplit(btree->index,
523 : BufferGetBlockNumber(stack->buffer),
524 : BufferGetBlockNumber(lbuffer));
525 :
526 224 : PredicateLockPageSplit(btree->index,
527 : BufferGetBlockNumber(stack->buffer),
528 : BufferGetBlockNumber(rbuffer));
529 : }
530 : }
531 : else
532 : {
533 : /* splitting a non-root page */
534 3474 : data.rrlink = savedRightLink;
535 :
536 3474 : GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
537 3474 : GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
538 3474 : GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
539 :
540 3474 : if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
541 : {
542 :
543 3474 : PredicateLockPageSplit(btree->index,
544 : BufferGetBlockNumber(stack->buffer),
545 : BufferGetBlockNumber(rbuffer));
546 : }
547 : }
548 :
549 : /*
550 : * OK, we have the new contents of the left page in a temporary copy
551 : * now (newlpage), and likewise for the new contents of the
552 : * newly-allocated right block. The original page is still unchanged.
553 : *
554 : * If this is a root split, we also have a temporary page containing
555 : * the new contents of the root.
556 : */
557 :
558 3698 : START_CRIT_SECTION();
559 :
560 3698 : MarkBufferDirty(rbuffer);
561 3698 : MarkBufferDirty(stack->buffer);
562 :
563 : /*
564 : * Restore the temporary copies over the real buffers.
565 : */
566 3698 : if (stack->parent == NULL)
567 : {
568 : /* Splitting the root, three pages to update */
569 224 : MarkBufferDirty(lbuffer);
570 224 : memcpy(page, newrootpg, BLCKSZ);
571 224 : memcpy(BufferGetPage(lbuffer), newlpage, BLCKSZ);
572 224 : memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
573 : }
574 : else
575 : {
576 : /* Normal split, only two pages to update */
577 3474 : memcpy(page, newlpage, BLCKSZ);
578 3474 : memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
579 : }
580 :
581 : /* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */
582 3698 : if (BufferIsValid(childbuf))
583 : {
584 0 : GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
585 0 : MarkBufferDirty(childbuf);
586 : }
587 :
588 : /* write WAL record */
589 3698 : if (RelationNeedsWAL(btree->index) && !btree->isBuild)
590 : {
591 : XLogRecPtr recptr;
592 :
593 856 : XLogBeginInsert();
594 :
595 : /*
596 : * We just take full page images of all the split pages. Splits
597 : * are uncommon enough that it's not worth complicating the code
598 : * to be more efficient.
599 : */
600 856 : if (stack->parent == NULL)
601 : {
602 22 : XLogRegisterBuffer(0, lbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
603 22 : XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
604 22 : XLogRegisterBuffer(2, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
605 : }
606 : else
607 : {
608 834 : XLogRegisterBuffer(0, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
609 834 : XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
610 : }
611 856 : if (BufferIsValid(childbuf))
612 0 : XLogRegisterBuffer(3, childbuf, REGBUF_STANDARD);
613 :
614 856 : XLogRegisterData((char *) &data, sizeof(ginxlogSplit));
615 :
616 856 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT);
617 :
618 856 : PageSetLSN(page, recptr);
619 856 : PageSetLSN(BufferGetPage(rbuffer), recptr);
620 856 : if (stack->parent == NULL)
621 22 : PageSetLSN(BufferGetPage(lbuffer), recptr);
622 856 : if (BufferIsValid(childbuf))
623 0 : PageSetLSN(childpage, recptr);
624 : }
625 3698 : END_CRIT_SECTION();
626 :
627 : /*
628 : * We can release the locks/pins on the new pages now, but keep
629 : * stack->buffer locked. childbuf doesn't get unlocked either.
630 : */
631 3698 : UnlockReleaseBuffer(rbuffer);
632 3698 : if (stack->parent == NULL)
633 224 : UnlockReleaseBuffer(lbuffer);
634 :
635 : /*
636 : * If we split the root, we're done. Otherwise the split is not
637 : * complete until the downlink for the new page has been inserted to
638 : * the parent.
639 : */
640 3698 : result = (stack->parent == NULL);
641 : }
642 : else
643 : {
644 0 : elog(ERROR, "invalid return code from GIN beginPlaceToPage method: %d", rc);
645 : result = false; /* keep compiler quiet */
646 : }
647 :
648 : /* Clean up temp context */
649 594866 : MemoryContextSwitchTo(oldCxt);
650 594866 : MemoryContextDelete(tmpCxt);
651 :
652 594866 : return result;
653 : }
654 :
655 : /*
656 : * Finish a split by inserting the downlink for the new page to parent.
657 : *
658 : * On entry, stack->buffer is exclusively locked.
659 : *
660 : * If freestack is true, all the buffers are released and unlocked as we
661 : * crawl up the tree, and 'stack' is freed. Otherwise stack->buffer is kept
662 : * locked, and stack is unmodified, except for possibly moving right to find
663 : * the correct parent of page.
664 : */
665 : static void
666 3474 : ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
667 : GinStatsData *buildStats)
668 : {
669 : Page page;
670 : bool done;
671 3474 : bool first = true;
672 :
673 : /*
674 : * freestack == false when we encounter an incompletely split page during
675 : * a scan, while freestack == true is used in the normal scenario that a
676 : * split is finished right after the initial insert.
677 : */
678 3474 : if (!freestack)
679 0 : elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"",
680 : stack->blkno, RelationGetRelationName(btree->index));
681 :
682 : /* this loop crawls up the stack until the insertion is complete */
683 : do
684 : {
685 3474 : GinBtreeStack *parent = stack->parent;
686 : void *insertdata;
687 : BlockNumber updateblkno;
688 :
689 : /* search parent to lock */
690 3474 : LockBuffer(parent->buffer, GIN_EXCLUSIVE);
691 :
692 : /*
693 : * If the parent page was incompletely split, finish that split first,
694 : * then continue with the current one.
695 : *
696 : * Note: we have to finish *all* incomplete splits we encounter, even
697 : * if we have to move right. Otherwise we might choose as the target a
698 : * page that has no downlink in the parent, and splitting it further
699 : * would fail.
700 : */
701 3474 : if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
702 0 : ginFinishSplit(btree, parent, false, buildStats);
703 :
704 : /* move right if it's needed */
705 3474 : page = BufferGetPage(parent->buffer);
706 3474 : while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
707 : {
708 0 : if (GinPageRightMost(page))
709 : {
710 : /*
711 : * rightmost page, but we don't find parent, we should use
712 : * plain search...
713 : */
714 0 : LockBuffer(parent->buffer, GIN_UNLOCK);
715 0 : ginFindParents(btree, stack);
716 0 : parent = stack->parent;
717 : Assert(parent != NULL);
718 0 : break;
719 : }
720 :
721 0 : parent->buffer = ginStepRight(parent->buffer, btree->index, GIN_EXCLUSIVE);
722 0 : parent->blkno = BufferGetBlockNumber(parent->buffer);
723 0 : page = BufferGetPage(parent->buffer);
724 :
725 0 : if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
726 0 : ginFinishSplit(btree, parent, false, buildStats);
727 : }
728 :
729 : /* insert the downlink */
730 3474 : insertdata = btree->prepareDownlink(btree, stack->buffer);
731 3474 : updateblkno = GinPageGetOpaque(BufferGetPage(stack->buffer))->rightlink;
732 3474 : done = ginPlaceToPage(btree, parent,
733 : insertdata, updateblkno,
734 : stack->buffer, buildStats);
735 3474 : pfree(insertdata);
736 :
737 : /*
738 : * If the caller requested to free the stack, unlock and release the
739 : * child buffer now. Otherwise keep it pinned and locked, but if we
740 : * have to recurse up the tree, we can unlock the upper pages, only
741 : * keeping the page at the bottom of the stack locked.
742 : */
743 3474 : if (!first || freestack)
744 3474 : LockBuffer(stack->buffer, GIN_UNLOCK);
745 3474 : if (freestack)
746 : {
747 3474 : ReleaseBuffer(stack->buffer);
748 3474 : pfree(stack);
749 : }
750 3474 : stack = parent;
751 :
752 3474 : first = false;
753 3474 : } while (!done);
754 :
755 : /* unlock the parent */
756 3474 : LockBuffer(stack->buffer, GIN_UNLOCK);
757 :
758 3474 : if (freestack)
759 3474 : freeGinBtreeStack(stack);
760 3474 : }
761 :
762 : /*
763 : * Insert a value to tree described by stack.
764 : *
765 : * The value to be inserted is given in 'insertdata'. Its format depends
766 : * on whether this is an entry or data tree, ginInsertValue just passes it
767 : * through to the tree-specific callback function.
768 : *
769 : * During an index build, buildStats is non-null and the counters it contains
770 : * are incremented as needed.
771 : *
772 : * NB: the passed-in stack is freed, as though by freeGinBtreeStack.
773 : */
774 : void
775 591392 : ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata,
776 : GinStatsData *buildStats)
777 : {
778 : bool done;
779 :
780 : /* If the leaf page was incompletely split, finish the split first */
781 591392 : if (GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
782 0 : ginFinishSplit(btree, stack, false, buildStats);
783 :
784 591392 : done = ginPlaceToPage(btree, stack,
785 : insertdata, InvalidBlockNumber,
786 : InvalidBuffer, buildStats);
787 591392 : if (done)
788 : {
789 587918 : LockBuffer(stack->buffer, GIN_UNLOCK);
790 587918 : freeGinBtreeStack(stack);
791 : }
792 : else
793 3474 : ginFinishSplit(btree, stack, true, buildStats);
794 591392 : }
|