LCOV - code coverage report
Current view: top level - src/backend/access/gin - ginbtree.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 222 286 77.6 %
Date: 2025-01-18 04:15:08 Functions: 8 9 88.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * ginbtree.c
       4             :  *    page utilities routines for the postgres inverted index access method.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *          src/backend/access/gin/ginbtree.c
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/gin_private.h"
      18             : #include "access/ginxlog.h"
      19             : #include "access/xloginsert.h"
      20             : #include "miscadmin.h"
      21             : #include "storage/predicate.h"
      22             : #include "utils/injection_point.h"
      23             : #include "utils/memutils.h"
      24             : #include "utils/rel.h"
      25             : 
      26             : static void ginFindParents(GinBtree btree, GinBtreeStack *stack);
      27             : static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
      28             :                            void *insertdata, BlockNumber updateblkno,
      29             :                            Buffer childbuf, GinStatsData *buildStats);
      30             : static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
      31             :                            bool freestack, GinStatsData *buildStats);
      32             : static void ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack,
      33             :                               GinStatsData *buildStats, int access);
      34             : 
      35             : /*
      36             :  * Lock buffer by needed method for search.
      37             :  */
      38             : int
      39     1622760 : ginTraverseLock(Buffer buffer, bool searchMode)
      40             : {
      41             :     Page        page;
      42     1622760 :     int         access = GIN_SHARE;
      43             : 
      44     1622760 :     LockBuffer(buffer, GIN_SHARE);
      45     1622760 :     page = BufferGetPage(buffer);
      46     1622760 :     if (GinPageIsLeaf(page))
      47             :     {
      48      827836 :         if (searchMode == false)
      49             :         {
      50             :             /* we should relock our page */
      51      821062 :             LockBuffer(buffer, GIN_UNLOCK);
      52      821062 :             LockBuffer(buffer, GIN_EXCLUSIVE);
      53             : 
      54             :             /* But root can become non-leaf during relock */
      55      821062 :             if (!GinPageIsLeaf(page))
      56             :             {
      57             :                 /* restore old lock type (very rare) */
      58           0 :                 LockBuffer(buffer, GIN_UNLOCK);
      59           0 :                 LockBuffer(buffer, GIN_SHARE);
      60             :             }
      61             :             else
      62      821062 :                 access = GIN_EXCLUSIVE;
      63             :         }
      64             :     }
      65             : 
      66     1622760 :     return access;
      67             : }
      68             : 
      69             : /*
      70             :  * Descend the tree to the leaf page that contains or would contain the key
      71             :  * we're searching for. The key should already be filled in 'btree', in
      72             :  * tree-type specific manner. If btree->fullScan is true, descends to the
      73             :  * leftmost leaf page.
      74             :  *
      75             :  * If 'searchmode' is false, on return stack->buffer is exclusively locked,
      76             :  * and the stack represents the full path to the root. Otherwise stack->buffer
      77             :  * is share-locked, and stack->parent is NULL.
      78             :  *
      79             :  * If 'rootConflictCheck' is true, tree root is checked for serialization
      80             :  * conflict.
      81             :  */
      82             : GinBtreeStack *
      83      827840 : ginFindLeafPage(GinBtree btree, bool searchMode,
      84             :                 bool rootConflictCheck)
      85             : {
      86             :     GinBtreeStack *stack;
      87             : 
      88      827840 :     stack = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
      89      827840 :     stack->blkno = btree->rootBlkno;
      90      827840 :     stack->buffer = ReadBuffer(btree->index, btree->rootBlkno);
      91      827840 :     stack->parent = NULL;
      92      827840 :     stack->predictNumber = 1;
      93             : 
      94      827840 :     if (rootConflictCheck)
      95       49560 :         CheckForSerializableConflictIn(btree->index, NULL, btree->rootBlkno);
      96             : 
      97             :     for (;;)
      98      794924 :     {
      99             :         Page        page;
     100             :         BlockNumber child;
     101             :         int         access;
     102             : 
     103     1622760 :         stack->off = InvalidOffsetNumber;
     104             : 
     105     1622760 :         page = BufferGetPage(stack->buffer);
     106             : 
     107     1622760 :         access = ginTraverseLock(stack->buffer, searchMode);
     108             : 
     109             :         /*
     110             :          * If we're going to modify the tree, finish any incomplete splits we
     111             :          * encounter on the way.
     112             :          */
     113     1622760 :         if (!searchMode && GinPageIsIncompleteSplit(page))
     114           4 :             ginFinishOldSplit(btree, stack, NULL, access);
     115             : 
     116             :         /*
     117             :          * ok, page is correctly locked, we should check to move right ..,
     118             :          * root never has a right link, so small optimization
     119             :          */
     120     2419792 :         while (btree->fullScan == false && stack->blkno != btree->rootBlkno &&
     121      795940 :                btree->isMoveRight(btree, page))
     122             :         {
     123        1092 :             BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
     124             : 
     125        1092 :             if (rightlink == InvalidBlockNumber)
     126             :                 /* rightmost page */
     127           0 :                 break;
     128             : 
     129        1092 :             stack->buffer = ginStepRight(stack->buffer, btree->index, access);
     130        1092 :             stack->blkno = rightlink;
     131        1092 :             page = BufferGetPage(stack->buffer);
     132             : 
     133        1092 :             if (!searchMode && GinPageIsIncompleteSplit(page))
     134           0 :                 ginFinishOldSplit(btree, stack, NULL, access);
     135             :         }
     136             : 
     137     1622760 :         if (GinPageIsLeaf(page))    /* we found, return locked page */
     138      827836 :             return stack;
     139             : 
     140             :         /* now we have correct buffer, try to find child */
     141      794924 :         child = btree->findChildPage(btree, stack);
     142             : 
     143      794924 :         LockBuffer(stack->buffer, GIN_UNLOCK);
     144             :         Assert(child != InvalidBlockNumber);
     145             :         Assert(stack->blkno != child);
     146             : 
     147      794924 :         if (searchMode)
     148             :         {
     149             :             /* in search mode we may forget path to leaf */
     150        6794 :             stack->blkno = child;
     151        6794 :             stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
     152             :         }
     153             :         else
     154             :         {
     155      788130 :             GinBtreeStack *ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
     156             : 
     157      788130 :             ptr->parent = stack;
     158      788130 :             stack = ptr;
     159      788130 :             stack->blkno = child;
     160      788130 :             stack->buffer = ReadBuffer(btree->index, stack->blkno);
     161      788130 :             stack->predictNumber = 1;
     162             :         }
     163             :     }
     164             : }
     165             : 
     166             : /*
     167             :  * Step right from current page.
     168             :  *
     169             :  * The next page is locked first, before releasing the current page. This is
     170             :  * crucial to prevent concurrent VACUUM from deleting a page that we are about
     171             :  * to step to. (The lock-coupling isn't strictly necessary when we are
     172             :  * traversing the tree to find an insert location, because page deletion grabs
     173             :  * a cleanup lock on the root to prevent any concurrent inserts. See Page
     174             :  * deletion section in the README. But there's no harm in doing it always.)
     175             :  */
     176             : Buffer
     177        4628 : ginStepRight(Buffer buffer, Relation index, int lockmode)
     178             : {
     179             :     Buffer      nextbuffer;
     180        4628 :     Page        page = BufferGetPage(buffer);
     181        4628 :     bool        isLeaf = GinPageIsLeaf(page);
     182        4628 :     bool        isData = GinPageIsData(page);
     183        4628 :     BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
     184             : 
     185        4628 :     nextbuffer = ReadBuffer(index, blkno);
     186        4628 :     LockBuffer(nextbuffer, lockmode);
     187        4628 :     UnlockReleaseBuffer(buffer);
     188             : 
     189             :     /* Sanity check that the page we stepped to is of similar kind. */
     190        4628 :     page = BufferGetPage(nextbuffer);
     191        4628 :     if (isLeaf != GinPageIsLeaf(page) || isData != GinPageIsData(page))
     192           0 :         elog(ERROR, "right sibling of GIN page is of different type");
     193             : 
     194        4628 :     return nextbuffer;
     195             : }
     196             : 
     197             : void
     198      827820 : freeGinBtreeStack(GinBtreeStack *stack)
     199             : {
     200     2439060 :     while (stack)
     201             :     {
     202     1611240 :         GinBtreeStack *tmp = stack->parent;
     203             : 
     204     1611240 :         if (stack->buffer != InvalidBuffer)
     205     1611240 :             ReleaseBuffer(stack->buffer);
     206             : 
     207     1611240 :         pfree(stack);
     208     1611240 :         stack = tmp;
     209             :     }
     210      827820 : }
     211             : 
     212             : /*
     213             :  * Try to find parent for current stack position. Returns correct parent and
     214             :  * child's offset in stack->parent. The root page is never released, to
     215             :  * prevent conflict with vacuum process.
     216             :  */
     217             : static void
     218           0 : ginFindParents(GinBtree btree, GinBtreeStack *stack)
     219             : {
     220             :     Page        page;
     221             :     Buffer      buffer;
     222             :     BlockNumber blkno,
     223             :                 leftmostBlkno;
     224             :     OffsetNumber offset;
     225             :     GinBtreeStack *root;
     226             :     GinBtreeStack *ptr;
     227             : 
     228             :     /*
     229             :      * Unwind the stack all the way up to the root, leaving only the root
     230             :      * item.
     231             :      *
     232             :      * Be careful not to release the pin on the root page! The pin on root
     233             :      * page is required to lock out concurrent vacuums on the tree.
     234             :      */
     235           0 :     root = stack->parent;
     236           0 :     while (root->parent)
     237             :     {
     238           0 :         ReleaseBuffer(root->buffer);
     239           0 :         root = root->parent;
     240             :     }
     241             : 
     242             :     Assert(root->blkno == btree->rootBlkno);
     243             :     Assert(BufferGetBlockNumber(root->buffer) == btree->rootBlkno);
     244           0 :     root->off = InvalidOffsetNumber;
     245             : 
     246           0 :     blkno = root->blkno;
     247           0 :     buffer = root->buffer;
     248             : 
     249           0 :     ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
     250             : 
     251             :     for (;;)
     252             :     {
     253           0 :         LockBuffer(buffer, GIN_EXCLUSIVE);
     254           0 :         page = BufferGetPage(buffer);
     255           0 :         if (GinPageIsLeaf(page))
     256           0 :             elog(ERROR, "Lost path");
     257             : 
     258           0 :         if (GinPageIsIncompleteSplit(page))
     259             :         {
     260             :             Assert(blkno != btree->rootBlkno);
     261           0 :             ptr->blkno = blkno;
     262           0 :             ptr->buffer = buffer;
     263             : 
     264             :             /*
     265             :              * parent may be wrong, but if so, the ginFinishSplit call will
     266             :              * recurse to call ginFindParents again to fix it.
     267             :              */
     268           0 :             ptr->parent = root;
     269           0 :             ptr->off = InvalidOffsetNumber;
     270             : 
     271           0 :             ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE);
     272             :         }
     273             : 
     274           0 :         leftmostBlkno = btree->getLeftMostChild(btree, page);
     275             : 
     276           0 :         while ((offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) == InvalidOffsetNumber)
     277             :         {
     278           0 :             blkno = GinPageGetOpaque(page)->rightlink;
     279           0 :             if (blkno == InvalidBlockNumber)
     280             :             {
     281             :                 /* Link not present in this level */
     282           0 :                 LockBuffer(buffer, GIN_UNLOCK);
     283             :                 /* Do not release pin on the root buffer */
     284           0 :                 if (buffer != root->buffer)
     285           0 :                     ReleaseBuffer(buffer);
     286           0 :                 break;
     287             :             }
     288           0 :             buffer = ginStepRight(buffer, btree->index, GIN_EXCLUSIVE);
     289           0 :             page = BufferGetPage(buffer);
     290             : 
     291             :             /* finish any incomplete splits, as above */
     292           0 :             if (GinPageIsIncompleteSplit(page))
     293             :             {
     294             :                 Assert(blkno != btree->rootBlkno);
     295           0 :                 ptr->blkno = blkno;
     296           0 :                 ptr->buffer = buffer;
     297           0 :                 ptr->parent = root;
     298           0 :                 ptr->off = InvalidOffsetNumber;
     299             : 
     300           0 :                 ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE);
     301             :             }
     302             :         }
     303             : 
     304           0 :         if (blkno != InvalidBlockNumber)
     305             :         {
     306           0 :             ptr->blkno = blkno;
     307           0 :             ptr->buffer = buffer;
     308           0 :             ptr->parent = root; /* it may be wrong, but in next call we will
     309             :                                  * correct */
     310           0 :             ptr->off = offset;
     311           0 :             stack->parent = ptr;
     312           0 :             return;
     313             :         }
     314             : 
     315             :         /* Descend down to next level */
     316           0 :         blkno = leftmostBlkno;
     317           0 :         buffer = ReadBuffer(btree->index, blkno);
     318             :     }
     319             : }
     320             : 
     321             : /*
     322             :  * Insert a new item to a page.
     323             :  *
     324             :  * Returns true if the insertion was finished. On false, the page was split and
     325             :  * the parent needs to be updated. (A root split returns true as it doesn't
     326             :  * need any further action by the caller to complete.)
     327             :  *
     328             :  * When inserting a downlink to an internal page, 'childbuf' contains the
     329             :  * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared
     330             :  * atomically with the insert. Also, the existing item at offset stack->off
     331             :  * in the target page is updated to point to updateblkno.
     332             :  *
     333             :  * stack->buffer is locked on entry, and is kept locked.
     334             :  * Likewise for childbuf, if given.
     335             :  */
     336             : static bool
     337      776356 : ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
     338             :                void *insertdata, BlockNumber updateblkno,
     339             :                Buffer childbuf, GinStatsData *buildStats)
     340             : {
     341      776356 :     Page        page = BufferGetPage(stack->buffer);
     342             :     bool        result;
     343             :     GinPlaceToPageRC rc;
     344      776356 :     uint16      xlflags = 0;
     345      776356 :     Page        childpage = NULL;
     346      776356 :     Page        newlpage = NULL,
     347      776356 :                 newrpage = NULL;
     348      776356 :     void       *ptp_workspace = NULL;
     349             :     MemoryContext tmpCxt;
     350             :     MemoryContext oldCxt;
     351             : 
     352             :     /*
     353             :      * We do all the work of this function and its subfunctions in a temporary
     354             :      * memory context.  This avoids leakages and simplifies APIs, since some
     355             :      * subfunctions allocate storage that has to survive until we've finished
     356             :      * the WAL insertion.
     357             :      */
     358      776356 :     tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
     359             :                                    "ginPlaceToPage temporary context",
     360             :                                    ALLOCSET_DEFAULT_SIZES);
     361      776356 :     oldCxt = MemoryContextSwitchTo(tmpCxt);
     362             : 
     363      776356 :     if (GinPageIsData(page))
     364       49602 :         xlflags |= GIN_INSERT_ISDATA;
     365      776356 :     if (GinPageIsLeaf(page))
     366             :     {
     367      771658 :         xlflags |= GIN_INSERT_ISLEAF;
     368             :         Assert(!BufferIsValid(childbuf));
     369             :         Assert(updateblkno == InvalidBlockNumber);
     370             :     }
     371             :     else
     372             :     {
     373             :         Assert(BufferIsValid(childbuf));
     374             :         Assert(updateblkno != InvalidBlockNumber);
     375        4698 :         childpage = BufferGetPage(childbuf);
     376             :     }
     377             : 
     378             :     /*
     379             :      * See if the incoming tuple will fit on the page.  beginPlaceToPage will
     380             :      * decide if the page needs to be split, and will compute the split
     381             :      * contents if so.  See comments for beginPlaceToPage and execPlaceToPage
     382             :      * functions for more details of the API here.
     383             :      */
     384      776356 :     rc = btree->beginPlaceToPage(btree, stack->buffer, stack,
     385             :                                  insertdata, updateblkno,
     386             :                                  &ptp_workspace,
     387             :                                  &newlpage, &newrpage);
     388             : 
     389      776356 :     if (rc == GPTP_NO_WORK)
     390             :     {
     391             :         /* Nothing to do */
     392           0 :         result = true;
     393             :     }
     394      776356 :     else if (rc == GPTP_INSERT)
     395             :     {
     396             :         /* It will fit, perform the insertion */
     397      771430 :         START_CRIT_SECTION();
     398             : 
     399      771430 :         if (RelationNeedsWAL(btree->index) && !btree->isBuild)
     400      382334 :             XLogBeginInsert();
     401             : 
     402             :         /*
     403             :          * Perform the page update, dirty and register stack->buffer, and
     404             :          * register any extra WAL data.
     405             :          */
     406      771430 :         btree->execPlaceToPage(btree, stack->buffer, stack,
     407             :                                insertdata, updateblkno, ptp_workspace);
     408             : 
     409             :         /* An insert to an internal page finishes the split of the child. */
     410      771430 :         if (BufferIsValid(childbuf))
     411             :         {
     412        4694 :             GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
     413        4694 :             MarkBufferDirty(childbuf);
     414        4694 :             if (RelationNeedsWAL(btree->index) && !btree->isBuild)
     415        2054 :                 XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
     416             :         }
     417             : 
     418      771430 :         if (RelationNeedsWAL(btree->index) && !btree->isBuild)
     419             :         {
     420             :             XLogRecPtr  recptr;
     421             :             ginxlogInsert xlrec;
     422             :             BlockIdData childblknos[2];
     423             : 
     424      382334 :             xlrec.flags = xlflags;
     425             : 
     426      382334 :             XLogRegisterData((char *) &xlrec, sizeof(ginxlogInsert));
     427             : 
     428             :             /*
     429             :              * Log information about child if this was an insertion of a
     430             :              * downlink.
     431             :              */
     432      382334 :             if (BufferIsValid(childbuf))
     433             :             {
     434        2054 :                 BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
     435        2054 :                 BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);
     436        2054 :                 XLogRegisterData((char *) childblknos,
     437             :                                  sizeof(BlockIdData) * 2);
     438             :             }
     439             : 
     440      382334 :             recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT);
     441      382334 :             PageSetLSN(page, recptr);
     442      382334 :             if (BufferIsValid(childbuf))
     443        2054 :                 PageSetLSN(childpage, recptr);
     444             :         }
     445             : 
     446      771430 :         END_CRIT_SECTION();
     447             : 
     448             :         /* Insertion is complete. */
     449      771430 :         result = true;
     450             :     }
     451        4926 :     else if (rc == GPTP_SPLIT)
     452             :     {
     453             :         /*
     454             :          * Didn't fit, need to split.  The split has been computed in newlpage
     455             :          * and newrpage, which are pointers to palloc'd pages, not associated
     456             :          * with buffers.  stack->buffer is not touched yet.
     457             :          */
     458             :         Buffer      rbuffer;
     459             :         BlockNumber savedRightLink;
     460             :         ginxlogSplit data;
     461        4926 :         Buffer      lbuffer = InvalidBuffer;
     462        4926 :         Page        newrootpg = NULL;
     463             : 
     464             :         /* Get a new index page to become the right page */
     465        4926 :         rbuffer = GinNewBuffer(btree->index);
     466             : 
     467             :         /* During index build, count the new page */
     468        4926 :         if (buildStats)
     469             :         {
     470        1102 :             if (btree->isData)
     471          92 :                 buildStats->nDataPages++;
     472             :             else
     473        1010 :                 buildStats->nEntryPages++;
     474             :         }
     475             : 
     476        4926 :         savedRightLink = GinPageGetOpaque(page)->rightlink;
     477             : 
     478             :         /* Begin setting up WAL record */
     479        4926 :         data.locator = btree->index->rd_locator;
     480        4926 :         data.flags = xlflags;
     481        4926 :         if (BufferIsValid(childbuf))
     482             :         {
     483           4 :             data.leftChildBlkno = BufferGetBlockNumber(childbuf);
     484           4 :             data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink;
     485             :         }
     486             :         else
     487        4922 :             data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;
     488             : 
     489        4926 :         if (stack->parent == NULL)
     490             :         {
     491             :             /*
     492             :              * splitting the root, so we need to allocate new left page and
     493             :              * place pointers to left and right page on root page.
     494             :              */
     495         228 :             lbuffer = GinNewBuffer(btree->index);
     496             : 
     497             :             /* During index build, count the new left page */
     498         228 :             if (buildStats)
     499             :             {
     500         184 :                 if (btree->isData)
     501          76 :                     buildStats->nDataPages++;
     502             :                 else
     503         108 :                     buildStats->nEntryPages++;
     504             :             }
     505             : 
     506         228 :             data.rrlink = InvalidBlockNumber;
     507         228 :             data.flags |= GIN_SPLIT_ROOT;
     508             : 
     509         228 :             GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber;
     510         228 :             GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
     511             : 
     512             :             /*
     513             :              * Construct a new root page containing downlinks to the new left
     514             :              * and right pages.  (Do this in a temporary copy rather than
     515             :              * overwriting the original page directly, since we're not in the
     516             :              * critical section yet.)
     517             :              */
     518         228 :             newrootpg = PageGetTempPage(newrpage);
     519         228 :             GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~(GIN_LEAF | GIN_COMPRESSED), BLCKSZ);
     520             : 
     521         228 :             btree->fillRoot(btree, newrootpg,
     522             :                             BufferGetBlockNumber(lbuffer), newlpage,
     523             :                             BufferGetBlockNumber(rbuffer), newrpage);
     524             : 
     525         228 :             if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
     526             :             {
     527             : 
     528         226 :                 PredicateLockPageSplit(btree->index,
     529             :                                        BufferGetBlockNumber(stack->buffer),
     530             :                                        BufferGetBlockNumber(lbuffer));
     531             : 
     532         226 :                 PredicateLockPageSplit(btree->index,
     533             :                                        BufferGetBlockNumber(stack->buffer),
     534             :                                        BufferGetBlockNumber(rbuffer));
     535             :             }
     536             :         }
     537             :         else
     538             :         {
     539             :             /* splitting a non-root page */
     540        4698 :             data.rrlink = savedRightLink;
     541             : 
     542        4698 :             GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
     543        4698 :             GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
     544        4698 :             GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
     545             : 
     546        4698 :             if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
     547             :             {
     548             : 
     549        4696 :                 PredicateLockPageSplit(btree->index,
     550             :                                        BufferGetBlockNumber(stack->buffer),
     551             :                                        BufferGetBlockNumber(rbuffer));
     552             :             }
     553             :         }
     554             : 
     555             :         /*
     556             :          * OK, we have the new contents of the left page in a temporary copy
     557             :          * now (newlpage), and likewise for the new contents of the
     558             :          * newly-allocated right block. The original page is still unchanged.
     559             :          *
     560             :          * If this is a root split, we also have a temporary page containing
     561             :          * the new contents of the root.
     562             :          */
     563             : 
     564        4926 :         START_CRIT_SECTION();
     565             : 
     566        4926 :         MarkBufferDirty(rbuffer);
     567        4926 :         MarkBufferDirty(stack->buffer);
     568             : 
     569             :         /*
     570             :          * Restore the temporary copies over the real buffers.
     571             :          */
     572        4926 :         if (stack->parent == NULL)
     573             :         {
     574             :             /* Splitting the root, three pages to update */
     575         228 :             MarkBufferDirty(lbuffer);
     576         228 :             memcpy(page, newrootpg, BLCKSZ);
     577         228 :             memcpy(BufferGetPage(lbuffer), newlpage, BLCKSZ);
     578         228 :             memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
     579             :         }
     580             :         else
     581             :         {
     582             :             /* Normal split, only two pages to update */
     583        4698 :             memcpy(page, newlpage, BLCKSZ);
     584        4698 :             memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
     585             :         }
     586             : 
     587             :         /* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */
     588        4926 :         if (BufferIsValid(childbuf))
     589             :         {
     590           4 :             GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
     591           4 :             MarkBufferDirty(childbuf);
     592             :         }
     593             : 
     594             :         /* write WAL record */
     595        4926 :         if (RelationNeedsWAL(btree->index) && !btree->isBuild)
     596             :         {
     597             :             XLogRecPtr  recptr;
     598             : 
     599        2084 :             XLogBeginInsert();
     600             : 
     601             :             /*
     602             :              * We just take full page images of all the split pages. Splits
     603             :              * are uncommon enough that it's not worth complicating the code
     604             :              * to be more efficient.
     605             :              */
     606        2084 :             if (stack->parent == NULL)
     607             :             {
     608          26 :                 XLogRegisterBuffer(0, lbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
     609          26 :                 XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
     610          26 :                 XLogRegisterBuffer(2, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
     611             :             }
     612             :             else
     613             :             {
     614        2058 :                 XLogRegisterBuffer(0, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
     615        2058 :                 XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
     616             :             }
     617        2084 :             if (BufferIsValid(childbuf))
     618           4 :                 XLogRegisterBuffer(3, childbuf, REGBUF_STANDARD);
     619             : 
     620        2084 :             XLogRegisterData((char *) &data, sizeof(ginxlogSplit));
     621             : 
     622        2084 :             recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT);
     623             : 
     624        2084 :             PageSetLSN(page, recptr);
     625        2084 :             PageSetLSN(BufferGetPage(rbuffer), recptr);
     626        2084 :             if (stack->parent == NULL)
     627          26 :                 PageSetLSN(BufferGetPage(lbuffer), recptr);
     628        2084 :             if (BufferIsValid(childbuf))
     629           4 :                 PageSetLSN(childpage, recptr);
     630             :         }
     631        4926 :         END_CRIT_SECTION();
     632             : 
     633             :         /*
     634             :          * We can release the locks/pins on the new pages now, but keep
     635             :          * stack->buffer locked.  childbuf doesn't get unlocked either.
     636             :          */
     637        4926 :         UnlockReleaseBuffer(rbuffer);
     638        4926 :         if (stack->parent == NULL)
     639         228 :             UnlockReleaseBuffer(lbuffer);
     640             : 
     641             :         /*
     642             :          * If we split the root, we're done. Otherwise the split is not
     643             :          * complete until the downlink for the new page has been inserted to
     644             :          * the parent.
     645             :          */
     646        4926 :         result = (stack->parent == NULL);
     647             :     }
     648             :     else
     649             :     {
     650           0 :         elog(ERROR, "invalid return code from GIN beginPlaceToPage method: %d", rc);
     651             :         result = false;         /* keep compiler quiet */
     652             :     }
     653             : 
     654             :     /* Clean up temp context */
     655      776356 :     MemoryContextSwitchTo(oldCxt);
     656      776356 :     MemoryContextDelete(tmpCxt);
     657             : 
     658      776356 :     return result;
     659             : }
     660             : 
     661             : /*
     662             :  * Finish a split by inserting the downlink for the new page to parent.
     663             :  *
     664             :  * On entry, stack->buffer is exclusively locked.
     665             :  *
     666             :  * If freestack is true, all the buffers are released and unlocked as we
     667             :  * crawl up the tree, and 'stack' is freed. Otherwise stack->buffer is kept
     668             :  * locked, and stack is unmodified, except for possibly moving right to find
     669             :  * the correct parent of page.
     670             :  */
     671             : static void
     672        4700 : ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
     673             :                GinStatsData *buildStats)
     674             : {
     675             :     Page        page;
     676             :     bool        done;
     677        4700 :     bool        first = true;
     678             : 
     679             :     /* this loop crawls up the stack until the insertion is complete */
     680             :     do
     681             :     {
     682        4702 :         GinBtreeStack *parent = stack->parent;
     683             :         void       *insertdata;
     684             :         BlockNumber updateblkno;
     685             : 
     686             : #ifdef USE_INJECTION_POINTS
     687        4702 :         if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
     688        4698 :             INJECTION_POINT("gin-leave-leaf-split-incomplete");
     689             :         else
     690           4 :             INJECTION_POINT("gin-leave-internal-split-incomplete");
     691             : #endif
     692             : 
     693             :         /* search parent to lock */
     694        4698 :         LockBuffer(parent->buffer, GIN_EXCLUSIVE);
     695             : 
     696             :         /*
     697             :          * If the parent page was incompletely split, finish that split first,
     698             :          * then continue with the current one.
     699             :          *
     700             :          * Note: we have to finish *all* incomplete splits we encounter, even
     701             :          * if we have to move right. Otherwise we might choose as the target a
     702             :          * page that has no downlink in the parent, and splitting it further
     703             :          * would fail.
     704             :          */
     705        4698 :         if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
     706           0 :             ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE);
     707             : 
     708             :         /* move right if it's needed */
     709        4698 :         page = BufferGetPage(parent->buffer);
     710        4698 :         while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
     711             :         {
     712           0 :             if (GinPageRightMost(page))
     713             :             {
     714             :                 /*
     715             :                  * rightmost page, but we don't find parent, we should use
     716             :                  * plain search...
     717             :                  */
     718           0 :                 LockBuffer(parent->buffer, GIN_UNLOCK);
     719           0 :                 ginFindParents(btree, stack);
     720           0 :                 parent = stack->parent;
     721             :                 Assert(parent != NULL);
     722           0 :                 break;
     723             :             }
     724             : 
     725           0 :             parent->buffer = ginStepRight(parent->buffer, btree->index, GIN_EXCLUSIVE);
     726           0 :             parent->blkno = BufferGetBlockNumber(parent->buffer);
     727           0 :             page = BufferGetPage(parent->buffer);
     728             : 
     729           0 :             if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
     730           0 :                 ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE);
     731             :         }
     732             : 
     733             :         /* insert the downlink */
     734        4698 :         insertdata = btree->prepareDownlink(btree, stack->buffer);
     735        4698 :         updateblkno = GinPageGetOpaque(BufferGetPage(stack->buffer))->rightlink;
     736        4698 :         done = ginPlaceToPage(btree, parent,
     737             :                               insertdata, updateblkno,
     738             :                               stack->buffer, buildStats);
     739        4698 :         pfree(insertdata);
     740             : 
     741             :         /*
     742             :          * If the caller requested to free the stack, unlock and release the
     743             :          * child buffer now. Otherwise keep it pinned and locked, but if we
     744             :          * have to recurse up the tree, we can unlock the upper pages, only
     745             :          * keeping the page at the bottom of the stack locked.
     746             :          */
     747        4698 :         if (!first || freestack)
     748        4694 :             LockBuffer(stack->buffer, GIN_UNLOCK);
     749        4698 :         if (freestack)
     750             :         {
     751        4694 :             ReleaseBuffer(stack->buffer);
     752        4694 :             pfree(stack);
     753             :         }
     754        4698 :         stack = parent;
     755             : 
     756        4698 :         first = false;
     757        4698 :     } while (!done);
     758             : 
     759             :     /* unlock the parent */
     760        4696 :     LockBuffer(stack->buffer, GIN_UNLOCK);
     761             : 
     762        4696 :     if (freestack)
     763        4692 :         freeGinBtreeStack(stack);
     764        4696 : }
     765             : 
     766             : /*
     767             :  * An entry point to ginFinishSplit() that is used when we stumble upon an
     768             :  * existing incompletely split page in the tree, as opposed to completing a
     769             :  * split that we just made ourselves. The difference is that stack->buffer may
     770             :  * be merely share-locked on entry, and will be upgraded to exclusive mode.
     771             :  *
     772             :  * Note: Upgrading the lock momentarily releases it. Doing that in a scan
     773             :  * would not be OK, because a concurrent VACUUM might delete the page while
     774             :  * we're not holding the lock. It's OK in an insert, though, because VACUUM
     775             :  * has a different mechanism that prevents it from running concurrently with
     776             :  * inserts. (Namely, it holds a cleanup lock on the root.)
     777             :  */
     778             : static void
     779           4 : ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats, int access)
     780             : {
     781           4 :     INJECTION_POINT("gin-finish-incomplete-split");
     782           4 :     elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"",
     783             :          stack->blkno, RelationGetRelationName(btree->index));
     784             : 
     785           4 :     if (access == GIN_SHARE)
     786             :     {
     787           2 :         LockBuffer(stack->buffer, GIN_UNLOCK);
     788           2 :         LockBuffer(stack->buffer, GIN_EXCLUSIVE);
     789             : 
     790           2 :         if (!GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
     791             :         {
     792             :             /*
     793             :              * Someone else already completed the split while we were not
     794             :              * holding the lock.
     795             :              */
     796           0 :             return;
     797             :         }
     798             :     }
     799             : 
     800           4 :     ginFinishSplit(btree, stack, false, buildStats);
     801             : }
     802             : 
     803             : /*
     804             :  * Insert a value to tree described by stack.
     805             :  *
     806             :  * The value to be inserted is given in 'insertdata'. Its format depends
     807             :  * on whether this is an entry or data tree, ginInsertValue just passes it
     808             :  * through to the tree-specific callback function.
     809             :  *
     810             :  * During an index build, buildStats is non-null and the counters it contains
     811             :  * are incremented as needed.
     812             :  *
     813             :  * NB: the passed-in stack is freed, as though by freeGinBtreeStack.
     814             :  */
     815             : void
     816      771658 : ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata,
     817             :                GinStatsData *buildStats)
     818             : {
     819             :     bool        done;
     820             : 
     821             :     /* If the leaf page was incompletely split, finish the split first */
     822      771658 :     if (GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
     823           0 :         ginFinishOldSplit(btree, stack, buildStats, GIN_EXCLUSIVE);
     824             : 
     825      771658 :     done = ginPlaceToPage(btree, stack,
     826             :                           insertdata, InvalidBlockNumber,
     827             :                           InvalidBuffer, buildStats);
     828      771658 :     if (done)
     829             :     {
     830      766962 :         LockBuffer(stack->buffer, GIN_UNLOCK);
     831      766962 :         freeGinBtreeStack(stack);
     832             :     }
     833             :     else
     834        4696 :         ginFinishSplit(btree, stack, true, buildStats);
     835      771654 : }

Generated by: LCOV version 1.14