LCOV - code coverage report
Current view: top level - src/backend/access/gin - ginbtree.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 77.6 % 286 222
Test Date: 2026-02-28 10:14:33 Functions: 88.9 % 9 8
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * ginbtree.c
       4              :  *    page utilities routines for the postgres inverted index access method.
       5              :  *
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  * Portions Copyright (c) 1994, Regents of the University of California
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *          src/backend/access/gin/ginbtree.c
      12              :  *-------------------------------------------------------------------------
      13              :  */
      14              : 
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/gin_private.h"
      18              : #include "access/ginxlog.h"
      19              : #include "access/xloginsert.h"
      20              : #include "miscadmin.h"
      21              : #include "storage/predicate.h"
      22              : #include "utils/injection_point.h"
      23              : #include "utils/memutils.h"
      24              : #include "utils/rel.h"
      25              : 
      26              : static void ginFindParents(GinBtree btree, GinBtreeStack *stack);
      27              : static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
      28              :                            void *insertdata, BlockNumber updateblkno,
      29              :                            Buffer childbuf, GinStatsData *buildStats);
      30              : static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
      31              :                            bool freestack, GinStatsData *buildStats);
      32              : static void ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack,
      33              :                               GinStatsData *buildStats, int access);
      34              : 
      35              : /*
      36              :  * Lock buffer by needed method for search.
      37              :  */
      38              : int
      39      1010474 : ginTraverseLock(Buffer buffer, bool searchMode)
      40              : {
      41              :     Page        page;
      42      1010474 :     int         access = GIN_SHARE;
      43              : 
      44      1010474 :     LockBuffer(buffer, GIN_SHARE);
      45      1010474 :     page = BufferGetPage(buffer);
      46      1010474 :     if (GinPageIsLeaf(page))
      47              :     {
      48       514274 :         if (searchMode == false)
      49              :         {
      50              :             /* we should relock our page */
      51       510695 :             LockBuffer(buffer, GIN_UNLOCK);
      52       510695 :             LockBuffer(buffer, GIN_EXCLUSIVE);
      53              : 
      54              :             /* But root can become non-leaf during relock */
      55       510695 :             if (!GinPageIsLeaf(page))
      56              :             {
      57              :                 /* restore old lock type (very rare) */
      58            0 :                 LockBuffer(buffer, GIN_UNLOCK);
      59            0 :                 LockBuffer(buffer, GIN_SHARE);
      60              :             }
      61              :             else
      62       510695 :                 access = GIN_EXCLUSIVE;
      63              :         }
      64              :     }
      65              : 
      66      1010474 :     return access;
      67              : }
      68              : 
      69              : /*
      70              :  * Descend the tree to the leaf page that contains or would contain the key
      71              :  * we're searching for. The key should already be filled in 'btree', in
      72              :  * tree-type specific manner. If btree->fullScan is true, descends to the
      73              :  * leftmost leaf page.
      74              :  *
      75              :  * If 'searchmode' is false, on return stack->buffer is exclusively locked,
      76              :  * and the stack represents the full path to the root. Otherwise stack->buffer
      77              :  * is share-locked, and stack->parent is NULL.
      78              :  *
      79              :  * If 'rootConflictCheck' is true, tree root is checked for serialization
      80              :  * conflict.
      81              :  */
      82              : GinBtreeStack *
      83       514276 : ginFindLeafPage(GinBtree btree, bool searchMode,
      84              :                 bool rootConflictCheck)
      85              : {
      86              :     GinBtreeStack *stack;
      87              : 
      88       514276 :     stack = palloc_object(GinBtreeStack);
      89       514276 :     stack->blkno = btree->rootBlkno;
      90       514276 :     stack->buffer = ReadBuffer(btree->index, btree->rootBlkno);
      91       514276 :     stack->parent = NULL;
      92       514276 :     stack->predictNumber = 1;
      93              : 
      94       514276 :     if (rootConflictCheck)
      95        24778 :         CheckForSerializableConflictIn(btree->index, NULL, btree->rootBlkno);
      96              : 
      97              :     for (;;)
      98       496200 :     {
      99              :         Page        page;
     100              :         BlockNumber child;
     101              :         int         access;
     102              : 
     103      1010474 :         stack->off = InvalidOffsetNumber;
     104              : 
     105      1010474 :         page = BufferGetPage(stack->buffer);
     106              : 
     107      1010474 :         access = ginTraverseLock(stack->buffer, searchMode);
     108              : 
     109              :         /*
     110              :          * If we're going to modify the tree, finish any incomplete splits we
     111              :          * encounter on the way.
     112              :          */
     113      1010474 :         if (!searchMode && GinPageIsIncompleteSplit(page))
     114            2 :             ginFinishOldSplit(btree, stack, NULL, access);
     115              : 
     116              :         /*
     117              :          * ok, page is correctly locked, we should check to move right ..,
     118              :          * root never has a right link, so small optimization
     119              :          */
     120      1507728 :         while (btree->fullScan == false && stack->blkno != btree->rootBlkno &&
     121       496708 :                btree->isMoveRight(btree, page))
     122              :         {
     123          546 :             BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
     124              : 
     125          546 :             if (rightlink == InvalidBlockNumber)
     126              :                 /* rightmost page */
     127            0 :                 break;
     128              : 
     129          546 :             stack->buffer = ginStepRight(stack->buffer, btree->index, access);
     130          546 :             stack->blkno = rightlink;
     131          546 :             page = BufferGetPage(stack->buffer);
     132              : 
     133          546 :             if (!searchMode && GinPageIsIncompleteSplit(page))
     134            0 :                 ginFinishOldSplit(btree, stack, NULL, access);
     135              :         }
     136              : 
     137      1010474 :         if (GinPageIsLeaf(page))    /* we found, return locked page */
     138       514274 :             return stack;
     139              : 
     140              :         /* now we have correct buffer, try to find child */
     141       496200 :         child = btree->findChildPage(btree, stack);
     142              : 
     143       496200 :         LockBuffer(stack->buffer, GIN_UNLOCK);
     144              :         Assert(child != InvalidBlockNumber);
     145              :         Assert(stack->blkno != child);
     146              : 
     147       496200 :         if (searchMode)
     148              :         {
     149              :             /* in search mode we may forget path to leaf */
     150         3409 :             stack->blkno = child;
     151         3409 :             stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
     152              :         }
     153              :         else
     154              :         {
     155       492791 :             GinBtreeStack *ptr = palloc_object(GinBtreeStack);
     156              : 
     157       492791 :             ptr->parent = stack;
     158       492791 :             stack = ptr;
     159       492791 :             stack->blkno = child;
     160       492791 :             stack->buffer = ReadBuffer(btree->index, stack->blkno);
     161       492791 :             stack->predictNumber = 1;
     162              :         }
     163              :     }
     164              : }
     165              : 
     166              : /*
     167              :  * Step right from current page.
     168              :  *
     169              :  * The next page is locked first, before releasing the current page. This is
     170              :  * crucial to prevent concurrent VACUUM from deleting a page that we are about
     171              :  * to step to. (The lock-coupling isn't strictly necessary when we are
     172              :  * traversing the tree to find an insert location, because page deletion grabs
     173              :  * a cleanup lock on the root to prevent any concurrent inserts. See Page
     174              :  * deletion section in the README. But there's no harm in doing it always.)
     175              :  */
     176              : Buffer
     177         2314 : ginStepRight(Buffer buffer, Relation index, int lockmode)
     178              : {
     179              :     Buffer      nextbuffer;
     180         2314 :     Page        page = BufferGetPage(buffer);
     181         2314 :     bool        isLeaf = GinPageIsLeaf(page);
     182         2314 :     bool        isData = GinPageIsData(page);
     183         2314 :     BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
     184              : 
     185         2314 :     nextbuffer = ReadBuffer(index, blkno);
     186         2314 :     LockBuffer(nextbuffer, lockmode);
     187         2314 :     UnlockReleaseBuffer(buffer);
     188              : 
     189              :     /* Sanity check that the page we stepped to is of similar kind. */
     190         2314 :     page = BufferGetPage(nextbuffer);
     191         2314 :     if (isLeaf != GinPageIsLeaf(page) || isData != GinPageIsData(page))
     192            0 :         elog(ERROR, "right sibling of GIN page is of different type");
     193              : 
     194         2314 :     return nextbuffer;
     195              : }
     196              : 
     197              : void
     198       514266 : freeGinBtreeStack(GinBtreeStack *stack)
     199              : {
     200      1518857 :     while (stack)
     201              :     {
     202      1004591 :         GinBtreeStack *tmp = stack->parent;
     203              : 
     204      1004591 :         if (stack->buffer != InvalidBuffer)
     205      1004591 :             ReleaseBuffer(stack->buffer);
     206              : 
     207      1004591 :         pfree(stack);
     208      1004591 :         stack = tmp;
     209              :     }
     210       514266 : }
     211              : 
     212              : /*
     213              :  * Try to find parent for current stack position. Returns correct parent and
     214              :  * child's offset in stack->parent. The root page is never released, to
     215              :  * prevent conflict with vacuum process.
     216              :  */
     217              : static void
     218            0 : ginFindParents(GinBtree btree, GinBtreeStack *stack)
     219              : {
     220              :     Page        page;
     221              :     Buffer      buffer;
     222              :     BlockNumber blkno,
     223              :                 leftmostBlkno;
     224              :     OffsetNumber offset;
     225              :     GinBtreeStack *root;
     226              :     GinBtreeStack *ptr;
     227              : 
     228              :     /*
     229              :      * Unwind the stack all the way up to the root, leaving only the root
     230              :      * item.
     231              :      *
     232              :      * Be careful not to release the pin on the root page! The pin on root
     233              :      * page is required to lock out concurrent vacuums on the tree.
     234              :      */
     235            0 :     root = stack->parent;
     236            0 :     while (root->parent)
     237              :     {
     238            0 :         ReleaseBuffer(root->buffer);
     239            0 :         root = root->parent;
     240              :     }
     241              : 
     242              :     Assert(root->blkno == btree->rootBlkno);
     243              :     Assert(BufferGetBlockNumber(root->buffer) == btree->rootBlkno);
     244            0 :     root->off = InvalidOffsetNumber;
     245              : 
     246            0 :     blkno = root->blkno;
     247            0 :     buffer = root->buffer;
     248              : 
     249            0 :     ptr = palloc_object(GinBtreeStack);
     250              : 
     251              :     for (;;)
     252              :     {
     253            0 :         LockBuffer(buffer, GIN_EXCLUSIVE);
     254            0 :         page = BufferGetPage(buffer);
     255            0 :         if (GinPageIsLeaf(page))
     256            0 :             elog(ERROR, "Lost path");
     257              : 
     258            0 :         if (GinPageIsIncompleteSplit(page))
     259              :         {
     260              :             Assert(blkno != btree->rootBlkno);
     261            0 :             ptr->blkno = blkno;
     262            0 :             ptr->buffer = buffer;
     263              : 
     264              :             /*
     265              :              * parent may be wrong, but if so, the ginFinishSplit call will
     266              :              * recurse to call ginFindParents again to fix it.
     267              :              */
     268            0 :             ptr->parent = root;
     269            0 :             ptr->off = InvalidOffsetNumber;
     270              : 
     271            0 :             ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE);
     272              :         }
     273              : 
     274            0 :         leftmostBlkno = btree->getLeftMostChild(btree, page);
     275              : 
     276            0 :         while ((offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) == InvalidOffsetNumber)
     277              :         {
     278            0 :             blkno = GinPageGetOpaque(page)->rightlink;
     279            0 :             if (blkno == InvalidBlockNumber)
     280              :             {
     281              :                 /* Link not present in this level */
     282            0 :                 LockBuffer(buffer, GIN_UNLOCK);
     283              :                 /* Do not release pin on the root buffer */
     284            0 :                 if (buffer != root->buffer)
     285            0 :                     ReleaseBuffer(buffer);
     286            0 :                 break;
     287              :             }
     288            0 :             buffer = ginStepRight(buffer, btree->index, GIN_EXCLUSIVE);
     289            0 :             page = BufferGetPage(buffer);
     290              : 
     291              :             /* finish any incomplete splits, as above */
     292            0 :             if (GinPageIsIncompleteSplit(page))
     293              :             {
     294              :                 Assert(blkno != btree->rootBlkno);
     295            0 :                 ptr->blkno = blkno;
     296            0 :                 ptr->buffer = buffer;
     297            0 :                 ptr->parent = root;
     298            0 :                 ptr->off = InvalidOffsetNumber;
     299              : 
     300            0 :                 ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE);
     301              :             }
     302              :         }
     303              : 
     304            0 :         if (blkno != InvalidBlockNumber)
     305              :         {
     306            0 :             ptr->blkno = blkno;
     307            0 :             ptr->buffer = buffer;
     308            0 :             ptr->parent = root; /* it may be wrong, but in next call we will
     309              :                                  * correct */
     310            0 :             ptr->off = offset;
     311            0 :             stack->parent = ptr;
     312            0 :             return;
     313              :         }
     314              : 
     315              :         /* Descend down to next level */
     316            0 :         blkno = leftmostBlkno;
     317            0 :         buffer = ReadBuffer(btree->index, blkno);
     318              :     }
     319              : }
     320              : 
     321              : /*
     322              :  * Insert a new item to a page.
     323              :  *
     324              :  * Returns true if the insertion was finished. On false, the page was split and
     325              :  * the parent needs to be updated. (A root split returns true as it doesn't
     326              :  * need any further action by the caller to complete.)
     327              :  *
     328              :  * When inserting a downlink to an internal page, 'childbuf' contains the
     329              :  * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared
     330              :  * atomically with the insert. Also, the existing item at offset stack->off
     331              :  * in the target page is updated to point to updateblkno.
     332              :  *
     333              :  * stack->buffer is locked on entry, and is kept locked.
     334              :  * Likewise for childbuf, if given.
     335              :  */
     336              : static bool
     337       488453 : ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
     338              :                void *insertdata, BlockNumber updateblkno,
     339              :                Buffer childbuf, GinStatsData *buildStats)
     340              : {
     341       488453 :     Page        page = BufferGetPage(stack->buffer);
     342              :     bool        result;
     343              :     GinPlaceToPageRC rc;
     344       488453 :     uint16      xlflags = 0;
     345       488453 :     Page        childpage = NULL;
     346       488453 :     Page        newlpage = NULL,
     347       488453 :                 newrpage = NULL;
     348       488453 :     void       *ptp_workspace = NULL;
     349              :     MemoryContext tmpCxt;
     350              :     MemoryContext oldCxt;
     351              : 
     352              :     /*
     353              :      * We do all the work of this function and its subfunctions in a temporary
     354              :      * memory context.  This avoids leakages and simplifies APIs, since some
     355              :      * subfunctions allocate storage that has to survive until we've finished
     356              :      * the WAL insertion.
     357              :      */
     358       488453 :     tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
     359              :                                    "ginPlaceToPage temporary context",
     360              :                                    ALLOCSET_DEFAULT_SIZES);
     361       488453 :     oldCxt = MemoryContextSwitchTo(tmpCxt);
     362              : 
     363       488453 :     if (GinPageIsData(page))
     364        24799 :         xlflags |= GIN_INSERT_ISDATA;
     365       488453 :     if (GinPageIsLeaf(page))
     366              :     {
     367       485993 :         xlflags |= GIN_INSERT_ISLEAF;
     368              :         Assert(!BufferIsValid(childbuf));
     369              :         Assert(updateblkno == InvalidBlockNumber);
     370              :     }
     371              :     else
     372              :     {
     373              :         Assert(BufferIsValid(childbuf));
     374              :         Assert(updateblkno != InvalidBlockNumber);
     375         2460 :         childpage = BufferGetPage(childbuf);
     376              :     }
     377              : 
     378              :     /*
     379              :      * See if the incoming tuple will fit on the page.  beginPlaceToPage will
     380              :      * decide if the page needs to be split, and will compute the split
     381              :      * contents if so.  See comments for beginPlaceToPage and execPlaceToPage
     382              :      * functions for more details of the API here.
     383              :      */
     384       488453 :     rc = btree->beginPlaceToPage(btree, stack->buffer, stack,
     385              :                                  insertdata, updateblkno,
     386              :                                  &ptp_workspace,
     387              :                                  &newlpage, &newrpage);
     388              : 
     389       488453 :     if (rc == GPTP_NO_WORK)
     390              :     {
     391              :         /* Nothing to do */
     392            0 :         result = true;
     393              :     }
     394       488453 :     else if (rc == GPTP_INSERT)
     395              :     {
     396              :         /* It will fit, perform the insertion */
     397       485871 :         START_CRIT_SECTION();
     398              : 
     399       485871 :         if (RelationNeedsWAL(btree->index) && !btree->isBuild)
     400       289550 :             XLogBeginInsert();
     401              : 
     402              :         /*
     403              :          * Perform the page update, dirty and register stack->buffer, and
     404              :          * register any extra WAL data.
     405              :          */
     406       485871 :         btree->execPlaceToPage(btree, stack->buffer, stack,
     407              :                                insertdata, updateblkno, ptp_workspace);
     408              : 
     409              :         /* An insert to an internal page finishes the split of the child. */
     410       485871 :         if (BufferIsValid(childbuf))
     411              :         {
     412         2458 :             GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
     413         2458 :             MarkBufferDirty(childbuf);
     414         2458 :             if (RelationNeedsWAL(btree->index) && !btree->isBuild)
     415         1053 :                 XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
     416              :         }
     417              : 
     418       485871 :         if (RelationNeedsWAL(btree->index) && !btree->isBuild)
     419              :         {
     420              :             XLogRecPtr  recptr;
     421              :             ginxlogInsert xlrec;
     422              :             BlockIdData childblknos[2];
     423              : 
     424       289550 :             xlrec.flags = xlflags;
     425              : 
     426       289550 :             XLogRegisterData(&xlrec, sizeof(ginxlogInsert));
     427              : 
     428              :             /*
     429              :              * Log information about child if this was an insertion of a
     430              :              * downlink.
     431              :              */
     432       289550 :             if (BufferIsValid(childbuf))
     433              :             {
     434         1053 :                 BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
     435         1053 :                 BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);
     436         1053 :                 XLogRegisterData(childblknos,
     437              :                                  sizeof(BlockIdData) * 2);
     438              :             }
     439              : 
     440       289550 :             recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT);
     441       289550 :             PageSetLSN(page, recptr);
     442       289550 :             if (BufferIsValid(childbuf))
     443         1053 :                 PageSetLSN(childpage, recptr);
     444              :         }
     445              : 
     446       485871 :         END_CRIT_SECTION();
     447              : 
     448              :         /* Insertion is complete. */
     449       485871 :         result = true;
     450              :     }
     451         2582 :     else if (rc == GPTP_SPLIT)
     452              :     {
     453              :         /*
     454              :          * Didn't fit, need to split.  The split has been computed in newlpage
     455              :          * and newrpage, which are pointers to palloc'd pages, not associated
     456              :          * with buffers.  stack->buffer is not touched yet.
     457              :          */
     458              :         Buffer      rbuffer;
     459              :         BlockNumber savedRightLink;
     460              :         ginxlogSplit data;
     461         2582 :         Buffer      lbuffer = InvalidBuffer;
     462         2582 :         Page        newrootpg = NULL;
     463              : 
     464              :         /* Get a new index page to become the right page */
     465         2582 :         rbuffer = GinNewBuffer(btree->index);
     466              : 
     467              :         /* During index build, count the new page */
     468         2582 :         if (buildStats)
     469              :         {
     470          643 :             if (btree->isData)
     471           47 :                 buildStats->nDataPages++;
     472              :             else
     473          596 :                 buildStats->nEntryPages++;
     474              :         }
     475              : 
     476         2582 :         savedRightLink = GinPageGetOpaque(page)->rightlink;
     477              : 
     478              :         /* Begin setting up WAL record */
     479         2582 :         data.locator = btree->index->rd_locator;
     480         2582 :         data.flags = xlflags;
     481         2582 :         if (BufferIsValid(childbuf))
     482              :         {
     483            2 :             data.leftChildBlkno = BufferGetBlockNumber(childbuf);
     484            2 :             data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink;
     485              :         }
     486              :         else
     487         2580 :             data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;
     488              : 
     489         2582 :         if (stack->parent == NULL)
     490              :         {
     491              :             /*
     492              :              * splitting the root, so we need to allocate new left page and
     493              :              * place pointers to left and right page on root page.
     494              :              */
     495          122 :             lbuffer = GinNewBuffer(btree->index);
     496              : 
     497              :             /* During index build, count the new left page */
     498          122 :             if (buildStats)
     499              :             {
     500           99 :                 if (btree->isData)
     501           39 :                     buildStats->nDataPages++;
     502              :                 else
     503           60 :                     buildStats->nEntryPages++;
     504              :             }
     505              : 
     506          122 :             data.rrlink = InvalidBlockNumber;
     507          122 :             data.flags |= GIN_SPLIT_ROOT;
     508              : 
     509          122 :             GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber;
     510          122 :             GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
     511              : 
     512              :             /*
     513              :              * Construct a new root page containing downlinks to the new left
     514              :              * and right pages.  (Do this in a temporary copy rather than
     515              :              * overwriting the original page directly, since we're not in the
     516              :              * critical section yet.)
     517              :              */
     518          122 :             newrootpg = PageGetTempPage(newrpage);
     519          122 :             GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~(GIN_LEAF | GIN_COMPRESSED), BLCKSZ);
     520              : 
     521          122 :             btree->fillRoot(btree, newrootpg,
     522              :                             BufferGetBlockNumber(lbuffer), newlpage,
     523              :                             BufferGetBlockNumber(rbuffer), newrpage);
     524              : 
     525          122 :             if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
     526              :             {
     527              : 
     528          121 :                 PredicateLockPageSplit(btree->index,
     529              :                                        BufferGetBlockNumber(stack->buffer),
     530              :                                        BufferGetBlockNumber(lbuffer));
     531              : 
     532          121 :                 PredicateLockPageSplit(btree->index,
     533              :                                        BufferGetBlockNumber(stack->buffer),
     534              :                                        BufferGetBlockNumber(rbuffer));
     535              :             }
     536              :         }
     537              :         else
     538              :         {
     539              :             /* splitting a non-root page */
     540         2460 :             data.rrlink = savedRightLink;
     541              : 
     542         2460 :             GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
     543         2460 :             GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
     544         2460 :             GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
     545              : 
     546         2460 :             if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
     547              :             {
     548              : 
     549         2459 :                 PredicateLockPageSplit(btree->index,
     550              :                                        BufferGetBlockNumber(stack->buffer),
     551              :                                        BufferGetBlockNumber(rbuffer));
     552              :             }
     553              :         }
     554              : 
     555              :         /*
     556              :          * OK, we have the new contents of the left page in a temporary copy
     557              :          * now (newlpage), and likewise for the new contents of the
     558              :          * newly-allocated right block. The original page is still unchanged.
     559              :          *
     560              :          * If this is a root split, we also have a temporary page containing
     561              :          * the new contents of the root.
     562              :          */
     563              : 
     564         2582 :         START_CRIT_SECTION();
     565              : 
     566         2582 :         MarkBufferDirty(rbuffer);
     567         2582 :         MarkBufferDirty(stack->buffer);
     568              : 
     569              :         /*
     570              :          * Restore the temporary copies over the real buffers.
     571              :          */
     572         2582 :         if (stack->parent == NULL)
     573              :         {
     574              :             /* Splitting the root, three pages to update */
     575          122 :             MarkBufferDirty(lbuffer);
     576          122 :             memcpy(page, newrootpg, BLCKSZ);
     577          122 :             memcpy(BufferGetPage(lbuffer), newlpage, BLCKSZ);
     578          122 :             memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
     579              :         }
     580              :         else
     581              :         {
     582              :             /* Normal split, only two pages to update */
     583         2460 :             memcpy(page, newlpage, BLCKSZ);
     584         2460 :             memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
     585              :         }
     586              : 
     587              :         /* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */
     588         2582 :         if (BufferIsValid(childbuf))
     589              :         {
     590            2 :             GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
     591            2 :             MarkBufferDirty(childbuf);
     592              :         }
     593              : 
     594              :         /* write WAL record */
     595         2582 :         if (RelationNeedsWAL(btree->index) && !btree->isBuild)
     596              :         {
     597              :             XLogRecPtr  recptr;
     598              : 
     599         1069 :             XLogBeginInsert();
     600              : 
     601              :             /*
     602              :              * We just take full page images of all the split pages. Splits
     603              :              * are uncommon enough that it's not worth complicating the code
     604              :              * to be more efficient.
     605              :              */
     606         1069 :             if (stack->parent == NULL)
     607              :             {
     608           14 :                 XLogRegisterBuffer(0, lbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
     609           14 :                 XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
     610           14 :                 XLogRegisterBuffer(2, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
     611              :             }
     612              :             else
     613              :             {
     614         1055 :                 XLogRegisterBuffer(0, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
     615         1055 :                 XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
     616              :             }
     617         1069 :             if (BufferIsValid(childbuf))
     618            2 :                 XLogRegisterBuffer(3, childbuf, REGBUF_STANDARD);
     619              : 
     620         1069 :             XLogRegisterData(&data, sizeof(ginxlogSplit));
     621              : 
     622         1069 :             recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT);
     623              : 
     624         1069 :             PageSetLSN(page, recptr);
     625         1069 :             PageSetLSN(BufferGetPage(rbuffer), recptr);
     626         1069 :             if (stack->parent == NULL)
     627           14 :                 PageSetLSN(BufferGetPage(lbuffer), recptr);
     628         1069 :             if (BufferIsValid(childbuf))
     629            2 :                 PageSetLSN(childpage, recptr);
     630              :         }
     631         2582 :         END_CRIT_SECTION();
     632              : 
     633              :         /*
     634              :          * We can release the locks/pins on the new pages now, but keep
     635              :          * stack->buffer locked.  childbuf doesn't get unlocked either.
     636              :          */
     637         2582 :         UnlockReleaseBuffer(rbuffer);
     638         2582 :         if (stack->parent == NULL)
     639          122 :             UnlockReleaseBuffer(lbuffer);
     640              : 
     641              :         /*
     642              :          * If we split the root, we're done. Otherwise the split is not
     643              :          * complete until the downlink for the new page has been inserted to
     644              :          * the parent.
     645              :          */
     646         2582 :         result = (stack->parent == NULL);
     647              :     }
     648              :     else
     649              :     {
     650            0 :         elog(ERROR, "invalid return code from GIN beginPlaceToPage method: %d", rc);
     651              :         result = false;         /* keep compiler quiet */
     652              :     }
     653              : 
     654              :     /* Clean up temp context */
     655       488453 :     MemoryContextSwitchTo(oldCxt);
     656       488453 :     MemoryContextDelete(tmpCxt);
     657              : 
     658       488453 :     return result;
     659              : }
     660              : 
     661              : /*
     662              :  * Finish a split by inserting the downlink for the new page to parent.
     663              :  *
     664              :  * On entry, stack->buffer is exclusively locked.
     665              :  *
     666              :  * If freestack is true, all the buffers are released and unlocked as we
     667              :  * crawl up the tree, and 'stack' is freed. Otherwise stack->buffer is kept
     668              :  * locked, and stack is unmodified, except for possibly moving right to find
     669              :  * the correct parent of page.
     670              :  */
     671              : static void
     672         2461 : ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
     673              :                GinStatsData *buildStats)
     674              : {
     675              :     Page        page;
     676              :     bool        done;
     677         2461 :     bool        first = true;
     678              : 
     679              :     /* this loop crawls up the stack until the insertion is complete */
     680              :     do
     681              :     {
     682         2462 :         GinBtreeStack *parent = stack->parent;
     683              :         void       *insertdata;
     684              :         BlockNumber updateblkno;
     685              : 
     686              : #ifdef USE_INJECTION_POINTS
     687         2462 :         if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
     688         2460 :             INJECTION_POINT("gin-leave-leaf-split-incomplete", NULL);
     689              :         else
     690            2 :             INJECTION_POINT("gin-leave-internal-split-incomplete", NULL);
     691              : #endif
     692              : 
     693              :         /* search parent to lock */
     694         2460 :         LockBuffer(parent->buffer, GIN_EXCLUSIVE);
     695              : 
     696              :         /*
     697              :          * If the parent page was incompletely split, finish that split first,
     698              :          * then continue with the current one.
     699              :          *
     700              :          * Note: we have to finish *all* incomplete splits we encounter, even
     701              :          * if we have to move right. Otherwise we might choose as the target a
     702              :          * page that has no downlink in the parent, and splitting it further
     703              :          * would fail.
     704              :          */
     705         2460 :         if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
     706            0 :             ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE);
     707              : 
     708              :         /* move right if it's needed */
     709         2460 :         page = BufferGetPage(parent->buffer);
     710         2460 :         while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
     711              :         {
     712            0 :             if (GinPageRightMost(page))
     713              :             {
     714              :                 /*
     715              :                  * rightmost page, but we don't find parent, we should use
     716              :                  * plain search...
     717              :                  */
     718            0 :                 LockBuffer(parent->buffer, GIN_UNLOCK);
     719            0 :                 ginFindParents(btree, stack);
     720            0 :                 parent = stack->parent;
     721              :                 Assert(parent != NULL);
     722            0 :                 break;
     723              :             }
     724              : 
     725            0 :             parent->buffer = ginStepRight(parent->buffer, btree->index, GIN_EXCLUSIVE);
     726            0 :             parent->blkno = BufferGetBlockNumber(parent->buffer);
     727            0 :             page = BufferGetPage(parent->buffer);
     728              : 
     729            0 :             if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
     730            0 :                 ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE);
     731              :         }
     732              : 
     733              :         /* insert the downlink */
     734         2460 :         insertdata = btree->prepareDownlink(btree, stack->buffer);
     735         2460 :         updateblkno = GinPageGetOpaque(BufferGetPage(stack->buffer))->rightlink;
     736         2460 :         done = ginPlaceToPage(btree, parent,
     737              :                               insertdata, updateblkno,
     738              :                               stack->buffer, buildStats);
     739         2460 :         pfree(insertdata);
     740              : 
     741              :         /*
     742              :          * If the caller requested to free the stack, unlock and release the
     743              :          * child buffer now. Otherwise keep it pinned and locked, but if we
     744              :          * have to recurse up the tree, we can unlock the upper pages, only
     745              :          * keeping the page at the bottom of the stack locked.
     746              :          */
     747         2460 :         if (!first || freestack)
     748         2458 :             LockBuffer(stack->buffer, GIN_UNLOCK);
     749         2460 :         if (freestack)
     750              :         {
     751         2458 :             ReleaseBuffer(stack->buffer);
     752         2458 :             pfree(stack);
     753              :         }
     754         2460 :         stack = parent;
     755              : 
     756         2460 :         first = false;
     757         2460 :     } while (!done);
     758              : 
     759              :     /* unlock the parent */
     760         2459 :     LockBuffer(stack->buffer, GIN_UNLOCK);
     761              : 
     762         2459 :     if (freestack)
     763         2457 :         freeGinBtreeStack(stack);
     764         2459 : }
     765              : 
     766              : /*
     767              :  * An entry point to ginFinishSplit() that is used when we stumble upon an
     768              :  * existing incompletely split page in the tree, as opposed to completing a
     769              :  * split that we just made ourselves. The difference is that stack->buffer may
     770              :  * be merely share-locked on entry, and will be upgraded to exclusive mode.
     771              :  *
     772              :  * Note: Upgrading the lock momentarily releases it. Doing that in a scan
     773              :  * would not be OK, because a concurrent VACUUM might delete the page while
     774              :  * we're not holding the lock. It's OK in an insert, though, because VACUUM
     775              :  * has a different mechanism that prevents it from running concurrently with
     776              :  * inserts. (Namely, it holds a cleanup lock on the root.)
     777              :  */
     778              : static void
     779            2 : ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats, int access)
     780              : {
     781            2 :     INJECTION_POINT("gin-finish-incomplete-split", NULL);
     782            2 :     elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"",
     783              :          stack->blkno, RelationGetRelationName(btree->index));
     784              : 
     785            2 :     if (access == GIN_SHARE)
     786              :     {
     787            1 :         LockBuffer(stack->buffer, GIN_UNLOCK);
     788            1 :         LockBuffer(stack->buffer, GIN_EXCLUSIVE);
     789              : 
     790            1 :         if (!GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
     791              :         {
     792              :             /*
     793              :              * Someone else already completed the split while we were not
     794              :              * holding the lock.
     795              :              */
     796            0 :             return;
     797              :         }
     798              :     }
     799              : 
     800            2 :     ginFinishSplit(btree, stack, false, buildStats);
     801              : }
     802              : 
     803              : /*
     804              :  * Insert a value to tree described by stack.
     805              :  *
     806              :  * The value to be inserted is given in 'insertdata'. Its format depends
     807              :  * on whether this is an entry or data tree, ginInsertValue just passes it
     808              :  * through to the tree-specific callback function.
     809              :  *
     810              :  * During an index build, buildStats is non-null and the counters it contains
     811              :  * are incremented as needed.
     812              :  *
     813              :  * NB: the passed-in stack is freed, as though by freeGinBtreeStack.
     814              :  */
     815              : void
     816       485993 : ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata,
     817              :                GinStatsData *buildStats)
     818              : {
     819              :     bool        done;
     820              : 
     821              :     /* If the leaf page was incompletely split, finish the split first */
     822       485993 :     if (GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
     823            0 :         ginFinishOldSplit(btree, stack, buildStats, GIN_EXCLUSIVE);
     824              : 
     825       485993 :     done = ginPlaceToPage(btree, stack,
     826              :                           insertdata, InvalidBlockNumber,
     827              :                           InvalidBuffer, buildStats);
     828       485993 :     if (done)
     829              :     {
     830       483534 :         LockBuffer(stack->buffer, GIN_UNLOCK);
     831       483534 :         freeGinBtreeStack(stack);
     832              :     }
     833              :     else
     834         2459 :         ginFinishSplit(btree, stack, true, buildStats);
     835       485991 : }
        

Generated by: LCOV version 2.0-1