LCOV - code coverage report
Current view: top level - src/backend/access/spgist - spgdoinsert.c (source / functions) Hit Total Coverage
Test: PostgreSQL 15devel Lines: 756 828 91.3 %
Date: 2021-12-09 04:09:06 Functions: 15 15 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * spgdoinsert.c
       4             :  *    implementation of insert algorithm
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *          src/backend/access/spgist/spgdoinsert.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : 
      16             : #include "postgres.h"
      17             : 
      18             : #include "access/genam.h"
      19             : #include "access/spgist_private.h"
      20             : #include "access/spgxlog.h"
      21             : #include "access/xloginsert.h"
      22             : #include "common/pg_prng.h"
      23             : #include "miscadmin.h"
      24             : #include "storage/bufmgr.h"
      25             : #include "utils/rel.h"
      26             : 
      27             : 
      28             : /*
      29             :  * SPPageDesc tracks all info about a page we are inserting into.  In some
      30             :  * situations it actually identifies a tuple, or even a specific node within
      31             :  * an inner tuple.  But any of the fields can be invalid (each field's
      32             :  * "invalid" value is listed next to it below).  If the buffer field is valid,
      33             :  * it implies we hold pin and exclusive lock on that buffer.  The page pointer should be valid exactly when the buffer field is.
      34             :  */
      35             : typedef struct SPPageDesc
      36             : {
      37             :     BlockNumber blkno;          /* block number, or InvalidBlockNumber */
      38             :     Buffer      buffer;         /* page's buffer number, or InvalidBuffer */
      39             :     Page        page;           /* pointer to page buffer, or NULL */
      40             :     OffsetNumber offnum;        /* offset of tuple, or InvalidOffsetNumber */
      41             :     int         node;           /* node number within inner tuple, or -1 */
      42             : } SPPageDesc;
      43             : 
      44             : 
      45             : /*
      46             :  * Set the item pointer in the nodeN'th entry in inner tuple tup to
      47             :  * (blkno, offset).  This is used to update the parent inner tuple's downlink
      48             :  * after a move or split operation.  Raises ERROR if no node nodeN is found.
      49             :  */
      50             : void
      51        7634 : spgUpdateNodeLink(SpGistInnerTuple tup, int nodeN,
      52             :                   BlockNumber blkno, OffsetNumber offset)
      53             : {
      54             :     int         i;
      55             :     SpGistNodeTuple node;
      56             : 
      57       40370 :     SGITITERATE(tup, i, node)   /* walk the nodes of tup, looking for index nodeN */
      58             :     {
      59       40370 :         if (i == nodeN)
      60             :         {
      61        7634 :             ItemPointerSet(&node->t_tid, blkno, offset);    /* overwrite this node's t_tid in place */
      62        7634 :             return;
      63             :         }
      64             :     }
      65             : 
      66           0 :     elog(ERROR, "failed to find requested node %d in SPGiST inner tuple",
      67             :          nodeN);                /* reached only if the loop never matched nodeN */
      68             : }
      69             : 
      70             : /*
      71             :  * Form a new inner tuple containing one more node than the given one, with
      72             :  * the specified label datum, inserted at offset "offset" in the node array
      73             :  * (a negative offset means append; an offset past tuple->nNodes raises ERROR).
      74             :  * The new tuple's prefix is the same as the old one's.
      75             :  * Note that the new node initially has an invalid downlink.  We'll find a
      76             :  * page to point it to later.
      77             :  */
      78             : static SpGistInnerTuple
      79        1088 : addNode(SpGistState *state, SpGistInnerTuple tuple, Datum label, int offset)
      80             : {
      81             :     SpGistNodeTuple node,
      82             :                *nodes;
      83             :     int         i;
      84             : 
      85             :     /* if offset is negative, insert at end */
      86        1088 :     if (offset < 0)
      87           0 :         offset = tuple->nNodes;
      88        1088 :     else if (offset > tuple->nNodes)
      89           0 :         elog(ERROR, "invalid offset for adding node to SPGiST inner tuple");
      90             : 
      91        1088 :     nodes = palloc(sizeof(SpGistNodeTuple) * (tuple->nNodes + 1));  /* one slot more than the old tuple has */
      92        7260 :     SGITITERATE(tuple, i, node)
      93             :     {
      94        6172 :         if (i < offset)
      95        5074 :             nodes[i] = node;        /* before the insertion point: same index */
      96             :         else
      97        1098 :             nodes[i + 1] = node;    /* at or after it: shifted up by one */
      98             :     }
      99             : 
     100        1088 :     nodes[offset] = spgFormNodeTuple(state, label, false);  /* fill the gap with the newly formed node */
     101             : 
     102        1088 :     return spgFormInnerTuple(state,
     103        1088 :                              (tuple->prefixSize > 0),
     104         506 :                              SGITDATUM(tuple, state),
     105        1088 :                              tuple->nNodes + 1,
     106             :                              nodes);
     107             : }
     108             : 
     109             : /* qsort comparator for sorting OffsetNumbers in ascending order: 0 if equal, 1 if *a > *b, else -1 */
     110             : static int
     111     3441370 : cmpOffsetNumbers(const void *a, const void *b)
     112             : {
     113     3441370 :     if (*(const OffsetNumber *) a == *(const OffsetNumber *) b)
     114           0 :         return 0;
     115     3441370 :     return (*(const OffsetNumber *) a > *(const OffsetNumber *) b) ? 1 : -1;
     116             : }
     117             : 
     118             : /*
     119             :  * Delete multiple tuples from an index page, preserving tuple offset numbers.
     120             :  *
     121             :  * The first tuple in the given list is replaced with a dead tuple of type
     122             :  * "firststate" (REDIRECT/DEAD/PLACEHOLDER); the remaining tuples are replaced
     123             :  * with dead tuples of type "reststate".  If either firststate or reststate
     124             :  * is REDIRECT, blkno/offnum specify where to link to.  No-op if nitems is 0.
     125             :  *
     126             :  * NB: this is used during WAL replay, so beware of trying to make it too
     127             :  * smart.  In particular, it shouldn't use "state" except for calling
     128             :  * spgFormDeadTuple().  This is also used in a critical section, so no
     129             :  * pallocs either!
     130             :  */
     131             : void
     132        5310 : spgPageIndexMultiDelete(SpGistState *state, Page page,
     133             :                         OffsetNumber *itemnos, int nitems,
     134             :                         int firststate, int reststate,
     135             :                         BlockNumber blkno, OffsetNumber offnum)
     136             : {
     137             :     OffsetNumber firstItem;
     138             :     OffsetNumber sortednos[MaxIndexTuplesPerPage];  /* on-stack sorted copy of itemnos */
     139        5310 :     SpGistDeadTuple tuple = NULL;
     140             :     int         i;
     141             : 
     142        5310 :     if (nitems == 0)
     143          82 :         return;                 /* nothing to do */
     144             : 
     145             :     /*
     146             :      * For efficiency we want to use PageIndexMultiDelete, which requires the
     147             :      * targets to be listed in sorted order, so we have to sort the itemnos
     148             :      * array.  (This also greatly simplifies the math for reinserting the
     149             :      * replacement tuples.)  However, we must not scribble on the caller's
     150             :      * array, so we have to make a copy.
     151             :      */
     152        5228 :     memcpy(sortednos, itemnos, sizeof(OffsetNumber) * nitems);  /* NOTE(review): assumes nitems <= MaxIndexTuplesPerPage; not checked here */
     153        5228 :     if (nitems > 1)
     154        5174 :         qsort(sortednos, nitems, sizeof(OffsetNumber), cmpOffsetNumbers);
     155             : 
     156        5228 :     PageIndexMultiDelete(page, sortednos, nitems);
     157             : 
     158        5228 :     firstItem = itemnos[0];     /* first in the caller's order, not in sorted order */
     159             : 
     160      502900 :     for (i = 0; i < nitems; i++)
     161             :     {
     162      497672 :         OffsetNumber itemno = sortednos[i];
     163             :         int         tupstate;
     164             : 
     165      497672 :         tupstate = (itemno == firstItem) ? firststate : reststate;
     166      497672 :         if (tuple == NULL || tuple->tupstate != tupstate)  /* re-form the dead tuple only when the wanted state changes */
     167        7662 :             tuple = spgFormDeadTuple(state, tupstate, blkno, offnum);
     168             : 
     169      497672 :         if (PageAddItem(page, (Item) tuple, tuple->size,
     170             :                         itemno, false, false) != itemno)    /* replacement must land at the offset just vacated */
     171           0 :             elog(ERROR, "failed to add item of size %u to SPGiST index page",
     172             :                  tuple->size);
     173             : 
     174      497672 :         if (tupstate == SPGIST_REDIRECT)
     175        1258 :             SpGistPageGetOpaque(page)->nRedirection++;
     176      496414 :         else if (tupstate == SPGIST_PLACEHOLDER)
     177      496414 :             SpGistPageGetOpaque(page)->nPlaceholder++;  /* any other tupstate bumps neither counter */
     178             :     }
     179             : }
     180             : 
     181             : /*
     182             :  * Update the parent inner tuple's downlink (node parent->node of the tuple at
     183             :  * parent->offnum) to point to blkno/offnum, and mark the parent buffer dirty
     184             :  * (this must be the last change to the parent page in the current WAL action).
     185             :  */
     186             : static void
     187        6762 : saveNodeLink(Relation index, SPPageDesc *parent,     /* index is not referenced in the body */
     188             :              BlockNumber blkno, OffsetNumber offnum)
     189             : {
     190             :     SpGistInnerTuple innerTuple;
     191             : 
     192        6762 :     innerTuple = (SpGistInnerTuple) PageGetItem(parent->page,
     193             :                                                 PageGetItemId(parent->page, parent->offnum));
     194             : 
     195        6762 :     spgUpdateNodeLink(innerTuple, parent->node, blkno, offnum);
     196             : 
     197        6762 :     MarkBufferDirty(parent->buffer);
     198        6762 : }
     199             : 
     200             : /*
     201             :  * Add a leaf tuple to a leaf page where there is known to be room for it.
     202             :  * current->offnum is changed only when the tuple is added outside any chain. */
     203             : static void
     204      535612 : addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
     205             :              SPPageDesc *current, SPPageDesc *parent, bool isNulls, bool isNew)
     206             : {
     207             :     spgxlogAddLeaf xlrec;
     208             : 
     209      535612 :     xlrec.newPage = isNew;          /* isNew is used only to fill the WAL record */
     210      535612 :     xlrec.storesNulls = isNulls;    /* likewise isNulls */
     211             : 
     212             :     /* these will be filled below as needed */
     213      535612 :     xlrec.offnumLeaf = InvalidOffsetNumber;
     214      535612 :     xlrec.offnumHeadLeaf = InvalidOffsetNumber;
     215      535612 :     xlrec.offnumParent = InvalidOffsetNumber;
     216      535612 :     xlrec.nodeI = 0;
     217             : 
     218      535612 :     START_CRIT_SECTION();
     219             : 
     220      535612 :     if (current->offnum == InvalidOffsetNumber ||
     221      534220 :         SpGistBlockIsRoot(current->blkno))
     222             :     {
     223             :         /* Tuple is not part of a chain */
     224       12860 :         SGLT_SET_NEXTOFFSET(leafTuple, InvalidOffsetNumber);
     225       25720 :         current->offnum = SpGistPageAddNewItem(state, current->page,
     226       12860 :                                                (Item) leafTuple, leafTuple->size,
     227             :                                                NULL, false);    /* current->offnum now identifies the new tuple */
     228             : 
     229       12860 :         xlrec.offnumLeaf = current->offnum;
     230             : 
     231             :         /* Must update parent's downlink if any */
     232       12860 :         if (parent->buffer != InvalidBuffer)
     233             :         {
     234        1392 :             xlrec.offnumParent = parent->offnum;
     235        1392 :             xlrec.nodeI = parent->node;
     236             : 
     237        1392 :             saveNodeLink(index, parent, current->blkno, current->offnum);
     238             :         }
     239             :     }
     240             :     else
     241             :     {
     242             :         /*
     243             :          * Tuple must be inserted into existing chain.  We mustn't change the
     244             :          * chain's head address, but we don't need to chase the entire chain
     245             :          * to put the tuple at the end; we can insert it second.
     246             :          *
     247             :          * Also, it's possible that the "chain" consists only of a DEAD tuple,
     248             :          * in which case we should replace the DEAD tuple in-place.
     249             :          */
     250             :         SpGistLeafTuple head;
     251             :         OffsetNumber offnum;
     252             : 
     253      522752 :         head = (SpGistLeafTuple) PageGetItem(current->page,
     254             :                                              PageGetItemId(current->page, current->offnum));
     255      522752 :         if (head->tupstate == SPGIST_LIVE)
     256             :         {
     257      522752 :             SGLT_SET_NEXTOFFSET(leafTuple, SGLT_GET_NEXTOFFSET(head));  /* new tuple inherits the head's successor */
     258      522752 :             offnum = SpGistPageAddNewItem(state, current->page,
     259      522752 :                                           (Item) leafTuple, leafTuple->size,
     260             :                                           NULL, false);
     261             : 
     262             :             /*
     263             :              * re-get head of list because it could have been moved on page,
     264             :              * and set new second element
     265             :              */
     266      522752 :             head = (SpGistLeafTuple) PageGetItem(current->page,
     267             :                                                  PageGetItemId(current->page, current->offnum));
     268      522752 :             SGLT_SET_NEXTOFFSET(head, offnum);
     269             : 
     270      522752 :             xlrec.offnumLeaf = offnum;
     271      522752 :             xlrec.offnumHeadLeaf = current->offnum;
     272             :         }
     273           0 :         else if (head->tupstate == SPGIST_DEAD)
     274             :         {
     275           0 :             SGLT_SET_NEXTOFFSET(leafTuple, InvalidOffsetNumber);
     276           0 :             PageIndexTupleDelete(current->page, current->offnum);
     277           0 :             if (PageAddItem(current->page,
     278             :                             (Item) leafTuple, leafTuple->size,
     279           0 :                             current->offnum, false, false) != current->offnum)  /* must reuse the DEAD tuple's offset */
     280           0 :                 elog(ERROR, "failed to add item of size %u to SPGiST index page",
     281             :                      leafTuple->size);
     282             : 
     283             :             /* WAL replay distinguishes this case by equal offnums */
     284           0 :             xlrec.offnumLeaf = current->offnum;
     285           0 :             xlrec.offnumHeadLeaf = current->offnum;
     286             :         }
     287             :         else
     288           0 :             elog(ERROR, "unexpected SPGiST tuple state: %d", head->tupstate);
     289             :     }
     290             : 
     291      535612 :     MarkBufferDirty(current->buffer);
     292             : 
     293      535612 :     if (RelationNeedsWAL(index) && !state->isBuild)    /* no WAL record is written when state->isBuild is set */
     294             :     {
     295             :         XLogRecPtr  recptr;
     296             :         int         flags;
     297             : 
     298      162282 :         XLogBeginInsert();
     299      162282 :         XLogRegisterData((char *) &xlrec, sizeof(xlrec));
     300      162282 :         XLogRegisterData((char *) leafTuple, leafTuple->size);
     301             : 
     302      162282 :         flags = REGBUF_STANDARD;
     303      162282 :         if (xlrec.newPage)
     304           6 :             flags |= REGBUF_WILL_INIT;
     305      162282 :         XLogRegisterBuffer(0, current->buffer, flags);
     306      162282 :         if (xlrec.offnumParent != InvalidOffsetNumber)  /* set above only when the parent's downlink was updated */
     307         574 :             XLogRegisterBuffer(1, parent->buffer, REGBUF_STANDARD);
     308             : 
     309      162282 :         recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF);
     310             : 
     311      162282 :         PageSetLSN(current->page, recptr);
     312             : 
     313             :         /* update parent only if we actually changed it */
     314      162282 :         if (xlrec.offnumParent != InvalidOffsetNumber)
     315             :         {
     316         574 :             PageSetLSN(parent->page, recptr);
     317             :         }
     318             :     }
     319             : 
     320      535612 :     END_CRIT_SECTION();
     321      535612 : }
     322             : 
     323             : /*
     324             :  * Count the number and total size of leaf tuples in the chain starting at
     325             :  * current->offnum.  Return number into *nToSplit and total size as function
     326             :  * result.  Only LIVE tuples are counted; the size includes one ItemIdData each.
     327             :  *
     328             :  * Kludgy special case when considering the root page (i.e., root is a leaf
     329             :  * page, but we're about to split for the first time): return fake large
     330             :  * values to force spgdoinsert() to take the doPickSplit rather than
     331             :  * moveLeafs code path.  moveLeafs is not prepared to deal with root page.
     332             :  */
     333             : static int
     334        5422 : checkSplitConditions(Relation index, SpGistState *state,  /* neither index nor state is referenced in the body */
     335             :                      SPPageDesc *current, int *nToSplit)
     336             : {
     337             :     int         i,
     338        5422 :                 n = 0,
     339        5422 :                 totalSize = 0;
     340             : 
     341        5422 :     if (SpGistBlockIsRoot(current->blkno))
     342             :     {
     343             :         /* return impossible values to force split */
     344          56 :         *nToSplit = BLCKSZ;
     345          56 :         return BLCKSZ;
     346             :     }
     347             : 
     348        5366 :     i = current->offnum;
     349      538884 :     while (i != InvalidOffsetNumber)   /* follow the chain's next-offset links until it ends */
     350             :     {
     351             :         SpGistLeafTuple it;
     352             : 
     353             :         Assert(i >= FirstOffsetNumber &&
     354             :                i <= PageGetMaxOffsetNumber(current->page));
     355      533518 :         it = (SpGistLeafTuple) PageGetItem(current->page,
     356             :                                            PageGetItemId(current->page, i));
     357      533518 :         if (it->tupstate == SPGIST_LIVE)
     358             :         {
     359      533518 :             n++;
     360      533518 :             totalSize += it->size + sizeof(ItemIdData);    /* tuple itself plus one ItemIdData */
     361             :         }
     362           0 :         else if (it->tupstate == SPGIST_DEAD)
     363             :         {
     364             :             /* We could see a DEAD tuple as first/only chain item */
     365             :             Assert(i == current->offnum);
     366             :             Assert(SGLT_GET_NEXTOFFSET(it) == InvalidOffsetNumber);
     367             :             /* Don't count it in result, because it won't go to other page */
     368             :         }
     369             :         else
     370           0 :             elog(ERROR, "unexpected SPGiST tuple state: %d", it->tupstate);
     371             : 
     372      533518 :         i = SGLT_GET_NEXTOFFSET(it);
     373             :     }
     374             : 
     375        5366 :     *nToSplit = n;
     376             : 
     377        5366 :     return totalSize;
     378             : }
     379             : 
     380             : /*
     381             :  * current points to a leaf-tuple chain that we wanted to add newLeafTuple to,
     382             :  * but the chain has to be moved because there's not enough room to add
     383             :  * newLeafTuple to its page.  We use this method when the chain contains
     384             :  * very little data so a split would be inefficient.  We are sure we can
     385             :  * fit the chain plus newLeafTuple on one other page.  The parent's downlink is updated to the new chain head.
     386             :  */
     387             : static void
     388        1604 : moveLeafs(Relation index, SpGistState *state,
     389             :           SPPageDesc *current, SPPageDesc *parent,
     390             :           SpGistLeafTuple newLeafTuple, bool isNulls)
     391             : {
     392             :     int         i,
     393             :                 nDelete,
     394             :                 nInsert,
     395             :                 size;
     396             :     Buffer      nbuf;
     397             :     Page        npage;
     398             :     SpGistLeafTuple it;
     399        1604 :     OffsetNumber r = InvalidOffsetNumber,   /* offset of the tuple most recently added to npage */
     400        1604 :                 startOffset = InvalidOffsetNumber;
     401        1604 :     bool        replaceDead = false;
     402             :     OffsetNumber *toDelete;
     403             :     OffsetNumber *toInsert;
     404             :     BlockNumber nblkno;
     405             :     spgxlogMoveLeafs xlrec;
     406             :     char       *leafdata,
     407             :                *leafptr;
     408             : 
     409             :     /* This doesn't work on root page */
     410             :     Assert(parent->buffer != InvalidBuffer);
     411             :     Assert(parent->buffer != current->buffer);
     412             : 
     413             :     /* Locate the tuples to be moved, and count up the space needed */
     414        1604 :     i = PageGetMaxOffsetNumber(current->page);     /* used as an upper bound on the chain length */
     415        1604 :     toDelete = (OffsetNumber *) palloc(sizeof(OffsetNumber) * i);   /* not pfree'd in this function */
     416        1604 :     toInsert = (OffsetNumber *) palloc(sizeof(OffsetNumber) * (i + 1));    /* +1 slot for newLeafTuple; also not pfree'd here */
     417             : 
     418        1604 :     size = newLeafTuple->size + sizeof(ItemIdData);
     419             : 
     420        1604 :     nDelete = 0;
     421        1604 :     i = current->offnum;
     422       64058 :     while (i != InvalidOffsetNumber)
     423             :     {
     424             :         SpGistLeafTuple it;     /* NOTE: shadows the function-level "it" */
     425             : 
     426             :         Assert(i >= FirstOffsetNumber &&
     427             :                i <= PageGetMaxOffsetNumber(current->page));
     428       62454 :         it = (SpGistLeafTuple) PageGetItem(current->page,
     429             :                                            PageGetItemId(current->page, i));
     430             : 
     431       62454 :         if (it->tupstate == SPGIST_LIVE)
     432             :         {
     433       62454 :             toDelete[nDelete] = i;
     434       62454 :             size += it->size + sizeof(ItemIdData);
     435       62454 :             nDelete++;
     436             :         }
     437           0 :         else if (it->tupstate == SPGIST_DEAD)
     438             :         {
     439             :             /* We could see a DEAD tuple as first/only chain item */
     440             :             Assert(i == current->offnum);
     441             :             Assert(SGLT_GET_NEXTOFFSET(it) == InvalidOffsetNumber);
     442             :             /* We don't want to move it, so don't count it in size */
     443           0 :             toDelete[nDelete] = i;
     444           0 :             nDelete++;
     445           0 :             replaceDead = true;
     446             :         }
     447             :         else
     448           0 :             elog(ERROR, "unexpected SPGiST tuple state: %d", it->tupstate);
     449             : 
     450       62454 :         i = SGLT_GET_NEXTOFFSET(it);
     451             :     }
     452             : 
     453             :     /* Find a leaf page that will hold them */
     454        1604 :     nbuf = SpGistGetBuffer(index, GBUF_LEAF | (isNulls ? GBUF_NULLS : 0),
     455             :                            size, &xlrec.newPage);   /* xlrec.newPage is filled in through this pointer */
     456        1604 :     npage = BufferGetPage(nbuf);
     457        1604 :     nblkno = BufferGetBlockNumber(nbuf);
     458             :     Assert(nblkno != current->blkno);
     459             : 
     460        1604 :     leafdata = leafptr = palloc(size);  /* allocated before the critical section; size also counts the ItemIdData entries, so this exceeds the bytes copied in */
     461             : 
     462        1604 :     START_CRIT_SECTION();
     463             : 
     464             :     /* copy all the old tuples to new page, unless they're dead */
     465        1604 :     nInsert = 0;
     466        1604 :     if (!replaceDead)
     467             :     {
     468       64058 :         for (i = 0; i < nDelete; i++)
     469             :         {
     470       62454 :             it = (SpGistLeafTuple) PageGetItem(current->page,
     471             :                                                PageGetItemId(current->page, toDelete[i]));
     472             :             Assert(it->tupstate == SPGIST_LIVE);
     473             : 
     474             :             /*
     475             :              * Update chain link (notice the chain order gets reversed, but we
     476             :              * don't care).  We're modifying the tuple on the source page
     477             :              * here, but it's okay since we're about to delete it.
     478             :              */
     479       62454 :             SGLT_SET_NEXTOFFSET(it, r);
     480             : 
     481       62454 :             r = SpGistPageAddNewItem(state, npage, (Item) it, it->size,
     482             :                                      &startOffset, false);
     483             : 
     484       62454 :             toInsert[nInsert] = r;
     485       62454 :             nInsert++;
     486             : 
     487             :             /* save modified tuple into leafdata as well */
     488       62454 :             memcpy(leafptr, it, it->size);
     489       62454 :             leafptr += it->size;
     490             :         }
     491             :     }
     492             : 
     493             :     /* add the new tuple as well */
     494        1604 :     SGLT_SET_NEXTOFFSET(newLeafTuple, r);   /* links to the last tuple copied, or to nothing if none were */
     495        1604 :     r = SpGistPageAddNewItem(state, npage,
     496        1604 :                              (Item) newLeafTuple, newLeafTuple->size,
     497             :                              &startOffset, false);  /* r is now the head of the chain on npage */
     498        1604 :     toInsert[nInsert] = r;
     499        1604 :     nInsert++;
     500        1604 :     memcpy(leafptr, newLeafTuple, newLeafTuple->size);
     501        1604 :     leafptr += newLeafTuple->size;
     502             : 
     503             :     /*
     504             :      * Now delete the old tuples, leaving a redirection pointer behind for the
     505             :      * first one, unless we're doing an index build; in which case there can't
     506             :      * be any concurrent scan so we need not provide a redirect.
     507             :      */
     508        1604 :     spgPageIndexMultiDelete(state, current->page, toDelete, nDelete,
     509        1604 :                             state->isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT,
     510             :                             SPGIST_PLACEHOLDER,
     511             :                             nblkno, r);
     512             : 
     513             :     /* Update parent's downlink and mark parent page dirty */
     514        1604 :     saveNodeLink(index, parent, nblkno, r);
     515             : 
     516             :     /* Mark the leaf pages too */
     517        1604 :     MarkBufferDirty(current->buffer);
     518        1604 :     MarkBufferDirty(nbuf);
     519             : 
     520        1604 :     if (RelationNeedsWAL(index) && !state->isBuild)
     521             :     {
     522             :         XLogRecPtr  recptr;
     523             : 
     524             :         /* prepare WAL info */
     525         388 :         STORE_STATE(state, xlrec.stateSrc);
     526             : 
     527         388 :         xlrec.nMoves = nDelete;
     528         388 :         xlrec.replaceDead = replaceDead;
     529         388 :         xlrec.storesNulls = isNulls;
     530             : 
     531         388 :         xlrec.offnumParent = parent->offnum;
     532         388 :         xlrec.nodeI = parent->node;
     533             : 
     534         388 :         XLogBeginInsert();
     535         388 :         XLogRegisterData((char *) &xlrec, SizeOfSpgxlogMoveLeafs);
     536         388 :         XLogRegisterData((char *) toDelete,
     537             :                          sizeof(OffsetNumber) * nDelete);
     538         388 :         XLogRegisterData((char *) toInsert,
     539             :                          sizeof(OffsetNumber) * nInsert);
     540         388 :         XLogRegisterData((char *) leafdata, leafptr - leafdata);    /* only the bytes actually filled in */
     541             : 
     542         388 :         XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
     543         388 :         XLogRegisterBuffer(1, nbuf, REGBUF_STANDARD | (xlrec.newPage ? REGBUF_WILL_INIT : 0));
     544         388 :         XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD);
     545             : 
     546         388 :         recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS);
     547             : 
     548         388 :         PageSetLSN(current->page, recptr);
     549         388 :         PageSetLSN(npage, recptr);
     550         388 :         PageSetLSN(parent->page, recptr);
     551             :     }
     552             : 
     553        1604 :     END_CRIT_SECTION();
     554             : 
     555             :     /* Update local free-space cache and release new buffer */
     556        1604 :     SpGistSetLastUsedPage(index, nbuf);
     557        1604 :     UnlockReleaseBuffer(nbuf);
     558        1604 : }
     559             : 
     560             : /*
     561             :  * Fill in the real destination of a previously-created redirection tuple.
     562             :  *
     563             :  * We use this when it's not convenient to know the destination first.
     564             :  * The tuple at "position" on current->page should have been made with the
     565             :  * "impossible" destination of the metapage; it is repointed at (blkno, offnum).
     566             :  */
     567             : static void
     568         870 : setRedirectionTuple(SPPageDesc *current, OffsetNumber position,
     569             :                     BlockNumber blkno, OffsetNumber offnum)
     570             : {
     571             :     SpGistDeadTuple dt;
     572             : 
     573         870 :     dt = (SpGistDeadTuple) PageGetItem(current->page,
     574             :                                        PageGetItemId(current->page, position));
     575             :     Assert(dt->tupstate == SPGIST_REDIRECT);    /* must already be a redirect */
     576             :     Assert(ItemPointerGetBlockNumber(&dt->pointer) == SPGIST_METAPAGE_BLKNO); /* placeholder target not yet replaced */
     577         870 :     ItemPointerSet(&dt->pointer, blkno, offnum);    /* no copy-back follows: dt presumably aliases the page image (confirm) */
     578         870 : }
     579             : 
     580             : /*
     581             :  * Test to see if the user-defined picksplit function failed to do its job,
     582             :  * ie, it put all the leaf tuples into the same node.
     583             :  * If so, divide the tuples round-robin across several nodes (all with the same
     584             :  * label) and return true to select allTheSame mode for this inner tuple.
     585             :  *
     586             :  * (This code is also used to forcibly select allTheSame mode for nulls.)
     587             :  *
     588             :  * If we know that the leaf tuples wouldn't all fit on one page (tooBig), then we
     589             :  * exclude the last tuple (which is the incoming new tuple that forced a split)
     590             :  * from the check to see if more than one node is used.  The reason for this
     591             :  * is that if the existing tuples are put into only one chain, then even if
     592             :  * we move them all to an empty page, there would still not be room for the
     593             :  * new tuple, so we'd get into an infinite loop of picksplit attempts.
     594             :  * Forcing allTheSame mode dodges this problem by ensuring the old tuples will
     595             :  * be split across pages.  (Exercise for the reader: figure out why this
     596             :  * fixes the problem even when there is only one old tuple.)
     597             :  */
     598             : static bool
     599        3818 : checkAllTheSame(spgPickSplitIn *in, spgPickSplitOut *out, bool tooBig,
     600             :                 bool *includeNew)   /* output: false if the new tuple must be left out of the split */
     601             : {
     602             :     int         theNode;    /* node that the first tuple was mapped to */
     603             :     int         limit;      /* number of leading tuples examined by the test */
     604             :     int         i;
     605             : 
     606             :     /* For the moment, assume we can include the new leaf tuple */
     607        3818 :     *includeNew = true;
     608             : 
     609             :     /* If there's only the new leaf tuple, don't select allTheSame mode */
     610        3818 :     if (in->nTuples <= 1)
     611          44 :         return false;
     612             : 
     613             :     /* If tuple set doesn't fit on one page, ignore the new (last) tuple in test */
     614        3774 :     limit = tooBig ? in->nTuples - 1 : in->nTuples;
     615             : 
     616             :     /* Check to see if more than one node is populated */
     617        3774 :     theNode = out->mapTuplesToNodes[0];
     618       84380 :     for (i = 1; i < limit; i++)
     619             :     {
     620       84170 :         if (out->mapTuplesToNodes[i] != theNode)
     621        3564 :             return false;   /* picksplit did separate the tuples; its output is left untouched */
     622             :     }
     623             : 
     624             :     /* Nope, so override the picksplit function's decisions */
     625             : 
     626             :     /* If the new tuple is in its own node, it can't be included in split */
     627         210 :     if (tooBig && out->mapTuplesToNodes[in->nTuples - 1] != theNode)
     628           0 :         *includeNew = false;
     629             : 
     630         210 :     out->nNodes = 8;         /* arbitrary number of child nodes */
     631             : 
     632             :     /* Round-robin assignment of tuples to nodes (note we include new tuple) */
     633       22046 :     for (i = 0; i < in->nTuples; i++)
     634       21836 :         out->mapTuplesToNodes[i] = i % out->nNodes;
     635             : 
     636             :     /* The opclass may not use node labels, but if it does, duplicate 'em */
     637         210 :     if (out->nodeLabels)
     638             :     {
     639             :         /* NOTE(review): theNode is used as an index with no range check here -- confirm picksplit output is trusted */
     640          36 :         Datum       theLabel = out->nodeLabels[theNode];   /* fetch before the array is replaced below */
     641             : 
     642          36 :         out->nodeLabels = (Datum *) palloc(sizeof(Datum) * out->nNodes);
     643         324 :         for (i = 0; i < out->nNodes; i++)
     644         288 :             out->nodeLabels[i] = theLabel;
     645             :     }
     646             : 
     647             :     /* We don't touch the prefix or the leaf tuple datum assignments */
     648             : 
     649         210 :     return true;
     650             : }
     650             : 
     651             : /*
     652             :  * current points to a leaf-tuple chain that we wanted to add newLeafTuple to,
     653             :  * but the chain has to be split because there's not enough room to add
     654             :  * newLeafTuple to its page.
     655             :  *
     656             :  * This function splits the leaf tuple set according to picksplit's rules,
     657             :  * creating one or more new chains that are spread across the current page
     658             :  * and an additional leaf page (we assume that two leaf pages will be
     659             :  * sufficient).  A new inner tuple is created, and the parent downlink
     660             :  * pointer is updated to point to that inner tuple instead of the leaf chain.
     661             :  *
     662             :  * On exit, current contains the address of the new inner tuple.
     663             :  *
     664             :  * Returns true if we successfully inserted newLeafTuple during this function,
     665             :  * false if caller still has to do it (meaning another picksplit operation is
     666             :  * probably needed).  Failure could occur if the picksplit result is fairly
     667             :  * unbalanced, or if newLeafTuple is just plain too big to fit on a page.
     668             :  * Because we force the picksplit result to be at least two chains, each
     669             :  * cycle will get rid of at least one leaf tuple from the chain, so the loop
     670             :  * will eventually terminate if lack of balance is the issue.  If the tuple
     671             :  * is too big, we assume that repeated picksplit operations will eventually
     672             :  * make it small enough by repeated prefix-stripping.  A broken opclass could
     673             :  * make this an infinite loop, though, so spgdoinsert() checks that the
     674             :  * leaf datums get smaller each time.
     675             :  */
     676             : static bool
     677        3818 : doPickSplit(Relation index, SpGistState *state,
     678             :             SPPageDesc *current, SPPageDesc *parent,
     679             :             SpGistLeafTuple newLeafTuple,
     680             :             int level, bool isNulls, bool isNew)
     681             : {
     682        3818 :     bool        insertedNew = false;
     683             :     spgPickSplitIn in;
     684             :     spgPickSplitOut out;
     685             :     FmgrInfo   *procinfo;
     686             :     bool        includeNew;
     687             :     int         i,
     688             :                 max,
     689             :                 n;
     690             :     SpGistInnerTuple innerTuple;
     691             :     SpGistNodeTuple node,
     692             :                *nodes;
     693             :     Buffer      newInnerBuffer,
     694             :                 newLeafBuffer;
     695             :     uint8      *leafPageSelect;
     696             :     int        *leafSizes;
     697             :     OffsetNumber *toDelete;
     698             :     OffsetNumber *toInsert;
     699        3818 :     OffsetNumber redirectTuplePos = InvalidOffsetNumber;
     700             :     OffsetNumber startOffsets[2];
     701             :     SpGistLeafTuple *oldLeafs;
     702             :     SpGistLeafTuple *newLeafs;
     703             :     Datum       leafDatums[INDEX_MAX_KEYS];
     704             :     bool        leafIsnulls[INDEX_MAX_KEYS];
     705             :     int         spaceToDelete;
     706             :     int         currentFreeSpace;
     707             :     int         totalLeafSizes;
     708             :     bool        allTheSame;
     709             :     spgxlogPickSplit xlrec;
     710             :     char       *leafdata,
     711             :                *leafptr;
     712             :     SPPageDesc  saveCurrent;
     713             :     int         nToDelete,
     714             :                 nToInsert,
     715             :                 maxToInclude;
     716             : 
     717        3818 :     in.level = level;
     718             : 
     719             :     /*
     720             :      * Allocate per-leaf-tuple work arrays with max possible size
     721             :      */
     722        3818 :     max = PageGetMaxOffsetNumber(current->page);
     723        3818 :     n = max + 1;
     724        3818 :     in.datums = (Datum *) palloc(sizeof(Datum) * n);
     725        3818 :     toDelete = (OffsetNumber *) palloc(sizeof(OffsetNumber) * n);
     726        3818 :     toInsert = (OffsetNumber *) palloc(sizeof(OffsetNumber) * n);
     727        3818 :     oldLeafs = (SpGistLeafTuple *) palloc(sizeof(SpGistLeafTuple) * n);
     728        3818 :     newLeafs = (SpGistLeafTuple *) palloc(sizeof(SpGistLeafTuple) * n);
     729        3818 :     leafPageSelect = (uint8 *) palloc(sizeof(uint8) * n);
     730             : 
     731        3818 :     STORE_STATE(state, xlrec.stateSrc);
     732             : 
     733             :     /*
     734             :      * Form list of leaf tuples which will be distributed as split result;
     735             :      * also, count up the amount of space that will be freed from current.
     736             :      * (Note that in the non-root case, we won't actually delete the old
     737             :      * tuples, only replace them with redirects or placeholders.)
     738             :      */
     739        3818 :     nToInsert = 0;
     740        3818 :     nToDelete = 0;
     741        3818 :     spaceToDelete = 0;
     742        3818 :     if (SpGistBlockIsRoot(current->blkno))
     743             :     {
     744             :         /*
     745             :          * We are splitting the root (which up to now is also a leaf page).
     746             :          * Its tuples are not linked, so scan sequentially to get them all. We
     747             :          * ignore the original value of current->offnum.
     748             :          */
     749       10520 :         for (i = FirstOffsetNumber; i <= max; i++)
     750             :         {
     751             :             SpGistLeafTuple it;
     752             : 
     753       10464 :             it = (SpGistLeafTuple) PageGetItem(current->page,
     754             :                                                PageGetItemId(current->page, i));
     755       10464 :             if (it->tupstate == SPGIST_LIVE)
     756             :             {
     757       10464 :                 in.datums[nToInsert] =
     758       10464 :                     isNulls ? (Datum) 0 : SGLTDATUM(it, state);
     759       10464 :                 oldLeafs[nToInsert] = it;
     760       10464 :                 nToInsert++;
     761       10464 :                 toDelete[nToDelete] = i;
     762       10464 :                 nToDelete++;
     763             :                 /* we will delete the tuple altogether, so count full space */
     764       10464 :                 spaceToDelete += it->size + sizeof(ItemIdData);
     765             :             }
     766             :             else                /* tuples on root should be live */
     767           0 :                 elog(ERROR, "unexpected SPGiST tuple state: %d", it->tupstate);
     768             :         }
     769             :     }
     770             :     else
     771             :     {
     772             :         /* Normal case, just collect the leaf tuples in the chain */
     773        3762 :         i = current->offnum;
     774      474826 :         while (i != InvalidOffsetNumber)
     775             :         {
     776             :             SpGistLeafTuple it;
     777             : 
     778             :             Assert(i >= FirstOffsetNumber && i <= max);
     779      471064 :             it = (SpGistLeafTuple) PageGetItem(current->page,
     780             :                                                PageGetItemId(current->page, i));
     781      471064 :             if (it->tupstate == SPGIST_LIVE)
     782             :             {
     783      471064 :                 in.datums[nToInsert] =
     784      471064 :                     isNulls ? (Datum) 0 : SGLTDATUM(it, state);
     785      471064 :                 oldLeafs[nToInsert] = it;
     786      471064 :                 nToInsert++;
     787      471064 :                 toDelete[nToDelete] = i;
     788      471064 :                 nToDelete++;
     789             :                 /* we will not delete the tuple, only replace with dead */
     790             :                 Assert(it->size >= SGDTSIZE);
     791      471064 :                 spaceToDelete += it->size - SGDTSIZE;
     792             :             }
     793           0 :             else if (it->tupstate == SPGIST_DEAD)
     794             :             {
     795             :                 /* We could see a DEAD tuple as first/only chain item */
     796             :                 Assert(i == current->offnum);
     797             :                 Assert(SGLT_GET_NEXTOFFSET(it) == InvalidOffsetNumber);
     798           0 :                 toDelete[nToDelete] = i;
     799           0 :                 nToDelete++;
     800             :                 /* replacing it with redirect will save no space */
     801             :             }
     802             :             else
     803           0 :                 elog(ERROR, "unexpected SPGiST tuple state: %d", it->tupstate);
     804             : 
     805      471064 :             i = SGLT_GET_NEXTOFFSET(it);
     806             :         }
     807             :     }
     808        3818 :     in.nTuples = nToInsert;
     809             : 
     810             :     /*
     811             :      * We may not actually insert new tuple because another picksplit may be
     812             :      * necessary due to too large value, but we will try to allocate enough
     813             :      * space to include it; and in any case it has to be included in the input
     814             :      * for the picksplit function.  So don't increment nToInsert yet.
     815             :      */
     816        3818 :     in.datums[in.nTuples] =
     817        3818 :         isNulls ? (Datum) 0 : SGLTDATUM(newLeafTuple, state);
     818        3818 :     oldLeafs[in.nTuples] = newLeafTuple;
     819        3818 :     in.nTuples++;
     820             : 
     821        3818 :     memset(&out, 0, sizeof(out));
     822             : 
     823        3818 :     if (!isNulls)
     824             :     {
     825             :         /*
     826             :          * Perform split using user-defined method.
     827             :          */
     828        3818 :         procinfo = index_getprocinfo(index, 1, SPGIST_PICKSPLIT_PROC);
     829        3818 :         FunctionCall2Coll(procinfo,
     830        3818 :                           index->rd_indcollation[0],
     831             :                           PointerGetDatum(&in),
     832             :                           PointerGetDatum(&out));
     833             : 
     834             :         /*
     835             :          * Form new leaf tuples and count up the total space needed.
     836             :          */
     837        3818 :         totalLeafSizes = 0;
     838      489164 :         for (i = 0; i < in.nTuples; i++)
     839             :         {
     840      485346 :             if (state->leafTupDesc->natts > 1)
     841       43180 :                 spgDeformLeafTuple(oldLeafs[i],
     842             :                                    state->leafTupDesc,
     843             :                                    leafDatums,
     844             :                                    leafIsnulls,
     845             :                                    isNulls);
     846             : 
     847      485346 :             leafDatums[spgKeyColumn] = out.leafTupleDatums[i];
     848      485346 :             leafIsnulls[spgKeyColumn] = false;
     849             : 
     850      485346 :             newLeafs[i] = spgFormLeafTuple(state, &oldLeafs[i]->heapPtr,
     851             :                                            leafDatums,
     852             :                                            leafIsnulls);
     853      485346 :             totalLeafSizes += newLeafs[i]->size + sizeof(ItemIdData);
     854             :         }
     855             :     }
     856             :     else
     857             :     {
     858             :         /*
     859             :          * Perform dummy split that puts all tuples into one node.
     860             :          * checkAllTheSame will override this and force allTheSame mode.
     861             :          */
     862           0 :         out.hasPrefix = false;
     863           0 :         out.nNodes = 1;
     864           0 :         out.nodeLabels = NULL;
     865           0 :         out.mapTuplesToNodes = palloc0(sizeof(int) * in.nTuples);
     866             : 
     867             :         /*
     868             :          * Form new leaf tuples and count up the total space needed.
     869             :          */
     870           0 :         totalLeafSizes = 0;
     871           0 :         for (i = 0; i < in.nTuples; i++)
     872             :         {
     873           0 :             if (state->leafTupDesc->natts > 1)
     874           0 :                 spgDeformLeafTuple(oldLeafs[i],
     875             :                                    state->leafTupDesc,
     876             :                                    leafDatums,
     877             :                                    leafIsnulls,
     878             :                                    isNulls);
     879             : 
     880             :             /*
     881             :              * Nulls tree can contain only null key values.
     882             :              */
     883           0 :             leafDatums[spgKeyColumn] = (Datum) 0;
     884           0 :             leafIsnulls[spgKeyColumn] = true;
     885             : 
     886           0 :             newLeafs[i] = spgFormLeafTuple(state, &oldLeafs[i]->heapPtr,
     887             :                                            leafDatums,
     888             :                                            leafIsnulls);
     889           0 :             totalLeafSizes += newLeafs[i]->size + sizeof(ItemIdData);
     890             :         }
     891             :     }
     892             : 
     893             :     /*
     894             :      * Check to see if the picksplit function failed to separate the values,
     895             :      * ie, it put them all into the same child node.  If so, select allTheSame
     896             :      * mode and create a random split instead.  See comments for
     897             :      * checkAllTheSame as to why we need to know if the new leaf tuples could
     898             :      * fit on one page.
     899             :      */
     900        3818 :     allTheSame = checkAllTheSame(&in, &out,
     901             :                                  totalLeafSizes > SPGIST_PAGE_CAPACITY,
     902             :                                  &includeNew);
     903             : 
     904             :     /*
     905             :      * If checkAllTheSame decided we must exclude the new tuple, don't
     906             :      * consider it any further.
     907             :      */
     908        3818 :     if (includeNew)
     909        3818 :         maxToInclude = in.nTuples;
     910             :     else
     911             :     {
     912           0 :         maxToInclude = in.nTuples - 1;
     913           0 :         totalLeafSizes -= newLeafs[in.nTuples - 1]->size + sizeof(ItemIdData);
     914             :     }
     915             : 
     916             :     /*
     917             :      * Allocate per-node work arrays.  Since checkAllTheSame could replace
     918             :      * out.nNodes with a value larger than the number of tuples on the input
     919             :      * page, we can't allocate these arrays before here.
     920             :      */
     921        3818 :     nodes = (SpGistNodeTuple *) palloc(sizeof(SpGistNodeTuple) * out.nNodes);
     922        3818 :     leafSizes = (int *) palloc0(sizeof(int) * out.nNodes);
     923             : 
     924             :     /*
     925             :      * Form nodes of inner tuple and inner tuple itself
     926             :      */
     927       29568 :     for (i = 0; i < out.nNodes; i++)
     928             :     {
     929       25750 :         Datum       label = (Datum) 0;
     930       25750 :         bool        labelisnull = (out.nodeLabels == NULL);
     931             : 
     932       25750 :         if (!labelisnull)
     933        2502 :             label = out.nodeLabels[i];
     934       25750 :         nodes[i] = spgFormNodeTuple(state, label, labelisnull);
     935             :     }
     936        3818 :     innerTuple = spgFormInnerTuple(state,
     937        3818 :                                    out.hasPrefix, out.prefixDatum,
     938             :                                    out.nNodes, nodes);
     939        3818 :     innerTuple->allTheSame = allTheSame;
     940             : 
     941             :     /*
     942             :      * Update nodes[] array to point into the newly formed innerTuple, so that
     943             :      * we can adjust their downlinks below.
     944             :      */
     945       29568 :     SGITITERATE(innerTuple, i, node)
     946             :     {
     947       25750 :         nodes[i] = node;
     948             :     }
     949             : 
     950             :     /*
     951             :      * Re-scan new leaf tuples and count up the space needed under each node.
     952             :      */
     953      489164 :     for (i = 0; i < maxToInclude; i++)
     954             :     {
     955      485346 :         n = out.mapTuplesToNodes[i];
     956      485346 :         if (n < 0 || n >= out.nNodes)
     957           0 :             elog(ERROR, "inconsistent result of SPGiST picksplit function");
     958      485346 :         leafSizes[n] += newLeafs[i]->size + sizeof(ItemIdData);
     959             :     }
     960             : 
     961             :     /*
     962             :      * To perform the split, we must insert a new inner tuple, which can't go
     963             :      * on a leaf page; and unless we are splitting the root page, we must then
     964             :      * update the parent tuple's downlink to point to the inner tuple.  If
     965             :      * there is room, we'll put the new inner tuple on the same page as the
     966             :      * parent tuple, otherwise we need another non-leaf buffer. But if the
     967             :      * parent page is the root, we can't add the new inner tuple there,
     968             :      * because the root page must have only one inner tuple.
     969             :      */
     970        3818 :     xlrec.initInner = false;
     971        3818 :     if (parent->buffer != InvalidBuffer &&
     972        3762 :         !SpGistBlockIsRoot(parent->blkno) &&
     973        3518 :         (SpGistPageGetFreeSpace(parent->page, 1) >=
     974        3518 :          innerTuple->size + sizeof(ItemIdData)))
     975             :     {
     976             :         /* New inner tuple will fit on parent page */
     977        3246 :         newInnerBuffer = parent->buffer;
     978             :     }
     979         572 :     else if (parent->buffer != InvalidBuffer)
     980             :     {
     981             :         /* Send tuple to page with next triple parity (see README) */
     982        1032 :         newInnerBuffer = SpGistGetBuffer(index,
     983         516 :                                          GBUF_INNER_PARITY(parent->blkno + 1) |
     984         516 :                                          (isNulls ? GBUF_NULLS : 0),
     985         516 :                                          innerTuple->size + sizeof(ItemIdData),
     986             :                                          &xlrec.initInner);
     987             :     }
     988             :     else
     989             :     {
     990             :         /* Root page split ... inner tuple will go to root page */
     991          56 :         newInnerBuffer = InvalidBuffer;
     992             :     }
     993             : 
     994             :     /*
     995             :      * The new leaf tuples converted from the existing ones should require the
     996             :      * same or less space, and therefore should all fit onto one page
     997             :      * (although that's not necessarily the current page, since we can't
     998             :      * delete the old tuples but only replace them with placeholders).
     999             :      * However, the incoming new tuple might not also fit, in which case we
    1000             :      * might need another picksplit cycle to reduce it some more.
    1001             :      *
    1002             :      * If there's not room to put everything back onto the current page, then
    1003             :      * we decide on a per-node basis which tuples go to the new page. (We do
    1004             :      * it like that because leaf tuple chains can't cross pages, so we must
    1005             :      * place all leaf tuples belonging to the same parent node on the same
    1006             :      * page.)
    1007             :      *
    1008             :      * If we are splitting the root page (turning it from a leaf page into an
    1009             :      * inner page), then no leaf tuples can go back to the current page; they
    1010             :      * must all go somewhere else.
    1011             :      */
    1012        3818 :     if (!SpGistBlockIsRoot(current->blkno))
    1013        3762 :         currentFreeSpace = PageGetExactFreeSpace(current->page) + spaceToDelete;
    1014             :     else
    1015          56 :         currentFreeSpace = 0;   /* prevent assigning any tuples to current */
    1016             : 
    1017        3818 :     xlrec.initDest = false;
    1018             : 
    1019        3818 :     if (totalLeafSizes <= currentFreeSpace)
    1020             :     {
    1021             :         /* All the leaf tuples will fit on current page */
    1022          16 :         newLeafBuffer = InvalidBuffer;
    1023             :         /* mark new leaf tuple as included in insertions, if allowed */
    1024          16 :         if (includeNew)
    1025             :         {
    1026          16 :             nToInsert++;
    1027          16 :             insertedNew = true;
    1028             :         }
    1029        1980 :         for (i = 0; i < nToInsert; i++)
    1030        1964 :             leafPageSelect[i] = 0;  /* signifies current page */
    1031             :     }
    1032        3802 :     else if (in.nTuples == 1 && totalLeafSizes > SPGIST_PAGE_CAPACITY)
    1033             :     {
    1034             :         /*
    1035             :          * We're trying to split up a long value by repeated suffixing, but
    1036             :          * it's not going to fit yet.  Don't bother allocating a second leaf
    1037             :          * buffer that we won't be able to use.
    1038             :          */
    1039          44 :         newLeafBuffer = InvalidBuffer;
    1040             :         Assert(includeNew);
    1041          44 :         Assert(nToInsert == 0);
    1042             :     }
    1043             :     else
    1044             :     {
    1045             :         /* We will need another leaf page */
    1046             :         uint8      *nodePageSelect;
    1047             :         int         curspace;
    1048             :         int         newspace;
    1049             : 
    1050        3758 :         newLeafBuffer = SpGistGetBuffer(index,
    1051             :                                         GBUF_LEAF | (isNulls ? GBUF_NULLS : 0),
    1052        3758 :                                         Min(totalLeafSizes,
    1053             :                                             SPGIST_PAGE_CAPACITY),
    1054             :                                         &xlrec.initDest);
    1055             : 
    1056             :         /*
    1057             :          * Attempt to assign node groups to the two pages.  We might fail to
    1058             :          * do so, even if totalLeafSizes is less than the available space,
    1059             :          * because we can't split a group across pages.
    1060             :          */
    1061        3758 :         nodePageSelect = (uint8 *) palloc(sizeof(uint8) * out.nNodes);
    1062             : 
    1063        3758 :         curspace = currentFreeSpace;
    1064        3758 :         newspace = PageGetExactFreeSpace(BufferGetPage(newLeafBuffer));
    1065       29432 :         for (i = 0; i < out.nNodes; i++)
    1066             :         {
    1067       25674 :             if (leafSizes[i] <= curspace)
    1068             :             {
    1069       16640 :                 nodePageSelect[i] = 0;  /* signifies current page */
    1070       16640 :                 curspace -= leafSizes[i];
    1071             :             }
    1072             :             else
    1073             :             {
    1074        9034 :                 nodePageSelect[i] = 1;  /* signifies new leaf page */
    1075        9034 :                 newspace -= leafSizes[i];
    1076             :             }
    1077             :         }
    1078        3758 :         if (curspace >= 0 && newspace >= 0)
    1079             :         {
    1080             :             /* Successful assignment, so we can include the new leaf tuple */
    1081        3600 :             if (includeNew)
    1082             :             {
    1083        3600 :                 nToInsert++;
    1084        3600 :                 insertedNew = true;
    1085             :             }
    1086             :         }
    1087         158 :         else if (includeNew)
    1088             :         {
    1089             :             /* We must exclude the new leaf tuple from the split */
    1090         158 :             int         nodeOfNewTuple = out.mapTuplesToNodes[in.nTuples - 1];
    1091             : 
    1092         158 :             leafSizes[nodeOfNewTuple] -=
    1093         158 :                 newLeafs[in.nTuples - 1]->size + sizeof(ItemIdData);
    1094             : 
    1095             :             /* Repeat the node assignment process --- should succeed now */
    1096         158 :             curspace = currentFreeSpace;
    1097         158 :             newspace = PageGetExactFreeSpace(BufferGetPage(newLeafBuffer));
    1098         766 :             for (i = 0; i < out.nNodes; i++)
    1099             :             {
    1100         608 :                 if (leafSizes[i] <= curspace)
    1101             :                 {
    1102         200 :                     nodePageSelect[i] = 0;  /* signifies current page */
    1103         200 :                     curspace -= leafSizes[i];
    1104             :                 }
    1105             :                 else
    1106             :                 {
    1107         408 :                     nodePageSelect[i] = 1;  /* signifies new leaf page */
    1108         408 :                     newspace -= leafSizes[i];
    1109             :                 }
    1110             :             }
    1111         158 :             if (curspace < 0 || newspace < 0)
    1112           0 :                 elog(ERROR, "failed to divide leaf tuple groups across pages");
    1113             :         }
    1114             :         else
    1115             :         {
    1116             :             /* oops, we already excluded new tuple ... should not get here */
    1117           0 :             elog(ERROR, "failed to divide leaf tuple groups across pages");
    1118             :         }
    1119             :         /* Expand the per-node assignments to be shown per leaf tuple */
    1120      486938 :         for (i = 0; i < nToInsert; i++)
    1121             :         {
    1122      483180 :             n = out.mapTuplesToNodes[i];
    1123      483180 :             leafPageSelect[i] = nodePageSelect[n];
    1124             :         }
    1125             :     }
    1126             : 
    1127             :     /* Start preparing WAL record */
    1128        3818 :     xlrec.nDelete = 0;
    1129        3818 :     xlrec.initSrc = isNew;
    1130        3818 :     xlrec.storesNulls = isNulls;
    1131        3818 :     xlrec.isRootSplit = SpGistBlockIsRoot(current->blkno);
    1132             : 
    1133        3818 :     leafdata = leafptr = (char *) palloc(totalLeafSizes);
    1134             : 
    1135             :     /* Here we begin making the changes to the target pages */
    1136        3818 :     START_CRIT_SECTION();
    1137             : 
    1138             :     /*
    1139             :      * Delete old leaf tuples from current buffer, except when we're splitting
    1140             :      * the root; in that case there's no need because we'll re-init the page
    1141             :      * below.  We do this first to make room for reinserting new leaf tuples.
    1142             :      */
    1143        3818 :     if (!SpGistBlockIsRoot(current->blkno))
    1144             :     {
    1145             :         /*
    1146             :          * Init buffer instead of deleting individual tuples, but only if
    1147             :          * there aren't any other live tuples and only during build; otherwise
    1148             :          * we need to set a redirection tuple for concurrent scans.
    1149             :          */
    1150        6610 :         if (state->isBuild &&
    1151        2848 :             nToDelete + SpGistPageGetOpaque(current->page)->nPlaceholder ==
    1152        2848 :             PageGetMaxOffsetNumber(current->page))
    1153             :         {
    1154         210 :             SpGistInitBuffer(current->buffer,
    1155             :                              SPGIST_LEAF | (isNulls ? SPGIST_NULLS : 0));
    1156         210 :             xlrec.initSrc = true;
    1157             :         }
    1158        3552 :         else if (isNew)
    1159             :         {
    1160             :             /* don't expose the freshly init'd buffer as a backup block */
    1161             :             Assert(nToDelete == 0);
    1162             :         }
    1163             :         else
    1164             :         {
    1165        3508 :             xlrec.nDelete = nToDelete;
    1166             : 
    1167        3508 :             if (!state->isBuild)
    1168             :             {
    1169             :                 /*
    1170             :                  * Need to create redirect tuple (it will point to new inner
    1171             :                  * tuple) but right now the new tuple's location is not known
    1172             :                  * yet.  So, set the redirection pointer to "impossible" value
    1173             :                  * and remember its position to update tuple later.
    1174             :                  */
    1175         870 :                 if (nToDelete > 0)
    1176         870 :                     redirectTuplePos = toDelete[0];
    1177         870 :                 spgPageIndexMultiDelete(state, current->page,
    1178             :                                         toDelete, nToDelete,
    1179             :                                         SPGIST_REDIRECT,
    1180             :                                         SPGIST_PLACEHOLDER,
    1181             :                                         SPGIST_METAPAGE_BLKNO,
    1182             :                                         FirstOffsetNumber);
    1183             :             }
    1184             :             else
    1185             :             {
    1186             :                 /*
    1187             :                  * During index build there are no concurrent searches, so we
    1188             :                  * don't need to create a redirection tuple.
    1189             :                  */
    1190        2638 :                 spgPageIndexMultiDelete(state, current->page,
    1191             :                                         toDelete, nToDelete,
    1192             :                                         SPGIST_PLACEHOLDER,
    1193             :                                         SPGIST_PLACEHOLDER,
    1194             :                                         InvalidBlockNumber,
    1195             :                                         InvalidOffsetNumber);
    1196             :             }
    1197             :         }
    1198             :     }
    1199             : 
    1200             :     /*
    1201             :      * Put leaf tuples on proper pages, and update downlinks in innerTuple's
    1202             :      * nodes.
    1203             :      */
    1204        3818 :     startOffsets[0] = startOffsets[1] = InvalidOffsetNumber;
    1205      488962 :     for (i = 0; i < nToInsert; i++)
    1206             :     {
    1207      485144 :         SpGistLeafTuple it = newLeafs[i];
    1208             :         Buffer      leafBuffer;
    1209             :         BlockNumber leafBlock;
    1210             :         OffsetNumber newoffset;
    1211             : 
    1212             :         /* Which page is it going to? */
    1213      485144 :         leafBuffer = leafPageSelect[i] ? newLeafBuffer : current->buffer;
    1214      485144 :         leafBlock = BufferGetBlockNumber(leafBuffer);
    1215             : 
    1216             :         /* Link tuple into correct chain for its node */
    1217      485144 :         n = out.mapTuplesToNodes[i];
    1218             : 
    1219      485144 :         if (ItemPointerIsValid(&nodes[n]->t_tid))
    1220             :         {
    1221             :             Assert(ItemPointerGetBlockNumber(&nodes[n]->t_tid) == leafBlock);
    1222      470478 :             SGLT_SET_NEXTOFFSET(it, ItemPointerGetOffsetNumber(&nodes[n]->t_tid));
    1223             :         }
    1224             :         else
    1225       14666 :             SGLT_SET_NEXTOFFSET(it, InvalidOffsetNumber);
    1226             : 
    1227             :         /* Insert it on page */
    1228      485144 :         newoffset = SpGistPageAddNewItem(state, BufferGetPage(leafBuffer),
    1229      485144 :                                          (Item) it, it->size,
    1230      485144 :                                          &startOffsets[leafPageSelect[i]],
    1231             :                                          false);
    1232      485144 :         toInsert[i] = newoffset;
    1233             : 
    1234             :         /* ... and complete the chain linking */
    1235      485144 :         ItemPointerSet(&nodes[n]->t_tid, leafBlock, newoffset);
    1236             : 
    1237             :         /* Also copy leaf tuple into WAL data */
    1238      485144 :         memcpy(leafptr, newLeafs[i], newLeafs[i]->size);
    1239      485144 :         leafptr += newLeafs[i]->size;
    1240             :     }
    1241             : 
    1242             :     /*
    1243             :      * We're done modifying the other leaf buffer (if any), so mark it dirty.
    1244             :      * current->buffer will be marked below, after we're entirely done
    1245             :      * modifying it.
    1246             :      */
    1247        3818 :     if (newLeafBuffer != InvalidBuffer)
    1248             :     {
    1249        3758 :         MarkBufferDirty(newLeafBuffer);
    1250             :     }
    1251             : 
    1252             :     /* Remember current buffer, since we're about to change "current" */
    1253        3818 :     saveCurrent = *current;
    1254             : 
    1255             :     /*
    1256             :      * Store the new innerTuple
    1257             :      */
    1258        3818 :     if (newInnerBuffer == parent->buffer && newInnerBuffer != InvalidBuffer)
    1259             :     {
    1260             :         /*
    1261             :          * new inner tuple goes to parent page
    1262             :          */
    1263             :         Assert(current->buffer != parent->buffer);
    1264             : 
    1265             :         /* Repoint "current" at the new inner tuple */
    1266        3246 :         current->blkno = parent->blkno;
    1267        3246 :         current->buffer = parent->buffer;
    1268        3246 :         current->page = parent->page;
    1269        3246 :         xlrec.offnumInner = current->offnum =
    1270        3246 :             SpGistPageAddNewItem(state, current->page,
    1271        3246 :                                  (Item) innerTuple, innerTuple->size,
    1272             :                                  NULL, false);
    1273             : 
    1274             :         /*
    1275             :          * Update parent node link and mark parent page dirty
    1276             :          */
    1277        3246 :         xlrec.innerIsParent = true;
    1278        3246 :         xlrec.offnumParent = parent->offnum;
    1279        3246 :         xlrec.nodeI = parent->node;
    1280        3246 :         saveNodeLink(index, parent, current->blkno, current->offnum);
    1281             : 
    1282             :         /*
    1283             :          * Update redirection link (in old current buffer)
    1284             :          */
    1285        3246 :         if (redirectTuplePos != InvalidOffsetNumber)
    1286         786 :             setRedirectionTuple(&saveCurrent, redirectTuplePos,
    1287         786 :                                 current->blkno, current->offnum);
    1288             : 
    1289             :         /* Done modifying old current buffer, mark it dirty */
    1290        3246 :         MarkBufferDirty(saveCurrent.buffer);
    1291             :     }
    1292         572 :     else if (parent->buffer != InvalidBuffer)
    1293             :     {
    1294             :         /*
    1295             :          * new inner tuple will be stored on a new page
    1296             :          */
    1297             :         Assert(newInnerBuffer != InvalidBuffer);
    1298             : 
    1299             :         /* Repoint "current" at the new inner tuple */
    1300         516 :         current->buffer = newInnerBuffer;
    1301         516 :         current->blkno = BufferGetBlockNumber(current->buffer);
    1302         516 :         current->page = BufferGetPage(current->buffer);
    1303         516 :         xlrec.offnumInner = current->offnum =
    1304         516 :             SpGistPageAddNewItem(state, current->page,
    1305         516 :                                  (Item) innerTuple, innerTuple->size,
    1306             :                                  NULL, false);
    1307             : 
    1308             :         /* Done modifying new current buffer, mark it dirty */
    1309         516 :         MarkBufferDirty(current->buffer);
    1310             : 
    1311             :         /*
    1312             :          * Update parent node link and mark parent page dirty
    1313             :          */
    1314         516 :         xlrec.innerIsParent = (parent->buffer == current->buffer);
    1315         516 :         xlrec.offnumParent = parent->offnum;
    1316         516 :         xlrec.nodeI = parent->node;
    1317         516 :         saveNodeLink(index, parent, current->blkno, current->offnum);
    1318             : 
    1319             :         /*
    1320             :          * Update redirection link (in old current buffer)
    1321             :          */
    1322         516 :         if (redirectTuplePos != InvalidOffsetNumber)
    1323          84 :             setRedirectionTuple(&saveCurrent, redirectTuplePos,
    1324          84 :                                 current->blkno, current->offnum);
    1325             : 
    1326             :         /* Done modifying old current buffer, mark it dirty */
    1327         516 :         MarkBufferDirty(saveCurrent.buffer);
    1328             :     }
    1329             :     else
    1330             :     {
    1331             :         /*
    1332             :          * Splitting root page, which was a leaf but now becomes inner page
    1333             :          * (and so "current" continues to point at it)
    1334             :          */
    1335             :         Assert(SpGistBlockIsRoot(current->blkno));
    1336             :         Assert(redirectTuplePos == InvalidOffsetNumber);
    1337             : 
    1338          56 :         SpGistInitBuffer(current->buffer, (isNulls ? SPGIST_NULLS : 0));
    1339          56 :         xlrec.initInner = true;
    1340          56 :         xlrec.innerIsParent = false;
    1341             : 
    1342          56 :         xlrec.offnumInner = current->offnum =
    1343          56 :             PageAddItem(current->page, (Item) innerTuple, innerTuple->size,
    1344             :                         InvalidOffsetNumber, false, false);
    1345          56 :         if (current->offnum != FirstOffsetNumber)
    1346           0 :             elog(ERROR, "failed to add item of size %u to SPGiST index page",
    1347             :                  innerTuple->size);
    1348             : 
    1349             :         /* No parent link to update, nor redirection to do */
    1350          56 :         xlrec.offnumParent = InvalidOffsetNumber;
    1351          56 :         xlrec.nodeI = 0;
    1352             : 
    1353             :         /* Done modifying new current buffer, mark it dirty */
    1354          56 :         MarkBufferDirty(current->buffer);
    1355             : 
    1356             :         /* saveCurrent doesn't represent a different buffer */
    1357          56 :         saveCurrent.buffer = InvalidBuffer;
    1358             :     }
    1359             : 
    1360        3818 :     if (RelationNeedsWAL(index) && !state->isBuild)
    1361             :     {
    1362             :         XLogRecPtr  recptr;
    1363             :         int         flags;
    1364             : 
    1365         928 :         XLogBeginInsert();
    1366             : 
    1367         928 :         xlrec.nInsert = nToInsert;
    1368         928 :         XLogRegisterData((char *) &xlrec, SizeOfSpgxlogPickSplit);
    1369             : 
    1370         928 :         XLogRegisterData((char *) toDelete,
    1371         928 :                          sizeof(OffsetNumber) * xlrec.nDelete);
    1372         928 :         XLogRegisterData((char *) toInsert,
    1373         928 :                          sizeof(OffsetNumber) * xlrec.nInsert);
    1374         928 :         XLogRegisterData((char *) leafPageSelect,
    1375         928 :                          sizeof(uint8) * xlrec.nInsert);
    1376         928 :         XLogRegisterData((char *) innerTuple, innerTuple->size);
    1377         928 :         XLogRegisterData(leafdata, leafptr - leafdata);
    1378             : 
    1379             :         /* Old leaf page */
    1380         928 :         if (BufferIsValid(saveCurrent.buffer))
    1381             :         {
    1382         914 :             flags = REGBUF_STANDARD;
    1383         914 :             if (xlrec.initSrc)
    1384          44 :                 flags |= REGBUF_WILL_INIT;
    1385         914 :             XLogRegisterBuffer(0, saveCurrent.buffer, flags);
    1386             :         }
    1387             : 
    1388             :         /* New leaf page */
    1389         928 :         if (BufferIsValid(newLeafBuffer))
    1390             :         {
    1391         884 :             flags = REGBUF_STANDARD;
    1392         884 :             if (xlrec.initDest)
    1393         820 :                 flags |= REGBUF_WILL_INIT;
    1394         884 :             XLogRegisterBuffer(1, newLeafBuffer, flags);
    1395             :         }
    1396             : 
    1397             :         /* Inner page */
    1398         928 :         flags = REGBUF_STANDARD;
    1399         928 :         if (xlrec.initInner)
    1400          28 :             flags |= REGBUF_WILL_INIT;
    1401         928 :         XLogRegisterBuffer(2, current->buffer, flags);
    1402             : 
    1403             :         /* Parent page, if different from inner page */
    1404         928 :         if (parent->buffer != InvalidBuffer)
    1405             :         {
    1406         914 :             if (parent->buffer != current->buffer)
    1407          84 :                 XLogRegisterBuffer(3, parent->buffer, REGBUF_STANDARD);
    1408             :             else
    1409             :                 Assert(xlrec.innerIsParent);
    1410             :         }
    1411             : 
    1412             :         /* Issue the WAL record */
    1413         928 :         recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT);
    1414             : 
    1415             :         /* Update page LSNs on all affected pages */
    1416         928 :         if (newLeafBuffer != InvalidBuffer)
    1417             :         {
    1418         884 :             Page        page = BufferGetPage(newLeafBuffer);
    1419             : 
    1420         884 :             PageSetLSN(page, recptr);
    1421             :         }
    1422             : 
    1423         928 :         if (saveCurrent.buffer != InvalidBuffer)
    1424             :         {
    1425         914 :             Page        page = BufferGetPage(saveCurrent.buffer);
    1426             : 
    1427         914 :             PageSetLSN(page, recptr);
    1428             :         }
    1429             : 
    1430         928 :         PageSetLSN(current->page, recptr);
    1431             : 
    1432         928 :         if (parent->buffer != InvalidBuffer)
    1433             :         {
    1434         914 :             PageSetLSN(parent->page, recptr);
    1435             :         }
    1436             :     }
    1437             : 
    1438        3818 :     END_CRIT_SECTION();
    1439             : 
    1440             :     /* Update local free-space cache and unlock buffers */
    1441        3818 :     if (newLeafBuffer != InvalidBuffer)
    1442             :     {
    1443        3758 :         SpGistSetLastUsedPage(index, newLeafBuffer);
    1444        3758 :         UnlockReleaseBuffer(newLeafBuffer);
    1445             :     }
    1446        3818 :     if (saveCurrent.buffer != InvalidBuffer)
    1447             :     {
    1448        3762 :         SpGistSetLastUsedPage(index, saveCurrent.buffer);
    1449        3762 :         UnlockReleaseBuffer(saveCurrent.buffer);
    1450             :     }
    1451             : 
    1452        3818 :     return insertedNew;
    1453             : }
    1454             : 
    1455             : /*
    1456             :  * spgMatchNode action: descend to N'th child node of current inner tuple
                     :  *
                     :  * The previous parent buffer, if valid and distinct from the current
                     :  * buffer, is unlocked and released.  "parent" is then repointed at the
                     :  * current inner tuple (block, buffer, page, offset) with node = nodeN, so
                     :  * the buffer formerly referenced by "current" stays held through "parent".
                     :  *
                     :  * "current" is set to the block and offset named by the downlink of node
                     :  * nodeN, or to invalid values if that downlink is empty.  Its buffer and
                     :  * page fields are always reset (InvalidBuffer, NULL); no child page is
                     :  * read or locked here.
                     :  *
                     :  * Raises ERROR if innerTuple has no node numbered nodeN.  The "state"
                     :  * argument is not referenced in this function body.
    1457             :  */
    1458             : static void
    1459    12437944 : spgMatchNodeAction(Relation index, SpGistState *state,
    1460             :                    SpGistInnerTuple innerTuple,
    1461             :                    SPPageDesc *current, SPPageDesc *parent, int nodeN)
    1462             : {
    1463             :     int         i;
    1464             :     SpGistNodeTuple node;
    1465             : 
    1466             :     /* Release previous parent buffer if any, unless current also uses it */
    1467    12437944 :     if (parent->buffer != InvalidBuffer &&
    1468    11908584 :         parent->buffer != current->buffer)
    1469             :     {
    1470      523728 :         SpGistSetLastUsedPage(index, parent->buffer);
    1471      523728 :         UnlockReleaseBuffer(parent->buffer);
    1472             :     }
    1473             : 
    1474             :     /* Repoint parent to specified node of current inner tuple */
    1475    12437944 :     parent->blkno = current->blkno;
    1476    12437944 :     parent->buffer = current->buffer;
    1477    12437944 :     parent->page = current->page;
    1478    12437944 :     parent->offnum = current->offnum;
    1479    12437944 :     parent->node = nodeN;
    1480             : 
    1481             :     /* Locate that node by iterating over the nodes of innerTuple */
    1482    20893628 :     SGITITERATE(innerTuple, i, node)
    1483             :     {
    1484    20893628 :         if (i == nodeN)
    1485    12437944 :             break;
    1486             :     }
    1487             : 
    1488    12437944 :     if (i != nodeN)
    1489           0 :         elog(ERROR, "failed to find requested node %d in SPGiST inner tuple",
    1490             :              nodeN);
    1491             : 
    1492             :     /* Point current to the downlink location, if any */
    1493    12437944 :     if (ItemPointerIsValid(&node->t_tid))
    1494             :     {
    1495    12436504 :         current->blkno = ItemPointerGetBlockNumber(&node->t_tid);
    1496    12436504 :         current->offnum = ItemPointerGetOffsetNumber(&node->t_tid);
    1497             :     }
    1498             :     else
    1499             :     {
    1500             :         /* Downlink is empty, so we'll need to find a new page */
    1501        1440 :         current->blkno = InvalidBlockNumber;
    1502        1440 :         current->offnum = InvalidOffsetNumber;
    1503             :     }
    1504             : 
    1505    12437944 :     current->buffer = InvalidBuffer;    /* old buffer now belongs to parent */
    1506    12437944 :     current->page = NULL;
    1507    12437944 : }
    1508             : 
    1509             : /*
    1510             :  * spgAddNode action: add a node to the inner tuple at current
    1511             :  */
    1512             : static void
    1513        1088 : spgAddNodeAction(Relation index, SpGistState *state,
    1514             :                  SpGistInnerTuple innerTuple,
    1515             :                  SPPageDesc *current, SPPageDesc *parent,
    1516             :                  int nodeN, Datum nodeLabel)
    1517             : {
    1518             :     SpGistInnerTuple newInnerTuple;
    1519             :     spgxlogAddNode xlrec;
    1520             : 
    1521             :     /* Should not be applied to nulls */
    1522             :     Assert(!SpGistPageStoresNulls(current->page));
    1523             : 
    1524             :     /* Construct new inner tuple with additional node */
    1525        1088 :     newInnerTuple = addNode(state, innerTuple, nodeLabel, nodeN);
    1526             : 
    1527             :     /* Prepare WAL record */
    1528        1088 :     STORE_STATE(state, xlrec.stateSrc);
    1529        1088 :     xlrec.offnum = current->offnum;
    1530             : 
    1531             :     /* we don't fill these unless we need to change the parent downlink */
    1532        1088 :     xlrec.parentBlk = -1;
    1533        1088 :     xlrec.offnumParent = InvalidOffsetNumber;
    1534        1088 :     xlrec.nodeI = 0;
    1535             : 
    1536             :     /* we don't fill these unless tuple has to be moved */
    1537        1088 :     xlrec.offnumNew = InvalidOffsetNumber;
    1538        1088 :     xlrec.newPage = false;
    1539             : 
    1540        1088 :     if (PageGetExactFreeSpace(current->page) >=
    1541        1088 :         newInnerTuple->size - innerTuple->size)
    1542             :     {
    1543             :         /*
    1544             :          * We can replace the inner tuple by new version in-place
    1545             :          */
    1546        1084 :         START_CRIT_SECTION();
    1547             : 
    1548        1084 :         PageIndexTupleDelete(current->page, current->offnum);
    1549        1084 :         if (PageAddItem(current->page,
    1550             :                         (Item) newInnerTuple, newInnerTuple->size,
    1551        1084 :                         current->offnum, false, false) != current->offnum)
    1552           0 :             elog(ERROR, "failed to add item of size %u to SPGiST index page",
    1553             :                  newInnerTuple->size);
    1554             : 
    1555        1084 :         MarkBufferDirty(current->buffer);
    1556             : 
    1557        1084 :         if (RelationNeedsWAL(index) && !state->isBuild)
    1558             :         {
    1559             :             XLogRecPtr  recptr;
    1560             : 
    1561         494 :             XLogBeginInsert();
    1562         494 :             XLogRegisterData((char *) &xlrec, sizeof(xlrec));
    1563         494 :             XLogRegisterData((char *) newInnerTuple, newInnerTuple->size);
    1564             : 
    1565         494 :             XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
    1566             : 
    1567         494 :             recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE);
    1568             : 
    1569         494 :             PageSetLSN(current->page, recptr);
    1570             :         }
    1571             : 
    1572        1084 :         END_CRIT_SECTION();
    1573             :     }
    1574             :     else
    1575             :     {
    1576             :         /*
    1577             :          * move inner tuple to another page, and update parent
    1578             :          */
    1579             :         SpGistDeadTuple dt;
    1580             :         SPPageDesc  saveCurrent;
    1581             : 
    1582             :         /*
    1583             :          * It should not be possible to get here for the root page, since we
    1584             :          * allow only one inner tuple on the root page, and spgFormInnerTuple
    1585             :          * always checks that inner tuples don't exceed the size of a page.
    1586             :          */
    1587           4 :         if (SpGistBlockIsRoot(current->blkno))
    1588           0 :             elog(ERROR, "cannot enlarge root tuple any more");
    1589             :         Assert(parent->buffer != InvalidBuffer);
    1590             : 
    1591           4 :         saveCurrent = *current;
    1592             : 
    1593           4 :         xlrec.offnumParent = parent->offnum;
    1594           4 :         xlrec.nodeI = parent->node;
    1595             : 
    1596             :         /*
    1597             :          * obtain new buffer with the same parity as current, since it will be
    1598             :          * a child of same parent tuple
    1599             :          */
    1600           8 :         current->buffer = SpGistGetBuffer(index,
    1601           4 :                                           GBUF_INNER_PARITY(current->blkno),
    1602           4 :                                           newInnerTuple->size + sizeof(ItemIdData),
    1603             :                                           &xlrec.newPage);
    1604           4 :         current->blkno = BufferGetBlockNumber(current->buffer);
    1605           4 :         current->page = BufferGetPage(current->buffer);
    1606             : 
    1607             :         /*
    1608             :          * Let's just make real sure new current isn't same as old.  Right now
    1609             :          * that's impossible, but if SpGistGetBuffer ever got smart enough to
    1610             :          * delete placeholder tuples before checking space, maybe it wouldn't
    1611             :          * be impossible.  The case would appear to work except that WAL
    1612             :          * replay would be subtly wrong, so I think a mere assert isn't enough
    1613             :          * here.
    1614             :          */
    1615           4 :         if (current->blkno == saveCurrent.blkno)
    1616           0 :             elog(ERROR, "SPGiST new buffer shouldn't be same as old buffer");
    1617             : 
    1618             :         /*
    1619             :          * New current and parent buffer will both be modified; but note that
    1620             :          * parent buffer could be same as either new or old current.
    1621             :          */
    1622           4 :         if (parent->buffer == saveCurrent.buffer)
    1623           0 :             xlrec.parentBlk = 0;
    1624           4 :         else if (parent->buffer == current->buffer)
    1625           0 :             xlrec.parentBlk = 1;
    1626             :         else
    1627           4 :             xlrec.parentBlk = 2;
    1628             : 
    1629           4 :         START_CRIT_SECTION();
    1630             : 
    1631             :         /* insert new ... */
    1632           4 :         xlrec.offnumNew = current->offnum =
    1633           4 :             SpGistPageAddNewItem(state, current->page,
    1634           4 :                                  (Item) newInnerTuple, newInnerTuple->size,
    1635             :                                  NULL, false);
    1636             : 
    1637           4 :         MarkBufferDirty(current->buffer);
    1638             : 
    1639             :         /* update parent's downlink and mark parent page dirty */
    1640           4 :         saveNodeLink(index, parent, current->blkno, current->offnum);
    1641             : 
    1642             :         /*
    1643             :          * Replace old tuple with a placeholder or redirection tuple.  Unless
    1644             :          * doing an index build, we have to insert a redirection tuple for
    1645             :          * possible concurrent scans.  We can't just delete it in any case,
    1646             :          * because that could change the offsets of other tuples on the page,
    1647             :          * breaking downlinks from their parents.
    1648             :          */
    1649           4 :         if (state->isBuild)
    1650           0 :             dt = spgFormDeadTuple(state, SPGIST_PLACEHOLDER,
    1651             :                                   InvalidBlockNumber, InvalidOffsetNumber);
    1652             :         else
    1653           4 :             dt = spgFormDeadTuple(state, SPGIST_REDIRECT,
    1654           4 :                                   current->blkno, current->offnum);
    1655             : 
    1656           4 :         PageIndexTupleDelete(saveCurrent.page, saveCurrent.offnum);
    1657           4 :         if (PageAddItem(saveCurrent.page, (Item) dt, dt->size,
    1658             :                         saveCurrent.offnum,
    1659           4 :                         false, false) != saveCurrent.offnum)
    1660           0 :             elog(ERROR, "failed to add item of size %u to SPGiST index page",
    1661             :                  dt->size);
    1662             : 
    1663           4 :         if (state->isBuild)
    1664           0 :             SpGistPageGetOpaque(saveCurrent.page)->nPlaceholder++;
    1665             :         else
    1666           4 :             SpGistPageGetOpaque(saveCurrent.page)->nRedirection++;
    1667             : 
    1668           4 :         MarkBufferDirty(saveCurrent.buffer);
    1669             : 
    1670           4 :         if (RelationNeedsWAL(index) && !state->isBuild)
    1671             :         {
    1672             :             XLogRecPtr  recptr;
    1673             :             int         flags;
    1674             : 
    1675           4 :             XLogBeginInsert();
    1676             : 
    1677             :             /* orig page */
    1678           4 :             XLogRegisterBuffer(0, saveCurrent.buffer, REGBUF_STANDARD);
    1679             :             /* new page */
    1680           4 :             flags = REGBUF_STANDARD;
    1681           4 :             if (xlrec.newPage)
    1682           4 :                 flags |= REGBUF_WILL_INIT;
    1683           4 :             XLogRegisterBuffer(1, current->buffer, flags);
    1684             :             /* parent page (if different from orig and new) */
    1685           4 :             if (xlrec.parentBlk == 2)
    1686           4 :                 XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD);
    1687             : 
    1688           4 :             XLogRegisterData((char *) &xlrec, sizeof(xlrec));
    1689           4 :             XLogRegisterData((char *) newInnerTuple, newInnerTuple->size);
    1690             : 
    1691           4 :             recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE);
    1692             : 
    1693             :             /* we don't bother to check if any of these are redundant */
    1694           4 :             PageSetLSN(current->page, recptr);
    1695           4 :             PageSetLSN(parent->page, recptr);
    1696           4 :             PageSetLSN(saveCurrent.page, recptr);
    1697             :         }
    1698             : 
    1699           4 :         END_CRIT_SECTION();
    1700             : 
    1701             :         /* Release saveCurrent if it's not same as current or parent */
    1702           4 :         if (saveCurrent.buffer != current->buffer &&
    1703           4 :             saveCurrent.buffer != parent->buffer)
    1704             :         {
    1705           4 :             SpGistSetLastUsedPage(index, saveCurrent.buffer);
    1706           4 :             UnlockReleaseBuffer(saveCurrent.buffer);
    1707             :         }
    1708             :     }
    1709        1088 : }
    1710             : 
    1711             : /*
    1712             :  * spgSplitNode action: split inner tuple at current into prefix and postfix
                     :  *
                     :  * The inner tuple at current->offnum is replaced in place by a new "prefix"
                     :  * tuple with out->result.splitTuple.prefixNNodes nodes, and a new "postfix"
                     :  * tuple carrying all of innerTuple's nodes is added either to the same page
                     :  * or to a buffer obtained from SpGistGetBuffer.  The prefix tuple's
                     :  * childNodeN'th node is then pointed at the postfix tuple.
                     :  *
                     :  * index: the index relation being modified
                     :  * state: SP-GiST working state, passed through to the tuple-forming and
                     :  *      page-insertion routines; state->isBuild suppresses WAL logging
                     :  * innerTuple: the existing inner tuple being split (its size, nNodes and
                     :  *      allTheSame flag are read here)
                     :  * current: identifies the buffer/page/block/offset holding innerTuple; the
                     :  *      descriptor's fields are only read, though the page it points to is
                     :  *      modified and its buffer marked dirty
                     :  * out: result of the opclass choose function; only the result.splitTuple
                     :  *      fields are consulted
                     :  *
                     :  * Raises elog(ERROR) if prefixNNodes or childNodeN is out of range, or if
                     :  * the new prefix tuple would be larger than innerTuple.  When
                     :  * RelationNeedsWAL(index) is true and we are not in an index build, an
                     :  * XLOG_SPGIST_SPLIT_TUPLE record is emitted carrying xlrec, the prefix
                     :  * tuple and the postfix tuple.  Any extra buffer acquired here is unlocked
                     :  * and released before returning; current->buffer is not released.
    1713             :  */
    1714             : static void
    1715         436 : spgSplitNodeAction(Relation index, SpGistState *state,
    1716             :                    SpGistInnerTuple innerTuple,
    1717             :                    SPPageDesc *current, spgChooseOut *out)
    1718             : {
    1719             :     SpGistInnerTuple prefixTuple,
    1720             :                 postfixTuple;
    1721             :     SpGistNodeTuple node,
    1722             :                *nodes;
    1723             :     BlockNumber postfixBlkno;
    1724             :     OffsetNumber postfixOffset;
    1725             :     int         i;
    1726             :     spgxlogSplitTuple xlrec;
    1727         436 :     Buffer      newBuffer = InvalidBuffer;
    1728             : 
    1729             :     /* Should not be applied to nulls */
    1730             :     Assert(!SpGistPageStoresNulls(current->page));
    1731             : 
    1732             :     /* Check opclass gave us sane values */
    1733         436 :     if (out->result.splitTuple.prefixNNodes <= 0 ||
    1734         436 :         out->result.splitTuple.prefixNNodes > SGITMAXNNODES)
    1735           0 :         elog(ERROR, "invalid number of prefix nodes: %d",
    1736             :              out->result.splitTuple.prefixNNodes);
    1737         436 :     if (out->result.splitTuple.childNodeN < 0 ||
    1738         436 :         out->result.splitTuple.childNodeN >=
    1739         436 :         out->result.splitTuple.prefixNNodes)
    1740           0 :         elog(ERROR, "invalid child node number: %d",
    1741             :              out->result.splitTuple.childNodeN);
    1742             : 
    1743             :     /*
    1744             :      * Construct new prefix tuple with requested number of nodes.  We'll fill
    1745             :      * in the childNodeN'th node's downlink below.
                     :      *
                     :      * Node labels come from prefixNodeLabels when that array is supplied;
                     :      * if it is NULL, every node is formed with a null label.
    1746             :      */
    1747         436 :     nodes = (SpGistNodeTuple *) palloc(sizeof(SpGistNodeTuple) *
    1748         436 :                                        out->result.splitTuple.prefixNNodes);
    1749             : 
    1750         872 :     for (i = 0; i < out->result.splitTuple.prefixNNodes; i++)
    1751             :     {
    1752         436 :         Datum       label = (Datum) 0;
    1753             :         bool        labelisnull;
    1754             : 
    1755         436 :         labelisnull = (out->result.splitTuple.prefixNodeLabels == NULL);
    1756         436 :         if (!labelisnull)
    1757         436 :             label = out->result.splitTuple.prefixNodeLabels[i];
    1758         436 :         nodes[i] = spgFormNodeTuple(state, label, labelisnull);
    1759             :     }
    1760             : 
    1761         436 :     prefixTuple = spgFormInnerTuple(state,
    1762         436 :                                     out->result.splitTuple.prefixHasPrefix,
    1763             :                                     out->result.splitTuple.prefixPrefixDatum,
    1764             :                                     out->result.splitTuple.prefixNNodes,
    1765             :                                     nodes);
    1766             : 
    1767             :     /* it must fit in the space that innerTuple now occupies */
    1768         436 :     if (prefixTuple->size > innerTuple->size)
    1769           0 :         elog(ERROR, "SPGiST inner-tuple split must not produce longer prefix");
    1770             : 
    1771             :     /*
    1772             :      * Construct new postfix tuple, containing all nodes of innerTuple with
    1773             :      * same node datums, but with the prefix specified by the opclass choose
    1774             :      * function (out->result.splitTuple.postfixHasPrefix/postfixPrefixDatum).
    1775             :      */
    1776         436 :     nodes = palloc(sizeof(SpGistNodeTuple) * innerTuple->nNodes);
    1777        1536 :     SGITITERATE(innerTuple, i, node)
    1778             :     {
    1779        1100 :         nodes[i] = node;
    1780             :     }
    1781             : 
    1782         436 :     postfixTuple = spgFormInnerTuple(state,
    1783         436 :                                      out->result.splitTuple.postfixHasPrefix,
    1784             :                                      out->result.splitTuple.postfixPrefixDatum,
    1785         436 :                                      innerTuple->nNodes, nodes);
    1786             : 
    1787             :     /* Postfix tuple is allTheSame if original tuple was */
    1788         436 :     postfixTuple->allTheSame = innerTuple->allTheSame;
    1789             : 
    1790             :     /* prep data for WAL record; SpGistGetBuffer may update newPage below */
    1791         436 :     xlrec.newPage = false;
    1792             : 
    1793             :     /*
    1794             :      * If we can't fit both tuples on the current page, get a new page for the
    1795             :      * postfix tuple.  In particular, can't split to the root page.
    1796             :      *
    1797             :      * For the space calculation, note that prefixTuple replaces innerTuple
    1798             :      * but postfixTuple will be a new entry.
                     :      *
                     :      * That is: the space available once innerTuple is removed (the reported
                     :      * free space plus innerTuple->size) must cover both new tuples plus one
                     :      * additional line pointer for the postfix tuple.
    1799             :      */
    1800         436 :     if (SpGistBlockIsRoot(current->blkno) ||
    1801         428 :         SpGistPageGetFreeSpace(current->page, 1) + innerTuple->size <
    1802         428 :         prefixTuple->size + postfixTuple->size + sizeof(ItemIdData))
    1803             :     {
    1804             :         /*
    1805             :          * Choose page with next triple parity, because postfix tuple is a
    1806             :          * child of prefix one
    1807             :          */
    1808         112 :         newBuffer = SpGistGetBuffer(index,
    1809         112 :                                     GBUF_INNER_PARITY(current->blkno + 1),
    1810         112 :                                     postfixTuple->size + sizeof(ItemIdData),
    1811             :                                     &xlrec.newPage);
    1812             :     }
    1813             : 
    1814         436 :     START_CRIT_SECTION();
    1815             : 
    1816             :     /*
    1817             :      * Replace old tuple by prefix tuple
                     :      *
                     :      * The prefix tuple is added back at the same offset number that
                     :      * innerTuple occupied; getting any other offset is treated as failure.
                     :      * NOTE(review): this elog(ERROR) sits inside the critical section opened
                     :      * above -- confirm that is the intended failure behavior.
    1818             :      */
    1819         436 :     PageIndexTupleDelete(current->page, current->offnum);
    1820         436 :     xlrec.offnumPrefix = PageAddItem(current->page,
    1821             :                                      (Item) prefixTuple, prefixTuple->size,
    1822             :                                      current->offnum, false, false);
    1823         436 :     if (xlrec.offnumPrefix != current->offnum)
    1824           0 :         elog(ERROR, "failed to add item of size %u to SPGiST index page",
    1825             :              prefixTuple->size);
    1826             : 
    1827             :     /*
    1828             :      * put postfix tuple into appropriate page
                     :      *
                     :      * With no extra buffer it goes on the current page; otherwise it goes on
                     :      * newBuffer's page, which is marked dirty right away.  Either way the
                     :      * resulting location is remembered in postfixBlkno/postfixOffset and
                     :      * recorded in xlrec (offnumPostfix, postfixBlkSame) for WAL.
    1829             :      */
    1830         436 :     if (newBuffer == InvalidBuffer)
    1831             :     {
    1832         324 :         postfixBlkno = current->blkno;
    1833         324 :         xlrec.offnumPostfix = postfixOffset =
    1834         324 :             SpGistPageAddNewItem(state, current->page,
    1835         324 :                                  (Item) postfixTuple, postfixTuple->size,
    1836             :                                  NULL, false);
    1837         324 :         xlrec.postfixBlkSame = true;
    1838             :     }
    1839             :     else
    1840             :     {
    1841         112 :         postfixBlkno = BufferGetBlockNumber(newBuffer);
    1842         112 :         xlrec.offnumPostfix = postfixOffset =
    1843         112 :             SpGistPageAddNewItem(state, BufferGetPage(newBuffer),
    1844         112 :                                  (Item) postfixTuple, postfixTuple->size,
    1845             :                                  NULL, false);
    1846         112 :         MarkBufferDirty(newBuffer);
    1847         112 :         xlrec.postfixBlkSame = false;
    1848             :     }
    1849             : 
    1850             :     /*
    1851             :      * And set downlink pointer in the prefix tuple to point to postfix tuple.
    1852             :      * (We can't avoid this step by doing the above two steps in opposite
    1853             :      * order, because there might not be enough space on the page to insert
    1854             :      * the postfix tuple first.)  We have to update the local copy of the
    1855             :      * prefixTuple too, because that's what will be written to WAL.
                     :      *
                     :      * The first call below patches the local copy; prefixTuple is then
                     :      * re-pointed at the on-page tuple (fetched via PageGetItem) and the same
                     :      * update is applied there.  From this point on prefixTuple refers to
                     :      * the on-page tuple.
    1856             :      */
    1857         436 :     spgUpdateNodeLink(prefixTuple, out->result.splitTuple.childNodeN,
    1858             :                       postfixBlkno, postfixOffset);
    1859         436 :     prefixTuple = (SpGistInnerTuple) PageGetItem(current->page,
    1860             :                                                  PageGetItemId(current->page, current->offnum));
    1861         436 :     spgUpdateNodeLink(prefixTuple, out->result.splitTuple.childNodeN,
    1862             :                       postfixBlkno, postfixOffset);
    1863             : 
    1864         436 :     MarkBufferDirty(current->buffer);
    1865             : 
    1866         436 :     if (RelationNeedsWAL(index) && !state->isBuild)
    1867             :     {
    1868             :         XLogRecPtr  recptr;
    1869             : 
    1870         412 :         XLogBeginInsert();
    1871         412 :         XLogRegisterData((char *) &xlrec, sizeof(xlrec));
    1872         412 :         XLogRegisterData((char *) prefixTuple, prefixTuple->size);
    1873         412 :         XLogRegisterData((char *) postfixTuple, postfixTuple->size);
    1874             : 
    1875         412 :         XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
    1876         412 :         if (newBuffer != InvalidBuffer)
    1877             :         {
    1878             :             int         flags;
    1879             : 
    1880         108 :             flags = REGBUF_STANDARD;
    1881         108 :             if (xlrec.newPage)
    1882           4 :                 flags |= REGBUF_WILL_INIT;
    1883         108 :             XLogRegisterBuffer(1, newBuffer, flags);
    1884             :         }
    1885             : 
    1886         412 :         recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE);
    1887             : 
    1888         412 :         PageSetLSN(current->page, recptr);
    1889             : 
    1890         412 :         if (newBuffer != InvalidBuffer)
    1891             :         {
    1892         108 :             PageSetLSN(BufferGetPage(newBuffer), recptr);
    1893             :         }
    1894             :     }
    1895             : 
    1896         436 :     END_CRIT_SECTION();
    1897             : 
    1898             :     /* Update local free-space cache and release buffer */
    1899         436 :     if (newBuffer != InvalidBuffer)
    1900             :     {
    1901         112 :         SpGistSetLastUsedPage(index, newBuffer);
    1902         112 :         UnlockReleaseBuffer(newBuffer);
    1903             :     }
    1904         436 : }
    1905             : 
    1906             : /*
    1907             :  * Insert one item into the index.
    1908             :  *
    1909             :  * Returns true on success, false if we failed to complete the insertion
    1910             :  * (typically because of conflict with a concurrent insert).  In the latter
    1911             :  * case, caller should re-call spgdoinsert() with the same args.
    1912             :  */
    1913             : bool
    1914      540836 : spgdoinsert(Relation index, SpGistState *state,
    1915             :             ItemPointer heapPtr, Datum *datums, bool *isnulls)
    1916             : {
    1917      540836 :     bool        result = true;
    1918      540836 :     TupleDesc   leafDescriptor = state->leafTupDesc;
    1919      540836 :     bool        isnull = isnulls[spgKeyColumn];
    1920      540836 :     int         level = 0;
    1921             :     Datum       leafDatums[INDEX_MAX_KEYS];
    1922             :     int         leafSize;
    1923             :     int         bestLeafSize;
    1924      540836 :     int         numNoProgressCycles = 0;
    1925             :     SPPageDesc  current,
    1926             :                 parent;
    1927      540836 :     FmgrInfo   *procinfo = NULL;
    1928             : 
    1929             :     /*
    1930             :      * Look up FmgrInfo of the user-defined choose function once, to save
    1931             :      * cycles in the loop below.
    1932             :      */
    1933      540836 :     if (!isnull)
    1934      540776 :         procinfo = index_getprocinfo(index, 1, SPGIST_CHOOSE_PROC);
    1935             : 
    1936             :     /*
    1937             :      * Prepare the leaf datum to insert.
    1938             :      *
    1939             :      * If an optional "compress" method is provided, then call it to form the
    1940             :      * leaf key datum from the input datum.  Otherwise, store the input datum
    1941             :      * as is.  Since we don't use index_form_tuple in this AM, we have to make
    1942             :      * sure value to be inserted is not toasted; FormIndexDatum doesn't
    1943             :      * guarantee that.  But we assume the "compress" method to return an
    1944             :      * untoasted value.
    1945             :      */
    1946      540836 :     if (!isnull)
    1947             :     {
    1948      540776 :         if (OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)))
    1949             :         {
    1950       56940 :             FmgrInfo   *compressProcinfo = NULL;
    1951             : 
    1952       56940 :             compressProcinfo = index_getprocinfo(index, 1, SPGIST_COMPRESS_PROC);
    1953       56940 :             leafDatums[spgKeyColumn] =
    1954       56940 :                 FunctionCall1Coll(compressProcinfo,
    1955       56940 :                                   index->rd_indcollation[spgKeyColumn],
    1956             :                                   datums[spgKeyColumn]);
    1957             :         }
    1958             :         else
    1959             :         {
    1960             :             Assert(state->attLeafType.type == state->attType.type);
    1961             : 
    1962      483836 :             if (state->attType.attlen == -1)
    1963      119728 :                 leafDatums[spgKeyColumn] =
    1964      119728 :                     PointerGetDatum(PG_DETOAST_DATUM(datums[spgKeyColumn]));
    1965             :             else
    1966      364108 :                 leafDatums[spgKeyColumn] = datums[spgKeyColumn];
    1967             :         }
    1968             :     }
    1969             :     else
    1970          60 :         leafDatums[spgKeyColumn] = (Datum) 0;
    1971             : 
    1972             :     /* Likewise, ensure that any INCLUDE values are not toasted */
    1973      610728 :     for (int i = spgFirstIncludeColumn; i < leafDescriptor->natts; i++)
    1974             :     {
    1975       69892 :         if (!isnulls[i])
    1976             :         {
    1977       63176 :             if (TupleDescAttr(leafDescriptor, i)->attlen == -1)
    1978       12940 :                 leafDatums[i] = PointerGetDatum(PG_DETOAST_DATUM(datums[i]));
    1979             :             else
    1980       50236 :                 leafDatums[i] = datums[i];
    1981             :         }
    1982             :         else
    1983        6716 :             leafDatums[i] = (Datum) 0;
    1984             :     }
    1985             : 
    1986             :     /*
    1987             :      * Compute space needed for a leaf tuple containing the given data.
    1988             :      */
    1989      540836 :     leafSize = SpGistGetLeafTupleSize(leafDescriptor, leafDatums, isnulls);
    1990             :     /* Account for an item pointer, too */
    1991      540836 :     leafSize += sizeof(ItemIdData);
    1992             : 
    1993             :     /*
    1994             :      * If it isn't gonna fit, and the opclass can't reduce the datum size by
    1995             :      * suffixing, bail out now rather than doing a lot of useless work.
    1996             :      */
    1997      540836 :     if (leafSize > SPGIST_PAGE_CAPACITY &&
    1998           4 :         (isnull || !state->config.longValuesOK))
    1999           0 :         ereport(ERROR,
    2000             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    2001             :                  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
    2002             :                         leafSize - sizeof(ItemIdData),
    2003             :                         SPGIST_PAGE_CAPACITY - sizeof(ItemIdData),
    2004             :                         RelationGetRelationName(index)),
    2005             :                  errhint("Values larger than a buffer page cannot be indexed.")));
    2006      540836 :     bestLeafSize = leafSize;
    2007             : 
    2008             :     /* Initialize "current" to the appropriate root page */
    2009      540836 :     current.blkno = isnull ? SPGIST_NULL_BLKNO : SPGIST_ROOT_BLKNO;
    2010      540836 :     current.buffer = InvalidBuffer;
    2011      540836 :     current.page = NULL;
    2012      540836 :     current.offnum = FirstOffsetNumber;
    2013      540836 :     current.node = -1;
    2014             : 
    2015             :     /* "parent" is invalid for the moment */
    2016      540836 :     parent.blkno = InvalidBlockNumber;
    2017      540836 :     parent.buffer = InvalidBuffer;
    2018      540836 :     parent.page = NULL;
    2019      540836 :     parent.offnum = InvalidOffsetNumber;
    2020      540836 :     parent.node = -1;
    2021             : 
    2022             :     /*
    2023             :      * Before entering the loop, try to clear any pending interrupt condition.
    2024             :      * If a query cancel is pending, we might as well accept it now not later;
    2025             :      * while if a non-canceling condition is pending, servicing it here avoids
    2026             :      * having to restart the insertion and redo all the work so far.
    2027             :      */
    2028      540836 :     CHECK_FOR_INTERRUPTS();
    2029             : 
    2030             :     for (;;)
    2031    12437940 :     {
    2032    12978776 :         bool        isNew = false;
    2033             : 
    2034             :         /*
    2035             :          * Bail out if query cancel is pending.  We must have this somewhere
    2036             :          * in the loop since a broken opclass could produce an infinite
    2037             :          * picksplit loop.  However, because we'll be holding buffer lock(s)
    2038             :          * after the first iteration, ProcessInterrupts() wouldn't be able to
    2039             :          * throw a cancel error here.  Hence, if we see that an interrupt is
    2040             :          * pending, break out of the loop and deal with the situation below.
    2041             :          * Set result = false because we must restart the insertion if the
    2042             :          * interrupt isn't a query-cancel-or-die case.
    2043             :          */
    2044    12978776 :         if (INTERRUPTS_PENDING_CONDITION())
    2045             :         {
    2046           0 :             result = false;
    2047           0 :             break;
    2048             :         }
    2049             : 
    2050    12978776 :         if (current.blkno == InvalidBlockNumber)
    2051             :         {
    2052             :             /*
    2053             :              * Create a leaf page.  If leafSize is too large to fit on a page,
    2054             :              * we won't actually use the page yet, but it simplifies the API
    2055             :              * for doPickSplit to always have a leaf page at hand; so just
    2056             :              * quietly limit our request to a page size.
    2057             :              */
    2058        1436 :             current.buffer =
    2059        1436 :                 SpGistGetBuffer(index,
    2060             :                                 GBUF_LEAF | (isnull ? GBUF_NULLS : 0),
    2061        1436 :                                 Min(leafSize, SPGIST_PAGE_CAPACITY),
    2062             :                                 &isNew);
    2063        1436 :             current.blkno = BufferGetBlockNumber(current.buffer);
    2064             :         }
    2065    12977340 :         else if (parent.buffer == InvalidBuffer)
    2066             :         {
    2067             :             /* we hold no parent-page lock, so no deadlock is possible */
    2068      540836 :             current.buffer = ReadBuffer(index, current.blkno);
    2069      540836 :             LockBuffer(current.buffer, BUFFER_LOCK_EXCLUSIVE);
    2070             :         }
    2071    12436504 :         else if (current.blkno != parent.blkno)
    2072             :         {
    2073             :             /* descend to a new child page */
    2074     1051796 :             current.buffer = ReadBuffer(index, current.blkno);
    2075             : 
    2076             :             /*
    2077             :              * Attempt to acquire lock on child page.  We must beware of
    2078             :              * deadlock against another insertion process descending from that
    2079             :              * page to our parent page (see README).  If we fail to get lock,
    2080             :              * abandon the insertion and tell our caller to start over.
    2081             :              *
    2082             :              * XXX this could be improved, because failing to get lock on a
    2083             :              * buffer is not proof of a deadlock situation; the lock might be
    2084             :              * held by a reader, or even just background writer/checkpointer
    2085             :              * process.  Perhaps it'd be worth retrying after sleeping a bit?
    2086             :              */
    2087     1051796 :             if (!ConditionalLockBuffer(current.buffer))
    2088             :             {
    2089           0 :                 ReleaseBuffer(current.buffer);
    2090           0 :                 UnlockReleaseBuffer(parent.buffer);
    2091           0 :                 return false;
    2092             :             }
    2093             :         }
    2094             :         else
    2095             :         {
    2096             :             /* inner tuple can be stored on the same page as parent one */
    2097    11384708 :             current.buffer = parent.buffer;
    2098             :         }
    2099    12978776 :         current.page = BufferGetPage(current.buffer);
    2100             : 
    2101             :         /* should not arrive at a page of the wrong type */
    2102    25957492 :         if (isnull ? !SpGistPageStoresNulls(current.page) :
    2103    12978716 :             SpGistPageStoresNulls(current.page))
    2104           0 :             elog(ERROR, "SPGiST index page %u has wrong nulls flag",
    2105             :                  current.blkno);
    2106             : 
    2107    12978776 :         if (SpGistPageIsLeaf(current.page))
    2108             :         {
    2109             :             SpGistLeafTuple leafTuple;
    2110             :             int         nToSplit,
    2111             :                         sizeToSplit;
    2112             : 
    2113      541034 :             leafTuple = spgFormLeafTuple(state, heapPtr, leafDatums, isnulls);
    2114     1082068 :             if (leafTuple->size + sizeof(ItemIdData) <=
    2115      541034 :                 SpGistPageGetFreeSpace(current.page, 1))
    2116             :             {
    2117             :                 /* it fits on page, so insert it and we're done */
    2118      535612 :                 addLeafTuple(index, state, leafTuple,
    2119             :                              &current, &parent, isnull, isNew);
    2120      540832 :                 break;
    2121             :             }
    2122        5422 :             else if ((sizeToSplit =
    2123        5422 :                       checkSplitConditions(index, state, &current,
    2124        2552 :                                            &nToSplit)) < SPGIST_PAGE_CAPACITY / 2 &&
    2125        2552 :                      nToSplit < 64 &&
    2126        1652 :                      leafTuple->size + sizeof(ItemIdData) + sizeToSplit <= SPGIST_PAGE_CAPACITY)
    2127             :             {
    2128             :                 /*
    2129             :                  * the amount of data is pretty small, so just move the whole
    2130             :                  * chain to another leaf page rather than splitting it.
    2131             :                  */
    2132             :                 Assert(!isNew);
    2133        1604 :                 moveLeafs(index, state, &current, &parent, leafTuple, isnull);
    2134        1604 :                 break;          /* we're done */
    2135             :             }
    2136             :             else
    2137             :             {
    2138             :                 /* picksplit */
    2139        3818 :                 if (doPickSplit(index, state, &current, &parent,
    2140             :                                 leafTuple, level, isnull, isNew))
    2141        3616 :                     break;      /* doPickSplit installed new tuples */
    2142             : 
    2143             :                 /* leaf tuple will not be inserted yet */
    2144         202 :                 pfree(leafTuple);
    2145             : 
    2146             :                 /*
    2147             :                  * current now describes new inner tuple, go insert into it
    2148             :                  */
    2149             :                 Assert(!SpGistPageIsLeaf(current.page));
    2150         202 :                 goto process_inner_tuple;
    2151             :             }
    2152             :         }
    2153             :         else                    /* non-leaf page */
    2154             :         {
    2155             :             /*
    2156             :              * Apply the opclass choose function to figure out how to insert
    2157             :              * the given datum into the current inner tuple.
    2158             :              */
    2159             :             SpGistInnerTuple innerTuple;
    2160             :             spgChooseIn in;
    2161             :             spgChooseOut out;
    2162             : 
    2163             :             /*
    2164             :              * spgAddNode and spgSplitTuple cases will loop back to here to
    2165             :              * complete the insertion operation.  Just in case the choose
    2166             :              * function is broken and produces add or split requests
    2167             :              * repeatedly, check for query cancel (see comments above).
    2168             :              */
    2169    12437944 :     process_inner_tuple:
    2170    12439468 :             if (INTERRUPTS_PENDING_CONDITION())
    2171             :             {
    2172           0 :                 result = false;
    2173           0 :                 break;
    2174             :             }
    2175             : 
    2176    12439468 :             innerTuple = (SpGistInnerTuple) PageGetItem(current.page,
    2177             :                                                         PageGetItemId(current.page, current.offnum));
    2178             : 
    2179    12439468 :             in.datum = datums[spgKeyColumn];
    2180    12439468 :             in.leafDatum = leafDatums[spgKeyColumn];
    2181    12439468 :             in.level = level;
    2182    12439468 :             in.allTheSame = innerTuple->allTheSame;
    2183    12439468 :             in.hasPrefix = (innerTuple->prefixSize > 0);
    2184    12439468 :             in.prefixDatum = SGITDATUM(innerTuple, state);
    2185    12439468 :             in.nNodes = innerTuple->nNodes;
    2186    12439468 :             in.nodeLabels = spgExtractNodeLabels(state, innerTuple);
    2187             : 
    2188    12439468 :             memset(&out, 0, sizeof(out));
    2189             : 
    2190    12439468 :             if (!isnull)
    2191             :             {
    2192             :                 /* use user-defined choose method */
    2193    12439468 :                 FunctionCall2Coll(procinfo,
    2194    12439468 :                                   index->rd_indcollation[0],
    2195             :                                   PointerGetDatum(&in),
    2196             :                                   PointerGetDatum(&out));
    2197             :             }
    2198             :             else
    2199             :             {
    2200             :                 /* force "match" action (to insert to random subnode) */
    2201           0 :                 out.resultType = spgMatchNode;
    2202             :             }
    2203             : 
    2204    12439468 :             if (innerTuple->allTheSame)
    2205             :             {
    2206             :                 /*
    2207             :                  * It's not allowed to do an AddNode at an allTheSame tuple.
    2208             :                  * Opclass must say "match", in which case we choose a random
    2209             :                  * one of the nodes to descend into, or "split".
    2210             :                  */
    2211      100000 :                 if (out.resultType == spgAddNode)
    2212           0 :                     elog(ERROR, "cannot add a node to an allTheSame inner tuple");
    2213      100000 :                 else if (out.resultType == spgMatchNode)
    2214       99992 :                     out.result.matchNode.nodeN =
    2215       99992 :                         pg_prng_uint64_range(&pg_global_prng_state,
    2216       99992 :                                              0, innerTuple->nNodes - 1);
    2217             :             }
    2218             : 
    2219    12439468 :             switch (out.resultType)
    2220             :             {
    2221    12437944 :                 case spgMatchNode:
    2222             :                     /* Descend to N'th child node */
    2223    12437944 :                     spgMatchNodeAction(index, state, innerTuple,
    2224             :                                        &current, &parent,
    2225             :                                        out.result.matchNode.nodeN);
    2226             :                     /* Adjust level as per opclass request */
    2227    12437944 :                     level += out.result.matchNode.levelAdd;
    2228             :                     /* Replace leafDatum and recompute leafSize */
    2229    12437944 :                     if (!isnull)
    2230             :                     {
    2231    12437944 :                         leafDatums[spgKeyColumn] = out.result.matchNode.restDatum;
    2232    12437944 :                         leafSize = SpGistGetLeafTupleSize(leafDescriptor,
    2233             :                                                           leafDatums, isnulls);
    2234    12437944 :                         leafSize += sizeof(ItemIdData);
    2235             :                     }
    2236             : 
    2237             :                     /*
    2238             :                      * Check new tuple size; fail if it can't fit, unless the
    2239             :                      * opclass says it can handle the situation by suffixing.
    2240             :                      *
    2241             :                      * However, the opclass can only shorten the leaf datum,
    2242             :                      * which may not be enough to ever make the tuple fit,
    2243             :                      * since INCLUDE columns might alone use more than a page.
    2244             :                      * Depending on the opclass' behavior, that could lead to
    2245             :                      * an infinite loop --- spgtextproc.c, for example, will
    2246             :                      * just repeatedly generate an empty-string leaf datum
    2247             :                      * once it runs out of data.  Actual bugs in opclasses
    2248             :                      * might cause infinite looping, too.  To detect such a
    2249             :                      * loop, check to see if we are making progress by
    2250             :                      * reducing the leafSize in each pass.  This is a bit
    2251             :                      * tricky though.  Because of alignment considerations,
    2252             :                      * the total tuple size might not decrease on every pass.
    2253             :                      * Also, there are edge cases where the choose method
    2254             :                      * might seem to not make progress for a cycle or two.
    2255             :                      * Somewhat arbitrarily, we allow up to 10 no-progress
    2256             :                      * iterations before failing.  (This limit should be more
    2257             :                      * than MAXALIGN, to accommodate opclasses that trim one
    2258             :                      * byte from the leaf datum per pass.)
    2259             :                      */
    2260    12437944 :                     if (leafSize > SPGIST_PAGE_CAPACITY)
    2261             :                     {
    2262          52 :                         bool        ok = false;
    2263             : 
    2264          52 :                         if (state->config.longValuesOK && !isnull)
    2265             :                         {
    2266          52 :                             if (leafSize < bestLeafSize)
    2267             :                             {
    2268           4 :                                 ok = true;
    2269           4 :                                 bestLeafSize = leafSize;
    2270           4 :                                 numNoProgressCycles = 0;
    2271             :                             }
    2272          48 :                             else if (++numNoProgressCycles < 10)
    2273          44 :                                 ok = true;
    2274             :                         }
    2275          52 :                         if (!ok)
    2276           4 :                             ereport(ERROR,
    2277             :                                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    2278             :                                      errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
    2279             :                                             leafSize - sizeof(ItemIdData),
    2280             :                                             SPGIST_PAGE_CAPACITY - sizeof(ItemIdData),
    2281             :                                             RelationGetRelationName(index)),
    2282             :                                      errhint("Values larger than a buffer page cannot be indexed.")));
    2283             :                     }
    2284             : 
    2285             :                     /*
    2286             :                      * Loop around and attempt to insert the new leafDatum at
    2287             :                      * "current" (which might reference an existing child
    2288             :                      * tuple, or might be invalid to force us to find a new
    2289             :                      * page for the tuple).
    2290             :                      */
    2291    12437940 :                     break;
    2292        1088 :                 case spgAddNode:
    2293             :                     /* AddNode is not sensible if nodes don't have labels */
    2294        1088 :                     if (in.nodeLabels == NULL)
    2295           0 :                         elog(ERROR, "cannot add a node to an inner tuple without node labels");
    2296             :                     /* Add node to inner tuple, per request */
    2297        1088 :                     spgAddNodeAction(index, state, innerTuple,
    2298             :                                      &current, &parent,
    2299             :                                      out.result.addNode.nodeN,
    2300             :                                      out.result.addNode.nodeLabel);
    2301             : 
    2302             :                     /*
    2303             :                      * Retry insertion into the enlarged node.  We assume that
    2304             :                      * we'll get a MatchNode result this time.
    2305             :                      */
    2306        1088 :                     goto process_inner_tuple;
    2307             :                     break;
    2308         436 :                 case spgSplitTuple:
    2309             :                     /* Split inner tuple, per request */
    2310         436 :                     spgSplitNodeAction(index, state, innerTuple,
    2311             :                                        &current, &out);
    2312             : 
    2313             :                     /* Retry insertion into the split node */
    2314         436 :                     goto process_inner_tuple;
    2315             :                     break;
    2316           0 :                 default:
    2317           0 :                     elog(ERROR, "unrecognized SPGiST choose result: %d",
    2318             :                          (int) out.resultType);
    2319             :                     break;
    2320             :             }
    2321             :         }
    2322             :     }                           /* end loop */
    2323             : 
    2324             :     /*
    2325             :      * Release any buffers we're still holding.  Beware of possibility that
    2326             :      * current and parent reference same buffer.
    2327             :      */
    2328      540832 :     if (current.buffer != InvalidBuffer)
    2329             :     {
    2330      540832 :         SpGistSetLastUsedPage(index, current.buffer);
    2331      540832 :         UnlockReleaseBuffer(current.buffer);
    2332             :     }
    2333      540832 :     if (parent.buffer != InvalidBuffer &&
    2334      529356 :         parent.buffer != current.buffer)
    2335             :     {
    2336      526258 :         SpGistSetLastUsedPage(index, parent.buffer);
    2337      526258 :         UnlockReleaseBuffer(parent.buffer);
    2338             :     }
    2339             : 
    2340             :     /*
    2341             :      * We do not support being called while some outer function is holding a
    2342             :      * buffer lock (or any other reason to postpone query cancels).  If that
    2343             :      * were the case, telling the caller to retry would create an infinite
    2344             :      * loop.
    2345             :      */
    2346             :     Assert(INTERRUPTS_CAN_BE_PROCESSED());
    2347             : 
    2348             :     /*
    2349             :      * Finally, check for interrupts again.  If there was a query cancel,
    2350             :      * ProcessInterrupts() will be able to throw the error here.  If it was
    2351             :      * some other kind of interrupt that can just be cleared, return false to
    2352             :      * tell our caller to retry.
    2353             :      */
    2354      540832 :     CHECK_FOR_INTERRUPTS();
    2355             : 
    2356      540832 :     return result;
    2357             : }

Generated by: LCOV version 1.14