LCOV - code coverage report
Current view: top level - src/backend/access/gist - gist.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 473 581 81.4 %
Date: 2025-10-31 04:18:36 Functions: 14 17 82.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * gist.c
       4             :  *    interface routines for the postgres GiST index access method.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/gist/gist.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/gist_private.h"
      18             : #include "access/gistscan.h"
      19             : #include "access/xloginsert.h"
      20             : #include "catalog/pg_collation.h"
      21             : #include "commands/vacuum.h"
      22             : #include "miscadmin.h"
      23             : #include "nodes/execnodes.h"
      24             : #include "storage/predicate.h"
      25             : #include "utils/fmgrprotos.h"
      26             : #include "utils/index_selfuncs.h"
      27             : #include "utils/memutils.h"
      28             : #include "utils/rel.h"
      29             : 
      30             : /* non-export function prototypes */
      31             : static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate);
      32             : static bool gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
      33             :                             GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum);
      34             : static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
      35             :                              GISTSTATE *giststate,
      36             :                              IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
      37             :                              Buffer leftchild, Buffer rightchild,
      38             :                              bool unlockbuf, bool unlockleftchild);
      39             : static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
      40             :                             GISTSTATE *giststate, List *splitinfo, bool unlockbuf);
      41             : static void gistprunepage(Relation rel, Page page, Buffer buffer,
      42             :                           Relation heapRel);
      43             : 
      44             : 
      45             : #define ROTATEDIST(d) do { \
      46             :     SplitPageLayout *tmp = (SplitPageLayout *) palloc0(sizeof(SplitPageLayout)); \
      47             :     tmp->block.blkno = InvalidBlockNumber;   \
      48             :     tmp->buffer = InvalidBuffer; \
      49             :     tmp->next = (d); \
      50             :     (d)=tmp; \
      51             : } while(0)
      52             : 
      53             : 
      54             : /*
      55             :  * GiST handler function: return IndexAmRoutine with access method parameters
      56             :  * and callbacks.
      57             :  */
      58             : Datum
      59       15372 : gisthandler(PG_FUNCTION_ARGS)
      60             : {
      61       15372 :     IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
      62             : 
      63       15372 :     amroutine->amstrategies = 0;
      64       15372 :     amroutine->amsupport = GISTNProcs;
      65       15372 :     amroutine->amoptsprocnum = GIST_OPTIONS_PROC;
      66       15372 :     amroutine->amcanorder = false;
      67       15372 :     amroutine->amcanorderbyop = true;
      68       15372 :     amroutine->amcanhash = false;
      69       15372 :     amroutine->amconsistentequality = false;
      70       15372 :     amroutine->amconsistentordering = false;
      71       15372 :     amroutine->amcanbackward = false;
      72       15372 :     amroutine->amcanunique = false;
      73       15372 :     amroutine->amcanmulticol = true;
      74       15372 :     amroutine->amoptionalkey = true;
      75       15372 :     amroutine->amsearcharray = false;
      76       15372 :     amroutine->amsearchnulls = true;
      77       15372 :     amroutine->amstorage = true;
      78       15372 :     amroutine->amclusterable = true;
      79       15372 :     amroutine->ampredlocks = true;
      80       15372 :     amroutine->amcanparallel = false;
      81       15372 :     amroutine->amcanbuildparallel = false;
      82       15372 :     amroutine->amcaninclude = true;
      83       15372 :     amroutine->amusemaintenanceworkmem = false;
      84       15372 :     amroutine->amsummarizing = false;
      85       15372 :     amroutine->amparallelvacuumoptions =
      86             :         VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
      87       15372 :     amroutine->amkeytype = InvalidOid;
      88             : 
      89       15372 :     amroutine->ambuild = gistbuild;
      90       15372 :     amroutine->ambuildempty = gistbuildempty;
      91       15372 :     amroutine->aminsert = gistinsert;
      92       15372 :     amroutine->aminsertcleanup = NULL;
      93       15372 :     amroutine->ambulkdelete = gistbulkdelete;
      94       15372 :     amroutine->amvacuumcleanup = gistvacuumcleanup;
      95       15372 :     amroutine->amcanreturn = gistcanreturn;
      96       15372 :     amroutine->amcostestimate = gistcostestimate;
      97       15372 :     amroutine->amgettreeheight = NULL;
      98       15372 :     amroutine->amoptions = gistoptions;
      99       15372 :     amroutine->amproperty = gistproperty;
     100       15372 :     amroutine->ambuildphasename = NULL;
     101       15372 :     amroutine->amvalidate = gistvalidate;
     102       15372 :     amroutine->amadjustmembers = gistadjustmembers;
     103       15372 :     amroutine->ambeginscan = gistbeginscan;
     104       15372 :     amroutine->amrescan = gistrescan;
     105       15372 :     amroutine->amgettuple = gistgettuple;
     106       15372 :     amroutine->amgetbitmap = gistgetbitmap;
     107       15372 :     amroutine->amendscan = gistendscan;
     108       15372 :     amroutine->ammarkpos = NULL;
     109       15372 :     amroutine->amrestrpos = NULL;
     110       15372 :     amroutine->amestimateparallelscan = NULL;
     111       15372 :     amroutine->aminitparallelscan = NULL;
     112       15372 :     amroutine->amparallelrescan = NULL;
     113       15372 :     amroutine->amtranslatestrategy = NULL;
     114       15372 :     amroutine->amtranslatecmptype = gisttranslatecmptype;
     115             : 
     116       15372 :     PG_RETURN_POINTER(amroutine);
     117             : }
     118             : 
     119             : /*
     120             :  * Create and return a temporary memory context for use by GiST. We
     121             :  * _always_ invoke user-provided methods in a temporary memory
     122             :  * context, so that memory leaks in those functions cannot cause
     123             :  * problems. Also, we use some additional temporary contexts in the
     124             :  * GiST code itself, to avoid the need to do some awkward manual
     125             :  * memory management.
     126             :  */
     127             : MemoryContext
     128        9782 : createTempGistContext(void)
     129             : {
     130        9782 :     return AllocSetContextCreate(CurrentMemoryContext,
     131             :                                  "GiST temporary context",
     132             :                                  ALLOCSET_DEFAULT_SIZES);
     133             : }
     134             : 
     135             : /*
     136             :  *  gistbuildempty() -- build an empty gist index in the initialization fork
     137             :  */
     138             : void
     139          10 : gistbuildempty(Relation index)
     140             : {
     141             :     Buffer      buffer;
     142             : 
     143             :     /* Initialize the root page */
     144          10 :     buffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
     145             :                                EB_SKIP_EXTENSION_LOCK | EB_LOCK_FIRST);
     146             : 
     147             :     /* Initialize and xlog buffer */
     148          10 :     START_CRIT_SECTION();
     149          10 :     GISTInitBuffer(buffer, F_LEAF);
     150          10 :     MarkBufferDirty(buffer);
     151          10 :     log_newpage_buffer(buffer, true);
     152          10 :     END_CRIT_SECTION();
     153             : 
     154             :     /* Unlock and release the buffer */
     155          10 :     UnlockReleaseBuffer(buffer);
     156          10 : }
     157             : 
     158             : /*
     159             :  *  gistinsert -- wrapper for GiST tuple insertion.
     160             :  *
     161             :  *    This is the public interface routine for tuple insertion in GiSTs.
     162             :  *    It doesn't do any work; just locks the relation and passes the buck.
     163             :  */
     164             : bool
     165      304310 : gistinsert(Relation r, Datum *values, bool *isnull,
     166             :            ItemPointer ht_ctid, Relation heapRel,
     167             :            IndexUniqueCheck checkUnique,
     168             :            bool indexUnchanged,
     169             :            IndexInfo *indexInfo)
     170             : {
     171      304310 :     GISTSTATE  *giststate = (GISTSTATE *) indexInfo->ii_AmCache;
     172             :     IndexTuple  itup;
     173             :     MemoryContext oldCxt;
     174             : 
     175             :     /* Initialize GISTSTATE cache if first call in this statement */
     176      304310 :     if (giststate == NULL)
     177             :     {
     178        1920 :         oldCxt = MemoryContextSwitchTo(indexInfo->ii_Context);
     179        1920 :         giststate = initGISTstate(r);
     180        1920 :         giststate->tempCxt = createTempGistContext();
     181        1920 :         indexInfo->ii_AmCache = giststate;
     182        1920 :         MemoryContextSwitchTo(oldCxt);
     183             :     }
     184             : 
     185      304310 :     oldCxt = MemoryContextSwitchTo(giststate->tempCxt);
     186             : 
     187      304310 :     itup = gistFormTuple(giststate, r, values, isnull, true);
     188      304308 :     itup->t_tid = *ht_ctid;
     189             : 
     190      304308 :     gistdoinsert(r, itup, 0, giststate, heapRel, false);
     191             : 
     192             :     /* cleanup */
     193      304296 :     MemoryContextSwitchTo(oldCxt);
     194      304296 :     MemoryContextReset(giststate->tempCxt);
     195             : 
     196      304296 :     return false;
     197             : }
     198             : 
     199             : 
     200             : /*
     201             :  * Place tuples from 'itup' to 'buffer'. If 'oldoffnum' is valid, the tuple
     202             :  * at that offset is atomically removed along with inserting the new tuples.
     203             :  * This is used to replace a tuple with a new one.
     204             :  *
     205             :  * If 'leftchildbuf' is valid, we're inserting the downlink for the page
     206             :  * to the right of 'leftchildbuf', or updating the downlink for 'leftchildbuf'.
     207             :  * F_FOLLOW_RIGHT flag on 'leftchildbuf' is cleared and NSN is set.
     208             :  *
     209             :  * If 'markfollowright' is true and the page is split, the left child is
     210             :  * marked with F_FOLLOW_RIGHT flag. That is the normal case. During buffered
     211             :  * index build, however, there is no concurrent access and the page splitting
     212             :  * is done in a slightly simpler fashion, and false is passed.
     213             :  *
     214             :  * If there is not enough room on the page, it is split. All the split
     215             :  * pages are kept pinned and locked and returned in *splitinfo, the caller
     216             :  * is responsible for inserting the downlinks for them. However, if
     217             :  * 'buffer' is the root page and it needs to be split, gistplacetopage()
     218             :  * performs the split as one atomic operation, and *splitinfo is set to NIL.
     219             :  * In that case, we continue to hold the root page locked, and the child
     220             :  * pages are released; note that new tuple(s) are *not* on the root page
     221             :  * but in one of the new child pages.
     222             :  *
     223             :  * If 'newblkno' is not NULL, returns the block number of page the first
     224             :  * new/updated tuple was inserted to. Usually it's the given page, but could
     225             :  * be its right sibling if the page was split.
     226             :  *
     227             :  * Returns 'true' if the page was split, 'false' otherwise.
     228             :  */
     229             : bool
     230     1631480 : gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
     231             :                 Buffer buffer,
     232             :                 IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
     233             :                 BlockNumber *newblkno,
     234             :                 Buffer leftchildbuf,
     235             :                 List **splitinfo,
     236             :                 bool markfollowright,
     237             :                 Relation heapRel,
     238             :                 bool is_build)
     239             : {
     240     1631480 :     BlockNumber blkno = BufferGetBlockNumber(buffer);
     241     1631480 :     Page        page = BufferGetPage(buffer);
     242     1631480 :     bool        is_leaf = (GistPageIsLeaf(page)) ? true : false;
     243             :     XLogRecPtr  recptr;
     244             :     bool        is_split;
     245             : 
     246             :     /*
     247             :      * Refuse to modify a page that's incompletely split. This should not
     248             :      * happen because we finish any incomplete splits while we walk down the
     249             :      * tree. However, it's remotely possible that another concurrent inserter
     250             :      * splits a parent page, and errors out before completing the split. We
     251             :      * will just throw an error in that case, and leave any split we had in
     252             :      * progress unfinished too. The next insert that comes along will clean up
     253             :      * the mess.
     254             :      */
     255     1631480 :     if (GistFollowRight(page))
     256           0 :         elog(ERROR, "concurrent GiST page split was incomplete");
     257             : 
     258             :     /* should never try to insert to a deleted page */
     259             :     Assert(!GistPageIsDeleted(page));
     260             : 
     261     1631480 :     *splitinfo = NIL;
     262             : 
     263             :     /*
     264             :      * if isupdate, remove old key: This node's key has been modified, either
     265             :      * because a child split occurred or because we needed to adjust our key
     266             :      * for an insert in a child node. Therefore, remove the old version of
     267             :      * this node's key.
     268             :      *
     269             :      * for WAL replay, in the non-split case we handle this by setting up a
     270             :      * one-element todelete array; in the split case, it's handled implicitly
     271             :      * because the tuple vector passed to gistSplit won't include this tuple.
     272             :      */
     273     1631480 :     is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
     274             : 
     275             :     /*
     276             :      * If leaf page is full, try at first to delete dead tuples. And then
     277             :      * check again.
     278             :      */
     279     1631480 :     if (is_split && GistPageIsLeaf(page) && GistPageHasGarbage(page))
     280             :     {
     281           0 :         gistprunepage(rel, page, buffer, heapRel);
     282           0 :         is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
     283             :     }
     284             : 
     285     1631480 :     if (is_split)
     286             :     {
     287             :         /* no space for insertion */
     288             :         IndexTuple *itvec;
     289             :         int         tlen;
     290       25184 :         SplitPageLayout *dist = NULL,
     291             :                    *ptr;
     292       25184 :         BlockNumber oldrlink = InvalidBlockNumber;
     293       25184 :         GistNSN     oldnsn = 0;
     294             :         SplitPageLayout rootpg;
     295             :         bool        is_rootsplit;
     296             :         int         npage;
     297             : 
     298       25184 :         is_rootsplit = (blkno == GIST_ROOT_BLKNO);
     299             : 
     300             :         /*
     301             :          * Form index tuples vector to split. If we're replacing an old tuple,
     302             :          * remove the old version from the vector.
     303             :          */
     304       25184 :         itvec = gistextractpage(page, &tlen);
     305       25184 :         if (OffsetNumberIsValid(oldoffnum))
     306             :         {
     307             :             /* on inner page we should remove old tuple */
     308        5534 :             int         pos = oldoffnum - FirstOffsetNumber;
     309             : 
     310        5534 :             tlen--;
     311        5534 :             if (pos != tlen)
     312        3528 :                 memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
     313             :         }
     314       25184 :         itvec = gistjoinvector(itvec, &tlen, itup, ntup);
     315       25184 :         dist = gistSplit(rel, page, itvec, tlen, giststate);
     316             : 
     317             :         /*
     318             :          * Check that split didn't produce too many pages.
     319             :          */
     320       25184 :         npage = 0;
     321       75668 :         for (ptr = dist; ptr; ptr = ptr->next)
     322       50484 :             npage++;
     323             :         /* in a root split, we'll add one more page to the list below */
     324       25184 :         if (is_rootsplit)
     325         356 :             npage++;
     326       25184 :         if (npage > GIST_MAX_SPLIT_PAGES)
     327           0 :             elog(ERROR, "GiST page split into too many halves (%d, maximum %d)",
     328             :                  npage, GIST_MAX_SPLIT_PAGES);
     329             : 
     330             :         /*
     331             :          * Set up pages to work with. Allocate new buffers for all but the
     332             :          * leftmost page. The original page becomes the new leftmost page, and
     333             :          * is just replaced with the new contents.
     334             :          *
     335             :          * For a root-split, allocate new buffers for all child pages, the
     336             :          * original page is overwritten with new root page containing
     337             :          * downlinks to the new child pages.
     338             :          */
     339       25184 :         ptr = dist;
     340       25184 :         if (!is_rootsplit)
     341             :         {
     342             :             /* save old rightlink and NSN */
     343       24828 :             oldrlink = GistPageGetOpaque(page)->rightlink;
     344       24828 :             oldnsn = GistPageGetNSN(page);
     345             : 
     346       24828 :             dist->buffer = buffer;
     347       24828 :             dist->block.blkno = BufferGetBlockNumber(buffer);
     348       24828 :             dist->page = PageGetTempPageCopySpecial(BufferGetPage(buffer));
     349             : 
     350             :             /* clean all flags except F_LEAF */
     351       24828 :             GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
     352             : 
     353       24828 :             ptr = ptr->next;
     354             :         }
     355       50840 :         for (; ptr; ptr = ptr->next)
     356             :         {
     357             :             /* Allocate new page */
     358       25656 :             ptr->buffer = gistNewBuffer(rel, heapRel);
     359       25656 :             GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
     360       25656 :             ptr->page = BufferGetPage(ptr->buffer);
     361       25656 :             ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
     362       25656 :             PredicateLockPageSplit(rel,
     363             :                                    BufferGetBlockNumber(buffer),
     364             :                                    BufferGetBlockNumber(ptr->buffer));
     365             :         }
     366             : 
     367             :         /*
     368             :          * Now that we know which blocks the new pages go to, set up downlink
     369             :          * tuples to point to them.
     370             :          */
     371       75668 :         for (ptr = dist; ptr; ptr = ptr->next)
     372             :         {
     373       50484 :             ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
     374       50484 :             GistTupleSetValid(ptr->itup);
     375             :         }
     376             : 
     377             :         /*
     378             :          * If this is a root split, we construct the new root page with the
     379             :          * downlinks here directly, instead of requiring the caller to insert
     380             :          * them. Add the new root page to the list along with the child pages.
     381             :          */
     382       25184 :         if (is_rootsplit)
     383             :         {
     384             :             IndexTuple *downlinks;
     385         356 :             int         ndownlinks = 0;
     386             :             int         i;
     387             : 
     388         356 :             rootpg.buffer = buffer;
     389         356 :             rootpg.page = PageGetTempPageCopySpecial(BufferGetPage(rootpg.buffer));
     390         356 :             GistPageGetOpaque(rootpg.page)->flags = 0;
     391             : 
     392             :             /* Prepare a vector of all the downlinks */
     393        1074 :             for (ptr = dist; ptr; ptr = ptr->next)
     394         718 :                 ndownlinks++;
     395         356 :             downlinks = palloc(sizeof(IndexTuple) * ndownlinks);
     396        1074 :             for (i = 0, ptr = dist; ptr; ptr = ptr->next)
     397         718 :                 downlinks[i++] = ptr->itup;
     398             : 
     399         356 :             rootpg.block.blkno = GIST_ROOT_BLKNO;
     400         356 :             rootpg.block.num = ndownlinks;
     401         356 :             rootpg.list = gistfillitupvec(downlinks, ndownlinks,
     402             :                                           &(rootpg.lenlist));
     403         356 :             rootpg.itup = NULL;
     404             : 
     405         356 :             rootpg.next = dist;
     406         356 :             dist = &rootpg;
     407             :         }
     408             :         else
     409             :         {
     410             :             /* Prepare split-info to be returned to caller */
     411       74594 :             for (ptr = dist; ptr; ptr = ptr->next)
     412             :             {
     413       49766 :                 GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));
     414             : 
     415       49766 :                 si->buf = ptr->buffer;
     416       49766 :                 si->downlink = ptr->itup;
     417       49766 :                 *splitinfo = lappend(*splitinfo, si);
     418             :             }
     419             :         }
     420             : 
     421             :         /*
     422             :          * Fill all pages. All the pages are new, ie. freshly allocated empty
     423             :          * pages, or a temporary copy of the old page.
     424             :          */
     425       76024 :         for (ptr = dist; ptr; ptr = ptr->next)
     426             :         {
     427       50840 :             char       *data = (char *) (ptr->list);
     428             : 
     429     1772254 :             for (int i = 0; i < ptr->block.num; i++)
     430             :             {
     431     1721414 :                 IndexTuple  thistup = (IndexTuple) data;
     432             : 
     433     1721414 :                 if (PageAddItem(ptr->page, data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
     434           0 :                     elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(rel));
     435             : 
     436             :                 /*
     437             :                  * If this is the first inserted/updated tuple, let the caller
     438             :                  * know which page it landed on.
     439             :                  */
     440     1721414 :                 if (newblkno && ItemPointerEquals(&thistup->t_tid, &(*itup)->t_tid))
     441         774 :                     *newblkno = ptr->block.blkno;
     442             : 
     443     1721414 :                 data += IndexTupleSize(thistup);
     444             :             }
     445             : 
     446             :             /* Set up rightlinks */
     447       50840 :             if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO)
     448       50600 :                 GistPageGetOpaque(ptr->page)->rightlink =
     449       25300 :                     ptr->next->block.blkno;
     450             :             else
     451       25540 :                 GistPageGetOpaque(ptr->page)->rightlink = oldrlink;
     452             : 
     453             :             /*
     454             :              * Mark the all but the right-most page with the follow-right
     455             :              * flag. It will be cleared as soon as the downlink is inserted
     456             :              * into the parent, but this ensures that if we error out before
     457             :              * that, the index is still consistent. (in buffering build mode,
     458             :              * any error will abort the index build anyway, so this is not
     459             :              * needed.)
     460             :              */
     461       50840 :             if (ptr->next && !is_rootsplit && markfollowright)
     462       24170 :                 GistMarkFollowRight(ptr->page);
     463             :             else
     464       26670 :                 GistClearFollowRight(ptr->page);
     465             : 
     466             :             /*
     467             :              * Copy the NSN of the original page to all pages. The
     468             :              * F_FOLLOW_RIGHT flags ensure that scans will follow the
     469             :              * rightlinks until the downlinks are inserted.
     470             :              */
     471       50840 :             GistPageSetNSN(ptr->page, oldnsn);
     472             :         }
     473             : 
     474             :         /*
     475             :          * gistXLogSplit() needs to WAL log a lot of pages, prepare WAL
     476             :          * insertion for that. NB: The number of pages and data segments
     477             :          * specified here must match the calculations in gistXLogSplit()!
     478             :          */
     479       25184 :         if (!is_build && RelationNeedsWAL(rel))
     480        3462 :             XLogEnsureRecordSpace(npage, 1 + npage * 2);
     481             : 
     482       25184 :         START_CRIT_SECTION();
     483             : 
     484             :         /*
     485             :          * Must mark buffers dirty before XLogInsert, even though we'll still
     486             :          * be changing their opaque fields below.
     487             :          */
     488       76024 :         for (ptr = dist; ptr; ptr = ptr->next)
     489       50840 :             MarkBufferDirty(ptr->buffer);
     490       25184 :         if (BufferIsValid(leftchildbuf))
     491        5354 :             MarkBufferDirty(leftchildbuf);
     492             : 
     493             :         /*
     494             :          * The first page in the chain was a temporary working copy meant to
     495             :          * replace the old page. Copy it over the old page.
     496             :          */
     497       25184 :         PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
     498       25184 :         dist->page = BufferGetPage(dist->buffer);
     499             : 
     500             :         /*
     501             :          * Write the WAL record.
     502             :          *
     503             :          * If we're building a new index, however, we don't WAL-log changes
     504             :          * yet. The LSN-NSN interlock between parent and child requires that
     505             :          * LSNs never move backwards, so set the LSNs to a value that's
     506             :          * smaller than any real or fake unlogged LSN that might be generated
     507             :          * later. (There can't be any concurrent scans during index build, so
     508             :          * we don't need to be able to detect concurrent splits yet.)
     509             :          */
     510       25184 :         if (is_build)
     511       21718 :             recptr = GistBuildLSN;
     512             :         else
     513             :         {
     514        3466 :             if (RelationNeedsWAL(rel))
     515        3462 :                 recptr = gistXLogSplit(is_leaf,
     516             :                                        dist, oldrlink, oldnsn, leftchildbuf,
     517             :                                        markfollowright);
     518             :             else
     519           4 :                 recptr = gistGetFakeLSN(rel);
     520             :         }
     521             : 
     522       76024 :         for (ptr = dist; ptr; ptr = ptr->next)
     523       50840 :             PageSetLSN(ptr->page, recptr);
     524             : 
     525             :         /*
     526             :          * Return the new child buffers to the caller.
     527             :          *
     528             :          * If this was a root split, we've already inserted the downlink
     529             :          * pointers, in the form of a new root page. Therefore we can release
     530             :          * all the new buffers, and keep just the root page locked.
     531             :          */
     532       25184 :         if (is_rootsplit)
     533             :         {
     534        1074 :             for (ptr = dist->next; ptr; ptr = ptr->next)
     535         718 :                 UnlockReleaseBuffer(ptr->buffer);
     536             :         }
     537             :     }
     538             :     else
     539             :     {
     540             :         /*
     541             :          * Enough space.  We always get here if ntup==0.
     542             :          */
     543     1606296 :         START_CRIT_SECTION();
     544             : 
     545             :         /*
     546             :          * Delete old tuple if any, then insert new tuple(s) if any.  If
     547             :          * possible, use the fast path of PageIndexTupleOverwrite.
     548             :          */
     549     1606296 :         if (OffsetNumberIsValid(oldoffnum))
     550             :         {
     551      735792 :             if (ntup == 1)
     552             :             {
     553             :                 /* One-for-one replacement, so use PageIndexTupleOverwrite */
     554      716324 :                 if (!PageIndexTupleOverwrite(page, oldoffnum, *itup, IndexTupleSize(*itup)))
     555           0 :                     elog(ERROR, "failed to add item to index page in \"%s\"",
     556             :                          RelationGetRelationName(rel));
     557             :             }
     558             :             else
     559             :             {
     560             :                 /* Delete old, then append new tuple(s) to page */
     561       19468 :                 PageIndexTupleDelete(page, oldoffnum);
     562       19468 :                 gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
     563             :             }
     564             :         }
     565             :         else
     566             :         {
     567             :             /* Just append new tuples at the end of the page */
     568      870504 :             gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
     569             :         }
     570             : 
     571     1606296 :         MarkBufferDirty(buffer);
     572             : 
     573     1606296 :         if (BufferIsValid(leftchildbuf))
     574       18816 :             MarkBufferDirty(leftchildbuf);
     575             : 
     576     1606296 :         if (is_build)
     577     1111704 :             recptr = GistBuildLSN;
     578             :         else
     579             :         {
     580      494592 :             if (RelationNeedsWAL(rel))
     581      494514 :             {
     582      494514 :                 OffsetNumber ndeloffs = 0,
     583             :                             deloffs[1];
     584             : 
     585      494514 :                 if (OffsetNumberIsValid(oldoffnum))
     586             :                 {
     587      193740 :                     deloffs[0] = oldoffnum;
     588      193740 :                     ndeloffs = 1;
     589             :                 }
     590             : 
     591      494514 :                 recptr = gistXLogUpdate(buffer,
     592             :                                         deloffs, ndeloffs, itup, ntup,
     593             :                                         leftchildbuf);
     594             :             }
     595             :             else
     596          78 :                 recptr = gistGetFakeLSN(rel);
     597             :         }
     598     1606296 :         PageSetLSN(page, recptr);
     599             : 
     600     1606296 :         if (newblkno)
     601      104370 :             *newblkno = blkno;
     602             :     }
     603             : 
     604             :     /*
     605             :      * If we inserted the downlink for a child page, set NSN and clear
     606             :      * F_FOLLOW_RIGHT flag on the left child, so that concurrent scans know to
     607             :      * follow the rightlink if and only if they looked at the parent page
     608             :      * before we inserted the downlink.
     609             :      *
     610             :      * Note that we do this *after* writing the WAL record. That means that
     611             :      * the possible full page image in the WAL record does not include these
     612             :      * changes, and they must be replayed even if the page is restored from
     613             :      * the full page image. There's a chicken-and-egg problem: if we updated
     614             :      * the child pages first, we wouldn't know the recptr of the WAL record
     615             :      * we're about to write.
     616             :      */
     617     1631480 :     if (BufferIsValid(leftchildbuf))
     618             :     {
     619       24170 :         Page        leftpg = BufferGetPage(leftchildbuf);
     620             : 
     621       24170 :         GistPageSetNSN(leftpg, recptr);
     622       24170 :         GistClearFollowRight(leftpg);
     623             : 
     624       24170 :         PageSetLSN(leftpg, recptr);
     625             :     }
     626             : 
     627     1631480 :     END_CRIT_SECTION();
     628             : 
     629     1631480 :     return is_split;
     630             : }
     631             : 
     632             : /*
     633             :  * Workhorse routine for doing insertion into a GiST index. Note that
     634             :  * this routine assumes it is invoked in a short-lived memory context,
     635             :  * so it does not bother releasing palloc'd allocations.
     636             :  */
     637             : void
     638      854626 : gistdoinsert(Relation r, IndexTuple itup, Size freespace,
     639             :              GISTSTATE *giststate, Relation heapRel, bool is_build)
     640             : {
     641             :     ItemId      iid;
     642             :     IndexTuple  idxtuple;
     643             :     GISTInsertStack firststack;
     644             :     GISTInsertStack *stack;
     645             :     GISTInsertState state;
     646      854626 :     bool        xlocked = false;
     647             : 
     648      854626 :     memset(&state, 0, sizeof(GISTInsertState));
     649      854626 :     state.freespace = freespace;
     650      854626 :     state.r = r;
     651      854626 :     state.heapRel = heapRel;
     652      854626 :     state.is_build = is_build;
     653             : 
     654             :     /* Start from the root */
     655      854626 :     firststack.blkno = GIST_ROOT_BLKNO;
     656      854626 :     firststack.lsn = 0;
     657      854626 :     firststack.retry_from_parent = false;
     658      854626 :     firststack.parent = NULL;
     659      854626 :     firststack.downlinkoffnum = InvalidOffsetNumber;
     660      854626 :     state.stack = stack = &firststack;
     661             : 
     662             :     /*
     663             :      * Walk down along the path of smallest penalty, updating the parent
     664             :      * pointers with the key we're inserting as we go. If we crash in the
     665             :      * middle, the tree is consistent, although the possible parent updates
     666             :      * were a waste.
     667             :      */
     668             :     for (;;)
     669             :     {
     670             :         /*
     671             :          * If we split an internal page while descending the tree, we have to
     672             :          * retry at the parent. (Normally, the LSN-NSN interlock below would
     673             :          * also catch this and cause us to retry. But LSNs are not updated
     674             :          * during index build.)
     675             :          */
     676     1978630 :         while (stack->retry_from_parent)
     677             :         {
     678         382 :             if (xlocked)
     679           0 :                 LockBuffer(stack->buffer, GIST_UNLOCK);
     680         382 :             xlocked = false;
     681         382 :             ReleaseBuffer(stack->buffer);
     682         382 :             state.stack = stack = stack->parent;
     683             :         }
     684             : 
     685     1978248 :         if (XLogRecPtrIsInvalid(stack->lsn))
     686     1978074 :             stack->buffer = ReadBuffer(state.r, stack->blkno);
     687             : 
     688             :         /*
     689             :          * Be optimistic and grab shared lock first. Swap it for an exclusive
     690             :          * lock later if we need to update the page.
     691             :          */
     692     1978248 :         if (!xlocked)
     693             :         {
     694     1978246 :             LockBuffer(stack->buffer, GIST_SHARE);
     695     1978246 :             gistcheckpage(state.r, stack->buffer);
     696             :         }
     697             : 
     698     1978248 :         stack->page = BufferGetPage(stack->buffer);
     699     1978248 :         stack->lsn = xlocked ?
     700     1978248 :             PageGetLSN(stack->page) : BufferGetLSNAtomic(stack->buffer);
     701             :         Assert(!RelationNeedsWAL(state.r) || !XLogRecPtrIsInvalid(stack->lsn));
     702             : 
     703             :         /*
     704             :          * If this page was split but the downlink was never inserted to the
     705             :          * parent because the inserting backend crashed before doing that, fix
     706             :          * that now.
     707             :          */
     708     1978248 :         if (GistFollowRight(stack->page))
     709             :         {
     710           0 :             if (!xlocked)
     711             :             {
     712           0 :                 LockBuffer(stack->buffer, GIST_UNLOCK);
     713           0 :                 LockBuffer(stack->buffer, GIST_EXCLUSIVE);
     714           0 :                 xlocked = true;
     715             :                 /* someone might've completed the split when we unlocked */
     716           0 :                 if (!GistFollowRight(stack->page))
     717           0 :                     continue;
     718             :             }
     719           0 :             gistfixsplit(&state, giststate);
     720             : 
     721           0 :             UnlockReleaseBuffer(stack->buffer);
     722           0 :             xlocked = false;
     723           0 :             state.stack = stack = stack->parent;
     724           0 :             continue;
     725             :         }
     726             : 
     727     3101738 :         if ((stack->blkno != GIST_ROOT_BLKNO &&
     728     1123490 :              stack->parent->lsn < GistPageGetNSN(stack->page)) ||
     729     1978248 :             GistPageIsDeleted(stack->page))
     730             :         {
     731             :             /*
     732             :              * Concurrent split or page deletion detected. There's no
     733             :              * guarantee that the downlink for this page is consistent with
     734             :              * the tuple we're inserting anymore, so go back to parent and
     735             :              * rechoose the best child.
     736             :              */
     737           0 :             UnlockReleaseBuffer(stack->buffer);
     738           0 :             xlocked = false;
     739           0 :             state.stack = stack = stack->parent;
     740           0 :             continue;
     741             :         }
     742             : 
     743     1978248 :         if (!GistPageIsLeaf(stack->page))
     744             :         {
     745             :             /*
     746             :              * This is an internal page so continue to walk down the tree.
     747             :              * Find the child node that has the minimum insertion penalty.
     748             :              */
     749             :             BlockNumber childblkno;
     750             :             IndexTuple  newtup;
     751             :             GISTInsertStack *item;
     752             :             OffsetNumber downlinkoffnum;
     753             : 
     754     1123622 :             downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate);
     755     1123622 :             iid = PageGetItemId(stack->page, downlinkoffnum);
     756     1123622 :             idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
     757     1123622 :             childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
     758             : 
     759             :             /*
     760             :              * Check that it's not a leftover invalid tuple from pre-9.1
     761             :              */
     762     1123622 :             if (GistTupleIsInvalid(idxtuple))
     763           0 :                 ereport(ERROR,
     764             :                         (errmsg("index \"%s\" contains an inner tuple marked as invalid",
     765             :                                 RelationGetRelationName(r)),
     766             :                          errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
     767             :                          errhint("Please REINDEX it.")));
     768             : 
     769             :             /*
     770             :              * Check that the key representing the target child node is
     771             :              * consistent with the key we're inserting. Update it if it's not.
     772             :              */
     773     1123622 :             newtup = gistgetadjusted(state.r, idxtuple, itup, giststate);
     774     1123622 :             if (newtup)
     775             :             {
     776             :                 /*
     777             :                  * Swap shared lock for an exclusive one. Beware, the page may
     778             :                  * change while we unlock/lock the page...
     779             :                  */
     780      647552 :                 if (!xlocked)
     781             :                 {
     782      647552 :                     LockBuffer(stack->buffer, GIST_UNLOCK);
     783      647552 :                     LockBuffer(stack->buffer, GIST_EXCLUSIVE);
     784      647552 :                     xlocked = true;
     785      647552 :                     stack->page = BufferGetPage(stack->buffer);
     786             : 
     787      647552 :                     if (PageGetLSN(stack->page) != stack->lsn)
     788             :                     {
     789             :                         /* the page was changed while we unlocked it, retry */
     790           0 :                         continue;
     791             :                     }
     792             :                 }
     793             : 
     794             :                 /*
     795             :                  * Update the tuple.
     796             :                  *
     797             :                  * We still hold the lock after gistinserttuple(), but it
     798             :                  * might have to split the page to make the updated tuple fit.
     799             :                  * In that case the updated tuple might migrate to the other
     800             :                  * half of the split, so we have to go back to the parent and
     801             :                  * descend back to the half that's a better fit for the new
     802             :                  * tuple.
     803             :                  */
     804      647552 :                 if (gistinserttuple(&state, stack, giststate, newtup,
     805             :                                     downlinkoffnum))
     806             :                 {
     807             :                     /*
     808             :                      * If this was a root split, the root page continues to be
     809             :                      * the parent and the updated tuple went to one of the
     810             :                      * child pages, so we just need to retry from the root
     811             :                      * page.
     812             :                      */
     813         174 :                     if (stack->blkno != GIST_ROOT_BLKNO)
     814             :                     {
     815         172 :                         UnlockReleaseBuffer(stack->buffer);
     816         172 :                         xlocked = false;
     817         172 :                         state.stack = stack = stack->parent;
     818             :                     }
     819         174 :                     continue;
     820             :                 }
     821             :             }
     822     1123448 :             LockBuffer(stack->buffer, GIST_UNLOCK);
     823     1123448 :             xlocked = false;
     824             : 
     825             :             /* descend to the chosen child */
     826     1123448 :             item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
     827     1123448 :             item->blkno = childblkno;
     828     1123448 :             item->parent = stack;
     829     1123448 :             item->downlinkoffnum = downlinkoffnum;
     830     1123448 :             state.stack = stack = item;
     831             :         }
     832             :         else
     833             :         {
     834             :             /*
     835             :              * Leaf page. Insert the new key. We've already updated all the
     836             :              * parents on the way down, but we might have to split the page if
     837             :              * it doesn't fit. gistinserttuple() will take care of that.
     838             :              */
     839             : 
     840             :             /*
     841             :              * Swap shared lock for an exclusive one. Be careful, the page may
     842             :              * change while we unlock/lock the page...
     843             :              */
     844      854626 :             if (!xlocked)
     845             :             {
     846      854626 :                 LockBuffer(stack->buffer, GIST_UNLOCK);
     847      854626 :                 LockBuffer(stack->buffer, GIST_EXCLUSIVE);
     848      854626 :                 xlocked = true;
     849      854626 :                 stack->page = BufferGetPage(stack->buffer);
     850      854626 :                 stack->lsn = PageGetLSN(stack->page);
     851             : 
     852      854626 :                 if (stack->blkno == GIST_ROOT_BLKNO)
     853             :                 {
     854             :                     /*
     855             :                      * the only page that can become inner instead of leaf is
     856             :                      * the root page, so for root we should recheck it
     857             :                      */
     858       38802 :                     if (!GistPageIsLeaf(stack->page))
     859             :                     {
     860             :                         /*
     861             :                          * very rare situation: during unlock/lock index with
     862             :                          * number of pages = 1 was increased
     863             :                          */
     864           0 :                         LockBuffer(stack->buffer, GIST_UNLOCK);
     865           0 :                         xlocked = false;
     866           0 :                         continue;
     867             :                     }
     868             : 
     869             :                     /*
     870             :                      * we don't need to check root split, because checking
     871             :                      * leaf/inner is enough to recognize split for root
     872             :                      */
     873             :                 }
     874     1631648 :                 else if ((GistFollowRight(stack->page) ||
     875      815824 :                           stack->parent->lsn < GistPageGetNSN(stack->page)) ||
     876      815824 :                          GistPageIsDeleted(stack->page))
     877             :                 {
     878             :                     /*
     879             :                      * The page was split or deleted while we momentarily
     880             :                      * unlocked the page. Go back to parent.
     881             :                      */
     882           0 :                     UnlockReleaseBuffer(stack->buffer);
     883           0 :                     xlocked = false;
     884           0 :                     state.stack = stack = stack->parent;
     885           0 :                     continue;
     886             :                 }
     887             :             }
     888             : 
     889             :             /* now state.stack->(page, buffer and blkno) points to leaf page */
     890             : 
     891      854626 :             gistinserttuple(&state, stack, giststate, itup,
     892             :                             InvalidOffsetNumber);
     893      854614 :             LockBuffer(stack->buffer, GIST_UNLOCK);
     894             : 
     895             :             /* Release any pins we might still hold before exiting */
     896     2832112 :             for (; stack; stack = stack->parent)
     897     1977498 :                 ReleaseBuffer(stack->buffer);
     898      854614 :             break;
     899             :         }
     900             :     }
     901      854614 : }
     902             : 
     903             : /*
     904             :  * Traverse the tree to find path from root page to specified "child" block.
     905             :  *
     906             :  * returns a new insertion stack, starting from the parent of "child", up
     907             :  * to the root. *downlinkoffnum is set to the offset of the downlink in the
     908             :  * direct parent of child.
     909             :  *
     910             :  * To prevent deadlocks, this should lock only one page at a time.
     911             :  */
     912             : static GISTInsertStack *
     913           2 : gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
     914             : {
     915             :     Page        page;
     916             :     Buffer      buffer;
     917             :     OffsetNumber i,
     918             :                 maxoff;
     919             :     ItemId      iid;
     920             :     IndexTuple  idxtuple;
     921             :     List       *fifo;
     922             :     GISTInsertStack *top,
     923             :                *ptr;
     924             :     BlockNumber blkno;
     925             : 
     926           2 :     top = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
     927           2 :     top->blkno = GIST_ROOT_BLKNO;
     928           2 :     top->downlinkoffnum = InvalidOffsetNumber;
     929             : 
     930           2 :     fifo = list_make1(top);
     931           4 :     while (fifo != NIL)
     932             :     {
     933             :         /* Get next page to visit */
     934           4 :         top = linitial(fifo);
     935           4 :         fifo = list_delete_first(fifo);
     936             : 
     937           4 :         buffer = ReadBuffer(r, top->blkno);
     938           4 :         LockBuffer(buffer, GIST_SHARE);
     939           4 :         gistcheckpage(r, buffer);
     940           4 :         page = BufferGetPage(buffer);
     941             : 
     942           4 :         if (GistPageIsLeaf(page))
     943             :         {
     944             :             /*
     945             :              * Because we scan the index top-down, all the rest of the pages
     946             :              * in the queue must be leaf pages as well.
     947             :              */
     948           0 :             UnlockReleaseBuffer(buffer);
     949           0 :             break;
     950             :         }
     951             : 
     952             :         /* currently, internal pages are never deleted */
     953             :         Assert(!GistPageIsDeleted(page));
     954             : 
     955           4 :         top->lsn = BufferGetLSNAtomic(buffer);
     956             : 
     957             :         /*
     958             :          * If F_FOLLOW_RIGHT is set, the page to the right doesn't have a
     959             :          * downlink. This should not normally happen..
     960             :          */
     961           4 :         if (GistFollowRight(page))
     962           0 :             elog(ERROR, "concurrent GiST page split was incomplete");
     963             : 
     964           4 :         if (top->parent && top->parent->lsn < GistPageGetNSN(page) &&
     965           0 :             GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
     966             :         {
     967             :             /*
     968             :              * Page was split while we looked elsewhere. We didn't see the
     969             :              * downlink to the right page when we scanned the parent, so add
     970             :              * it to the queue now.
     971             :              *
     972             :              * Put the right page ahead of the queue, so that we visit it
     973             :              * next. That's important, because if this is the lowest internal
     974             :              * level, just above leaves, we might already have queued up some
     975             :              * leaf pages, and we assume that there can't be any non-leaf
     976             :              * pages behind leaf pages.
     977             :              */
     978           0 :             ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
     979           0 :             ptr->blkno = GistPageGetOpaque(page)->rightlink;
     980           0 :             ptr->downlinkoffnum = InvalidOffsetNumber;
     981           0 :             ptr->parent = top->parent;
     982             : 
     983           0 :             fifo = lcons(ptr, fifo);
     984             :         }
     985             : 
     986           4 :         maxoff = PageGetMaxOffsetNumber(page);
     987             : 
     988          24 :         for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
     989             :         {
     990          22 :             iid = PageGetItemId(page, i);
     991          22 :             idxtuple = (IndexTuple) PageGetItem(page, iid);
     992          22 :             blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
     993          22 :             if (blkno == child)
     994             :             {
     995             :                 /* Found it! */
     996           2 :                 UnlockReleaseBuffer(buffer);
     997           2 :                 *downlinkoffnum = i;
     998           2 :                 return top;
     999             :             }
    1000             :             else
    1001             :             {
    1002             :                 /* Append this child to the list of pages to visit later */
    1003          20 :                 ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
    1004          20 :                 ptr->blkno = blkno;
    1005          20 :                 ptr->downlinkoffnum = i;
    1006          20 :                 ptr->parent = top;
    1007             : 
    1008          20 :                 fifo = lappend(fifo, ptr);
    1009             :             }
    1010             :         }
    1011             : 
    1012           2 :         UnlockReleaseBuffer(buffer);
    1013             :     }
    1014             : 
    1015           0 :     elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",
    1016             :          RelationGetRelationName(r), child);
    1017             :     return NULL;                /* keep compiler quiet */
    1018             : }
    1019             : 
    1020             : /*
    1021             :  * Updates the stack so that child->parent is the correct parent of the
    1022             :  * child. child->parent must be exclusively locked on entry, and will
    1023             :  * remain so at exit, but it might not be the same page anymore.
    1024             :  */
    1025             : static void
    1026       24172 : gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build)
    1027             : {
    1028       24172 :     GISTInsertStack *parent = child->parent;
    1029             :     ItemId      iid;
    1030             :     IndexTuple  idxtuple;
    1031             :     OffsetNumber maxoff;
    1032             :     GISTInsertStack *ptr;
    1033             : 
    1034       24172 :     gistcheckpage(r, parent->buffer);
    1035       24172 :     parent->page = BufferGetPage(parent->buffer);
    1036       24172 :     maxoff = PageGetMaxOffsetNumber(parent->page);
    1037             : 
    1038             :     /* Check if the downlink is still where it was before */
    1039       24172 :     if (child->downlinkoffnum != InvalidOffsetNumber && child->downlinkoffnum <= maxoff)
    1040             :     {
    1041       24130 :         iid = PageGetItemId(parent->page, child->downlinkoffnum);
    1042       24130 :         idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
    1043       24130 :         if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
    1044       24130 :             return;             /* still there */
    1045             :     }
    1046             : 
    1047             :     /*
    1048             :      * The page has changed since we looked. During normal operation, every
    1049             :      * update of a page changes its LSN, so the LSN we memorized should have
    1050             :      * changed too.
    1051             :      *
    1052             :      * During index build, however, we don't WAL-log the changes until we have
    1053             :      * built the index, so the LSN doesn't change. There is no concurrent
    1054             :      * activity during index build, but we might have changed the parent
    1055             :      * ourselves.
    1056             :      *
    1057             :      * We will also get here if child->downlinkoffnum is invalid. That happens
    1058             :      * if 'parent' had been updated by an earlier call to this function on its
    1059             :      * grandchild, which had to move right.
    1060             :      */
    1061             :     Assert(parent->lsn != PageGetLSN(parent->page) || is_build ||
    1062             :            child->downlinkoffnum == InvalidOffsetNumber);
    1063             : 
    1064             :     /*
    1065             :      * Scan the page to re-find the downlink. If the page was split, it might
    1066             :      * have moved to a different page, so follow the right links until we find
    1067             :      * it.
    1068             :      */
    1069             :     while (true)
    1070          22 :     {
    1071             :         OffsetNumber i;
    1072             : 
    1073          64 :         maxoff = PageGetMaxOffsetNumber(parent->page);
    1074         302 :         for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
    1075             :         {
    1076         278 :             iid = PageGetItemId(parent->page, i);
    1077         278 :             idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
    1078         278 :             if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
    1079             :             {
    1080             :                 /* yes!!, found */
    1081          40 :                 child->downlinkoffnum = i;
    1082          40 :                 return;
    1083             :             }
    1084             :         }
    1085             : 
    1086          24 :         parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
    1087          24 :         parent->downlinkoffnum = InvalidOffsetNumber;
    1088          24 :         UnlockReleaseBuffer(parent->buffer);
    1089          24 :         if (parent->blkno == InvalidBlockNumber)
    1090             :         {
    1091             :             /*
    1092             :              * End of chain and still didn't find parent. It's a very-very
    1093             :              * rare situation when the root was split.
    1094             :              */
    1095           2 :             break;
    1096             :         }
    1097          22 :         parent->buffer = ReadBuffer(r, parent->blkno);
    1098          22 :         LockBuffer(parent->buffer, GIST_EXCLUSIVE);
    1099          22 :         gistcheckpage(r, parent->buffer);
    1100          22 :         parent->page = BufferGetPage(parent->buffer);
    1101             :     }
    1102             : 
    1103             :     /*
    1104             :      * awful!!, we need search tree to find parent ... , but before we should
    1105             :      * release all old parent
    1106             :      */
    1107             : 
    1108           2 :     ptr = child->parent->parent;  /* child->parent already released above */
    1109           2 :     while (ptr)
    1110             :     {
    1111           0 :         ReleaseBuffer(ptr->buffer);
    1112           0 :         ptr = ptr->parent;
    1113             :     }
    1114             : 
    1115             :     /* ok, find new path */
    1116           2 :     ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum);
    1117             : 
    1118             :     /* read all buffers as expected by caller */
    1119             :     /* note we don't lock them or gistcheckpage them here! */
    1120           6 :     while (ptr)
    1121             :     {
    1122           4 :         ptr->buffer = ReadBuffer(r, ptr->blkno);
    1123           4 :         ptr->page = BufferGetPage(ptr->buffer);
    1124           4 :         ptr = ptr->parent;
    1125             :     }
    1126             : 
    1127             :     /* install new chain of parents to stack */
    1128           2 :     child->parent = parent;
    1129             : 
    1130             :     /* make recursive call to normal processing */
    1131           2 :     LockBuffer(child->parent->buffer, GIST_EXCLUSIVE);
    1132           2 :     gistFindCorrectParent(r, child, is_build);
    1133             : }
    1134             : 
    1135             : /*
    1136             :  * Form a downlink pointer for the page in 'buf'.
    1137             :  */
    1138             : static IndexTuple
    1139           0 : gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate,
    1140             :                  GISTInsertStack *stack, bool is_build)
    1141             : {
    1142           0 :     Page        page = BufferGetPage(buf);
    1143             :     OffsetNumber maxoff;
    1144             :     OffsetNumber offset;
    1145           0 :     IndexTuple  downlink = NULL;
    1146             : 
    1147           0 :     maxoff = PageGetMaxOffsetNumber(page);
    1148           0 :     for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset))
    1149             :     {
    1150             :         IndexTuple  ituple = (IndexTuple)
    1151           0 :             PageGetItem(page, PageGetItemId(page, offset));
    1152             : 
    1153           0 :         if (downlink == NULL)
    1154           0 :             downlink = CopyIndexTuple(ituple);
    1155             :         else
    1156             :         {
    1157             :             IndexTuple  newdownlink;
    1158             : 
    1159           0 :             newdownlink = gistgetadjusted(rel, downlink, ituple,
    1160             :                                           giststate);
    1161           0 :             if (newdownlink)
    1162           0 :                 downlink = newdownlink;
    1163             :         }
    1164             :     }
    1165             : 
    1166             :     /*
    1167             :      * If the page is completely empty, we can't form a meaningful downlink
    1168             :      * for it. But we have to insert a downlink for the page. Any key will do,
    1169             :      * as long as its consistent with the downlink of parent page, so that we
    1170             :      * can legally insert it to the parent. A minimal one that matches as few
    1171             :      * scans as possible would be best, to keep scans from doing useless work,
    1172             :      * but we don't know how to construct that. So we just use the downlink of
    1173             :      * the original page that was split - that's as far from optimal as it can
    1174             :      * get but will do..
    1175             :      */
    1176           0 :     if (!downlink)
    1177             :     {
    1178             :         ItemId      iid;
    1179             : 
    1180           0 :         LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
    1181           0 :         gistFindCorrectParent(rel, stack, is_build);
    1182           0 :         iid = PageGetItemId(stack->parent->page, stack->downlinkoffnum);
    1183           0 :         downlink = (IndexTuple) PageGetItem(stack->parent->page, iid);
    1184           0 :         downlink = CopyIndexTuple(downlink);
    1185           0 :         LockBuffer(stack->parent->buffer, GIST_UNLOCK);
    1186             :     }
    1187             : 
    1188           0 :     ItemPointerSetBlockNumber(&(downlink->t_tid), BufferGetBlockNumber(buf));
    1189           0 :     GistTupleSetValid(downlink);
    1190             : 
    1191           0 :     return downlink;
    1192             : }
    1193             : 
    1194             : 
    1195             : /*
    1196             :  * Complete the incomplete split of state->stack->page.
    1197             :  */
    1198             : static void
    1199           0 : gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
    1200             : {
    1201           0 :     GISTInsertStack *stack = state->stack;
    1202             :     Buffer      buf;
    1203             :     Page        page;
    1204           0 :     List       *splitinfo = NIL;
    1205             : 
    1206           0 :     ereport(LOG,
    1207             :             (errmsg("fixing incomplete split in index \"%s\", block %u",
    1208             :                     RelationGetRelationName(state->r), stack->blkno)));
    1209             : 
    1210             :     Assert(GistFollowRight(stack->page));
    1211             :     Assert(OffsetNumberIsValid(stack->downlinkoffnum));
    1212             : 
    1213           0 :     buf = stack->buffer;
    1214             : 
    1215             :     /*
    1216             :      * Read the chain of split pages, following the rightlinks. Construct a
    1217             :      * downlink tuple for each page.
    1218             :      */
    1219             :     for (;;)
    1220           0 :     {
    1221           0 :         GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));
    1222             :         IndexTuple  downlink;
    1223             : 
    1224           0 :         page = BufferGetPage(buf);
    1225             : 
    1226             :         /* Form the new downlink tuples to insert to parent */
    1227           0 :         downlink = gistformdownlink(state->r, buf, giststate, stack, state->is_build);
    1228             : 
    1229           0 :         si->buf = buf;
    1230           0 :         si->downlink = downlink;
    1231             : 
    1232           0 :         splitinfo = lappend(splitinfo, si);
    1233             : 
    1234           0 :         if (GistFollowRight(page))
    1235             :         {
    1236             :             /* lock next page */
    1237           0 :             buf = ReadBuffer(state->r, GistPageGetOpaque(page)->rightlink);
    1238           0 :             LockBuffer(buf, GIST_EXCLUSIVE);
    1239             :         }
    1240             :         else
    1241           0 :             break;
    1242             :     }
    1243             : 
    1244             :     /* Insert the downlinks */
    1245           0 :     gistfinishsplit(state, stack, giststate, splitinfo, false);
    1246           0 : }
    1247             : 
    1248             : /*
    1249             :  * Insert or replace a tuple in stack->buffer. If 'oldoffnum' is valid, the
    1250             :  * tuple at 'oldoffnum' is replaced, otherwise the tuple is inserted as new.
    1251             :  * 'stack' represents the path from the root to the page being updated.
    1252             :  *
    1253             :  * The caller must hold an exclusive lock on stack->buffer.  The lock is still
    1254             :  * held on return, but the page might not contain the inserted tuple if the
    1255             :  * page was split. The function returns true if the page was split, false
    1256             :  * otherwise.
    1257             :  */
    1258             : static bool
    1259     1502178 : gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
    1260             :                 GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
    1261             : {
    1262     1502178 :     return gistinserttuples(state, stack, giststate, &tuple, 1, oldoffnum,
    1263             :                             InvalidBuffer, InvalidBuffer, false, false);
    1264             : }
    1265             : 
    1266             : /* ----------------
    1267             :  * An extended workhorse version of gistinserttuple(). This version allows
    1268             :  * inserting multiple tuples, or replacing a single tuple with multiple tuples.
    1269             :  * This is used to recursively update the downlinks in the parent when a page
    1270             :  * is split.
    1271             :  *
    1272             :  * If leftchild and rightchild are valid, we're inserting/replacing the
    1273             :  * downlink for rightchild, and leftchild is its left sibling. We clear the
    1274             :  * F_FOLLOW_RIGHT flag and update NSN on leftchild, atomically with the
    1275             :  * insertion of the downlink.
    1276             :  *
    1277             :  * To avoid holding locks for longer than necessary, when recursing up the
    1278             :  * tree to update the parents, the locking is a bit peculiar here. On entry,
    1279             :  * the caller must hold an exclusive lock on stack->buffer, as well as
    1280             :  * leftchild and rightchild if given. On return:
    1281             :  *
    1282             :  *  - Lock on stack->buffer is released, if 'unlockbuf' is true. The page is
    1283             :  *    always kept pinned, however.
    1284             :  *  - Lock on 'leftchild' is released, if 'unlockleftchild' is true. The page
    1285             :  *    is kept pinned.
    1286             :  *  - Lock and pin on 'rightchild' are always released.
    1287             :  *
    1288             :  * Returns 'true' if the page had to be split. Note that if the page was
    1289             :  * split, the inserted/updated tuples might've been inserted to a right
    1290             :  * sibling of stack->buffer instead of stack->buffer itself.
    1291             :  */
    1292             : static bool
    1293     1526348 : gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
    1294             :                  GISTSTATE *giststate,
    1295             :                  IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
    1296             :                  Buffer leftchild, Buffer rightchild,
    1297             :                  bool unlockbuf, bool unlockleftchild)
    1298             : {
    1299             :     List       *splitinfo;
    1300             :     bool        is_split;
    1301             : 
    1302             :     /*
    1303             :      * Check for any rw conflicts (in serializable isolation level) just
    1304             :      * before we intend to modify the page
    1305             :      */
    1306     1526348 :     CheckForSerializableConflictIn(state->r, NULL, BufferGetBlockNumber(stack->buffer));
    1307             : 
    1308             :     /* Insert the tuple(s) to the page, splitting the page if necessary */
    1309     1526336 :     is_split = gistplacetopage(state->r, state->freespace, giststate,
    1310             :                                stack->buffer,
    1311             :                                tuples, ntup,
    1312             :                                oldoffnum, NULL,
    1313             :                                leftchild,
    1314             :                                &splitinfo,
    1315             :                                true,
    1316             :                                state->heapRel,
    1317     1526336 :                                state->is_build);
    1318             : 
    1319             :     /*
    1320             :      * Before recursing up in case the page was split, release locks on the
    1321             :      * child pages. We don't need to keep them locked when updating the
    1322             :      * parent.
    1323             :      */
    1324     1526336 :     if (BufferIsValid(rightchild))
    1325       24170 :         UnlockReleaseBuffer(rightchild);
    1326     1526336 :     if (BufferIsValid(leftchild) && unlockleftchild)
    1327        5224 :         LockBuffer(leftchild, GIST_UNLOCK);
    1328             : 
    1329             :     /*
    1330             :      * If we had to split, insert/update the downlinks in the parent. If the
    1331             :      * caller requested us to release the lock on stack->buffer, tell
    1332             :      * gistfinishsplit() to do that as soon as it's safe to do so. If we
    1333             :      * didn't have to split, release it ourselves.
    1334             :      */
    1335     1526336 :     if (splitinfo)
    1336       24060 :         gistfinishsplit(state, stack, giststate, splitinfo, unlockbuf);
    1337     1502276 :     else if (unlockbuf)
    1338       18836 :         LockBuffer(stack->buffer, GIST_UNLOCK);
    1339             : 
    1340     1526336 :     return is_split;
    1341             : }
    1342             : 
    1343             : /*
    1344             :  * Finish an incomplete split by inserting/updating the downlinks in parent
    1345             :  * page. 'splitinfo' contains all the child pages involved in the split,
    1346             :  * from left-to-right.
    1347             :  *
    1348             :  * On entry, the caller must hold a lock on stack->buffer and all the child
    1349             :  * pages in 'splitinfo'. If 'unlockbuf' is true, the lock on stack->buffer is
    1350             :  * released on return. The child pages are always unlocked and unpinned.
    1351             :  */
    1352             : static void
    1353       24060 : gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
    1354             :                 GISTSTATE *giststate, List *splitinfo, bool unlockbuf)
    1355             : {
    1356             :     GISTPageSplitInfo *right;
    1357             :     GISTPageSplitInfo *left;
    1358             :     IndexTuple  tuples[2];
    1359             : 
    1360             :     /* A split always contains at least two halves */
    1361             :     Assert(list_length(splitinfo) >= 2);
    1362             : 
    1363             :     /*
    1364             :      * We need to insert downlinks for each new page, and update the downlink
    1365             :      * for the original (leftmost) page in the split. Begin at the rightmost
    1366             :      * page, inserting one downlink at a time until there's only two pages
    1367             :      * left. Finally insert the downlink for the last new page and update the
    1368             :      * downlink for the original page as one operation.
    1369             :      */
    1370       24060 :     LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
    1371             : 
    1372             :     /*
    1373             :      * Insert downlinks for the siblings from right to left, until there are
    1374             :      * only two siblings left.
    1375             :      */
    1376       24170 :     for (int pos = list_length(splitinfo) - 1; pos > 1; pos--)
    1377             :     {
    1378         110 :         right = (GISTPageSplitInfo *) list_nth(splitinfo, pos);
    1379         110 :         left = (GISTPageSplitInfo *) list_nth(splitinfo, pos - 1);
    1380             : 
    1381         110 :         gistFindCorrectParent(state->r, stack, state->is_build);
    1382         110 :         if (gistinserttuples(state, stack->parent, giststate,
    1383             :                              &right->downlink, 1,
    1384             :                              InvalidOffsetNumber,
    1385             :                              left->buf, right->buf, false, false))
    1386             :         {
    1387             :             /*
    1388             :              * If the parent page was split, the existing downlink might have
    1389             :              * moved.
    1390             :              */
    1391          12 :             stack->downlinkoffnum = InvalidOffsetNumber;
    1392             :         }
    1393             :         /* gistinserttuples() released the lock on right->buf. */
    1394             :     }
    1395             : 
    1396       24060 :     right = (GISTPageSplitInfo *) lsecond(splitinfo);
    1397       24060 :     left = (GISTPageSplitInfo *) linitial(splitinfo);
    1398             : 
    1399             :     /*
    1400             :      * Finally insert downlink for the remaining right page and update the
    1401             :      * downlink for the original page to not contain the tuples that were
    1402             :      * moved to the new pages.
    1403             :      */
    1404       24060 :     tuples[0] = left->downlink;
    1405       24060 :     tuples[1] = right->downlink;
    1406       24060 :     gistFindCorrectParent(state->r, stack, state->is_build);
    1407       24060 :     (void) gistinserttuples(state, stack->parent, giststate,
    1408             :                             tuples, 2,
    1409       24060 :                             stack->downlinkoffnum,
    1410             :                             left->buf, right->buf,
    1411             :                             true,   /* Unlock parent */
    1412             :                             unlockbuf   /* Unlock stack->buffer if caller
    1413             :                                          * wants that */
    1414             :         );
    1415             : 
    1416             :     /*
    1417             :      * The downlink might have moved when we updated it. Even if the page
    1418             :      * wasn't split, because gistinserttuples() implements updating the old
    1419             :      * tuple by removing and re-inserting it!
    1420             :      */
    1421       24060 :     stack->downlinkoffnum = InvalidOffsetNumber;
    1422             : 
    1423             :     Assert(left->buf == stack->buffer);
    1424             : 
    1425             :     /*
    1426             :      * If we split the page because we had to adjust the downlink on an
    1427             :      * internal page, while descending the tree for inserting a new tuple,
    1428             :      * then this might no longer be the correct page for the new tuple. The
    1429             :      * downlink to this page might not cover the new tuple anymore, it might
    1430             :      * need to go to the newly-created right sibling instead. Tell the caller
    1431             :      * to walk back up the stack, to re-check at the parent which page to
    1432             :      * insert to.
    1433             :      *
    1434             :      * Normally, the LSN-NSN interlock during the tree descend would also
    1435             :      * detect that a concurrent split happened (by ourselves), and cause us to
    1436             :      * retry at the parent. But that mechanism doesn't work during index
    1437             :      * build, because we don't do WAL-logging, and don't update LSNs, during
    1438             :      * index build.
    1439             :      */
    1440       24060 :     stack->retry_from_parent = true;
    1441       24060 : }
    1442             : 
    1443             : /*
    1444             :  * gistSplit -- split a page in the tree and fill struct
    1445             :  * used for XLOG and real writes buffers. Function is recursive, ie
    1446             :  * it will split page until keys will fit in every page.
    1447             :  */
    1448             : SplitPageLayout *
    1449       26742 : gistSplit(Relation r,
    1450             :           Page page,
    1451             :           IndexTuple *itup,     /* contains compressed entry */
    1452             :           int len,
    1453             :           GISTSTATE *giststate)
    1454             : {
    1455             :     IndexTuple *lvectup,
    1456             :                *rvectup;
    1457             :     GistSplitVector v;
    1458             :     int         i;
    1459       26742 :     SplitPageLayout *res = NULL;
    1460             : 
    1461             :     /* this should never recurse very deeply, but better safe than sorry */
    1462       26742 :     check_stack_depth();
    1463             : 
    1464             :     /* there's no point in splitting an empty page */
    1465             :     Assert(len > 0);
    1466             : 
    1467             :     /*
    1468             :      * If a single tuple doesn't fit on a page, no amount of splitting will
    1469             :      * help.
    1470             :      */
    1471       26742 :     if (len == 1)
    1472           0 :         ereport(ERROR,
    1473             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1474             :                  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
    1475             :                         IndexTupleSize(itup[0]), GiSTPageSize,
    1476             :                         RelationGetRelationName(r))));
    1477             : 
    1478       26742 :     memset(v.spl_lisnull, true,
    1479       26742 :            sizeof(bool) * giststate->nonLeafTupdesc->natts);
    1480       26742 :     memset(v.spl_risnull, true,
    1481       26742 :            sizeof(bool) * giststate->nonLeafTupdesc->natts);
    1482       26742 :     gistSplitByKey(r, page, itup, len, giststate, &v, 0);
    1483             : 
    1484             :     /* form left and right vector */
    1485       26742 :     lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
    1486       26742 :     rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
    1487             : 
    1488     1145500 :     for (i = 0; i < v.splitVector.spl_nleft; i++)
    1489     1118758 :         lvectup[i] = itup[v.splitVector.spl_left[i] - 1];
    1490             : 
    1491     1277604 :     for (i = 0; i < v.splitVector.spl_nright; i++)
    1492     1250862 :         rvectup[i] = itup[v.splitVector.spl_right[i] - 1];
    1493             : 
    1494             :     /* finalize splitting (may need another split) */
    1495       26742 :     if (!gistfitpage(rvectup, v.splitVector.spl_nright))
    1496             :     {
    1497         698 :         res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate);
    1498             :     }
    1499             :     else
    1500             :     {
    1501       26044 :         ROTATEDIST(res);
    1502       26044 :         res->block.num = v.splitVector.spl_nright;
    1503       26044 :         res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &(res->lenlist));
    1504       26044 :         res->itup = gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false);
    1505             :     }
    1506             : 
    1507       26742 :     if (!gistfitpage(lvectup, v.splitVector.spl_nleft))
    1508             :     {
    1509             :         SplitPageLayout *resptr,
    1510             :                    *subres;
    1511             : 
    1512         496 :         resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate);
    1513             : 
    1514             :         /* install on list's tail */
    1515        1234 :         while (resptr->next)
    1516         738 :             resptr = resptr->next;
    1517             : 
    1518         496 :         resptr->next = res;
    1519         496 :         res = subres;
    1520             :     }
    1521             :     else
    1522             :     {
    1523       26246 :         ROTATEDIST(res);
    1524       26246 :         res->block.num = v.splitVector.spl_nleft;
    1525       26246 :         res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &(res->lenlist));
    1526       26246 :         res->itup = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false);
    1527             :     }
    1528             : 
    1529       26742 :     return res;
    1530             : }
    1531             : 
    1532             : /*
    1533             :  * Create a GISTSTATE and fill it with information about the index
    1534             :  */
    1535             : GISTSTATE *
    1536        9372 : initGISTstate(Relation index)
    1537             : {
    1538             :     GISTSTATE  *giststate;
    1539             :     MemoryContext scanCxt;
    1540             :     MemoryContext oldCxt;
    1541             :     int         i;
    1542             : 
    1543             :     /* safety check to protect fixed-size arrays in GISTSTATE */
    1544        9372 :     if (index->rd_att->natts > INDEX_MAX_KEYS)
    1545           0 :         elog(ERROR, "numberOfAttributes %d > %d",
    1546             :              index->rd_att->natts, INDEX_MAX_KEYS);
    1547             : 
    1548             :     /* Create the memory context that will hold the GISTSTATE */
    1549        9372 :     scanCxt = AllocSetContextCreate(CurrentMemoryContext,
    1550             :                                     "GiST scan context",
    1551             :                                     ALLOCSET_DEFAULT_SIZES);
    1552        9372 :     oldCxt = MemoryContextSwitchTo(scanCxt);
    1553             : 
    1554             :     /* Create and fill in the GISTSTATE */
    1555        9372 :     giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
    1556             : 
    1557        9372 :     giststate->scanCxt = scanCxt;
    1558        9372 :     giststate->tempCxt = scanCxt;    /* caller must change this if needed */
    1559        9372 :     giststate->leafTupdesc = index->rd_att;
    1560             : 
    1561             :     /*
    1562             :      * The truncated tupdesc for non-leaf index tuples, which doesn't contain
    1563             :      * the INCLUDE attributes.
    1564             :      *
    1565             :      * It is used to form tuples during tuple adjustment and page split.
    1566             :      * B-tree creates shortened tuple descriptor for every truncated tuple,
    1567             :      * because it is doing this less often: it does not have to form truncated
    1568             :      * tuples during page split.  Also, B-tree is not adjusting tuples on
    1569             :      * internal pages the way GiST does.
    1570             :      */
    1571       18744 :     giststate->nonLeafTupdesc = CreateTupleDescTruncatedCopy(index->rd_att,
    1572        9372 :                                                              IndexRelationGetNumberOfKeyAttributes(index));
    1573             : 
    1574       24854 :     for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(index); i++)
    1575             :     {
    1576       15482 :         fmgr_info_copy(&(giststate->consistentFn[i]),
    1577       15482 :                        index_getprocinfo(index, i + 1, GIST_CONSISTENT_PROC),
    1578             :                        scanCxt);
    1579       15482 :         fmgr_info_copy(&(giststate->unionFn[i]),
    1580       15482 :                        index_getprocinfo(index, i + 1, GIST_UNION_PROC),
    1581             :                        scanCxt);
    1582             : 
    1583             :         /* opclasses are not required to provide a Compress method */
    1584       15482 :         if (OidIsValid(index_getprocid(index, i + 1, GIST_COMPRESS_PROC)))
    1585        5390 :             fmgr_info_copy(&(giststate->compressFn[i]),
    1586        5390 :                            index_getprocinfo(index, i + 1, GIST_COMPRESS_PROC),
    1587             :                            scanCxt);
    1588             :         else
    1589       10092 :             giststate->compressFn[i].fn_oid = InvalidOid;
    1590             : 
    1591             :         /* opclasses are not required to provide a Decompress method */
    1592       15482 :         if (OidIsValid(index_getprocid(index, i + 1, GIST_DECOMPRESS_PROC)))
    1593        1622 :             fmgr_info_copy(&(giststate->decompressFn[i]),
    1594        1622 :                            index_getprocinfo(index, i + 1, GIST_DECOMPRESS_PROC),
    1595             :                            scanCxt);
    1596             :         else
    1597       13860 :             giststate->decompressFn[i].fn_oid = InvalidOid;
    1598             : 
    1599       15482 :         fmgr_info_copy(&(giststate->penaltyFn[i]),
    1600       15482 :                        index_getprocinfo(index, i + 1, GIST_PENALTY_PROC),
    1601             :                        scanCxt);
    1602       15482 :         fmgr_info_copy(&(giststate->picksplitFn[i]),
    1603       15482 :                        index_getprocinfo(index, i + 1, GIST_PICKSPLIT_PROC),
    1604             :                        scanCxt);
    1605       15482 :         fmgr_info_copy(&(giststate->equalFn[i]),
    1606       15482 :                        index_getprocinfo(index, i + 1, GIST_EQUAL_PROC),
    1607             :                        scanCxt);
    1608             : 
    1609             :         /* opclasses are not required to provide a Distance method */
    1610       15482 :         if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_PROC)))
    1611        1914 :             fmgr_info_copy(&(giststate->distanceFn[i]),
    1612        1914 :                            index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC),
    1613             :                            scanCxt);
    1614             :         else
    1615       13568 :             giststate->distanceFn[i].fn_oid = InvalidOid;
    1616             : 
    1617             :         /* opclasses are not required to provide a Fetch method */
    1618       15482 :         if (OidIsValid(index_getprocid(index, i + 1, GIST_FETCH_PROC)))
    1619        1378 :             fmgr_info_copy(&(giststate->fetchFn[i]),
    1620        1378 :                            index_getprocinfo(index, i + 1, GIST_FETCH_PROC),
    1621             :                            scanCxt);
    1622             :         else
    1623       14104 :             giststate->fetchFn[i].fn_oid = InvalidOid;
    1624             : 
    1625             :         /*
    1626             :          * If the index column has a specified collation, we should honor that
    1627             :          * while doing comparisons.  However, we may have a collatable storage
    1628             :          * type for a noncollatable indexed data type.  If there's no index
    1629             :          * collation then specify default collation in case the support
    1630             :          * functions need collation.  This is harmless if the support
    1631             :          * functions don't care about collation, so we just do it
    1632             :          * unconditionally.  (We could alternatively call get_typcollation,
    1633             :          * but that seems like expensive overkill --- there aren't going to be
    1634             :          * any cases where a GiST storage type has a nondefault collation.)
    1635             :          */
    1636       15482 :         if (OidIsValid(index->rd_indcollation[i]))
    1637         194 :             giststate->supportCollation[i] = index->rd_indcollation[i];
    1638             :         else
    1639       15288 :             giststate->supportCollation[i] = DEFAULT_COLLATION_OID;
    1640             :     }
    1641             : 
    1642             :     /* No opclass information for INCLUDE attributes */
    1643        9874 :     for (; i < index->rd_att->natts; i++)
    1644             :     {
    1645         502 :         giststate->consistentFn[i].fn_oid = InvalidOid;
    1646         502 :         giststate->unionFn[i].fn_oid = InvalidOid;
    1647         502 :         giststate->compressFn[i].fn_oid = InvalidOid;
    1648         502 :         giststate->decompressFn[i].fn_oid = InvalidOid;
    1649         502 :         giststate->penaltyFn[i].fn_oid = InvalidOid;
    1650         502 :         giststate->picksplitFn[i].fn_oid = InvalidOid;
    1651         502 :         giststate->equalFn[i].fn_oid = InvalidOid;
    1652         502 :         giststate->distanceFn[i].fn_oid = InvalidOid;
    1653         502 :         giststate->fetchFn[i].fn_oid = InvalidOid;
    1654         502 :         giststate->supportCollation[i] = InvalidOid;
    1655             :     }
    1656             : 
    1657        9372 :     MemoryContextSwitchTo(oldCxt);
    1658             : 
    1659        9372 :     return giststate;
    1660             : }
    1661             : 
    1662             : void
    1663        7164 : freeGISTstate(GISTSTATE *giststate)
    1664             : {
    1665             :     /* It's sufficient to delete the scanCxt */
    1666        7164 :     MemoryContextDelete(giststate->scanCxt);
    1667        7164 : }
    1668             : 
    1669             : /*
    1670             :  * gistprunepage() -- try to remove LP_DEAD items from the given page.
    1671             :  * Function assumes that buffer is exclusively locked.
    1672             :  */
    1673             : static void
    1674           0 : gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel)
    1675             : {
    1676             :     OffsetNumber deletable[MaxIndexTuplesPerPage];
    1677           0 :     int         ndeletable = 0;
    1678             :     OffsetNumber offnum,
    1679             :                 maxoff;
    1680             : 
    1681             :     Assert(GistPageIsLeaf(page));
    1682             : 
    1683             :     /*
    1684             :      * Scan over all items to see which ones need to be deleted according to
    1685             :      * LP_DEAD flags.
    1686             :      */
    1687           0 :     maxoff = PageGetMaxOffsetNumber(page);
    1688           0 :     for (offnum = FirstOffsetNumber;
    1689           0 :          offnum <= maxoff;
    1690           0 :          offnum = OffsetNumberNext(offnum))
    1691             :     {
    1692           0 :         ItemId      itemId = PageGetItemId(page, offnum);
    1693             : 
    1694           0 :         if (ItemIdIsDead(itemId))
    1695           0 :             deletable[ndeletable++] = offnum;
    1696             :     }
    1697             : 
    1698           0 :     if (ndeletable > 0)
    1699             :     {
    1700           0 :         TransactionId snapshotConflictHorizon = InvalidTransactionId;
    1701             : 
    1702           0 :         if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
    1703             :             snapshotConflictHorizon =
    1704           0 :                 index_compute_xid_horizon_for_tuples(rel, heapRel, buffer,
    1705             :                                                      deletable, ndeletable);
    1706             : 
    1707           0 :         START_CRIT_SECTION();
    1708             : 
    1709           0 :         PageIndexMultiDelete(page, deletable, ndeletable);
    1710             : 
    1711             :         /*
    1712             :          * Mark the page as not containing any LP_DEAD items.  This is not
    1713             :          * certainly true (there might be some that have recently been marked,
    1714             :          * but weren't included in our target-item list), but it will almost
    1715             :          * always be true and it doesn't seem worth an additional page scan to
    1716             :          * check it. Remember that F_HAS_GARBAGE is only a hint anyway.
    1717             :          */
    1718           0 :         GistClearPageHasGarbage(page);
    1719             : 
    1720           0 :         MarkBufferDirty(buffer);
    1721             : 
    1722             :         /* XLOG stuff */
    1723           0 :         if (RelationNeedsWAL(rel))
    1724           0 :         {
    1725             :             XLogRecPtr  recptr;
    1726             : 
    1727           0 :             recptr = gistXLogDelete(buffer,
    1728             :                                     deletable, ndeletable,
    1729             :                                     snapshotConflictHorizon,
    1730             :                                     heapRel);
    1731             : 
    1732           0 :             PageSetLSN(page, recptr);
    1733             :         }
    1734             :         else
    1735           0 :             PageSetLSN(page, gistGetFakeLSN(rel));
    1736             : 
    1737           0 :         END_CRIT_SECTION();
    1738             :     }
    1739             : 
    1740             :     /*
    1741             :      * Note: if we didn't find any LP_DEAD items, then the page's
    1742             :      * F_HAS_GARBAGE hint bit is falsely set.  We do not bother expending a
    1743             :      * separate write to clear it, however.  We will clear it when we split
    1744             :      * the page.
    1745             :      */
    1746           0 : }

Generated by: LCOV version 1.16