LCOV - code coverage report
Current view: top level - src/backend/access/gist - gistbuild.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 56 289 19.4 %
Date: 2019-11-15 23:07:02 Functions: 2 15 13.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * gistbuild.c
       4             :  *    build algorithm for GiST indexes implementation.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/gist/gistbuild.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include <math.h>
      18             : 
      19             : #include "access/genam.h"
      20             : #include "access/gist_private.h"
      21             : #include "access/gistxlog.h"
      22             : #include "access/tableam.h"
      23             : #include "access/xloginsert.h"
      24             : #include "catalog/index.h"
      25             : #include "miscadmin.h"
      26             : #include "optimizer/optimizer.h"
      27             : #include "storage/bufmgr.h"
      28             : #include "storage/smgr.h"
      29             : #include "utils/memutils.h"
      30             : #include "utils/rel.h"
      31             : 
      32             : /* Step of index tuples for check whether to switch to buffering build mode */
      33             : #define BUFFERING_MODE_SWITCH_CHECK_STEP 256
      34             : 
      35             : /*
      36             :  * Number of tuples to process in the slow way before switching to buffering
      37             :  * mode, when buffering is explicitly turned on. Also, the number of tuples
      38             :  * to process between readjusting the buffer size parameter, while in
      39             :  * buffering mode.
      40             :  */
      41             : #define BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET 4096
      42             : 
      43             : typedef enum
      44             : {
      45             :     GIST_BUFFERING_DISABLED,    /* in regular build mode and aren't going to
      46             :                                  * switch */
      47             :     GIST_BUFFERING_AUTO,        /* in regular build mode, but will switch to
      48             :                                  * buffering build mode if the index grows too
      49             :                                  * big */
      50             :     GIST_BUFFERING_STATS,       /* gathering statistics of index tuple size
      51             :                                  * before switching to the buffering build
      52             :                                  * mode */
      53             :     GIST_BUFFERING_ACTIVE       /* in buffering build mode */
      54             : } GistBufferingMode;
      55             : 
      56             : /* Working state for gistbuild and its callback */
      57             : typedef struct
      58             : {
      59             :     Relation    indexrel;
      60             :     Relation    heaprel;
      61             :     GISTSTATE  *giststate;
      62             : 
      63             :     int64       indtuples;      /* number of tuples indexed */
      64             :     int64       indtuplesSize;  /* total size of all indexed tuples */
      65             : 
      66             :     Size        freespace;      /* amount of free space to leave on pages */
      67             : 
      68             :     /*
      69             :      * Extra data structures used during a buffering build. 'gfbb' contains
      70             :      * information related to managing the build buffers. 'parentMap' is a
      71             :      * lookup table of the parent of each internal page.
      72             :      */
      73             :     GISTBuildBuffers *gfbb;
      74             :     HTAB       *parentMap;
      75             : 
      76             :     GistBufferingMode bufferingMode;
      77             : } GISTBuildState;
      78             : 
      79             : /* prototypes for private functions */
      80             : static void gistInitBuffering(GISTBuildState *buildstate);
      81             : static int  calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep);
      82             : static void gistBuildCallback(Relation index,
      83             :                               ItemPointer tid,
      84             :                               Datum *values,
      85             :                               bool *isnull,
      86             :                               bool tupleIsAlive,
      87             :                               void *state);
      88             : static void gistBufferingBuildInsert(GISTBuildState *buildstate,
      89             :                                      IndexTuple itup);
      90             : static bool gistProcessItup(GISTBuildState *buildstate, IndexTuple itup,
      91             :                             BlockNumber startblkno, int startlevel);
      92             : static BlockNumber gistbufferinginserttuples(GISTBuildState *buildstate,
      93             :                                              Buffer buffer, int level,
      94             :                                              IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
      95             :                                              BlockNumber parentblk, OffsetNumber downlinkoffnum);
      96             : static Buffer gistBufferingFindCorrectParent(GISTBuildState *buildstate,
      97             :                                              BlockNumber childblkno, int level,
      98             :                                              BlockNumber *parentblk,
      99             :                                              OffsetNumber *downlinkoffnum);
     100             : static void gistProcessEmptyingQueue(GISTBuildState *buildstate);
     101             : static void gistEmptyAllBuffers(GISTBuildState *buildstate);
     102             : static int  gistGetMaxLevel(Relation index);
     103             : 
     104             : static void gistInitParentMap(GISTBuildState *buildstate);
     105             : static void gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child,
     106             :                                BlockNumber parent);
     107             : static void gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parent);
     108             : static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child);
     109             : 
     110             : /*
     111             :  * Main entry point to GiST index build. Initially calls insert over and over,
     112             :  * but switches to more efficient buffering build algorithm after a certain
     113             :  * number of tuples (unless buffering mode is disabled).
     114             :  */
     115             : IndexBuildResult *
     116         344 : gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     117             : {
     118             :     IndexBuildResult *result;
     119             :     double      reltuples;
     120             :     GISTBuildState buildstate;
     121             :     Buffer      buffer;
     122             :     Page        page;
     123         344 :     MemoryContext oldcxt = CurrentMemoryContext;
     124             :     int         fillfactor;
     125             : 
     126         344 :     buildstate.indexrel = index;
     127         344 :     buildstate.heaprel = heap;
     128             : 
     129         344 :     if (index->rd_options)
     130             :     {
     131             :         /* Get buffering mode from the options string */
     132          18 :         GiSTOptions *options = (GiSTOptions *) index->rd_options;
     133             : 
     134          18 :         if (options->buffering_mode == GIST_OPTION_BUFFERING_ON)
     135           4 :             buildstate.bufferingMode = GIST_BUFFERING_STATS;
     136          14 :         else if (options->buffering_mode == GIST_OPTION_BUFFERING_OFF)
     137           4 :             buildstate.bufferingMode = GIST_BUFFERING_DISABLED;
     138             :         else
     139          10 :             buildstate.bufferingMode = GIST_BUFFERING_AUTO;
     140             : 
     141          18 :         fillfactor = options->fillfactor;
     142             :     }
     143             :     else
     144             :     {
     145             :         /*
     146             :          * By default, switch to buffering mode when the index grows too large
     147             :          * to fit in cache.
     148             :          */
     149         326 :         buildstate.bufferingMode = GIST_BUFFERING_AUTO;
     150         326 :         fillfactor = GIST_DEFAULT_FILLFACTOR;
     151             :     }
     152             :     /* Calculate target amount of free space to leave on pages */
     153         344 :     buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100;
     154             : 
     155             :     /*
     156             :      * We expect to be called exactly once for any index relation. If that's
     157             :      * not the case, big trouble's what we have.
     158             :      */
     159         344 :     if (RelationGetNumberOfBlocks(index) != 0)
     160           0 :         elog(ERROR, "index \"%s\" already contains data",
     161             :              RelationGetRelationName(index));
     162             : 
     163             :     /* no locking is needed */
     164         344 :     buildstate.giststate = initGISTstate(index);
     165             : 
     166             :     /*
     167             :      * Create a temporary memory context that is reset once for each tuple
     168             :      * processed.  (Note: we don't bother to make this a child of the
     169             :      * giststate's scanCxt, so we have to delete it separately at the end.)
     170             :      */
     171         344 :     buildstate.giststate->tempCxt = createTempGistContext();
     172             : 
     173             :     /* initialize the root page */
     174         344 :     buffer = gistNewBuffer(index);
     175             :     Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
     176         344 :     page = BufferGetPage(buffer);
     177             : 
     178         344 :     START_CRIT_SECTION();
     179             : 
     180         344 :     GISTInitBuffer(buffer, F_LEAF);
     181             : 
     182         344 :     MarkBufferDirty(buffer);
     183         344 :     PageSetLSN(page, GistBuildLSN);
     184             : 
     185         344 :     UnlockReleaseBuffer(buffer);
     186             : 
     187         344 :     END_CRIT_SECTION();
     188             : 
     189             :     /* build the index */
     190         344 :     buildstate.indtuples = 0;
     191         344 :     buildstate.indtuplesSize = 0;
     192             : 
     193             :     /*
     194             :      * Do the heap scan.
     195             :      */
     196         344 :     reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
     197             :                                        gistBuildCallback,
     198             :                                        (void *) &buildstate, NULL);
     199             : 
     200             :     /*
     201             :      * If buffering was used, flush out all the tuples that are still in the
     202             :      * buffers.
     203             :      */
     204         344 :     if (buildstate.bufferingMode == GIST_BUFFERING_ACTIVE)
     205             :     {
     206           0 :         elog(DEBUG1, "all tuples processed, emptying buffers");
     207           0 :         gistEmptyAllBuffers(&buildstate);
     208           0 :         gistFreeBuildBuffers(buildstate.gfbb);
     209             :     }
     210             : 
     211             :     /* okay, all heap tuples are indexed */
     212         344 :     MemoryContextSwitchTo(oldcxt);
     213         344 :     MemoryContextDelete(buildstate.giststate->tempCxt);
     214             : 
     215         344 :     freeGISTstate(buildstate.giststate);
     216             : 
     217             :     /*
     218             :      * We didn't write WAL records as we built the index, so if WAL-logging is
     219             :      * required, write all pages to the WAL now.
     220             :      */
     221         344 :     if (RelationNeedsWAL(index))
     222             :     {
     223         328 :         log_newpage_range(index, MAIN_FORKNUM,
     224             :                           0, RelationGetNumberOfBlocks(index),
     225             :                           true);
     226             :     }
     227             : 
     228             :     /*
     229             :      * Return statistics
     230             :      */
     231         344 :     result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
     232             : 
     233         344 :     result->heap_tuples = reltuples;
     234         344 :     result->index_tuples = (double) buildstate.indtuples;
     235             : 
     236         344 :     return result;
     237             : }
     238             : 
     239             : /*
     240             :  * Attempt to switch to buffering mode.
     241             :  *
     242             :  * If there is not enough memory for buffering build, sets bufferingMode
     243             :  * to GIST_BUFFERING_DISABLED, so that we don't bother to try the switch
     244             :  * anymore. Otherwise initializes the build buffers, and sets bufferingMode to
     245             :  * GIST_BUFFERING_ACTIVE.
     246             :  */
     247             : static void
     248           0 : gistInitBuffering(GISTBuildState *buildstate)
     249             : {
     250           0 :     Relation    index = buildstate->indexrel;
     251             :     int         pagesPerBuffer;
     252             :     Size        pageFreeSpace;
     253             :     Size        itupAvgSize,
     254             :                 itupMinSize;
     255             :     double      avgIndexTuplesPerPage,
     256             :                 maxIndexTuplesPerPage;
     257             :     int         i;
     258             :     int         levelStep;
     259             : 
     260             :     /* Calc space of index page which is available for index tuples */
     261           0 :     pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData)
     262             :         - sizeof(ItemIdData)
     263           0 :         - buildstate->freespace;
     264             : 
     265             :     /*
     266             :      * Calculate average size of already inserted index tuples using gathered
     267             :      * statistics.
     268             :      */
     269           0 :     itupAvgSize = (double) buildstate->indtuplesSize /
     270           0 :         (double) buildstate->indtuples;
     271             : 
     272             :     /*
     273             :      * Calculate minimal possible size of index tuple by index metadata.
     274             :      * Minimal possible size of varlena is VARHDRSZ.
     275             :      *
     276             :      * XXX: that's not actually true, as a short varlen can be just 2 bytes.
     277             :      * And we should take padding into account here.
     278             :      */
     279           0 :     itupMinSize = (Size) MAXALIGN(sizeof(IndexTupleData));
     280           0 :     for (i = 0; i < index->rd_att->natts; i++)
     281             :     {
     282           0 :         if (TupleDescAttr(index->rd_att, i)->attlen < 0)
     283           0 :             itupMinSize += VARHDRSZ;
     284             :         else
     285           0 :             itupMinSize += TupleDescAttr(index->rd_att, i)->attlen;
     286             :     }
     287             : 
     288             :     /* Calculate average and maximal number of index tuples which fit to page */
     289           0 :     avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize;
     290           0 :     maxIndexTuplesPerPage = pageFreeSpace / itupMinSize;
     291             : 
     292             :     /*
     293             :      * We need to calculate two parameters for the buffering algorithm:
     294             :      * levelStep and pagesPerBuffer.
     295             :      *
     296             :      * levelStep determines the size of subtree that we operate on, while
     297             :      * emptying a buffer. A higher value is better, as you need fewer buffer
     298             :      * emptying steps to build the index. However, if you set it too high, the
     299             :      * subtree doesn't fit in cache anymore, and you quickly lose the benefit
     300             :      * of the buffers.
     301             :      *
     302             :      * In Arge et al's paper, levelStep is chosen as logB(M/4B), where B is
     303             :      * the number of tuples on page (ie. fanout), and M is the amount of
     304             :      * internal memory available. Curiously, they doesn't explain *why* that
     305             :      * setting is optimal. We calculate it by taking the highest levelStep so
     306             :      * that a subtree still fits in cache. For a small B, our way of
     307             :      * calculating levelStep is very close to Arge et al's formula. For a
     308             :      * large B, our formula gives a value that is 2x higher.
     309             :      *
     310             :      * The average size (in pages) of a subtree of depth n can be calculated
     311             :      * as a geometric series:
     312             :      *
     313             :      * B^0 + B^1 + B^2 + ... + B^n = (1 - B^(n + 1)) / (1 - B)
     314             :      *
     315             :      * where B is the average number of index tuples on page. The subtree is
     316             :      * cached in the shared buffer cache and the OS cache, so we choose
     317             :      * levelStep so that the subtree size is comfortably smaller than
     318             :      * effective_cache_size, with a safety factor of 4.
     319             :      *
     320             :      * The estimate on the average number of index tuples on page is based on
     321             :      * average tuple sizes observed before switching to buffered build, so the
     322             :      * real subtree size can be somewhat larger. Also, it would selfish to
     323             :      * gobble the whole cache for our index build. The safety factor of 4
     324             :      * should account for those effects.
     325             :      *
     326             :      * The other limiting factor for setting levelStep is that while
     327             :      * processing a subtree, we need to hold one page for each buffer at the
     328             :      * next lower buffered level. The max. number of buffers needed for that
     329             :      * is maxIndexTuplesPerPage^levelStep. This is very conservative, but
     330             :      * hopefully maintenance_work_mem is set high enough that you're
     331             :      * constrained by effective_cache_size rather than maintenance_work_mem.
     332             :      *
     333             :      * XXX: the buffer hash table consumes a fair amount of memory too per
     334             :      * buffer, but that is not currently taken into account. That scales on
     335             :      * the total number of buffers used, ie. the index size and on levelStep.
     336             :      * Note that a higher levelStep *reduces* the amount of memory needed for
     337             :      * the hash table.
     338             :      */
     339           0 :     levelStep = 1;
     340             :     for (;;)
     341           0 :     {
     342             :         double      subtreesize;
     343             :         double      maxlowestlevelpages;
     344             : 
     345             :         /* size of an average subtree at this levelStep (in pages). */
     346           0 :         subtreesize =
     347           0 :             (1 - pow(avgIndexTuplesPerPage, (double) (levelStep + 1))) /
     348           0 :             (1 - avgIndexTuplesPerPage);
     349             : 
     350             :         /* max number of pages at the lowest level of a subtree */
     351           0 :         maxlowestlevelpages = pow(maxIndexTuplesPerPage, (double) levelStep);
     352             : 
     353             :         /* subtree must fit in cache (with safety factor of 4) */
     354           0 :         if (subtreesize > effective_cache_size / 4)
     355           0 :             break;
     356             : 
     357             :         /* each node in the lowest level of a subtree has one page in memory */
     358           0 :         if (maxlowestlevelpages > ((double) maintenance_work_mem * 1024) / BLCKSZ)
     359           0 :             break;
     360             : 
     361             :         /* Good, we can handle this levelStep. See if we can go one higher. */
     362           0 :         levelStep++;
     363             :     }
     364             : 
     365             :     /*
     366             :      * We just reached an unacceptable value of levelStep in previous loop.
     367             :      * So, decrease levelStep to get last acceptable value.
     368             :      */
     369           0 :     levelStep--;
     370             : 
     371             :     /*
     372             :      * If there's not enough cache or maintenance_work_mem, fall back to plain
     373             :      * inserts.
     374             :      */
     375           0 :     if (levelStep <= 0)
     376             :     {
     377           0 :         elog(DEBUG1, "failed to switch to buffered GiST build");
     378           0 :         buildstate->bufferingMode = GIST_BUFFERING_DISABLED;
     379           0 :         return;
     380             :     }
     381             : 
     382             :     /*
     383             :      * The second parameter to set is pagesPerBuffer, which determines the
     384             :      * size of each buffer. We adjust pagesPerBuffer also during the build,
     385             :      * which is why this calculation is in a separate function.
     386             :      */
     387           0 :     pagesPerBuffer = calculatePagesPerBuffer(buildstate, levelStep);
     388             : 
     389             :     /* Initialize GISTBuildBuffers with these parameters */
     390           0 :     buildstate->gfbb = gistInitBuildBuffers(pagesPerBuffer, levelStep,
     391             :                                             gistGetMaxLevel(index));
     392             : 
     393           0 :     gistInitParentMap(buildstate);
     394             : 
     395           0 :     buildstate->bufferingMode = GIST_BUFFERING_ACTIVE;
     396             : 
     397           0 :     elog(DEBUG1, "switched to buffered GiST build; level step = %d, pagesPerBuffer = %d",
     398             :          levelStep, pagesPerBuffer);
     399             : }
     400             : 
     401             : /*
     402             :  * Calculate pagesPerBuffer parameter for the buffering algorithm.
     403             :  *
     404             :  * Buffer size is chosen so that assuming that tuples are distributed
     405             :  * randomly, emptying half a buffer fills on average one page in every buffer
     406             :  * at the next lower level.
     407             :  */
     408             : static int
     409           0 : calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep)
     410             : {
     411             :     double      pagesPerBuffer;
     412             :     double      avgIndexTuplesPerPage;
     413             :     double      itupAvgSize;
     414             :     Size        pageFreeSpace;
     415             : 
     416             :     /* Calc space of index page which is available for index tuples */
     417           0 :     pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData)
     418             :         - sizeof(ItemIdData)
     419           0 :         - buildstate->freespace;
     420             : 
     421             :     /*
     422             :      * Calculate average size of already inserted index tuples using gathered
     423             :      * statistics.
     424             :      */
     425           0 :     itupAvgSize = (double) buildstate->indtuplesSize /
     426           0 :         (double) buildstate->indtuples;
     427             : 
     428           0 :     avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize;
     429             : 
     430             :     /*
     431             :      * Recalculate required size of buffers.
     432             :      */
     433           0 :     pagesPerBuffer = 2 * pow(avgIndexTuplesPerPage, levelStep);
     434             : 
     435           0 :     return (int) rint(pagesPerBuffer);
     436             : }
     437             : 
     438             : /*
     439             :  * Per-tuple callback for table_index_build_scan.
     440             :  */
     441             : static void
     442      446188 : gistBuildCallback(Relation index,
     443             :                   ItemPointer tid,
     444             :                   Datum *values,
     445             :                   bool *isnull,
     446             :                   bool tupleIsAlive,
     447             :                   void *state)
     448             : {
     449      446188 :     GISTBuildState *buildstate = (GISTBuildState *) state;
     450             :     IndexTuple  itup;
     451             :     MemoryContext oldCtx;
     452             : 
     453      446188 :     oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
     454             : 
     455             :     /* form an index tuple and point it at the heap tuple */
     456      446188 :     itup = gistFormTuple(buildstate->giststate, index, values, isnull, true);
     457      446188 :     itup->t_tid = *tid;
     458             : 
     459      446188 :     if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE)
     460             :     {
     461             :         /* We have buffers, so use them. */
     462           0 :         gistBufferingBuildInsert(buildstate, itup);
     463             :     }
     464             :     else
     465             :     {
     466             :         /*
     467             :          * There's no buffers (yet). Since we already have the index relation
     468             :          * locked, we call gistdoinsert directly.
     469             :          */
     470      446188 :         gistdoinsert(index, itup, buildstate->freespace,
     471             :                      buildstate->giststate, buildstate->heaprel, true);
     472             :     }
     473             : 
     474             :     /* Update tuple count and total size. */
     475      446188 :     buildstate->indtuples += 1;
     476      446188 :     buildstate->indtuplesSize += IndexTupleSize(itup);
     477             : 
     478      446188 :     MemoryContextSwitchTo(oldCtx);
     479      446188 :     MemoryContextReset(buildstate->giststate->tempCxt);
     480             : 
     481      446188 :     if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE &&
     482           0 :         buildstate->indtuples % BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET == 0)
     483             :     {
     484             :         /* Adjust the target buffer size now */
     485           0 :         buildstate->gfbb->pagesPerBuffer =
     486           0 :             calculatePagesPerBuffer(buildstate, buildstate->gfbb->levelStep);
     487             :     }
     488             : 
     489             :     /*
     490             :      * In 'auto' mode, check if the index has grown too large to fit in cache,
     491             :      * and switch to buffering mode if it has.
     492             :      *
     493             :      * To avoid excessive calls to smgrnblocks(), only check this every
     494             :      * BUFFERING_MODE_SWITCH_CHECK_STEP index tuples
     495             :      */
     496      892376 :     if ((buildstate->bufferingMode == GIST_BUFFERING_AUTO &&
     497      447874 :          buildstate->indtuples % BUFFERING_MODE_SWITCH_CHECK_STEP == 0 &&
     498      447874 :          effective_cache_size < smgrnblocks(index->rd_smgr, MAIN_FORKNUM)) ||
     499      446188 :         (buildstate->bufferingMode == GIST_BUFFERING_STATS &&
     500           0 :          buildstate->indtuples >= BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET))
     501             :     {
     502             :         /*
     503             :          * Index doesn't fit in effective cache anymore. Try to switch to
     504             :          * buffering build mode.
     505             :          */
     506           0 :         gistInitBuffering(buildstate);
     507             :     }
     508      446188 : }
     509             : 
     510             : /*
     511             :  * Insert function for buffering index build.
     512             :  */
     513             : static void
     514           0 : gistBufferingBuildInsert(GISTBuildState *buildstate, IndexTuple itup)
     515             : {
     516             :     /* Insert the tuple to buffers. */
     517           0 :     gistProcessItup(buildstate, itup, 0, buildstate->gfbb->rootlevel);
     518             : 
     519             :     /* If we filled up (half of a) buffer, process buffer emptying. */
     520           0 :     gistProcessEmptyingQueue(buildstate);
     521           0 : }
     522             : 
     523             : /*
     524             :  * Process an index tuple. Runs the tuple down the tree until we reach a leaf
     525             :  * page or node buffer, and inserts the tuple there. Returns true if we have
     526             :  * to stop buffer emptying process (because one of child buffers can't take
     527             :  * index tuples anymore).
     528             :  */
     529             : static bool
     530           0 : gistProcessItup(GISTBuildState *buildstate, IndexTuple itup,
     531             :                 BlockNumber startblkno, int startlevel)
     532             : {
     533           0 :     GISTSTATE  *giststate = buildstate->giststate;
     534           0 :     GISTBuildBuffers *gfbb = buildstate->gfbb;
     535           0 :     Relation    indexrel = buildstate->indexrel;
     536             :     BlockNumber childblkno;
     537             :     Buffer      buffer;
     538           0 :     bool        result = false;
     539             :     BlockNumber blkno;
     540             :     int         level;
     541           0 :     OffsetNumber downlinkoffnum = InvalidOffsetNumber;
     542           0 :     BlockNumber parentblkno = InvalidBlockNumber;
     543             : 
     544           0 :     CHECK_FOR_INTERRUPTS();
     545             : 
     546             :     /*
     547             :      * Loop until we reach a leaf page (level == 0) or a level with buffers
     548             :      * (not including the level we start at, because we would otherwise make
     549             :      * no progress).
     550             :      */
     551           0 :     blkno = startblkno;
     552           0 :     level = startlevel;
     553             :     for (;;)
     554           0 :     {
     555             :         ItemId      iid;
     556             :         IndexTuple  idxtuple,
     557             :                     newtup;
     558             :         Page        page;
     559             :         OffsetNumber childoffnum;
     560             : 
     561             :         /* Have we reached a level with buffers? */
     562           0 :         if (LEVEL_HAS_BUFFERS(level, gfbb) && level != startlevel)
     563           0 :             break;
     564             : 
     565             :         /* Have we reached a leaf page? */
     566           0 :         if (level == 0)
     567           0 :             break;
     568             : 
     569             :         /*
     570             :          * Nope. Descend down to the next level then. Choose a child to
     571             :          * descend down to.
     572             :          */
     573             : 
     574           0 :         buffer = ReadBuffer(indexrel, blkno);
     575           0 :         LockBuffer(buffer, GIST_EXCLUSIVE);
     576             : 
     577           0 :         page = (Page) BufferGetPage(buffer);
     578           0 :         childoffnum = gistchoose(indexrel, page, itup, giststate);
     579           0 :         iid = PageGetItemId(page, childoffnum);
     580           0 :         idxtuple = (IndexTuple) PageGetItem(page, iid);
     581           0 :         childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
     582             : 
     583           0 :         if (level > 1)
     584           0 :             gistMemorizeParent(buildstate, childblkno, blkno);
     585             : 
     586             :         /*
     587             :          * Check that the key representing the target child node is consistent
     588             :          * with the key we're inserting. Update it if it's not.
     589             :          */
     590           0 :         newtup = gistgetadjusted(indexrel, idxtuple, itup, giststate);
     591           0 :         if (newtup)
     592             :         {
     593           0 :             blkno = gistbufferinginserttuples(buildstate,
     594             :                                               buffer,
     595             :                                               level,
     596             :                                               &newtup,
     597             :                                               1,
     598             :                                               childoffnum,
     599             :                                               InvalidBlockNumber,
     600             :                                               InvalidOffsetNumber);
     601             :             /* gistbufferinginserttuples() released the buffer */
     602             :         }
     603             :         else
     604           0 :             UnlockReleaseBuffer(buffer);
     605             : 
     606             :         /* Descend to the child */
     607           0 :         parentblkno = blkno;
     608           0 :         blkno = childblkno;
     609           0 :         downlinkoffnum = childoffnum;
     610             :         Assert(level > 0);
     611           0 :         level--;
     612             :     }
     613             : 
     614           0 :     if (LEVEL_HAS_BUFFERS(level, gfbb))
     615           0 :     {
     616             :         /*
     617             :          * We've reached level with buffers. Place the index tuple to the
     618             :          * buffer, and add the buffer to the emptying queue if it overflows.
     619             :          */
     620             :         GISTNodeBuffer *childNodeBuffer;
     621             : 
     622             :         /* Find the buffer or create a new one */
     623           0 :         childNodeBuffer = gistGetNodeBuffer(gfbb, giststate, blkno, level);
     624             : 
     625             :         /* Add index tuple to it */
     626           0 :         gistPushItupToNodeBuffer(gfbb, childNodeBuffer, itup);
     627             : 
     628           0 :         if (BUFFER_OVERFLOWED(childNodeBuffer, gfbb))
     629           0 :             result = true;
     630             :     }
     631             :     else
     632             :     {
     633             :         /*
     634             :          * We've reached a leaf page. Place the tuple here.
     635             :          */
     636             :         Assert(level == 0);
     637           0 :         buffer = ReadBuffer(indexrel, blkno);
     638           0 :         LockBuffer(buffer, GIST_EXCLUSIVE);
     639           0 :         gistbufferinginserttuples(buildstate, buffer, level,
     640             :                                   &itup, 1, InvalidOffsetNumber,
     641             :                                   parentblkno, downlinkoffnum);
     642             :         /* gistbufferinginserttuples() released the buffer */
     643             :     }
     644             : 
     645           0 :     return result;
     646             : }
     647             : 
     648             : /*
     649             :  * Insert tuples to a given page.
     650             :  *
     651             :  * This is analogous with gistinserttuples() in the regular insertion code.
     652             :  *
     653             :  * Returns the block number of the page where the (first) new or updated tuple
     654             :  * was inserted. Usually that's the original page, but might be a sibling page
     655             :  * if the original page was split.
     656             :  *
     657             :  * Caller should hold a lock on 'buffer' on entry. This function will unlock
     658             :  * and unpin it.
     659             :  */
     660             : static BlockNumber
     661           0 : gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
     662             :                           IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
     663             :                           BlockNumber parentblk, OffsetNumber downlinkoffnum)
     664             : {
     665           0 :     GISTBuildBuffers *gfbb = buildstate->gfbb;
     666             :     List       *splitinfo;
     667             :     bool        is_split;
     668           0 :     BlockNumber placed_to_blk = InvalidBlockNumber;
     669             : 
     670           0 :     is_split = gistplacetopage(buildstate->indexrel,
     671             :                                buildstate->freespace,
     672             :                                buildstate->giststate,
     673             :                                buffer,
     674             :                                itup, ntup, oldoffnum, &placed_to_blk,
     675             :                                InvalidBuffer,
     676             :                                &splitinfo,
     677             :                                false,
     678             :                                buildstate->heaprel, true);
     679             : 
     680             :     /*
     681             :      * If this is a root split, update the root path item kept in memory. This
     682             :      * ensures that all path stacks are always complete, including all parent
     683             :      * nodes up to the root. That simplifies the algorithm to re-find correct
     684             :      * parent.
     685             :      */
     686           0 :     if (is_split && BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO)
     687             :     {
     688           0 :         Page        page = BufferGetPage(buffer);
     689             :         OffsetNumber off;
     690             :         OffsetNumber maxoff;
     691             : 
     692             :         Assert(level == gfbb->rootlevel);
     693           0 :         gfbb->rootlevel++;
     694             : 
     695           0 :         elog(DEBUG2, "splitting GiST root page, now %d levels deep", gfbb->rootlevel);
     696             : 
     697             :         /*
     698             :          * All the downlinks on the old root page are now on one of the child
     699             :          * pages. Visit all the new child pages to memorize the parents of the
     700             :          * grandchildren.
     701             :          */
     702           0 :         if (gfbb->rootlevel > 1)
     703             :         {
     704           0 :             maxoff = PageGetMaxOffsetNumber(page);
     705           0 :             for (off = FirstOffsetNumber; off <= maxoff; off++)
     706             :             {
     707           0 :                 ItemId      iid = PageGetItemId(page, off);
     708           0 :                 IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     709           0 :                 BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
     710           0 :                 Buffer      childbuf = ReadBuffer(buildstate->indexrel, childblkno);
     711             : 
     712           0 :                 LockBuffer(childbuf, GIST_SHARE);
     713           0 :                 gistMemorizeAllDownlinks(buildstate, childbuf);
     714           0 :                 UnlockReleaseBuffer(childbuf);
     715             : 
     716             :                 /*
     717             :                  * Also remember that the parent of the new child page is the
     718             :                  * root block.
     719             :                  */
     720           0 :                 gistMemorizeParent(buildstate, childblkno, GIST_ROOT_BLKNO);
     721             :             }
     722             :         }
     723             :     }
     724             : 
     725           0 :     if (splitinfo)
     726             :     {
     727             :         /*
     728             :          * Insert the downlinks to the parent. This is analogous with
     729             :          * gistfinishsplit() in the regular insertion code, but the locking is
     730             :          * simpler, and we have to maintain the buffers on internal nodes and
     731             :          * the parent map.
     732             :          */
     733             :         IndexTuple *downlinks;
     734             :         int         ndownlinks,
     735             :                     i;
     736             :         Buffer      parentBuffer;
     737             :         ListCell   *lc;
     738             : 
     739             :         /* Parent may have changed since we memorized this path. */
     740           0 :         parentBuffer =
     741           0 :             gistBufferingFindCorrectParent(buildstate,
     742             :                                            BufferGetBlockNumber(buffer),
     743             :                                            level,
     744             :                                            &parentblk,
     745             :                                            &downlinkoffnum);
     746             : 
     747             :         /*
     748             :          * If there's a buffer associated with this page, that needs to be
     749             :          * split too. gistRelocateBuildBuffersOnSplit() will also adjust the
     750             :          * downlinks in 'splitinfo', to make sure they're consistent not only
     751             :          * with the tuples already on the pages, but also the tuples in the
     752             :          * buffers that will eventually be inserted to them.
     753             :          */
     754           0 :         gistRelocateBuildBuffersOnSplit(gfbb,
     755             :                                         buildstate->giststate,
     756             :                                         buildstate->indexrel,
     757             :                                         level,
     758             :                                         buffer, splitinfo);
     759             : 
     760             :         /* Create an array of all the downlink tuples */
     761           0 :         ndownlinks = list_length(splitinfo);
     762           0 :         downlinks = (IndexTuple *) palloc(sizeof(IndexTuple) * ndownlinks);
     763           0 :         i = 0;
     764           0 :         foreach(lc, splitinfo)
     765             :         {
     766           0 :             GISTPageSplitInfo *splitinfo = lfirst(lc);
     767             : 
     768             :             /*
     769             :              * Remember the parent of each new child page in our parent map.
     770             :              * This assumes that the downlinks fit on the parent page. If the
     771             :              * parent page is split, too, when we recurse up to insert the
     772             :              * downlinks, the recursive gistbufferinginserttuples() call will
     773             :              * update the map again.
     774             :              */
     775           0 :             if (level > 0)
     776           0 :                 gistMemorizeParent(buildstate,
     777             :                                    BufferGetBlockNumber(splitinfo->buf),
     778             :                                    BufferGetBlockNumber(parentBuffer));
     779             : 
     780             :             /*
     781             :              * Also update the parent map for all the downlinks that got moved
     782             :              * to a different page. (actually this also loops through the
     783             :              * downlinks that stayed on the original page, but it does no
     784             :              * harm).
     785             :              */
     786           0 :             if (level > 1)
     787           0 :                 gistMemorizeAllDownlinks(buildstate, splitinfo->buf);
     788             : 
     789             :             /*
     790             :              * Since there's no concurrent access, we can release the lower
     791             :              * level buffers immediately. This includes the original page.
     792             :              */
     793           0 :             UnlockReleaseBuffer(splitinfo->buf);
     794           0 :             downlinks[i++] = splitinfo->downlink;
     795             :         }
     796             : 
     797             :         /* Insert them into parent. */
     798           0 :         gistbufferinginserttuples(buildstate, parentBuffer, level + 1,
     799             :                                   downlinks, ndownlinks, downlinkoffnum,
     800             :                                   InvalidBlockNumber, InvalidOffsetNumber);
     801             : 
     802           0 :         list_free_deep(splitinfo);  /* we don't need this anymore */
     803             :     }
     804             :     else
     805           0 :         UnlockReleaseBuffer(buffer);
     806             : 
     807           0 :     return placed_to_blk;
     808             : }
     809             : 
     810             : /*
     811             :  * Find the downlink pointing to a child page.
     812             :  *
     813             :  * 'childblkno' indicates the child page to find the parent for. 'level' is
     814             :  * the level of the child. On entry, *parentblkno and *downlinkoffnum can
     815             :  * point to a location where the downlink used to be - we will check that
     816             :  * location first, and save some cycles if it hasn't moved. The function
     817             :  * returns a buffer containing the downlink, exclusively-locked, and
     818             :  * *parentblkno and *downlinkoffnum are set to the real location of the
     819             :  * downlink.
     820             :  *
     821             :  * If the child page is a leaf (level == 0), the caller must supply a correct
     822             :  * parentblkno. Otherwise we use the parent map hash table to find the parent
     823             :  * block.
     824             :  *
     825             :  * This function serves the same purpose as gistFindCorrectParent() during
     826             :  * normal index inserts, but this is simpler because we don't need to deal
     827             :  * with concurrent inserts.
     828             :  */
     829             : static Buffer
     830           0 : gistBufferingFindCorrectParent(GISTBuildState *buildstate,
     831             :                                BlockNumber childblkno, int level,
     832             :                                BlockNumber *parentblkno,
     833             :                                OffsetNumber *downlinkoffnum)
     834             : {
     835             :     BlockNumber parent;
     836             :     Buffer      buffer;
     837             :     Page        page;
     838             :     OffsetNumber maxoff;
     839             :     OffsetNumber off;
     840             : 
     841           0 :     if (level > 0)
     842           0 :         parent = gistGetParent(buildstate, childblkno);
     843             :     else
     844             :     {
     845             :         /*
     846             :          * For a leaf page, the caller must supply a correct parent block
     847             :          * number.
     848             :          */
     849           0 :         if (*parentblkno == InvalidBlockNumber)
     850           0 :             elog(ERROR, "no parent buffer provided of child %d", childblkno);
     851           0 :         parent = *parentblkno;
     852             :     }
     853             : 
     854           0 :     buffer = ReadBuffer(buildstate->indexrel, parent);
     855           0 :     page = BufferGetPage(buffer);
     856           0 :     LockBuffer(buffer, GIST_EXCLUSIVE);
     857           0 :     gistcheckpage(buildstate->indexrel, buffer);
     858           0 :     maxoff = PageGetMaxOffsetNumber(page);
     859             : 
     860             :     /* Check if it was not moved */
     861           0 :     if (parent == *parentblkno && *parentblkno != InvalidBlockNumber &&
     862           0 :         *downlinkoffnum != InvalidOffsetNumber && *downlinkoffnum <= maxoff)
     863             :     {
     864           0 :         ItemId      iid = PageGetItemId(page, *downlinkoffnum);
     865           0 :         IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     866             : 
     867           0 :         if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno)
     868             :         {
     869             :             /* Still there */
     870           0 :             return buffer;
     871             :         }
     872             :     }
     873             : 
     874             :     /*
     875             :      * Downlink was not at the offset where it used to be. Scan the page to
     876             :      * find it. During normal gist insertions, it might've moved to another
     877             :      * page, to the right, but during a buffering build, we keep track of the
     878             :      * parent of each page in the lookup table so we should always know what
     879             :      * page it's on.
     880             :      */
     881           0 :     for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
     882             :     {
     883           0 :         ItemId      iid = PageGetItemId(page, off);
     884           0 :         IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     885             : 
     886           0 :         if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno)
     887             :         {
     888             :             /* yes!!, found it */
     889           0 :             *downlinkoffnum = off;
     890           0 :             return buffer;
     891             :         }
     892             :     }
     893             : 
     894           0 :     elog(ERROR, "failed to re-find parent for block %u", childblkno);
     895             :     return InvalidBuffer;       /* keep compiler quiet */
     896             : }
     897             : 
     898             : /*
     899             :  * Process buffers emptying stack. Emptying of one buffer can cause emptying
     900             :  * of other buffers. This function iterates until this cascading emptying
     901             :  * process finished, e.g. until buffers emptying stack is empty.
     902             :  */
     903             : static void
     904           0 : gistProcessEmptyingQueue(GISTBuildState *buildstate)
     905             : {
     906           0 :     GISTBuildBuffers *gfbb = buildstate->gfbb;
     907             : 
     908             :     /* Iterate while we have elements in buffers emptying stack. */
     909           0 :     while (gfbb->bufferEmptyingQueue != NIL)
     910             :     {
     911             :         GISTNodeBuffer *emptyingNodeBuffer;
     912             : 
     913             :         /* Get node buffer from emptying stack. */
     914           0 :         emptyingNodeBuffer = (GISTNodeBuffer *) linitial(gfbb->bufferEmptyingQueue);
     915           0 :         gfbb->bufferEmptyingQueue = list_delete_first(gfbb->bufferEmptyingQueue);
     916           0 :         emptyingNodeBuffer->queuedForEmptying = false;
     917             : 
     918             :         /*
     919             :          * We are going to load last pages of buffers where emptying will be
     920             :          * to. So let's unload any previously loaded buffers.
     921             :          */
     922           0 :         gistUnloadNodeBuffers(gfbb);
     923             : 
     924             :         /*
     925             :          * Pop tuples from the buffer and run them down to the buffers at
     926             :          * lower level, or leaf pages. We continue until one of the lower
     927             :          * level buffers fills up, or this buffer runs empty.
     928             :          *
     929             :          * In Arge et al's paper, the buffer emptying is stopped after
     930             :          * processing 1/2 node buffer worth of tuples, to avoid overfilling
     931             :          * any of the lower level buffers. However, it's more efficient to
     932             :          * keep going until one of the lower level buffers actually fills up,
     933             :          * so that's what we do. This doesn't need to be exact, if a buffer
     934             :          * overfills by a few tuples, there's no harm done.
     935             :          */
     936             :         while (true)
     937           0 :         {
     938             :             IndexTuple  itup;
     939             : 
     940             :             /* Get next index tuple from the buffer */
     941           0 :             if (!gistPopItupFromNodeBuffer(gfbb, emptyingNodeBuffer, &itup))
     942           0 :                 break;
     943             : 
     944             :             /*
     945             :              * Run it down to the underlying node buffer or leaf page.
     946             :              *
     947             :              * Note: it's possible that the buffer we're emptying splits as a
     948             :              * result of this call. If that happens, our emptyingNodeBuffer
     949             :              * points to the left half of the split. After split, it's very
     950             :              * likely that the new left buffer is no longer over the half-full
     951             :              * threshold, but we might as well keep flushing tuples from it
     952             :              * until we fill a lower-level buffer.
     953             :              */
     954           0 :             if (gistProcessItup(buildstate, itup, emptyingNodeBuffer->nodeBlocknum, emptyingNodeBuffer->level))
     955             :             {
     956             :                 /*
     957             :                  * A lower level buffer filled up. Stop emptying this buffer,
     958             :                  * to avoid overflowing the lower level buffer.
     959             :                  */
     960           0 :                 break;
     961             :             }
     962             : 
     963             :             /* Free all the memory allocated during index tuple processing */
     964           0 :             MemoryContextReset(buildstate->giststate->tempCxt);
     965             :         }
     966             :     }
     967           0 : }
     968             : 
     969             : /*
     970             :  * Empty all node buffers, from top to bottom. This is done at the end of
     971             :  * index build to flush all remaining tuples to the index.
     972             :  *
     973             :  * Note: This destroys the buffersOnLevels lists, so the buffers should not
     974             :  * be inserted to after this call.
     975             :  */
     976             : static void
     977           0 : gistEmptyAllBuffers(GISTBuildState *buildstate)
     978             : {
     979           0 :     GISTBuildBuffers *gfbb = buildstate->gfbb;
     980             :     MemoryContext oldCtx;
     981             :     int         i;
     982             : 
     983           0 :     oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
     984             : 
     985             :     /*
     986             :      * Iterate through the levels from top to bottom.
     987             :      */
     988           0 :     for (i = gfbb->buffersOnLevelsLen - 1; i >= 0; i--)
     989             :     {
     990             :         /*
     991             :          * Empty all buffers on this level. Note that new buffers can pop up
     992             :          * in the list during the processing, as a result of page splits, so a
     993             :          * simple walk through the list won't work. We remove buffers from the
     994             :          * list when we see them empty; a buffer can't become non-empty once
     995             :          * it's been fully emptied.
     996             :          */
     997           0 :         while (gfbb->buffersOnLevels[i] != NIL)
     998             :         {
     999             :             GISTNodeBuffer *nodeBuffer;
    1000             : 
    1001           0 :             nodeBuffer = (GISTNodeBuffer *) linitial(gfbb->buffersOnLevels[i]);
    1002             : 
    1003           0 :             if (nodeBuffer->blocksCount != 0)
    1004             :             {
    1005             :                 /*
    1006             :                  * Add this buffer to the emptying queue, and proceed to empty
    1007             :                  * the queue.
    1008             :                  */
    1009           0 :                 if (!nodeBuffer->queuedForEmptying)
    1010             :                 {
    1011           0 :                     MemoryContextSwitchTo(gfbb->context);
    1012           0 :                     nodeBuffer->queuedForEmptying = true;
    1013           0 :                     gfbb->bufferEmptyingQueue =
    1014           0 :                         lcons(nodeBuffer, gfbb->bufferEmptyingQueue);
    1015           0 :                     MemoryContextSwitchTo(buildstate->giststate->tempCxt);
    1016             :                 }
    1017           0 :                 gistProcessEmptyingQueue(buildstate);
    1018             :             }
    1019             :             else
    1020           0 :                 gfbb->buffersOnLevels[i] =
    1021           0 :                     list_delete_first(gfbb->buffersOnLevels[i]);
    1022             :         }
    1023           0 :         elog(DEBUG2, "emptied all buffers at level %d", i);
    1024             :     }
    1025           0 :     MemoryContextSwitchTo(oldCtx);
    1026           0 : }
    1027             : 
    1028             : /*
    1029             :  * Get the depth of the GiST index.
    1030             :  */
    1031             : static int
    1032           0 : gistGetMaxLevel(Relation index)
    1033             : {
    1034             :     int         maxLevel;
    1035             :     BlockNumber blkno;
    1036             : 
    1037             :     /*
    1038             :      * Traverse down the tree, starting from the root, until we hit the leaf
    1039             :      * level.
    1040             :      */
    1041           0 :     maxLevel = 0;
    1042           0 :     blkno = GIST_ROOT_BLKNO;
    1043             :     while (true)
    1044           0 :     {
    1045             :         Buffer      buffer;
    1046             :         Page        page;
    1047             :         IndexTuple  itup;
    1048             : 
    1049           0 :         buffer = ReadBuffer(index, blkno);
    1050             : 
    1051             :         /*
    1052             :          * There's no concurrent access during index build, so locking is just
    1053             :          * pro forma.
    1054             :          */
    1055           0 :         LockBuffer(buffer, GIST_SHARE);
    1056           0 :         page = (Page) BufferGetPage(buffer);
    1057             : 
    1058           0 :         if (GistPageIsLeaf(page))
    1059             :         {
    1060             :             /* We hit the bottom, so we're done. */
    1061           0 :             UnlockReleaseBuffer(buffer);
    1062           0 :             break;
    1063             :         }
    1064             : 
    1065             :         /*
    1066             :          * Pick the first downlink on the page, and follow it. It doesn't
    1067             :          * matter which downlink we choose, the tree has the same depth
    1068             :          * everywhere, so we just pick the first one.
    1069             :          */
    1070           0 :         itup = (IndexTuple) PageGetItem(page,
    1071             :                                         PageGetItemId(page, FirstOffsetNumber));
    1072           0 :         blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
    1073           0 :         UnlockReleaseBuffer(buffer);
    1074             : 
    1075             :         /*
    1076             :          * We're going down on the tree. It means that there is yet one more
    1077             :          * level in the tree.
    1078             :          */
    1079           0 :         maxLevel++;
    1080             :     }
    1081           0 :     return maxLevel;
    1082             : }
    1083             : 
    1084             : 
    1085             : /*
    1086             :  * Routines for managing the parent map.
    1087             :  *
    1088             :  * Whenever a page is split, we need to insert the downlinks into the parent.
    1089             :  * We need to somehow find the parent page to do that. In normal insertions,
    1090             :  * we keep a stack of nodes visited when we descend the tree. However, in
    1091             :  * buffering build, we can start descending the tree from any internal node,
    1092             :  * when we empty a buffer by cascading tuples to its children. So we don't
    1093             :  * have a full stack up to the root available at that time.
    1094             :  *
    1095             :  * So instead, we maintain a hash table to track the parent of every internal
    1096             :  * page. We don't need to track the parents of leaf nodes, however. Whenever
    1097             :  * we insert to a leaf, we've just descended down from its parent, so we know
    1098             :  * its immediate parent already. This helps a lot to limit the memory used
    1099             :  * by this hash table.
    1100             :  *
    1101             :  * Whenever an internal node is split, the parent map needs to be updated.
    1102             :  * the parent of the new child page needs to be recorded, and also the
    1103             :  * entries for all page whose downlinks are moved to a new page at the split
    1104             :  * needs to be updated.
    1105             :  *
    1106             :  * We also update the parent map whenever we descend the tree. That might seem
    1107             :  * unnecessary, because we maintain the map whenever a downlink is moved or
    1108             :  * created, but it is needed because we switch to buffering mode after
    1109             :  * creating a tree with regular index inserts. Any pages created before
    1110             :  * switching to buffering mode will not be present in the parent map initially,
    1111             :  * but will be added there the first time we visit them.
    1112             :  */
    1113             : 
    1114             : typedef struct
    1115             : {
    1116             :     BlockNumber childblkno;     /* hash key */
    1117             :     BlockNumber parentblkno;
    1118             : } ParentMapEntry;
    1119             : 
    1120             : static void
    1121           0 : gistInitParentMap(GISTBuildState *buildstate)
    1122             : {
    1123             :     HASHCTL     hashCtl;
    1124             : 
    1125           0 :     hashCtl.keysize = sizeof(BlockNumber);
    1126           0 :     hashCtl.entrysize = sizeof(ParentMapEntry);
    1127           0 :     hashCtl.hcxt = CurrentMemoryContext;
    1128           0 :     buildstate->parentMap = hash_create("gistbuild parent map",
    1129             :                                         1024,
    1130             :                                         &hashCtl,
    1131             :                                         HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
    1132           0 : }
    1133             : 
    1134             : static void
    1135           0 : gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child, BlockNumber parent)
    1136             : {
    1137             :     ParentMapEntry *entry;
    1138             :     bool        found;
    1139             : 
    1140           0 :     entry = (ParentMapEntry *) hash_search(buildstate->parentMap,
    1141             :                                            (const void *) &child,
    1142             :                                            HASH_ENTER,
    1143             :                                            &found);
    1144           0 :     entry->parentblkno = parent;
    1145           0 : }
    1146             : 
    1147             : /*
    1148             :  * Scan all downlinks on a page, and memorize their parent.
    1149             :  */
    1150             : static void
    1151           0 : gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parentbuf)
    1152             : {
    1153             :     OffsetNumber maxoff;
    1154             :     OffsetNumber off;
    1155           0 :     BlockNumber parentblkno = BufferGetBlockNumber(parentbuf);
    1156           0 :     Page        page = BufferGetPage(parentbuf);
    1157             : 
    1158             :     Assert(!GistPageIsLeaf(page));
    1159             : 
    1160           0 :     maxoff = PageGetMaxOffsetNumber(page);
    1161           0 :     for (off = FirstOffsetNumber; off <= maxoff; off++)
    1162             :     {
    1163           0 :         ItemId      iid = PageGetItemId(page, off);
    1164           0 :         IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
    1165           0 :         BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
    1166             : 
    1167           0 :         gistMemorizeParent(buildstate, childblkno, parentblkno);
    1168             :     }
    1169           0 : }
    1170             : 
    1171             : static BlockNumber
    1172           0 : gistGetParent(GISTBuildState *buildstate, BlockNumber child)
    1173             : {
    1174             :     ParentMapEntry *entry;
    1175             :     bool        found;
    1176             : 
    1177             :     /* Find node buffer in hash table */
    1178           0 :     entry = (ParentMapEntry *) hash_search(buildstate->parentMap,
    1179             :                                            (const void *) &child,
    1180             :                                            HASH_FIND,
    1181             :                                            &found);
    1182           0 :     if (!found)
    1183           0 :         elog(ERROR, "could not find parent of block %d in lookup table", child);
    1184             : 
    1185           0 :     return entry->parentblkno;
    1186             : }

Generated by: LCOV version 1.13