LCOV - code coverage report
Current view: top level - src/backend/access/gin - gininsert.c (source / functions)
                                                   Hit    Total    Coverage
Test: PostgreSQL 19devel             Lines:        588      647      90.9 %
Date: 2026-02-02 14:17:46            Functions:     34       35      97.1 %
Legend: lines with an execution count were hit; a count of 0 marks a line never executed; a blank count marks a line not instrumented

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * gininsert.c
       4             :  *    insert routines for the postgres inverted index access method.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *          src/backend/access/gin/gininsert.c
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/gin_private.h"
      18             : #include "access/gin_tuple.h"
      19             : #include "access/parallel.h"
      20             : #include "access/table.h"
      21             : #include "access/tableam.h"
      22             : #include "access/xloginsert.h"
      23             : #include "catalog/index.h"
      24             : #include "catalog/pg_collation.h"
      25             : #include "commands/progress.h"
      26             : #include "miscadmin.h"
      27             : #include "nodes/execnodes.h"
      28             : #include "pgstat.h"
      29             : #include "storage/bufmgr.h"
      30             : #include "storage/predicate.h"
      31             : #include "tcop/tcopprot.h"
      32             : #include "utils/datum.h"
      33             : #include "utils/memutils.h"
      34             : #include "utils/builtins.h"
      35             : #include "utils/rel.h"
      36             : #include "utils/typcache.h"
      37             : 
      38             : 
      39             : /* Magic numbers for parallel state sharing */
      40             : #define PARALLEL_KEY_GIN_SHARED         UINT64CONST(0xB000000000000001)
      41             : #define PARALLEL_KEY_TUPLESORT          UINT64CONST(0xB000000000000002)
      42             : #define PARALLEL_KEY_QUERY_TEXT         UINT64CONST(0xB000000000000003)
      43             : #define PARALLEL_KEY_WAL_USAGE          UINT64CONST(0xB000000000000004)
      44             : #define PARALLEL_KEY_BUFFER_USAGE       UINT64CONST(0xB000000000000005)
      45             : 
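These keys pair allocations made by the leader with lookups done by the
workers in the parallel DSM table of contents. As a hedged sketch of the
usual shm_toc pattern (the worker entry point, _gin_parallel_build_main, is
outside this excerpt; variable names are illustrative):

    /* leader, after InitializeParallelDSM(pcxt): */
    shm_toc_insert(pcxt->toc, PARALLEL_KEY_GIN_SHARED, ginshared);

    /* worker, at startup, retrieves the same chunk by key: */
    GinBuildShared *ginshared;

    ginshared = (GinBuildShared *) shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false);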
      46             : /*
      47             :  * Status for index builds performed in parallel.  This is allocated in a
      48             :  * dynamic shared memory segment.
      49             :  */
      50             : typedef struct GinBuildShared
      51             : {
      52             :     /*
      53             :      * These fields are not modified during the build.  They primarily exist
      54             :      * for the benefit of worker processes that need to create state
      55             :      * corresponding to that used by the leader.
      56             :      */
      57             :     Oid         heaprelid;
      58             :     Oid         indexrelid;
      59             :     bool        isconcurrent;
      60             :     int         scantuplesortstates;
      61             : 
      62             :     /*
      63             :      * workersdonecv is used to monitor the progress of workers.  All parallel
       64             :      * participants must indicate that they are done before the leader can use
       65             :      * results built by the workers (and before the leader can write the data into
      66             :      * the index).
      67             :      */
      68             :     ConditionVariable workersdonecv;
      69             : 
      70             :     /*
      71             :      * mutex protects all following fields
      72             :      *
      73             :      * These fields contain status information of interest to GIN index builds
      74             :      * that must work just the same when an index is built in parallel.
      75             :      */
      76             :     slock_t     mutex;
      77             : 
      78             :     /*
      79             :      * Mutable state that is maintained by workers, and reported back to
      80             :      * leader at end of the scans.
      81             :      *
      82             :      * nparticipantsdone is number of worker processes finished.
      83             :      *
      84             :      * reltuples is the total number of input heap tuples.
      85             :      *
      86             :      * indtuples is the total number of tuples that made it into the index.
      87             :      */
      88             :     int         nparticipantsdone;
      89             :     double      reltuples;
      90             :     double      indtuples;
      91             : 
      92             :     /*
      93             :      * ParallelTableScanDescData data follows. Can't directly embed here, as
      94             :      * implementations of the parallel table scan desc interface might need
      95             :      * stronger alignment.
      96             :      */
      97             : } GinBuildShared;
      98             : 
      99             : /*
     100             :  * Return pointer to a GinBuildShared's parallel table scan.
     101             :  *
      102             :  * cf. shm_toc_allocate for why BUFFERALIGN is used, rather than just
     103             :  * MAXALIGN.
     104             :  */
     105             : #define ParallelTableScanFromGinBuildShared(shared) \
     106             :     (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(GinBuildShared)))
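The scan descriptor thus lives in the same DSM chunk, immediately after the
fixed-size GinBuildShared, padded out to buffer alignment. A hypothetical
worker-side sketch using this macro (table_beginscan_parallel is the generic
tableam entry point; variable names are illustrative):

    GinBuildShared *ginshared = ...;    /* looked up from the TOC by key */
    ParallelTableScanDesc pscan;
    TableScanDesc scan;

    pscan = ParallelTableScanFromGinBuildShared(ginshared);
    scan = table_beginscan_parallel(heap, pscan);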
     107             : 
     108             : /*
     109             :  * Status for leader in parallel index build.
     110             :  */
     111             : typedef struct GinLeader
     112             : {
     113             :     /* parallel context itself */
     114             :     ParallelContext *pcxt;
     115             : 
     116             :     /*
     117             :      * nparticipanttuplesorts is the exact number of worker processes
     118             :      * successfully launched, plus one leader process if it participates as a
     119             :      * worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader
     120             :      * participating as a worker).
     121             :      */
     122             :     int         nparticipanttuplesorts;
     123             : 
     124             :     /*
     125             :      * Leader process convenience pointers to shared state (leader avoids TOC
     126             :      * lookups).
     127             :      *
     128             :      * GinBuildShared is the shared state for entire build.  sharedsort is the
     129             :      * shared, tuplesort-managed state passed to each process tuplesort.
     130             :      * snapshot is the snapshot used by the scan iff an MVCC snapshot is
     131             :      * required.
     132             :      */
     133             :     GinBuildShared *ginshared;
     134             :     Sharedsort *sharedsort;
     135             :     Snapshot    snapshot;
     136             :     WalUsage   *walusage;
     137             :     BufferUsage *bufferusage;
     138             : } GinLeader;
     139             : 
     140             : typedef struct
     141             : {
     142             :     GinState    ginstate;
     143             :     double      indtuples;
     144             :     GinStatsData buildStats;
     145             :     MemoryContext tmpCtx;
     146             :     MemoryContext funcCtx;
     147             :     BuildAccumulator accum;
     148             :     ItemPointerData tid;
     149             :     int         work_mem;
     150             : 
     151             :     /*
     152             :      * bs_leader is only present when a parallel index build is performed, and
     153             :      * only in the leader process.
     154             :      */
     155             :     GinLeader  *bs_leader;
     156             : 
     157             :     /* number of participating workers (including leader) */
     158             :     int         bs_num_workers;
     159             : 
     160             :     /* used to pass information from workers to leader */
     161             :     double      bs_numtuples;
     162             :     double      bs_reltuples;
     163             : 
     164             :     /*
     165             :      * The sortstate is used by workers (including the leader). It has to be
     166             :      * part of the build state, because that's the only thing passed to the
     167             :      * build callback etc.
     168             :      */
     169             :     Tuplesortstate *bs_sortstate;
     170             : 
     171             :     /*
     172             :      * The sortstate used only within a single worker for the first merge pass
     173             :      * happening there. In principle it doesn't need to be part of the build
      174             :      * state and we could pass it around directly, but keeping it in the
      175             :      * build state is more convenient.
     176             :      */
     177             :     Tuplesortstate *bs_worker_sort;
     178             : } GinBuildState;
     179             : 
     180             : 
     181             : /* parallel index builds */
     182             : static void _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
     183             :                                 bool isconcurrent, int request);
     184             : static void _gin_end_parallel(GinLeader *ginleader, GinBuildState *state);
     185             : static Size _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot);
     186             : static double _gin_parallel_heapscan(GinBuildState *state);
     187             : static double _gin_parallel_merge(GinBuildState *state);
     188             : static void _gin_leader_participate_as_worker(GinBuildState *buildstate,
     189             :                                               Relation heap, Relation index);
     190             : static void _gin_parallel_scan_and_build(GinBuildState *state,
     191             :                                          GinBuildShared *ginshared,
     192             :                                          Sharedsort *sharedsort,
     193             :                                          Relation heap, Relation index,
     194             :                                          int sortmem, bool progress);
     195             : 
     196             : static ItemPointer _gin_parse_tuple_items(GinTuple *a);
     197             : static Datum _gin_parse_tuple_key(GinTuple *a);
     198             : 
     199             : static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
     200             :                                   Datum key, int16 typlen, bool typbyval,
     201             :                                   ItemPointerData *items, uint32 nitems,
     202             :                                   Size *len);
     203             : 
     204             : /*
      205             :  * Adds an array of item pointers to a tuple's posting list, or
      206             :  * creates a posting tree and a tuple pointing to it when there
      207             :  * is not enough space.  The maximum tuple size is defined in
     208             :  * GinFormTuple().  Returns a new, modified index tuple.
     209             :  * items[] must be in sorted order with no duplicates.
     210             :  */
     211             : static IndexTuple
     212      228864 : addItemPointersToLeafTuple(GinState *ginstate,
     213             :                            IndexTuple old,
     214             :                            ItemPointerData *items, uint32 nitem,
     215             :                            GinStatsData *buildStats, Buffer buffer)
     216             : {
     217             :     OffsetNumber attnum;
     218             :     Datum       key;
     219             :     GinNullCategory category;
     220             :     IndexTuple  res;
     221             :     ItemPointerData *newItems,
     222             :                *oldItems;
     223             :     int         oldNPosting,
     224             :                 newNPosting,
     225             :                 nwritten;
     226             :     GinPostingList *compressedList;
     227             : 
     228             :     Assert(!GinIsPostingTree(old));
     229             : 
     230      228864 :     attnum = gintuple_get_attrnum(ginstate, old);
     231      228864 :     key = gintuple_get_key(ginstate, old, &category);
     232             : 
     233             :     /* merge the old and new posting lists */
     234      228864 :     oldItems = ginReadTuple(ginstate, attnum, old, &oldNPosting);
     235             : 
     236      228864 :     newItems = ginMergeItemPointers(items, nitem,
     237             :                                     oldItems, oldNPosting,
     238             :                                     &newNPosting);
     239             : 
      240             :     /* Compress the posting list, and try to build a tuple with room for it */
     241      228864 :     res = NULL;
     242      228864 :     compressedList = ginCompressPostingList(newItems, newNPosting, GinMaxItemSize, &nwritten);
     243      228864 :     if (nwritten == newNPosting)
     244             :     {
     245      228846 :         res = GinFormTuple(ginstate, attnum, key, category,
     246             :                            (char *) compressedList,
     247      228846 :                            SizeOfGinPostingList(compressedList),
     248             :                            newNPosting,
     249             :                            false);
     250             :     }
     251             : 
     252      228864 :     pfree(newItems);
     253      228864 :     pfree(compressedList);
     254             : 
     255      228864 :     if (!res)
     256             :     {
     257             :         /* posting list would be too big, convert to posting tree */
     258             :         BlockNumber postingRoot;
     259             : 
     260             :         /*
     261             :          * Initialize posting tree with the old tuple's posting list.  It's
     262             :          * surely small enough to fit on one posting-tree page, and should
     263             :          * already be in order with no duplicates.
     264             :          */
     265          28 :         postingRoot = createPostingTree(ginstate->index,
     266             :                                         oldItems,
     267             :                                         oldNPosting,
     268             :                                         buildStats,
     269             :                                         buffer);
     270             : 
     271             :         /* Now insert the TIDs-to-be-added into the posting tree */
     272          28 :         ginInsertItemPointers(ginstate->index, postingRoot,
     273             :                               items, nitem,
     274             :                               buildStats);
     275             : 
     276             :         /* And build a new posting-tree-only result tuple */
     277          28 :         res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
     278          28 :         GinSetPostingTree(res, postingRoot);
     279             :     }
     280      228864 :     pfree(oldItems);
     281             : 
     282      228864 :     return res;
     283             : }
     284             : 
     285             : /*
     286             :  * Build a fresh leaf tuple, either posting-list or posting-tree format
     287             :  * depending on whether the given items list will fit.
     288             :  * items[] must be in sorted order with no duplicates.
     289             :  *
     290             :  * This is basically the same logic as in addItemPointersToLeafTuple,
     291             :  * but working from slightly different input.
     292             :  */
     293             : static IndexTuple
     294      693872 : buildFreshLeafTuple(GinState *ginstate,
     295             :                     OffsetNumber attnum, Datum key, GinNullCategory category,
     296             :                     ItemPointerData *items, uint32 nitem,
     297             :                     GinStatsData *buildStats, Buffer buffer)
     298             : {
     299      693872 :     IndexTuple  res = NULL;
     300             :     GinPostingList *compressedList;
     301             :     int         nwritten;
     302             : 
     303             :     /* try to build a posting list tuple with all the items */
     304      693872 :     compressedList = ginCompressPostingList(items, nitem, GinMaxItemSize, &nwritten);
     305      693872 :     if (nwritten == nitem)
     306             :     {
     307      693770 :         res = GinFormTuple(ginstate, attnum, key, category,
     308             :                            (char *) compressedList,
     309      693770 :                            SizeOfGinPostingList(compressedList),
     310             :                            nitem, false);
     311             :     }
     312      693872 :     pfree(compressedList);
     313             : 
     314      693872 :     if (!res)
     315             :     {
     316             :         /* posting list would be too big, build posting tree */
     317             :         BlockNumber postingRoot;
     318             : 
     319             :         /*
     320             :          * Build posting-tree-only result tuple.  We do this first so as to
     321             :          * fail quickly if the key is too big.
     322             :          */
     323         102 :         res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
     324             : 
     325             :         /*
     326             :          * Initialize a new posting tree with the TIDs.
     327             :          */
     328         102 :         postingRoot = createPostingTree(ginstate->index, items, nitem,
     329             :                                         buildStats, buffer);
     330             : 
     331             :         /* And save the root link in the result tuple */
     332         102 :         GinSetPostingTree(res, postingRoot);
     333             :     }
     334             : 
     335      693872 :     return res;
     336             : }
     337             : 
     338             : /*
     339             :  * Insert one or more heap TIDs associated with the given key value.
     340             :  * This will either add a single key entry, or enlarge a pre-existing entry.
     341             :  *
     342             :  * During an index build, buildStats is non-null and the counters
     343             :  * it contains should be incremented as needed.
     344             :  */
     345             : void
     346      972140 : ginEntryInsert(GinState *ginstate,
     347             :                OffsetNumber attnum, Datum key, GinNullCategory category,
     348             :                ItemPointerData *items, uint32 nitem,
     349             :                GinStatsData *buildStats)
     350             : {
     351             :     GinBtreeData btree;
     352             :     GinBtreeEntryInsertData insertdata;
     353             :     GinBtreeStack *stack;
     354             :     IndexTuple  itup;
     355             :     Page        page;
     356             : 
     357      972140 :     insertdata.isDelete = false;
     358             : 
     359      972140 :     ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
     360      972140 :     btree.isBuild = (buildStats != NULL);
     361             : 
     362      972140 :     stack = ginFindLeafPage(&btree, false, false);
     363      972140 :     page = BufferGetPage(stack->buffer);
     364             : 
     365      972140 :     if (btree.findItem(&btree, stack))
     366             :     {
     367             :         /* found pre-existing entry */
     368      278264 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     369             : 
     370      278264 :         if (GinIsPostingTree(itup))
     371             :         {
     372             :             /* add entries to existing posting tree */
     373       49392 :             BlockNumber rootPostingTree = GinGetPostingTree(itup);
     374             : 
     375             :             /* release all stack */
     376       49392 :             LockBuffer(stack->buffer, GIN_UNLOCK);
     377       49392 :             freeGinBtreeStack(stack);
     378             : 
     379             :             /* insert into posting tree */
     380       49392 :             ginInsertItemPointers(ginstate->index, rootPostingTree,
     381             :                                   items, nitem,
     382             :                                   buildStats);
     383       49388 :             return;
     384             :         }
     385             : 
     386      228872 :         CheckForSerializableConflictIn(ginstate->index, NULL,
     387             :                                        BufferGetBlockNumber(stack->buffer));
     388             :         /* modify an existing leaf entry */
     389      228864 :         itup = addItemPointersToLeafTuple(ginstate, itup,
     390             :                                           items, nitem, buildStats, stack->buffer);
     391             : 
     392      228864 :         insertdata.isDelete = true;
     393             :     }
     394             :     else
     395             :     {
     396      693876 :         CheckForSerializableConflictIn(ginstate->index, NULL,
     397             :                                        BufferGetBlockNumber(stack->buffer));
     398             :         /* no match, so construct a new leaf entry */
     399      693872 :         itup = buildFreshLeafTuple(ginstate, attnum, key, category,
     400             :                                    items, nitem, buildStats, stack->buffer);
     401             : 
     402             :         /*
     403             :          * nEntries counts leaf tuples, so increment it only when we make a
     404             :          * new one.
     405             :          */
     406      693872 :         if (buildStats)
     407      152992 :             buildStats->nEntries++;
     408             :     }
     409             : 
     410             :     /* Insert the new or modified leaf tuple */
     411      922736 :     insertdata.entry = itup;
     412      922736 :     ginInsertValue(&btree, stack, &insertdata, buildStats);
     413      922732 :     pfree(itup);
     414             : }
     415             : 
     416             : /*
     417             :  * Extract index entries for a single indexable item, and add them to the
     418             :  * BuildAccumulator's state.
     419             :  *
     420             :  * This function is used only during initial index creation.
     421             :  */
     422             : static void
     423      945474 : ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum,
     424             :                        Datum value, bool isNull,
     425             :                        ItemPointer heapptr)
     426             : {
     427             :     Datum      *entries;
     428             :     GinNullCategory *categories;
     429             :     int32       nentries;
     430             :     MemoryContext oldCtx;
     431             : 
     432      945474 :     oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
     433      945474 :     entries = ginExtractEntries(buildstate->accum.ginstate, attnum,
     434             :                                 value, isNull,
     435             :                                 &nentries, &categories);
     436      945474 :     MemoryContextSwitchTo(oldCtx);
     437             : 
     438      945474 :     ginInsertBAEntries(&buildstate->accum, heapptr, attnum,
     439             :                        entries, categories, nentries);
     440             : 
     441      945474 :     buildstate->indtuples += nentries;
     442             : 
     443      945474 :     MemoryContextReset(buildstate->funcCtx);
     444      945474 : }
     445             : 
     446             : static void
     447      923588 : ginBuildCallback(Relation index, ItemPointer tid, Datum *values,
     448             :                  bool *isnull, bool tupleIsAlive, void *state)
     449             : {
     450      923588 :     GinBuildState *buildstate = (GinBuildState *) state;
     451             :     MemoryContext oldCtx;
     452             :     int         i;
     453             : 
     454      923588 :     oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
     455             : 
     456     1847798 :     for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
     457      924210 :         ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
     458      924210 :                                values[i], isnull[i], tid);
     459             : 
     460             :     /* If we've maxed out our available memory, dump everything to the index */
     461      923588 :     if (buildstate->accum.allocatedMemory >= maintenance_work_mem * (Size) 1024)
     462             :     {
     463             :         ItemPointerData *list;
     464             :         Datum       key;
     465             :         GinNullCategory category;
     466             :         uint32      nlist;
     467             :         OffsetNumber attnum;
     468             : 
     469           0 :         ginBeginBAScan(&buildstate->accum);
     470           0 :         while ((list = ginGetBAEntry(&buildstate->accum,
     471           0 :                                      &attnum, &key, &category, &nlist)) != NULL)
     472             :         {
     473             :             /* there could be many entries, so be willing to abort here */
     474           0 :             CHECK_FOR_INTERRUPTS();
     475           0 :             ginEntryInsert(&buildstate->ginstate, attnum, key, category,
     476             :                            list, nlist, &buildstate->buildStats);
     477             :         }
     478             : 
     479           0 :         MemoryContextReset(buildstate->tmpCtx);
     480           0 :         ginInitBA(&buildstate->accum);
     481             :     }
     482             : 
     483      923588 :     MemoryContextSwitchTo(oldCtx);
     484      923588 : }
     485             : 
     486             : /*
     487             :  * ginFlushBuildState
     488             :  *      Write all data from BuildAccumulator into the tuplesort.
     489             :  *
     490             :  * The number of TIDs written to the tuplesort at once is limited, to reduce
     491             :  * the amount of memory needed when merging the intermediate results later.
      492             :  * The leader will see up to two chunks per worker, so the limit is chosen
      493             :  * such that the total never exceeds MaxAllocSize.
     494             :  *
     495             :  * We don't need to worry about overflowing maintenance_work_mem. We can't
     496             :  * build chunks larger than work_mem, and that limit was set so that workers
     497             :  * produce sufficiently small chunks.
     498             :  */
     499             : static void
     500          78 : ginFlushBuildState(GinBuildState *buildstate, Relation index)
     501             : {
     502             :     ItemPointerData *list;
     503             :     Datum       key;
     504             :     GinNullCategory category;
     505             :     uint32      nlist;
     506             :     OffsetNumber attnum;
     507          78 :     TupleDesc   tdesc = RelationGetDescr(index);
     508             :     uint32      maxlen;
     509             : 
     510             :     /* maximum number of TIDs per chunk (two chunks per worker) */
     511          78 :     maxlen = MaxAllocSize / sizeof(ItemPointerData);
     512          78 :     maxlen /= (2 * buildstate->bs_num_workers);
     513             : 
     514          78 :     ginBeginBAScan(&buildstate->accum);
     515       31354 :     while ((list = ginGetBAEntry(&buildstate->accum,
     516       31354 :                                  &attnum, &key, &category, &nlist)) != NULL)
     517             :     {
     518             :         /* information about the key */
     519       31276 :         CompactAttribute *attr = TupleDescCompactAttr(tdesc, (attnum - 1));
     520             : 
     521             :         /* start of the chunk */
     522       31276 :         uint32      offset = 0;
     523             : 
      524             :         /* split the entry into smaller chunks with up to maxlen items each */
     525       62552 :         while (offset < nlist)
     526             :         {
     527             :             /* GIN tuple and tuple length */
     528             :             GinTuple   *tup;
     529             :             Size        tuplen;
     530       31276 :             uint32      len = Min(maxlen, nlist - offset);
     531             : 
     532             :             /* there could be many entries, so be willing to abort here */
     533       31276 :             CHECK_FOR_INTERRUPTS();
     534             : 
     535       31276 :             tup = _gin_build_tuple(attnum, category,
     536       31276 :                                    key, attr->attlen, attr->attbyval,
     537       31276 :                                    &list[offset], len,
     538             :                                    &tuplen);
     539             : 
     540       31276 :             offset += len;
     541             : 
     542       31276 :             tuplesort_putgintuple(buildstate->bs_worker_sort, tup, tuplen);
     543             : 
     544       31276 :             pfree(tup);
     545             :         }
     546             :     }
     547             : 
     548          78 :     MemoryContextReset(buildstate->tmpCtx);
     549          78 :     ginInitBA(&buildstate->accum);
     550          78 : }
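For a concrete feel of the limit computed above (a back-of-the-envelope
check, not code from this file): MaxAllocSize is 0x3fffffff bytes (just
under 1 GB) and sizeof(ItemPointerData) is 6 bytes, so with
bs_num_workers = 3 the limit is maxlen = (0x3fffffff / 6) / (2 * 3), about
29.8 million TIDs or roughly 170 MB per chunk; two such chunks from each of
the three participants then total roughly MaxAllocSize at the leader, as
intended.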
     551             : 
     552             : /*
     553             :  * ginBuildCallbackParallel
     554             :  *      Callback for the parallel index build.
     555             :  *
     556             :  * This is similar to the serial build callback ginBuildCallback, but
     557             :  * instead of writing the accumulated entries into the index, each worker
     558             :  * writes them into a (local) tuplesort.
     559             :  *
     560             :  * The worker then sorts and combines these entries, before writing them
     561             :  * into a shared tuplesort for the leader (see _gin_parallel_scan_and_build
     562             :  * for the whole process).
     563             :  */
     564             : static void
     565       21264 : ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values,
     566             :                          bool *isnull, bool tupleIsAlive, void *state)
     567             : {
     568       21264 :     GinBuildState *buildstate = (GinBuildState *) state;
     569             :     MemoryContext oldCtx;
     570             :     int         i;
     571             : 
     572       21264 :     oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
     573             : 
     574             :     /*
      575             :      * If the scan wrapped around, flush accumulated entries and start anew.
     576             :      *
      577             :      * With parallel scans, there is no guarantee the scan does not start
      578             :      * half-way through the relation (serial builds disable sync scans and
      579             :      * always start from block 0, but parallel scans require allow_sync=true).
     580             :      *
     581             :      * Building the posting lists assumes the TIDs are monotonic and never go
      582             :      * back, and a wraparound would break that. We handle that by detecting
     583             :      * the wraparound, and flushing all entries. This means we'll later see
     584             :      * two separate entries with non-overlapping TID lists (which can be
     585             :      * combined by merge sort).
     586             :      *
     587             :      * To detect a wraparound, we remember the last TID seen by each worker
     588             :      * (for any key). If the next TID seen by the worker is lower, the scan
     589             :      * must have wrapped around.
     590             :      */
     591       21264 :     if (ItemPointerCompare(tid, &buildstate->tid) < 0)
     592           0 :         ginFlushBuildState(buildstate, index);
     593             : 
     594             :     /* remember the TID we're about to process */
     595       21264 :     buildstate->tid = *tid;
     596             : 
     597       42528 :     for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
     598       21264 :         ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
     599       21264 :                                values[i], isnull[i], tid);
     600             : 
     601             :     /*
     602             :      * If we've maxed out our available memory, dump everything to the
      603             :      * tuplesort. We use half the per-worker fraction of maintenance_work_mem;
      604             :      * the other half is used for the tuplesort itself.
     605             :      */
     606       21264 :     if (buildstate->accum.allocatedMemory >= buildstate->work_mem * (Size) 1024)
     607           0 :         ginFlushBuildState(buildstate, index);
     608             : 
     609       21264 :     MemoryContextSwitchTo(oldCtx);
     610       21264 : }
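Putting the pieces together, the per-worker flow this callback participates
in looks roughly like this (an outline inferred from the comments in this
file, not verbatim code; _gin_parallel_scan_and_build drives it):

    scan callback (this function)
        -> ginHeapTupleBulkInsert() accumulates entries in buildstate->accum
        -> on memory pressure or TID wraparound: ginFlushBuildState()
            -> _gin_build_tuple() + tuplesort_putgintuple() into bs_worker_sort
    after the scan completes:
        -> sort bs_worker_sort, combine GinTuples with equal keys, and write
           the results into the shared bs_sortstate for the leader to merge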
     611             : 
     612             : IndexBuildResult *
     613         418 : ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     614             : {
     615             :     IndexBuildResult *result;
     616             :     double      reltuples;
     617             :     GinBuildState buildstate;
     618         418 :     GinBuildState *state = &buildstate;
     619             :     Buffer      RootBuffer,
     620             :                 MetaBuffer;
     621             :     ItemPointerData *list;
     622             :     Datum       key;
     623             :     GinNullCategory category;
     624             :     uint32      nlist;
     625             :     MemoryContext oldCtx;
     626             :     OffsetNumber attnum;
     627             : 
     628         418 :     if (RelationGetNumberOfBlocks(index) != 0)
     629           0 :         elog(ERROR, "index \"%s\" already contains data",
     630             :              RelationGetRelationName(index));
     631             : 
     632         418 :     initGinState(&buildstate.ginstate, index);
     633         418 :     buildstate.indtuples = 0;
     634         418 :     memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
     635             : 
     636             :     /* Initialize fields for parallel build too. */
     637         418 :     buildstate.bs_numtuples = 0;
     638         418 :     buildstate.bs_reltuples = 0;
     639         418 :     buildstate.bs_leader = NULL;
     640         418 :     memset(&buildstate.tid, 0, sizeof(ItemPointerData));
     641             : 
     642             :     /* initialize the meta page */
     643         418 :     MetaBuffer = GinNewBuffer(index);
     644             : 
     645             :     /* initialize the root page */
     646         418 :     RootBuffer = GinNewBuffer(index);
     647             : 
     648         418 :     START_CRIT_SECTION();
     649         418 :     GinInitMetabuffer(MetaBuffer);
     650         418 :     MarkBufferDirty(MetaBuffer);
     651         418 :     GinInitBuffer(RootBuffer, GIN_LEAF);
     652         418 :     MarkBufferDirty(RootBuffer);
     653             : 
     654             : 
     655         418 :     UnlockReleaseBuffer(MetaBuffer);
     656         418 :     UnlockReleaseBuffer(RootBuffer);
     657         418 :     END_CRIT_SECTION();
     658             : 
     659             :     /* count the root as first entry page */
     660         418 :     buildstate.buildStats.nEntryPages++;
     661             : 
     662             :     /*
     663             :      * create a temporary memory context that is used to hold data not yet
     664             :      * dumped out to the index
     665             :      */
     666         418 :     buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
     667             :                                               "Gin build temporary context",
     668             :                                               ALLOCSET_DEFAULT_SIZES);
     669             : 
     670             :     /*
     671             :      * create a temporary memory context that is used for calling
     672             :      * ginExtractEntries(), and can be reset after each tuple
     673             :      */
     674         418 :     buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
     675             :                                                "Gin build temporary context for user-defined function",
     676             :                                                ALLOCSET_DEFAULT_SIZES);
     677             : 
     678         418 :     buildstate.accum.ginstate = &buildstate.ginstate;
     679         418 :     ginInitBA(&buildstate.accum);
     680             : 
     681             :     /* Report table scan phase started */
     682         418 :     pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
     683             :                                  PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN);
     684             : 
     685             :     /*
     686             :      * Attempt to launch parallel worker scan when required
     687             :      *
     688             :      * XXX plan_create_index_workers makes the number of workers dependent on
     689             :      * maintenance_work_mem, requiring 32MB for each worker. For GIN that's
     690             :      * reasonable too, because we sort the data just like btree. It does
     691             :      * ignore the memory used to accumulate data in memory (set by work_mem),
     692             :      * but there is no way to communicate that to plan_create_index_workers.
     693             :      */
     694         418 :     if (indexInfo->ii_ParallelWorkers > 0)
     695          26 :         _gin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent,
     696             :                             indexInfo->ii_ParallelWorkers);
     697             : 
     698             :     /*
     699             :      * If parallel build requested and at least one worker process was
     700             :      * successfully launched, set up coordination state, wait for workers to
     701             :      * complete. Then read all tuples from the shared tuplesort and insert
     702             :      * them into the index.
     703             :      *
     704             :      * In serial mode, simply scan the table and build the index one index
     705             :      * tuple at a time.
     706             :      */
     707         418 :     if (state->bs_leader)
     708             :     {
     709             :         SortCoordinate coordinate;
     710             : 
     711          26 :         coordinate = palloc0_object(SortCoordinateData);
     712          26 :         coordinate->isWorker = false;
     713          26 :         coordinate->nParticipants =
     714          26 :             state->bs_leader->nparticipanttuplesorts;
     715          26 :         coordinate->sharedsort = state->bs_leader->sharedsort;
     716             : 
     717             :         /*
     718             :          * Begin leader tuplesort.
     719             :          *
     720             :          * In cases where parallelism is involved, the leader receives the
     721             :          * same share of maintenance_work_mem as a serial sort (it is
     722             :          * generally treated in the same way as a serial sort once we return).
     723             :          * Parallel worker Tuplesortstates will have received only a fraction
     724             :          * of maintenance_work_mem, though.
     725             :          *
     726             :          * We rely on the lifetime of the Leader Tuplesortstate almost not
     727             :          * overlapping with any worker Tuplesortstate's lifetime.  There may
     728             :          * be some small overlap, but that's okay because we rely on leader
     729             :          * Tuplesortstate only allocating a small, fixed amount of memory
     730             :          * here. When its tuplesort_performsort() is called (by our caller),
     731             :          * and significant amounts of memory are likely to be used, all
     732             :          * workers must have already freed almost all memory held by their
     733             :          * Tuplesortstates (they are about to go away completely, too).  The
     734             :          * overall effect is that maintenance_work_mem always represents an
     735             :          * absolute high watermark on the amount of memory used by a CREATE
     736             :          * INDEX operation, regardless of the use of parallelism or any other
     737             :          * factor.
     738             :          */
     739          26 :         state->bs_sortstate =
     740          26 :             tuplesort_begin_index_gin(heap, index,
     741             :                                       maintenance_work_mem, coordinate,
     742             :                                       TUPLESORT_NONE);
     743             : 
     744             :         /* scan the relation in parallel and merge per-worker results */
     745          26 :         reltuples = _gin_parallel_merge(state);
     746             : 
     747          26 :         _gin_end_parallel(state->bs_leader, state);
     748             :     }
     749             :     else                        /* no parallel index build */
     750             :     {
     751             :         /*
     752             :          * Do the heap scan.  We disallow sync scan here because
     753             :          * dataPlaceToPage prefers to receive tuples in TID order.
     754             :          */
     755         392 :         reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
     756             :                                            ginBuildCallback, &buildstate, NULL);
     757             : 
     758             :         /* dump remaining entries to the index */
     759         392 :         oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
     760         392 :         ginBeginBAScan(&buildstate.accum);
     761      130734 :         while ((list = ginGetBAEntry(&buildstate.accum,
     762      130734 :                                      &attnum, &key, &category, &nlist)) != NULL)
     763             :         {
     764             :             /* there could be many entries, so be willing to abort here */
     765      130342 :             CHECK_FOR_INTERRUPTS();
     766      130342 :             ginEntryInsert(&buildstate.ginstate, attnum, key, category,
     767             :                            list, nlist, &buildstate.buildStats);
     768             :         }
     769         392 :         MemoryContextSwitchTo(oldCtx);
     770             :     }
     771             : 
     772         418 :     MemoryContextDelete(buildstate.funcCtx);
     773         418 :     MemoryContextDelete(buildstate.tmpCtx);
     774             : 
     775             :     /*
     776             :      * Update metapage stats
     777             :      */
     778         418 :     buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
     779         418 :     ginUpdateStats(index, &buildstate.buildStats, true);
     780             : 
     781             :     /*
     782             :      * We didn't write WAL records as we built the index, so if WAL-logging is
     783             :      * required, write all pages to the WAL now.
     784             :      */
     785         418 :     if (RelationNeedsWAL(index))
     786             :     {
     787         300 :         log_newpage_range(index, MAIN_FORKNUM,
     788             :                           0, RelationGetNumberOfBlocks(index),
     789             :                           true);
     790             :     }
     791             : 
     792             :     /*
     793             :      * Return statistics
     794             :      */
     795         418 :     result = palloc_object(IndexBuildResult);
     796             : 
     797         418 :     result->heap_tuples = reltuples;
     798         418 :     result->index_tuples = buildstate.indtuples;
     799             : 
     800         418 :     return result;
     801             : }
     802             : 
     803             : /*
     804             :  *  ginbuildempty() -- build an empty gin index in the initialization fork
     805             :  */
     806             : void
     807           6 : ginbuildempty(Relation index)
     808             : {
     809             :     Buffer      RootBuffer,
     810             :                 MetaBuffer;
     811             : 
     812             :     /* An empty GIN index has two pages. */
     813           6 :     MetaBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
     814             :                                    EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
     815           6 :     RootBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
     816             :                                    EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
     817             : 
     818             :     /* Initialize and xlog metabuffer and root buffer. */
     819           6 :     START_CRIT_SECTION();
     820           6 :     GinInitMetabuffer(MetaBuffer);
     821           6 :     MarkBufferDirty(MetaBuffer);
     822           6 :     log_newpage_buffer(MetaBuffer, true);
     823           6 :     GinInitBuffer(RootBuffer, GIN_LEAF);
     824           6 :     MarkBufferDirty(RootBuffer);
     825           6 :     log_newpage_buffer(RootBuffer, false);
     826           6 :     END_CRIT_SECTION();
     827             : 
     828             :     /* Unlock and release the buffers. */
     829           6 :     UnlockReleaseBuffer(MetaBuffer);
     830           6 :     UnlockReleaseBuffer(RootBuffer);
     831           6 : }
     832             : 
     833             : /*
     834             :  * Insert index entries for a single indexable item during "normal"
     835             :  * (non-fast-update) insertion
     836             :  */
     837             : static void
     838       54096 : ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
     839             :                    Datum value, bool isNull,
     840             :                    ItemPointer item)
     841             : {
     842             :     Datum      *entries;
     843             :     GinNullCategory *categories;
     844             :     int32       i,
     845             :                 nentries;
     846             : 
     847       54096 :     entries = ginExtractEntries(ginstate, attnum, value, isNull,
     848             :                                 &nentries, &categories);
     849             : 
     850      506998 :     for (i = 0; i < nentries; i++)
     851      452922 :         ginEntryInsert(ginstate, attnum, entries[i], categories[i],
     852             :                        item, 1, NULL);
     853       54076 : }
     854             : 
     855             : bool
     856      319994 : gininsert(Relation index, Datum *values, bool *isnull,
     857             :           ItemPointer ht_ctid, Relation heapRel,
     858             :           IndexUniqueCheck checkUnique,
     859             :           bool indexUnchanged,
     860             :           IndexInfo *indexInfo)
     861             : {
     862      319994 :     GinState   *ginstate = (GinState *) indexInfo->ii_AmCache;
     863             :     MemoryContext oldCtx;
     864             :     MemoryContext insertCtx;
     865             :     int         i;
     866             : 
     867             :     /* Initialize GinState cache if first call in this statement */
     868      319994 :     if (ginstate == NULL)
     869             :     {
     870        2906 :         oldCtx = MemoryContextSwitchTo(indexInfo->ii_Context);
     871        2906 :         ginstate = palloc_object(GinState);
     872        2906 :         initGinState(ginstate, index);
     873        2906 :         indexInfo->ii_AmCache = ginstate;
     874        2906 :         MemoryContextSwitchTo(oldCtx);
     875             :     }
     876             : 
     877      319994 :     insertCtx = AllocSetContextCreate(CurrentMemoryContext,
     878             :                                       "Gin insert temporary context",
     879             :                                       ALLOCSET_DEFAULT_SIZES);
     880             : 
     881      319994 :     oldCtx = MemoryContextSwitchTo(insertCtx);
     882             : 
     883      319994 :     if (GinGetUseFastUpdate(index))
     884      265892 :     {
     885             :         GinTupleCollector collector;
     886             : 
     887      265898 :         memset(&collector, 0, sizeof(GinTupleCollector));
     888             : 
     889      651874 :         for (i = 0; i < ginstate->origTupdesc->natts; i++)
     890      385976 :             ginHeapTupleFastCollect(ginstate, &collector,
     891      385976 :                                     (OffsetNumber) (i + 1),
     892      385976 :                                     values[i], isnull[i],
     893             :                                     ht_ctid);
     894             : 
     895      265898 :         ginHeapTupleFastInsert(ginstate, &collector);
     896             :     }
     897             :     else
     898             :     {
     899      108172 :         for (i = 0; i < ginstate->origTupdesc->natts; i++)
     900       54096 :             ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
     901       54096 :                                values[i], isnull[i],
     902             :                                ht_ctid);
     903             :     }
     904             : 
     905      319968 :     MemoryContextSwitchTo(oldCtx);
     906      319968 :     MemoryContextDelete(insertCtx);
     907             : 
     908      319968 :     return false;
     909             : }
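A usage note on the branching above: which path gininsert() takes is
governed by the GIN fastupdate storage parameter (for example, an index
declared WITH (fastupdate = off) at CREATE INDEX time). With fastupdate
enabled (the default), entries are collected into the pending list via
ginHeapTupleFastInsert() and merged into the main index structure later;
with it disabled, ginHeapTupleInsert() descends the entry tree directly for
every entry.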
     910             : 
     911             : /*
     912             :  * Create parallel context, and launch workers for leader.
     913             :  *
     914             :  * buildstate argument should be initialized (with the exception of the
     915             :  * tuplesort states, which may later be created based on shared
     916             :  * state initially set up here).
     917             :  *
     918             :  * isconcurrent indicates if operation is CREATE INDEX CONCURRENTLY.
     919             :  *
     920             :  * request is the target number of parallel worker processes to launch.
     921             :  *
     922             :  * Sets buildstate's GinLeader, which caller must use to shut down parallel
     923             :  * mode by passing it to _gin_end_parallel() at the very end of its index
     924             :  * build.  If not even a single worker process can be launched, this is
     925             :  * never set, and caller should proceed with a serial index build.
     926             :  */
     927             : static void
     928          26 : _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
     929             :                     bool isconcurrent, int request)
     930             : {
     931             :     ParallelContext *pcxt;
     932             :     int         scantuplesortstates;
     933             :     Snapshot    snapshot;
     934             :     Size        estginshared;
     935             :     Size        estsort;
     936             :     GinBuildShared *ginshared;
     937             :     Sharedsort *sharedsort;
     938          26 :     GinLeader  *ginleader = palloc0_object(GinLeader);
     939             :     WalUsage   *walusage;
     940             :     BufferUsage *bufferusage;
     941          26 :     bool        leaderparticipates = true;
     942             :     int         querylen;
     943             : 
     944             : #ifdef DISABLE_LEADER_PARTICIPATION
     945             :     leaderparticipates = false;
     946             : #endif
     947             : 
     948             :     /*
      949             :      * Enter parallel mode, and create a context for the parallel GIN index build
     950             :      */
     951          26 :     EnterParallelMode();
     952             :     Assert(request > 0);
     953          26 :     pcxt = CreateParallelContext("postgres", "_gin_parallel_build_main",
     954             :                                  request);
     955             : 
     956          26 :     scantuplesortstates = leaderparticipates ? request + 1 : request;
     957             : 
     958             :     /*
     959             :      * Prepare for scan of the base relation.  In a normal index build, we use
     960             :      * SnapshotAny because we must retrieve all tuples and do our own time
     961             :      * qual checks (because we have to index RECENTLY_DEAD tuples).  In a
     962             :      * concurrent build, we take a regular MVCC snapshot and index whatever's
     963             :      * live according to that.
     964             :      */
     965          26 :     if (!isconcurrent)
     966          20 :         snapshot = SnapshotAny;
     967             :     else
     968           6 :         snapshot = RegisterSnapshot(GetTransactionSnapshot());
     969             : 
     970             :     /*
     971             :      * Estimate size for our own PARALLEL_KEY_GIN_SHARED workspace.
     972             :      */
     973          26 :     estginshared = _gin_parallel_estimate_shared(heap, snapshot);
     974          26 :     shm_toc_estimate_chunk(&pcxt->estimator, estginshared);
     975          26 :     estsort = tuplesort_estimate_shared(scantuplesortstates);
     976          26 :     shm_toc_estimate_chunk(&pcxt->estimator, estsort);
     977             : 
     978          26 :     shm_toc_estimate_keys(&pcxt->estimator, 2);
     979             : 
     980             :     /*
     981             :      * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
     982             :      * and PARALLEL_KEY_BUFFER_USAGE.
     983             :      *
     984             :      * If there are no extensions loaded that care, we could skip this.  We
     985             :      * have no way of knowing whether anyone's looking at pgWalUsage or
     986             :      * pgBufferUsage, so do it unconditionally.
     987             :      */
     988          26 :     shm_toc_estimate_chunk(&pcxt->estimator,
     989             :                            mul_size(sizeof(WalUsage), pcxt->nworkers));
     990          26 :     shm_toc_estimate_keys(&pcxt->estimator, 1);
     991          26 :     shm_toc_estimate_chunk(&pcxt->estimator,
     992             :                            mul_size(sizeof(BufferUsage), pcxt->nworkers));
     993          26 :     shm_toc_estimate_keys(&pcxt->estimator, 1);
     994             : 
     995             :     /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
     996          26 :     if (debug_query_string)
     997             :     {
     998          26 :         querylen = strlen(debug_query_string);
     999          26 :         shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
    1000          26 :         shm_toc_estimate_keys(&pcxt->estimator, 1);
    1001             :     }
    1002             :     else
    1003           0 :         querylen = 0;           /* keep compiler quiet */
    1004             : 
    1005             :     /* Everyone's had a chance to ask for space, so now create the DSM */
    1006          26 :     InitializeParallelDSM(pcxt);
    1007             : 
    1008             :     /* If no DSM segment was available, back out (do serial build) */
    1009          26 :     if (pcxt->seg == NULL)
    1010             :     {
    1011           0 :         if (IsMVCCSnapshot(snapshot))
    1012           0 :             UnregisterSnapshot(snapshot);
    1013           0 :         DestroyParallelContext(pcxt);
    1014           0 :         ExitParallelMode();
    1015           0 :         return;
    1016             :     }
    1017             : 
    1018             :     /* Store shared build state, for which we reserved space */
    1019          26 :     ginshared = (GinBuildShared *) shm_toc_allocate(pcxt->toc, estginshared);
    1020             :     /* Initialize immutable state */
    1021          26 :     ginshared->heaprelid = RelationGetRelid(heap);
    1022          26 :     ginshared->indexrelid = RelationGetRelid(index);
    1023          26 :     ginshared->isconcurrent = isconcurrent;
    1024          26 :     ginshared->scantuplesortstates = scantuplesortstates;
    1025             : 
    1026          26 :     ConditionVariableInit(&ginshared->workersdonecv);
    1027          26 :     SpinLockInit(&ginshared->mutex);
    1028             : 
    1029             :     /* Initialize mutable state */
    1030          26 :     ginshared->nparticipantsdone = 0;
    1031          26 :     ginshared->reltuples = 0.0;
    1032          26 :     ginshared->indtuples = 0.0;
    1033             : 
    1034          26 :     table_parallelscan_initialize(heap,
    1035             :                                   ParallelTableScanFromGinBuildShared(ginshared),
    1036             :                                   snapshot);
    1037             : 
    1038             :     /*
    1039             :      * Store shared tuplesort-private state, for which we reserved space.
    1040             :      * Then, initialize opaque state using tuplesort routine.
    1041             :      */
    1042          26 :     sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
    1043          26 :     tuplesort_initialize_shared(sharedsort, scantuplesortstates,
    1044             :                                 pcxt->seg);
    1045             : 
    1046          26 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_GIN_SHARED, ginshared);
    1047          26 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
    1048             : 
    1049             :     /* Store query string for workers */
    1050          26 :     if (debug_query_string)
    1051             :     {
    1052             :         char       *sharedquery;
    1053             : 
    1054          26 :         sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
    1055          26 :         memcpy(sharedquery, debug_query_string, querylen + 1);
    1056          26 :         shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
    1057             :     }
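
/*
 * [Editorial sketch, not part of gininsert.c] The worker side of this
 * shm_toc protocol.  A worker entry point such as _gin_parallel_build_main()
 * re-derives each shared structure by key, mirroring the shm_toc_insert()
 * calls made here by the leader:
 *
 *    ginshared = (GinBuildShared *)
 *        shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false);
 *    sharedsort = (Sharedsort *)
 *        shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
 *
 *    sharedquery = (char *)
 *        shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true);
 *    debug_query_string = sharedquery;
 *
 *    tuplesort_attach_shared(sharedsort, seg);
 *
 * The "false" lookups treat a missing key as an error; the query text is
 * optional ("true"), since it was only stored if debug_query_string was set.
 */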
    1058             : 
    1059             :     /*
    1060             :      * Allocate space for each worker's WalUsage and BufferUsage; no need to
    1061             :      * initialize.
    1062             :      */
    1063          26 :     walusage = shm_toc_allocate(pcxt->toc,
    1064          26 :                                 mul_size(sizeof(WalUsage), pcxt->nworkers));
    1065          26 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
    1066          26 :     bufferusage = shm_toc_allocate(pcxt->toc,
    1067          26 :                                    mul_size(sizeof(BufferUsage), pcxt->nworkers));
    1068          26 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
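
/*
 * [Editorial sketch] How a worker is expected to fill its slot in these
 * arrays: ParallelWorkerNumber indexes the per-worker entries, and the
 * InstrStartParallelQuery()/InstrEndParallelQuery() pair computes the
 * deltas that InstrAccumParallelQuery() later sums up in _gin_end_parallel():
 *
 *    InstrStartParallelQuery();
 *    ... perform the heap scan and sort ...
 *    walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
 *    bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
 *    InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber],
 *                          &walusage[ParallelWorkerNumber]);
 */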
    1069             : 
    1070             :     /* Launch workers, saving status for leader/caller */
    1071          26 :     LaunchParallelWorkers(pcxt);
    1072          26 :     ginleader->pcxt = pcxt;
    1073          26 :     ginleader->nparticipanttuplesorts = pcxt->nworkers_launched;
    1074          26 :     if (leaderparticipates)
    1075          26 :         ginleader->nparticipanttuplesorts++;
    1076          26 :     ginleader->ginshared = ginshared;
    1077          26 :     ginleader->sharedsort = sharedsort;
    1078          26 :     ginleader->snapshot = snapshot;
    1079          26 :     ginleader->walusage = walusage;
    1080          26 :     ginleader->bufferusage = bufferusage;
    1081             : 
    1082             :     /* If no workers were successfully launched, back out (do serial build) */
    1083          26 :     if (pcxt->nworkers_launched == 0)
    1084             :     {
    1085           0 :         _gin_end_parallel(ginleader, NULL);
    1086           0 :         return;
    1087             :     }
    1088             : 
    1089             :     /* Save leader state now that it's clear build will be parallel */
    1090          26 :     buildstate->bs_leader = ginleader;
    1091             : 
    1092             :     /* Join heap scan ourselves */
    1093          26 :     if (leaderparticipates)
    1094          26 :         _gin_leader_participate_as_worker(buildstate, heap, index);
    1095             : 
    1096             :     /*
    1097             :      * Caller needs to wait for all launched workers when we return.  Make
    1098             :      * sure that the failure-to-start case will not hang forever.
    1099             :      */
    1100          26 :     WaitForParallelWorkersToAttach(pcxt);
    1101             : }
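
/*
 * [Editorial sketch, local names invented] Per the contract above, the
 * build code drives these functions roughly as follows:
 *
 *    _gin_begin_parallel(&buildstate, heap, index, isconcurrent, request);
 *
 *    if (buildstate.bs_leader == NULL)
 *        ... no workers launched: perform a serial build instead ...
 *    else
 *    {
 *        reltuples = _gin_parallel_merge(&buildstate);
 *        _gin_end_parallel(buildstate.bs_leader, &buildstate);
 *    }
 */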
    1102             : 
    1103             : /*
    1104             :  * Shut down workers, destroy parallel context, and end parallel mode.
    1105             :  */
    1106             : static void
    1107          26 : _gin_end_parallel(GinLeader *ginleader, GinBuildState *state)
    1108             : {
    1109             :     int         i;
    1110             : 
    1111             :     /* Shutdown worker processes */
    1112          26 :     WaitForParallelWorkersToFinish(ginleader->pcxt);
    1113             : 
    1114             :     /*
     1115             :      * Next, accumulate WAL and buffer usage.  (This must wait for the
     1116             :      * workers to finish, or we might get incomplete data.)
    1117             :      */
    1118          78 :     for (i = 0; i < ginleader->pcxt->nworkers_launched; i++)
    1119          52 :         InstrAccumParallelQuery(&ginleader->bufferusage[i], &ginleader->walusage[i]);
    1120             : 
    1121             :     /* Free last reference to MVCC snapshot, if one was used */
    1122          26 :     if (IsMVCCSnapshot(ginleader->snapshot))
    1123           6 :         UnregisterSnapshot(ginleader->snapshot);
    1124          26 :     DestroyParallelContext(ginleader->pcxt);
    1125          26 :     ExitParallelMode();
    1126          26 : }
    1127             : 
    1128             : /*
    1129             :  * Within leader, wait for end of heap scan.
    1130             :  *
    1131             :  * When called, parallel heap scan started by _gin_begin_parallel() will
    1132             :  * already be underway within worker processes (when leader participates
    1133             :  * as a worker, we should end up here just as workers are finishing).
    1134             :  *
    1135             :  * Returns the total number of heap tuples scanned.
    1136             :  */
    1137             : static double
    1138          26 : _gin_parallel_heapscan(GinBuildState *state)
    1139             : {
    1140          26 :     GinBuildShared *ginshared = state->bs_leader->ginshared;
    1141             :     int         nparticipanttuplesorts;
    1142             : 
    1143          26 :     nparticipanttuplesorts = state->bs_leader->nparticipanttuplesorts;
    1144             :     for (;;)
    1145             :     {
    1146          72 :         SpinLockAcquire(&ginshared->mutex);
    1147          72 :         if (ginshared->nparticipantsdone == nparticipanttuplesorts)
    1148             :         {
    1149             :             /* copy the data into leader state */
    1150          26 :             state->bs_reltuples = ginshared->reltuples;
    1151          26 :             state->bs_numtuples = ginshared->indtuples;
    1152             : 
    1153          26 :             SpinLockRelease(&ginshared->mutex);
    1154          26 :             break;
    1155             :         }
    1156          46 :         SpinLockRelease(&ginshared->mutex);
    1157             : 
    1158          46 :         ConditionVariableSleep(&ginshared->workersdonecv,
    1159             :                                WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN);
    1160             :     }
    1161             : 
    1162          26 :     ConditionVariableCancelSleep();
    1163             : 
    1164          26 :     return state->bs_reltuples;
    1165             : }
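
/*
 * [Editorial sketch, not part of gininsert.c] The producer side of the
 * wait loop above.  Each participant, once its portion of the scan and
 * sort is complete, updates the shared counters under the spinlock and
 * wakes the leader; this mirrors what _gin_parallel_scan_and_build()
 * does at the end of its scan.  The helper name is hypothetical.
 */
static void
worker_report_done_sketch(GinBuildShared *ginshared,
                          double reltuples, double indtuples)
{
    SpinLockAcquire(&ginshared->mutex);
    ginshared->nparticipantsdone++;
    ginshared->reltuples += reltuples;
    ginshared->indtuples += indtuples;
    SpinLockRelease(&ginshared->mutex);

    /* wake the leader, which may be sleeping in _gin_parallel_heapscan() */
    ConditionVariableSignal(&ginshared->workersdonecv);
}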
    1166             : 
    1167             : /*
    1168             :  * Buffer used to accumulate TIDs from multiple GinTuples for the same key
    1169             :  * (we read these from the tuplesort, sorted by the key).
    1170             :  *
    1171             :  * This is similar to BuildAccumulator in that it's used to collect TIDs
    1172             :  * in memory before inserting them into the index, but it's much simpler
    1173             :  * as it only deals with a single index key at a time.
    1174             :  *
     1175             :  * When adding TIDs to the buffer, we make sure to keep them sorted, both
     1176             :  * during the initial table scan (where we detect if the scan wraps around)
     1177             :  * and during merging (where we use a mergesort).
    1178             :  */
    1179             : typedef struct GinBuffer
    1180             : {
    1181             :     OffsetNumber attnum;
    1182             :     GinNullCategory category;
    1183             :     Datum       key;            /* 0 if no key (and keylen == 0) */
    1184             :     Size        keylen;         /* number of bytes (not typlen) */
    1185             : 
    1186             :     /* type info */
    1187             :     int16       typlen;
    1188             :     bool        typbyval;
    1189             : 
     1190             :     /* Number of TIDs to collect before attempting to write some out. */
    1191             :     int         maxitems;
    1192             : 
    1193             :     /* array of TID values */
    1194             :     int         nitems;
    1195             :     int         nfrozen;
    1196             :     SortSupport ssup;           /* for sorting/comparing keys */
    1197             :     ItemPointerData *items;
    1198             : } GinBuffer;
    1199             : 
    1200             : /*
     1201             :  * Check that the TID array contains valid values, and that it's sorted (if
     1202             :  * we expect it to be).
    1203             :  */
    1204             : static void
    1205      116478 : AssertCheckItemPointers(GinBuffer *buffer)
    1206             : {
    1207             : #ifdef USE_ASSERT_CHECKING
    1208             :     /* we should not have a buffer with no TIDs to sort */
    1209             :     Assert(buffer->items != NULL);
    1210             :     Assert(buffer->nitems > 0);
    1211             : 
    1212             :     for (int i = 0; i < buffer->nitems; i++)
    1213             :     {
    1214             :         Assert(ItemPointerIsValid(&buffer->items[i]));
    1215             : 
    1216             :         /* don't check ordering for the first TID item */
    1217             :         if (i == 0)
    1218             :             continue;
    1219             : 
    1220             :         Assert(ItemPointerCompare(&buffer->items[i - 1], &buffer->items[i]) < 0);
    1221             :     }
    1222             : #endif
    1223      116478 : }
    1224             : 
    1225             : /*
    1226             :  * GinBuffer checks
    1227             :  *
     1228             :  * Make sure the nitems/items fields are consistent (the two fields must
     1229             :  * agree on whether the array is empty). If there are items, check ordering.
    1230             :  */
    1231             : static void
    1232      125042 : AssertCheckGinBuffer(GinBuffer *buffer)
    1233             : {
    1234             : #ifdef USE_ASSERT_CHECKING
    1235             :     /* if we have any items, the array must exist */
    1236             :     Assert(!((buffer->nitems > 0) && (buffer->items == NULL)));
    1237             : 
    1238             :     /*
    1239             :      * The buffer may be empty, in which case we must not call the check of
    1240             :      * item pointers, because that assumes non-emptiness.
    1241             :      */
    1242             :     if (buffer->nitems == 0)
    1243             :         return;
    1244             : 
    1245             :     /* Make sure the item pointers are valid and sorted. */
    1246             :     AssertCheckItemPointers(buffer);
    1247             : #endif
    1248      125042 : }
    1249             : 
    1250             : /*
    1251             :  * GinBufferInit
    1252             :  *      Initialize buffer to store tuples for a GIN index.
    1253             :  *
     1254             :  * Initialize the buffer used to accumulate TIDs for a single key at a time
     1255             :  * (we process the data sorted), so we know when we have received all data
     1256             :  * for a given key.
    1257             :  *
    1258             :  * Initializes sort support procedures for all index attributes.
    1259             :  */
    1260             : static GinBuffer *
    1261         104 : GinBufferInit(Relation index)
    1262             : {
    1263         104 :     GinBuffer  *buffer = palloc0_object(GinBuffer);
    1264             :     int         i,
    1265             :                 nKeys;
    1266         104 :     TupleDesc   desc = RelationGetDescr(index);
    1267             : 
    1268             :     /*
    1269             :      * How many items can we fit into the memory limit? We don't want to end
     1270             :      * with too many TIDs, and 64kB seems more than enough. But maybe this
    1271             :      * should be tied to maintenance_work_mem or something like that?
    1272             :      */
    1273         104 :     buffer->maxitems = (64 * 1024L) / sizeof(ItemPointerData);
    1274             : 
    1275         104 :     nKeys = IndexRelationGetNumberOfKeyAttributes(index);
    1276             : 
    1277         104 :     buffer->ssup = palloc0_array(SortSupportData, nKeys);
    1278             : 
    1279             :     /*
     1280             :      * Look up the comparison function for the index key data type, and
     1281             :      * initialize the sort support state with it.
    1282             :      */
    1283         208 :     for (i = 0; i < nKeys; i++)
    1284             :     {
    1285             :         Oid         cmpFunc;
    1286         104 :         SortSupport sortKey = &buffer->ssup[i];
    1287         104 :         Form_pg_attribute att = TupleDescAttr(desc, i);
    1288             : 
    1289         104 :         sortKey->ssup_cxt = CurrentMemoryContext;
    1290         104 :         sortKey->ssup_collation = index->rd_indcollation[i];
    1291             : 
    1292         104 :         if (!OidIsValid(sortKey->ssup_collation))
    1293         104 :             sortKey->ssup_collation = DEFAULT_COLLATION_OID;
    1294             : 
    1295         104 :         sortKey->ssup_nulls_first = false;
    1296         104 :         sortKey->ssup_attno = i + 1;
    1297         104 :         sortKey->abbreviate = false;
    1298             : 
    1299             :         Assert(sortKey->ssup_attno != 0);
    1300             : 
    1301             :         /*
    1302             :          * If the compare proc isn't specified in the opclass definition, look
    1303             :          * up the index key type's default btree comparator.
    1304             :          */
    1305         104 :         cmpFunc = index_getprocid(index, i + 1, GIN_COMPARE_PROC);
    1306         104 :         if (cmpFunc == InvalidOid)
    1307             :         {
    1308             :             TypeCacheEntry *typentry;
    1309             : 
    1310           0 :             typentry = lookup_type_cache(att->atttypid,
    1311             :                                          TYPECACHE_CMP_PROC_FINFO);
    1312           0 :             if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
    1313           0 :                 ereport(ERROR,
    1314             :                         (errcode(ERRCODE_UNDEFINED_FUNCTION),
    1315             :                          errmsg("could not identify a comparison function for type %s",
    1316             :                                 format_type_be(att->atttypid))));
    1317             : 
    1318           0 :             cmpFunc = typentry->cmp_proc_finfo.fn_oid;
    1319             :         }
    1320             : 
    1321         104 :         PrepareSortSupportComparisonShim(cmpFunc, sortKey);
    1322             :     }
    1323             : 
    1324         104 :     return buffer;
    1325             : }
    1326             : 
    1327             : /* Is the buffer empty, i.e. has no TID values in the array? */
    1328             : static bool
    1329      125312 : GinBufferIsEmpty(GinBuffer *buffer)
    1330             : {
    1331      125312 :     return (buffer->nitems == 0);
    1332             : }
    1333             : 
    1334             : /*
    1335             :  * GinBufferKeyEquals
    1336             :  *      Can the buffer store TIDs for the provided GIN tuple (same key)?
    1337             :  *
     1338             :  * Check whether the tuple matches the data already accumulated in the GIN
     1339             :  * buffer. Compare the scalar fields first, before the actual key.
     1340             :  *
     1341             :  * Returns true if the key matches (so the tuple's TIDs belong in the
     1342             :  * buffer), false otherwise.
    1343             :  */
    1344             : static bool
    1345       62490 : GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup)
    1346             : {
    1347             :     int         r;
    1348             :     Datum       tupkey;
    1349             : 
    1350       62490 :     AssertCheckGinBuffer(buffer);
    1351             : 
    1352       62490 :     if (tup->attrnum != buffer->attnum)
    1353           0 :         return false;
    1354             : 
    1355             :     /* same attribute should have the same type info */
    1356             :     Assert(tup->typbyval == buffer->typbyval);
    1357             :     Assert(tup->typlen == buffer->typlen);
    1358             : 
    1359       62490 :     if (tup->category != buffer->category)
    1360          54 :         return false;
    1361             : 
    1362             :     /*
     1363             :      * For NULL/empty keys this means equality; for normal keys we need to
     1364             :      * compare the actual key value.
    1365             :      */
    1366       62436 :     if (buffer->category != GIN_CAT_NORM_KEY)
    1367           6 :         return true;
    1368             : 
    1369             :     /*
    1370             :      * For the tuple, get either the first sizeof(Datum) bytes for byval
    1371             :      * types, or a pointer to the beginning of the data array.
    1372             :      */
    1373       62430 :     tupkey = (buffer->typbyval) ? *(Datum *) tup->data : PointerGetDatum(tup->data);
    1374             : 
    1375       62430 :     r = ApplySortComparator(buffer->key, false,
    1376             :                             tupkey, false,
    1377       62430 :                             &buffer->ssup[buffer->attnum - 1]);
    1378             : 
    1379       62430 :     return (r == 0);
    1380             : }
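
/*
 * [Editorial note] Example of the key extraction above: for a by-value
 * type such as int4, the Datum itself occupies the first sizeof(Datum)
 * bytes of tup->data and is read back with *(Datum *) tup->data; for a
 * by-reference type such as text, tup->data holds the flattened value
 * and the Datum is simply its address (PointerGetDatum).
 */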
    1381             : 
    1382             : /*
    1383             :  * GinBufferShouldTrim
    1384             :  *      Should we trim the list of item pointers?
    1385             :  *
     1386             :  * By trimming we mean writing out and removing the tuple IDs that we know
     1387             :  * cannot be changed by future merges. We can deduce the TID up to which
    1388             :  * this is guaranteed from the "first" TID in each GIN tuple, which provides
    1389             :  * a "horizon" (for a given key) thanks to the sort.
    1390             :  *
    1391             :  * We don't want to do this too often - compressing longer TID lists is more
    1392             :  * efficient. But we also don't want to accumulate too many TIDs, for two
    1393             :  * reasons. First, it consumes memory and we might exceed maintenance_work_mem
    1394             :  * (or whatever limit applies), even if that's unlikely because TIDs are very
    1395             :  * small so we can fit a lot of them. Second, and more importantly, long TID
    1396             :  * lists are an issue if the scan wraps around, because a key may get a very
    1397             :  * wide list (with min/max TID for that key), forcing "full" mergesorts for
    1398             :  * every list merged into it (instead of the efficient append).
    1399             :  *
     1400             :  * So we look at two things when deciding whether to trim - whether the
     1401             :  * resulting list (after adding TIDs from the new tuple) would be too long,
     1402             :  * and whether there are enough TIDs to trim (with values less than the
     1403             :  * "first" TID from the new tuple). By enough we mean at least 1024 TIDs
     1404             :  * (a mostly arbitrary number, matching the check below).
    1405             :  *
    1406             :  * We try freezing TIDs at the beginning of the list first, before attempting
    1407             :  * to trim the buffer. This may allow trimming the data earlier, reducing the
    1408             :  * memory usage and excluding it from the mergesort.
    1409             :  */
    1410             : static bool
    1411       62552 : GinBufferShouldTrim(GinBuffer *buffer, GinTuple *tup)
    1412             : {
    1413             :     /*
    1414             :      * Check if the last TID in the current list is frozen. This is the case
    1415             :      * when merging non-overlapping lists, e.g. in each parallel worker.
    1416             :      */
    1417       71178 :     if ((buffer->nitems > 0) &&
    1418        8626 :         (ItemPointerCompare(&buffer->items[buffer->nitems - 1],
    1419        8626 :                             GinTupleGetFirst(tup)) == 0))
    1420           0 :         buffer->nfrozen = buffer->nitems;
    1421             : 
    1422             :     /*
    1423             :      * Now find the last TID we know to be frozen, i.e. the last TID right
    1424             :      * before the new GIN tuple.
    1425             :      *
     1426             :      * Start with the first not-yet-frozen TID, and walk until we find the
    1427             :      * first TID that's higher. If we already know the whole list is frozen
    1428             :      * (i.e. nfrozen == nitems), this does nothing.
    1429             :      *
    1430             :      * XXX This might do a binary search for sufficiently long lists, but it
     1431             :      * does not seem worth the complexity. Overlapping lists should be rare,
     1432             :      * TID comparisons are cheap, and we should quickly freeze most of
    1433             :      * the list.
    1434             :      */
    1435      153606 :     for (int i = buffer->nfrozen; i < buffer->nitems; i++)
    1436             :     {
    1437             :         /* Is the TID after the first TID of the new tuple? Can't freeze. */
    1438       98520 :         if (ItemPointerCompare(&buffer->items[i],
    1439       98520 :                                GinTupleGetFirst(tup)) > 0)
    1440        7466 :             break;
    1441             : 
    1442       91054 :         buffer->nfrozen++;
    1443             :     }
    1444             : 
     1445             :     /* not enough TIDs to trim (1024 is a somewhat arbitrary number) */
    1446       62552 :     if (buffer->nfrozen < 1024)
    1447       62552 :         return false;
    1448             : 
    1449             :     /* no need to trim if we have not hit the memory limit yet */
    1450           0 :     if ((buffer->nitems + tup->nitems) < buffer->maxitems)
    1451           0 :         return false;
    1452             : 
    1453             :     /*
    1454             :      * OK, we have enough frozen TIDs to flush, and we have hit the memory
    1455             :      * limit, so it's time to write it out.
    1456             :      */
    1457           0 :     return true;
    1458             : }
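
/*
 * [Editorial example, invented TIDs] Suppose the buffer holds the sorted
 * TIDs
 *
 *    (0,1) (0,5) (1,2) (7,3) (9,1)
 *
 * for one key, and the next GinTuple for that key starts at TID (7,2).
 * Thanks to the sort, every future tuple for this key starts at or after
 * (7,2), so the prefix (0,1) (0,5) (1,2) can never be disturbed by a
 * merge: the loop above freezes it, setting nfrozen = 3.  Only once at
 * least 1024 TIDs are frozen and the buffer (including the new tuple's
 * TIDs) would reach maxitems does this function return true, making the
 * caller write out the frozen prefix and trim it from the buffer.
 */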
    1459             : 
    1460             : /*
    1461             :  * GinBufferStoreTuple
    1462             :  *      Add data (especially TID list) from a GIN tuple to the buffer.
    1463             :  *
     1464             :  * The buffer is expected to be empty (in which case it's initialized), or
     1465             :  * to hold data for the same key. The TID values from the tuple are combined
     1466             :  * with the stored values using a merge sort.
    1467             :  *
    1468             :  * The tuples (for the same key) are expected to be sorted by first TID. But
    1469             :  * this does not guarantee the lists do not overlap, especially in the leader,
    1470             :  * because the workers process interleaving data. There should be no overlaps
    1471             :  * in a single worker - it could happen when the parallel scan wraps around,
    1472             :  * but we detect that and flush the data (see ginBuildCallbackParallel).
    1473             :  *
     1474             :  * By sorting the GinTuples not only by key, but also by first TID, we make
     1475             :  * it less likely that the lists will overlap during merge. Non-overlapping
     1476             :  * lists can simply be appended, which is cheaper than a full mergesort.
    1477             :  *
    1478             :  * How often can the lists overlap? There should be no overlaps in workers,
    1479             :  * and in the leader we can see overlaps between lists built by different
    1480             :  * workers. But the workers merge the items as much as possible, so there
    1481             :  * should not be too many.
    1482             :  */
    1483             : static void
    1484       62552 : GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
    1485             : {
    1486             :     ItemPointerData *items;
    1487             :     Datum       key;
    1488             : 
    1489       62552 :     AssertCheckGinBuffer(buffer);
    1490             : 
    1491       62552 :     key = _gin_parse_tuple_key(tup);
    1492       62552 :     items = _gin_parse_tuple_items(tup);
    1493             : 
    1494             :     /* if the buffer is empty, set the fields (and copy the key) */
    1495       62552 :     if (GinBufferIsEmpty(buffer))
    1496             :     {
    1497       53926 :         buffer->category = tup->category;
    1498       53926 :         buffer->keylen = tup->keylen;
    1499       53926 :         buffer->attnum = tup->attrnum;
    1500             : 
    1501       53926 :         buffer->typlen = tup->typlen;
    1502       53926 :         buffer->typbyval = tup->typbyval;
    1503             : 
    1504       53926 :         if (tup->category == GIN_CAT_NORM_KEY)
    1505       53872 :             buffer->key = datumCopy(key, buffer->typbyval, buffer->typlen);
    1506             :         else
    1507          54 :             buffer->key = (Datum) 0;
    1508             :     }
    1509             : 
    1510             :     /* add the new TIDs into the buffer, combine using merge-sort */
    1511             :     {
    1512             :         int         nnew;
    1513             :         ItemPointer new;
    1514             : 
    1515             :         /*
    1516             :          * Resize the array - we do this first, because we'll dereference the
    1517             :          * first unfrozen TID, which would fail if the array is NULL. We'll
    1518             :          * still pass 0 as number of elements in that array though.
    1519             :          */
    1520       62552 :         if (buffer->items == NULL)
    1521          62 :             buffer->items = palloc((buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
    1522             :         else
    1523       62490 :             buffer->items = repalloc(buffer->items,
    1524       62490 :                                      (buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
    1525             : 
    1526       62552 :         new = ginMergeItemPointers(&buffer->items[buffer->nfrozen], /* first unfrozen */
    1527       62552 :                                    (buffer->nitems - buffer->nfrozen),    /* num of unfrozen */
    1528       62552 :                                    items, tup->nitems, &nnew);
    1529             : 
    1530             :         Assert(nnew == (tup->nitems + (buffer->nitems - buffer->nfrozen)));
    1531             : 
    1532       62552 :         memcpy(&buffer->items[buffer->nfrozen], new,
    1533             :                nnew * sizeof(ItemPointerData));
    1534             : 
    1535       62552 :         pfree(new);
    1536             : 
    1537       62552 :         buffer->nitems += tup->nitems;
    1538             : 
    1539       62552 :         AssertCheckItemPointers(buffer);
    1540             :     }
    1541             : 
    1542             :     /* free the decompressed TID list */
    1543       62552 :     pfree(items);
    1544       62552 : }
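
/*
 * [Editorial example, invented TIDs] With buffer items (0,1) (2,3) and
 * nfrozen = 1, storing a GinTuple with items (1,5) (3,2) mergesorts only
 * the unfrozen tail: (2,3) combined with (1,5) (3,2) yields (1,5) (2,3)
 * (3,2), so the buffer becomes (0,1) (1,5) (2,3) (3,2) with nitems = 4.
 * When the lists do not overlap at all, the mergesort degenerates into a
 * cheap append of the new list.
 */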
    1545             : 
    1546             : /*
    1547             :  * GinBufferReset
    1548             :  *      Reset the buffer into a state as if it contains no data.
    1549             :  */
    1550             : static void
    1551       53926 : GinBufferReset(GinBuffer *buffer)
    1552             : {
    1553             :     Assert(!GinBufferIsEmpty(buffer));
    1554             : 
    1555             :     /* release byref values, do nothing for by-val ones */
    1556       53926 :     if ((buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
    1557       33916 :         pfree(DatumGetPointer(buffer->key));
    1558             : 
    1559             :     /*
    1560             :      * Not required, but makes it more likely to trigger NULL dereference if
    1561             :      * using the value incorrectly, etc.
    1562             :      */
    1563       53926 :     buffer->key = (Datum) 0;
    1564             : 
    1565       53926 :     buffer->attnum = 0;
    1566       53926 :     buffer->category = 0;
    1567       53926 :     buffer->keylen = 0;
    1568       53926 :     buffer->nitems = 0;
    1569       53926 :     buffer->nfrozen = 0;
    1570             : 
    1571       53926 :     buffer->typlen = 0;
    1572       53926 :     buffer->typbyval = 0;
    1573       53926 : }
    1574             : 
    1575             : /*
    1576             :  * GinBufferTrim
    1577             :  *      Discard the "frozen" part of the TID list (which should have been
    1578             :  *      written to disk/index before this call).
    1579             :  */
    1580             : static void
    1581           0 : GinBufferTrim(GinBuffer *buffer)
    1582             : {
    1583             :     Assert((buffer->nfrozen > 0) && (buffer->nfrozen <= buffer->nitems));
    1584             : 
    1585           0 :     memmove(&buffer->items[0], &buffer->items[buffer->nfrozen],
    1586           0 :             sizeof(ItemPointerData) * (buffer->nitems - buffer->nfrozen));
    1587             : 
    1588           0 :     buffer->nitems -= buffer->nfrozen;
    1589           0 :     buffer->nfrozen = 0;
    1590           0 : }
    1591             : 
    1592             : /*
    1593             :  * GinBufferFree
    1594             :  *      Release memory associated with the GinBuffer (including TID array).
    1595             :  */
    1596             : static void
    1597         104 : GinBufferFree(GinBuffer *buffer)
    1598             : {
    1599         104 :     if (buffer->items)
    1600          62 :         pfree(buffer->items);
    1601             : 
    1602             :     /* release byref values, do nothing for by-val ones */
    1603         104 :     if (!GinBufferIsEmpty(buffer) &&
    1604           0 :         (buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
    1605           0 :         pfree(DatumGetPointer(buffer->key));
    1606             : 
    1607         104 :     pfree(buffer);
    1608         104 : }
    1609             : 
    1610             : /*
    1611             :  * GinBufferCanAddKey
    1612             :  *      Check if a given GIN tuple can be added to the current buffer.
    1613             :  *
    1614             :  * Returns true if the buffer is either empty or for the same index key.
    1615             :  */
    1616             : static bool
    1617       62552 : GinBufferCanAddKey(GinBuffer *buffer, GinTuple *tup)
    1618             : {
    1619             :     /* empty buffer can accept data for any key */
    1620       62552 :     if (GinBufferIsEmpty(buffer))
    1621          62 :         return true;
    1622             : 
    1623             :     /* otherwise just data for the same key */
    1624       62490 :     return GinBufferKeyEquals(buffer, tup);
    1625             : }
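
/*
 * [Editorial sketch, not part of gininsert.c] The life cycle of a
 * GinBuffer, abstracted from the two merge loops below.  The callback
 * types and the helper name are hypothetical: "next_tuple" stands in
 * for tuplesort_getgintuple(), and "flush_buffer" for ginEntryInsert()
 * (in the leader) or tuplesort_putgintuple() (in a worker).
 */
typedef GinTuple *(*NextTupleFn) (void *arg);
typedef void (*FlushBufferFn) (GinBuffer *buffer, void *arg);

static void
gin_buffer_lifecycle_sketch(Relation index,
                            NextTupleFn next_tuple,
                            FlushBufferFn flush_buffer, void *arg)
{
    GinBuffer  *buffer = GinBufferInit(index);
    GinTuple   *tup;

    while ((tup = next_tuple(arg)) != NULL)
    {
        /* different key? flush the accumulated TIDs, restart for new key */
        if (!GinBufferCanAddKey(buffer, tup))
        {
            flush_buffer(buffer, arg);
            GinBufferReset(buffer);
        }

        /* accumulate the TIDs for the current key */
        GinBufferStoreTuple(buffer, tup);
    }

    /* flush the data remaining in the buffer (for the last key) */
    if (!GinBufferIsEmpty(buffer))
        flush_buffer(buffer, arg);

    GinBufferFree(buffer);
}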
    1626             : 
    1627             : /*
    1628             :  * Within leader, wait for end of heap scan and merge per-worker results.
    1629             :  *
     1630             :  * After waiting for all workers to finish, merge the per-worker results into
     1631             :  * the complete index. The results from each worker are sorted by key (and
     1632             :  * then by first TID). While combining the per-worker results we accumulate
     1633             :  * the TID list for each distinct key, and insert it into the index once
     1634             :  * all data for that key has been read.
    1635             :  *
    1636             :  * Returns the total number of heap tuples scanned.
    1637             :  */
    1638             : static double
    1639          26 : _gin_parallel_merge(GinBuildState *state)
    1640             : {
    1641             :     GinTuple   *tup;
    1642             :     Size        tuplen;
    1643          26 :     double      reltuples = 0;
    1644             :     GinBuffer  *buffer;
    1645             : 
    1646             :     /* GIN tuples from workers, merged by leader */
    1647          26 :     double      numtuples = 0;
    1648             : 
    1649             :     /* wait for workers to scan table and produce partial results */
    1650          26 :     reltuples = _gin_parallel_heapscan(state);
    1651             : 
    1652             :     /* Execute the sort */
    1653          26 :     pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
    1654             :                                  PROGRESS_GIN_PHASE_PERFORMSORT_2);
    1655             : 
    1656             :     /* do the actual sort in the leader */
    1657          26 :     tuplesort_performsort(state->bs_sortstate);
    1658             : 
    1659             :     /*
    1660             :      * Initialize buffer to combine entries for the same key.
    1661             :      *
     1662             :      * The leader is allowed to use the whole maintenance_work_mem buffer to
     1663             :      * combine data, as the parallel workers have already completed.
    1664             :      */
    1665          26 :     buffer = GinBufferInit(state->ginstate.index);
    1666             : 
    1667             :     /*
    1668             :      * Set the progress target for the next phase.  Reset the block number
     1669             :      * values set by table_index_build_scan.
    1670             :      */
    1671             :     {
    1672          26 :         const int   progress_index[] = {
    1673             :             PROGRESS_CREATEIDX_SUBPHASE,
    1674             :             PROGRESS_CREATEIDX_TUPLES_TOTAL,
    1675             :             PROGRESS_SCAN_BLOCKS_TOTAL,
    1676             :             PROGRESS_SCAN_BLOCKS_DONE
    1677             :         };
    1678          26 :         const int64 progress_vals[] = {
    1679             :             PROGRESS_GIN_PHASE_MERGE_2,
    1680          26 :             state->bs_numtuples,
    1681             :             0, 0
    1682             :         };
    1683             : 
    1684          26 :         pgstat_progress_update_multi_param(4, progress_index, progress_vals);
    1685             :     }
    1686             : 
    1687             :     /*
    1688             :      * Read the GIN tuples from the shared tuplesort, sorted by category and
    1689             :      * key. That probably gives us order matching how data is organized in the
    1690             :      * index.
    1691             :      *
    1692             :      * We don't insert the GIN tuples right away, but instead accumulate as
    1693             :      * many TIDs for the same key as possible, and then insert that at once.
    1694             :      * This way we don't need to decompress/recompress the posting lists, etc.
    1695             :      */
    1696       31302 :     while ((tup = tuplesort_getgintuple(state->bs_sortstate, &tuplen, true)) != NULL)
    1697             :     {
    1698             :         MemoryContext oldCtx;
    1699             : 
    1700       31276 :         CHECK_FOR_INTERRUPTS();
    1701             : 
    1702             :         /*
    1703             :          * If the buffer can accept the new GIN tuple, just store it there and
    1704             :          * we're done. If it's a different key (or maybe too much data) flush
    1705             :          * the current contents into the index first.
    1706             :          */
    1707       31276 :         if (!GinBufferCanAddKey(buffer, tup))
    1708             :         {
    1709             :             /*
     1710             :              * Buffer is not empty and stores a different key - flush its
     1711             :              * data into the index, and start a new entry for the current
     1712             :              * GinTuple.
    1713             :              */
    1714       22626 :             AssertCheckItemPointers(buffer);
    1715             : 
    1716       22626 :             oldCtx = MemoryContextSwitchTo(state->tmpCtx);
    1717             : 
    1718       22626 :             ginEntryInsert(&state->ginstate,
    1719       22626 :                            buffer->attnum, buffer->key, buffer->category,
    1720       22626 :                            buffer->items, buffer->nitems, &state->buildStats);
    1721             : 
    1722       22626 :             MemoryContextSwitchTo(oldCtx);
    1723       22626 :             MemoryContextReset(state->tmpCtx);
    1724             : 
    1725             :             /* discard the existing data */
    1726       22626 :             GinBufferReset(buffer);
    1727             :         }
    1728             : 
    1729             :         /*
    1730             :          * We're about to add a GIN tuple to the buffer - check the memory
     1731             :          * limit first, and maybe write out some of the data into the index,
     1732             :          * if needed (and possible). We only flush the part of the TID
    1733             :          * list that we know won't change, and only if there's enough data for
    1734             :          * compression to work well.
    1735             :          */
    1736       31276 :         if (GinBufferShouldTrim(buffer, tup))
    1737             :         {
    1738             :             Assert(buffer->nfrozen > 0);
    1739             : 
    1740             :             /*
     1741             :              * The buffer is for the same key but has grown too large -
     1742             :              * write the frozen part of the TID list into the index, and
     1743             :              * keep only the unfrozen part in memory.
    1744             :              */
    1745           0 :             AssertCheckItemPointers(buffer);
    1746             : 
    1747           0 :             oldCtx = MemoryContextSwitchTo(state->tmpCtx);
    1748             : 
    1749           0 :             ginEntryInsert(&state->ginstate,
    1750           0 :                            buffer->attnum, buffer->key, buffer->category,
    1751           0 :                            buffer->items, buffer->nfrozen, &state->buildStats);
    1752             : 
    1753           0 :             MemoryContextSwitchTo(oldCtx);
    1754           0 :             MemoryContextReset(state->tmpCtx);
    1755             : 
     1756             :             /* truncate the data we've just written out */
    1757           0 :             GinBufferTrim(buffer);
    1758             :         }
    1759             : 
    1760             :         /*
    1761             :          * Remember data for the current tuple (either remember the new key,
     1762             :          * or append it to the existing data).
    1763             :          */
    1764       31276 :         GinBufferStoreTuple(buffer, tup);
    1765             : 
    1766             :         /* Report progress */
    1767       31276 :         pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
    1768             :                                      ++numtuples);
    1769             :     }
    1770             : 
    1771             :     /* flush data remaining in the buffer (for the last key) */
    1772          26 :     if (!GinBufferIsEmpty(buffer))
    1773             :     {
    1774          24 :         AssertCheckItemPointers(buffer);
    1775             : 
    1776          24 :         ginEntryInsert(&state->ginstate,
    1777          24 :                        buffer->attnum, buffer->key, buffer->category,
    1778          24 :                        buffer->items, buffer->nitems, &state->buildStats);
    1779             : 
    1780             :         /* discard the existing data */
    1781          24 :         GinBufferReset(buffer);
    1782             : 
    1783             :         /* Report progress */
    1784          24 :         pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
    1785             :                                      ++numtuples);
    1786             :     }
    1787             : 
    1788             :     /* release all the memory */
    1789          26 :     GinBufferFree(buffer);
    1790             : 
    1791          26 :     tuplesort_end(state->bs_sortstate);
    1792             : 
    1793          26 :     return reltuples;
    1794             : }
    1795             : 
    1796             : /*
    1797             :  * Returns size of shared memory required to store state for a parallel
    1798             :  * gin index build based on the snapshot its parallel scan will use.
    1799             :  */
    1800             : static Size
    1801          26 : _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot)
    1802             : {
    1803             :     /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
    1804          26 :     return add_size(BUFFERALIGN(sizeof(GinBuildShared)),
    1805             :                     table_parallelscan_estimate(heap, snapshot));
    1806             : }
    1807             : 
    1808             : /*
    1809             :  * Within leader, participate as a parallel worker.
    1810             :  */
    1811             : static void
    1812          26 : _gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Relation index)
    1813             : {
    1814          26 :     GinLeader  *ginleader = buildstate->bs_leader;
    1815             :     int         sortmem;
    1816             : 
    1817             :     /*
     1818             :      * Might as well use a reliable figure when doling out
     1819             :      * maintenance_work_mem (when the requested number of workers was not
     1820             :      * launched, this will be somewhat higher than it is for other workers).
    1821             :      */
    1822          26 :     sortmem = maintenance_work_mem / ginleader->nparticipanttuplesorts;
    1823             : 
    1824             :     /* Perform work common to all participants */
    1825          26 :     _gin_parallel_scan_and_build(buildstate, ginleader->ginshared,
    1826             :                                  ginleader->sharedsort, heap, index,
    1827             :                                  sortmem, true);
    1828          26 : }
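
/*
 * [Editorial example, invented numbers; assumes workers divide by the
 * shared scantuplesortstates, as parallel btree builds do.]  With
 * maintenance_work_mem set to 64MB (65536 kB), two workers requested but
 * only one launched, the leader divides by nparticipanttuplesorts = 2
 * and gets 32768 kB, while the launched worker divides by
 * scantuplesortstates = 3 and gets 21845 kB -- the "somewhat higher"
 * case mentioned above.
 */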
    1829             : 
    1830             : /*
    1831             :  * _gin_process_worker_data
    1832             :  *      First phase of the key merging, happening in the worker.
    1833             :  *
    1834             :  * Depending on the number of distinct keys, the TID lists produced by the
    1835             :  * callback may be very short (due to frequent evictions in the callback).
    1836             :  * But combining many tiny lists is expensive, so we try to do as much as
    1837             :  * possible in the workers and only then pass the results to the leader.
    1838             :  *
    1839             :  * We read the tuples sorted by the key, and merge them into larger lists.
     1840             :  * Aside from trimming (see GinBufferShouldTrim), there's no memory limit,
     1841             :  * so this typically produces one huge (sorted) list per key in each
     1842             :  * worker, so the leader only needs to do a small number of mergesorts.
    1843             :  */
    1844             : static void
    1845          78 : _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort,
    1846             :                          bool progress)
    1847             : {
    1848             :     GinTuple   *tup;
    1849             :     Size        tuplen;
    1850             : 
    1851             :     GinBuffer  *buffer;
    1852             : 
    1853             :     /*
    1854             :      * Initialize buffer to combine entries for the same key.
    1855             :      *
     1856             :      * The workers are limited to the same amount of memory as during the
     1857             :      * sort in ginBuildCallbackParallel. But perhaps this should instead be
     1858             :      * the 32MB assumed during planning, just like there.
    1859             :      */
    1860          78 :     buffer = GinBufferInit(state->ginstate.index);
    1861             : 
    1862             :     /* sort the raw per-worker data */
    1863          78 :     if (progress)
    1864          26 :         pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
    1865             :                                      PROGRESS_GIN_PHASE_PERFORMSORT_1);
    1866             : 
    1867          78 :     tuplesort_performsort(state->bs_worker_sort);
    1868             : 
    1869             :     /* reset the number of GIN tuples produced by this worker */
    1870          78 :     state->bs_numtuples = 0;
    1871             : 
    1872          78 :     if (progress)
    1873          26 :         pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
    1874             :                                      PROGRESS_GIN_PHASE_MERGE_1);
    1875             : 
    1876             :     /*
     1877             :      * Read the GIN tuples from the private worker tuplesort, sorted by the
     1878             :      * key, and merge them into larger chunks for the leader to combine.
    1879             :      */
    1880       31354 :     while ((tup = tuplesort_getgintuple(worker_sort, &tuplen, true)) != NULL)
    1881             :     {
    1882             : 
    1883       31276 :         CHECK_FOR_INTERRUPTS();
    1884             : 
    1885             :         /*
    1886             :          * If the buffer can accept the new GIN tuple, just store it there and
    1887             :          * we're done. If it's a different key (or maybe too much data) flush
     1888             :          * the current contents into the shared tuplesort first.
    1889             :          */
    1890       31276 :         if (!GinBufferCanAddKey(buffer, tup))
    1891             :         {
    1892             :             GinTuple   *ntup;
    1893             :             Size        ntuplen;
    1894             : 
    1895             :             /*
     1896             :              * Buffer is not empty and stores a different key - flush its
     1897             :              * data into the shared tuplesort, and start a new entry for
     1898             :              * the current GinTuple.
    1899             :              */
    1900       31238 :             AssertCheckItemPointers(buffer);
    1901             : 
    1902       31238 :             ntup = _gin_build_tuple(buffer->attnum, buffer->category,
    1903       31238 :                                     buffer->key, buffer->typlen, buffer->typbyval,
    1904       31238 :                                     buffer->items, buffer->nitems, &ntuplen);
    1905             : 
    1906       31238 :             tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
    1907       31238 :             state->bs_numtuples++;
    1908             : 
    1909       31238 :             pfree(ntup);
    1910             : 
    1911             :             /* discard the existing data */
    1912       31238 :             GinBufferReset(buffer);
    1913             :         }
    1914             : 
    1915             :         /*
    1916             :          * We're about to add a GIN tuple to the buffer - check the memory
     1917             :          * limit first, and maybe write out some of the data into the shared
     1918             :          * tuplesort, if needed (and possible). We only flush the part of the
     1919             :          * TID list that we know won't change, and only if there's enough data
     1920             :          * for compression to work well.
    1921             :          */
    1922       31276 :         if (GinBufferShouldTrim(buffer, tup))
    1923             :         {
    1924             :             GinTuple   *ntup;
    1925             :             Size        ntuplen;
    1926             : 
    1927             :             Assert(buffer->nfrozen > 0);
    1928             : 
    1929             :             /*
     1930             :              * The buffer is for the same key but has grown too large -
     1931             :              * write the frozen part of the TID list into the shared
     1932             :              * tuplesort, and keep only the unfrozen part in memory.
    1933             :              */
    1934           0 :             AssertCheckItemPointers(buffer);
    1935             : 
    1936           0 :             ntup = _gin_build_tuple(buffer->attnum, buffer->category,
    1937           0 :                                     buffer->key, buffer->typlen, buffer->typbyval,
    1938           0 :                                     buffer->items, buffer->nfrozen, &ntuplen);
    1939             : 
    1940           0 :             tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
    1941             : 
    1942           0 :             pfree(ntup);
    1943             : 
     1944             :             /* truncate the data we've just written out */
    1945           0 :             GinBufferTrim(buffer);
    1946             :         }
    1947             : 
    1948             :         /*
    1949             :          * Remember data for the current tuple (either remember the new key,
     1950             :          * or append it to the existing data).
    1951             :          */
    1952       31276 :         GinBufferStoreTuple(buffer, tup);
    1953             :     }
    1954             : 
    1955             :     /* flush data remaining in the buffer (for the last key) */
    1956          78 :     if (!GinBufferIsEmpty(buffer))
    1957             :     {
    1958             :         GinTuple   *ntup;
    1959             :         Size        ntuplen;
    1960             : 
    1961          38 :         AssertCheckItemPointers(buffer);
    1962             : 
    1963          38 :         ntup = _gin_build_tuple(buffer->attnum, buffer->category,
    1964          38 :                                 buffer->key, buffer->typlen, buffer->typbyval,
    1965          38 :                                 buffer->items, buffer->nitems, &ntuplen);
    1966             : 
    1967          38 :         tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
    1968          38 :         state->bs_numtuples++;
    1969             : 
    1970          38 :         pfree(ntup);
    1971             : 
    1972             :         /* discard the existing data */
    1973          38 :         GinBufferReset(buffer);
    1974             :     }
    1975             : 
    1976             :     /* release all the memory */
    1977          78 :     GinBufferFree(buffer);
    1978             : 
    1979          78 :     tuplesort_end(worker_sort);
    1980          78 : }
    1981             : 
    1982             : /*
    1983             :  * Perform a worker's portion of a parallel GIN index build sort.
    1984             :  *
    1985             :  * This generates a tuplesort for the worker portion of the table.
    1986             :  *
    1987             :  * sortmem is the amount of working memory to use within each worker,
    1988             :  * expressed in KBs.
    1989             :  *
    1990             :  * When this returns, workers are done, and need only release resources.
    1991             :  *
    1992             :  * Before feeding data into a shared tuplesort (for the leader process),
    1993             :  * the workers process data in two phases.
    1994             :  *
    1995             :  * 1) A worker reads a portion of rows from the table, accumulates entries
    1996             :  * in memory, and flushes them into a private tuplesort (e.g. when they
    1997             :  * would use too much memory).
    1998             :  *
    1999             :  * 2) The private tuplesort gets sorted (by key and TID), the worker reads
    2000             :  * the data again, and combines the entries as much as possible. This has
    2001             :  * to happen eventually, and this way it's done in workers in parallel.
    2002             :  *
    2003             :  * Finally, the combined entries are written into the shared tuplesort, so
    2004             :  * that the leader can process them.
    2005             :  *
    2006             :  * How well this works (compared to just writing entries into the shared
    2007             :  * tuplesort) depends on the data set. For large tables with many distinct
    2008             :  * keys this helps a lot. With many distinct keys it's likely the buffer has
    2009             :  * to be flushed often, generating many entries with the same key and short
    2010             :  * TID lists. These entries need to be sorted and merged at some point,
    2011             :  * before writing them to the index. The merging is quite expensive; it can
    2012             :  * easily be ~50% of a serial build, and doing as much of it as possible in
    2013             :  * the workers means it's parallelized. The leader still has to merge results
    2014             :  * from the workers, but it's much more efficient to merge a few large
    2015             :  * entries than many tiny ones.
    2016             :  *
    2017             :  * This also reduces the amount of data the workers pass to the leader through
    2018             :  * the shared tuplesort. OTOH the workers need more space for the private sort,
    2019             :  * possibly up to 2x of the data, if no entries get merged in a worker. But this
    2020             :  * is very unlikely, and the only consequence is inefficiency, so we ignore it.
    2021             :  */
    2022             : static void
    2023          78 : _gin_parallel_scan_and_build(GinBuildState *state,
    2024             :                              GinBuildShared *ginshared, Sharedsort *sharedsort,
    2025             :                              Relation heap, Relation index,
    2026             :                              int sortmem, bool progress)
    2027             : {
    2028             :     SortCoordinate coordinate;
    2029             :     TableScanDesc scan;
    2030             :     double      reltuples;
    2031             :     IndexInfo  *indexInfo;
    2032             : 
    2033             :     /* Initialize local tuplesort coordination state */
    2034          78 :     coordinate = palloc0_object(SortCoordinateData);
    2035          78 :     coordinate->isWorker = true;
    2036          78 :     coordinate->nParticipants = -1;
    2037          78 :     coordinate->sharedsort = sharedsort;
    2038             : 
    2039             :     /* remember how much space is allowed for the accumulated entries */
    2040          78 :     state->work_mem = (sortmem / 2);
    2041             : 
    2042             :     /* remember how many workers participate in the build */
    2043          78 :     state->bs_num_workers = ginshared->scantuplesortstates;
    2044             : 
    2045             :     /* Begin "partial" tuplesort */
    2046          78 :     state->bs_sortstate = tuplesort_begin_index_gin(heap, index,
    2047             :                                                     state->work_mem,
    2048             :                                                     coordinate,
    2049             :                                                     TUPLESORT_NONE);
    2050             : 
    2051             :     /* Local per-worker sort of raw data */
    2052          78 :     state->bs_worker_sort = tuplesort_begin_index_gin(heap, index,
    2053             :                                                       state->work_mem,
    2054             :                                                       NULL,
    2055             :                                                       TUPLESORT_NONE);
    2056             : 
    2057             :     /* Join parallel scan */
    2058          78 :     indexInfo = BuildIndexInfo(index);
    2059          78 :     indexInfo->ii_Concurrent = ginshared->isconcurrent;
    2060             : 
    2061          78 :     scan = table_beginscan_parallel(heap,
    2062             :                                     ParallelTableScanFromGinBuildShared(ginshared));
    2063             : 
    2064          78 :     reltuples = table_index_build_scan(heap, index, indexInfo, true, progress,
    2065             :                                        ginBuildCallbackParallel, state, scan);
    2066             : 
    2067             :     /* write remaining accumulated entries */
    2068          78 :     ginFlushBuildState(state, index);
    2069             : 
    2070             :     /*
    2071             :      * Do the first phase of in-worker processing - sort the data produced by
    2072             :      * the callback, combine it into much larger chunks, and place those into
    2073             :      * the shared tuplesort for the leader to process.
    2074             :      */
    2075          78 :     _gin_process_worker_data(state, state->bs_worker_sort, progress);
    2076             : 
    2077             :     /* sort the GIN tuples built by this worker */
    2078          78 :     tuplesort_performsort(state->bs_sortstate);
    2079             : 
    2080          78 :     state->bs_reltuples += reltuples;
    2081             : 
    2082             :     /*
    2083             :      * Done.  Record ambuild statistics.
    2084             :      */
    2085          78 :     SpinLockAcquire(&ginshared->mutex);
    2086          78 :     ginshared->nparticipantsdone++;
    2087          78 :     ginshared->reltuples += state->bs_reltuples;
    2088          78 :     ginshared->indtuples += state->bs_numtuples;
    2089          78 :     SpinLockRelease(&ginshared->mutex);
    2090             : 
    2091             :     /* Notify leader */
    2092          78 :     ConditionVariableSignal(&ginshared->workersdonecv);
    2093             : 
    2094          78 :     tuplesort_end(state->bs_sortstate);
    2095          78 : }
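
To make the two-phase combine described above concrete, here is a minimal,
self-contained C sketch of the pattern. It is illustrative only, not
PostgreSQL code: plain qsort() on a toy (key, tid) pair stands in for the
private tuplesort, and the merge loop shows why combining equal-key entries
in the worker leaves the leader with a few large entries instead of many
tiny ones.

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { int key; int tid; } Entry;

    /* sort by key, then by TID, mirroring the (key, first TID) ordering */
    static int
    entry_cmp(const void *a, const void *b)
    {
        const Entry *ea = (const Entry *) a;
        const Entry *eb = (const Entry *) b;

        if (ea->key != eb->key)
            return (ea->key > eb->key) - (ea->key < eb->key);
        return (ea->tid > eb->tid) - (ea->tid < eb->tid);
    }

    int
    main(void)
    {
        /* entries as the scan callback might emit them: unsorted, the same
         * key appearing repeatedly with short TID lists */
        Entry entries[] = {{2, 10}, {1, 3}, {2, 7}, {1, 8}, {2, 12}};
        int n = (int) (sizeof(entries) / sizeof(entries[0]));
        int i = 0;

        /* phase 1: the private sort (by key, then TID) */
        qsort(entries, n, sizeof(Entry), entry_cmp);

        /* phase 2: combine runs of equal keys into one entry per key */
        while (i < n)
        {
            int j = i;

            printf("key %d: TIDs", entries[i].key);
            while (j < n && entries[j].key == entries[i].key)
                printf(" %d", entries[j++].tid);
            printf("\n");
            i = j;
        }
        return 0;
    }

In the real build each combined run is serialized by _gin_build_tuple and
handed to the shared tuplesort via tuplesort_putgintuple, as in the code
above.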
    2096             : 
    2097             : /*
    2098             :  * Perform work within a launched parallel process.
    2099             :  */
    2100             : void
    2101          52 : _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
    2102             : {
    2103             :     char       *sharedquery;
    2104             :     GinBuildShared *ginshared;
    2105             :     Sharedsort *sharedsort;
    2106             :     GinBuildState buildstate;
    2107             :     Relation    heapRel;
    2108             :     Relation    indexRel;
    2109             :     LOCKMODE    heapLockmode;
    2110             :     LOCKMODE    indexLockmode;
    2111             :     WalUsage   *walusage;
    2112             :     BufferUsage *bufferusage;
    2113             :     int         sortmem;
    2114             : 
    2115             :     /*
    2116             :      * The only possible status flag that can be set for the parallel worker is
    2117             :      * PROC_IN_SAFE_IC.
    2118             :      */
    2119             :     Assert((MyProc->statusFlags == 0) ||
    2120             :            (MyProc->statusFlags == PROC_IN_SAFE_IC));
    2121             : 
    2122             :     /* Set debug_query_string for individual workers first */
    2123          52 :     sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true);
    2124          52 :     debug_query_string = sharedquery;
    2125             : 
    2126             :     /* Report the query string from leader */
    2127          52 :     pgstat_report_activity(STATE_RUNNING, debug_query_string);
    2128             : 
    2129             :     /* Look up gin shared state */
    2130          52 :     ginshared = shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false);
    2131             : 
    2132             :     /* Open relations using lock modes known to be obtained by index.c */
    2133          52 :     if (!ginshared->isconcurrent)
    2134             :     {
    2135          40 :         heapLockmode = ShareLock;
    2136          40 :         indexLockmode = AccessExclusiveLock;
    2137             :     }
    2138             :     else
    2139             :     {
    2140          12 :         heapLockmode = ShareUpdateExclusiveLock;
    2141          12 :         indexLockmode = RowExclusiveLock;
    2142             :     }
    2143             : 
    2144             :     /* Open relations within worker */
    2145          52 :     heapRel = table_open(ginshared->heaprelid, heapLockmode);
    2146          52 :     indexRel = index_open(ginshared->indexrelid, indexLockmode);
    2147             : 
    2148             :     /* initialize the GIN build state */
    2149          52 :     initGinState(&buildstate.ginstate, indexRel);
    2150          52 :     buildstate.indtuples = 0;
    2151          52 :     memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
    2152          52 :     memset(&buildstate.tid, 0, sizeof(ItemPointerData));
    2153             : 
    2154             :     /*
    2155             :      * create a temporary memory context that is used to hold data not yet
    2156             :      * dumped out to the index
    2157             :      */
    2158          52 :     buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
    2159             :                                               "Gin build temporary context",
    2160             :                                               ALLOCSET_DEFAULT_SIZES);
    2161             : 
    2162             :     /*
    2163             :      * create a temporary memory context that is used for calling
    2164             :      * ginExtractEntries(), and can be reset after each tuple
    2165             :      */
    2166          52 :     buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
    2167             :                                                "Gin build temporary context for user-defined function",
    2168             :                                                ALLOCSET_DEFAULT_SIZES);
    2169             : 
    2170          52 :     buildstate.accum.ginstate = &buildstate.ginstate;
    2171          52 :     ginInitBA(&buildstate.accum);
    2172             : 
    2173             : 
    2174             :     /* Look up shared state private to tuplesort.c */
    2175          52 :     sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
    2176          52 :     tuplesort_attach_shared(sharedsort, seg);
    2177             : 
    2178             :     /* Prepare to track buffer usage during parallel execution */
    2179          52 :     InstrStartParallelQuery();
    2180             : 
    2181             :     /*
    2182             :      * Might as well use a reliable figure when doling out maintenance_work_mem
    2183             :      * (when the requested number of workers was not launched, this will be
    2184             :      * somewhat higher than it is for other workers).
    2185             :      */
    2186          52 :     sortmem = maintenance_work_mem / ginshared->scantuplesortstates;
    2187             : 
    2188          52 :     _gin_parallel_scan_and_build(&buildstate, ginshared, sharedsort,
    2189             :                                  heapRel, indexRel, sortmem, false);
    2190             : 
    2191             :     /* Report WAL/buffer usage during parallel execution */
    2192          52 :     bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
    2193          52 :     walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
    2194          52 :     InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber],
    2195          52 :                           &walusage[ParallelWorkerNumber]);
    2196             : 
    2197          52 :     index_close(indexRel, indexLockmode);
    2198          52 :     table_close(heapRel, heapLockmode);
    2199          52 : }
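
To put numbers on the memory budgeting: with maintenance_work_mem set to
64MB and two launched workers, each worker receives sortmem = 32MB, and
_gin_parallel_scan_and_build then assigns half of that to each of its two
tuplesorts. A minimal sketch of the arithmetic, with illustrative values:

    #include <stdio.h>

    int
    main(void)
    {
        int maintenance_work_mem = 65536;   /* in KB, i.e. 64MB (example) */
        int nworkers = 2;                   /* scantuplesortstates */
        int sortmem = maintenance_work_mem / nworkers;  /* 32MB per worker */
        int per_sort = sortmem / 2;     /* 16MB for each of the two sorts */

        printf("per worker: %d KB, per tuplesort: %d KB\n",
               sortmem, per_sort);
        return 0;
    }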
    2200             : 
    2201             : /*
    2202             :  * Used to keep track of compressed TID lists when building a GIN tuple.
    2203             :  */
    2204             : typedef struct
    2205             : {
    2206             :     dlist_node  node;           /* linked list pointers */
    2207             :     GinPostingList *seg;
    2208             : } GinSegmentInfo;
    2209             : 
    2210             : /*
    2211             :  * _gin_build_tuple
    2212             :  *      Serialize the state for an index key into a tuple for tuplesort.
    2213             :  *
    2214             :  * The tuple has a number of scalar fields (mostly matching the build state),
    2215             :  * and then a data array that stores the key first, and then the TID list.
    2216             :  *
    2217             :  * For by-reference data types, we store the actual data. For by-val types
    2218             :  * we simply copy the whole Datum, so that we don't have to care about stuff
    2219             :  * like endianness etc. We could make it a little bit smaller, but it's not
    2220             :  * worth it - it's a tiny fraction of the data, and we need to align the
    2221             :  * start of the TID list anyway. So we wouldn't save anything. (This would
    2222             :  * not be a good idea for the permanent in-index data, since we'd prefer
    2223             :  * that it not depend on sizeof(Datum). But this is just a transient
    2224             :  * representation to use while sorting the data.)
    2225             :  *
    2226             :  * The TID list is serialized in compressed form - it's highly compressible,
    2227             :  * and we already have ginCompressPostingList for this purpose. The list may
    2228             :  * be pretty long, so we compress it into multiple segments and then copy all
    2229             :  * of that into the GIN tuple.
    2230             :  */
    2231             : static GinTuple *
    2232       62552 : _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
    2233             :                  Datum key, int16 typlen, bool typbyval,
    2234             :                  ItemPointerData *items, uint32 nitems,
    2235             :                  Size *len)
    2236             : {
    2237             :     GinTuple   *tuple;
    2238             :     char       *ptr;
    2239             : 
    2240             :     Size        tuplen;
    2241             :     int         keylen;
    2242             : 
    2243             :     dlist_mutable_iter iter;
    2244             :     dlist_head  segments;
    2245             :     int         ncompressed;
    2246             :     Size        compresslen;
    2247             : 
    2248             :     /*
    2249             :      * Calculate how long the key value is. Only keys with GIN_CAT_NORM_KEY
    2250             :      * have an actual non-empty key. We include varlena headers and \0 bytes for
    2251             :      * strings, to make it easier to access the data in-line.
    2252             :      *
    2253             :      * For byval types we simply copy the whole Datum. We could store just the
    2254             :      * necessary bytes, but this is simpler to work with and not worth the
    2255             :      * extra complexity. Moreover we still need to align the data to allow
    2256             :      * direct access to the item pointers.
    2257             :      *
    2258             :      * XXX Note that for byval types we store the whole datum, no matter what
    2259             :      * the typlen value is.
    2260             :      */
    2261       62552 :     if (category != GIN_CAT_NORM_KEY)
    2262          60 :         keylen = 0;
    2263       62492 :     else if (typbyval)
    2264       19956 :         keylen = sizeof(Datum);
    2265       42536 :     else if (typlen > 0)
    2266           0 :         keylen = typlen;
    2267       42536 :     else if (typlen == -1)
    2268       42536 :         keylen = VARSIZE_ANY(DatumGetPointer(key));
    2269           0 :     else if (typlen == -2)
    2270           0 :         keylen = strlen(DatumGetPointer(key)) + 1;
    2271             :     else
    2272           0 :         elog(ERROR, "unexpected typlen value (%d)", typlen);
    2273             : 
    2274             :     /* compress the item pointers */
    2275       62552 :     ncompressed = 0;
    2276       62552 :     compresslen = 0;
    2277       62552 :     dlist_init(&segments);
    2278             : 
    2279             :     /* generate compressed segments of TID list chunks */
    2280      125104 :     while (ncompressed < nitems)
    2281             :     {
    2282             :         int         cnt;
    2283       62552 :         GinSegmentInfo *seginfo = palloc_object(GinSegmentInfo);
    2284             : 
    2285      125104 :         seginfo->seg = ginCompressPostingList(&items[ncompressed],
    2286       62552 :                                               (nitems - ncompressed),
    2287             :                                               UINT16_MAX,
    2288             :                                               &cnt);
    2289             : 
    2290       62552 :         ncompressed += cnt;
    2291       62552 :         compresslen += SizeOfGinPostingList(seginfo->seg);
    2292             : 
    2293       62552 :         dlist_push_tail(&segments, &seginfo->node);
    2294             :     }
    2295             : 
    2296             :     /*
    2297             :      * Determine GIN tuple length with all the data included. Be careful about
    2298             :      * alignment, to allow direct access to compressed segments (those require
    2299             :      * only SHORTALIGN).
    2300             :      */
    2301       62552 :     tuplen = SHORTALIGN(offsetof(GinTuple, data) + keylen) + compresslen;
    2302             : 
    2303       62552 :     *len = tuplen;
    2304             : 
    2305             :     /*
    2306             :      * Allocate space for the whole GIN tuple.
    2307             :      *
    2308             :      * The palloc0 is needed - writetup_index_gin will write the whole tuple
    2309             :      * to disk, so we need to make sure the padding bytes are defined
    2310             :      * (otherwise valgrind would report this).
    2311             :      */
    2312       62552 :     tuple = palloc0(tuplen);
    2313             : 
    2314       62552 :     tuple->tuplen = tuplen;
    2315       62552 :     tuple->attrnum = attrnum;
    2316       62552 :     tuple->category = category;
    2317       62552 :     tuple->keylen = keylen;
    2318       62552 :     tuple->nitems = nitems;
    2319             : 
    2320             :     /* key type info */
    2321       62552 :     tuple->typlen = typlen;
    2322       62552 :     tuple->typbyval = typbyval;
    2323             : 
    2324             :     /*
    2325             :      * Copy the key and items into the tuple. First the key value, which we
    2326             :      * can simply copy right at the beginning of the data array.
    2327             :      */
    2328       62552 :     if (category == GIN_CAT_NORM_KEY)
    2329             :     {
    2330       62492 :         if (typbyval)
    2331             :         {
    2332       19956 :             memcpy(tuple->data, &key, sizeof(Datum));
    2333             :         }
    2334       42536 :         else if (typlen > 0) /* byref, fixed length */
    2335             :         {
    2336           0 :             memcpy(tuple->data, DatumGetPointer(key), typlen);
    2337             :         }
    2338       42536 :         else if (typlen == -1)
    2339             :         {
    2340       42536 :             memcpy(tuple->data, DatumGetPointer(key), keylen);
    2341             :         }
    2342           0 :         else if (typlen == -2)
    2343             :         {
    2344           0 :             memcpy(tuple->data, DatumGetPointer(key), keylen);
    2345             :         }
    2346             :     }
    2347             : 
    2348             :     /* finally, copy the TIDs into the array */
    2349       62552 :     ptr = (char *) tuple + SHORTALIGN(offsetof(GinTuple, data) + keylen);
    2350             : 
    2351             :     /* copy in the compressed data, and free the segments */
    2352      125104 :     dlist_foreach_modify(iter, &segments)
    2353             :     {
    2354       62552 :         GinSegmentInfo *seginfo = dlist_container(GinSegmentInfo, node, iter.cur);
    2355             : 
    2356       62552 :         memcpy(ptr, seginfo->seg, SizeOfGinPostingList(seginfo->seg));
    2357             : 
    2358       62552 :         ptr += SizeOfGinPostingList(seginfo->seg);
    2359             : 
    2360       62552 :         dlist_delete(&seginfo->node);
    2361             : 
    2362       62552 :         pfree(seginfo->seg);
    2363       62552 :         pfree(seginfo);
    2364             :     }
    2365             : 
    2366       62552 :     return tuple;
    2367             : }
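
A toy version of the size calculation may make the layout easier to see.
The sketch below is illustrative only: ToyGinTuple is a hypothetical
stand-in for GinTuple, and the SHORTALIGN definition assumes the common
2-byte alignment of short.

    #include <stdio.h>
    #include <stddef.h>

    /* round up to a multiple of 2, assuming ALIGNOF_SHORT == 2 */
    #define SHORTALIGN(LEN) (((size_t) (LEN) + 1) & ~((size_t) 1))

    typedef struct
    {
        size_t      tuplen;     /* total tuple length */
        int         attrnum;    /* other scalar fields elided */
        char        data[];     /* key bytes, then compressed segments */
    } ToyGinTuple;

    int
    main(void)
    {
        size_t      keylen = 11;        /* e.g. a short varlena text key */
        size_t      compresslen = 24;   /* total size of the segments */

        /* same formula as in _gin_build_tuple */
        size_t      tuplen = SHORTALIGN(offsetof(ToyGinTuple, data) + keylen)
                             + compresslen;

        printf("key at offset %zu, segments at %zu, total %zu bytes\n",
               offsetof(ToyGinTuple, data),
               SHORTALIGN(offsetof(ToyGinTuple, data) + keylen),
               tuplen);
        return 0;
    }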
    2368             : 
    2369             : /*
    2370             :  * _gin_parse_tuple_key
    2371             :  *      Return a Datum representing the key stored in the tuple.
    2372             :  *
    2373             :  * Most of the tuple fields are directly accessible; the only things that
    2374             :  * need more care are the key and the TID list.
    2375             :  *
    2376             :  * For the key, this returns a regular Datum representing it. It's either the
    2377             :  * actual key value, or a pointer to the beginning of the data array (which is
    2378             :  * where the data was copied by _gin_build_tuple).
    2379             :  */
    2380             : static Datum
    2381      243160 : _gin_parse_tuple_key(GinTuple *a)
    2382             : {
    2383             :     Datum       key;
    2384             : 
    2385      243160 :     if (a->category != GIN_CAT_NORM_KEY)
    2386          60 :         return (Datum) 0;
    2387             : 
    2388      243100 :     if (a->typbyval)
    2389             :     {
    2390       59844 :         memcpy(&key, a->data, a->keylen);
    2391       59844 :         return key;
    2392             :     }
    2393             : 
    2394      183256 :     return PointerGetDatum(a->data);
    2395             : }
    2396             : 
    2397             : /*
    2398             :  * _gin_parse_tuple_items
    2399             :  *      Return a palloc'd array of item pointers decompressed from the tuple.
    2400             :  */
    2401             : static ItemPointer
    2402       62552 : _gin_parse_tuple_items(GinTuple *a)
    2403             : {
    2404             :     int         len;
    2405             :     char       *ptr;
    2406             :     int         ndecoded;
    2407             :     ItemPointer items;
    2408             : 
    2409       62552 :     len = a->tuplen - SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
    2410       62552 :     ptr = (char *) a + SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
    2411             : 
    2412       62552 :     items = ginPostingListDecodeAllSegments((GinPostingList *) ptr, len, &ndecoded);
    2413             : 
    2414             :     Assert(ndecoded == a->nitems);
    2415             : 
    2416       62552 :     return items;
    2417             : }
    2418             : 
    2419             : /*
    2420             :  * _gin_compare_tuples
    2421             :  *      Compare GIN tuples, used by tuplesort during parallel index build.
    2422             :  *
    2423             :  * The scalar fields (attrnum, category) are compared first; the key value is
    2424             :  * compared last. The key comparison is done using a type-specific sort
    2425             :  * support function.
    2426             :  *
    2427             :  * If the key value matches, we compare the first TID value in the TID list,
    2428             :  * which means the tuples are merged in an order in which they are most
    2429             :  * likely to be simply concatenated. (This "first" TID will also allow us
    2430             :  * to determine a point up to which the list is fully determined and can be
    2431             :  * written into the index to enforce a memory limit etc.)
    2432             :  */
    2433             : int
    2434       90394 : _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup)
    2435             : {
    2436             :     int         r;
    2437             :     Datum       keya,
    2438             :                 keyb;
    2439             : 
    2440       90394 :     if (a->attrnum < b->attrnum)
    2441           0 :         return -1;
    2442             : 
    2443       90394 :     if (a->attrnum > b->attrnum)
    2444           0 :         return 1;
    2445             : 
    2446       90394 :     if (a->category < b->category)
    2447          66 :         return -1;
    2448             : 
    2449       90328 :     if (a->category > b->category)
    2450          14 :         return 1;
    2451             : 
    2452       90314 :     if (a->category == GIN_CAT_NORM_KEY)
    2453             :     {
    2454       90304 :         keya = _gin_parse_tuple_key(a);
    2455       90304 :         keyb = _gin_parse_tuple_key(b);
    2456             : 
    2457       90304 :         r = ApplySortComparator(keya, false,
    2458             :                                 keyb, false,
    2459       90304 :                                 &ssup[a->attrnum - 1]);
    2460             : 
    2461             :         /* if the key is the same, consider the first TID in the array */
    2462      101488 :         return (r != 0) ? r : ItemPointerCompare(GinTupleGetFirst(a),
    2463       11184 :                                                  GinTupleGetFirst(b));
    2464             :     }
    2465             : 
    2466          10 :     return ItemPointerCompare(GinTupleGetFirst(a),
    2467          10 :                               GinTupleGetFirst(b));
    2468             : }
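
The comparison precedence (attrnum, then category, then key, then first
TID) can be condensed into a toy comparator. This is a simplified
illustration, with a plain integer key standing in for the SortSupport
machinery:

    #include <stdio.h>

    typedef struct
    {
        int attrnum;
        int category;
        int key;            /* stands in for the Datum key */
        int first_tid;      /* first TID of the posting list */
    } ToyTuple;

    static int
    cmp_int(int a, int b)
    {
        return (a > b) - (a < b);
    }

    static int
    toy_compare(const ToyTuple *a, const ToyTuple *b)
    {
        int r;

        if ((r = cmp_int(a->attrnum, b->attrnum)) != 0)
            return r;
        if ((r = cmp_int(a->category, b->category)) != 0)
            return r;
        if ((r = cmp_int(a->key, b->key)) != 0)
            return r;
        /* equal keys: order by first TID, so runs tend to concatenate */
        return cmp_int(a->first_tid, b->first_tid);
    }

    int
    main(void)
    {
        ToyTuple a = {1, 7, 42, 100};
        ToyTuple b = {1, 7, 42, 200};

        printf("%d\n", toy_compare(&a, &b));    /* prints -1 */
        return 0;
    }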

Generated by: LCOV version 1.16