LCOV - code coverage report
Current view: top level - src/backend/access/gin - gininsert.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 90.9 % 647 588
Test Date: 2026-03-10 12:14:48 Functions: 97.1 % 35 34
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * gininsert.c
       4              :  *    insert routines for the postgres inverted index access method.
       5              :  *
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  * Portions Copyright (c) 1994, Regents of the University of California
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *          src/backend/access/gin/gininsert.c
      12              :  *-------------------------------------------------------------------------
      13              :  */
      14              : 
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/gin_private.h"
      18              : #include "access/gin_tuple.h"
      19              : #include "access/parallel.h"
      20              : #include "access/table.h"
      21              : #include "access/tableam.h"
      22              : #include "access/xloginsert.h"
      23              : #include "catalog/index.h"
      24              : #include "catalog/pg_collation.h"
      25              : #include "commands/progress.h"
      26              : #include "miscadmin.h"
      27              : #include "nodes/execnodes.h"
      28              : #include "pgstat.h"
      29              : #include "storage/bufmgr.h"
      30              : #include "storage/proc.h"
      31              : #include "storage/predicate.h"
      32              : #include "tcop/tcopprot.h"
      33              : #include "utils/datum.h"
      34              : #include "utils/memutils.h"
      35              : #include "utils/builtins.h"
      36              : #include "utils/rel.h"
      37              : #include "utils/typcache.h"
      38              : #include "utils/wait_event.h"
      39              : 
      40              : 
      41              : /* Magic numbers for parallel state sharing */
      42              : #define PARALLEL_KEY_GIN_SHARED         UINT64CONST(0xB000000000000001)
      43              : #define PARALLEL_KEY_TUPLESORT          UINT64CONST(0xB000000000000002)
      44              : #define PARALLEL_KEY_QUERY_TEXT         UINT64CONST(0xB000000000000003)
      45              : #define PARALLEL_KEY_WAL_USAGE          UINT64CONST(0xB000000000000004)
      46              : #define PARALLEL_KEY_BUFFER_USAGE       UINT64CONST(0xB000000000000005)
      47              : 
      48              : /*
      49              :  * Status for index builds performed in parallel.  This is allocated in a
      50              :  * dynamic shared memory segment.
      51              :  */
      52              : typedef struct GinBuildShared
      53              : {
      54              :     /*
      55              :      * These fields are not modified during the build.  They primarily exist
      56              :      * for the benefit of worker processes that need to create state
      57              :      * corresponding to that used by the leader.
      58              :      */
      59              :     Oid         heaprelid;
      60              :     Oid         indexrelid;
      61              :     bool        isconcurrent;
      62              :     int         scantuplesortstates;
      63              : 
      64              :     /*
      65              :      * workersdonecv is used to monitor the progress of workers.  All parallel
      66              :      * participants must indicate that they are done before leader can use
      67              :      * results built by the workers (and before leader can write the data into
      68              :      * the index).
      69              :      */
      70              :     ConditionVariable workersdonecv;
      71              : 
      72              :     /*
      73              :      * mutex protects all following fields
      74              :      *
      75              :      * These fields contain status information of interest to GIN index builds
      76              :      * that must work just the same when an index is built in parallel.
      77              :      */
      78              :     slock_t     mutex;
      79              : 
      80              :     /*
      81              :      * Mutable state that is maintained by workers, and reported back to
      82              :      * leader at end of the scans.
      83              :      *
      84              :      * nparticipantsdone is number of worker processes finished.
      85              :      *
      86              :      * reltuples is the total number of input heap tuples.
      87              :      *
      88              :      * indtuples is the total number of tuples that made it into the index.
      89              :      */
      90              :     int         nparticipantsdone;
      91              :     double      reltuples;
      92              :     double      indtuples;
      93              : 
      94              :     /*
      95              :      * ParallelTableScanDescData data follows. Can't directly embed here, as
      96              :      * implementations of the parallel table scan desc interface might need
      97              :      * stronger alignment.
      98              :      */
      99              : } GinBuildShared;
     100              : 
     101              : /*
     102              :  * Return pointer to a GinBuildShared's parallel table scan.
     103              :  *
     104              :  * c.f. shm_toc_allocate as to why BUFFERALIGN is used, rather than just
     105              :  * MAXALIGN.
     106              :  */
     107              : #define ParallelTableScanFromGinBuildShared(shared) \
     108              :     (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(GinBuildShared)))
     109              : 
     110              : /*
     111              :  * Status for leader in parallel index build.
     112              :  */
     113              : typedef struct GinLeader
     114              : {
     115              :     /* parallel context itself */
     116              :     ParallelContext *pcxt;
     117              : 
     118              :     /*
     119              :      * nparticipanttuplesorts is the exact number of worker processes
     120              :      * successfully launched, plus one leader process if it participates as a
     121              :      * worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader
     122              :      * participating as a worker).
     123              :      */
     124              :     int         nparticipanttuplesorts;
     125              : 
     126              :     /*
     127              :      * Leader process convenience pointers to shared state (leader avoids TOC
     128              :      * lookups).
     129              :      *
     130              :      * GinBuildShared is the shared state for entire build.  sharedsort is the
     131              :      * shared, tuplesort-managed state passed to each process tuplesort.
     132              :      * snapshot is the snapshot used by the scan iff an MVCC snapshot is
     133              :      * required.
     134              :      */
     135              :     GinBuildShared *ginshared;
     136              :     Sharedsort *sharedsort;
     137              :     Snapshot    snapshot;
     138              :     WalUsage   *walusage;
     139              :     BufferUsage *bufferusage;
     140              : } GinLeader;
     141              : 
     142              : typedef struct
     143              : {
     144              :     GinState    ginstate;
     145              :     double      indtuples;
     146              :     GinStatsData buildStats;
     147              :     MemoryContext tmpCtx;
     148              :     MemoryContext funcCtx;
     149              :     BuildAccumulator accum;
     150              :     ItemPointerData tid;
     151              :     int         work_mem;
     152              : 
     153              :     /*
     154              :      * bs_leader is only present when a parallel index build is performed, and
     155              :      * only in the leader process.
     156              :      */
     157              :     GinLeader  *bs_leader;
     158              : 
     159              :     /* number of participating workers (including leader) */
     160              :     int         bs_num_workers;
     161              : 
     162              :     /* used to pass information from workers to leader */
     163              :     double      bs_numtuples;
     164              :     double      bs_reltuples;
     165              : 
     166              :     /*
     167              :      * The sortstate is used by workers (including the leader). It has to be
     168              :      * part of the build state, because that's the only thing passed to the
     169              :      * build callback etc.
     170              :      */
     171              :     Tuplesortstate *bs_sortstate;
     172              : 
     173              :     /*
     174              :      * The sortstate used only within a single worker for the first merge pass
     175              :      * happening there. In principle it doesn't need to be part of the build
     176              :      * state and we could pass it around directly, but it's more convenient
     177              :      * this way. And it's part of the build state, after all.
     178              :      */
     179              :     Tuplesortstate *bs_worker_sort;
     180              : } GinBuildState;
     181              : 
     182              : 
     183              : /* parallel index builds */
     184              : static void _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
     185              :                                 bool isconcurrent, int request);
     186              : static void _gin_end_parallel(GinLeader *ginleader, GinBuildState *state);
     187              : static Size _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot);
     188              : static double _gin_parallel_heapscan(GinBuildState *state);
     189              : static double _gin_parallel_merge(GinBuildState *state);
     190              : static void _gin_leader_participate_as_worker(GinBuildState *buildstate,
     191              :                                               Relation heap, Relation index);
     192              : static void _gin_parallel_scan_and_build(GinBuildState *state,
     193              :                                          GinBuildShared *ginshared,
     194              :                                          Sharedsort *sharedsort,
     195              :                                          Relation heap, Relation index,
     196              :                                          int sortmem, bool progress);
     197              : 
     198              : static ItemPointer _gin_parse_tuple_items(GinTuple *a);
     199              : static Datum _gin_parse_tuple_key(GinTuple *a);
     200              : 
     201              : static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
     202              :                                   Datum key, int16 typlen, bool typbyval,
     203              :                                   ItemPointerData *items, uint32 nitems,
     204              :                                   Size *len);
     205              : 
     206              : /*
     207              :  * Adds array of item pointers to tuple's posting list, or
     208              :  * creates posting tree and tuple pointing to tree in case
     209              :  * of not enough space.  Max size of tuple is defined in
     210              :  * GinFormTuple().  Returns a new, modified index tuple.
     211              :  * items[] must be in sorted order with no duplicates.
     212              :  */
     213              : static IndexTuple
     214       114400 : addItemPointersToLeafTuple(GinState *ginstate,
     215              :                            IndexTuple old,
     216              :                            ItemPointerData *items, uint32 nitem,
     217              :                            GinStatsData *buildStats, Buffer buffer)
     218              : {
     219              :     OffsetNumber attnum;
     220              :     Datum       key;
     221              :     GinNullCategory category;
     222              :     IndexTuple  res;
     223              :     ItemPointerData *newItems,
     224              :                *oldItems;
     225              :     int         oldNPosting,
     226              :                 newNPosting,
     227              :                 nwritten;
     228              :     GinPostingList *compressedList;
     229              : 
     230              :     Assert(!GinIsPostingTree(old));
     231              : 
     232       114400 :     attnum = gintuple_get_attrnum(ginstate, old);
     233       114400 :     key = gintuple_get_key(ginstate, old, &category);
     234              : 
     235              :     /* merge the old and new posting lists */
     236       114400 :     oldItems = ginReadTuple(ginstate, attnum, old, &oldNPosting);
     237              : 
     238       114400 :     newItems = ginMergeItemPointers(items, nitem,
     239              :                                     oldItems, oldNPosting,
     240              :                                     &newNPosting);
     241              : 
     242              :     /* Compress the posting list, and try to a build tuple with room for it */
     243       114400 :     res = NULL;
     244       114400 :     compressedList = ginCompressPostingList(newItems, newNPosting, GinMaxItemSize, &nwritten);
     245       114400 :     if (nwritten == newNPosting)
     246              :     {
     247       114397 :         res = GinFormTuple(ginstate, attnum, key, category,
     248              :                            (char *) compressedList,
     249       114397 :                            SizeOfGinPostingList(compressedList),
     250              :                            newNPosting,
     251              :                            false);
     252              :     }
     253              : 
     254       114400 :     pfree(newItems);
     255       114400 :     pfree(compressedList);
     256              : 
     257       114400 :     if (!res)
     258              :     {
     259              :         /* posting list would be too big, convert to posting tree */
     260              :         BlockNumber postingRoot;
     261              : 
     262              :         /*
     263              :          * Initialize posting tree with the old tuple's posting list.  It's
     264              :          * surely small enough to fit on one posting-tree page, and should
     265              :          * already be in order with no duplicates.
     266              :          */
     267            8 :         postingRoot = createPostingTree(ginstate->index,
     268              :                                         oldItems,
     269              :                                         oldNPosting,
     270              :                                         buildStats,
     271              :                                         buffer);
     272              : 
     273              :         /* Now insert the TIDs-to-be-added into the posting tree */
     274            8 :         ginInsertItemPointers(ginstate->index, postingRoot,
     275              :                               items, nitem,
     276              :                               buildStats);
     277              : 
     278              :         /* And build a new posting-tree-only result tuple */
     279            8 :         res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
     280            8 :         GinSetPostingTree(res, postingRoot);
     281              :     }
     282       114400 :     pfree(oldItems);
     283              : 
     284       114400 :     return res;
     285              : }
     286              : 
     287              : /*
     288              :  * Build a fresh leaf tuple, either posting-list or posting-tree format
     289              :  * depending on whether the given items list will fit.
     290              :  * items[] must be in sorted order with no duplicates.
     291              :  *
     292              :  * This is basically the same logic as in addItemPointersToLeafTuple,
     293              :  * but working from slightly different input.
     294              :  */
     295              : static IndexTuple
     296       347032 : buildFreshLeafTuple(GinState *ginstate,
     297              :                     OffsetNumber attnum, Datum key, GinNullCategory category,
     298              :                     ItemPointerData *items, uint32 nitem,
     299              :                     GinStatsData *buildStats, Buffer buffer)
     300              : {
     301       347032 :     IndexTuple  res = NULL;
     302              :     GinPostingList *compressedList;
     303              :     int         nwritten;
     304              : 
     305              :     /* try to build a posting list tuple with all the items */
     306       347032 :     compressedList = ginCompressPostingList(items, nitem, GinMaxItemSize, &nwritten);
     307       347032 :     if (nwritten == nitem)
     308              :     {
     309       346981 :         res = GinFormTuple(ginstate, attnum, key, category,
     310              :                            (char *) compressedList,
     311       346981 :                            SizeOfGinPostingList(compressedList),
     312              :                            nitem, false);
     313              :     }
     314       347032 :     pfree(compressedList);
     315              : 
     316       347032 :     if (!res)
     317              :     {
     318              :         /* posting list would be too big, build posting tree */
     319              :         BlockNumber postingRoot;
     320              : 
     321              :         /*
     322              :          * Build posting-tree-only result tuple.  We do this first so as to
     323              :          * fail quickly if the key is too big.
     324              :          */
     325           51 :         res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
     326              : 
     327              :         /*
     328              :          * Initialize a new posting tree with the TIDs.
     329              :          */
     330           51 :         postingRoot = createPostingTree(ginstate->index, items, nitem,
     331              :                                         buildStats, buffer);
     332              : 
     333              :         /* And save the root link in the result tuple */
     334           51 :         GinSetPostingTree(res, postingRoot);
     335              :     }
     336              : 
     337       347032 :     return res;
     338              : }
     339              : 
     340              : /*
     341              :  * Insert one or more heap TIDs associated with the given key value.
     342              :  * This will either add a single key entry, or enlarge a pre-existing entry.
     343              :  *
     344              :  * During an index build, buildStats is non-null and the counters
     345              :  * it contains should be incremented as needed.
     346              :  */
     347              : void
     348       486134 : ginEntryInsert(GinState *ginstate,
     349              :                OffsetNumber attnum, Datum key, GinNullCategory category,
     350              :                ItemPointerData *items, uint32 nitem,
     351              :                GinStatsData *buildStats)
     352              : {
     353              :     GinBtreeData btree;
     354              :     GinBtreeEntryInsertData insertdata;
     355              :     GinBtreeStack *stack;
     356              :     IndexTuple  itup;
     357              :     Page        page;
     358              : 
     359       486134 :     insertdata.isDelete = false;
     360              : 
     361       486134 :     ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
     362       486134 :     btree.isBuild = (buildStats != NULL);
     363              : 
     364       486134 :     stack = ginFindLeafPage(&btree, false, false);
     365       486134 :     page = BufferGetPage(stack->buffer);
     366              : 
     367       486134 :     if (btree.findItem(&btree, stack))
     368              :     {
     369              :         /* found pre-existing entry */
     370       139100 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     371              : 
     372       139100 :         if (GinIsPostingTree(itup))
     373              :         {
     374              :             /* add entries to existing posting tree */
     375        24696 :             BlockNumber rootPostingTree = GinGetPostingTree(itup);
     376              : 
     377              :             /* release all stack */
     378        24696 :             LockBuffer(stack->buffer, GIN_UNLOCK);
     379        24696 :             freeGinBtreeStack(stack);
     380              : 
     381              :             /* insert into posting tree */
     382        24696 :             ginInsertItemPointers(ginstate->index, rootPostingTree,
     383              :                                   items, nitem,
     384              :                                   buildStats);
     385        24694 :             return;
     386              :         }
     387              : 
     388       114404 :         CheckForSerializableConflictIn(ginstate->index, NULL,
     389              :                                        BufferGetBlockNumber(stack->buffer));
     390              :         /* modify an existing leaf entry */
     391       114400 :         itup = addItemPointersToLeafTuple(ginstate, itup,
     392              :                                           items, nitem, buildStats, stack->buffer);
     393              : 
     394       114400 :         insertdata.isDelete = true;
     395              :     }
     396              :     else
     397              :     {
     398       347034 :         CheckForSerializableConflictIn(ginstate->index, NULL,
     399              :                                        BufferGetBlockNumber(stack->buffer));
     400              :         /* no match, so construct a new leaf entry */
     401       347032 :         itup = buildFreshLeafTuple(ginstate, attnum, key, category,
     402              :                                    items, nitem, buildStats, stack->buffer);
     403              : 
     404              :         /*
     405              :          * nEntries counts leaf tuples, so increment it only when we make a
     406              :          * new one.
     407              :          */
     408       347032 :         if (buildStats)
     409        76601 :             buildStats->nEntries++;
     410              :     }
     411              : 
     412              :     /* Insert the new or modified leaf tuple */
     413       461432 :     insertdata.entry = itup;
     414       461432 :     ginInsertValue(&btree, stack, &insertdata, buildStats);
     415       461430 :     pfree(itup);
     416              : }
     417              : 
     418              : /*
     419              :  * Extract index entries for a single indexable item, and add them to the
     420              :  * BuildAccumulator's state.
     421              :  *
     422              :  * This function is used only during initial index creation.
     423              :  */
     424              : static void
     425       473609 : ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum,
     426              :                        Datum value, bool isNull,
     427              :                        ItemPointer heapptr)
     428              : {
     429              :     Datum      *entries;
     430              :     GinNullCategory *categories;
     431              :     int32       nentries;
     432              :     MemoryContext oldCtx;
     433              : 
     434       473609 :     oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
     435       473609 :     entries = ginExtractEntries(buildstate->accum.ginstate, attnum,
     436              :                                 value, isNull,
     437              :                                 &nentries, &categories);
     438       473609 :     MemoryContextSwitchTo(oldCtx);
     439              : 
     440       473609 :     ginInsertBAEntries(&buildstate->accum, heapptr, attnum,
     441              :                        entries, categories, nentries);
     442              : 
     443       473609 :     buildstate->indtuples += nentries;
     444              : 
     445       473609 :     MemoryContextReset(buildstate->funcCtx);
     446       473609 : }
     447              : 
     448              : static void
     449       462666 : ginBuildCallback(Relation index, ItemPointer tid, Datum *values,
     450              :                  bool *isnull, bool tupleIsAlive, void *state)
     451              : {
     452       462666 :     GinBuildState *buildstate = (GinBuildState *) state;
     453              :     MemoryContext oldCtx;
     454              :     int         i;
     455              : 
     456       462666 :     oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
     457              : 
     458       925643 :     for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
     459       462977 :         ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
     460       462977 :                                values[i], isnull[i], tid);
     461              : 
     462              :     /* If we've maxed out our available memory, dump everything to the index */
     463       462666 :     if (buildstate->accum.allocatedMemory >= maintenance_work_mem * (Size) 1024)
     464              :     {
     465              :         ItemPointerData *list;
     466              :         Datum       key;
     467              :         GinNullCategory category;
     468              :         uint32      nlist;
     469              :         OffsetNumber attnum;
     470              : 
     471            0 :         ginBeginBAScan(&buildstate->accum);
     472            0 :         while ((list = ginGetBAEntry(&buildstate->accum,
     473            0 :                                      &attnum, &key, &category, &nlist)) != NULL)
     474              :         {
     475              :             /* there could be many entries, so be willing to abort here */
     476            0 :             CHECK_FOR_INTERRUPTS();
     477            0 :             ginEntryInsert(&buildstate->ginstate, attnum, key, category,
     478              :                            list, nlist, &buildstate->buildStats);
     479              :         }
     480              : 
     481            0 :         MemoryContextReset(buildstate->tmpCtx);
     482            0 :         ginInitBA(&buildstate->accum);
     483              :     }
     484              : 
     485       462666 :     MemoryContextSwitchTo(oldCtx);
     486       462666 : }
     487              : 
     488              : /*
     489              :  * ginFlushBuildState
     490              :  *      Write all data from BuildAccumulator into the tuplesort.
     491              :  *
     492              :  * The number of TIDs written to the tuplesort at once is limited, to reduce
     493              :  * the amount of memory needed when merging the intermediate results later.
     494              :  * The leader will see up to two chunks per worker, so calculate the limit to
     495              :  * not need more than MaxAllocSize overall.
     496              :  *
     497              :  * We don't need to worry about overflowing maintenance_work_mem. We can't
     498              :  * build chunks larger than work_mem, and that limit was set so that workers
     499              :  * produce sufficiently small chunks.
     500              :  */
     501              : static void
     502           39 : ginFlushBuildState(GinBuildState *buildstate, Relation index)
     503              : {
     504              :     ItemPointerData *list;
     505              :     Datum       key;
     506              :     GinNullCategory category;
     507              :     uint32      nlist;
     508              :     OffsetNumber attnum;
     509           39 :     TupleDesc   tdesc = RelationGetDescr(index);
     510              :     uint32      maxlen;
     511              : 
     512              :     /* maximum number of TIDs per chunk (two chunks per worker) */
     513           39 :     maxlen = MaxAllocSize / sizeof(ItemPointerData);
     514           39 :     maxlen /= (2 * buildstate->bs_num_workers);
     515              : 
     516           39 :     ginBeginBAScan(&buildstate->accum);
     517        16628 :     while ((list = ginGetBAEntry(&buildstate->accum,
     518        16628 :                                  &attnum, &key, &category, &nlist)) != NULL)
     519              :     {
     520              :         /* information about the key */
     521        16589 :         CompactAttribute *attr = TupleDescCompactAttr(tdesc, (attnum - 1));
     522              : 
     523              :         /* start of the chunk */
     524        16589 :         uint32      offset = 0;
     525              : 
     526              :         /* split the entry into smaller chunk with up to maxlen items */
     527        33178 :         while (offset < nlist)
     528              :         {
     529              :             /* GIN tuple and tuple length */
     530              :             GinTuple   *tup;
     531              :             Size        tuplen;
     532        16589 :             uint32      len = Min(maxlen, nlist - offset);
     533              : 
     534              :             /* there could be many entries, so be willing to abort here */
     535        16589 :             CHECK_FOR_INTERRUPTS();
     536              : 
     537        16589 :             tup = _gin_build_tuple(attnum, category,
     538        16589 :                                    key, attr->attlen, attr->attbyval,
     539        16589 :                                    &list[offset], len,
     540              :                                    &tuplen);
     541              : 
     542        16589 :             offset += len;
     543              : 
     544        16589 :             tuplesort_putgintuple(buildstate->bs_worker_sort, tup, tuplen);
     545              : 
     546        16589 :             pfree(tup);
     547              :         }
     548              :     }
     549              : 
     550           39 :     MemoryContextReset(buildstate->tmpCtx);
     551           39 :     ginInitBA(&buildstate->accum);
     552           39 : }
     553              : 
     554              : /*
     555              :  * ginBuildCallbackParallel
     556              :  *      Callback for the parallel index build.
     557              :  *
     558              :  * This is similar to the serial build callback ginBuildCallback, but
     559              :  * instead of writing the accumulated entries into the index, each worker
     560              :  * writes them into a (local) tuplesort.
     561              :  *
     562              :  * The worker then sorts and combines these entries, before writing them
     563              :  * into a shared tuplesort for the leader (see _gin_parallel_scan_and_build
     564              :  * for the whole process).
     565              :  */
     566              : static void
     567        10632 : ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values,
     568              :                          bool *isnull, bool tupleIsAlive, void *state)
     569              : {
     570        10632 :     GinBuildState *buildstate = (GinBuildState *) state;
     571              :     MemoryContext oldCtx;
     572              :     int         i;
     573              : 
     574        10632 :     oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
     575              : 
     576              :     /*
     577              :      * if scan wrapped around - flush accumulated entries and start anew
     578              :      *
     579              :      * With parallel scans, we don't have a guarantee the scan does not start
     580              :      * half-way through the relation (serial builds disable sync scans and
     581              :      * always start from block 0, parallel scans require allow_sync=true).
     582              :      *
     583              :      * Building the posting lists assumes the TIDs are monotonic and never go
     584              :      * back, and the wrap around would break that. We handle that by detecting
     585              :      * the wraparound, and flushing all entries. This means we'll later see
     586              :      * two separate entries with non-overlapping TID lists (which can be
     587              :      * combined by merge sort).
     588              :      *
     589              :      * To detect a wraparound, we remember the last TID seen by each worker
     590              :      * (for any key). If the next TID seen by the worker is lower, the scan
     591              :      * must have wrapped around.
     592              :      */
     593        10632 :     if (ItemPointerCompare(tid, &buildstate->tid) < 0)
     594            0 :         ginFlushBuildState(buildstate, index);
     595              : 
     596              :     /* remember the TID we're about to process */
     597        10632 :     buildstate->tid = *tid;
     598              : 
     599        21264 :     for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
     600        10632 :         ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
     601        10632 :                                values[i], isnull[i], tid);
     602              : 
     603              :     /*
     604              :      * If we've maxed out our available memory, dump everything to the
     605              :      * tuplesort. We use half the per-worker fraction of maintenance_work_mem,
     606              :      * the other half is used for the tuplesort.
     607              :      */
     608        10632 :     if (buildstate->accum.allocatedMemory >= buildstate->work_mem * (Size) 1024)
     609            0 :         ginFlushBuildState(buildstate, index);
     610              : 
     611        10632 :     MemoryContextSwitchTo(oldCtx);
     612        10632 : }
     613              : 
     614              : IndexBuildResult *
     615          218 : ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     616              : {
     617              :     IndexBuildResult *result;
     618              :     double      reltuples;
     619              :     GinBuildState buildstate;
     620          218 :     GinBuildState *state = &buildstate;
     621              :     Buffer      RootBuffer,
     622              :                 MetaBuffer;
     623              :     ItemPointerData *list;
     624              :     Datum       key;
     625              :     GinNullCategory category;
     626              :     uint32      nlist;
     627              :     MemoryContext oldCtx;
     628              :     OffsetNumber attnum;
     629              : 
     630          218 :     if (RelationGetNumberOfBlocks(index) != 0)
     631            0 :         elog(ERROR, "index \"%s\" already contains data",
     632              :              RelationGetRelationName(index));
     633              : 
     634          218 :     initGinState(&buildstate.ginstate, index);
     635          218 :     buildstate.indtuples = 0;
     636          218 :     memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
     637              : 
     638              :     /* Initialize fields for parallel build too. */
     639          218 :     buildstate.bs_numtuples = 0;
     640          218 :     buildstate.bs_reltuples = 0;
     641          218 :     buildstate.bs_leader = NULL;
     642          218 :     memset(&buildstate.tid, 0, sizeof(ItemPointerData));
     643              : 
     644              :     /* initialize the meta page */
     645          218 :     MetaBuffer = GinNewBuffer(index);
     646              : 
     647              :     /* initialize the root page */
     648          218 :     RootBuffer = GinNewBuffer(index);
     649              : 
     650          218 :     START_CRIT_SECTION();
     651          218 :     GinInitMetabuffer(MetaBuffer);
     652          218 :     MarkBufferDirty(MetaBuffer);
     653          218 :     GinInitBuffer(RootBuffer, GIN_LEAF);
     654          218 :     MarkBufferDirty(RootBuffer);
     655              : 
     656              : 
     657          218 :     UnlockReleaseBuffer(MetaBuffer);
     658          218 :     UnlockReleaseBuffer(RootBuffer);
     659          218 :     END_CRIT_SECTION();
     660              : 
     661              :     /* count the root as first entry page */
     662          218 :     buildstate.buildStats.nEntryPages++;
     663              : 
     664              :     /*
     665              :      * create a temporary memory context that is used to hold data not yet
     666              :      * dumped out to the index
     667              :      */
     668          218 :     buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
     669              :                                               "Gin build temporary context",
     670              :                                               ALLOCSET_DEFAULT_SIZES);
     671              : 
     672              :     /*
     673              :      * create a temporary memory context that is used for calling
     674              :      * ginExtractEntries(), and can be reset after each tuple
     675              :      */
     676          218 :     buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
     677              :                                                "Gin build temporary context for user-defined function",
     678              :                                                ALLOCSET_DEFAULT_SIZES);
     679              : 
     680          218 :     buildstate.accum.ginstate = &buildstate.ginstate;
     681          218 :     ginInitBA(&buildstate.accum);
     682              : 
     683              :     /* Report table scan phase started */
     684          218 :     pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
     685              :                                  PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN);
     686              : 
     687              :     /*
     688              :      * Attempt to launch parallel worker scan when required
     689              :      *
     690              :      * XXX plan_create_index_workers makes the number of workers dependent on
     691              :      * maintenance_work_mem, requiring 32MB for each worker. For GIN that's
     692              :      * reasonable too, because we sort the data just like btree. It does
     693              :      * ignore the memory used to accumulate data in memory (set by work_mem),
     694              :      * but there is no way to communicate that to plan_create_index_workers.
     695              :      */
     696          218 :     if (indexInfo->ii_ParallelWorkers > 0)
     697           13 :         _gin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent,
     698              :                             indexInfo->ii_ParallelWorkers);
     699              : 
     700              :     /*
     701              :      * If parallel build requested and at least one worker process was
     702              :      * successfully launched, set up coordination state, wait for workers to
     703              :      * complete. Then read all tuples from the shared tuplesort and insert
     704              :      * them into the index.
     705              :      *
     706              :      * In serial mode, simply scan the table and build the index one index
     707              :      * tuple at a time.
     708              :      */
     709          218 :     if (state->bs_leader)
     710              :     {
     711              :         SortCoordinate coordinate;
     712              : 
     713           13 :         coordinate = palloc0_object(SortCoordinateData);
     714           13 :         coordinate->isWorker = false;
     715           13 :         coordinate->nParticipants =
     716           13 :             state->bs_leader->nparticipanttuplesorts;
     717           13 :         coordinate->sharedsort = state->bs_leader->sharedsort;
     718              : 
     719              :         /*
     720              :          * Begin leader tuplesort.
     721              :          *
     722              :          * In cases where parallelism is involved, the leader receives the
     723              :          * same share of maintenance_work_mem as a serial sort (it is
     724              :          * generally treated in the same way as a serial sort once we return).
     725              :          * Parallel worker Tuplesortstates will have received only a fraction
     726              :          * of maintenance_work_mem, though.
     727              :          *
     728              :          * We rely on the lifetime of the Leader Tuplesortstate almost not
     729              :          * overlapping with any worker Tuplesortstate's lifetime.  There may
     730              :          * be some small overlap, but that's okay because we rely on leader
     731              :          * Tuplesortstate only allocating a small, fixed amount of memory
     732              :          * here. When its tuplesort_performsort() is called (by our caller),
     733              :          * and significant amounts of memory are likely to be used, all
     734              :          * workers must have already freed almost all memory held by their
     735              :          * Tuplesortstates (they are about to go away completely, too).  The
     736              :          * overall effect is that maintenance_work_mem always represents an
     737              :          * absolute high watermark on the amount of memory used by a CREATE
     738              :          * INDEX operation, regardless of the use of parallelism or any other
     739              :          * factor.
     740              :          */
     741           13 :         state->bs_sortstate =
     742           13 :             tuplesort_begin_index_gin(heap, index,
     743              :                                       maintenance_work_mem, coordinate,
     744              :                                       TUPLESORT_NONE);
     745              : 
     746              :         /* scan the relation in parallel and merge per-worker results */
     747           13 :         reltuples = _gin_parallel_merge(state);
     748              : 
     749           13 :         _gin_end_parallel(state->bs_leader, state);
     750              :     }
     751              :     else                        /* no parallel index build */
     752              :     {
     753              :         /*
     754              :          * Do the heap scan.  We disallow sync scan here because
     755              :          * dataPlaceToPage prefers to receive tuples in TID order.
     756              :          */
     757          205 :         reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
     758              :                                            ginBuildCallback, &buildstate, NULL);
     759              : 
     760              :         /* dump remaining entries to the index */
     761          205 :         oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
     762          205 :         ginBeginBAScan(&buildstate.accum);
     763        65481 :         while ((list = ginGetBAEntry(&buildstate.accum,
     764        65481 :                                      &attnum, &key, &category, &nlist)) != NULL)
     765              :         {
     766              :             /* there could be many entries, so be willing to abort here */
     767        65276 :             CHECK_FOR_INTERRUPTS();
     768        65276 :             ginEntryInsert(&buildstate.ginstate, attnum, key, category,
     769              :                            list, nlist, &buildstate.buildStats);
     770              :         }
     771          205 :         MemoryContextSwitchTo(oldCtx);
     772              :     }
     773              : 
     774          218 :     MemoryContextDelete(buildstate.funcCtx);
     775          218 :     MemoryContextDelete(buildstate.tmpCtx);
     776              : 
     777              :     /*
     778              :      * Update metapage stats
     779              :      */
     780          218 :     buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
     781          218 :     ginUpdateStats(index, &buildstate.buildStats, true);
     782              : 
     783              :     /*
     784              :      * We didn't write WAL records as we built the index, so if WAL-logging is
     785              :      * required, write all pages to the WAL now.
     786              :      */
     787          218 :     if (RelationNeedsWAL(index))
     788              :     {
     789          159 :         log_newpage_range(index, MAIN_FORKNUM,
     790              :                           0, RelationGetNumberOfBlocks(index),
     791              :                           true);
     792              :     }
     793              : 
     794              :     /*
     795              :      * Return statistics
     796              :      */
     797          218 :     result = palloc_object(IndexBuildResult);
     798              : 
     799          218 :     result->heap_tuples = reltuples;
     800          218 :     result->index_tuples = buildstate.indtuples;
     801              : 
     802          218 :     return result;
     803              : }
     804              : 
     805              : /*
     806              :  *  ginbuildempty() -- build an empty gin index in the initialization fork
     807              :  */
     808              : void
     809            3 : ginbuildempty(Relation index)
     810              : {
     811              :     Buffer      RootBuffer,
     812              :                 MetaBuffer;
     813              : 
     814              :     /* An empty GIN index has two pages. */
     815            3 :     MetaBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
     816              :                                    EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
     817            3 :     RootBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
     818              :                                    EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
     819              : 
     820              :     /* Initialize and xlog metabuffer and root buffer. */
     821            3 :     START_CRIT_SECTION();
     822            3 :     GinInitMetabuffer(MetaBuffer);
     823            3 :     MarkBufferDirty(MetaBuffer);
     824            3 :     log_newpage_buffer(MetaBuffer, true);
     825            3 :     GinInitBuffer(RootBuffer, GIN_LEAF);
     826            3 :     MarkBufferDirty(RootBuffer);
     827            3 :     log_newpage_buffer(RootBuffer, false);
     828            3 :     END_CRIT_SECTION();
     829              : 
     830              :     /* Unlock and release the buffers. */
     831            3 :     UnlockReleaseBuffer(MetaBuffer);
     832            3 :     UnlockReleaseBuffer(RootBuffer);
     833            3 : }
     834              : 
     835              : /*
     836              :  * Insert index entries for a single indexable item during "normal"
     837              :  * (non-fast-update) insertion
     838              :  */
     839              : static void
     840        27048 : ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
     841              :                    Datum value, bool isNull,
     842              :                    ItemPointer item)
     843              : {
     844              :     Datum      *entries;
     845              :     GinNullCategory *categories;
     846              :     int32       i,
     847              :                 nentries;
     848              : 
     849        27048 :     entries = ginExtractEntries(ginstate, attnum, value, isNull,
     850              :                                 &nentries, &categories);
     851              : 
     852       253499 :     for (i = 0; i < nentries; i++)
     853       226461 :         ginEntryInsert(ginstate, attnum, entries[i], categories[i],
     854              :                        item, 1, NULL);
     855        27038 : }
     856              : 
     857              : bool
     858       159852 : gininsert(Relation index, Datum *values, bool *isnull,
     859              :           ItemPointer ht_ctid, Relation heapRel,
     860              :           IndexUniqueCheck checkUnique,
     861              :           bool indexUnchanged,
     862              :           IndexInfo *indexInfo)
     863              : {
     864       159852 :     GinState   *ginstate = (GinState *) indexInfo->ii_AmCache;
     865              :     MemoryContext oldCtx;
     866              :     MemoryContext insertCtx;
     867              :     int         i;
     868              : 
     869              :     /* Initialize GinState cache if first call in this statement */
     870       159852 :     if (ginstate == NULL)
     871              :     {
     872         1386 :         oldCtx = MemoryContextSwitchTo(indexInfo->ii_Context);
     873         1386 :         ginstate = palloc_object(GinState);
     874         1386 :         initGinState(ginstate, index);
     875         1386 :         indexInfo->ii_AmCache = ginstate;
     876         1386 :         MemoryContextSwitchTo(oldCtx);
     877              :     }
     878              : 
     879       159852 :     insertCtx = AllocSetContextCreate(CurrentMemoryContext,
     880              :                                       "Gin insert temporary context",
     881              :                                       ALLOCSET_DEFAULT_SIZES);
     882              : 
     883       159852 :     oldCtx = MemoryContextSwitchTo(insertCtx);
     884              : 
     885       159852 :     if (GinGetUseFastUpdate(index))
     886       132801 :     {
     887              :         GinTupleCollector collector;
     888              : 
     889       132804 :         memset(&collector, 0, sizeof(GinTupleCollector));
     890              : 
     891       325647 :         for (i = 0; i < ginstate->origTupdesc->natts; i++)
     892       192843 :             ginHeapTupleFastCollect(ginstate, &collector,
     893       192843 :                                     (OffsetNumber) (i + 1),
     894       192843 :                                     values[i], isnull[i],
     895              :                                     ht_ctid);
     896              : 
     897       132804 :         ginHeapTupleFastInsert(ginstate, &collector);
     898              :     }
     899              :     else
     900              :     {
     901        54086 :         for (i = 0; i < ginstate->origTupdesc->natts; i++)
     902        27048 :             ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
     903        27048 :                                values[i], isnull[i],
     904              :                                ht_ctid);
     905              :     }
     906              : 
     907       159839 :     MemoryContextSwitchTo(oldCtx);
     908       159839 :     MemoryContextDelete(insertCtx);
     909              : 
     910       159839 :     return false;
     911              : }
     912              : 
     913              : /*
     914              :  * Create parallel context, and launch workers for leader.
     915              :  *
     916              :  * buildstate argument should be initialized (with the exception of the
     917              :  * tuplesort states, which may later be created based on shared
     918              :  * state initially set up here).
     919              :  *
     920              :  * isconcurrent indicates if operation is CREATE INDEX CONCURRENTLY.
     921              :  *
     922              :  * request is the target number of parallel worker processes to launch.
     923              :  *
     924              :  * Sets buildstate's GinLeader, which caller must use to shut down parallel
     925              :  * mode by passing it to _gin_end_parallel() at the very end of its index
     926              :  * build.  If not even a single worker process can be launched, this is
     927              :  * never set, and caller should proceed with a serial index build.
     928              :  */
     929              : static void
     930           13 : _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
     931              :                     bool isconcurrent, int request)
     932              : {
     933              :     ParallelContext *pcxt;
     934              :     int         scantuplesortstates;
     935              :     Snapshot    snapshot;
     936              :     Size        estginshared;
     937              :     Size        estsort;
     938              :     GinBuildShared *ginshared;
     939              :     Sharedsort *sharedsort;
     940           13 :     GinLeader  *ginleader = palloc0_object(GinLeader);
     941              :     WalUsage   *walusage;
     942              :     BufferUsage *bufferusage;
     943           13 :     bool        leaderparticipates = true;
     944              :     int         querylen;
     945              : 
     946              : #ifdef DISABLE_LEADER_PARTICIPATION
     947              :     leaderparticipates = false;
     948              : #endif
     949              : 
     950              :     /*
     951              :      * Enter parallel mode, and create context for parallel build of gin index
     952              :      */
     953           13 :     EnterParallelMode();
     954              :     Assert(request > 0);
     955           13 :     pcxt = CreateParallelContext("postgres", "_gin_parallel_build_main",
     956              :                                  request);
     957              : 
     958           13 :     scantuplesortstates = leaderparticipates ? request + 1 : request;
     959              : 
     960              :     /*
     961              :      * Prepare for scan of the base relation.  In a normal index build, we use
     962              :      * SnapshotAny because we must retrieve all tuples and do our own time
     963              :      * qual checks (because we have to index RECENTLY_DEAD tuples).  In a
     964              :      * concurrent build, we take a regular MVCC snapshot and index whatever's
     965              :      * live according to that.
     966              :      */
     967           13 :     if (!isconcurrent)
     968           10 :         snapshot = SnapshotAny;
     969              :     else
     970            3 :         snapshot = RegisterSnapshot(GetTransactionSnapshot());
     971              : 
     972              :     /*
     973              :      * Estimate size for our own PARALLEL_KEY_GIN_SHARED workspace.
     974              :      */
     975           13 :     estginshared = _gin_parallel_estimate_shared(heap, snapshot);
     976           13 :     shm_toc_estimate_chunk(&pcxt->estimator, estginshared);
     977           13 :     estsort = tuplesort_estimate_shared(scantuplesortstates);
     978           13 :     shm_toc_estimate_chunk(&pcxt->estimator, estsort);
     979              : 
     980           13 :     shm_toc_estimate_keys(&pcxt->estimator, 2);
     981              : 
     982              :     /*
     983              :      * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
     984              :      * and PARALLEL_KEY_BUFFER_USAGE.
     985              :      *
     986              :      * If there are no extensions loaded that care, we could skip this.  We
     987              :      * have no way of knowing whether anyone's looking at pgWalUsage or
     988              :      * pgBufferUsage, so do it unconditionally.
     989              :      */
     990           13 :     shm_toc_estimate_chunk(&pcxt->estimator,
     991              :                            mul_size(sizeof(WalUsage), pcxt->nworkers));
     992           13 :     shm_toc_estimate_keys(&pcxt->estimator, 1);
     993           13 :     shm_toc_estimate_chunk(&pcxt->estimator,
     994              :                            mul_size(sizeof(BufferUsage), pcxt->nworkers));
     995           13 :     shm_toc_estimate_keys(&pcxt->estimator, 1);
     996              : 
     997              :     /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
     998           13 :     if (debug_query_string)
     999              :     {
    1000           13 :         querylen = strlen(debug_query_string);
    1001           13 :         shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
    1002           13 :         shm_toc_estimate_keys(&pcxt->estimator, 1);
    1003              :     }
    1004              :     else
    1005            0 :         querylen = 0;           /* keep compiler quiet */
    1006              : 
    1007              :     /* Everyone's had a chance to ask for space, so now create the DSM */
    1008           13 :     InitializeParallelDSM(pcxt);
    1009              : 
    1010              :     /* If no DSM segment was available, back out (do serial build) */
    1011           13 :     if (pcxt->seg == NULL)
    1012              :     {
    1013            0 :         if (IsMVCCSnapshot(snapshot))
    1014            0 :             UnregisterSnapshot(snapshot);
    1015            0 :         DestroyParallelContext(pcxt);
    1016            0 :         ExitParallelMode();
    1017            0 :         return;
    1018              :     }
    1019              : 
    1020              :     /* Store shared build state, for which we reserved space */
    1021           13 :     ginshared = (GinBuildShared *) shm_toc_allocate(pcxt->toc, estginshared);
    1022              :     /* Initialize immutable state */
    1023           13 :     ginshared->heaprelid = RelationGetRelid(heap);
    1024           13 :     ginshared->indexrelid = RelationGetRelid(index);
    1025           13 :     ginshared->isconcurrent = isconcurrent;
    1026           13 :     ginshared->scantuplesortstates = scantuplesortstates;
    1027              : 
    1028           13 :     ConditionVariableInit(&ginshared->workersdonecv);
    1029           13 :     SpinLockInit(&ginshared->mutex);
    1030              : 
    1031              :     /* Initialize mutable state */
    1032           13 :     ginshared->nparticipantsdone = 0;
    1033           13 :     ginshared->reltuples = 0.0;
    1034           13 :     ginshared->indtuples = 0.0;
    1035              : 
    1036           13 :     table_parallelscan_initialize(heap,
    1037              :                                   ParallelTableScanFromGinBuildShared(ginshared),
    1038              :                                   snapshot);
    1039              : 
    1040              :     /*
    1041              :      * Store shared tuplesort-private state, for which we reserved space.
    1042              :      * Then, initialize opaque state using tuplesort routine.
    1043              :      */
    1044           13 :     sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
    1045           13 :     tuplesort_initialize_shared(sharedsort, scantuplesortstates,
    1046              :                                 pcxt->seg);
    1047              : 
    1048           13 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_GIN_SHARED, ginshared);
    1049           13 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
    1050              : 
    1051              :     /* Store query string for workers */
    1052           13 :     if (debug_query_string)
    1053              :     {
    1054              :         char       *sharedquery;
    1055              : 
    1056           13 :         sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
    1057           13 :         memcpy(sharedquery, debug_query_string, querylen + 1);
    1058           13 :         shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
    1059              :     }
    1060              : 
    1061              :     /*
    1062              :      * Allocate space for each worker's WalUsage and BufferUsage; no need to
    1063              :      * initialize.
    1064              :      */
    1065           13 :     walusage = shm_toc_allocate(pcxt->toc,
    1066           13 :                                 mul_size(sizeof(WalUsage), pcxt->nworkers));
    1067           13 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
    1068           13 :     bufferusage = shm_toc_allocate(pcxt->toc,
    1069           13 :                                    mul_size(sizeof(BufferUsage), pcxt->nworkers));
    1070           13 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
    1071              : 
    1072              :     /* Launch workers, saving status for leader/caller */
    1073           13 :     LaunchParallelWorkers(pcxt);
    1074           13 :     ginleader->pcxt = pcxt;
    1075           13 :     ginleader->nparticipanttuplesorts = pcxt->nworkers_launched;
    1076           13 :     if (leaderparticipates)
    1077           13 :         ginleader->nparticipanttuplesorts++;
    1078           13 :     ginleader->ginshared = ginshared;
    1079           13 :     ginleader->sharedsort = sharedsort;
    1080           13 :     ginleader->snapshot = snapshot;
    1081           13 :     ginleader->walusage = walusage;
    1082           13 :     ginleader->bufferusage = bufferusage;
    1083              : 
    1084              :     /* If no workers were successfully launched, back out (do serial build) */
    1085           13 :     if (pcxt->nworkers_launched == 0)
    1086              :     {
    1087            0 :         _gin_end_parallel(ginleader, NULL);
    1088            0 :         return;
    1089              :     }
    1090              : 
    1091              :     /* Save leader state now that it's clear build will be parallel */
    1092           13 :     buildstate->bs_leader = ginleader;
    1093              : 
    1094              :     /* Join heap scan ourselves */
    1095           13 :     if (leaderparticipates)
    1096           13 :         _gin_leader_participate_as_worker(buildstate, heap, index);
    1097              : 
    1098              :     /*
    1099              :      * Caller needs to wait for all launched workers when we return.  Make
    1100              :      * sure that the failure-to-start case will not hang forever.
    1101              :      */
    1102           13 :     WaitForParallelWorkersToAttach(pcxt);
    1103              : }
    1104              : 
    1105              : /*
    1106              :  * Shut down workers, destroy parallel context, and end parallel mode.
    1107              :  */
    1108              : static void
    1109           13 : _gin_end_parallel(GinLeader *ginleader, GinBuildState *state)
    1110              : {
    1111              :     int         i;
    1112              : 
    1113              :     /* Shutdown worker processes */
    1114           13 :     WaitForParallelWorkersToFinish(ginleader->pcxt);
    1115              : 
    1116              :     /*
    1117              :      * Next, accumulate WAL usage.  (This must wait for the workers to finish,
    1118              :      * or we might get incomplete data.)
    1119              :      */
    1120           39 :     for (i = 0; i < ginleader->pcxt->nworkers_launched; i++)
    1121           26 :         InstrAccumParallelQuery(&ginleader->bufferusage[i], &ginleader->walusage[i]);
    1122              : 
    1123              :     /* Free last reference to MVCC snapshot, if one was used */
    1124           13 :     if (IsMVCCSnapshot(ginleader->snapshot))
    1125            3 :         UnregisterSnapshot(ginleader->snapshot);
    1126           13 :     DestroyParallelContext(ginleader->pcxt);
    1127           13 :     ExitParallelMode();
    1128           13 : }
    1129              : 
    1130              : /*
    1131              :  * Within leader, wait for end of heap scan.
    1132              :  *
    1133              :  * When called, parallel heap scan started by _gin_begin_parallel() will
    1134              :  * already be underway within worker processes (when leader participates
    1135              :  * as a worker, we should end up here just as workers are finishing).
    1136              :  *
    1137              :  * Returns the total number of heap tuples scanned.
    1138              :  */
    1139              : static double
    1140           13 : _gin_parallel_heapscan(GinBuildState *state)
    1141              : {
    1142           13 :     GinBuildShared *ginshared = state->bs_leader->ginshared;
    1143              :     int         nparticipanttuplesorts;
    1144              : 
    1145           13 :     nparticipanttuplesorts = state->bs_leader->nparticipanttuplesorts;
    1146              :     for (;;)
    1147              :     {
    1148           32 :         SpinLockAcquire(&ginshared->mutex);
    1149           32 :         if (ginshared->nparticipantsdone == nparticipanttuplesorts)
    1150              :         {
    1151              :             /* copy the data into leader state */
    1152           13 :             state->bs_reltuples = ginshared->reltuples;
    1153           13 :             state->bs_numtuples = ginshared->indtuples;
    1154              : 
    1155           13 :             SpinLockRelease(&ginshared->mutex);
    1156           13 :             break;
    1157              :         }
    1158           19 :         SpinLockRelease(&ginshared->mutex);
    1159              : 
    1160           19 :         ConditionVariableSleep(&ginshared->workersdonecv,
    1161              :                                WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN);
    1162              :     }
    1163              : 
    1164           13 :     ConditionVariableCancelSleep();
    1165              : 
    1166           13 :     return state->bs_reltuples;
    1167              : }
    1168              : 
    1169              : /*
    1170              :  * Buffer used to accumulate TIDs from multiple GinTuples for the same key
    1171              :  * (we read these from the tuplesort, sorted by the key).
    1172              :  *
    1173              :  * This is similar to BuildAccumulator in that it's used to collect TIDs
    1174              :  * in memory before inserting them into the index, but it's much simpler
    1175              :  * as it only deals with a single index key at a time.
    1176              :  *
    1177              :  * When adding TIDs to the buffer, we make sure to keep them sorted, both
    1178              :  * during the initial table scan (and detecting when the scan wraps around),
    1179              :  * and during merging (where we do mergesort).
    1180              :  */
    1181              : typedef struct GinBuffer
    1182              : {
    1183              :     OffsetNumber attnum;
    1184              :     GinNullCategory category;
    1185              :     Datum       key;            /* 0 if no key (and keylen == 0) */
    1186              :     Size        keylen;         /* number of bytes (not typlen) */
    1187              : 
    1188              :     /* type info */
    1189              :     int16       typlen;
    1190              :     bool        typbyval;
    1191              : 
    1192              :     /* Number of TIDs to collect before attempt to write some out. */
    1193              :     int         maxitems;
    1194              : 
    1195              :     /* array of TID values */
    1196              :     int         nitems;
    1197              :     int         nfrozen;
    1198              :     SortSupport ssup;           /* for sorting/comparing keys */
    1199              :     ItemPointerData *items;
    1200              : } GinBuffer;
    1201              : 
    1202              : /*
    1203              :  * Check that TID array contains valid values, and that it's sorted (if we
    1204              :  * expect it to be).
    1205              :  */
    1206              : static void
    1207        61092 : AssertCheckItemPointers(GinBuffer *buffer)
    1208              : {
    1209              : #ifdef USE_ASSERT_CHECKING
    1210              :     /* we should not have a buffer with no TIDs to sort */
    1211              :     Assert(buffer->items != NULL);
    1212              :     Assert(buffer->nitems > 0);
    1213              : 
    1214              :     for (int i = 0; i < buffer->nitems; i++)
    1215              :     {
    1216              :         Assert(ItemPointerIsValid(&buffer->items[i]));
    1217              : 
    1218              :         /* don't check ordering for the first TID item */
    1219              :         if (i == 0)
    1220              :             continue;
    1221              : 
    1222              :         Assert(ItemPointerCompare(&buffer->items[i - 1], &buffer->items[i]) < 0);
    1223              :     }
    1224              : #endif
    1225        61092 : }
    1226              : 
    1227              : /*
    1228              :  * GinBuffer checks
    1229              :  *
    1230              :  * Make sure the nitems/items fields are consistent (either the array is empty
    1231              :  * or not empty, the fields need to agree). If there are items, check ordering.
    1232              :  */
    1233              : static void
    1234        66324 : AssertCheckGinBuffer(GinBuffer *buffer)
    1235              : {
    1236              : #ifdef USE_ASSERT_CHECKING
    1237              :     /* if we have any items, the array must exist */
    1238              :     Assert(!((buffer->nitems > 0) && (buffer->items == NULL)));
    1239              : 
    1240              :     /*
    1241              :      * The buffer may be empty, in which case we must not call the check of
    1242              :      * item pointers, because that assumes non-emptiness.
    1243              :      */
    1244              :     if (buffer->nitems == 0)
    1245              :         return;
    1246              : 
    1247              :     /* Make sure the item pointers are valid and sorted. */
    1248              :     AssertCheckItemPointers(buffer);
    1249              : #endif
    1250        66324 : }
    1251              : 
    1252              : /*
    1253              :  * GinBufferInit
    1254              :  *      Initialize buffer to store tuples for a GIN index.
    1255              :  *
    1256              :  * Initialize the buffer used to accumulate TID for a single key at a time
    1257              :  * (we process the data sorted), so we know when we received all data for
    1258              :  * a given key.
    1259              :  *
    1260              :  * Initializes sort support procedures for all index attributes.
    1261              :  */
    1262              : static GinBuffer *
    1263           52 : GinBufferInit(Relation index)
    1264              : {
    1265           52 :     GinBuffer  *buffer = palloc0_object(GinBuffer);
    1266              :     int         i,
    1267              :                 nKeys;
    1268           52 :     TupleDesc   desc = RelationGetDescr(index);
    1269              : 
    1270              :     /*
    1271              :      * How many items can we fit into the memory limit? We don't want to end
    1272              :      * with too many TIDs. and 64kB seems more than enough. But maybe this
    1273              :      * should be tied to maintenance_work_mem or something like that?
    1274              :      */
    1275           52 :     buffer->maxitems = (64 * 1024L) / sizeof(ItemPointerData);
    1276              : 
    1277           52 :     nKeys = IndexRelationGetNumberOfKeyAttributes(index);
    1278              : 
    1279           52 :     buffer->ssup = palloc0_array(SortSupportData, nKeys);
    1280              : 
    1281              :     /*
    1282              :      * Lookup ordering operator for the index key data type, and initialize
    1283              :      * the sort support function.
    1284              :      */
    1285          104 :     for (i = 0; i < nKeys; i++)
    1286              :     {
    1287              :         Oid         cmpFunc;
    1288           52 :         SortSupport sortKey = &buffer->ssup[i];
    1289           52 :         Form_pg_attribute att = TupleDescAttr(desc, i);
    1290              : 
    1291           52 :         sortKey->ssup_cxt = CurrentMemoryContext;
    1292           52 :         sortKey->ssup_collation = index->rd_indcollation[i];
    1293              : 
    1294           52 :         if (!OidIsValid(sortKey->ssup_collation))
    1295           52 :             sortKey->ssup_collation = DEFAULT_COLLATION_OID;
    1296              : 
    1297           52 :         sortKey->ssup_nulls_first = false;
    1298           52 :         sortKey->ssup_attno = i + 1;
    1299           52 :         sortKey->abbreviate = false;
    1300              : 
    1301              :         Assert(sortKey->ssup_attno != 0);
    1302              : 
    1303              :         /*
    1304              :          * If the compare proc isn't specified in the opclass definition, look
    1305              :          * up the index key type's default btree comparator.
    1306              :          */
    1307           52 :         cmpFunc = index_getprocid(index, i + 1, GIN_COMPARE_PROC);
    1308           52 :         if (cmpFunc == InvalidOid)
    1309              :         {
    1310              :             TypeCacheEntry *typentry;
    1311              : 
    1312            0 :             typentry = lookup_type_cache(att->atttypid,
    1313              :                                          TYPECACHE_CMP_PROC_FINFO);
    1314            0 :             if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
    1315            0 :                 ereport(ERROR,
    1316              :                         (errcode(ERRCODE_UNDEFINED_FUNCTION),
    1317              :                          errmsg("could not identify a comparison function for type %s",
    1318              :                                 format_type_be(att->atttypid))));
    1319              : 
    1320            0 :             cmpFunc = typentry->cmp_proc_finfo.fn_oid;
    1321              :         }
    1322              : 
    1323           52 :         PrepareSortSupportComparisonShim(cmpFunc, sortKey);
    1324              :     }
    1325              : 
    1326           52 :     return buffer;
    1327              : }
    1328              : 
    1329              : /* Is the buffer empty, i.e. has no TID values in the array? */
    1330              : static bool
    1331        66460 : GinBufferIsEmpty(GinBuffer *buffer)
    1332              : {
    1333        66460 :     return (buffer->nitems == 0);
    1334              : }
    1335              : 
    1336              : /*
    1337              :  * GinBufferKeyEquals
    1338              :  *      Can the buffer store TIDs for the provided GIN tuple (same key)?
    1339              :  *
    1340              :  * Compare if the tuple matches the already accumulated data in the GIN
    1341              :  * buffer. Compare scalar fields first, before the actual key.
    1342              :  *
    1343              :  * Returns true if the key matches, and the TID belongs to the buffer, or
    1344              :  * false if the key does not match.
    1345              :  */
    1346              : static bool
    1347        33146 : GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup)
    1348              : {
    1349              :     int         r;
    1350              :     Datum       tupkey;
    1351              : 
    1352        33146 :     AssertCheckGinBuffer(buffer);
    1353              : 
    1354        33146 :     if (tup->attrnum != buffer->attnum)
    1355            0 :         return false;
    1356              : 
    1357              :     /* same attribute should have the same type info */
    1358              :     Assert(tup->typbyval == buffer->typbyval);
    1359              :     Assert(tup->typlen == buffer->typlen);
    1360              : 
    1361        33146 :     if (tup->category != buffer->category)
    1362           27 :         return false;
    1363              : 
    1364              :     /*
    1365              :      * For NULL/empty keys, this means equality, for normal keys we need to
    1366              :      * compare the actual key value.
    1367              :      */
    1368        33119 :     if (buffer->category != GIN_CAT_NORM_KEY)
    1369            3 :         return true;
    1370              : 
    1371              :     /*
    1372              :      * For the tuple, get either the first sizeof(Datum) bytes for byval
    1373              :      * types, or a pointer to the beginning of the data array.
    1374              :      */
    1375        33116 :     tupkey = (buffer->typbyval) ? *(Datum *) tup->data : PointerGetDatum(tup->data);
    1376              : 
    1377        33116 :     r = ApplySortComparator(buffer->key, false,
    1378              :                             tupkey, false,
    1379        33116 :                             &buffer->ssup[buffer->attnum - 1]);
    1380              : 
    1381        33116 :     return (r == 0);
    1382              : }
    1383              : 
    1384              : /*
    1385              :  * GinBufferShouldTrim
    1386              :  *      Should we trim the list of item pointers?
    1387              :  *
    1388              :  * By trimming we understand writing out and removing the tuple IDs that
    1389              :  * we know can't change by future merges. We can deduce the TID up to which
    1390              :  * this is guaranteed from the "first" TID in each GIN tuple, which provides
    1391              :  * a "horizon" (for a given key) thanks to the sort.
    1392              :  *
    1393              :  * We don't want to do this too often - compressing longer TID lists is more
    1394              :  * efficient. But we also don't want to accumulate too many TIDs, for two
    1395              :  * reasons. First, it consumes memory and we might exceed maintenance_work_mem
    1396              :  * (or whatever limit applies), even if that's unlikely because TIDs are very
    1397              :  * small so we can fit a lot of them. Second, and more importantly, long TID
    1398              :  * lists are an issue if the scan wraps around, because a key may get a very
    1399              :  * wide list (with min/max TID for that key), forcing "full" mergesorts for
    1400              :  * every list merged into it (instead of the efficient append).
    1401              :  *
    1402              :  * So we look at two things when deciding if to trim - if the resulting list
    1403              :  * (after adding TIDs from the new tuple) would be too long, and if there is
    1404              :  * enough TIDs to trim (with values less than "first" TID from the new tuple),
    1405              :  * we do the trim. By enough we mean at least 128 TIDs (mostly an arbitrary
    1406              :  * number).
    1407              :  *
    1408              :  * We try freezing TIDs at the beginning of the list first, before attempting
    1409              :  * to trim the buffer. This may allow trimming the data earlier, reducing the
    1410              :  * memory usage and excluding it from the mergesort.
    1411              :  */
    1412              : static bool
    1413        33178 : GinBufferShouldTrim(GinBuffer *buffer, GinTuple *tup)
    1414              : {
    1415              :     /*
    1416              :      * Check if the last TID in the current list is frozen. This is the case
    1417              :      * when merging non-overlapping lists, e.g. in each parallel worker.
    1418              :      */
    1419        38442 :     if ((buffer->nitems > 0) &&
    1420         5264 :         (ItemPointerCompare(&buffer->items[buffer->nitems - 1],
    1421         5264 :                             GinTupleGetFirst(tup)) == 0))
    1422            0 :         buffer->nfrozen = buffer->nitems;
    1423              : 
    1424              :     /*
    1425              :      * Now find the last TID we know to be frozen, i.e. the last TID right
    1426              :      * before the new GIN tuple.
    1427              :      *
    1428              :      * Start with the first not-yet-frozen tuple, and walk until we find the
    1429              :      * first TID that's higher. If we already know the whole list is frozen
    1430              :      * (i.e. nfrozen == nitems), this does nothing.
    1431              :      *
    1432              :      * XXX This might do a binary search for sufficiently long lists, but it
    1433              :      * does not seem worth the complexity. Overlapping lists should be rare
    1434              :      * common, TID comparisons are cheap, and we should quickly freeze most of
    1435              :      * the list.
    1436              :      */
    1437        94650 :     for (int i = buffer->nfrozen; i < buffer->nitems; i++)
    1438              :     {
    1439              :         /* Is the TID after the first TID of the new tuple? Can't freeze. */
    1440        66224 :         if (ItemPointerCompare(&buffer->items[i],
    1441        66224 :                                GinTupleGetFirst(tup)) > 0)
    1442         4752 :             break;
    1443              : 
    1444        61472 :         buffer->nfrozen++;
    1445              :     }
    1446              : 
    1447              :     /* not enough TIDs to trim (1024 is somewhat arbitrary number) */
    1448        33178 :     if (buffer->nfrozen < 1024)
    1449        33178 :         return false;
    1450              : 
    1451              :     /* no need to trim if we have not hit the memory limit yet */
    1452            0 :     if ((buffer->nitems + tup->nitems) < buffer->maxitems)
    1453            0 :         return false;
    1454              : 
    1455              :     /*
    1456              :      * OK, we have enough frozen TIDs to flush, and we have hit the memory
    1457              :      * limit, so it's time to write it out.
    1458              :      */
    1459            0 :     return true;
    1460              : }
    1461              : 
    1462              : /*
    1463              :  * GinBufferStoreTuple
    1464              :  *      Add data (especially TID list) from a GIN tuple to the buffer.
    1465              :  *
    1466              :  * The buffer is expected to be empty (in which case it's initialized), or
    1467              :  * having the same key. The TID values from the tuple are combined with the
    1468              :  * stored values using a merge sort.
    1469              :  *
    1470              :  * The tuples (for the same key) are expected to be sorted by first TID. But
    1471              :  * this does not guarantee the lists do not overlap, especially in the leader,
    1472              :  * because the workers process interleaving data. There should be no overlaps
    1473              :  * in a single worker - it could happen when the parallel scan wraps around,
    1474              :  * but we detect that and flush the data (see ginBuildCallbackParallel).
    1475              :  *
    1476              :  * By sorting the GinTuple not only by key, but also by the first TID, we make
    1477              :  * it more less likely the lists will overlap during merge. We merge them using
    1478              :  * mergesort, but it's cheaper to just append one list to the other.
    1479              :  *
    1480              :  * How often can the lists overlap? There should be no overlaps in workers,
    1481              :  * and in the leader we can see overlaps between lists built by different
    1482              :  * workers. But the workers merge the items as much as possible, so there
    1483              :  * should not be too many.
    1484              :  */
    1485              : static void
    1486        33178 : GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
    1487              : {
    1488              :     ItemPointerData *items;
    1489              :     Datum       key;
    1490              : 
    1491        33178 :     AssertCheckGinBuffer(buffer);
    1492              : 
    1493        33178 :     key = _gin_parse_tuple_key(tup);
    1494        33178 :     items = _gin_parse_tuple_items(tup);
    1495              : 
    1496              :     /* if the buffer is empty, set the fields (and copy the key) */
    1497        33178 :     if (GinBufferIsEmpty(buffer))
    1498              :     {
    1499        27914 :         buffer->category = tup->category;
    1500        27914 :         buffer->keylen = tup->keylen;
    1501        27914 :         buffer->attnum = tup->attrnum;
    1502              : 
    1503        27914 :         buffer->typlen = tup->typlen;
    1504        27914 :         buffer->typbyval = tup->typbyval;
    1505              : 
    1506        27914 :         if (tup->category == GIN_CAT_NORM_KEY)
    1507        27887 :             buffer->key = datumCopy(key, buffer->typbyval, buffer->typlen);
    1508              :         else
    1509           27 :             buffer->key = (Datum) 0;
    1510              :     }
    1511              : 
    1512              :     /* add the new TIDs into the buffer, combine using merge-sort */
    1513              :     {
    1514              :         int         nnew;
    1515              :         ItemPointer new;
    1516              : 
    1517              :         /*
    1518              :          * Resize the array - we do this first, because we'll dereference the
    1519              :          * first unfrozen TID, which would fail if the array is NULL. We'll
    1520              :          * still pass 0 as number of elements in that array though.
    1521              :          */
    1522        33178 :         if (buffer->items == NULL)
    1523           32 :             buffer->items = palloc((buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
    1524              :         else
    1525        33146 :             buffer->items = repalloc(buffer->items,
    1526        33146 :                                      (buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
    1527              : 
    1528        33178 :         new = ginMergeItemPointers(&buffer->items[buffer->nfrozen], /* first unfrozen */
    1529        33178 :                                    (buffer->nitems - buffer->nfrozen),    /* num of unfrozen */
    1530        33178 :                                    items, tup->nitems, &nnew);
    1531              : 
    1532              :         Assert(nnew == (tup->nitems + (buffer->nitems - buffer->nfrozen)));
    1533              : 
    1534        33178 :         memcpy(&buffer->items[buffer->nfrozen], new,
    1535              :                nnew * sizeof(ItemPointerData));
    1536              : 
    1537        33178 :         pfree(new);
    1538              : 
    1539        33178 :         buffer->nitems += tup->nitems;
    1540              : 
    1541        33178 :         AssertCheckItemPointers(buffer);
    1542              :     }
    1543              : 
    1544              :     /* free the decompressed TID list */
    1545        33178 :     pfree(items);
    1546        33178 : }
    1547              : 
    1548              : /*
    1549              :  * GinBufferReset
    1550              :  *      Reset the buffer into a state as if it contains no data.
    1551              :  */
    1552              : static void
    1553        27914 : GinBufferReset(GinBuffer *buffer)
    1554              : {
    1555              :     Assert(!GinBufferIsEmpty(buffer));
    1556              : 
    1557              :     /* release byref values, do nothing for by-val ones */
    1558        27914 :     if ((buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
    1559        17909 :         pfree(DatumGetPointer(buffer->key));
    1560              : 
    1561              :     /*
    1562              :      * Not required, but makes it more likely to trigger NULL dereference if
    1563              :      * using the value incorrectly, etc.
    1564              :      */
    1565        27914 :     buffer->key = (Datum) 0;
    1566              : 
    1567        27914 :     buffer->attnum = 0;
    1568        27914 :     buffer->category = 0;
    1569        27914 :     buffer->keylen = 0;
    1570        27914 :     buffer->nitems = 0;
    1571        27914 :     buffer->nfrozen = 0;
    1572              : 
    1573        27914 :     buffer->typlen = 0;
    1574        27914 :     buffer->typbyval = 0;
    1575        27914 : }
    1576              : 
    1577              : /*
    1578              :  * GinBufferTrim
    1579              :  *      Discard the "frozen" part of the TID list (which should have been
    1580              :  *      written to disk/index before this call).
    1581              :  */
    1582              : static void
    1583            0 : GinBufferTrim(GinBuffer *buffer)
    1584              : {
    1585              :     Assert((buffer->nfrozen > 0) && (buffer->nfrozen <= buffer->nitems));
    1586              : 
    1587            0 :     memmove(&buffer->items[0], &buffer->items[buffer->nfrozen],
    1588            0 :             sizeof(ItemPointerData) * (buffer->nitems - buffer->nfrozen));
    1589              : 
    1590            0 :     buffer->nitems -= buffer->nfrozen;
    1591            0 :     buffer->nfrozen = 0;
    1592            0 : }
    1593              : 
    1594              : /*
    1595              :  * GinBufferFree
    1596              :  *      Release memory associated with the GinBuffer (including TID array).
    1597              :  */
    1598              : static void
    1599           52 : GinBufferFree(GinBuffer *buffer)
    1600              : {
    1601           52 :     if (buffer->items)
    1602           32 :         pfree(buffer->items);
    1603              : 
    1604              :     /* release byref values, do nothing for by-val ones */
    1605           52 :     if (!GinBufferIsEmpty(buffer) &&
    1606            0 :         (buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
    1607            0 :         pfree(DatumGetPointer(buffer->key));
    1608              : 
    1609           52 :     pfree(buffer);
    1610           52 : }
    1611              : 
    1612              : /*
    1613              :  * GinBufferCanAddKey
    1614              :  *      Check if a given GIN tuple can be added to the current buffer.
    1615              :  *
    1616              :  * Returns true if the buffer is either empty or for the same index key.
    1617              :  */
    1618              : static bool
    1619        33178 : GinBufferCanAddKey(GinBuffer *buffer, GinTuple *tup)
    1620              : {
    1621              :     /* empty buffer can accept data for any key */
    1622        33178 :     if (GinBufferIsEmpty(buffer))
    1623           32 :         return true;
    1624              : 
    1625              :     /* otherwise just data for the same key */
    1626        33146 :     return GinBufferKeyEquals(buffer, tup);
    1627              : }
    1628              : 
    1629              : /*
    1630              :  * Within leader, wait for end of heap scan and merge per-worker results.
    1631              :  *
    1632              :  * After waiting for all workers to finish, merge the per-worker results into
    1633              :  * the complete index. The results from each worker are sorted by block number
    1634              :  * (start of the page range). While combining the per-worker results we merge
    1635              :  * summaries for the same page range, and also fill-in empty summaries for
    1636              :  * ranges without any tuples.
    1637              :  *
    1638              :  * Returns the total number of heap tuples scanned.
    1639              :  */
    1640              : static double
    1641           13 : _gin_parallel_merge(GinBuildState *state)
    1642              : {
    1643              :     GinTuple   *tup;
    1644              :     Size        tuplen;
    1645           13 :     double      reltuples = 0;
    1646              :     GinBuffer  *buffer;
    1647              : 
    1648              :     /* GIN tuples from workers, merged by leader */
    1649           13 :     double      numtuples = 0;
    1650              : 
    1651              :     /* wait for workers to scan table and produce partial results */
    1652           13 :     reltuples = _gin_parallel_heapscan(state);
    1653              : 
    1654              :     /* Execute the sort */
    1655           13 :     pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
    1656              :                                  PROGRESS_GIN_PHASE_PERFORMSORT_2);
    1657              : 
    1658              :     /* do the actual sort in the leader */
    1659           13 :     tuplesort_performsort(state->bs_sortstate);
    1660              : 
    1661              :     /*
    1662              :      * Initialize buffer to combine entries for the same key.
    1663              :      *
    1664              :      * The leader is allowed to use the whole maintenance_work_mem buffer to
    1665              :      * combine data. The parallel workers already completed.
    1666              :      */
    1667           13 :     buffer = GinBufferInit(state->ginstate.index);
    1668              : 
    1669              :     /*
    1670              :      * Set the progress target for the next phase.  Reset the block number
    1671              :      * values set by table_index_build_scan
    1672              :      */
    1673              :     {
    1674           13 :         const int   progress_index[] = {
    1675              :             PROGRESS_CREATEIDX_SUBPHASE,
    1676              :             PROGRESS_CREATEIDX_TUPLES_TOTAL,
    1677              :             PROGRESS_SCAN_BLOCKS_TOTAL,
    1678              :             PROGRESS_SCAN_BLOCKS_DONE
    1679              :         };
    1680           13 :         const int64 progress_vals[] = {
    1681              :             PROGRESS_GIN_PHASE_MERGE_2,
    1682           13 :             state->bs_numtuples,
    1683              :             0, 0
    1684              :         };
    1685              : 
    1686           13 :         pgstat_progress_update_multi_param(4, progress_index, progress_vals);
    1687              :     }
    1688              : 
    1689              :     /*
    1690              :      * Read the GIN tuples from the shared tuplesort, sorted by category and
    1691              :      * key. That probably gives us order matching how data is organized in the
    1692              :      * index.
    1693              :      *
    1694              :      * We don't insert the GIN tuples right away, but instead accumulate as
    1695              :      * many TIDs for the same key as possible, and then insert that at once.
    1696              :      * This way we don't need to decompress/recompress the posting lists, etc.
    1697              :      */
    1698        16602 :     while ((tup = tuplesort_getgintuple(state->bs_sortstate, &tuplen, true)) != NULL)
    1699              :     {
    1700              :         MemoryContext oldCtx;
    1701              : 
    1702        16589 :         CHECK_FOR_INTERRUPTS();
    1703              : 
    1704              :         /*
    1705              :          * If the buffer can accept the new GIN tuple, just store it there and
    1706              :          * we're done. If it's a different key (or maybe too much data) flush
    1707              :          * the current contents into the index first.
    1708              :          */
    1709        16589 :         if (!GinBufferCanAddKey(buffer, tup))
    1710              :         {
    1711              :             /*
    1712              :              * Buffer is not empty and it's storing a different key - flush
    1713              :              * the data into the insert, and start a new entry for current
    1714              :              * GinTuple.
    1715              :              */
    1716        11313 :             AssertCheckItemPointers(buffer);
    1717              : 
    1718        11313 :             oldCtx = MemoryContextSwitchTo(state->tmpCtx);
    1719              : 
    1720        11313 :             ginEntryInsert(&state->ginstate,
    1721        11313 :                            buffer->attnum, buffer->key, buffer->category,
    1722        11313 :                            buffer->items, buffer->nitems, &state->buildStats);
    1723              : 
    1724        11313 :             MemoryContextSwitchTo(oldCtx);
    1725        11313 :             MemoryContextReset(state->tmpCtx);
    1726              : 
    1727              :             /* discard the existing data */
    1728        11313 :             GinBufferReset(buffer);
    1729              :         }
    1730              : 
    1731              :         /*
    1732              :          * We're about to add a GIN tuple to the buffer - check the memory
    1733              :          * limit first, and maybe write out some of the data into the index
    1734              :          * first, if needed (and possible). We only flush the part of the TID
    1735              :          * list that we know won't change, and only if there's enough data for
    1736              :          * compression to work well.
    1737              :          */
    1738        16589 :         if (GinBufferShouldTrim(buffer, tup))
    1739              :         {
    1740              :             Assert(buffer->nfrozen > 0);
    1741              : 
    1742              :             /*
    1743              :              * Buffer is not empty and it's storing a different key - flush
    1744              :              * the data into the insert, and start a new entry for current
    1745              :              * GinTuple.
    1746              :              */
    1747            0 :             AssertCheckItemPointers(buffer);
    1748              : 
    1749            0 :             oldCtx = MemoryContextSwitchTo(state->tmpCtx);
    1750              : 
    1751            0 :             ginEntryInsert(&state->ginstate,
    1752            0 :                            buffer->attnum, buffer->key, buffer->category,
    1753            0 :                            buffer->items, buffer->nfrozen, &state->buildStats);
    1754              : 
    1755            0 :             MemoryContextSwitchTo(oldCtx);
    1756            0 :             MemoryContextReset(state->tmpCtx);
    1757              : 
    1758              :             /* truncate the data we've just discarded */
    1759            0 :             GinBufferTrim(buffer);
    1760              :         }
    1761              : 
    1762              :         /*
    1763              :          * Remember data for the current tuple (either remember the new key,
    1764              :          * or append if to the existing data).
    1765              :          */
    1766        16589 :         GinBufferStoreTuple(buffer, tup);
    1767              : 
    1768              :         /* Report progress */
    1769        16589 :         pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
    1770              :                                      ++numtuples);
    1771              :     }
    1772              : 
    1773              :     /* flush data remaining in the buffer (for the last key) */
    1774           13 :     if (!GinBufferIsEmpty(buffer))
    1775              :     {
    1776           12 :         AssertCheckItemPointers(buffer);
    1777              : 
    1778           12 :         ginEntryInsert(&state->ginstate,
    1779           12 :                        buffer->attnum, buffer->key, buffer->category,
    1780           12 :                        buffer->items, buffer->nitems, &state->buildStats);
    1781              : 
    1782              :         /* discard the existing data */
    1783           12 :         GinBufferReset(buffer);
    1784              : 
    1785              :         /* Report progress */
    1786           12 :         pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
    1787              :                                      ++numtuples);
    1788              :     }
    1789              : 
    1790              :     /* release all the memory */
    1791           13 :     GinBufferFree(buffer);
    1792              : 
    1793           13 :     tuplesort_end(state->bs_sortstate);
    1794              : 
    1795           13 :     return reltuples;
    1796              : }
    1797              : 
    1798              : /*
    1799              :  * Returns size of shared memory required to store state for a parallel
    1800              :  * gin index build based on the snapshot its parallel scan will use.
    1801              :  */
    1802              : static Size
    1803           13 : _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot)
    1804              : {
    1805              :     /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
    1806           13 :     return add_size(BUFFERALIGN(sizeof(GinBuildShared)),
    1807              :                     table_parallelscan_estimate(heap, snapshot));
    1808              : }
    1809              : 
    1810              : /*
    1811              :  * Within leader, participate as a parallel worker.
    1812              :  */
    1813              : static void
    1814           13 : _gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Relation index)
    1815              : {
    1816           13 :     GinLeader  *ginleader = buildstate->bs_leader;
    1817              :     int         sortmem;
    1818              : 
    1819              :     /*
    1820              :      * Might as well use reliable figure when doling out maintenance_work_mem
    1821              :      * (when requested number of workers were not launched, this will be
    1822              :      * somewhat higher than it is for other workers).
    1823              :      */
    1824           13 :     sortmem = maintenance_work_mem / ginleader->nparticipanttuplesorts;
    1825              : 
    1826              :     /* Perform work common to all participants */
    1827           13 :     _gin_parallel_scan_and_build(buildstate, ginleader->ginshared,
    1828              :                                  ginleader->sharedsort, heap, index,
    1829              :                                  sortmem, true);
    1830           13 : }
    1831              : 
    1832              : /*
    1833              :  * _gin_process_worker_data
    1834              :  *      First phase of the key merging, happening in the worker.
    1835              :  *
    1836              :  * Depending on the number of distinct keys, the TID lists produced by the
    1837              :  * callback may be very short (due to frequent evictions in the callback).
    1838              :  * But combining many tiny lists is expensive, so we try to do as much as
    1839              :  * possible in the workers and only then pass the results to the leader.
    1840              :  *
    1841              :  * We read the tuples sorted by the key, and merge them into larger lists.
    1842              :  * At the moment there's no memory limit, so this will just produce one
    1843              :  * huge (sorted) list per key in each worker. Which means the leader will
    1844              :  * do a very limited number of mergesorts, which is good.
    1845              :  */
    1846              : static void
    1847           39 : _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort,
    1848              :                          bool progress)
    1849              : {
    1850              :     GinTuple   *tup;
    1851              :     Size        tuplen;
    1852              : 
    1853              :     GinBuffer  *buffer;
    1854              : 
    1855              :     /*
    1856              :      * Initialize buffer to combine entries for the same key.
    1857              :      *
    1858              :      * The workers are limited to the same amount of memory as during the sort
    1859              :      * in ginBuildCallbackParallel. But this probably should be the 32MB used
    1860              :      * during planning, just like there.
    1861              :      */
    1862           39 :     buffer = GinBufferInit(state->ginstate.index);
    1863              : 
    1864              :     /* sort the raw per-worker data */
    1865           39 :     if (progress)
    1866           13 :         pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
    1867              :                                      PROGRESS_GIN_PHASE_PERFORMSORT_1);
    1868              : 
    1869           39 :     tuplesort_performsort(state->bs_worker_sort);
    1870              : 
    1871              :     /* reset the number of GIN tuples produced by this worker */
    1872           39 :     state->bs_numtuples = 0;
    1873              : 
    1874           39 :     if (progress)
    1875           13 :         pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
    1876              :                                      PROGRESS_GIN_PHASE_MERGE_1);
    1877              : 
    1878              :     /*
    1879              :      * Read the GIN tuples from the shared tuplesort, sorted by the key, and
    1880              :      * merge them into larger chunks for the leader to combine.
    1881              :      */
    1882        16628 :     while ((tup = tuplesort_getgintuple(worker_sort, &tuplen, true)) != NULL)
    1883              :     {
    1884              : 
    1885        16589 :         CHECK_FOR_INTERRUPTS();
    1886              : 
    1887              :         /*
    1888              :          * If the buffer can accept the new GIN tuple, just store it there and
    1889              :          * we're done. If it's a different key (or maybe too much data) flush
    1890              :          * the current contents into the index first.
    1891              :          */
    1892        16589 :         if (!GinBufferCanAddKey(buffer, tup))
    1893              :         {
    1894              :             GinTuple   *ntup;
    1895              :             Size        ntuplen;
    1896              : 
    1897              :             /*
    1898              :              * Buffer is not empty and it's storing a different key - flush
    1899              :              * the data into the insert, and start a new entry for current
    1900              :              * GinTuple.
    1901              :              */
    1902        16569 :             AssertCheckItemPointers(buffer);
    1903              : 
    1904        16569 :             ntup = _gin_build_tuple(buffer->attnum, buffer->category,
    1905        16569 :                                     buffer->key, buffer->typlen, buffer->typbyval,
    1906        16569 :                                     buffer->items, buffer->nitems, &ntuplen);
    1907              : 
    1908        16569 :             tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
    1909        16569 :             state->bs_numtuples++;
    1910              : 
    1911        16569 :             pfree(ntup);
    1912              : 
    1913              :             /* discard the existing data */
    1914        16569 :             GinBufferReset(buffer);
    1915              :         }
    1916              : 
    1917              :         /*
    1918              :          * We're about to add a GIN tuple to the buffer - check the memory
    1919              :          * limit first, and maybe write out some of the data into the index
    1920              :          * first, if needed (and possible). We only flush the part of the TID
    1921              :          * list that we know won't change, and only if there's enough data for
    1922              :          * compression to work well.
    1923              :          */
    1924        16589 :         if (GinBufferShouldTrim(buffer, tup))
    1925              :         {
    1926              :             GinTuple   *ntup;
    1927              :             Size        ntuplen;
    1928              : 
    1929              :             Assert(buffer->nfrozen > 0);
    1930              : 
    1931              :             /*
    1932              :              * Buffer is not empty and it's storing a different key - flush
    1933              :              * the data into the insert, and start a new entry for current
    1934              :              * GinTuple.
    1935              :              */
    1936            0 :             AssertCheckItemPointers(buffer);
    1937              : 
    1938            0 :             ntup = _gin_build_tuple(buffer->attnum, buffer->category,
    1939            0 :                                     buffer->key, buffer->typlen, buffer->typbyval,
    1940            0 :                                     buffer->items, buffer->nfrozen, &ntuplen);
    1941              : 
    1942            0 :             tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
    1943              : 
    1944            0 :             pfree(ntup);
    1945              : 
    1946              :             /* truncate the data we've just discarded */
    1947            0 :             GinBufferTrim(buffer);
    1948              :         }
    1949              : 
    1950              :         /*
    1951              :          * Remember data for the current tuple (either remember the new key,
    1952              :          * or append if to the existing data).
    1953              :          */
    1954        16589 :         GinBufferStoreTuple(buffer, tup);
    1955              :     }
    1956              : 
    1957              :     /* flush data remaining in the buffer (for the last key) */
    1958           39 :     if (!GinBufferIsEmpty(buffer))
    1959              :     {
    1960              :         GinTuple   *ntup;
    1961              :         Size        ntuplen;
    1962              : 
    1963           20 :         AssertCheckItemPointers(buffer);
    1964              : 
    1965           20 :         ntup = _gin_build_tuple(buffer->attnum, buffer->category,
    1966           20 :                                 buffer->key, buffer->typlen, buffer->typbyval,
    1967           20 :                                 buffer->items, buffer->nitems, &ntuplen);
    1968              : 
    1969           20 :         tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
    1970           20 :         state->bs_numtuples++;
    1971              : 
    1972           20 :         pfree(ntup);
    1973              : 
    1974              :         /* discard the existing data */
    1975           20 :         GinBufferReset(buffer);
    1976              :     }
    1977              : 
    1978              :     /* release all the memory */
    1979           39 :     GinBufferFree(buffer);
    1980              : 
    1981           39 :     tuplesort_end(worker_sort);
    1982           39 : }
    1983              : 
    1984              : /*
    1985              :  * Perform a worker's portion of a parallel GIN index build sort.
    1986              :  *
    1987              :  * This generates a tuplesort for the worker portion of the table.
    1988              :  *
    1989              :  * sortmem is the amount of working memory to use within each worker,
    1990              :  * expressed in KBs.
    1991              :  *
    1992              :  * When this returns, workers are done, and need only release resources.
    1993              :  *
    1994              :  * Before feeding data into a shared tuplesort (for the leader process),
    1995              :  * the workers process data in two phases.
    1996              :  *
    1997              :  * 1) A worker reads a portion of rows from the table, accumulates entries
    1998              :  * in memory, and flushes them into a private tuplesort (e.g. because of
    1999              :  * using too much memory).
    2000              :  *
    2001              :  * 2) The private tuplesort gets sorted (by key and TID), the worker reads
    2002              :  * the data again, and combines the entries as much as possible. This has
    2003              :  * to happen eventually, and this way it's done in workers in parallel.
    2004              :  *
    2005              :  * Finally, the combined entries are written into the shared tuplesort, so
    2006              :  * that the leader can process them.
    2007              :  *
    2008              :  * How well this works (compared to just writing entries into the shared
    2009              :  * tuplesort) depends on the data set. For large tables with many distinct
    2010              :  * keys this helps a lot. With many distinct keys it's likely the buffers has
    2011              :  * to be flushed often, generating many entries with the same key and short
    2012              :  * TID lists. These entries need to be sorted and merged at some point,
    2013              :  * before writing them to the index. The merging is quite expensive, it can
    2014              :  * easily be ~50% of a serial build, and doing as much of it in the workers
    2015              :  * means it's parallelized. The leader still has to merge results from the
    2016              :  * workers, but it's much more efficient to merge few large entries than
    2017              :  * many tiny ones.
    2018              :  *
    2019              :  * This also reduces the amount of data the workers pass to the leader through
    2020              :  * the shared tuplesort. OTOH the workers need more space for the private sort,
    2021              :  * possibly up to 2x of the data, if no entries be merged in a worker. But this
    2022              :  * is very unlikely, and the only consequence is inefficiency, so we ignore it.
    2023              :  */
    2024              : static void
    2025           39 : _gin_parallel_scan_and_build(GinBuildState *state,
    2026              :                              GinBuildShared *ginshared, Sharedsort *sharedsort,
    2027              :                              Relation heap, Relation index,
    2028              :                              int sortmem, bool progress)
    2029              : {
    2030              :     SortCoordinate coordinate;
    2031              :     TableScanDesc scan;
    2032              :     double      reltuples;
    2033              :     IndexInfo  *indexInfo;
    2034              : 
    2035              :     /* Initialize local tuplesort coordination state */
    2036           39 :     coordinate = palloc0_object(SortCoordinateData);
    2037           39 :     coordinate->isWorker = true;
    2038           39 :     coordinate->nParticipants = -1;
    2039           39 :     coordinate->sharedsort = sharedsort;
    2040              : 
    2041              :     /* remember how much space is allowed for the accumulated entries */
    2042           39 :     state->work_mem = (sortmem / 2);
    2043              : 
    2044              :     /* remember how many workers participate in the build */
    2045           39 :     state->bs_num_workers = ginshared->scantuplesortstates;
    2046              : 
    2047              :     /* Begin "partial" tuplesort */
    2048           39 :     state->bs_sortstate = tuplesort_begin_index_gin(heap, index,
    2049              :                                                     state->work_mem,
    2050              :                                                     coordinate,
    2051              :                                                     TUPLESORT_NONE);
    2052              : 
    2053              :     /* Local per-worker sort of raw-data */
    2054           39 :     state->bs_worker_sort = tuplesort_begin_index_gin(heap, index,
    2055              :                                                       state->work_mem,
    2056              :                                                       NULL,
    2057              :                                                       TUPLESORT_NONE);
    2058              : 
    2059              :     /* Join parallel scan */
    2060           39 :     indexInfo = BuildIndexInfo(index);
    2061           39 :     indexInfo->ii_Concurrent = ginshared->isconcurrent;
    2062              : 
    2063           39 :     scan = table_beginscan_parallel(heap,
    2064              :                                     ParallelTableScanFromGinBuildShared(ginshared));
    2065              : 
    2066           39 :     reltuples = table_index_build_scan(heap, index, indexInfo, true, progress,
    2067              :                                        ginBuildCallbackParallel, state, scan);
    2068              : 
    2069              :     /* write remaining accumulated entries */
    2070           39 :     ginFlushBuildState(state, index);
    2071              : 
    2072              :     /*
    2073              :      * Do the first phase of in-worker processing - sort the data produced by
    2074              :      * the callback, and combine them into much larger chunks and place that
    2075              :      * into the shared tuplestore for leader to process.
    2076              :      */
    2077           39 :     _gin_process_worker_data(state, state->bs_worker_sort, progress);
    2078              : 
    2079              :     /* sort the GIN tuples built by this worker */
    2080           39 :     tuplesort_performsort(state->bs_sortstate);
    2081              : 
    2082           39 :     state->bs_reltuples += reltuples;
    2083              : 
    2084              :     /*
    2085              :      * Done.  Record ambuild statistics.
    2086              :      */
    2087           39 :     SpinLockAcquire(&ginshared->mutex);
    2088           39 :     ginshared->nparticipantsdone++;
    2089           39 :     ginshared->reltuples += state->bs_reltuples;
    2090           39 :     ginshared->indtuples += state->bs_numtuples;
    2091           39 :     SpinLockRelease(&ginshared->mutex);
    2092              : 
    2093              :     /* Notify leader */
    2094           39 :     ConditionVariableSignal(&ginshared->workersdonecv);
    2095              : 
    2096           39 :     tuplesort_end(state->bs_sortstate);
    2097           39 : }
    2098              : 
    2099              : /*
    2100              :  * Perform work within a launched parallel process.
    2101              :  */
    2102              : void
    2103           26 : _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
    2104              : {
    2105              :     char       *sharedquery;
    2106              :     GinBuildShared *ginshared;
    2107              :     Sharedsort *sharedsort;
    2108              :     GinBuildState buildstate;
    2109              :     Relation    heapRel;
    2110              :     Relation    indexRel;
    2111              :     LOCKMODE    heapLockmode;
    2112              :     LOCKMODE    indexLockmode;
    2113              :     WalUsage   *walusage;
    2114              :     BufferUsage *bufferusage;
    2115              :     int         sortmem;
    2116              : 
    2117              :     /*
    2118              :      * The only possible status flag that can be set to the parallel worker is
    2119              :      * PROC_IN_SAFE_IC.
    2120              :      */
    2121              :     Assert((MyProc->statusFlags == 0) ||
    2122              :            (MyProc->statusFlags == PROC_IN_SAFE_IC));
    2123              : 
    2124              :     /* Set debug_query_string for individual workers first */
    2125           26 :     sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true);
    2126           26 :     debug_query_string = sharedquery;
    2127              : 
    2128              :     /* Report the query string from leader */
    2129           26 :     pgstat_report_activity(STATE_RUNNING, debug_query_string);
    2130              : 
    2131              :     /* Look up gin shared state */
    2132           26 :     ginshared = shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false);
    2133              : 
    2134              :     /* Open relations using lock modes known to be obtained by index.c */
    2135           26 :     if (!ginshared->isconcurrent)
    2136              :     {
    2137           20 :         heapLockmode = ShareLock;
    2138           20 :         indexLockmode = AccessExclusiveLock;
    2139              :     }
    2140              :     else
    2141              :     {
    2142            6 :         heapLockmode = ShareUpdateExclusiveLock;
    2143            6 :         indexLockmode = RowExclusiveLock;
    2144              :     }
    2145              : 
    2146              :     /* Open relations within worker */
    2147           26 :     heapRel = table_open(ginshared->heaprelid, heapLockmode);
    2148           26 :     indexRel = index_open(ginshared->indexrelid, indexLockmode);
    2149              : 
    2150              :     /* initialize the GIN build state */
    2151           26 :     initGinState(&buildstate.ginstate, indexRel);
    2152           26 :     buildstate.indtuples = 0;
    2153           26 :     memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
    2154           26 :     memset(&buildstate.tid, 0, sizeof(ItemPointerData));
    2155              : 
    2156              :     /*
    2157              :      * create a temporary memory context that is used to hold data not yet
    2158              :      * dumped out to the index
    2159              :      */
    2160           26 :     buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
    2161              :                                               "Gin build temporary context",
    2162              :                                               ALLOCSET_DEFAULT_SIZES);
    2163              : 
    2164              :     /*
    2165              :      * create a temporary memory context that is used for calling
    2166              :      * ginExtractEntries(), and can be reset after each tuple
    2167              :      */
    2168           26 :     buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
    2169              :                                                "Gin build temporary context for user-defined function",
    2170              :                                                ALLOCSET_DEFAULT_SIZES);
    2171              : 
    2172           26 :     buildstate.accum.ginstate = &buildstate.ginstate;
    2173           26 :     ginInitBA(&buildstate.accum);
    2174              : 
    2175              : 
    2176              :     /* Look up shared state private to tuplesort.c */
    2177           26 :     sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
    2178           26 :     tuplesort_attach_shared(sharedsort, seg);
    2179              : 
    2180              :     /* Prepare to track buffer usage during parallel execution */
    2181           26 :     InstrStartParallelQuery();
    2182              : 
    2183              :     /*
    2184              :      * Might as well use reliable figure when doling out maintenance_work_mem
    2185              :      * (when requested number of workers were not launched, this will be
    2186              :      * somewhat higher than it is for other workers).
    2187              :      */
    2188           26 :     sortmem = maintenance_work_mem / ginshared->scantuplesortstates;
    2189              : 
    2190           26 :     _gin_parallel_scan_and_build(&buildstate, ginshared, sharedsort,
    2191              :                                  heapRel, indexRel, sortmem, false);
    2192              : 
    2193              :     /* Report WAL/buffer usage during parallel execution */
    2194           26 :     bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
    2195           26 :     walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
    2196           26 :     InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber],
    2197           26 :                           &walusage[ParallelWorkerNumber]);
    2198              : 
    2199           26 :     index_close(indexRel, indexLockmode);
    2200           26 :     table_close(heapRel, heapLockmode);
    2201           26 : }
    2202              : 
    2203              : /*
    2204              :  * Used to keep track of compressed TID lists when building a GIN tuple.
    2205              :  */
    2206              : typedef struct
    2207              : {
    2208              :     dlist_node  node;           /* linked list pointers */
    2209              :     GinPostingList *seg;
    2210              : } GinSegmentInfo;
    2211              : 
    2212              : /*
    2213              :  * _gin_build_tuple
    2214              :  *      Serialize the state for an index key into a tuple for tuplesort.
    2215              :  *
    2216              :  * The tuple has a number of scalar fields (mostly matching the build state),
    2217              :  * and then a data array that stores the key first, and then the TID list.
    2218              :  *
    2219              :  * For by-reference data types, we store the actual data. For by-val types
    2220              :  * we simply copy the whole Datum, so that we don't have to care about stuff
    2221              :  * like endianness etc. We could make it a little bit smaller, but it's not
    2222              :  * worth it - it's a tiny fraction of the data, and we need to MAXALIGN the
    2223              :  * start of the TID list anyway. So we wouldn't save anything. (This would
    2224              :  * not be a good idea for the permanent in-index data, since we'd prefer
    2225              :  * that that not depend on sizeof(Datum). But this is just a transient
    2226              :  * representation to use while sorting the data.)
    2227              :  *
    2228              :  * The TID list is serialized as compressed - it's highly compressible, and
    2229              :  * we already have ginCompressPostingList for this purpose. The list may be
    2230              :  * pretty long, so we compress it into multiple segments and then copy all
    2231              :  * of that into the GIN tuple.
    2232              :  */
    2233              : static GinTuple *
    2234        33178 : _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
    2235              :                  Datum key, int16 typlen, bool typbyval,
    2236              :                  ItemPointerData *items, uint32 nitems,
    2237              :                  Size *len)
    2238              : {
    2239              :     GinTuple   *tuple;
    2240              :     char       *ptr;
    2241              : 
    2242              :     Size        tuplen;
    2243              :     int         keylen;
    2244              : 
    2245              :     dlist_mutable_iter iter;
    2246              :     dlist_head  segments;
    2247              :     int         ncompressed;
    2248              :     Size        compresslen;
    2249              : 
    2250              :     /*
    2251              :      * Calculate how long is the key value. Only keys with GIN_CAT_NORM_KEY
    2252              :      * have actual non-empty key. We include varlena headers and \0 bytes for
    2253              :      * strings, to make it easier to access the data in-line.
    2254              :      *
    2255              :      * For byval types we simply copy the whole Datum. We could store just the
    2256              :      * necessary bytes, but this is simpler to work with and not worth the
    2257              :      * extra complexity. Moreover we still need to do the MAXALIGN to allow
    2258              :      * direct access to items pointers.
    2259              :      *
    2260              :      * XXX Note that for byval types we store the whole datum, no matter what
    2261              :      * the typlen value is.
    2262              :      */
    2263        33178 :     if (category != GIN_CAT_NORM_KEY)
    2264           30 :         keylen = 0;
    2265        33148 :     else if (typbyval)
    2266         9978 :         keylen = sizeof(Datum);
    2267        23170 :     else if (typlen > 0)
    2268            0 :         keylen = typlen;
    2269        23170 :     else if (typlen == -1)
    2270        23170 :         keylen = VARSIZE_ANY(DatumGetPointer(key));
    2271            0 :     else if (typlen == -2)
    2272            0 :         keylen = strlen(DatumGetPointer(key)) + 1;
    2273              :     else
    2274            0 :         elog(ERROR, "unexpected typlen value (%d)", typlen);
    2275              : 
    2276              :     /* compress the item pointers */
    2277        33178 :     ncompressed = 0;
    2278        33178 :     compresslen = 0;
    2279        33178 :     dlist_init(&segments);
    2280              : 
    2281              :     /* generate compressed segments of TID list chunks */
    2282        66356 :     while (ncompressed < nitems)
    2283              :     {
    2284              :         int         cnt;
    2285        33178 :         GinSegmentInfo *seginfo = palloc_object(GinSegmentInfo);
    2286              : 
    2287        66356 :         seginfo->seg = ginCompressPostingList(&items[ncompressed],
    2288        33178 :                                               (nitems - ncompressed),
    2289              :                                               UINT16_MAX,
    2290              :                                               &cnt);
    2291              : 
    2292        33178 :         ncompressed += cnt;
    2293        33178 :         compresslen += SizeOfGinPostingList(seginfo->seg);
    2294              : 
    2295        33178 :         dlist_push_tail(&segments, &seginfo->node);
    2296              :     }
    2297              : 
    2298              :     /*
    2299              :      * Determine GIN tuple length with all the data included. Be careful about
    2300              :      * alignment, to allow direct access to compressed segments (those require
    2301              :      * only SHORTALIGN).
    2302              :      */
    2303        33178 :     tuplen = SHORTALIGN(offsetof(GinTuple, data) + keylen) + compresslen;
    2304              : 
    2305        33178 :     *len = tuplen;
    2306              : 
    2307              :     /*
    2308              :      * Allocate space for the whole GIN tuple.
    2309              :      *
    2310              :      * The palloc0 is needed - writetup_index_gin will write the whole tuple
    2311              :      * to disk, so we need to make sure the padding bytes are defined
    2312              :      * (otherwise valgrind would report this).
    2313              :      */
    2314        33178 :     tuple = palloc0(tuplen);
    2315              : 
    2316        33178 :     tuple->tuplen = tuplen;
    2317        33178 :     tuple->attrnum = attrnum;
    2318        33178 :     tuple->category = category;
    2319        33178 :     tuple->keylen = keylen;
    2320        33178 :     tuple->nitems = nitems;
    2321              : 
    2322              :     /* key type info */
    2323        33178 :     tuple->typlen = typlen;
    2324        33178 :     tuple->typbyval = typbyval;
    2325              : 
    2326              :     /*
    2327              :      * Copy the key and items into the tuple. First the key value, which we
    2328              :      * can simply copy right at the beginning of the data array.
    2329              :      */
    2330        33178 :     if (category == GIN_CAT_NORM_KEY)
    2331              :     {
    2332        33148 :         if (typbyval)
    2333              :         {
    2334         9978 :             memcpy(tuple->data, &key, sizeof(Datum));
    2335              :         }
    2336        23170 :         else if (typlen > 0) /* byref, fixed length */
    2337              :         {
    2338            0 :             memcpy(tuple->data, DatumGetPointer(key), typlen);
    2339              :         }
    2340        23170 :         else if (typlen == -1)
    2341              :         {
    2342        23170 :             memcpy(tuple->data, DatumGetPointer(key), keylen);
    2343              :         }
    2344            0 :         else if (typlen == -2)
    2345              :         {
    2346            0 :             memcpy(tuple->data, DatumGetPointer(key), keylen);
    2347              :         }
    2348              :     }
    2349              : 
    2350              :     /* finally, copy the TIDs into the array */
    2351        33178 :     ptr = (char *) tuple + SHORTALIGN(offsetof(GinTuple, data) + keylen);
    2352              : 
    2353              :     /* copy in the compressed data, and free the segments */
    2354        66356 :     dlist_foreach_modify(iter, &segments)
    2355              :     {
    2356        33178 :         GinSegmentInfo *seginfo = dlist_container(GinSegmentInfo, node, iter.cur);
    2357              : 
    2358        33178 :         memcpy(ptr, seginfo->seg, SizeOfGinPostingList(seginfo->seg));
    2359              : 
    2360        33178 :         ptr += SizeOfGinPostingList(seginfo->seg);
    2361              : 
    2362        33178 :         dlist_delete(&seginfo->node);
    2363              : 
    2364        33178 :         pfree(seginfo->seg);
    2365        33178 :         pfree(seginfo);
    2366              :     }
    2367              : 
    2368        33178 :     return tuple;
    2369              : }
    2370              : 
    2371              : /*
    2372              :  * _gin_parse_tuple_key
    2373              :  *      Return a Datum representing the key stored in the tuple.
    2374              :  *
    2375              :  * Most of the tuple fields are directly accessible, the only thing that
    2376              :  * needs more care is the key and the TID list.
    2377              :  *
    2378              :  * For the key, this returns a regular Datum representing it. It's either the
    2379              :  * actual key value, or a pointer to the beginning of the data array (which is
    2380              :  * where the data was copied by _gin_build_tuple).
    2381              :  */
    2382              : static Datum
    2383       134836 : _gin_parse_tuple_key(GinTuple *a)
    2384              : {
    2385              :     Datum       key;
    2386              : 
    2387       134836 :     if (a->category != GIN_CAT_NORM_KEY)
    2388           30 :         return (Datum) 0;
    2389              : 
    2390       134806 :     if (a->typbyval)
    2391              :     {
    2392        29922 :         memcpy(&key, a->data, a->keylen);
    2393        29922 :         return key;
    2394              :     }
    2395              : 
    2396       104884 :     return PointerGetDatum(a->data);
    2397              : }
    2398              : 
    2399              : /*
    2400              : * _gin_parse_tuple_items
    2401              :  *      Return a pointer to a palloc'd array of decompressed TID array.
    2402              :  */
    2403              : static ItemPointer
    2404        33178 : _gin_parse_tuple_items(GinTuple *a)
    2405              : {
    2406              :     int         len;
    2407              :     char       *ptr;
    2408              :     int         ndecoded;
    2409              :     ItemPointer items;
    2410              : 
    2411        33178 :     len = a->tuplen - SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
    2412        33178 :     ptr = (char *) a + SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
    2413              : 
    2414        33178 :     items = ginPostingListDecodeAllSegments((GinPostingList *) ptr, len, &ndecoded);
    2415              : 
    2416              :     Assert(ndecoded == a->nitems);
    2417              : 
    2418        33178 :     return items;
    2419              : }
    2420              : 
    2421              : /*
    2422              :  * _gin_compare_tuples
    2423              :  *      Compare GIN tuples, used by tuplesort during parallel index build.
    2424              :  *
    2425              :  * The scalar fields (attrnum, category) are compared first, the key value is
    2426              :  * compared last. The comparisons are done using type-specific sort support
    2427              :  * functions.
    2428              :  *
    2429              :  * If the key value matches, we compare the first TID value in the TID list,
    2430              :  * which means the tuples are merged in an order in which they are most
    2431              :  * likely to be simply concatenated. (This "first" TID will also allow us
    2432              :  * to determine a point up to which the list is fully determined and can be
    2433              :  * written into the index to enforce a memory limit etc.)
    2434              :  */
    2435              : int
    2436        50872 : _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup)
    2437              : {
    2438              :     int         r;
    2439              :     Datum       keya,
    2440              :                 keyb;
    2441              : 
    2442        50872 :     if (a->attrnum < b->attrnum)
    2443            0 :         return -1;
    2444              : 
    2445        50872 :     if (a->attrnum > b->attrnum)
    2446            0 :         return 1;
    2447              : 
    2448        50872 :     if (a->category < b->category)
    2449           34 :         return -1;
    2450              : 
    2451        50838 :     if (a->category > b->category)
    2452            6 :         return 1;
    2453              : 
    2454        50832 :     if (a->category == GIN_CAT_NORM_KEY)
    2455              :     {
    2456        50829 :         keya = _gin_parse_tuple_key(a);
    2457        50829 :         keyb = _gin_parse_tuple_key(b);
    2458              : 
    2459        50829 :         r = ApplySortComparator(keya, false,
    2460              :                                 keyb, false,
    2461        50829 :                                 &ssup[a->attrnum - 1]);
    2462              : 
    2463              :         /* if the key is the same, consider the first TID in the array */
    2464        57954 :         return (r != 0) ? r : ItemPointerCompare(GinTupleGetFirst(a),
    2465         7125 :                                                  GinTupleGetFirst(b));
    2466              :     }
    2467              : 
    2468            3 :     return ItemPointerCompare(GinTupleGetFirst(a),
    2469            3 :                               GinTupleGetFirst(b));
    2470              : }
        

Generated by: LCOV version 2.0-1