LCOV - code coverage report
Current view: top level - src/backend/access/gin - gininsert.c (source / functions) Coverage Total Hit
Test: PostgreSQL 20devel Lines: 90.9 % 648 589
Test Date: 2026-07-03 19:57:34 Functions: 97.1 % 35 34
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 75.2 % 210 158

             Branch data     Line data    Source code
       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * gininsert.c
       4                 :             :  *    insert routines for the postgres inverted index access method.
       5                 :             :  *
       6                 :             :  *
       7                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :             :  *
      10                 :             :  * IDENTIFICATION
      11                 :             :  *          src/backend/access/gin/gininsert.c
      12                 :             :  *-------------------------------------------------------------------------
      13                 :             :  */
      14                 :             : 
      15                 :             : #include "postgres.h"
      16                 :             : 
      17                 :             : #include "access/gin_private.h"
      18                 :             : #include "access/gin_tuple.h"
      19                 :             : #include "access/parallel.h"
      20                 :             : #include "access/table.h"
      21                 :             : #include "access/tableam.h"
      22                 :             : #include "access/xloginsert.h"
      23                 :             : #include "catalog/index.h"
      24                 :             : #include "catalog/pg_collation.h"
      25                 :             : #include "commands/progress.h"
      26                 :             : #include "executor/instrument.h"
      27                 :             : #include "miscadmin.h"
      28                 :             : #include "nodes/execnodes.h"
      29                 :             : #include "pgstat.h"
      30                 :             : #include "storage/bufmgr.h"
      31                 :             : #include "storage/condition_variable.h"
      32                 :             : #include "storage/proc.h"
      33                 :             : #include "storage/predicate.h"
      34                 :             : #include "tcop/tcopprot.h"
      35                 :             : #include "utils/datum.h"
      36                 :             : #include "utils/memutils.h"
      37                 :             : #include "utils/builtins.h"
      38                 :             : #include "utils/rel.h"
      39                 :             : #include "utils/tuplesort.h"
      40                 :             : #include "utils/typcache.h"
      41                 :             : #include "utils/wait_event.h"
      42                 :             : 
      43                 :             : 
      44                 :             : /* Magic numbers for parallel state sharing */
      45                 :             : #define PARALLEL_KEY_GIN_SHARED         UINT64CONST(0xB000000000000001)
      46                 :             : #define PARALLEL_KEY_TUPLESORT          UINT64CONST(0xB000000000000002)
      47                 :             : #define PARALLEL_KEY_QUERY_TEXT         UINT64CONST(0xB000000000000003)
      48                 :             : #define PARALLEL_KEY_WAL_USAGE          UINT64CONST(0xB000000000000004)
      49                 :             : #define PARALLEL_KEY_BUFFER_USAGE       UINT64CONST(0xB000000000000005)
      50                 :             : 
      51                 :             : /*
      52                 :             :  * Status for index builds performed in parallel.  This is allocated in a
      53                 :             :  * dynamic shared memory segment.
      54                 :             :  */
      55                 :             : typedef struct GinBuildShared
      56                 :             : {
      57                 :             :     /*
      58                 :             :      * These fields are not modified during the build.  They primarily exist
      59                 :             :      * for the benefit of worker processes that need to create state
      60                 :             :      * corresponding to that used by the leader.
      61                 :             :      */
      62                 :             :     Oid         heaprelid;
      63                 :             :     Oid         indexrelid;
      64                 :             :     bool        isconcurrent;
      65                 :             :     int         scantuplesortstates;
      66                 :             : 
      67                 :             :     /*
      68                 :             :      * workersdonecv is used to monitor the progress of workers.  All parallel
      69                 :             :      * participants must indicate that they are done before leader can use
      70                 :             :      * results built by the workers (and before leader can write the data into
      71                 :             :      * the index).
      72                 :             :      */
      73                 :             :     ConditionVariable workersdonecv;
      74                 :             : 
      75                 :             :     /*
      76                 :             :      * mutex protects all following fields
      77                 :             :      *
      78                 :             :      * These fields contain status information of interest to GIN index builds
      79                 :             :      * that must work just the same when an index is built in parallel.
      80                 :             :      */
      81                 :             :     slock_t     mutex;
      82                 :             : 
      83                 :             :     /*
      84                 :             :      * Mutable state that is maintained by workers, and reported back to
      85                 :             :      * leader at end of the scans.
      86                 :             :      *
      87                 :             :      * nparticipantsdone is number of worker processes finished.
      88                 :             :      *
      89                 :             :      * reltuples is the total number of input heap tuples.
      90                 :             :      *
      91                 :             :      * indtuples is the total number of tuples that made it into the index.
      92                 :             :      */
      93                 :             :     int         nparticipantsdone;
      94                 :             :     double      reltuples;
      95                 :             :     double      indtuples;
      96                 :             : 
      97                 :             :     /*
      98                 :             :      * ParallelTableScanDescData data follows. Can't directly embed here, as
      99                 :             :      * implementations of the parallel table scan desc interface might need
     100                 :             :      * stronger alignment.
     101                 :             :      */
     102                 :             : } GinBuildShared;
     103                 :             : 
     104                 :             : /*
     105                 :             :  * Return pointer to a GinBuildShared's parallel table scan.
     106                 :             :  *
     107                 :             :  * c.f. shm_toc_allocate as to why BUFFERALIGN is used, rather than just
     108                 :             :  * MAXALIGN.
     109                 :             :  */
     110                 :             : #define ParallelTableScanFromGinBuildShared(shared) \
     111                 :             :     (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(GinBuildShared)))
     112                 :             : 
     113                 :             : /*
     114                 :             :  * Status for leader in parallel index build.
     115                 :             :  */
     116                 :             : typedef struct GinLeader
     117                 :             : {
     118                 :             :     /* parallel context itself */
     119                 :             :     ParallelContext *pcxt;
     120                 :             : 
     121                 :             :     /*
     122                 :             :      * nparticipanttuplesorts is the exact number of worker processes
     123                 :             :      * successfully launched, plus one leader process if it participates as a
     124                 :             :      * worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader
     125                 :             :      * participating as a worker).
     126                 :             :      */
     127                 :             :     int         nparticipanttuplesorts;
     128                 :             : 
     129                 :             :     /*
     130                 :             :      * Leader process convenience pointers to shared state (leader avoids TOC
     131                 :             :      * lookups).
     132                 :             :      *
     133                 :             :      * GinBuildShared is the shared state for entire build.  sharedsort is the
     134                 :             :      * shared, tuplesort-managed state passed to each process tuplesort.
     135                 :             :      * snapshot is the snapshot used by the scan iff an MVCC snapshot is
     136                 :             :      * required.
     137                 :             :      */
     138                 :             :     GinBuildShared *ginshared;
     139                 :             :     Sharedsort *sharedsort;
     140                 :             :     Snapshot    snapshot;
     141                 :             :     WalUsage   *walusage;
     142                 :             :     BufferUsage *bufferusage;
     143                 :             : } GinLeader;
     144                 :             : 
     145                 :             : typedef struct
     146                 :             : {
     147                 :             :     GinState    ginstate;
     148                 :             :     double      indtuples;
     149                 :             :     GinStatsData buildStats;
     150                 :             :     MemoryContext tmpCtx;
     151                 :             :     MemoryContext funcCtx;
     152                 :             :     BuildAccumulator accum;
     153                 :             :     ItemPointerData tid;
     154                 :             :     int         work_mem;
     155                 :             : 
     156                 :             :     /*
     157                 :             :      * bs_leader is only present when a parallel index build is performed, and
     158                 :             :      * only in the leader process.
     159                 :             :      */
     160                 :             :     GinLeader  *bs_leader;
     161                 :             : 
     162                 :             :     /* number of participating workers (including leader) */
     163                 :             :     int         bs_num_workers;
     164                 :             : 
     165                 :             :     /* used to pass information from workers to leader */
     166                 :             :     double      bs_numtuples;
     167                 :             :     double      bs_reltuples;
     168                 :             : 
     169                 :             :     /*
     170                 :             :      * The sortstate is used by workers (including the leader). It has to be
     171                 :             :      * part of the build state, because that's the only thing passed to the
     172                 :             :      * build callback etc.
     173                 :             :      */
     174                 :             :     Tuplesortstate *bs_sortstate;
     175                 :             : 
     176                 :             :     /*
     177                 :             :      * The sortstate used only within a single worker for the first merge pass
     178                 :             :      * happening there. In principle it doesn't need to be part of the build
     179                 :             :      * state and we could pass it around directly, but it's more convenient
     180                 :             :      * this way. And it's part of the build state, after all.
     181                 :             :      */
     182                 :             :     Tuplesortstate *bs_worker_sort;
     183                 :             : } GinBuildState;
     184                 :             : 
     185                 :             : 
     186                 :             : /* parallel index builds */
     187                 :             : static void _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
     188                 :             :                                 bool isconcurrent, int request);
     189                 :             : static void _gin_end_parallel(GinLeader *ginleader, GinBuildState *state);
     190                 :             : static Size _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot);
     191                 :             : static double _gin_parallel_heapscan(GinBuildState *state);
     192                 :             : static double _gin_parallel_merge(GinBuildState *state);
     193                 :             : static void _gin_leader_participate_as_worker(GinBuildState *buildstate,
     194                 :             :                                               Relation heap, Relation index);
     195                 :             : static void _gin_parallel_scan_and_build(GinBuildState *state,
     196                 :             :                                          GinBuildShared *ginshared,
     197                 :             :                                          Sharedsort *sharedsort,
     198                 :             :                                          Relation heap, Relation index,
     199                 :             :                                          int sortmem, bool progress);
     200                 :             : 
     201                 :             : static ItemPointer _gin_parse_tuple_items(GinTuple *a);
     202                 :             : static Datum _gin_parse_tuple_key(GinTuple *a);
     203                 :             : 
     204                 :             : static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
     205                 :             :                                   Datum key, int16 typlen, bool typbyval,
     206                 :             :                                   ItemPointerData *items, uint32 nitems,
     207                 :             :                                   Size *len);
     208                 :             : 
     209                 :             : /*
     210                 :             :  * Adds array of item pointers to tuple's posting list, or
     211                 :             :  * creates posting tree and tuple pointing to tree in case
     212                 :             :  * of not enough space.  Max size of tuple is defined in
     213                 :             :  * GinFormTuple().  Returns a new, modified index tuple.
     214                 :             :  * items[] must be in sorted order with no duplicates.
     215                 :             :  */
     216                 :             : static IndexTuple
     217                 :      118094 : addItemPointersToLeafTuple(GinState *ginstate,
     218                 :             :                            IndexTuple old,
     219                 :             :                            ItemPointerData *items, uint32 nitem,
     220                 :             :                            GinStatsData *buildStats, Buffer buffer)
     221                 :             : {
     222                 :             :     OffsetNumber attnum;
     223                 :             :     Datum       key;
     224                 :             :     GinNullCategory category;
     225                 :             :     IndexTuple  res;
     226                 :             :     ItemPointerData *newItems,
     227                 :             :                *oldItems;
     228                 :             :     int         oldNPosting,
     229                 :             :                 newNPosting,
     230                 :             :                 nwritten;
     231                 :             :     GinPostingList *compressedList;
     232                 :             : 
     233                 :             :     Assert(!GinIsPostingTree(old));
     234                 :             : 
     235                 :      118094 :     attnum = gintuple_get_attrnum(ginstate, old);
     236                 :      118094 :     key = gintuple_get_key(ginstate, old, &category);
     237                 :             : 
     238                 :             :     /* merge the old and new posting lists */
     239                 :      118094 :     oldItems = ginReadTuple(ginstate, attnum, old, &oldNPosting);
     240                 :             : 
     241                 :      118094 :     newItems = ginMergeItemPointers(items, nitem,
     242                 :             :                                     oldItems, oldNPosting,
     243                 :             :                                     &newNPosting);
     244                 :             : 
     245                 :             :     /* Compress the posting list, and try to a build tuple with room for it */
     246                 :      118094 :     res = NULL;
     247                 :      118094 :     compressedList = ginCompressPostingList(newItems, newNPosting, GinMaxItemSize, &nwritten);
     248         [ +  + ]:      118094 :     if (nwritten == newNPosting)
     249                 :             :     {
     250                 :      118088 :         res = GinFormTuple(ginstate, attnum, key, category,
     251                 :             :                            (char *) compressedList,
     252                 :      118088 :                            SizeOfGinPostingList(compressedList),
     253                 :             :                            newNPosting,
     254                 :             :                            false);
     255                 :             :     }
     256                 :             : 
     257                 :      118094 :     pfree(newItems);
     258                 :      118094 :     pfree(compressedList);
     259                 :             : 
     260         [ +  + ]:      118094 :     if (!res)
     261                 :             :     {
     262                 :             :         /* posting list would be too big, convert to posting tree */
     263                 :             :         BlockNumber postingRoot;
     264                 :             : 
     265                 :             :         /*
     266                 :             :          * Initialize posting tree with the old tuple's posting list.  It's
     267                 :             :          * surely small enough to fit on one posting-tree page, and should
     268                 :             :          * already be in order with no duplicates.
     269                 :             :          */
     270                 :          12 :         postingRoot = createPostingTree(ginstate->index,
     271                 :             :                                         oldItems,
     272                 :             :                                         oldNPosting,
     273                 :             :                                         buildStats,
     274                 :             :                                         buffer);
     275                 :             : 
     276                 :             :         /* Now insert the TIDs-to-be-added into the posting tree */
     277                 :          12 :         ginInsertItemPointers(ginstate->index, postingRoot,
     278                 :             :                               items, nitem,
     279                 :             :                               buildStats);
     280                 :             : 
     281                 :             :         /* And build a new posting-tree-only result tuple */
     282                 :          12 :         res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
     283                 :          12 :         GinSetPostingTree(res, postingRoot);
     284                 :             :     }
     285                 :      118094 :     pfree(oldItems);
     286                 :             : 
     287                 :      118094 :     return res;
     288                 :             : }
     289                 :             : 
     290                 :             : /*
     291                 :             :  * Build a fresh leaf tuple, either posting-list or posting-tree format
     292                 :             :  * depending on whether the given items list will fit.
     293                 :             :  * items[] must be in sorted order with no duplicates.
     294                 :             :  *
     295                 :             :  * This is basically the same logic as in addItemPointersToLeafTuple,
     296                 :             :  * but working from slightly different input.
     297                 :             :  */
     298                 :             : static IndexTuple
     299                 :      421281 : buildFreshLeafTuple(GinState *ginstate,
     300                 :             :                     OffsetNumber attnum, Datum key, GinNullCategory category,
     301                 :             :                     ItemPointerData *items, uint32 nitem,
     302                 :             :                     GinStatsData *buildStats, Buffer buffer)
     303                 :             : {
     304                 :      421281 :     IndexTuple  res = NULL;
     305                 :             :     GinPostingList *compressedList;
     306                 :             :     int         nwritten;
     307                 :             : 
     308                 :             :     /* try to build a posting list tuple with all the items */
     309                 :      421281 :     compressedList = ginCompressPostingList(items, nitem, GinMaxItemSize, &nwritten);
     310         [ +  + ]:      421281 :     if (nwritten == nitem)
     311                 :             :     {
     312                 :      421225 :         res = GinFormTuple(ginstate, attnum, key, category,
     313                 :             :                            (char *) compressedList,
     314                 :      421225 :                            SizeOfGinPostingList(compressedList),
     315                 :             :                            nitem, false);
     316                 :             :     }
     317                 :      421281 :     pfree(compressedList);
     318                 :             : 
     319         [ +  + ]:      421281 :     if (!res)
     320                 :             :     {
     321                 :             :         /* posting list would be too big, build posting tree */
     322                 :             :         BlockNumber postingRoot;
     323                 :             : 
     324                 :             :         /*
     325                 :             :          * Build posting-tree-only result tuple.  We do this first so as to
     326                 :             :          * fail quickly if the key is too big.
     327                 :             :          */
     328                 :          56 :         res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
     329                 :             : 
     330                 :             :         /*
     331                 :             :          * Initialize a new posting tree with the TIDs.
     332                 :             :          */
     333                 :          56 :         postingRoot = createPostingTree(ginstate->index, items, nitem,
     334                 :             :                                         buildStats, buffer);
     335                 :             : 
     336                 :             :         /* And save the root link in the result tuple */
     337                 :          56 :         GinSetPostingTree(res, postingRoot);
     338                 :             :     }
     339                 :             : 
     340                 :      421281 :     return res;
     341                 :             : }
     342                 :             : 
     343                 :             : /*
     344                 :             :  * Insert one or more heap TIDs associated with the given key value.
     345                 :             :  * This will either add a single key entry, or enlarge a pre-existing entry.
     346                 :             :  *
     347                 :             :  * During an index build, buildStats is non-null and the counters
     348                 :             :  * it contains should be incremented as needed.
     349                 :             :  */
     350                 :             : void
     351                 :      567419 : ginEntryInsert(GinState *ginstate,
     352                 :             :                OffsetNumber attnum, Datum key, GinNullCategory category,
     353                 :             :                ItemPointerData *items, uint32 nitem,
     354                 :             :                GinStatsData *buildStats)
     355                 :             : {
     356                 :             :     GinBtreeData btree;
     357                 :             :     GinBtreeEntryInsertData insertdata;
     358                 :             :     GinBtreeStack *stack;
     359                 :             :     IndexTuple  itup;
     360                 :             :     Page        page;
     361                 :             : 
     362                 :      567419 :     insertdata.isDelete = false;
     363                 :             : 
     364                 :      567419 :     ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
     365                 :      567419 :     btree.isBuild = (buildStats != NULL);
     366                 :             : 
     367                 :      567419 :     stack = ginFindLeafPage(&btree, false, false);
     368                 :      567419 :     page = BufferGetPage(stack->buffer);
     369                 :             : 
     370         [ +  + ]:      567419 :     if (btree.findItem(&btree, stack))
     371                 :             :     {
     372                 :             :         /* found pre-existing entry */
     373                 :      146136 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     374                 :             : 
     375         [ +  + ]:      146136 :         if (GinIsPostingTree(itup))
     376                 :             :         {
     377                 :             :             /* add entries to existing posting tree */
     378                 :       28038 :             BlockNumber rootPostingTree = GinGetPostingTree(itup);
     379                 :             : 
     380                 :             :             /* release all stack */
     381                 :       28038 :             LockBuffer(stack->buffer, GIN_UNLOCK);
     382                 :       28038 :             freeGinBtreeStack(stack);
     383                 :             : 
     384                 :             :             /* insert into posting tree */
     385                 :       28038 :             ginInsertItemPointers(ginstate->index, rootPostingTree,
     386                 :             :                                   items, nitem,
     387                 :             :                                   buildStats);
     388                 :       28036 :             return;
     389                 :             :         }
     390                 :             : 
     391                 :      118098 :         CheckForSerializableConflictIn(ginstate->index, NULL,
     392                 :             :                                        BufferGetBlockNumber(stack->buffer));
     393                 :             :         /* modify an existing leaf entry */
     394                 :      118094 :         itup = addItemPointersToLeafTuple(ginstate, itup,
     395                 :             :                                           items, nitem, buildStats, stack->buffer);
     396                 :             : 
     397                 :      118094 :         insertdata.isDelete = true;
     398                 :             :     }
     399                 :             :     else
     400                 :             :     {
     401                 :      421283 :         CheckForSerializableConflictIn(ginstate->index, NULL,
     402                 :             :                                        BufferGetBlockNumber(stack->buffer));
     403                 :             :         /* no match, so construct a new leaf entry */
     404                 :      421281 :         itup = buildFreshLeafTuple(ginstate, attnum, key, category,
     405                 :             :                                    items, nitem, buildStats, stack->buffer);
     406                 :             : 
     407                 :             :         /*
     408                 :             :          * nEntries counts leaf tuples, so increment it only when we make a
     409                 :             :          * new one.
     410                 :             :          */
     411         [ +  + ]:      421281 :         if (buildStats)
     412                 :       90840 :             buildStats->nEntries++;
     413                 :             :     }
     414                 :             : 
     415                 :             :     /* Insert the new or modified leaf tuple */
     416                 :      539375 :     insertdata.entry = itup;
     417                 :      539375 :     ginInsertValue(&btree, stack, &insertdata, buildStats);
     418                 :      539373 :     pfree(itup);
     419                 :             : }
     420                 :             : 
     421                 :             : /*
     422                 :             :  * Extract index entries for a single indexable item, and add them to the
     423                 :             :  * BuildAccumulator's state.
     424                 :             :  *
     425                 :             :  * This function is used only during initial index creation.
     426                 :             :  */
     427                 :             : static void
     428                 :      486246 : ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum,
     429                 :             :                        Datum value, bool isNull,
     430                 :             :                        ItemPointer heapptr)
     431                 :             : {
     432                 :             :     Datum      *entries;
     433                 :             :     GinNullCategory *categories;
     434                 :             :     int32       nentries;
     435                 :             :     MemoryContext oldCtx;
     436                 :             : 
     437                 :      486246 :     oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
     438                 :      486246 :     entries = ginExtractEntries(buildstate->accum.ginstate, attnum,
     439                 :             :                                 value, isNull,
     440                 :             :                                 &nentries, &categories);
     441                 :      486246 :     MemoryContextSwitchTo(oldCtx);
     442                 :             : 
     443                 :      486246 :     ginInsertBAEntries(&buildstate->accum, heapptr, attnum,
     444                 :             :                        entries, categories, nentries);
     445                 :             : 
     446                 :      486246 :     buildstate->indtuples += nentries;
     447                 :             : 
     448                 :      486246 :     MemoryContextReset(buildstate->funcCtx);
     449                 :      486246 : }
     450                 :             : 
     451                 :             : static void
     452                 :      471656 : ginBuildCallback(Relation index, ItemPointer tid, Datum *values,
     453                 :             :                  bool *isnull, bool tupleIsAlive, void *state)
     454                 :             : {
     455                 :      471656 :     GinBuildState *buildstate = (GinBuildState *) state;
     456                 :             :     MemoryContext oldCtx;
     457                 :             :     int         i;
     458                 :             : 
     459                 :      471656 :     oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
     460                 :             : 
     461         [ +  + ]:      943726 :     for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
     462                 :      472070 :         ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
     463                 :      472070 :                                values[i], isnull[i], tid);
     464                 :             : 
     465                 :             :     /* If we've maxed out our available memory, dump everything to the index */
     466         [ -  + ]:      471656 :     if (buildstate->accum.allocatedMemory >= maintenance_work_mem * (Size) 1024)
     467                 :             :     {
     468                 :             :         ItemPointerData *list;
     469                 :             :         Datum       key;
     470                 :             :         GinNullCategory category;
     471                 :             :         uint32      nlist;
     472                 :             :         OffsetNumber attnum;
     473                 :             : 
     474                 :           0 :         ginBeginBAScan(&buildstate->accum);
     475                 :           0 :         while ((list = ginGetBAEntry(&buildstate->accum,
     476         [ #  # ]:           0 :                                      &attnum, &key, &category, &nlist)) != NULL)
     477                 :             :         {
     478                 :             :             /* there could be many entries, so be willing to abort here */
     479         [ #  # ]:           0 :             CHECK_FOR_INTERRUPTS();
     480                 :           0 :             ginEntryInsert(&buildstate->ginstate, attnum, key, category,
     481                 :             :                            list, nlist, &buildstate->buildStats);
     482                 :             :         }
     483                 :             : 
     484                 :           0 :         MemoryContextReset(buildstate->tmpCtx);
     485                 :           0 :         ginInitBA(&buildstate->accum);
     486                 :             :     }
     487                 :             : 
     488                 :      471656 :     MemoryContextSwitchTo(oldCtx);
     489                 :      471656 : }
     490                 :             : 
     491                 :             : /*
     492                 :             :  * ginFlushBuildState
     493                 :             :  *      Write all data from BuildAccumulator into the tuplesort.
     494                 :             :  *
     495                 :             :  * The number of TIDs written to the tuplesort at once is limited, to reduce
     496                 :             :  * the amount of memory needed when merging the intermediate results later.
     497                 :             :  * The leader will see up to two chunks per worker, so calculate the limit to
     498                 :             :  * not need more than MaxAllocSize overall.
     499                 :             :  *
     500                 :             :  * We don't need to worry about overflowing maintenance_work_mem. We can't
     501                 :             :  * build chunks larger than work_mem, and that limit was set so that workers
     502                 :             :  * produce sufficiently small chunks.
     503                 :             :  */
     504                 :             : static void
     505                 :          51 : ginFlushBuildState(GinBuildState *buildstate, Relation index)
     506                 :             : {
     507                 :             :     ItemPointerData *list;
     508                 :             :     Datum       key;
     509                 :             :     GinNullCategory category;
     510                 :             :     uint32      nlist;
     511                 :             :     OffsetNumber attnum;
     512                 :          51 :     TupleDesc   tdesc = RelationGetDescr(index);
     513                 :             :     uint32      maxlen;
     514                 :             : 
     515                 :             :     /* maximum number of TIDs per chunk (two chunks per worker) */
     516                 :          51 :     maxlen = MaxAllocSize / sizeof(ItemPointerData);
     517                 :          51 :     maxlen /= (2 * buildstate->bs_num_workers);
     518                 :             : 
     519                 :          51 :     ginBeginBAScan(&buildstate->accum);
     520                 :       23452 :     while ((list = ginGetBAEntry(&buildstate->accum,
     521         [ +  + ]:       23452 :                                  &attnum, &key, &category, &nlist)) != NULL)
     522                 :             :     {
     523                 :             :         /* information about the key */
     524                 :       23401 :         CompactAttribute *attr = TupleDescCompactAttr(tdesc, (attnum - 1));
     525                 :             : 
     526                 :             :         /* start of the chunk */
     527                 :       23401 :         uint32      offset = 0;
     528                 :             : 
     529                 :             :         /* split the entry into smaller chunk with up to maxlen items */
     530         [ +  + ]:       46802 :         while (offset < nlist)
     531                 :             :         {
     532                 :             :             /* GIN tuple and tuple length */
     533                 :             :             GinTuple   *tup;
     534                 :             :             Size        tuplen;
     535                 :       23401 :             uint32      len = Min(maxlen, nlist - offset);
     536                 :             : 
     537                 :             :             /* there could be many entries, so be willing to abort here */
     538         [ +  + ]:       23401 :             CHECK_FOR_INTERRUPTS();
     539                 :             : 
     540                 :       23401 :             tup = _gin_build_tuple(attnum, category,
     541                 :       23401 :                                    key, attr->attlen, attr->attbyval,
     542                 :       23401 :                                    &list[offset], len,
     543                 :             :                                    &tuplen);
     544                 :             : 
     545                 :       23401 :             offset += len;
     546                 :             : 
     547                 :       23401 :             tuplesort_putgintuple(buildstate->bs_worker_sort, tup, tuplen);
     548                 :             : 
     549                 :       23401 :             pfree(tup);
     550                 :             :         }
     551                 :             :     }
     552                 :             : 
     553                 :          51 :     MemoryContextReset(buildstate->tmpCtx);
     554                 :          51 :     ginInitBA(&buildstate->accum);
     555                 :          51 : }
     556                 :             : 
     557                 :             : /*
     558                 :             :  * ginBuildCallbackParallel
     559                 :             :  *      Callback for the parallel index build.
     560                 :             :  *
     561                 :             :  * This is similar to the serial build callback ginBuildCallback, but
     562                 :             :  * instead of writing the accumulated entries into the index, each worker
     563                 :             :  * writes them into a (local) tuplesort.
     564                 :             :  *
     565                 :             :  * The worker then sorts and combines these entries, before writing them
     566                 :             :  * into a shared tuplesort for the leader (see _gin_parallel_scan_and_build
     567                 :             :  * for the whole process).
     568                 :             :  */
     569                 :             : static void
     570                 :       14176 : ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values,
     571                 :             :                          bool *isnull, bool tupleIsAlive, void *state)
     572                 :             : {
     573                 :       14176 :     GinBuildState *buildstate = (GinBuildState *) state;
     574                 :             :     MemoryContext oldCtx;
     575                 :             :     int         i;
     576                 :             : 
     577                 :       14176 :     oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
     578                 :             : 
     579                 :             :     /*
     580                 :             :      * if scan wrapped around - flush accumulated entries and start anew
     581                 :             :      *
     582                 :             :      * With parallel scans, we don't have a guarantee the scan does not start
     583                 :             :      * half-way through the relation (serial builds disable sync scans and
     584                 :             :      * always start from block 0, parallel scans require allow_sync=true).
     585                 :             :      *
     586                 :             :      * Building the posting lists assumes the TIDs are monotonic and never go
     587                 :             :      * back, and the wrap around would break that. We handle that by detecting
     588                 :             :      * the wraparound, and flushing all entries. This means we'll later see
     589                 :             :      * two separate entries with non-overlapping TID lists (which can be
     590                 :             :      * combined by merge sort).
     591                 :             :      *
     592                 :             :      * To detect a wraparound, we remember the last TID seen by each worker
     593                 :             :      * (for any key). If the next TID seen by the worker is lower, the scan
     594                 :             :      * must have wrapped around.
     595                 :             :      */
     596         [ -  + ]:       14176 :     if (ItemPointerCompare(tid, &buildstate->tid) < 0)
     597                 :           0 :         ginFlushBuildState(buildstate, index);
     598                 :             : 
     599                 :             :     /* remember the TID we're about to process */
     600                 :       14176 :     buildstate->tid = *tid;
     601                 :             : 
     602         [ +  + ]:       28352 :     for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
     603                 :       14176 :         ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
     604                 :       14176 :                                values[i], isnull[i], tid);
     605                 :             : 
     606                 :             :     /*
     607                 :             :      * If we've maxed out our available memory, dump everything to the
     608                 :             :      * tuplesort. We use half the per-worker fraction of maintenance_work_mem,
     609                 :             :      * the other half is used for the tuplesort.
     610                 :             :      */
     611         [ -  + ]:       14176 :     if (buildstate->accum.allocatedMemory >= buildstate->work_mem * (Size) 1024)
     612                 :           0 :         ginFlushBuildState(buildstate, index);
     613                 :             : 
     614                 :       14176 :     MemoryContextSwitchTo(oldCtx);
     615                 :       14176 : }
     616                 :             : 
     617                 :             : IndexBuildResult *
     618                 :         217 : ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     619                 :             : {
     620                 :             :     IndexBuildResult *result;
     621                 :             :     double      reltuples;
     622                 :             :     GinBuildState buildstate;
     623                 :         217 :     GinBuildState *state = &buildstate;
     624                 :             :     Buffer      RootBuffer,
     625                 :             :                 MetaBuffer;
     626                 :             :     ItemPointerData *list;
     627                 :             :     Datum       key;
     628                 :             :     GinNullCategory category;
     629                 :             :     uint32      nlist;
     630                 :             :     MemoryContext oldCtx;
     631                 :             :     OffsetNumber attnum;
     632                 :             : 
     633         [ -  + ]:         217 :     if (RelationGetNumberOfBlocks(index) != 0)
     634         [ #  # ]:           0 :         elog(ERROR, "index \"%s\" already contains data",
     635                 :             :              RelationGetRelationName(index));
     636                 :             : 
     637                 :         217 :     initGinState(&buildstate.ginstate, index);
     638                 :         217 :     buildstate.indtuples = 0;
     639                 :         217 :     memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
     640                 :             : 
     641                 :             :     /* Initialize fields for parallel build too. */
     642                 :         217 :     buildstate.bs_numtuples = 0;
     643                 :         217 :     buildstate.bs_reltuples = 0;
     644                 :         217 :     buildstate.bs_leader = NULL;
     645                 :         217 :     memset(&buildstate.tid, 0, sizeof(ItemPointerData));
     646                 :             : 
     647                 :             :     /* initialize the meta page */
     648                 :         217 :     MetaBuffer = GinNewBuffer(index);
     649                 :             : 
     650                 :             :     /* initialize the root page */
     651                 :         217 :     RootBuffer = GinNewBuffer(index);
     652                 :             : 
     653                 :         217 :     START_CRIT_SECTION();
     654                 :         217 :     GinInitMetabuffer(MetaBuffer);
     655                 :         217 :     MarkBufferDirty(MetaBuffer);
     656                 :         217 :     GinInitBuffer(RootBuffer, GIN_LEAF);
     657                 :         217 :     MarkBufferDirty(RootBuffer);
     658                 :             : 
     659                 :             : 
     660                 :         217 :     UnlockReleaseBuffer(MetaBuffer);
     661                 :         217 :     UnlockReleaseBuffer(RootBuffer);
     662                 :         217 :     END_CRIT_SECTION();
     663                 :             : 
     664                 :             :     /* count the root as first entry page */
     665                 :         217 :     buildstate.buildStats.nEntryPages++;
     666                 :             : 
     667                 :             :     /*
     668                 :             :      * create a temporary memory context that is used to hold data not yet
     669                 :             :      * dumped out to the index
     670                 :             :      */
     671                 :         217 :     buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
     672                 :             :                                               "Gin build temporary context",
     673                 :             :                                               ALLOCSET_DEFAULT_SIZES);
     674                 :             : 
     675                 :             :     /*
     676                 :             :      * create a temporary memory context that is used for calling
     677                 :             :      * ginExtractEntries(), and can be reset after each tuple
     678                 :             :      */
     679                 :         217 :     buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
     680                 :             :                                                "Gin build temporary context for user-defined function",
     681                 :             :                                                ALLOCSET_DEFAULT_SIZES);
     682                 :             : 
     683                 :         217 :     buildstate.accum.ginstate = &buildstate.ginstate;
     684                 :         217 :     ginInitBA(&buildstate.accum);
     685                 :             : 
     686                 :             :     /* Report table scan phase started */
     687                 :         217 :     pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
     688                 :             :                                  PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN);
     689                 :             : 
     690                 :             :     /*
     691                 :             :      * Attempt to launch parallel worker scan when required
     692                 :             :      *
     693                 :             :      * XXX plan_create_index_workers makes the number of workers dependent on
     694                 :             :      * maintenance_work_mem, requiring 32MB for each worker. For GIN that's
     695                 :             :      * reasonable too, because we sort the data just like btree. It does
     696                 :             :      * ignore the memory used to accumulate data in memory (set by work_mem),
     697                 :             :      * but there is no way to communicate that to plan_create_index_workers.
     698                 :             :      */
     699         [ +  + ]:         217 :     if (indexInfo->ii_ParallelWorkers > 0)
     700                 :          17 :         _gin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent,
     701                 :             :                             indexInfo->ii_ParallelWorkers);
     702                 :             : 
     703                 :             :     /*
     704                 :             :      * If parallel build requested and at least one worker process was
     705                 :             :      * successfully launched, set up coordination state, wait for workers to
     706                 :             :      * complete. Then read all tuples from the shared tuplesort and insert
     707                 :             :      * them into the index.
     708                 :             :      *
     709                 :             :      * In serial mode, simply scan the table and build the index one index
     710                 :             :      * tuple at a time.
     711                 :             :      */
     712         [ +  + ]:         217 :     if (state->bs_leader)
     713                 :             :     {
     714                 :             :         SortCoordinate coordinate;
     715                 :             : 
     716                 :          17 :         coordinate = palloc0_object(SortCoordinateData);
     717                 :          17 :         coordinate->isWorker = false;
     718                 :          17 :         coordinate->nParticipants =
     719                 :          17 :             state->bs_leader->nparticipanttuplesorts;
     720                 :          17 :         coordinate->sharedsort = state->bs_leader->sharedsort;
     721                 :             : 
     722                 :             :         /*
     723                 :             :          * Begin leader tuplesort.
     724                 :             :          *
     725                 :             :          * In cases where parallelism is involved, the leader receives the
     726                 :             :          * same share of maintenance_work_mem as a serial sort (it is
     727                 :             :          * generally treated in the same way as a serial sort once we return).
     728                 :             :          * Parallel worker Tuplesortstates will have received only a fraction
     729                 :             :          * of maintenance_work_mem, though.
     730                 :             :          *
     731                 :             :          * We rely on the lifetime of the Leader Tuplesortstate almost not
     732                 :             :          * overlapping with any worker Tuplesortstate's lifetime.  There may
     733                 :             :          * be some small overlap, but that's okay because we rely on leader
     734                 :             :          * Tuplesortstate only allocating a small, fixed amount of memory
     735                 :             :          * here. When its tuplesort_performsort() is called (by our caller),
     736                 :             :          * and significant amounts of memory are likely to be used, all
     737                 :             :          * workers must have already freed almost all memory held by their
     738                 :             :          * Tuplesortstates (they are about to go away completely, too).  The
     739                 :             :          * overall effect is that maintenance_work_mem always represents an
     740                 :             :          * absolute high watermark on the amount of memory used by a CREATE
     741                 :             :          * INDEX operation, regardless of the use of parallelism or any other
     742                 :             :          * factor.
     743                 :             :          */
     744                 :          17 :         state->bs_sortstate =
     745                 :          17 :             tuplesort_begin_index_gin(heap, index,
     746                 :             :                                       maintenance_work_mem, coordinate,
     747                 :             :                                       TUPLESORT_NONE);
     748                 :             : 
     749                 :             :         /* scan the relation in parallel and merge per-worker results */
     750                 :          17 :         reltuples = _gin_parallel_merge(state);
     751                 :             : 
     752                 :          17 :         _gin_end_parallel(state->bs_leader, state);
     753                 :             :     }
     754                 :             :     else                        /* no parallel index build */
     755                 :             :     {
     756                 :             :         /*
     757                 :             :          * Do the heap scan.  We disallow sync scan here because
     758                 :             :          * dataPlaceToPage prefers to receive tuples in TID order.
     759                 :             :          */
     760                 :         200 :         reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
     761                 :             :                                            ginBuildCallback, &buildstate, NULL);
     762                 :             : 
     763                 :             :         /* dump remaining entries to the index */
     764                 :         200 :         oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
     765                 :         200 :         ginBeginBAScan(&buildstate.accum);
     766                 :       75940 :         while ((list = ginGetBAEntry(&buildstate.accum,
     767         [ +  + ]:       75940 :                                      &attnum, &key, &category, &nlist)) != NULL)
     768                 :             :         {
     769                 :             :             /* there could be many entries, so be willing to abort here */
     770         [ -  + ]:       75740 :             CHECK_FOR_INTERRUPTS();
     771                 :       75740 :             ginEntryInsert(&buildstate.ginstate, attnum, key, category,
     772                 :             :                            list, nlist, &buildstate.buildStats);
     773                 :             :         }
     774                 :         200 :         MemoryContextSwitchTo(oldCtx);
     775                 :             :     }
     776                 :             : 
     777                 :         217 :     MemoryContextDelete(buildstate.funcCtx);
     778                 :         217 :     MemoryContextDelete(buildstate.tmpCtx);
     779                 :             : 
     780                 :             :     /*
     781                 :             :      * Update metapage stats
     782                 :             :      */
     783                 :         217 :     buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
     784                 :         217 :     ginUpdateStats(index, &buildstate.buildStats, true);
     785                 :             : 
     786                 :             :     /*
     787                 :             :      * We didn't write WAL records as we built the index, so if WAL-logging is
     788                 :             :      * required, write all pages to the WAL now.
     789                 :             :      */
     790   [ +  +  +  +  :         217 :     if (RelationNeedsWAL(index))
             +  +  +  + ]
     791                 :             :     {
     792                 :         153 :         log_newpage_range(index, MAIN_FORKNUM,
     793                 :             :                           0, RelationGetNumberOfBlocks(index),
     794                 :             :                           true);
     795                 :             :     }
     796                 :             : 
     797                 :             :     /*
     798                 :             :      * Return statistics
     799                 :             :      */
     800                 :         217 :     result = palloc_object(IndexBuildResult);
     801                 :             : 
     802                 :         217 :     result->heap_tuples = reltuples;
     803                 :         217 :     result->index_tuples = buildstate.indtuples;
     804                 :             : 
     805                 :         217 :     return result;
     806                 :             : }
     807                 :             : 
     808                 :             : /*
     809                 :             :  *  ginbuildempty() -- build an empty gin index in the initialization fork
     810                 :             :  */
     811                 :             : void
     812                 :           4 : ginbuildempty(Relation index)
     813                 :             : {
     814                 :             :     Buffer      RootBuffer,
     815                 :             :                 MetaBuffer;
     816                 :             : 
     817                 :             :     /* An empty GIN index has two pages. */
     818                 :           4 :     MetaBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
     819                 :             :                                    EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
     820                 :           4 :     RootBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
     821                 :             :                                    EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
     822                 :             : 
     823                 :             :     /* Initialize and xlog metabuffer and root buffer. */
     824                 :           4 :     START_CRIT_SECTION();
     825                 :           4 :     GinInitMetabuffer(MetaBuffer);
     826                 :           4 :     MarkBufferDirty(MetaBuffer);
     827                 :           4 :     log_newpage_buffer(MetaBuffer, true);
     828                 :           4 :     GinInitBuffer(RootBuffer, GIN_LEAF);
     829                 :           4 :     MarkBufferDirty(RootBuffer);
     830                 :           4 :     log_newpage_buffer(RootBuffer, false);
     831                 :           4 :     END_CRIT_SECTION();
     832                 :             : 
     833                 :             :     /* Unlock and release the buffers. */
     834                 :           4 :     UnlockReleaseBuffer(MetaBuffer);
     835                 :           4 :     UnlockReleaseBuffer(RootBuffer);
     836                 :           4 : }
     837                 :             : 
     838                 :             : /*
     839                 :             :  * Insert index entries for a single indexable item during "normal"
     840                 :             :  * (non-fast-update) insertion
     841                 :             :  */
     842                 :             : static void
     843                 :       29048 : ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
     844                 :             :                    Datum value, bool isNull,
     845                 :             :                    ItemPointer item)
     846                 :             : {
     847                 :             :     Datum      *entries;
     848                 :             :     GinNullCategory *categories;
     849                 :             :     int32       i,
     850                 :             :                 nentries;
     851                 :             : 
     852                 :       29048 :     entries = ginExtractEntries(ginstate, attnum, value, isNull,
     853                 :             :                                 &nentries, &categories);
     854                 :             : 
     855         [ +  + ]:      261495 :     for (i = 0; i < nentries; i++)
     856                 :             :     {
     857                 :             :         /* there could be many entries, so be willing to abort here */
     858         [ -  + ]:      232457 :         CHECK_FOR_INTERRUPTS();
     859                 :      232457 :         ginEntryInsert(ginstate, attnum, entries[i], categories[i],
     860                 :             :                        item, 1, NULL);
     861                 :             :     }
     862                 :       29038 : }
     863                 :             : 
     864                 :             : bool
     865                 :      205891 : gininsert(Relation index, Datum *values, bool *isnull,
     866                 :             :           ItemPointer ht_ctid, Relation heapRel,
     867                 :             :           IndexUniqueCheck checkUnique,
     868                 :             :           bool indexUnchanged,
     869                 :             :           IndexInfo *indexInfo)
     870                 :             : {
     871                 :      205891 :     GinState   *ginstate = (GinState *) indexInfo->ii_AmCache;
     872                 :             :     MemoryContext oldCtx;
     873                 :             :     MemoryContext insertCtx;
     874                 :             :     int         i;
     875                 :             : 
     876                 :             :     /* Initialize GinState cache if first call in this statement */
     877         [ +  + ]:      205891 :     if (ginstate == NULL)
     878                 :             :     {
     879                 :        1455 :         oldCtx = MemoryContextSwitchTo(indexInfo->ii_Context);
     880                 :        1455 :         ginstate = palloc_object(GinState);
     881                 :        1455 :         initGinState(ginstate, index);
     882                 :        1455 :         indexInfo->ii_AmCache = ginstate;
     883                 :        1455 :         MemoryContextSwitchTo(oldCtx);
     884                 :             :     }
     885                 :             : 
     886                 :      205891 :     insertCtx = AllocSetContextCreate(CurrentMemoryContext,
     887                 :             :                                       "Gin insert temporary context",
     888                 :             :                                       ALLOCSET_DEFAULT_SIZES);
     889                 :             : 
     890                 :      205891 :     oldCtx = MemoryContextSwitchTo(insertCtx);
     891                 :             : 
     892   [ +  +  +  + ]:      205891 :     if (GinGetUseFastUpdate(index))
     893                 :      176840 :     {
     894                 :             :         GinTupleCollector collector;
     895                 :             : 
     896                 :      176843 :         memset(&collector, 0, sizeof(GinTupleCollector));
     897                 :             : 
     898         [ +  + ]:      433738 :         for (i = 0; i < ginstate->origTupdesc->natts; i++)
     899                 :      256895 :             ginHeapTupleFastCollect(ginstate, &collector,
     900                 :      256895 :                                     (OffsetNumber) (i + 1),
     901                 :      256895 :                                     values[i], isnull[i],
     902                 :             :                                     ht_ctid);
     903                 :             : 
     904                 :      176843 :         ginHeapTupleFastInsert(ginstate, &collector);
     905                 :             :     }
     906                 :             :     else
     907                 :             :     {
     908         [ +  + ]:       58086 :         for (i = 0; i < ginstate->origTupdesc->natts; i++)
     909                 :       29048 :             ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
     910                 :       29048 :                                values[i], isnull[i],
     911                 :             :                                ht_ctid);
     912                 :             :     }
     913                 :             : 
     914                 :      205878 :     MemoryContextSwitchTo(oldCtx);
     915                 :      205878 :     MemoryContextDelete(insertCtx);
     916                 :             : 
     917                 :      205878 :     return false;
     918                 :             : }
     919                 :             : 
     920                 :             : /*
     921                 :             :  * Create parallel context, and launch workers for leader.
     922                 :             :  *
     923                 :             :  * buildstate argument should be initialized (with the exception of the
     924                 :             :  * tuplesort states, which may later be created based on shared
     925                 :             :  * state initially set up here).
     926                 :             :  *
     927                 :             :  * isconcurrent indicates if operation is CREATE INDEX CONCURRENTLY.
     928                 :             :  *
     929                 :             :  * request is the target number of parallel worker processes to launch.
     930                 :             :  *
     931                 :             :  * Sets buildstate's GinLeader, which caller must use to shut down parallel
     932                 :             :  * mode by passing it to _gin_end_parallel() at the very end of its index
     933                 :             :  * build.  If not even a single worker process can be launched, this is
     934                 :             :  * never set, and caller should proceed with a serial index build.
     935                 :             :  */
     936                 :             : static void
     937                 :          17 : _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
     938                 :             :                     bool isconcurrent, int request)
     939                 :             : {
     940                 :             :     ParallelContext *pcxt;
     941                 :             :     int         scantuplesortstates;
     942                 :             :     Snapshot    snapshot;
     943                 :             :     Size        estginshared;
     944                 :             :     Size        estsort;
     945                 :             :     GinBuildShared *ginshared;
     946                 :             :     Sharedsort *sharedsort;
     947                 :          17 :     GinLeader  *ginleader = palloc0_object(GinLeader);
     948                 :             :     WalUsage   *walusage;
     949                 :             :     BufferUsage *bufferusage;
     950                 :          17 :     bool        leaderparticipates = true;
     951                 :             :     int         querylen;
     952                 :             : 
     953                 :             : #ifdef DISABLE_LEADER_PARTICIPATION
     954                 :             :     leaderparticipates = false;
     955                 :             : #endif
     956                 :             : 
     957                 :             :     /*
     958                 :             :      * Enter parallel mode, and create context for parallel build of gin index
     959                 :             :      */
     960                 :          17 :     EnterParallelMode();
     961                 :             :     Assert(request > 0);
     962                 :          17 :     pcxt = CreateParallelContext("postgres", "_gin_parallel_build_main",
     963                 :             :                                  request);
     964                 :             : 
     965                 :          17 :     scantuplesortstates = leaderparticipates ? request + 1 : request;
     966                 :             : 
     967                 :             :     /*
     968                 :             :      * Prepare for scan of the base relation.  In a normal index build, we use
     969                 :             :      * SnapshotAny because we must retrieve all tuples and do our own time
     970                 :             :      * qual checks (because we have to index RECENTLY_DEAD tuples).  In a
     971                 :             :      * concurrent build, we take a regular MVCC snapshot and index whatever's
     972                 :             :      * live according to that.
     973                 :             :      */
     974         [ +  + ]:          17 :     if (!isconcurrent)
     975                 :          13 :         snapshot = SnapshotAny;
     976                 :             :     else
     977                 :           4 :         snapshot = RegisterSnapshot(GetTransactionSnapshot());
     978                 :             : 
     979                 :             :     /*
     980                 :             :      * Estimate size for our own PARALLEL_KEY_GIN_SHARED workspace.
     981                 :             :      */
     982                 :          17 :     estginshared = _gin_parallel_estimate_shared(heap, snapshot);
     983                 :          17 :     shm_toc_estimate_chunk(&pcxt->estimator, estginshared);
     984                 :          17 :     estsort = tuplesort_estimate_shared(scantuplesortstates);
     985                 :          17 :     shm_toc_estimate_chunk(&pcxt->estimator, estsort);
     986                 :             : 
     987                 :          17 :     shm_toc_estimate_keys(&pcxt->estimator, 2);
     988                 :             : 
     989                 :             :     /*
     990                 :             :      * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
     991                 :             :      * and PARALLEL_KEY_BUFFER_USAGE.
     992                 :             :      *
     993                 :             :      * If there are no extensions loaded that care, we could skip this.  We
     994                 :             :      * have no way of knowing whether anyone's looking at pgWalUsage or
     995                 :             :      * pgBufferUsage, so do it unconditionally.
     996                 :             :      */
     997                 :          17 :     shm_toc_estimate_chunk(&pcxt->estimator,
     998                 :             :                            mul_size(sizeof(WalUsage), pcxt->nworkers));
     999                 :          17 :     shm_toc_estimate_keys(&pcxt->estimator, 1);
    1000                 :          17 :     shm_toc_estimate_chunk(&pcxt->estimator,
    1001                 :             :                            mul_size(sizeof(BufferUsage), pcxt->nworkers));
    1002                 :          17 :     shm_toc_estimate_keys(&pcxt->estimator, 1);
    1003                 :             : 
    1004                 :             :     /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
    1005         [ +  - ]:          17 :     if (debug_query_string)
    1006                 :             :     {
    1007                 :          17 :         querylen = strlen(debug_query_string);
    1008                 :          17 :         shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
    1009                 :          17 :         shm_toc_estimate_keys(&pcxt->estimator, 1);
    1010                 :             :     }
    1011                 :             :     else
    1012                 :           0 :         querylen = 0;           /* keep compiler quiet */
    1013                 :             : 
    1014                 :             :     /* Everyone's had a chance to ask for space, so now create the DSM */
    1015                 :          17 :     InitializeParallelDSM(pcxt);
    1016                 :             : 
    1017                 :             :     /* If no DSM segment was available, back out (do serial build) */
    1018         [ -  + ]:          17 :     if (pcxt->seg == NULL)
    1019                 :             :     {
    1020         [ #  # ]:           0 :         if (IsMVCCSnapshot(snapshot))
    1021                 :           0 :             UnregisterSnapshot(snapshot);
    1022                 :           0 :         DestroyParallelContext(pcxt);
    1023                 :           0 :         ExitParallelMode();
    1024                 :           0 :         return;
    1025                 :             :     }
    1026                 :             : 
    1027                 :             :     /* Store shared build state, for which we reserved space */
    1028                 :          17 :     ginshared = (GinBuildShared *) shm_toc_allocate(pcxt->toc, estginshared);
    1029                 :             :     /* Initialize immutable state */
    1030                 :          17 :     ginshared->heaprelid = RelationGetRelid(heap);
    1031                 :          17 :     ginshared->indexrelid = RelationGetRelid(index);
    1032                 :          17 :     ginshared->isconcurrent = isconcurrent;
    1033                 :          17 :     ginshared->scantuplesortstates = scantuplesortstates;
    1034                 :             : 
    1035                 :          17 :     ConditionVariableInit(&ginshared->workersdonecv);
    1036                 :          17 :     SpinLockInit(&ginshared->mutex);
    1037                 :             : 
    1038                 :             :     /* Initialize mutable state */
    1039                 :          17 :     ginshared->nparticipantsdone = 0;
    1040                 :          17 :     ginshared->reltuples = 0.0;
    1041                 :          17 :     ginshared->indtuples = 0.0;
    1042                 :             : 
    1043                 :          17 :     table_parallelscan_initialize(heap,
    1044                 :             :                                   ParallelTableScanFromGinBuildShared(ginshared),
    1045                 :             :                                   snapshot);
    1046                 :             : 
    1047                 :             :     /*
    1048                 :             :      * Store shared tuplesort-private state, for which we reserved space.
    1049                 :             :      * Then, initialize opaque state using tuplesort routine.
    1050                 :             :      */
    1051                 :          17 :     sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
    1052                 :          17 :     tuplesort_initialize_shared(sharedsort, scantuplesortstates,
    1053                 :             :                                 pcxt->seg);
    1054                 :             : 
    1055                 :          17 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_GIN_SHARED, ginshared);
    1056                 :          17 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
    1057                 :             : 
    1058                 :             :     /* Store query string for workers */
    1059         [ +  - ]:          17 :     if (debug_query_string)
    1060                 :             :     {
    1061                 :             :         char       *sharedquery;
    1062                 :             : 
    1063                 :          17 :         sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
    1064                 :          17 :         memcpy(sharedquery, debug_query_string, querylen + 1);
    1065                 :          17 :         shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
    1066                 :             :     }
    1067                 :             : 
    1068                 :             :     /*
    1069                 :             :      * Allocate space for each worker's WalUsage and BufferUsage; no need to
    1070                 :             :      * initialize.
    1071                 :             :      */
    1072                 :          17 :     walusage = shm_toc_allocate(pcxt->toc,
    1073                 :          17 :                                 mul_size(sizeof(WalUsage), pcxt->nworkers));
    1074                 :          17 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
    1075                 :          17 :     bufferusage = shm_toc_allocate(pcxt->toc,
    1076                 :          17 :                                    mul_size(sizeof(BufferUsage), pcxt->nworkers));
    1077                 :          17 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
    1078                 :             : 
    1079                 :             :     /* Launch workers, saving status for leader/caller */
    1080                 :          17 :     LaunchParallelWorkers(pcxt);
    1081                 :          17 :     ginleader->pcxt = pcxt;
    1082                 :          17 :     ginleader->nparticipanttuplesorts = pcxt->nworkers_launched;
    1083         [ +  - ]:          17 :     if (leaderparticipates)
    1084                 :          17 :         ginleader->nparticipanttuplesorts++;
    1085                 :          17 :     ginleader->ginshared = ginshared;
    1086                 :          17 :     ginleader->sharedsort = sharedsort;
    1087                 :          17 :     ginleader->snapshot = snapshot;
    1088                 :          17 :     ginleader->walusage = walusage;
    1089                 :          17 :     ginleader->bufferusage = bufferusage;
    1090                 :             : 
    1091                 :             :     /* If no workers were successfully launched, back out (do serial build) */
    1092         [ -  + ]:          17 :     if (pcxt->nworkers_launched == 0)
    1093                 :             :     {
    1094                 :           0 :         _gin_end_parallel(ginleader, NULL);
    1095                 :           0 :         return;
    1096                 :             :     }
    1097                 :             : 
    1098                 :             :     /* Save leader state now that it's clear build will be parallel */
    1099                 :          17 :     buildstate->bs_leader = ginleader;
    1100                 :             : 
    1101                 :             :     /* Join heap scan ourselves */
    1102         [ +  - ]:          17 :     if (leaderparticipates)
    1103                 :          17 :         _gin_leader_participate_as_worker(buildstate, heap, index);
    1104                 :             : 
    1105                 :             :     /*
    1106                 :             :      * Caller needs to wait for all launched workers when we return.  Make
    1107                 :             :      * sure that the failure-to-start case will not hang forever.
    1108                 :             :      */
    1109                 :          17 :     WaitForParallelWorkersToAttach(pcxt);
    1110                 :             : }
    1111                 :             : 
    1112                 :             : /*
    1113                 :             :  * Shut down workers, destroy parallel context, and end parallel mode.
    1114                 :             :  */
    1115                 :             : static void
    1116                 :          17 : _gin_end_parallel(GinLeader *ginleader, GinBuildState *state)
    1117                 :             : {
    1118                 :             :     int         i;
    1119                 :             : 
    1120                 :             :     /* Shutdown worker processes */
    1121                 :          17 :     WaitForParallelWorkersToFinish(ginleader->pcxt);
    1122                 :             : 
    1123                 :             :     /*
    1124                 :             :      * Next, accumulate WAL usage.  (This must wait for the workers to finish,
    1125                 :             :      * or we might get incomplete data.)
    1126                 :             :      */
    1127         [ +  + ]:          51 :     for (i = 0; i < ginleader->pcxt->nworkers_launched; i++)
    1128                 :          34 :         InstrAccumParallelQuery(&ginleader->bufferusage[i], &ginleader->walusage[i]);
    1129                 :             : 
    1130                 :             :     /* Free last reference to MVCC snapshot, if one was used */
    1131         [ +  + ]:          17 :     if (IsMVCCSnapshot(ginleader->snapshot))
    1132                 :           4 :         UnregisterSnapshot(ginleader->snapshot);
    1133                 :          17 :     DestroyParallelContext(ginleader->pcxt);
    1134                 :          17 :     ExitParallelMode();
    1135                 :          17 : }
    1136                 :             : 
    1137                 :             : /*
    1138                 :             :  * Within leader, wait for end of heap scan.
    1139                 :             :  *
    1140                 :             :  * When called, parallel heap scan started by _gin_begin_parallel() will
    1141                 :             :  * already be underway within worker processes (when leader participates
    1142                 :             :  * as a worker, we should end up here just as workers are finishing).
    1143                 :             :  *
    1144                 :             :  * Returns the total number of heap tuples scanned.
    1145                 :             :  */
    1146                 :             : static double
    1147                 :          17 : _gin_parallel_heapscan(GinBuildState *state)
    1148                 :             : {
    1149                 :          17 :     GinBuildShared *ginshared = state->bs_leader->ginshared;
    1150                 :             :     int         nparticipanttuplesorts;
    1151                 :             : 
    1152                 :          17 :     nparticipanttuplesorts = state->bs_leader->nparticipanttuplesorts;
    1153                 :             :     for (;;)
    1154                 :             :     {
    1155                 :          51 :         SpinLockAcquire(&ginshared->mutex);
    1156         [ +  + ]:          51 :         if (ginshared->nparticipantsdone == nparticipanttuplesorts)
    1157                 :             :         {
    1158                 :             :             /* copy the data into leader state */
    1159                 :          17 :             state->bs_reltuples = ginshared->reltuples;
    1160                 :          17 :             state->bs_numtuples = ginshared->indtuples;
    1161                 :             : 
    1162                 :          17 :             SpinLockRelease(&ginshared->mutex);
    1163                 :          17 :             break;
    1164                 :             :         }
    1165                 :          34 :         SpinLockRelease(&ginshared->mutex);
    1166                 :             : 
    1167                 :          34 :         ConditionVariableSleep(&ginshared->workersdonecv,
    1168                 :             :                                WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN);
    1169                 :             :     }
    1170                 :             : 
    1171                 :          17 :     ConditionVariableCancelSleep();
    1172                 :             : 
    1173                 :          17 :     return state->bs_reltuples;
    1174                 :             : }
    1175                 :             : 
    1176                 :             : /*
    1177                 :             :  * Buffer used to accumulate TIDs from multiple GinTuples for the same key
    1178                 :             :  * (we read these from the tuplesort, sorted by the key).
    1179                 :             :  *
    1180                 :             :  * This is similar to BuildAccumulator in that it's used to collect TIDs
    1181                 :             :  * in memory before inserting them into the index, but it's much simpler
    1182                 :             :  * as it only deals with a single index key at a time.
    1183                 :             :  *
    1184                 :             :  * When adding TIDs to the buffer, we make sure to keep them sorted, both
    1185                 :             :  * during the initial table scan (and detecting when the scan wraps around),
    1186                 :             :  * and during merging (where we do mergesort).
    1187                 :             :  */
    1188                 :             : typedef struct GinBuffer
    1189                 :             : {
    1190                 :             :     OffsetNumber attnum;
    1191                 :             :     GinNullCategory category;
    1192                 :             :     Datum       key;            /* 0 if no key (and keylen == 0) */
    1193                 :             :     Size        keylen;         /* number of bytes (not typlen) */
    1194                 :             : 
    1195                 :             :     /* type info */
    1196                 :             :     int16       typlen;
    1197                 :             :     bool        typbyval;
    1198                 :             : 
    1199                 :             :     /* Number of TIDs to collect before attempt to write some out. */
    1200                 :             :     int         maxitems;
    1201                 :             : 
    1202                 :             :     /* array of TID values */
    1203                 :             :     int         nitems;
    1204                 :             :     int         nfrozen;
    1205                 :             :     SortSupport ssup;           /* for sorting/comparing keys */
    1206                 :             :     ItemPointerData *items;
    1207                 :             : } GinBuffer;
    1208                 :             : 
    1209                 :             : /*
    1210                 :             :  * Check that TID array contains valid values, and that it's sorted (if we
    1211                 :             :  * expect it to be).
    1212                 :             :  */
    1213                 :             : static void
    1214                 :       85303 : AssertCheckItemPointers(GinBuffer *buffer)
    1215                 :             : {
    1216                 :             : #ifdef USE_ASSERT_CHECKING
    1217                 :             :     /* we should not have a buffer with no TIDs to sort */
    1218                 :             :     Assert(buffer->items != NULL);
    1219                 :             :     Assert(buffer->nitems > 0);
    1220                 :             : 
    1221                 :             :     for (int i = 0; i < buffer->nitems; i++)
    1222                 :             :     {
    1223                 :             :         Assert(ItemPointerIsValid(&buffer->items[i]));
    1224                 :             : 
    1225                 :             :         /* don't check ordering for the first TID item */
    1226                 :             :         if (i == 0)
    1227                 :             :             continue;
    1228                 :             : 
    1229                 :             :         Assert(ItemPointerCompare(&buffer->items[i - 1], &buffer->items[i]) < 0);
    1230                 :             :     }
    1231                 :             : #endif
    1232                 :       85303 : }
    1233                 :             : 
    1234                 :             : /*
    1235                 :             :  * GinBuffer checks
    1236                 :             :  *
    1237                 :             :  * Make sure the nitems/items fields are consistent (either the array is empty
    1238                 :             :  * or not empty, the fields need to agree). If there are items, check ordering.
    1239                 :             :  */
    1240                 :             : static void
    1241                 :       93559 : AssertCheckGinBuffer(GinBuffer *buffer)
    1242                 :             : {
    1243                 :             : #ifdef USE_ASSERT_CHECKING
    1244                 :             :     /* if we have any items, the array must exist */
    1245                 :             :     Assert(!((buffer->nitems > 0) && (buffer->items == NULL)));
    1246                 :             : 
    1247                 :             :     /*
    1248                 :             :      * The buffer may be empty, in which case we must not call the check of
    1249                 :             :      * item pointers, because that assumes non-emptiness.
    1250                 :             :      */
    1251                 :             :     if (buffer->nitems == 0)
    1252                 :             :         return;
    1253                 :             : 
    1254                 :             :     /* Make sure the item pointers are valid and sorted. */
    1255                 :             :     AssertCheckItemPointers(buffer);
    1256                 :             : #endif
    1257                 :       93559 : }
    1258                 :             : 
    1259                 :             : /*
    1260                 :             :  * GinBufferInit
    1261                 :             :  *      Initialize buffer to store tuples for a GIN index.
    1262                 :             :  *
    1263                 :             :  * Initialize the buffer used to accumulate TID for a single key at a time
    1264                 :             :  * (we process the data sorted), so we know when we received all data for
    1265                 :             :  * a given key.
    1266                 :             :  *
    1267                 :             :  * Initializes sort support procedures for all index attributes.
    1268                 :             :  */
    1269                 :             : static GinBuffer *
    1270                 :          68 : GinBufferInit(Relation index)
    1271                 :             : {
    1272                 :          68 :     GinBuffer  *buffer = palloc0_object(GinBuffer);
    1273                 :             :     int         i,
    1274                 :             :                 nKeys;
    1275                 :          68 :     TupleDesc   desc = RelationGetDescr(index);
    1276                 :             : 
    1277                 :             :     /*
    1278                 :             :      * How many items can we fit into the memory limit? We don't want to end
    1279                 :             :      * with too many TIDs. and 64kB seems more than enough. But maybe this
    1280                 :             :      * should be tied to maintenance_work_mem or something like that?
    1281                 :             :      */
    1282                 :          68 :     buffer->maxitems = (64 * 1024L) / sizeof(ItemPointerData);
    1283                 :             : 
    1284                 :          68 :     nKeys = IndexRelationGetNumberOfKeyAttributes(index);
    1285                 :             : 
    1286                 :          68 :     buffer->ssup = palloc0_array(SortSupportData, nKeys);
    1287                 :             : 
    1288                 :             :     /*
    1289                 :             :      * Lookup ordering operator for the index key data type, and initialize
    1290                 :             :      * the sort support function.
    1291                 :             :      */
    1292         [ +  + ]:         136 :     for (i = 0; i < nKeys; i++)
    1293                 :             :     {
    1294                 :             :         Oid         cmpFunc;
    1295                 :          68 :         SortSupport sortKey = &buffer->ssup[i];
    1296                 :          68 :         Form_pg_attribute att = TupleDescAttr(desc, i);
    1297                 :             : 
    1298                 :          68 :         sortKey->ssup_cxt = CurrentMemoryContext;
    1299                 :          68 :         sortKey->ssup_collation = index->rd_indcollation[i];
    1300                 :             : 
    1301         [ +  - ]:          68 :         if (!OidIsValid(sortKey->ssup_collation))
    1302                 :          68 :             sortKey->ssup_collation = DEFAULT_COLLATION_OID;
    1303                 :             : 
    1304                 :          68 :         sortKey->ssup_nulls_first = false;
    1305                 :          68 :         sortKey->ssup_attno = i + 1;
    1306                 :          68 :         sortKey->abbreviate = false;
    1307                 :             : 
    1308                 :             :         Assert(sortKey->ssup_attno != 0);
    1309                 :             : 
    1310                 :             :         /*
    1311                 :             :          * If the compare proc isn't specified in the opclass definition, look
    1312                 :             :          * up the index key type's default btree comparator.
    1313                 :             :          */
    1314                 :          68 :         cmpFunc = index_getprocid(index, i + 1, GIN_COMPARE_PROC);
    1315         [ -  + ]:          68 :         if (cmpFunc == InvalidOid)
    1316                 :             :         {
    1317                 :             :             TypeCacheEntry *typentry;
    1318                 :             : 
    1319                 :           0 :             typentry = lookup_type_cache(att->atttypid,
    1320                 :             :                                          TYPECACHE_CMP_PROC_FINFO);
    1321         [ #  # ]:           0 :             if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
    1322         [ #  # ]:           0 :                 ereport(ERROR,
    1323                 :             :                         (errcode(ERRCODE_UNDEFINED_FUNCTION),
    1324                 :             :                          errmsg("could not identify a comparison function for type %s",
    1325                 :             :                                 format_type_be(att->atttypid))));
    1326                 :             : 
    1327                 :           0 :             cmpFunc = typentry->cmp_proc_finfo.fn_oid;
    1328                 :             :         }
    1329                 :             : 
    1330                 :          68 :         PrepareSortSupportComparisonShim(cmpFunc, sortKey);
    1331                 :             :     }
    1332                 :             : 
    1333                 :          68 :     return buffer;
    1334                 :             : }
    1335                 :             : 
    1336                 :             : /* Is the buffer empty, i.e. has no TID values in the array? */
    1337                 :             : static bool
    1338                 :       93740 : GinBufferIsEmpty(GinBuffer *buffer)
    1339                 :             : {
    1340                 :       93740 :     return (buffer->nitems == 0);
    1341                 :             : }
    1342                 :             : 
    1343                 :             : /*
    1344                 :             :  * GinBufferKeyEquals
    1345                 :             :  *      Can the buffer store TIDs for the provided GIN tuple (same key)?
    1346                 :             :  *
    1347                 :             :  * Compare if the tuple matches the already accumulated data in the GIN
    1348                 :             :  * buffer. Compare scalar fields first, before the actual key.
    1349                 :             :  *
    1350                 :             :  * Returns true if the key matches, and the TID belongs to the buffer, or
    1351                 :             :  * false if the key does not match.
    1352                 :             :  */
    1353                 :             : static bool
    1354                 :       46757 : GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup)
    1355                 :             : {
    1356                 :             :     int         r;
    1357                 :             :     Datum       tupkey;
    1358                 :             : 
    1359                 :       46757 :     AssertCheckGinBuffer(buffer);
    1360                 :             : 
    1361         [ -  + ]:       46757 :     if (tup->attrnum != buffer->attnum)
    1362                 :           0 :         return false;
    1363                 :             : 
    1364                 :             :     /* same attribute should have the same type info */
    1365                 :             :     Assert(tup->typbyval == buffer->typbyval);
    1366                 :             :     Assert(tup->typlen == buffer->typlen);
    1367                 :             : 
    1368         [ +  + ]:       46757 :     if (tup->category != buffer->category)
    1369                 :          37 :         return false;
    1370                 :             : 
    1371                 :             :     /*
    1372                 :             :      * For NULL/empty keys, this means equality, for normal keys we need to
    1373                 :             :      * compare the actual key value.
    1374                 :             :      */
    1375         [ +  + ]:       46720 :     if (buffer->category != GIN_CAT_NORM_KEY)
    1376                 :           5 :         return true;
    1377                 :             : 
    1378                 :             :     /*
    1379                 :             :      * For the tuple, get either the first sizeof(Datum) bytes for byval
    1380                 :             :      * types, or a pointer to the beginning of the data array.
    1381                 :             :      */
    1382         [ +  + ]:       46715 :     tupkey = (buffer->typbyval) ? *(Datum *) tup->data : PointerGetDatum(tup->data);
    1383                 :             : 
    1384                 :       46715 :     r = ApplySortComparator(buffer->key, false,
    1385                 :             :                             tupkey, false,
    1386                 :       46715 :                             &buffer->ssup[buffer->attnum - 1]);
    1387                 :             : 
    1388                 :       46715 :     return (r == 0);
    1389                 :             : }
    1390                 :             : 
    1391                 :             : /*
    1392                 :             :  * GinBufferShouldTrim
    1393                 :             :  *      Should we trim the list of item pointers?
    1394                 :             :  *
    1395                 :             :  * By trimming we understand writing out and removing the tuple IDs that
    1396                 :             :  * we know can't change by future merges. We can deduce the TID up to which
    1397                 :             :  * this is guaranteed from the "first" TID in each GIN tuple, which provides
    1398                 :             :  * a "horizon" (for a given key) thanks to the sort.
    1399                 :             :  *
    1400                 :             :  * We don't want to do this too often - compressing longer TID lists is more
    1401                 :             :  * efficient. But we also don't want to accumulate too many TIDs, for two
    1402                 :             :  * reasons. First, it consumes memory and we might exceed maintenance_work_mem
    1403                 :             :  * (or whatever limit applies), even if that's unlikely because TIDs are very
    1404                 :             :  * small so we can fit a lot of them. Second, and more importantly, long TID
    1405                 :             :  * lists are an issue if the scan wraps around, because a key may get a very
    1406                 :             :  * wide list (with min/max TID for that key), forcing "full" mergesorts for
    1407                 :             :  * every list merged into it (instead of the efficient append).
    1408                 :             :  *
    1409                 :             :  * So we look at two things when deciding if to trim - if the resulting list
    1410                 :             :  * (after adding TIDs from the new tuple) would be too long, and if there is
    1411                 :             :  * enough TIDs to trim (with values less than "first" TID from the new tuple),
    1412                 :             :  * we do the trim. By enough we mean at least 128 TIDs (mostly an arbitrary
    1413                 :             :  * number).
    1414                 :             :  *
    1415                 :             :  * We try freezing TIDs at the beginning of the list first, before attempting
    1416                 :             :  * to trim the buffer. This may allow trimming the data earlier, reducing the
    1417                 :             :  * memory usage and excluding it from the mergesort.
    1418                 :             :  */
    1419                 :             : static bool
    1420                 :       46802 : GinBufferShouldTrim(GinBuffer *buffer, GinTuple *tup)
    1421                 :             : {
    1422                 :             :     /*
    1423                 :             :      * Check if the last TID in the current list is frozen. This is the case
    1424                 :             :      * when merging non-overlapping lists, e.g. in each parallel worker.
    1425                 :             :      */
    1426   [ +  +  -  + ]:       55103 :     if ((buffer->nitems > 0) &&
    1427                 :        8301 :         (ItemPointerCompare(&buffer->items[buffer->nitems - 1],
    1428                 :        8301 :                             GinTupleGetFirst(tup)) == 0))
    1429                 :           0 :         buffer->nfrozen = buffer->nitems;
    1430                 :             : 
    1431                 :             :     /*
    1432                 :             :      * Now find the last TID we know to be frozen, i.e. the last TID right
    1433                 :             :      * before the new GIN tuple.
    1434                 :             :      *
    1435                 :             :      * Start with the first not-yet-frozen tuple, and walk until we find the
    1436                 :             :      * first TID that's higher. If we already know the whole list is frozen
    1437                 :             :      * (i.e. nfrozen == nitems), this does nothing.
    1438                 :             :      *
    1439                 :             :      * XXX This might do a binary search for sufficiently long lists, but it
    1440                 :             :      * does not seem worth the complexity. Overlapping lists should be rare
    1441                 :             :      * common, TID comparisons are cheap, and we should quickly freeze most of
    1442                 :             :      * the list.
    1443                 :             :      */
    1444         [ +  + ]:      132591 :     for (int i = buffer->nfrozen; i < buffer->nitems; i++)
    1445                 :             :     {
    1446                 :             :         /* Is the TID after the first TID of the new tuple? Can't freeze. */
    1447         [ +  + ]:       92733 :         if (ItemPointerCompare(&buffer->items[i],
    1448                 :       92733 :                                GinTupleGetFirst(tup)) > 0)
    1449                 :        6944 :             break;
    1450                 :             : 
    1451                 :       85789 :         buffer->nfrozen++;
    1452                 :             :     }
    1453                 :             : 
    1454                 :             :     /* not enough TIDs to trim (1024 is somewhat arbitrary number) */
    1455         [ +  - ]:       46802 :     if (buffer->nfrozen < 1024)
    1456                 :       46802 :         return false;
    1457                 :             : 
    1458                 :             :     /* no need to trim if we have not hit the memory limit yet */
    1459         [ #  # ]:           0 :     if ((buffer->nitems + tup->nitems) < buffer->maxitems)
    1460                 :           0 :         return false;
    1461                 :             : 
    1462                 :             :     /*
    1463                 :             :      * OK, we have enough frozen TIDs to flush, and we have hit the memory
    1464                 :             :      * limit, so it's time to write it out.
    1465                 :             :      */
    1466                 :           0 :     return true;
    1467                 :             : }
    1468                 :             : 
    1469                 :             : /*
    1470                 :             :  * GinBufferStoreTuple
    1471                 :             :  *      Add data (especially TID list) from a GIN tuple to the buffer.
    1472                 :             :  *
    1473                 :             :  * The buffer is expected to be empty (in which case it's initialized), or
    1474                 :             :  * having the same key. The TID values from the tuple are combined with the
    1475                 :             :  * stored values using a merge sort.
    1476                 :             :  *
    1477                 :             :  * The tuples (for the same key) are expected to be sorted by first TID. But
    1478                 :             :  * this does not guarantee the lists do not overlap, especially in the leader,
    1479                 :             :  * because the workers process interleaving data. There should be no overlaps
    1480                 :             :  * in a single worker - it could happen when the parallel scan wraps around,
    1481                 :             :  * but we detect that and flush the data (see ginBuildCallbackParallel).
    1482                 :             :  *
    1483                 :             :  * By sorting the GinTuple not only by key, but also by the first TID, we make
    1484                 :             :  * it more less likely the lists will overlap during merge. We merge them using
    1485                 :             :  * mergesort, but it's cheaper to just append one list to the other.
    1486                 :             :  *
    1487                 :             :  * How often can the lists overlap? There should be no overlaps in workers,
    1488                 :             :  * and in the leader we can see overlaps between lists built by different
    1489                 :             :  * workers. But the workers merge the items as much as possible, so there
    1490                 :             :  * should not be too many.
    1491                 :             :  */
    1492                 :             : static void
    1493                 :       46802 : GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
    1494                 :             : {
    1495                 :             :     ItemPointerData *items;
    1496                 :             :     Datum       key;
    1497                 :             : 
    1498                 :       46802 :     AssertCheckGinBuffer(buffer);
    1499                 :             : 
    1500                 :       46802 :     key = _gin_parse_tuple_key(tup);
    1501                 :       46802 :     items = _gin_parse_tuple_items(tup);
    1502                 :             : 
    1503                 :             :     /* if the buffer is empty, set the fields (and copy the key) */
    1504         [ +  + ]:       46802 :     if (GinBufferIsEmpty(buffer))
    1505                 :             :     {
    1506                 :       38501 :         buffer->category = tup->category;
    1507                 :       38501 :         buffer->keylen = tup->keylen;
    1508                 :       38501 :         buffer->attnum = tup->attrnum;
    1509                 :             : 
    1510                 :       38501 :         buffer->typlen = tup->typlen;
    1511                 :       38501 :         buffer->typbyval = tup->typbyval;
    1512                 :             : 
    1513         [ +  + ]:       38501 :         if (tup->category == GIN_CAT_NORM_KEY)
    1514                 :       38464 :             buffer->key = datumCopy(key, buffer->typbyval, buffer->typlen);
    1515                 :             :         else
    1516                 :          37 :             buffer->key = (Datum) 0;
    1517                 :             :     }
    1518                 :             : 
    1519                 :             :     /* add the new TIDs into the buffer, combine using merge-sort */
    1520                 :             :     {
    1521                 :             :         int         nnew;
    1522                 :             :         ItemPointer new;
    1523                 :             : 
    1524                 :             :         /*
    1525                 :             :          * Resize the array - we do this first, because we'll dereference the
    1526                 :             :          * first unfrozen TID, which would fail if the array is NULL. We'll
    1527                 :             :          * still pass 0 as number of elements in that array though.
    1528                 :             :          */
    1529         [ +  + ]:       46802 :         if (buffer->items == NULL)
    1530                 :          45 :             buffer->items = palloc((buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
    1531                 :             :         else
    1532                 :       46757 :             buffer->items = repalloc(buffer->items,
    1533                 :       46757 :                                      (buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
    1534                 :             : 
    1535                 :       46802 :         new = ginMergeItemPointers(&buffer->items[buffer->nfrozen], /* first unfrozen */
    1536                 :       46802 :                                    (buffer->nitems - buffer->nfrozen),    /* num of unfrozen */
    1537                 :       46802 :                                    items, tup->nitems, &nnew);
    1538                 :             : 
    1539                 :             :         Assert(nnew == (tup->nitems + (buffer->nitems - buffer->nfrozen)));
    1540                 :             : 
    1541                 :       46802 :         memcpy(&buffer->items[buffer->nfrozen], new,
    1542                 :             :                nnew * sizeof(ItemPointerData));
    1543                 :             : 
    1544                 :       46802 :         pfree(new);
    1545                 :             : 
    1546                 :       46802 :         buffer->nitems += tup->nitems;
    1547                 :             : 
    1548                 :       46802 :         AssertCheckItemPointers(buffer);
    1549                 :             :     }
    1550                 :             : 
    1551                 :             :     /* free the decompressed TID list */
    1552                 :       46802 :     pfree(items);
    1553                 :       46802 : }
    1554                 :             : 
    1555                 :             : /*
    1556                 :             :  * GinBufferReset
    1557                 :             :  *      Reset the buffer into a state as if it contains no data.
    1558                 :             :  */
    1559                 :             : static void
    1560                 :       38501 : GinBufferReset(GinBuffer *buffer)
    1561                 :             : {
    1562                 :             :     Assert(!GinBufferIsEmpty(buffer));
    1563                 :             : 
    1564                 :             :     /* release byref values, do nothing for by-val ones */
    1565   [ +  +  +  + ]:       38501 :     if ((buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
    1566                 :       25160 :         pfree(DatumGetPointer(buffer->key));
    1567                 :             : 
    1568                 :             :     /*
    1569                 :             :      * Not required, but makes it more likely to trigger NULL dereference if
    1570                 :             :      * using the value incorrectly, etc.
    1571                 :             :      */
    1572                 :       38501 :     buffer->key = (Datum) 0;
    1573                 :             : 
    1574                 :       38501 :     buffer->attnum = 0;
    1575                 :       38501 :     buffer->category = 0;
    1576                 :       38501 :     buffer->keylen = 0;
    1577                 :       38501 :     buffer->nitems = 0;
    1578                 :       38501 :     buffer->nfrozen = 0;
    1579                 :             : 
    1580                 :       38501 :     buffer->typlen = 0;
    1581                 :       38501 :     buffer->typbyval = 0;
    1582                 :       38501 : }
    1583                 :             : 
    1584                 :             : /*
    1585                 :             :  * GinBufferTrim
    1586                 :             :  *      Discard the "frozen" part of the TID list (which should have been
    1587                 :             :  *      written to disk/index before this call).
    1588                 :             :  */
    1589                 :             : static void
    1590                 :           0 : GinBufferTrim(GinBuffer *buffer)
    1591                 :             : {
    1592                 :             :     Assert((buffer->nfrozen > 0) && (buffer->nfrozen <= buffer->nitems));
    1593                 :             : 
    1594                 :           0 :     memmove(&buffer->items[0], &buffer->items[buffer->nfrozen],
    1595                 :           0 :             sizeof(ItemPointerData) * (buffer->nitems - buffer->nfrozen));
    1596                 :             : 
    1597                 :           0 :     buffer->nitems -= buffer->nfrozen;
    1598                 :           0 :     buffer->nfrozen = 0;
    1599                 :           0 : }
    1600                 :             : 
    1601                 :             : /*
    1602                 :             :  * GinBufferFree
    1603                 :             :  *      Release memory associated with the GinBuffer (including TID array).
    1604                 :             :  */
    1605                 :             : static void
    1606                 :          68 : GinBufferFree(GinBuffer *buffer)
    1607                 :             : {
    1608         [ +  + ]:          68 :     if (buffer->items)
    1609                 :          45 :         pfree(buffer->items);
    1610                 :             : 
    1611                 :             :     /* release byref values, do nothing for by-val ones */
    1612         [ -  + ]:          68 :     if (!GinBufferIsEmpty(buffer) &&
    1613   [ #  #  #  # ]:           0 :         (buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
    1614                 :           0 :         pfree(DatumGetPointer(buffer->key));
    1615                 :             : 
    1616                 :          68 :     pfree(buffer);
    1617                 :          68 : }
    1618                 :             : 
    1619                 :             : /*
    1620                 :             :  * GinBufferCanAddKey
    1621                 :             :  *      Check if a given GIN tuple can be added to the current buffer.
    1622                 :             :  *
    1623                 :             :  * Returns true if the buffer is either empty or for the same index key.
    1624                 :             :  */
    1625                 :             : static bool
    1626                 :       46802 : GinBufferCanAddKey(GinBuffer *buffer, GinTuple *tup)
    1627                 :             : {
    1628                 :             :     /* empty buffer can accept data for any key */
    1629         [ +  + ]:       46802 :     if (GinBufferIsEmpty(buffer))
    1630                 :          45 :         return true;
    1631                 :             : 
    1632                 :             :     /* otherwise just data for the same key */
    1633                 :       46757 :     return GinBufferKeyEquals(buffer, tup);
    1634                 :             : }
    1635                 :             : 
    1636                 :             : /*
    1637                 :             :  * Within leader, wait for end of heap scan and merge per-worker results.
    1638                 :             :  *
    1639                 :             :  * After waiting for all workers to finish, merge the per-worker results into
    1640                 :             :  * the complete index. The results from each worker are sorted by block number
    1641                 :             :  * (start of the page range). While combining the per-worker results we merge
    1642                 :             :  * summaries for the same page range, and also fill-in empty summaries for
    1643                 :             :  * ranges without any tuples.
    1644                 :             :  *
    1645                 :             :  * Returns the total number of heap tuples scanned.
    1646                 :             :  */
    1647                 :             : static double
    1648                 :          17 : _gin_parallel_merge(GinBuildState *state)
    1649                 :             : {
    1650                 :             :     GinTuple   *tup;
    1651                 :             :     Size        tuplen;
    1652                 :          17 :     double      reltuples = 0;
    1653                 :             :     GinBuffer  *buffer;
    1654                 :             : 
    1655                 :             :     /* GIN tuples from workers, merged by leader */
    1656                 :          17 :     double      numtuples = 0;
    1657                 :             : 
    1658                 :             :     /* wait for workers to scan table and produce partial results */
    1659                 :          17 :     reltuples = _gin_parallel_heapscan(state);
    1660                 :             : 
    1661                 :             :     /* Execute the sort */
    1662                 :          17 :     pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
    1663                 :             :                                  PROGRESS_GIN_PHASE_PERFORMSORT_2);
    1664                 :             : 
    1665                 :             :     /* do the actual sort in the leader */
    1666                 :          17 :     tuplesort_performsort(state->bs_sortstate);
    1667                 :             : 
    1668                 :             :     /*
    1669                 :             :      * Initialize buffer to combine entries for the same key.
    1670                 :             :      *
    1671                 :             :      * The leader is allowed to use the whole maintenance_work_mem buffer to
    1672                 :             :      * combine data. The parallel workers already completed.
    1673                 :             :      */
    1674                 :          17 :     buffer = GinBufferInit(state->ginstate.index);
    1675                 :             : 
    1676                 :             :     /*
    1677                 :             :      * Set the progress target for the next phase.  Reset the block number
    1678                 :             :      * values set by table_index_build_scan
    1679                 :             :      */
    1680                 :             :     {
    1681                 :          17 :         const int   progress_index[] = {
    1682                 :             :             PROGRESS_CREATEIDX_SUBPHASE,
    1683                 :             :             PROGRESS_CREATEIDX_TUPLES_TOTAL,
    1684                 :             :             PROGRESS_SCAN_BLOCKS_TOTAL,
    1685                 :             :             PROGRESS_SCAN_BLOCKS_DONE
    1686                 :             :         };
    1687                 :          17 :         const int64 progress_vals[] = {
    1688                 :             :             PROGRESS_GIN_PHASE_MERGE_2,
    1689                 :          17 :             state->bs_numtuples,
    1690                 :             :             0, 0
    1691                 :             :         };
    1692                 :             : 
    1693                 :          17 :         pgstat_progress_update_multi_param(4, progress_index, progress_vals);
    1694                 :             :     }
    1695                 :             : 
    1696                 :             :     /*
    1697                 :             :      * Read the GIN tuples from the shared tuplesort, sorted by category and
    1698                 :             :      * key. That probably gives us order matching how data is organized in the
    1699                 :             :      * index.
    1700                 :             :      *
    1701                 :             :      * We don't insert the GIN tuples right away, but instead accumulate as
    1702                 :             :      * many TIDs for the same key as possible, and then insert that at once.
    1703                 :             :      * This way we don't need to decompress/recompress the posting lists, etc.
    1704                 :             :      */
    1705         [ +  + ]:       23418 :     while ((tup = tuplesort_getgintuple(state->bs_sortstate, &tuplen, true)) != NULL)
    1706                 :             :     {
    1707                 :             :         MemoryContext oldCtx;
    1708                 :             : 
    1709         [ -  + ]:       23401 :         CHECK_FOR_INTERRUPTS();
    1710                 :             : 
    1711                 :             :         /*
    1712                 :             :          * If the buffer can accept the new GIN tuple, just store it there and
    1713                 :             :          * we're done. If it's a different key (or maybe too much data) flush
    1714                 :             :          * the current contents into the index first.
    1715                 :             :          */
    1716         [ +  + ]:       23401 :         if (!GinBufferCanAddKey(buffer, tup))
    1717                 :             :         {
    1718                 :             :             /*
    1719                 :             :              * Buffer is not empty and it's storing a different key - flush
    1720                 :             :              * the data into the insert, and start a new entry for current
    1721                 :             :              * GinTuple.
    1722                 :             :              */
    1723                 :       15084 :             AssertCheckItemPointers(buffer);
    1724                 :             : 
    1725                 :       15084 :             oldCtx = MemoryContextSwitchTo(state->tmpCtx);
    1726                 :             : 
    1727                 :       15084 :             ginEntryInsert(&state->ginstate,
    1728                 :       15084 :                            buffer->attnum, buffer->key, buffer->category,
    1729                 :       15084 :                            buffer->items, buffer->nitems, &state->buildStats);
    1730                 :             : 
    1731                 :       15084 :             MemoryContextSwitchTo(oldCtx);
    1732                 :       15084 :             MemoryContextReset(state->tmpCtx);
    1733                 :             : 
    1734                 :             :             /* discard the existing data */
    1735                 :       15084 :             GinBufferReset(buffer);
    1736                 :             :         }
    1737                 :             : 
    1738                 :             :         /*
    1739                 :             :          * We're about to add a GIN tuple to the buffer - check the memory
    1740                 :             :          * limit first, and maybe write out some of the data into the index
    1741                 :             :          * first, if needed (and possible). We only flush the part of the TID
    1742                 :             :          * list that we know won't change, and only if there's enough data for
    1743                 :             :          * compression to work well.
    1744                 :             :          */
    1745         [ -  + ]:       23401 :         if (GinBufferShouldTrim(buffer, tup))
    1746                 :             :         {
    1747                 :             :             Assert(buffer->nfrozen > 0);
    1748                 :             : 
    1749                 :             :             /*
    1750                 :             :              * Buffer is not empty and it's storing a different key - flush
    1751                 :             :              * the data into the insert, and start a new entry for current
    1752                 :             :              * GinTuple.
    1753                 :             :              */
    1754                 :           0 :             AssertCheckItemPointers(buffer);
    1755                 :             : 
    1756                 :           0 :             oldCtx = MemoryContextSwitchTo(state->tmpCtx);
    1757                 :             : 
    1758                 :           0 :             ginEntryInsert(&state->ginstate,
    1759                 :           0 :                            buffer->attnum, buffer->key, buffer->category,
    1760                 :           0 :                            buffer->items, buffer->nfrozen, &state->buildStats);
    1761                 :             : 
    1762                 :           0 :             MemoryContextSwitchTo(oldCtx);
    1763                 :           0 :             MemoryContextReset(state->tmpCtx);
    1764                 :             : 
    1765                 :             :             /* truncate the data we've just discarded */
    1766                 :           0 :             GinBufferTrim(buffer);
    1767                 :             :         }
    1768                 :             : 
    1769                 :             :         /*
    1770                 :             :          * Remember data for the current tuple (either remember the new key,
    1771                 :             :          * or append if to the existing data).
    1772                 :             :          */
    1773                 :       23401 :         GinBufferStoreTuple(buffer, tup);
    1774                 :             : 
    1775                 :             :         /* Report progress */
    1776                 :       23401 :         pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
    1777                 :             :                                      ++numtuples);
    1778                 :             :     }
    1779                 :             : 
    1780                 :             :     /* flush data remaining in the buffer (for the last key) */
    1781         [ +  + ]:          17 :     if (!GinBufferIsEmpty(buffer))
    1782                 :             :     {
    1783                 :          16 :         AssertCheckItemPointers(buffer);
    1784                 :             : 
    1785                 :          16 :         ginEntryInsert(&state->ginstate,
    1786                 :          16 :                        buffer->attnum, buffer->key, buffer->category,
    1787                 :          16 :                        buffer->items, buffer->nitems, &state->buildStats);
    1788                 :             : 
    1789                 :             :         /* discard the existing data */
    1790                 :          16 :         GinBufferReset(buffer);
    1791                 :             : 
    1792                 :             :         /* Report progress */
    1793                 :          16 :         pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
    1794                 :             :                                      ++numtuples);
    1795                 :             :     }
    1796                 :             : 
    1797                 :             :     /* release all the memory */
    1798                 :          17 :     GinBufferFree(buffer);
    1799                 :             : 
    1800                 :          17 :     tuplesort_end(state->bs_sortstate);
    1801                 :             : 
    1802                 :          17 :     return reltuples;
    1803                 :             : }
    1804                 :             : 
    1805                 :             : /*
    1806                 :             :  * Returns size of shared memory required to store state for a parallel
    1807                 :             :  * gin index build based on the snapshot its parallel scan will use.
    1808                 :             :  */
    1809                 :             : static Size
    1810                 :          17 : _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot)
    1811                 :             : {
    1812                 :             :     /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
    1813                 :          17 :     return add_size(BUFFERALIGN(sizeof(GinBuildShared)),
    1814                 :             :                     table_parallelscan_estimate(heap, snapshot));
    1815                 :             : }
    1816                 :             : 
    1817                 :             : /*
    1818                 :             :  * Within leader, participate as a parallel worker.
    1819                 :             :  */
    1820                 :             : static void
    1821                 :          17 : _gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Relation index)
    1822                 :             : {
    1823                 :          17 :     GinLeader  *ginleader = buildstate->bs_leader;
    1824                 :             :     int         sortmem;
    1825                 :             : 
    1826                 :             :     /*
    1827                 :             :      * Might as well use reliable figure when doling out maintenance_work_mem
    1828                 :             :      * (when requested number of workers were not launched, this will be
    1829                 :             :      * somewhat higher than it is for other workers).
    1830                 :             :      */
    1831                 :          17 :     sortmem = maintenance_work_mem / ginleader->nparticipanttuplesorts;
    1832                 :             : 
    1833                 :             :     /* Perform work common to all participants */
    1834                 :          17 :     _gin_parallel_scan_and_build(buildstate, ginleader->ginshared,
    1835                 :             :                                  ginleader->sharedsort, heap, index,
    1836                 :             :                                  sortmem, true);
    1837                 :          17 : }
    1838                 :             : 
    1839                 :             : /*
    1840                 :             :  * _gin_process_worker_data
    1841                 :             :  *      First phase of the key merging, happening in the worker.
    1842                 :             :  *
    1843                 :             :  * Depending on the number of distinct keys, the TID lists produced by the
    1844                 :             :  * callback may be very short (due to frequent evictions in the callback).
    1845                 :             :  * But combining many tiny lists is expensive, so we try to do as much as
    1846                 :             :  * possible in the workers and only then pass the results to the leader.
    1847                 :             :  *
    1848                 :             :  * We read the tuples sorted by the key, and merge them into larger lists.
    1849                 :             :  * At the moment there's no memory limit, so this will just produce one
    1850                 :             :  * huge (sorted) list per key in each worker. Which means the leader will
    1851                 :             :  * do a very limited number of mergesorts, which is good.
    1852                 :             :  */
    1853                 :             : static void
    1854                 :          51 : _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort,
    1855                 :             :                          bool progress)
    1856                 :             : {
    1857                 :             :     GinTuple   *tup;
    1858                 :             :     Size        tuplen;
    1859                 :             : 
    1860                 :             :     GinBuffer  *buffer;
    1861                 :             : 
    1862                 :             :     /*
    1863                 :             :      * Initialize buffer to combine entries for the same key.
    1864                 :             :      *
    1865                 :             :      * The workers are limited to the same amount of memory as during the sort
    1866                 :             :      * in ginBuildCallbackParallel. But this probably should be the 32MB used
    1867                 :             :      * during planning, just like there.
    1868                 :             :      */
    1869                 :          51 :     buffer = GinBufferInit(state->ginstate.index);
    1870                 :             : 
    1871                 :             :     /* sort the raw per-worker data */
    1872         [ +  + ]:          51 :     if (progress)
    1873                 :          17 :         pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
    1874                 :             :                                      PROGRESS_GIN_PHASE_PERFORMSORT_1);
    1875                 :             : 
    1876                 :          51 :     tuplesort_performsort(state->bs_worker_sort);
    1877                 :             : 
    1878                 :             :     /* reset the number of GIN tuples produced by this worker */
    1879                 :          51 :     state->bs_numtuples = 0;
    1880                 :             : 
    1881         [ +  + ]:          51 :     if (progress)
    1882                 :          17 :         pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
    1883                 :             :                                      PROGRESS_GIN_PHASE_MERGE_1);
    1884                 :             : 
    1885                 :             :     /*
    1886                 :             :      * Read the GIN tuples from the shared tuplesort, sorted by the key, and
    1887                 :             :      * merge them into larger chunks for the leader to combine.
    1888                 :             :      */
    1889         [ +  + ]:       23452 :     while ((tup = tuplesort_getgintuple(worker_sort, &tuplen, true)) != NULL)
    1890                 :             :     {
    1891                 :             : 
    1892         [ +  + ]:       23401 :         CHECK_FOR_INTERRUPTS();
    1893                 :             : 
    1894                 :             :         /*
    1895                 :             :          * If the buffer can accept the new GIN tuple, just store it there and
    1896                 :             :          * we're done. If it's a different key (or maybe too much data) flush
    1897                 :             :          * the current contents into the index first.
    1898                 :             :          */
    1899         [ +  + ]:       23401 :         if (!GinBufferCanAddKey(buffer, tup))
    1900                 :             :         {
    1901                 :             :             GinTuple   *ntup;
    1902                 :             :             Size        ntuplen;
    1903                 :             : 
    1904                 :             :             /*
    1905                 :             :              * Buffer is not empty and it's storing a different key - flush
    1906                 :             :              * the data into the insert, and start a new entry for current
    1907                 :             :              * GinTuple.
    1908                 :             :              */
    1909                 :       23372 :             AssertCheckItemPointers(buffer);
    1910                 :             : 
    1911                 :       23372 :             ntup = _gin_build_tuple(buffer->attnum, buffer->category,
    1912                 :       23372 :                                     buffer->key, buffer->typlen, buffer->typbyval,
    1913                 :       23372 :                                     buffer->items, buffer->nitems, &ntuplen);
    1914                 :             : 
    1915                 :       23372 :             tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
    1916                 :       23372 :             state->bs_numtuples++;
    1917                 :             : 
    1918                 :       23372 :             pfree(ntup);
    1919                 :             : 
    1920                 :             :             /* discard the existing data */
    1921                 :       23372 :             GinBufferReset(buffer);
    1922                 :             :         }
    1923                 :             : 
    1924                 :             :         /*
    1925                 :             :          * We're about to add a GIN tuple to the buffer - check the memory
    1926                 :             :          * limit first, and maybe write out some of the data into the index
    1927                 :             :          * first, if needed (and possible). We only flush the part of the TID
    1928                 :             :          * list that we know won't change, and only if there's enough data for
    1929                 :             :          * compression to work well.
    1930                 :             :          */
    1931         [ -  + ]:       23401 :         if (GinBufferShouldTrim(buffer, tup))
    1932                 :             :         {
    1933                 :             :             GinTuple   *ntup;
    1934                 :             :             Size        ntuplen;
    1935                 :             : 
    1936                 :             :             Assert(buffer->nfrozen > 0);
    1937                 :             : 
    1938                 :             :             /*
    1939                 :             :              * Buffer is not empty and it's storing a different key - flush
    1940                 :             :              * the data into the insert, and start a new entry for current
    1941                 :             :              * GinTuple.
    1942                 :             :              */
    1943                 :           0 :             AssertCheckItemPointers(buffer);
    1944                 :             : 
    1945                 :           0 :             ntup = _gin_build_tuple(buffer->attnum, buffer->category,
    1946                 :           0 :                                     buffer->key, buffer->typlen, buffer->typbyval,
    1947                 :           0 :                                     buffer->items, buffer->nfrozen, &ntuplen);
    1948                 :             : 
    1949                 :           0 :             tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
    1950                 :             : 
    1951                 :           0 :             pfree(ntup);
    1952                 :             : 
    1953                 :             :             /* truncate the data we've just discarded */
    1954                 :           0 :             GinBufferTrim(buffer);
    1955                 :             :         }
    1956                 :             : 
    1957                 :             :         /*
    1958                 :             :          * Remember data for the current tuple (either remember the new key,
    1959                 :             :          * or append if to the existing data).
    1960                 :             :          */
    1961                 :       23401 :         GinBufferStoreTuple(buffer, tup);
    1962                 :             :     }
    1963                 :             : 
    1964                 :             :     /* flush data remaining in the buffer (for the last key) */
    1965         [ +  + ]:          51 :     if (!GinBufferIsEmpty(buffer))
    1966                 :             :     {
    1967                 :             :         GinTuple   *ntup;
    1968                 :             :         Size        ntuplen;
    1969                 :             : 
    1970                 :          29 :         AssertCheckItemPointers(buffer);
    1971                 :             : 
    1972                 :          29 :         ntup = _gin_build_tuple(buffer->attnum, buffer->category,
    1973                 :          29 :                                 buffer->key, buffer->typlen, buffer->typbyval,
    1974                 :          29 :                                 buffer->items, buffer->nitems, &ntuplen);
    1975                 :             : 
    1976                 :          29 :         tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
    1977                 :          29 :         state->bs_numtuples++;
    1978                 :             : 
    1979                 :          29 :         pfree(ntup);
    1980                 :             : 
    1981                 :             :         /* discard the existing data */
    1982                 :          29 :         GinBufferReset(buffer);
    1983                 :             :     }
    1984                 :             : 
    1985                 :             :     /* release all the memory */
    1986                 :          51 :     GinBufferFree(buffer);
    1987                 :             : 
    1988                 :          51 :     tuplesort_end(worker_sort);
    1989                 :          51 : }
    1990                 :             : 
    1991                 :             : /*
    1992                 :             :  * Perform a worker's portion of a parallel GIN index build sort.
    1993                 :             :  *
    1994                 :             :  * This generates a tuplesort for the worker portion of the table.
    1995                 :             :  *
    1996                 :             :  * sortmem is the amount of working memory to use within each worker,
    1997                 :             :  * expressed in KBs.
    1998                 :             :  *
    1999                 :             :  * When this returns, workers are done, and need only release resources.
    2000                 :             :  *
    2001                 :             :  * Before feeding data into a shared tuplesort (for the leader process),
    2002                 :             :  * the workers process data in two phases.
    2003                 :             :  *
    2004                 :             :  * 1) A worker reads a portion of rows from the table, accumulates entries
    2005                 :             :  * in memory, and flushes them into a private tuplesort (e.g. because of
    2006                 :             :  * using too much memory).
    2007                 :             :  *
    2008                 :             :  * 2) The private tuplesort gets sorted (by key and TID), the worker reads
    2009                 :             :  * the data again, and combines the entries as much as possible. This has
    2010                 :             :  * to happen eventually, and this way it's done in workers in parallel.
    2011                 :             :  *
    2012                 :             :  * Finally, the combined entries are written into the shared tuplesort, so
    2013                 :             :  * that the leader can process them.
    2014                 :             :  *
    2015                 :             :  * How well this works (compared to just writing entries into the shared
    2016                 :             :  * tuplesort) depends on the data set. For large tables with many distinct
    2017                 :             :  * keys this helps a lot. With many distinct keys it's likely the buffers has
    2018                 :             :  * to be flushed often, generating many entries with the same key and short
    2019                 :             :  * TID lists. These entries need to be sorted and merged at some point,
    2020                 :             :  * before writing them to the index. The merging is quite expensive, it can
    2021                 :             :  * easily be ~50% of a serial build, and doing as much of it in the workers
    2022                 :             :  * means it's parallelized. The leader still has to merge results from the
    2023                 :             :  * workers, but it's much more efficient to merge few large entries than
    2024                 :             :  * many tiny ones.
    2025                 :             :  *
    2026                 :             :  * This also reduces the amount of data the workers pass to the leader through
    2027                 :             :  * the shared tuplesort. OTOH the workers need more space for the private sort,
    2028                 :             :  * possibly up to 2x of the data, if no entries be merged in a worker. But this
    2029                 :             :  * is very unlikely, and the only consequence is inefficiency, so we ignore it.
    2030                 :             :  */
    2031                 :             : static void
    2032                 :          51 : _gin_parallel_scan_and_build(GinBuildState *state,
    2033                 :             :                              GinBuildShared *ginshared, Sharedsort *sharedsort,
    2034                 :             :                              Relation heap, Relation index,
    2035                 :             :                              int sortmem, bool progress)
    2036                 :             : {
    2037                 :             :     SortCoordinate coordinate;
    2038                 :             :     TableScanDesc scan;
    2039                 :             :     double      reltuples;
    2040                 :             :     IndexInfo  *indexInfo;
    2041                 :             : 
    2042                 :             :     /* Initialize local tuplesort coordination state */
    2043                 :          51 :     coordinate = palloc0_object(SortCoordinateData);
    2044                 :          51 :     coordinate->isWorker = true;
    2045                 :          51 :     coordinate->nParticipants = -1;
    2046                 :          51 :     coordinate->sharedsort = sharedsort;
    2047                 :             : 
    2048                 :             :     /* remember how much space is allowed for the accumulated entries */
    2049                 :          51 :     state->work_mem = (sortmem / 2);
    2050                 :             : 
    2051                 :             :     /* remember how many workers participate in the build */
    2052                 :          51 :     state->bs_num_workers = ginshared->scantuplesortstates;
    2053                 :             : 
    2054                 :             :     /* Begin "partial" tuplesort */
    2055                 :          51 :     state->bs_sortstate = tuplesort_begin_index_gin(heap, index,
    2056                 :             :                                                     state->work_mem,
    2057                 :             :                                                     coordinate,
    2058                 :             :                                                     TUPLESORT_NONE);
    2059                 :             : 
    2060                 :             :     /* Local per-worker sort of raw-data */
    2061                 :          51 :     state->bs_worker_sort = tuplesort_begin_index_gin(heap, index,
    2062                 :             :                                                       state->work_mem,
    2063                 :             :                                                       NULL,
    2064                 :             :                                                       TUPLESORT_NONE);
    2065                 :             : 
    2066                 :             :     /* Join parallel scan */
    2067                 :          51 :     indexInfo = BuildIndexInfo(index);
    2068                 :          51 :     indexInfo->ii_Concurrent = ginshared->isconcurrent;
    2069                 :             : 
    2070                 :          51 :     scan = table_beginscan_parallel(heap,
    2071                 :             :                                     ParallelTableScanFromGinBuildShared(ginshared),
    2072                 :             :                                     SO_NONE);
    2073                 :             : 
    2074                 :          51 :     reltuples = table_index_build_scan(heap, index, indexInfo, true, progress,
    2075                 :             :                                        ginBuildCallbackParallel, state, scan);
    2076                 :             : 
    2077                 :             :     /* write remaining accumulated entries */
    2078                 :          51 :     ginFlushBuildState(state, index);
    2079                 :             : 
    2080                 :             :     /*
    2081                 :             :      * Do the first phase of in-worker processing - sort the data produced by
    2082                 :             :      * the callback, and combine them into much larger chunks and place that
    2083                 :             :      * into the shared tuplestore for leader to process.
    2084                 :             :      */
    2085                 :          51 :     _gin_process_worker_data(state, state->bs_worker_sort, progress);
    2086                 :             : 
    2087                 :             :     /* sort the GIN tuples built by this worker */
    2088                 :          51 :     tuplesort_performsort(state->bs_sortstate);
    2089                 :             : 
    2090                 :          51 :     state->bs_reltuples += reltuples;
    2091                 :             : 
    2092                 :             :     /*
    2093                 :             :      * Done.  Record ambuild statistics.
    2094                 :             :      */
    2095                 :          51 :     SpinLockAcquire(&ginshared->mutex);
    2096                 :          51 :     ginshared->nparticipantsdone++;
    2097                 :          51 :     ginshared->reltuples += state->bs_reltuples;
    2098                 :          51 :     ginshared->indtuples += state->bs_numtuples;
    2099                 :          51 :     SpinLockRelease(&ginshared->mutex);
    2100                 :             : 
    2101                 :             :     /* Notify leader */
    2102                 :          51 :     ConditionVariableSignal(&ginshared->workersdonecv);
    2103                 :             : 
    2104                 :          51 :     tuplesort_end(state->bs_sortstate);
    2105                 :          51 : }
    2106                 :             : 
    2107                 :             : /*
    2108                 :             :  * Perform work within a launched parallel process.
    2109                 :             :  */
    2110                 :             : void
    2111                 :          34 : _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
    2112                 :             : {
    2113                 :             :     char       *sharedquery;
    2114                 :             :     GinBuildShared *ginshared;
    2115                 :             :     Sharedsort *sharedsort;
    2116                 :             :     GinBuildState buildstate;
    2117                 :             :     Relation    heapRel;
    2118                 :             :     Relation    indexRel;
    2119                 :             :     LOCKMODE    heapLockmode;
    2120                 :             :     LOCKMODE    indexLockmode;
    2121                 :             :     WalUsage   *walusage;
    2122                 :             :     BufferUsage *bufferusage;
    2123                 :             :     int         sortmem;
    2124                 :             : 
    2125                 :             :     /*
    2126                 :             :      * The only possible status flag that can be set to the parallel worker is
    2127                 :             :      * PROC_IN_SAFE_IC.
    2128                 :             :      */
    2129                 :             :     Assert((MyProc->statusFlags == 0) ||
    2130                 :             :            (MyProc->statusFlags == PROC_IN_SAFE_IC));
    2131                 :             : 
    2132                 :             :     /* Set debug_query_string for individual workers first */
    2133                 :          34 :     sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true);
    2134                 :          34 :     debug_query_string = sharedquery;
    2135                 :             : 
    2136                 :             :     /* Report the query string from leader */
    2137                 :          34 :     pgstat_report_activity(STATE_RUNNING, debug_query_string);
    2138                 :             : 
    2139                 :             :     /* Look up gin shared state */
    2140                 :          34 :     ginshared = shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false);
    2141                 :             : 
    2142                 :             :     /* Open relations using lock modes known to be obtained by index.c */
    2143         [ +  + ]:          34 :     if (!ginshared->isconcurrent)
    2144                 :             :     {
    2145                 :          26 :         heapLockmode = ShareLock;
    2146                 :          26 :         indexLockmode = AccessExclusiveLock;
    2147                 :             :     }
    2148                 :             :     else
    2149                 :             :     {
    2150                 :           8 :         heapLockmode = ShareUpdateExclusiveLock;
    2151                 :           8 :         indexLockmode = RowExclusiveLock;
    2152                 :             :     }
    2153                 :             : 
    2154                 :             :     /* Open relations within worker */
    2155                 :          34 :     heapRel = table_open(ginshared->heaprelid, heapLockmode);
    2156                 :          34 :     indexRel = index_open(ginshared->indexrelid, indexLockmode);
    2157                 :             : 
    2158                 :             :     /* initialize the GIN build state */
    2159                 :          34 :     initGinState(&buildstate.ginstate, indexRel);
    2160                 :          34 :     buildstate.indtuples = 0;
    2161                 :          34 :     memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
    2162                 :          34 :     memset(&buildstate.tid, 0, sizeof(ItemPointerData));
    2163                 :             : 
    2164                 :             :     /*
    2165                 :             :      * create a temporary memory context that is used to hold data not yet
    2166                 :             :      * dumped out to the index
    2167                 :             :      */
    2168                 :          34 :     buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
    2169                 :             :                                               "Gin build temporary context",
    2170                 :             :                                               ALLOCSET_DEFAULT_SIZES);
    2171                 :             : 
    2172                 :             :     /*
    2173                 :             :      * create a temporary memory context that is used for calling
    2174                 :             :      * ginExtractEntries(), and can be reset after each tuple
    2175                 :             :      */
    2176                 :          34 :     buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
    2177                 :             :                                                "Gin build temporary context for user-defined function",
    2178                 :             :                                                ALLOCSET_DEFAULT_SIZES);
    2179                 :             : 
    2180                 :          34 :     buildstate.accum.ginstate = &buildstate.ginstate;
    2181                 :          34 :     ginInitBA(&buildstate.accum);
    2182                 :             : 
    2183                 :             : 
    2184                 :             :     /* Look up shared state private to tuplesort.c */
    2185                 :          34 :     sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
    2186                 :          34 :     tuplesort_attach_shared(sharedsort, seg);
    2187                 :             : 
    2188                 :             :     /* Prepare to track buffer usage during parallel execution */
    2189                 :          34 :     InstrStartParallelQuery();
    2190                 :             : 
    2191                 :             :     /*
    2192                 :             :      * Might as well use reliable figure when doling out maintenance_work_mem
    2193                 :             :      * (when requested number of workers were not launched, this will be
    2194                 :             :      * somewhat higher than it is for other workers).
    2195                 :             :      */
    2196                 :          34 :     sortmem = maintenance_work_mem / ginshared->scantuplesortstates;
    2197                 :             : 
    2198                 :          34 :     _gin_parallel_scan_and_build(&buildstate, ginshared, sharedsort,
    2199                 :             :                                  heapRel, indexRel, sortmem, false);
    2200                 :             : 
    2201                 :             :     /* Report WAL/buffer usage during parallel execution */
    2202                 :          34 :     bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
    2203                 :          34 :     walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
    2204                 :          34 :     InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber],
    2205                 :          34 :                           &walusage[ParallelWorkerNumber]);
    2206                 :             : 
    2207                 :          34 :     index_close(indexRel, indexLockmode);
    2208                 :          34 :     table_close(heapRel, heapLockmode);
    2209                 :          34 : }
    2210                 :             : 
    2211                 :             : /*
    2212                 :             :  * Used to keep track of compressed TID lists when building a GIN tuple.
    2213                 :             :  */
    2214                 :             : typedef struct
    2215                 :             : {
    2216                 :             :     dlist_node  node;           /* linked list pointers */
    2217                 :             :     GinPostingList *seg;
    2218                 :             : } GinSegmentInfo;
    2219                 :             : 
    2220                 :             : /*
    2221                 :             :  * _gin_build_tuple
    2222                 :             :  *      Serialize the state for an index key into a tuple for tuplesort.
    2223                 :             :  *
    2224                 :             :  * The tuple has a number of scalar fields (mostly matching the build state),
    2225                 :             :  * and then a data array that stores the key first, and then the TID list.
    2226                 :             :  *
    2227                 :             :  * For by-reference data types, we store the actual data. For by-val types
    2228                 :             :  * we simply copy the whole Datum, so that we don't have to care about stuff
    2229                 :             :  * like endianness etc. We could make it a little bit smaller, but it's not
    2230                 :             :  * worth it - it's a tiny fraction of the data, and we need to MAXALIGN the
    2231                 :             :  * start of the TID list anyway. So we wouldn't save anything. (This would
    2232                 :             :  * not be a good idea for the permanent in-index data, since we'd prefer
    2233                 :             :  * that that not depend on sizeof(Datum). But this is just a transient
    2234                 :             :  * representation to use while sorting the data.)
    2235                 :             :  *
    2236                 :             :  * The TID list is serialized as compressed - it's highly compressible, and
    2237                 :             :  * we already have ginCompressPostingList for this purpose. The list may be
    2238                 :             :  * pretty long, so we compress it into multiple segments and then copy all
    2239                 :             :  * of that into the GIN tuple.
    2240                 :             :  */
    2241                 :             : static GinTuple *
    2242                 :       46802 : _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
    2243                 :             :                  Datum key, int16 typlen, bool typbyval,
    2244                 :             :                  ItemPointerData *items, uint32 nitems,
    2245                 :             :                  Size *len)
    2246                 :             : {
    2247                 :             :     GinTuple   *tuple;
    2248                 :             :     char       *ptr;
    2249                 :             : 
    2250                 :             :     Size        tuplen;
    2251                 :             :     int         keylen;
    2252                 :             : 
    2253                 :             :     dlist_mutable_iter iter;
    2254                 :             :     dlist_head  segments;
    2255                 :             :     int         ncompressed;
    2256                 :             :     Size        compresslen;
    2257                 :             : 
    2258                 :             :     /*
    2259                 :             :      * Calculate how long is the key value. Only keys with GIN_CAT_NORM_KEY
    2260                 :             :      * have actual non-empty key. We include varlena headers and \0 bytes for
    2261                 :             :      * strings, to make it easier to access the data in-line.
    2262                 :             :      *
    2263                 :             :      * For byval types we simply copy the whole Datum. We could store just the
    2264                 :             :      * necessary bytes, but this is simpler to work with and not worth the
    2265                 :             :      * extra complexity. Moreover we still need to do the MAXALIGN to allow
    2266                 :             :      * direct access to items pointers.
    2267                 :             :      *
    2268                 :             :      * XXX Note that for byval types we store the whole datum, no matter what
    2269                 :             :      * the typlen value is.
    2270                 :             :      */
    2271         [ +  + ]:       46802 :     if (category != GIN_CAT_NORM_KEY)
    2272                 :          42 :         keylen = 0;
    2273         [ +  + ]:       46760 :     else if (typbyval)
    2274                 :       13304 :         keylen = sizeof(Datum);
    2275         [ -  + ]:       33456 :     else if (typlen > 0)
    2276                 :           0 :         keylen = typlen;
    2277         [ +  - ]:       33456 :     else if (typlen == -1)
    2278                 :       33456 :         keylen = VARSIZE_ANY(DatumGetPointer(key));
    2279         [ #  # ]:           0 :     else if (typlen == -2)
    2280                 :           0 :         keylen = strlen(DatumGetPointer(key)) + 1;
    2281                 :             :     else
    2282         [ #  # ]:           0 :         elog(ERROR, "unexpected typlen value (%d)", typlen);
    2283                 :             : 
    2284                 :             :     /* compress the item pointers */
    2285                 :       46802 :     ncompressed = 0;
    2286                 :       46802 :     compresslen = 0;
    2287                 :       46802 :     dlist_init(&segments);
    2288                 :             : 
    2289                 :             :     /* generate compressed segments of TID list chunks */
    2290         [ +  + ]:       93604 :     while (ncompressed < nitems)
    2291                 :             :     {
    2292                 :             :         int         cnt;
    2293                 :       46802 :         GinSegmentInfo *seginfo = palloc_object(GinSegmentInfo);
    2294                 :             : 
    2295                 :       93604 :         seginfo->seg = ginCompressPostingList(&items[ncompressed],
    2296                 :       46802 :                                               (nitems - ncompressed),
    2297                 :             :                                               UINT16_MAX,
    2298                 :             :                                               &cnt);
    2299                 :             : 
    2300                 :       46802 :         ncompressed += cnt;
    2301                 :       46802 :         compresslen += SizeOfGinPostingList(seginfo->seg);
    2302                 :             : 
    2303                 :       46802 :         dlist_push_tail(&segments, &seginfo->node);
    2304                 :             :     }
    2305                 :             : 
    2306                 :             :     /*
    2307                 :             :      * Determine GIN tuple length with all the data included. Be careful about
    2308                 :             :      * alignment, to allow direct access to compressed segments (those require
    2309                 :             :      * only SHORTALIGN).
    2310                 :             :      */
    2311                 :       46802 :     tuplen = SHORTALIGN(offsetof(GinTuple, data) + keylen) + compresslen;
    2312                 :             : 
    2313                 :       46802 :     *len = tuplen;
    2314                 :             : 
    2315                 :             :     /*
    2316                 :             :      * Allocate space for the whole GIN tuple.
    2317                 :             :      *
    2318                 :             :      * The palloc0 is needed - writetup_index_gin will write the whole tuple
    2319                 :             :      * to disk, so we need to make sure the padding bytes are defined
    2320                 :             :      * (otherwise valgrind would report this).
    2321                 :             :      */
    2322                 :       46802 :     tuple = palloc0(tuplen);
    2323                 :             : 
    2324                 :       46802 :     tuple->tuplen = tuplen;
    2325                 :       46802 :     tuple->attrnum = attrnum;
    2326                 :       46802 :     tuple->category = category;
    2327                 :       46802 :     tuple->keylen = keylen;
    2328                 :       46802 :     tuple->nitems = nitems;
    2329                 :             : 
    2330                 :             :     /* key type info */
    2331                 :       46802 :     tuple->typlen = typlen;
    2332                 :       46802 :     tuple->typbyval = typbyval;
    2333                 :             : 
    2334                 :             :     /*
    2335                 :             :      * Copy the key and items into the tuple. First the key value, which we
    2336                 :             :      * can simply copy right at the beginning of the data array.
    2337                 :             :      */
    2338         [ +  + ]:       46802 :     if (category == GIN_CAT_NORM_KEY)
    2339                 :             :     {
    2340         [ +  + ]:       46760 :         if (typbyval)
    2341                 :             :         {
    2342                 :       13304 :             memcpy(tuple->data, &key, sizeof(Datum));
    2343                 :             :         }
    2344         [ -  + ]:       33456 :         else if (typlen > 0) /* byref, fixed length */
    2345                 :             :         {
    2346                 :           0 :             memcpy(tuple->data, DatumGetPointer(key), typlen);
    2347                 :             :         }
    2348         [ +  - ]:       33456 :         else if (typlen == -1)
    2349                 :             :         {
    2350                 :       33456 :             memcpy(tuple->data, DatumGetPointer(key), keylen);
    2351                 :             :         }
    2352         [ #  # ]:           0 :         else if (typlen == -2)
    2353                 :             :         {
    2354                 :           0 :             memcpy(tuple->data, DatumGetPointer(key), keylen);
    2355                 :             :         }
    2356                 :             :     }
    2357                 :             : 
    2358                 :             :     /* finally, copy the TIDs into the array */
    2359                 :       46802 :     ptr = (char *) tuple + SHORTALIGN(offsetof(GinTuple, data) + keylen);
    2360                 :             : 
    2361                 :             :     /* copy in the compressed data, and free the segments */
    2362   [ +  -  +  + ]:       93604 :     dlist_foreach_modify(iter, &segments)
    2363                 :             :     {
    2364                 :       46802 :         GinSegmentInfo *seginfo = dlist_container(GinSegmentInfo, node, iter.cur);
    2365                 :             : 
    2366                 :       46802 :         memcpy(ptr, seginfo->seg, SizeOfGinPostingList(seginfo->seg));
    2367                 :             : 
    2368                 :       46802 :         ptr += SizeOfGinPostingList(seginfo->seg);
    2369                 :             : 
    2370                 :       46802 :         dlist_delete(&seginfo->node);
    2371                 :             : 
    2372                 :       46802 :         pfree(seginfo->seg);
    2373                 :       46802 :         pfree(seginfo);
    2374                 :             :     }
    2375                 :             : 
    2376                 :       46802 :     return tuple;
    2377                 :             : }
    2378                 :             : 
    2379                 :             : /*
    2380                 :             :  * _gin_parse_tuple_key
    2381                 :             :  *      Return a Datum representing the key stored in the tuple.
    2382                 :             :  *
    2383                 :             :  * Most of the tuple fields are directly accessible, the only thing that
    2384                 :             :  * needs more care is the key and the TID list.
    2385                 :             :  *
    2386                 :             :  * For the key, this returns a regular Datum representing it. It's either the
    2387                 :             :  * actual key value, or a pointer to the beginning of the data array (which is
    2388                 :             :  * where the data was copied by _gin_build_tuple).
    2389                 :             :  */
    2390                 :             : static Datum
    2391                 :      200478 : _gin_parse_tuple_key(GinTuple *a)
    2392                 :             : {
    2393                 :             :     Datum       key;
    2394                 :             : 
    2395         [ +  + ]:      200478 :     if (a->category != GIN_CAT_NORM_KEY)
    2396                 :          42 :         return (Datum) 0;
    2397                 :             : 
    2398         [ +  + ]:      200436 :     if (a->typbyval)
    2399                 :             :     {
    2400                 :       39896 :         memcpy(&key, a->data, a->keylen);
    2401                 :       39896 :         return key;
    2402                 :             :     }
    2403                 :             : 
    2404                 :      160540 :     return PointerGetDatum(a->data);
    2405                 :             : }
    2406                 :             : 
    2407                 :             : /*
    2408                 :             :  * _gin_parse_tuple_items
    2409                 :             :  *      Return a pointer to a palloc'd array of decompressed TID array.
    2410                 :             :  */
    2411                 :             : static ItemPointer
    2412                 :       46802 : _gin_parse_tuple_items(GinTuple *a)
    2413                 :             : {
    2414                 :             :     int         len;
    2415                 :             :     char       *ptr;
    2416                 :             :     int         ndecoded;
    2417                 :             :     ItemPointer items;
    2418                 :             : 
    2419                 :       46802 :     len = a->tuplen - SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
    2420                 :       46802 :     ptr = (char *) a + SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
    2421                 :             : 
    2422                 :       46802 :     items = ginPostingListDecodeAllSegments((GinPostingList *) ptr, len, &ndecoded);
    2423                 :             : 
    2424                 :             :     Assert(ndecoded == a->nitems);
    2425                 :             : 
    2426                 :       46802 :     return items;
    2427                 :             : }
    2428                 :             : 
    2429                 :             : /*
    2430                 :             :  * _gin_compare_tuples
    2431                 :             :  *      Compare GIN tuples, used by tuplesort during parallel index build.
    2432                 :             :  *
    2433                 :             :  * The scalar fields (attrnum, category) are compared first, the key value is
    2434                 :             :  * compared last. The comparisons are done using type-specific sort support
    2435                 :             :  * functions.
    2436                 :             :  *
    2437                 :             :  * If the key value matches, we compare the first TID value in the TID list,
    2438                 :             :  * which means the tuples are merged in an order in which they are most
    2439                 :             :  * likely to be simply concatenated. (This "first" TID will also allow us
    2440                 :             :  * to determine a point up to which the list is fully determined and can be
    2441                 :             :  * written into the index to enforce a memory limit etc.)
    2442                 :             :  */
    2443                 :             : int
    2444                 :       76902 : _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup)
    2445                 :             : {
    2446                 :             :     int         r;
    2447                 :             :     Datum       keya,
    2448                 :             :                 keyb;
    2449                 :             : 
    2450         [ -  + ]:       76902 :     if (a->attrnum < b->attrnum)
    2451                 :           0 :         return -1;
    2452                 :             : 
    2453         [ -  + ]:       76902 :     if (a->attrnum > b->attrnum)
    2454                 :           0 :         return 1;
    2455                 :             : 
    2456         [ +  + ]:       76902 :     if (a->category < b->category)
    2457                 :          49 :         return -1;
    2458                 :             : 
    2459         [ +  + ]:       76853 :     if (a->category > b->category)
    2460                 :           9 :         return 1;
    2461                 :             : 
    2462         [ +  + ]:       76844 :     if (a->category == GIN_CAT_NORM_KEY)
    2463                 :             :     {
    2464                 :       76838 :         keya = _gin_parse_tuple_key(a);
    2465                 :       76838 :         keyb = _gin_parse_tuple_key(b);
    2466                 :             : 
    2467                 :       76838 :         r = ApplySortComparator(keya, false,
    2468                 :             :                                 keyb, false,
    2469                 :       76838 :                                 &ssup[a->attrnum - 1]);
    2470                 :             : 
    2471                 :             :         /* if the key is the same, consider the first TID in the array */
    2472         [ +  + ]:       89313 :         return (r != 0) ? r : ItemPointerCompare(GinTupleGetFirst(a),
    2473                 :       12475 :                                                  GinTupleGetFirst(b));
    2474                 :             :     }
    2475                 :             : 
    2476                 :           6 :     return ItemPointerCompare(GinTupleGetFirst(a),
    2477                 :           6 :                               GinTupleGetFirst(b));
    2478                 :             : }
        

Generated by: LCOV version 2.0-1