Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * gininsert.c
4 : * insert routines for the postgres inverted index access method.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/gininsert.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gin_private.h"
18 : #include "access/gin_tuple.h"
19 : #include "access/parallel.h"
20 : #include "access/table.h"
21 : #include "access/tableam.h"
22 : #include "access/xloginsert.h"
23 : #include "catalog/index.h"
24 : #include "catalog/pg_collation.h"
25 : #include "commands/progress.h"
26 : #include "miscadmin.h"
27 : #include "nodes/execnodes.h"
28 : #include "pgstat.h"
29 : #include "storage/bufmgr.h"
30 : #include "storage/predicate.h"
31 : #include "tcop/tcopprot.h"
32 : #include "utils/datum.h"
33 : #include "utils/memutils.h"
34 : #include "utils/rel.h"
35 : #include "utils/builtins.h"
36 :
37 :
38 : /* Magic numbers for parallel state sharing */
39 : #define PARALLEL_KEY_GIN_SHARED UINT64CONST(0xB000000000000001)
40 : #define PARALLEL_KEY_TUPLESORT UINT64CONST(0xB000000000000002)
41 : #define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xB000000000000003)
42 : #define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xB000000000000004)
43 : #define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xB000000000000005)
44 :
45 : /*
46 : * Status for index builds performed in parallel. This is allocated in a
47 : * dynamic shared memory segment.
48 : */
49 : typedef struct GinBuildShared
50 : {
51 : /*
52 : * These fields are not modified during the build. They primarily exist
53 : * for the benefit of worker processes that need to create state
54 : * corresponding to that used by the leader.
55 : */
56 : Oid heaprelid;
57 : Oid indexrelid;
58 : bool isconcurrent;
59 : int scantuplesortstates;
60 :
61 : /*
62 : * workersdonecv is used to monitor the progress of workers. All parallel
63 : * participants must indicate that they are done before leader can use
64 : * results built by the workers (and before leader can write the data into
65 : * the index).
66 : */
67 : ConditionVariable workersdonecv;
68 :
69 : /*
70 : * mutex protects all following fields
71 : *
72 : * These fields contain status information of interest to GIN index builds
73 : * that must work just the same when an index is built in parallel.
74 : */
75 : slock_t mutex;
76 :
77 : /*
78 : * Mutable state that is maintained by workers, and reported back to
79 : * leader at end of the scans.
80 : *
81 : * nparticipantsdone is number of worker processes finished.
82 : *
83 : * reltuples is the total number of input heap tuples.
84 : *
85 : * indtuples is the total number of tuples that made it into the index.
86 : */
87 : int nparticipantsdone;
88 : double reltuples;
89 : double indtuples;
90 :
91 : /*
92 : * ParallelTableScanDescData data follows. Can't directly embed here, as
93 : * implementations of the parallel table scan desc interface might need
94 : * stronger alignment.
95 : */
96 : } GinBuildShared;
97 :
98 : /*
99 : * Return pointer to a GinBuildShared's parallel table scan.
100 : *
101 : * c.f. shm_toc_allocate as to why BUFFERALIGN is used, rather than just
102 : * MAXALIGN.
103 : */
104 : #define ParallelTableScanFromGinBuildShared(shared) \
105 : (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(GinBuildShared)))
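/*
 * Illustrative sketch (not part of gininsert.c itself): a parallel worker
 * attaching to the build's DSM segment locates these chunks by the keys
 * defined above, roughly like this:
 *
 *		ginshared = (GinBuildShared *) shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false);
 *		sharedsort = (Sharedsort *) shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
 *		tuplesort_attach_shared(sharedsort, seg);
 *
 * The real lookups happen in _gin_parallel_build_main(), which is outside
 * this excerpt.
 */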
106 :
107 : /*
108 : * Status for leader in parallel index build.
109 : */
110 : typedef struct GinLeader
111 : {
112 : /* parallel context itself */
113 : ParallelContext *pcxt;
114 :
115 : /*
116 : * nparticipanttuplesorts is the exact number of worker processes
117 : * successfully launched, plus one leader process if it participates as a
118 : * worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader
119 : * participating as a worker).
120 : */
121 : int nparticipanttuplesorts;
122 :
123 : /*
124 : * Leader process convenience pointers to shared state (leader avoids TOC
125 : * lookups).
126 : *
127 : * GinBuildShared is the shared state for entire build. sharedsort is the
128 : * shared, tuplesort-managed state passed to each process tuplesort.
129 : * snapshot is the snapshot used by the scan iff an MVCC snapshot is
130 : * required.
131 : */
132 : GinBuildShared *ginshared;
133 : Sharedsort *sharedsort;
134 : Snapshot snapshot;
135 : WalUsage *walusage;
136 : BufferUsage *bufferusage;
137 : } GinLeader;
138 :
139 : typedef struct
140 : {
141 : GinState ginstate;
142 : double indtuples;
143 : GinStatsData buildStats;
144 : MemoryContext tmpCtx;
145 : MemoryContext funcCtx;
146 : BuildAccumulator accum;
147 : ItemPointerData tid;
148 : int work_mem;
149 :
150 : /*
151 : * bs_leader is only present when a parallel index build is performed, and
152 : * only in the leader process.
153 : */
154 : GinLeader *bs_leader;
155 : int bs_worker_id;
156 :
157 : /* used to pass information from workers to leader */
158 : double bs_numtuples;
159 : double bs_reltuples;
160 :
161 : /*
162 : * The sortstate is used by workers (including the leader). It has to be
163 : * part of the build state, because that's the only thing passed to the
164 : * build callback etc.
165 : */
166 : Tuplesortstate *bs_sortstate;
167 :
168 : /*
169 : * The sortstate used only within a single worker for the first merge pass
170 : * happening there. In principle it doesn't need to be part of the build
171 : * state and we could pass it around directly, but it's more convenient
172 : * this way. And it's part of the build state, after all.
173 : */
174 : Tuplesortstate *bs_worker_sort;
175 : } GinBuildState;
176 :
177 :
178 : /* parallel index builds */
179 : static void _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
180 : bool isconcurrent, int request);
181 : static void _gin_end_parallel(GinLeader *ginleader, GinBuildState *state);
182 : static Size _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot);
183 : static double _gin_parallel_heapscan(GinBuildState *state);
184 : static double _gin_parallel_merge(GinBuildState *state);
185 : static void _gin_leader_participate_as_worker(GinBuildState *buildstate,
186 : Relation heap, Relation index);
187 : static void _gin_parallel_scan_and_build(GinBuildState *state,
188 : GinBuildShared *ginshared,
189 : Sharedsort *sharedsort,
190 : Relation heap, Relation index,
191 : int sortmem, bool progress);
192 :
193 : static ItemPointer _gin_parse_tuple_items(GinTuple *a);
194 : static Datum _gin_parse_tuple_key(GinTuple *a);
195 :
196 : static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
197 : Datum key, int16 typlen, bool typbyval,
198 : ItemPointerData *items, uint32 nitems,
199 : Size *len);
200 :
201 : /*
202 : * Adds an array of item pointers to the tuple's posting list, or creates a
203 : * posting tree (and a tuple pointing to the tree) if there is not enough
204 : * space. The maximum tuple size is defined in GinFormTuple().
205 : * Returns a new, modified index tuple.
206 : * items[] must be in sorted order with no duplicates.
207 : */
208 : static IndexTuple
209 228788 : addItemPointersToLeafTuple(GinState *ginstate,
210 : IndexTuple old,
211 : ItemPointerData *items, uint32 nitem,
212 : GinStatsData *buildStats, Buffer buffer)
213 : {
214 : OffsetNumber attnum;
215 : Datum key;
216 : GinNullCategory category;
217 : IndexTuple res;
218 : ItemPointerData *newItems,
219 : *oldItems;
220 : int oldNPosting,
221 : newNPosting,
222 : nwritten;
223 : GinPostingList *compressedList;
224 :
225 : Assert(!GinIsPostingTree(old));
226 :
227 228788 : attnum = gintuple_get_attrnum(ginstate, old);
228 228788 : key = gintuple_get_key(ginstate, old, &category);
229 :
230 : /* merge the old and new posting lists */
231 228788 : oldItems = ginReadTuple(ginstate, attnum, old, &oldNPosting);
232 :
233 228788 : newItems = ginMergeItemPointers(items, nitem,
234 : oldItems, oldNPosting,
235 : &newNPosting);
236 :
237 : /* Compress the posting list, and try to build a tuple with room for it */
238 228788 : res = NULL;
239 228788 : compressedList = ginCompressPostingList(newItems, newNPosting, GinMaxItemSize, &nwritten);
240 228788 : if (nwritten == newNPosting)
241 : {
242 228776 : res = GinFormTuple(ginstate, attnum, key, category,
243 : (char *) compressedList,
244 228776 : SizeOfGinPostingList(compressedList),
245 : newNPosting,
246 : false);
247 : }
248 :
249 228788 : pfree(newItems);
250 228788 : pfree(compressedList);
251 :
252 228788 : if (!res)
253 : {
254 : /* posting list would be too big, convert to posting tree */
255 : BlockNumber postingRoot;
256 :
257 : /*
258 : * Initialize posting tree with the old tuple's posting list. It's
259 : * surely small enough to fit on one posting-tree page, and should
260 : * already be in order with no duplicates.
261 : */
262 22 : postingRoot = createPostingTree(ginstate->index,
263 : oldItems,
264 : oldNPosting,
265 : buildStats,
266 : buffer);
267 :
268 : /* Now insert the TIDs-to-be-added into the posting tree */
269 22 : ginInsertItemPointers(ginstate->index, postingRoot,
270 : items, nitem,
271 : buildStats);
272 :
273 : /* And build a new posting-tree-only result tuple */
274 22 : res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
275 22 : GinSetPostingTree(res, postingRoot);
276 : }
277 228788 : pfree(oldItems);
278 :
279 228788 : return res;
280 : }
281 :
282 : /*
283 : * Build a fresh leaf tuple, either posting-list or posting-tree format
284 : * depending on whether the given items list will fit.
285 : * items[] must be in sorted order with no duplicates.
286 : *
287 : * This is basically the same logic as in addItemPointersToLeafTuple,
288 : * but working from slightly different input.
289 : */
290 : static IndexTuple
291 693592 : buildFreshLeafTuple(GinState *ginstate,
292 : OffsetNumber attnum, Datum key, GinNullCategory category,
293 : ItemPointerData *items, uint32 nitem,
294 : GinStatsData *buildStats, Buffer buffer)
295 : {
296 693592 : IndexTuple res = NULL;
297 : GinPostingList *compressedList;
298 : int nwritten;
299 :
300 : /* try to build a posting list tuple with all the items */
301 693592 : compressedList = ginCompressPostingList(items, nitem, GinMaxItemSize, &nwritten);
302 693592 : if (nwritten == nitem)
303 : {
304 693490 : res = GinFormTuple(ginstate, attnum, key, category,
305 : (char *) compressedList,
306 693490 : SizeOfGinPostingList(compressedList),
307 : nitem, false);
308 : }
309 693592 : pfree(compressedList);
310 :
311 693592 : if (!res)
312 : {
313 : /* posting list would be too big, build posting tree */
314 : BlockNumber postingRoot;
315 :
316 : /*
317 : * Build posting-tree-only result tuple. We do this first so as to
318 : * fail quickly if the key is too big.
319 : */
320 102 : res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
321 :
322 : /*
323 : * Initialize a new posting tree with the TIDs.
324 : */
325 102 : postingRoot = createPostingTree(ginstate->index, items, nitem,
326 : buildStats, buffer);
327 :
328 : /* And save the root link in the result tuple */
329 102 : GinSetPostingTree(res, postingRoot);
330 : }
331 :
332 693592 : return res;
333 : }
334 :
335 : /*
336 : * Insert one or more heap TIDs associated with the given key value.
337 : * This will either add a single key entry, or enlarge a pre-existing entry.
338 : *
339 : * During an index build, buildStats is non-null and the counters
340 : * it contains should be incremented as needed.
341 : */
342 : void
343 971784 : ginEntryInsert(GinState *ginstate,
344 : OffsetNumber attnum, Datum key, GinNullCategory category,
345 : ItemPointerData *items, uint32 nitem,
346 : GinStatsData *buildStats)
347 : {
348 : GinBtreeData btree;
349 : GinBtreeEntryInsertData insertdata;
350 : GinBtreeStack *stack;
351 : IndexTuple itup;
352 : Page page;
353 :
354 971784 : insertdata.isDelete = false;
355 :
356 971784 : ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
357 971784 : btree.isBuild = (buildStats != NULL);
358 :
359 971784 : stack = ginFindLeafPage(&btree, false, false);
360 971784 : page = BufferGetPage(stack->buffer);
361 :
362 971784 : if (btree.findItem(&btree, stack))
363 : {
364 : /* found pre-existing entry */
365 278188 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
366 :
367 278188 : if (GinIsPostingTree(itup))
368 : {
369 : /* add entries to existing posting tree */
370 49392 : BlockNumber rootPostingTree = GinGetPostingTree(itup);
371 :
372 : /* release all stack */
373 49392 : LockBuffer(stack->buffer, GIN_UNLOCK);
374 49392 : freeGinBtreeStack(stack);
375 :
376 : /* insert into posting tree */
377 49392 : ginInsertItemPointers(ginstate->index, rootPostingTree,
378 : items, nitem,
379 : buildStats);
380 49388 : return;
381 : }
382 :
383 228796 : CheckForSerializableConflictIn(ginstate->index, NULL,
384 : BufferGetBlockNumber(stack->buffer));
385 : /* modify an existing leaf entry */
386 228788 : itup = addItemPointersToLeafTuple(ginstate, itup,
387 : items, nitem, buildStats, stack->buffer);
388 :
389 228788 : insertdata.isDelete = true;
390 : }
391 : else
392 : {
393 693596 : CheckForSerializableConflictIn(ginstate->index, NULL,
394 : BufferGetBlockNumber(stack->buffer));
395 : /* no match, so construct a new leaf entry */
396 693592 : itup = buildFreshLeafTuple(ginstate, attnum, key, category,
397 : items, nitem, buildStats, stack->buffer);
398 :
399 : /*
400 : * nEntries counts leaf tuples, so increment it only when we make a
401 : * new one.
402 : */
403 693592 : if (buildStats)
404 152720 : buildStats->nEntries++;
405 : }
406 :
407 : /* Insert the new or modified leaf tuple */
408 922380 : insertdata.entry = itup;
409 922380 : ginInsertValue(&btree, stack, &insertdata, buildStats);
410 922376 : pfree(itup);
411 : }
412 :
413 : /*
414 : * Extract index entries for a single indexable item, and add them to the
415 : * BuildAccumulator's state.
416 : *
417 : * This function is used only during initial index creation.
418 : */
419 : static void
420 939004 : ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum,
421 : Datum value, bool isNull,
422 : ItemPointer heapptr)
423 : {
424 : Datum *entries;
425 : GinNullCategory *categories;
426 : int32 nentries;
427 : MemoryContext oldCtx;
428 :
429 939004 : oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
430 939004 : entries = ginExtractEntries(buildstate->accum.ginstate, attnum,
431 : value, isNull,
432 : &nentries, &categories);
433 939004 : MemoryContextSwitchTo(oldCtx);
434 :
435 939004 : ginInsertBAEntries(&buildstate->accum, heapptr, attnum,
436 : entries, categories, nentries);
437 :
438 939004 : buildstate->indtuples += nentries;
439 :
440 939004 : MemoryContextReset(buildstate->funcCtx);
441 939004 : }
442 :
443 : static void
444 938382 : ginBuildCallback(Relation index, ItemPointer tid, Datum *values,
445 : bool *isnull, bool tupleIsAlive, void *state)
446 : {
447 938382 : GinBuildState *buildstate = (GinBuildState *) state;
448 : MemoryContext oldCtx;
449 : int i;
450 :
451 938382 : oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
452 :
453 1877386 : for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
454 939004 : ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
455 939004 : values[i], isnull[i], tid);
456 :
457 : /* If we've maxed out our available memory, dump everything to the index */
458 938382 : if (buildstate->accum.allocatedMemory >= maintenance_work_mem * (Size) 1024)
459 : {
460 : ItemPointerData *list;
461 : Datum key;
462 : GinNullCategory category;
463 : uint32 nlist;
464 : OffsetNumber attnum;
465 :
466 0 : ginBeginBAScan(&buildstate->accum);
467 0 : while ((list = ginGetBAEntry(&buildstate->accum,
468 0 : &attnum, &key, &category, &nlist)) != NULL)
469 : {
470 : /* there could be many entries, so be willing to abort here */
471 0 : CHECK_FOR_INTERRUPTS();
472 0 : ginEntryInsert(&buildstate->ginstate, attnum, key, category,
473 : list, nlist, &buildstate->buildStats);
474 : }
475 :
476 0 : MemoryContextReset(buildstate->tmpCtx);
477 0 : ginInitBA(&buildstate->accum);
478 : }
479 :
480 938382 : MemoryContextSwitchTo(oldCtx);
481 938382 : }
482 :
483 : /*
484 : * ginFlushBuildState
485 : * Write all data from BuildAccumulator into the tuplesort.
486 : */
487 : static void
488 0 : ginFlushBuildState(GinBuildState *buildstate, Relation index)
489 : {
490 : ItemPointerData *list;
491 : Datum key;
492 : GinNullCategory category;
493 : uint32 nlist;
494 : OffsetNumber attnum;
495 0 : TupleDesc tdesc = RelationGetDescr(index);
496 :
497 0 : ginBeginBAScan(&buildstate->accum);
498 0 : while ((list = ginGetBAEntry(&buildstate->accum,
499 0 : &attnum, &key, &category, &nlist)) != NULL)
500 : {
501 : /* information about the key */
502 0 : CompactAttribute *attr = TupleDescCompactAttr(tdesc, (attnum - 1));
503 :
504 : /* GIN tuple and tuple length */
505 : GinTuple *tup;
506 : Size tuplen;
507 :
508 : /* there could be many entries, so be willing to abort here */
509 0 : CHECK_FOR_INTERRUPTS();
510 :
511 0 : tup = _gin_build_tuple(attnum, category,
512 0 : key, attr->attlen, attr->attbyval,
513 : list, nlist, &tuplen);
514 :
515 0 : tuplesort_putgintuple(buildstate->bs_worker_sort, tup, tuplen);
516 :
517 0 : pfree(tup);
518 : }
519 :
520 0 : MemoryContextReset(buildstate->tmpCtx);
521 0 : ginInitBA(&buildstate->accum);
522 0 : }
523 :
524 : /*
525 : * ginBuildCallbackParallel
526 : * Callback for the parallel index build.
527 : *
528 : * This is similar to the serial build callback ginBuildCallback, but
529 : * instead of writing the accumulated entries into the index, each worker
530 : * writes them into a (local) tuplesort.
531 : *
532 : * The worker then sorts and combines these entries, before writing them
533 : * into a shared tuplesort for the leader (see _gin_parallel_scan_and_build
534 : * for the whole process).
535 : */
536 : static void
537 0 : ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values,
538 : bool *isnull, bool tupleIsAlive, void *state)
539 : {
540 0 : GinBuildState *buildstate = (GinBuildState *) state;
541 : MemoryContext oldCtx;
542 : int i;
543 :
544 0 : oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
545 :
546 : /*
547 : * if scan wrapped around - flush accumulated entries and start anew
548 : *
549 : * With parallel scans, we don't have a guarantee the scan does not start
550 : * half-way through the relation (serial builds disable sync scans and
551 : * always start from block 0, while parallel scans require allow_sync=true).
552 : *
553 : * Building the posting lists assumes the TIDs are monotonic and never go
554 : * back, and the wrap around would break that. We handle that by detecting
555 : * the wraparound, and flushing all entries. This means we'll later see
556 : * two separate entries with non-overlapping TID lists (which can be
557 : * combined by merge sort).
558 : *
559 : * To detect a wraparound, we remember the last TID seen by each worker
560 : * (for any key). If the next TID seen by the worker is lower, the scan
561 : * must have wrapped around.
562 : */
563 0 : if (ItemPointerCompare(tid, &buildstate->tid) < 0)
564 0 : ginFlushBuildState(buildstate, index);
565 :
566 : /* remember the TID we're about to process */
567 0 : buildstate->tid = *tid;
568 :
569 0 : for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
570 0 : ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
571 0 : values[i], isnull[i], tid);
572 :
573 : /*
574 : * If we've maxed out our available memory, dump everything to the
575 : * tuplesort. We use half the per-worker fraction of maintenance_work_mem;
576 : * the other half is used for the tuplesort.
577 : */
578 0 : if (buildstate->accum.allocatedMemory >= buildstate->work_mem * (Size) 1024)
579 0 : ginFlushBuildState(buildstate, index);
580 :
581 0 : MemoryContextSwitchTo(oldCtx);
582 0 : }
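/*
 * Worked example of the wraparound check above (illustrative TIDs only):
 * with a synchronized parallel scan a worker may first see TIDs (41,3),
 * (41,7), (42,1) and then, after the scan wraps to block 0, TID (5,2).
 * Because ItemPointerCompare((5,2), (42,1)) < 0, ginFlushBuildState() runs
 * and the entries accumulated so far are written to the worker's tuplesort
 * before the lower TIDs are accumulated, keeping every posting list sorted.
 */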
583 :
584 : IndexBuildResult *
585 392 : ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
586 : {
587 : IndexBuildResult *result;
588 : double reltuples;
589 : GinBuildState buildstate;
590 392 : GinBuildState *state = &buildstate;
591 : Buffer RootBuffer,
592 : MetaBuffer;
593 : ItemPointerData *list;
594 : Datum key;
595 : GinNullCategory category;
596 : uint32 nlist;
597 : MemoryContext oldCtx;
598 : OffsetNumber attnum;
599 :
600 392 : if (RelationGetNumberOfBlocks(index) != 0)
601 0 : elog(ERROR, "index \"%s\" already contains data",
602 : RelationGetRelationName(index));
603 :
604 392 : initGinState(&buildstate.ginstate, index);
605 392 : buildstate.indtuples = 0;
606 392 : memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
607 :
608 : /* Initialize fields for parallel build too. */
609 392 : buildstate.bs_numtuples = 0;
610 392 : buildstate.bs_reltuples = 0;
611 392 : buildstate.bs_leader = NULL;
612 392 : memset(&buildstate.tid, 0, sizeof(ItemPointerData));
613 :
614 : /* initialize the meta page */
615 392 : MetaBuffer = GinNewBuffer(index);
616 :
617 : /* initialize the root page */
618 392 : RootBuffer = GinNewBuffer(index);
619 :
620 392 : START_CRIT_SECTION();
621 392 : GinInitMetabuffer(MetaBuffer);
622 392 : MarkBufferDirty(MetaBuffer);
623 392 : GinInitBuffer(RootBuffer, GIN_LEAF);
624 392 : MarkBufferDirty(RootBuffer);
625 :
626 :
627 392 : UnlockReleaseBuffer(MetaBuffer);
628 392 : UnlockReleaseBuffer(RootBuffer);
629 392 : END_CRIT_SECTION();
630 :
631 : /* count the root as first entry page */
632 392 : buildstate.buildStats.nEntryPages++;
633 :
634 : /*
635 : * create a temporary memory context that is used to hold data not yet
636 : * dumped out to the index
637 : */
638 392 : buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
639 : "Gin build temporary context",
640 : ALLOCSET_DEFAULT_SIZES);
641 :
642 : /*
643 : * create a temporary memory context that is used for calling
644 : * ginExtractEntries(), and can be reset after each tuple
645 : */
646 392 : buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
647 : "Gin build temporary context for user-defined function",
648 : ALLOCSET_DEFAULT_SIZES);
649 :
650 392 : buildstate.accum.ginstate = &buildstate.ginstate;
651 392 : ginInitBA(&buildstate.accum);
652 :
653 : /* Report table scan phase started */
654 392 : pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
655 : PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN);
656 :
657 : /*
658 : * Attempt to launch parallel worker scan when required
659 : *
660 : * XXX plan_create_index_workers makes the number of workers dependent on
661 : * maintenance_work_mem, requiring 32MB for each worker. For GIN that's
662 : * reasonable too, because we sort the data just like btree. It does
663 : * ignore the memory used to accumulate data in memory (set by work_mem),
664 : * but there is no way to communicate that to plan_create_index_workers.
665 : */
666 392 : if (indexInfo->ii_ParallelWorkers > 0)
667 0 : _gin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent,
668 : indexInfo->ii_ParallelWorkers);
669 :
670 : /*
671 : * If parallel build requested and at least one worker process was
672 : * successfully launched, set up coordination state, wait for workers to
673 : * complete. Then read all tuples from the shared tuplesort and insert
674 : * them into the index.
675 : *
676 : * In serial mode, simply scan the table and build the index one index
677 : * tuple at a time.
678 : */
679 392 : if (state->bs_leader)
680 : {
681 : SortCoordinate coordinate;
682 :
683 0 : coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
684 0 : coordinate->isWorker = false;
685 0 : coordinate->nParticipants =
686 0 : state->bs_leader->nparticipanttuplesorts;
687 0 : coordinate->sharedsort = state->bs_leader->sharedsort;
688 :
689 : /*
690 : * Begin leader tuplesort.
691 : *
692 : * In cases where parallelism is involved, the leader receives the
693 : * same share of maintenance_work_mem as a serial sort (it is
694 : * generally treated in the same way as a serial sort once we return).
695 : * Parallel worker Tuplesortstates will have received only a fraction
696 : * of maintenance_work_mem, though.
697 : *
698 : * We rely on the lifetime of the Leader Tuplesortstate almost not
699 : * overlapping with any worker Tuplesortstate's lifetime. There may
700 : * be some small overlap, but that's okay because we rely on leader
701 : * Tuplesortstate only allocating a small, fixed amount of memory
702 : * here. When its tuplesort_performsort() is called (by our caller),
703 : * and significant amounts of memory are likely to be used, all
704 : * workers must have already freed almost all memory held by their
705 : * Tuplesortstates (they are about to go away completely, too). The
706 : * overall effect is that maintenance_work_mem always represents an
707 : * absolute high watermark on the amount of memory used by a CREATE
708 : * INDEX operation, regardless of the use of parallelism or any other
709 : * factor.
710 : */
711 0 : state->bs_sortstate =
712 0 : tuplesort_begin_index_gin(heap, index,
713 : maintenance_work_mem, coordinate,
714 : TUPLESORT_NONE);
715 :
716 : /* scan the relation in parallel and merge per-worker results */
717 0 : reltuples = _gin_parallel_merge(state);
718 :
719 0 : _gin_end_parallel(state->bs_leader, state);
720 : }
721 : else /* no parallel index build */
722 : {
723 : /*
724 : * Do the heap scan. We disallow sync scan here because
725 : * dataPlaceToPage prefers to receive tuples in TID order.
726 : */
727 392 : reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
728 : ginBuildCallback, &buildstate, NULL);
729 :
730 : /* dump remaining entries to the index */
731 392 : oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
732 392 : ginBeginBAScan(&buildstate.accum);
733 153112 : while ((list = ginGetBAEntry(&buildstate.accum,
734 153112 : &attnum, &key, &category, &nlist)) != NULL)
735 : {
736 : /* there could be many entries, so be willing to abort here */
737 152720 : CHECK_FOR_INTERRUPTS();
738 152720 : ginEntryInsert(&buildstate.ginstate, attnum, key, category,
739 : list, nlist, &buildstate.buildStats);
740 : }
741 392 : MemoryContextSwitchTo(oldCtx);
742 : }
743 :
744 392 : MemoryContextDelete(buildstate.funcCtx);
745 392 : MemoryContextDelete(buildstate.tmpCtx);
746 :
747 : /*
748 : * Update metapage stats
749 : */
750 392 : buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
751 392 : ginUpdateStats(index, &buildstate.buildStats, true);
752 :
753 : /*
754 : * We didn't write WAL records as we built the index, so if WAL-logging is
755 : * required, write all pages to the WAL now.
756 : */
757 392 : if (RelationNeedsWAL(index))
758 : {
759 250 : log_newpage_range(index, MAIN_FORKNUM,
760 : 0, RelationGetNumberOfBlocks(index),
761 : true);
762 : }
763 :
764 : /*
765 : * Return statistics
766 : */
767 392 : result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
768 :
769 392 : result->heap_tuples = reltuples;
770 392 : result->index_tuples = buildstate.indtuples;
771 :
772 392 : return result;
773 : }
774 :
775 : /*
776 : * ginbuildempty() -- build an empty gin index in the initialization fork
777 : */
778 : void
779 6 : ginbuildempty(Relation index)
780 : {
781 : Buffer RootBuffer,
782 : MetaBuffer;
783 :
784 : /* An empty GIN index has two pages. */
785 6 : MetaBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
786 : EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
787 6 : RootBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
788 : EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
789 :
790 : /* Initialize and xlog metabuffer and root buffer. */
791 6 : START_CRIT_SECTION();
792 6 : GinInitMetabuffer(MetaBuffer);
793 6 : MarkBufferDirty(MetaBuffer);
794 6 : log_newpage_buffer(MetaBuffer, true);
795 6 : GinInitBuffer(RootBuffer, GIN_LEAF);
796 6 : MarkBufferDirty(RootBuffer);
797 6 : log_newpage_buffer(RootBuffer, false);
798 6 : END_CRIT_SECTION();
799 :
800 : /* Unlock and release the buffers. */
801 6 : UnlockReleaseBuffer(MetaBuffer);
802 6 : UnlockReleaseBuffer(RootBuffer);
803 6 : }
804 :
805 : /*
806 : * Insert index entries for a single indexable item during "normal"
807 : * (non-fast-update) insertion
808 : */
809 : static void
810 54096 : ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
811 : Datum value, bool isNull,
812 : ItemPointer item)
813 : {
814 : Datum *entries;
815 : GinNullCategory *categories;
816 : int32 i,
817 : nentries;
818 :
819 54096 : entries = ginExtractEntries(ginstate, attnum, value, isNull,
820 : &nentries, &categories);
821 :
822 506998 : for (i = 0; i < nentries; i++)
823 452922 : ginEntryInsert(ginstate, attnum, entries[i], categories[i],
824 : item, 1, NULL);
825 54076 : }
826 :
827 : bool
828 319932 : gininsert(Relation index, Datum *values, bool *isnull,
829 : ItemPointer ht_ctid, Relation heapRel,
830 : IndexUniqueCheck checkUnique,
831 : bool indexUnchanged,
832 : IndexInfo *indexInfo)
833 : {
834 319932 : GinState *ginstate = (GinState *) indexInfo->ii_AmCache;
835 : MemoryContext oldCtx;
836 : MemoryContext insertCtx;
837 : int i;
838 :
839 : /* Initialize GinState cache if first call in this statement */
840 319932 : if (ginstate == NULL)
841 : {
842 2910 : oldCtx = MemoryContextSwitchTo(indexInfo->ii_Context);
843 2910 : ginstate = (GinState *) palloc(sizeof(GinState));
844 2910 : initGinState(ginstate, index);
845 2910 : indexInfo->ii_AmCache = ginstate;
846 2910 : MemoryContextSwitchTo(oldCtx);
847 : }
848 :
849 319932 : insertCtx = AllocSetContextCreate(CurrentMemoryContext,
850 : "Gin insert temporary context",
851 : ALLOCSET_DEFAULT_SIZES);
852 :
853 319932 : oldCtx = MemoryContextSwitchTo(insertCtx);
854 :
855 319932 : if (GinGetUseFastUpdate(index))
856 265830 : {
857 : GinTupleCollector collector;
858 :
859 265836 : memset(&collector, 0, sizeof(GinTupleCollector));
860 :
861 651750 : for (i = 0; i < ginstate->origTupdesc->natts; i++)
862 385914 : ginHeapTupleFastCollect(ginstate, &collector,
863 385914 : (OffsetNumber) (i + 1),
864 385914 : values[i], isnull[i],
865 : ht_ctid);
866 :
867 265836 : ginHeapTupleFastInsert(ginstate, &collector);
868 : }
869 : else
870 : {
871 108172 : for (i = 0; i < ginstate->origTupdesc->natts; i++)
872 54096 : ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
873 54096 : values[i], isnull[i],
874 : ht_ctid);
875 : }
876 :
877 319906 : MemoryContextSwitchTo(oldCtx);
878 319906 : MemoryContextDelete(insertCtx);
879 :
880 319906 : return false;
881 : }
882 :
883 : /*
884 : * Create parallel context, and launch workers for leader.
885 : *
886 : * buildstate argument should be initialized (with the exception of the
887 : * tuplesort states, which may later be created based on shared
888 : * state initially set up here).
889 : *
890 : * isconcurrent indicates if operation is CREATE INDEX CONCURRENTLY.
891 : *
892 : * request is the target number of parallel worker processes to launch.
893 : *
894 : * Sets buildstate's GinLeader, which caller must use to shut down parallel
895 : * mode by passing it to _gin_end_parallel() at the very end of its index
896 : * build. If not even a single worker process can be launched, this is
897 : * never set, and caller should proceed with a serial index build.
898 : */
899 : static void
900 0 : _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
901 : bool isconcurrent, int request)
902 : {
903 : ParallelContext *pcxt;
904 : int scantuplesortstates;
905 : Snapshot snapshot;
906 : Size estginshared;
907 : Size estsort;
908 : GinBuildShared *ginshared;
909 : Sharedsort *sharedsort;
910 0 : GinLeader *ginleader = (GinLeader *) palloc0(sizeof(GinLeader));
911 : WalUsage *walusage;
912 : BufferUsage *bufferusage;
913 0 : bool leaderparticipates = true;
914 : int querylen;
915 :
916 : #ifdef DISABLE_LEADER_PARTICIPATION
917 : leaderparticipates = false;
918 : #endif
919 :
920 : /*
921 : * Enter parallel mode, and create context for parallel build of gin index
922 : */
923 0 : EnterParallelMode();
924 : Assert(request > 0);
925 0 : pcxt = CreateParallelContext("postgres", "_gin_parallel_build_main",
926 : request);
927 :
928 0 : scantuplesortstates = leaderparticipates ? request + 1 : request;
929 :
930 : /*
931 : * Prepare for scan of the base relation. In a normal index build, we use
932 : * SnapshotAny because we must retrieve all tuples and do our own time
933 : * qual checks (because we have to index RECENTLY_DEAD tuples). In a
934 : * concurrent build, we take a regular MVCC snapshot and index whatever's
935 : * live according to that.
936 : */
937 0 : if (!isconcurrent)
938 0 : snapshot = SnapshotAny;
939 : else
940 0 : snapshot = RegisterSnapshot(GetTransactionSnapshot());
941 :
942 : /*
943 : * Estimate size for our own PARALLEL_KEY_GIN_SHARED workspace.
944 : */
945 0 : estginshared = _gin_parallel_estimate_shared(heap, snapshot);
946 0 : shm_toc_estimate_chunk(&pcxt->estimator, estginshared);
947 0 : estsort = tuplesort_estimate_shared(scantuplesortstates);
948 0 : shm_toc_estimate_chunk(&pcxt->estimator, estsort);
949 :
950 0 : shm_toc_estimate_keys(&pcxt->estimator, 2);
951 :
952 : /*
953 : * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
954 : * and PARALLEL_KEY_BUFFER_USAGE.
955 : *
956 : * If there are no extensions loaded that care, we could skip this. We
957 : * have no way of knowing whether anyone's looking at pgWalUsage or
958 : * pgBufferUsage, so do it unconditionally.
959 : */
960 0 : shm_toc_estimate_chunk(&pcxt->estimator,
961 : mul_size(sizeof(WalUsage), pcxt->nworkers));
962 0 : shm_toc_estimate_keys(&pcxt->estimator, 1);
963 0 : shm_toc_estimate_chunk(&pcxt->estimator,
964 : mul_size(sizeof(BufferUsage), pcxt->nworkers));
965 0 : shm_toc_estimate_keys(&pcxt->estimator, 1);
966 :
967 : /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
968 0 : if (debug_query_string)
969 : {
970 0 : querylen = strlen(debug_query_string);
971 0 : shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
972 0 : shm_toc_estimate_keys(&pcxt->estimator, 1);
973 : }
974 : else
975 0 : querylen = 0; /* keep compiler quiet */
976 :
977 : /* Everyone's had a chance to ask for space, so now create the DSM */
978 0 : InitializeParallelDSM(pcxt);
979 :
980 : /* If no DSM segment was available, back out (do serial build) */
981 0 : if (pcxt->seg == NULL)
982 : {
983 0 : if (IsMVCCSnapshot(snapshot))
984 0 : UnregisterSnapshot(snapshot);
985 0 : DestroyParallelContext(pcxt);
986 0 : ExitParallelMode();
987 0 : return;
988 : }
989 :
990 : /* Store shared build state, for which we reserved space */
991 0 : ginshared = (GinBuildShared *) shm_toc_allocate(pcxt->toc, estginshared);
992 : /* Initialize immutable state */
993 0 : ginshared->heaprelid = RelationGetRelid(heap);
994 0 : ginshared->indexrelid = RelationGetRelid(index);
995 0 : ginshared->isconcurrent = isconcurrent;
996 0 : ginshared->scantuplesortstates = scantuplesortstates;
997 :
998 0 : ConditionVariableInit(&ginshared->workersdonecv);
999 0 : SpinLockInit(&ginshared->mutex);
1000 :
1001 : /* Initialize mutable state */
1002 0 : ginshared->nparticipantsdone = 0;
1003 0 : ginshared->reltuples = 0.0;
1004 0 : ginshared->indtuples = 0.0;
1005 :
1006 0 : table_parallelscan_initialize(heap,
1007 : ParallelTableScanFromGinBuildShared(ginshared),
1008 : snapshot);
1009 :
1010 : /*
1011 : * Store shared tuplesort-private state, for which we reserved space.
1012 : * Then, initialize opaque state using tuplesort routine.
1013 : */
1014 0 : sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
1015 0 : tuplesort_initialize_shared(sharedsort, scantuplesortstates,
1016 : pcxt->seg);
1017 :
1018 0 : shm_toc_insert(pcxt->toc, PARALLEL_KEY_GIN_SHARED, ginshared);
1019 0 : shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
1020 :
1021 : /* Store query string for workers */
1022 0 : if (debug_query_string)
1023 : {
1024 : char *sharedquery;
1025 :
1026 0 : sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
1027 0 : memcpy(sharedquery, debug_query_string, querylen + 1);
1028 0 : shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
1029 : }
1030 :
1031 : /*
1032 : * Allocate space for each worker's WalUsage and BufferUsage; no need to
1033 : * initialize.
1034 : */
1035 0 : walusage = shm_toc_allocate(pcxt->toc,
1036 0 : mul_size(sizeof(WalUsage), pcxt->nworkers));
1037 0 : shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
1038 0 : bufferusage = shm_toc_allocate(pcxt->toc,
1039 0 : mul_size(sizeof(BufferUsage), pcxt->nworkers));
1040 0 : shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
1041 :
1042 : /* Launch workers, saving status for leader/caller */
1043 0 : LaunchParallelWorkers(pcxt);
1044 0 : ginleader->pcxt = pcxt;
1045 0 : ginleader->nparticipanttuplesorts = pcxt->nworkers_launched;
1046 0 : if (leaderparticipates)
1047 0 : ginleader->nparticipanttuplesorts++;
1048 0 : ginleader->ginshared = ginshared;
1049 0 : ginleader->sharedsort = sharedsort;
1050 0 : ginleader->snapshot = snapshot;
1051 0 : ginleader->walusage = walusage;
1052 0 : ginleader->bufferusage = bufferusage;
1053 :
1054 : /* If no workers were successfully launched, back out (do serial build) */
1055 0 : if (pcxt->nworkers_launched == 0)
1056 : {
1057 0 : _gin_end_parallel(ginleader, NULL);
1058 0 : return;
1059 : }
1060 :
1061 : /* Save leader state now that it's clear build will be parallel */
1062 0 : buildstate->bs_leader = ginleader;
1063 :
1064 : /* Join heap scan ourselves */
1065 0 : if (leaderparticipates)
1066 0 : _gin_leader_participate_as_worker(buildstate, heap, index);
1067 :
1068 : /*
1069 : * Caller needs to wait for all launched workers when we return. Make
1070 : * sure that the failure-to-start case will not hang forever.
1071 : */
1072 0 : WaitForParallelWorkersToAttach(pcxt);
1073 : }
1074 :
1075 : /*
1076 : * Shut down workers, destroy parallel context, and end parallel mode.
1077 : */
1078 : static void
1079 0 : _gin_end_parallel(GinLeader *ginleader, GinBuildState *state)
1080 : {
1081 : int i;
1082 :
1083 : /* Shutdown worker processes */
1084 0 : WaitForParallelWorkersToFinish(ginleader->pcxt);
1085 :
1086 : /*
1087 : * Next, accumulate WAL usage. (This must wait for the workers to finish,
1088 : * or we might get incomplete data.)
1089 : */
1090 0 : for (i = 0; i < ginleader->pcxt->nworkers_launched; i++)
1091 0 : InstrAccumParallelQuery(&ginleader->bufferusage[i], &ginleader->walusage[i]);
1092 :
1093 : /* Free last reference to MVCC snapshot, if one was used */
1094 0 : if (IsMVCCSnapshot(ginleader->snapshot))
1095 0 : UnregisterSnapshot(ginleader->snapshot);
1096 0 : DestroyParallelContext(ginleader->pcxt);
1097 0 : ExitParallelMode();
1098 0 : }
1099 :
1100 : /*
1101 : * Within leader, wait for end of heap scan.
1102 : *
1103 : * When called, parallel heap scan started by _gin_begin_parallel() will
1104 : * already be underway within worker processes (when leader participates
1105 : * as a worker, we should end up here just as workers are finishing).
1106 : *
1107 : * Returns the total number of heap tuples scanned.
1108 : */
1109 : static double
1110 0 : _gin_parallel_heapscan(GinBuildState *state)
1111 : {
1112 0 : GinBuildShared *ginshared = state->bs_leader->ginshared;
1113 : int nparticipanttuplesorts;
1114 :
1115 0 : nparticipanttuplesorts = state->bs_leader->nparticipanttuplesorts;
1116 : for (;;)
1117 : {
1118 0 : SpinLockAcquire(&ginshared->mutex);
1119 0 : if (ginshared->nparticipantsdone == nparticipanttuplesorts)
1120 : {
1121 : /* copy the data into leader state */
1122 0 : state->bs_reltuples = ginshared->reltuples;
1123 0 : state->bs_numtuples = ginshared->indtuples;
1124 :
1125 0 : SpinLockRelease(&ginshared->mutex);
1126 0 : break;
1127 : }
1128 0 : SpinLockRelease(&ginshared->mutex);
1129 :
1130 0 : ConditionVariableSleep(&ginshared->workersdonecv,
1131 : WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN);
1132 : }
1133 :
1134 0 : ConditionVariableCancelSleep();
1135 :
1136 0 : return state->bs_reltuples;
1137 : }
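/*
 * For context, a hypothetical sketch of the worker-side counterpart (the
 * actual code lives in _gin_parallel_scan_and_build, outside this excerpt):
 * each participant updates the shared counters and wakes the leader roughly
 * like this, which is what the loop above waits for:
 *
 *		SpinLockAcquire(&ginshared->mutex);
 *		ginshared->nparticipantsdone++;
 *		ginshared->reltuples += reltuples;
 *		ginshared->indtuples += state->indtuples;
 *		SpinLockRelease(&ginshared->mutex);
 *
 *		ConditionVariableSignal(&ginshared->workersdonecv);
 */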
1138 :
1139 : /*
1140 : * Buffer used to accumulate TIDs from multiple GinTuples for the same key
1141 : * (we read these from the tuplesort, sorted by the key).
1142 : *
1143 : * This is similar to BuildAccumulator in that it's used to collect TIDs
1144 : * in memory before inserting them into the index, but it's much simpler
1145 : * as it only deals with a single index key at a time.
1146 : *
1147 : * When adding TIDs to the buffer, we make sure to keep them sorted, both
1148 : * during the initial table scan (and detecting when the scan wraps around),
1149 : * and during merging (where we do mergesort).
1150 : */
1151 : typedef struct GinBuffer
1152 : {
1153 : OffsetNumber attnum;
1154 : GinNullCategory category;
1155 : Datum key; /* 0 if no key (and keylen == 0) */
1156 : Size keylen; /* number of bytes (not typlen) */
1157 :
1158 : /* type info */
1159 : int16 typlen;
1160 : bool typbyval;
1161 :
1162 : /* Number of TIDs to collect before attempt to write some out. */
1163 : int maxitems;
1164 :
1165 : /* array of TID values */
1166 : int nitems;
1167 : int nfrozen;
1168 : SortSupport ssup; /* for sorting/comparing keys */
1169 : ItemPointerData *items;
1170 : } GinBuffer;
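/*
 * Typical life cycle of a GinBuffer in the leader, as a simplified sketch of
 * what _gin_parallel_merge does further below (trimming path omitted, local
 * variable names are placeholders):
 *
 *		buffer = GinBufferInit(index);
 *		while ((tup = tuplesort_getgintuple(sortstate, &tuplen, true)) != NULL)
 *		{
 *			if (!GinBufferCanAddKey(buffer, tup))
 *			{
 *				ginEntryInsert(ginstate, buffer->attnum, buffer->key,
 *							   buffer->category, buffer->items,
 *							   buffer->nitems, buildStats);
 *				GinBufferReset(buffer);
 *			}
 *			GinBufferStoreTuple(buffer, tup);
 *		}
 *		... flush the last key the same way ...
 *		GinBufferFree(buffer);
 */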
1171 :
1172 : /*
1173 : * Check that TID array contains valid values, and that it's sorted (if we
1174 : * expect it to be).
1175 : */
1176 : static void
1177 0 : AssertCheckItemPointers(GinBuffer *buffer)
1178 : {
1179 : #ifdef USE_ASSERT_CHECKING
1180 : /* we should not have a buffer with no TIDs to sort */
1181 : Assert(buffer->items != NULL);
1182 : Assert(buffer->nitems > 0);
1183 :
1184 : for (int i = 0; i < buffer->nitems; i++)
1185 : {
1186 : Assert(ItemPointerIsValid(&buffer->items[i]));
1187 :
1188 : /* don't check ordering for the first TID item */
1189 : if (i == 0)
1190 : continue;
1191 :
1192 : Assert(ItemPointerCompare(&buffer->items[i - 1], &buffer->items[i]) < 0);
1193 : }
1194 : #endif
1195 0 : }
1196 :
1197 : /*
1198 : * GinBuffer checks
1199 : *
1200 : * Make sure the nitems/items fields are consistent (the two fields must agree
1201 : * on whether the buffer is empty). If there are items, check their ordering.
1202 : */
1203 : static void
1204 0 : AssertCheckGinBuffer(GinBuffer *buffer)
1205 : {
1206 : #ifdef USE_ASSERT_CHECKING
1207 : /* if we have any items, the array must exist */
1208 : Assert(!((buffer->nitems > 0) && (buffer->items == NULL)));
1209 :
1210 : /*
1211 : * The buffer may be empty, in which case we must not call the check of
1212 : * item pointers, because that assumes non-emptiness.
1213 : */
1214 : if (buffer->nitems == 0)
1215 : return;
1216 :
1217 : /* Make sure the item pointers are valid and sorted. */
1218 : AssertCheckItemPointers(buffer);
1219 : #endif
1220 0 : }
1221 :
1222 : /*
1223 : * GinBufferInit
1224 : * Initialize buffer to store tuples for a GIN index.
1225 : *
1226 : * Initialize the buffer used to accumulate TID for a single key at a time
1227 : * (we process the data sorted), so we know when we received all data for
1228 : * a given key.
1229 : *
1230 : * Initializes sort support procedures for all index attributes.
1231 : */
1232 : static GinBuffer *
1233 0 : GinBufferInit(Relation index)
1234 : {
1235 0 : GinBuffer *buffer = palloc0(sizeof(GinBuffer));
1236 : int i,
1237 : nKeys;
1238 0 : TupleDesc desc = RelationGetDescr(index);
1239 :
1240 : /*
1241 : * How many items can we fit into the memory limit? We don't want to end
1242 : * up with too many TIDs, and 64kB seems more than enough. But maybe this
1243 : * should be tied to maintenance_work_mem or something like that?
1244 : */
1245 0 : buffer->maxitems = (64 * 1024L) / sizeof(ItemPointerData);
1246 :
1247 0 : nKeys = IndexRelationGetNumberOfKeyAttributes(index);
1248 :
1249 0 : buffer->ssup = palloc0(sizeof(SortSupportData) * nKeys);
1250 :
1251 : /*
1252 : * Lookup ordering operator for the index key data type, and initialize
1253 : * the sort support function.
1254 : */
1255 0 : for (i = 0; i < nKeys; i++)
1256 : {
1257 : Oid cmpFunc;
1258 0 : SortSupport sortKey = &buffer->ssup[i];
1259 0 : Form_pg_attribute att = TupleDescAttr(desc, i);
1260 :
1261 0 : sortKey->ssup_cxt = CurrentMemoryContext;
1262 0 : sortKey->ssup_collation = index->rd_indcollation[i];
1263 :
1264 0 : if (!OidIsValid(sortKey->ssup_collation))
1265 0 : sortKey->ssup_collation = DEFAULT_COLLATION_OID;
1266 :
1267 0 : sortKey->ssup_nulls_first = false;
1268 0 : sortKey->ssup_attno = i + 1;
1269 0 : sortKey->abbreviate = false;
1270 :
1271 : Assert(sortKey->ssup_attno != 0);
1272 :
1273 : /*
1274 : * If the compare proc isn't specified in the opclass definition, look
1275 : * up the index key type's default btree comparator.
1276 : */
1277 0 : cmpFunc = index_getprocid(index, i + 1, GIN_COMPARE_PROC);
1278 0 : if (cmpFunc == InvalidOid)
1279 : {
1280 : TypeCacheEntry *typentry;
1281 :
1282 0 : typentry = lookup_type_cache(att->atttypid,
1283 : TYPECACHE_CMP_PROC_FINFO);
1284 0 : if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
1285 0 : ereport(ERROR,
1286 : (errcode(ERRCODE_UNDEFINED_FUNCTION),
1287 : errmsg("could not identify a comparison function for type %s",
1288 : format_type_be(att->atttypid))));
1289 :
1290 0 : cmpFunc = typentry->cmp_proc_finfo.fn_oid;
1291 : }
1292 :
1293 0 : PrepareSortSupportComparisonShim(cmpFunc, sortKey);
1294 : }
1295 :
1296 0 : return buffer;
1297 : }
1298 :
1299 : /* Is the buffer empty, i.e. has no TID values in the array? */
1300 : static bool
1301 0 : GinBufferIsEmpty(GinBuffer *buffer)
1302 : {
1303 0 : return (buffer->nitems == 0);
1304 : }
1305 :
1306 : /*
1307 : * GinBufferKeyEquals
1308 : * Can the buffer store TIDs for the provided GIN tuple (same key)?
1309 : *
1310 : * Compare if the tuple matches the already accumulated data in the GIN
1311 : * buffer. Compare scalar fields first, before the actual key.
1312 : *
1313 : * Returns true if the key matches, and the TID belongs to the buffer, or
1314 : * false if the key does not match.
1315 : */
1316 : static bool
1317 0 : GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup)
1318 : {
1319 : int r;
1320 : Datum tupkey;
1321 :
1322 0 : AssertCheckGinBuffer(buffer);
1323 :
1324 0 : if (tup->attrnum != buffer->attnum)
1325 0 : return false;
1326 :
1327 : /* same attribute should have the same type info */
1328 : Assert(tup->typbyval == buffer->typbyval);
1329 : Assert(tup->typlen == buffer->typlen);
1330 :
1331 0 : if (tup->category != buffer->category)
1332 0 : return false;
1333 :
1334 : /*
1335 : * For NULL/empty keys, this means equality, for normal keys we need to
1336 : * compare the actual key value.
1337 : */
1338 0 : if (buffer->category != GIN_CAT_NORM_KEY)
1339 0 : return true;
1340 :
1341 : /*
1342 : * For the tuple, get either the first sizeof(Datum) bytes for byval
1343 : * types, or a pointer to the beginning of the data array.
1344 : */
1345 0 : tupkey = (buffer->typbyval) ? *(Datum *) tup->data : PointerGetDatum(tup->data);
1346 :
1347 0 : r = ApplySortComparator(buffer->key, false,
1348 : tupkey, false,
1349 0 : &buffer->ssup[buffer->attnum - 1]);
1350 :
1351 0 : return (r == 0);
1352 : }
1353 :
1354 : /*
1355 : * GinBufferShouldTrim
1356 : * Should we trim the list of item pointers?
1357 : * By trimming we mean writing out and removing the tuple IDs that we know
1358 : * cannot be changed by future merges. We can deduce the TID up to which
1359 : * we know can't change by future merges. We can deduce the TID up to which
1360 : * this is guaranteed from the "first" TID in each GIN tuple, which provides
1361 : * a "horizon" (for a given key) thanks to the sort.
1362 : *
1363 : * We don't want to do this too often - compressing longer TID lists is more
1364 : * efficient. But we also don't want to accumulate too many TIDs, for two
1365 : * reasons. First, it consumes memory and we might exceed maintenance_work_mem
1366 : * (or whatever limit applies), even if that's unlikely because TIDs are very
1367 : * small so we can fit a lot of them. Second, and more importantly, long TID
1368 : * lists are an issue if the scan wraps around, because a key may get a very
1369 : * wide list (with min/max TID for that key), forcing "full" mergesorts for
1370 : * every list merged into it (instead of the efficient append).
1371 : *
1372 : * So we look at two things when deciding whether to trim: whether the
1373 : * resulting list (after adding TIDs from the new tuple) would be too long,
1374 : * and whether there are enough TIDs to trim (with values less than the
1375 : * "first" TID from the new tuple). By enough we mean at least 1024 TIDs
1376 : * (mostly an arbitrary number, matching the check below).
1377 : */
1378 : static bool
1379 0 : GinBufferShouldTrim(GinBuffer *buffer, GinTuple *tup)
1380 : {
1381 : /* not enough TIDs to trim (1024 is somewhat arbitrary number) */
1382 0 : if (buffer->nfrozen < 1024)
1383 0 : return false;
1384 :
1385 : /* no need to trim if we have not hit the memory limit yet */
1386 0 : if ((buffer->nitems + tup->nitems) < buffer->maxitems)
1387 0 : return false;
1388 :
1389 : /*
1390 : * OK, we have enough frozen TIDs to flush, and we have hit the memory
1391 : * limit, so it's time to write it out.
1392 : */
1393 0 : return true;
1394 : }
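/*
 * Worked example (illustrative numbers, assuming sizeof(ItemPointerData) == 6):
 * with the 64kB limit set in GinBufferInit, maxitems is 65536 / 6 = 10922
 * TIDs. A buffer holding 10000 TIDs of which 2000 are frozen, receiving a
 * tuple with 1000 TIDs, trims: nfrozen (2000) >= 1024 and nitems + tup->nitems
 * (11000) >= maxitems (10922). With only 500 frozen TIDs it would not trim,
 * even though the memory limit is exceeded.
 */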
1395 :
1396 : /*
1397 : * GinBufferStoreTuple
1398 : * Add data (especially TID list) from a GIN tuple to the buffer.
1399 : *
1400 : * The buffer is expected to be empty (in which case it's initialized), or
1401 : * having the same key. The TID values from the tuple are combined with the
1402 : * stored values using a merge sort.
1403 : *
1404 : * The tuples (for the same key) are expected to be sorted by first TID. But
1405 : * this does not guarantee the lists do not overlap, especially in the leader,
1406 : * because the workers process interleaving data. There should be no overlaps
1407 : * in a single worker - it could happen when the parallel scan wraps around,
1408 : * but we detect that and flush the data (see ginBuildCallbackParallel).
1409 : *
1410 : * By sorting the GinTuple not only by key, but also by the first TID, we make
1411 : * it less likely the lists will overlap during merge. We merge them using
1412 : * mergesort, but it's cheaper to just append one list to the other.
1413 : *
1414 : * How often can the lists overlap? There should be no overlaps in workers,
1415 : * and in the leader we can see overlaps between lists built by different
1416 : * workers. But the workers merge the items as much as possible, so there
1417 : * should not be too many.
1418 : */
1419 : static void
1420 0 : GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
1421 : {
1422 : ItemPointerData *items;
1423 : Datum key;
1424 :
1425 0 : AssertCheckGinBuffer(buffer);
1426 :
1427 0 : key = _gin_parse_tuple_key(tup);
1428 0 : items = _gin_parse_tuple_items(tup);
1429 :
1430 : /* if the buffer is empty, set the fields (and copy the key) */
1431 0 : if (GinBufferIsEmpty(buffer))
1432 : {
1433 0 : buffer->category = tup->category;
1434 0 : buffer->keylen = tup->keylen;
1435 0 : buffer->attnum = tup->attrnum;
1436 :
1437 0 : buffer->typlen = tup->typlen;
1438 0 : buffer->typbyval = tup->typbyval;
1439 :
1440 0 : if (tup->category == GIN_CAT_NORM_KEY)
1441 0 : buffer->key = datumCopy(key, buffer->typbyval, buffer->typlen);
1442 : else
1443 0 : buffer->key = (Datum) 0;
1444 : }
1445 :
1446 : /*
1447 : * Try to freeze TIDs at the beginning of the list, i.e. exclude them from
1448 : * the mergesort. We can do that with TIDs before the first TID in the new
1449 : * tuple we're about to add into the buffer.
1450 : *
1451 : * We do this incrementally when adding data into the in-memory buffer,
1452 : * and not later (e.g. when hitting a memory limit), because it allows us
1453 : * to skip the frozen data during the mergesort, making it cheaper.
1454 : */
1455 :
1456 : /*
1457 : * Check if the last TID in the current list is frozen. This is the case
1458 : * when merging non-overlapping lists, e.g. in each parallel worker.
1459 : */
1460 0 : if ((buffer->nitems > 0) &&
1461 0 : (ItemPointerCompare(&buffer->items[buffer->nitems - 1],
1462 : GinTupleGetFirst(tup)) == 0))
1463 0 : buffer->nfrozen = buffer->nitems;
1464 :
1465 : /*
1466 : * Now find the last TID we know to be frozen, i.e. the last TID right
1467 : * before the new GIN tuple.
1468 : *
1469 : * Start with the first not-yet-frozen tuple, and walk until we find the
1470 : * first TID that's higher. If we already know the whole list is frozen
1471 : * (i.e. nfrozen == nitems), this does nothing.
1472 : *
1473 : * XXX This might do a binary search for sufficiently long lists, but it
1474 : * does not seem worth the complexity. Overlapping lists should be rare
1475 : * does not seem worth the complexity. Overlapping lists should be rare,
1476 : * TID comparisons are cheap, and we should quickly freeze most of
1477 : */
1478 0 : for (int i = buffer->nfrozen; i < buffer->nitems; i++)
1479 : {
1480 : /* Is the TID after the first TID of the new tuple? Can't freeze. */
1481 0 : if (ItemPointerCompare(&buffer->items[i],
1482 : GinTupleGetFirst(tup)) > 0)
1483 0 : break;
1484 :
1485 0 : buffer->nfrozen++;
1486 : }
1487 :
1488 : /* add the new TIDs into the buffer, combine using merge-sort */
1489 : {
1490 : int nnew;
1491 : ItemPointer new;
1492 :
1493 : /*
1494 : * Resize the array - we do this first, because we'll dereference the
1495 : * first unfrozen TID, which would fail if the array is NULL. We'll
1496 : * still pass 0 as number of elements in that array though.
1497 : */
1498 0 : if (buffer->items == NULL)
1499 0 : buffer->items = palloc((buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
1500 : else
1501 0 : buffer->items = repalloc(buffer->items,
1502 0 : (buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
1503 :
1504 0 : new = ginMergeItemPointers(&buffer->items[buffer->nfrozen], /* first unfrozen */
1505 0 : (buffer->nitems - buffer->nfrozen), /* num of unfrozen */
1506 0 : items, tup->nitems, &nnew);
1507 :
1508 : Assert(nnew == (tup->nitems + (buffer->nitems - buffer->nfrozen)));
1509 :
1510 0 : memcpy(&buffer->items[buffer->nfrozen], new,
1511 : nnew * sizeof(ItemPointerData));
1512 :
1513 0 : pfree(new);
1514 :
1515 0 : buffer->nitems += tup->nitems;
1516 :
1517 0 : AssertCheckItemPointers(buffer);
1518 : }
1519 :
1520 : /* free the decompressed TID list */
1521 0 : pfree(items);
1522 0 : }
1523 :
1524 : /*
1525 : * GinBufferReset
1526 : * Reset the buffer into a state as if it contains no data.
1527 : */
1528 : static void
1529 0 : GinBufferReset(GinBuffer *buffer)
1530 : {
1531 : Assert(!GinBufferIsEmpty(buffer));
1532 :
1533 : /* release byref values, do nothing for by-val ones */
1534 0 : if ((buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
1535 0 : pfree(DatumGetPointer(buffer->key));
1536 :
1537 : /*
1538 : * Not required, but makes it more likely to trigger NULL dereference if
1539 : * using the value incorrectly, etc.
1540 : */
1541 0 : buffer->key = (Datum) 0;
1542 :
1543 0 : buffer->attnum = 0;
1544 0 : buffer->category = 0;
1545 0 : buffer->keylen = 0;
1546 0 : buffer->nitems = 0;
1547 0 : buffer->nfrozen = 0;
1548 :
1549 0 : buffer->typlen = 0;
1550 0 : buffer->typbyval = 0;
1551 0 : }
1552 :
1553 : /*
1554 : * GinBufferTrim
1555 : * Discard the "frozen" part of the TID list (which should have been
1556 : * written to disk/index before this call).
1557 : */
1558 : static void
1559 0 : GinBufferTrim(GinBuffer *buffer)
1560 : {
1561 : Assert((buffer->nfrozen > 0) && (buffer->nfrozen <= buffer->nitems));
1562 :
1563 0 : memmove(&buffer->items[0], &buffer->items[buffer->nfrozen],
1564 0 : sizeof(ItemPointerData) * (buffer->nitems - buffer->nfrozen));
1565 :
1566 0 : buffer->nitems -= buffer->nfrozen;
1567 0 : buffer->nfrozen = 0;
1568 0 : }
1569 :
1570 : /*
1571 : * GinBufferFree
1572 : * Release memory associated with the GinBuffer (including TID array).
1573 : */
1574 : static void
1575 0 : GinBufferFree(GinBuffer *buffer)
1576 : {
1577 0 : if (buffer->items)
1578 0 : pfree(buffer->items);
1579 :
1580 : /* release byref values, do nothing for by-val ones */
1581 0 : if (!GinBufferIsEmpty(buffer) &&
1582 0 : (buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
1583 0 : pfree(DatumGetPointer(buffer->key));
1584 :
1585 0 : pfree(buffer);
1586 0 : }
1587 :
1588 : /*
1589 : * GinBufferCanAddKey
1590 : * Check if a given GIN tuple can be added to the current buffer.
1591 : *
1592 : * Returns true if the buffer is either empty or for the same index key.
1593 : */
1594 : static bool
1595 0 : GinBufferCanAddKey(GinBuffer *buffer, GinTuple *tup)
1596 : {
1597 : /* empty buffer can accept data for any key */
1598 0 : if (GinBufferIsEmpty(buffer))
1599 0 : return true;
1600 :
1601 : /* otherwise just data for the same key */
1602 0 : return GinBufferKeyEquals(buffer, tup);
1603 : }
1604 :
1605 : /*
1606 : * Within leader, wait for end of heap scan and merge per-worker results.
1607 : *
1608 : * After waiting for all workers to finish, merge the per-worker results into
1609            : * the complete index. The per-worker results are read from a shared
1610            : * tuplesort, sorted by the index key, so entries for the same key are
1611            : * adjacent. While combining the per-worker results we merge the TID
1612            : * lists for the same key before inserting them into the index.
1613 : *
1614 : * Returns the total number of heap tuples scanned.
1615 : */
1616 : static double
1617 0 : _gin_parallel_merge(GinBuildState *state)
1618 : {
1619 : GinTuple *tup;
1620 : Size tuplen;
1621 0 : double reltuples = 0;
1622 : GinBuffer *buffer;
1623 :
1624 : /* GIN tuples from workers, merged by leader */
1625 0 : double numtuples = 0;
1626 :
1627 : /* wait for workers to scan table and produce partial results */
1628 0 : reltuples = _gin_parallel_heapscan(state);
1629 :
1630 : /* Execute the sort */
1631 0 : pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
1632 : PROGRESS_GIN_PHASE_PERFORMSORT_2);
1633 :
1634 : /* do the actual sort in the leader */
1635 0 : tuplesort_performsort(state->bs_sortstate);
1636 :
1637 : /*
1638 : * Initialize buffer to combine entries for the same key.
1639 : *
1640            : 	 * The leader is allowed to use the whole maintenance_work_mem budget to
1641            : 	 * combine data, because the parallel workers have already completed.
1642 : */
1643 0 : buffer = GinBufferInit(state->ginstate.index);
1644 :
1645 : /*
1646 : * Set the progress target for the next phase. Reset the block number
1647 : * values set by table_index_build_scan
1648 : */
1649 : {
1650 0 : const int progress_index[] = {
1651 : PROGRESS_CREATEIDX_SUBPHASE,
1652 : PROGRESS_CREATEIDX_TUPLES_TOTAL,
1653 : PROGRESS_SCAN_BLOCKS_TOTAL,
1654 : PROGRESS_SCAN_BLOCKS_DONE
1655 : };
1656 0 : const int64 progress_vals[] = {
1657 : PROGRESS_GIN_PHASE_MERGE_2,
1658 0 : state->bs_numtuples,
1659 : 0, 0
1660 : };
1661 :
1662 0 : pgstat_progress_update_multi_param(4, progress_index, progress_vals);
1663 : }
1664 :
1665 : /*
1666 : * Read the GIN tuples from the shared tuplesort, sorted by category and
1667            : 	 * key. That should give us an order closely matching how the data is
1668            : 	 * organized in the index.
1669 : *
1670 : * We don't insert the GIN tuples right away, but instead accumulate as
1671            : 	 * many TIDs for the same key as possible, and then insert them at once.
1672            : 	 * This way we avoid repeatedly decompressing and recompressing the same
1673            : 	 * posting lists.
1673 : */
1674 0 : while ((tup = tuplesort_getgintuple(state->bs_sortstate, &tuplen, true)) != NULL)
1675 : {
1676 : MemoryContext oldCtx;
1677 :
1678 0 : CHECK_FOR_INTERRUPTS();
1679 :
1680 : /*
1681 : * If the buffer can accept the new GIN tuple, just store it there and
1682 : * we're done. If it's a different key (or maybe too much data) flush
1683 : * the current contents into the index first.
1684 : */
1685 0 : if (!GinBufferCanAddKey(buffer, tup))
1686 : {
1687 : /*
1688 : * Buffer is not empty and it's storing a different key - flush
1689            : 			 * the data into the index, and start a new entry for the current
1690            : 			 * GinTuple.
1691 : */
1692 0 : AssertCheckItemPointers(buffer);
1693 :
1694 0 : oldCtx = MemoryContextSwitchTo(state->tmpCtx);
1695 :
1696 0 : ginEntryInsert(&state->ginstate,
1697 0 : buffer->attnum, buffer->key, buffer->category,
1698 0 : buffer->items, buffer->nitems, &state->buildStats);
1699 :
1700 0 : MemoryContextSwitchTo(oldCtx);
1701 0 : MemoryContextReset(state->tmpCtx);
1702 :
1703 : /* discard the existing data */
1704 0 : GinBufferReset(buffer);
1705 : }
1706 :
1707 : /*
1708 : * We're about to add a GIN tuple to the buffer - check the memory
1709 : * limit first, and maybe write out some of the data into the index
1710 : * first, if needed (and possible). We only flush the part of the TID
1711 : * list that we know won't change, and only if there's enough data for
1712 : * compression to work well.
1713 : */
1714 0 : if (GinBufferShouldTrim(buffer, tup))
1715 : {
1716 : Assert(buffer->nfrozen > 0);
1717 :
1718 : /*
1719            : 			 * The buffer holds the same key, but adding the new TIDs would
1720            : 			 * exceed the memory limit - flush the frozen part of the TID list
1721            : 			 * into the index, and keep only the unfrozen tail in the buffer.
1722 : */
1723 0 : AssertCheckItemPointers(buffer);
1724 :
1725 0 : oldCtx = MemoryContextSwitchTo(state->tmpCtx);
1726 :
1727 0 : ginEntryInsert(&state->ginstate,
1728 0 : buffer->attnum, buffer->key, buffer->category,
1729 0 : buffer->items, buffer->nfrozen, &state->buildStats);
1730 :
1731 0 : MemoryContextSwitchTo(oldCtx);
1732 0 : MemoryContextReset(state->tmpCtx);
1733 :
1734 : /* truncate the data we've just discarded */
1735 0 : GinBufferTrim(buffer);
1736 : }
1737 :
1738 : /*
1739 : * Remember data for the current tuple (either remember the new key,
1740            : 		 * or append it to the existing data).
1741 : */
1742 0 : GinBufferStoreTuple(buffer, tup);
1743 :
1744 : /* Report progress */
1745 0 : pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
1746 : ++numtuples);
1747 : }
1748 :
1749 : /* flush data remaining in the buffer (for the last key) */
1750 0 : if (!GinBufferIsEmpty(buffer))
1751 : {
1752 0 : AssertCheckItemPointers(buffer);
1753 :
1754 0 : ginEntryInsert(&state->ginstate,
1755 0 : buffer->attnum, buffer->key, buffer->category,
1756 0 : buffer->items, buffer->nitems, &state->buildStats);
1757 :
1758 : /* discard the existing data */
1759 0 : GinBufferReset(buffer);
1760 :
1761 : /* Report progress */
1762 0 : pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
1763 : ++numtuples);
1764 : }
1765 :
1766            : 	/* release all the memory */
1767 0 : GinBufferFree(buffer);
1768 :
1769 0 : tuplesort_end(state->bs_sortstate);
1770 :
1771 0 : return reltuples;
1772 : }
1773 :
1774 : /*
1775 : * Returns size of shared memory required to store state for a parallel
1776 : * gin index build based on the snapshot its parallel scan will use.
1777 : */
1778 : static Size
1779 0 : _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot)
1780 : {
1781 : /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
1782 0 : return add_size(BUFFERALIGN(sizeof(GinBuildShared)),
1783 : table_parallelscan_estimate(heap, snapshot));
1784 : }
1785 :
1786 : /*
1787 : * Within leader, participate as a parallel worker.
1788 : */
1789 : static void
1790 0 : _gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Relation index)
1791 : {
1792 0 : GinLeader *ginleader = buildstate->bs_leader;
1793 : int sortmem;
1794 :
1795 : /*
1796            : 	 * Might as well use a reliable figure when doling out maintenance_work_mem
1797            : 	 * (when the requested number of workers was not launched, this will be
1798 : * somewhat higher than it is for other workers).
1799 : */
1800 0 : sortmem = maintenance_work_mem / ginleader->nparticipanttuplesorts;
1801 :
1802 : /* Perform work common to all participants */
1803 0 : _gin_parallel_scan_and_build(buildstate, ginleader->ginshared,
1804 : ginleader->sharedsort, heap, index,
1805 : sortmem, true);
1806 0 : }
1807 :
1808 : /*
1809 : * _gin_process_worker_data
1810 : * First phase of the key merging, happening in the worker.
1811 : *
1812 : * Depending on the number of distinct keys, the TID lists produced by the
1813 : * callback may be very short (due to frequent evictions in the callback).
1814 : * But combining many tiny lists is expensive, so we try to do as much as
1815 : * possible in the workers and only then pass the results to the leader.
1816 : *
1817 : * We read the tuples sorted by the key, and merge them into larger lists.
1818 : * At the moment there's no memory limit, so this will just produce one
1819            : * huge (sorted) list per key in each worker, which means the leader only
1820            : * has to do a very limited number of mergesorts, which is good.
1821 : */
1822 : static void
1823 0 : _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort,
1824 : bool progress)
1825 : {
1826 : GinTuple *tup;
1827 : Size tuplen;
1828 :
1829 : GinBuffer *buffer;
1830 :
1831 : /*
1832 : * Initialize buffer to combine entries for the same key.
1833 : *
1834            : 	 * The workers are limited to the same amount of memory as used for the
1835            : 	 * sort in ginBuildCallbackParallel. Arguably this should instead be the
1836            : 	 * 32MB value assumed during planning, as in that code.
1837 : */
1838 0 : buffer = GinBufferInit(state->ginstate.index);
1839 :
1840 : /* sort the raw per-worker data */
1841 0 : if (progress)
1842 0 : pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
1843 : PROGRESS_GIN_PHASE_PERFORMSORT_1);
1844 :
1845 0 : tuplesort_performsort(state->bs_worker_sort);
1846 :
1847 : /* reset the number of GIN tuples produced by this worker */
1848 0 : state->bs_numtuples = 0;
1849 :
1850 0 : if (progress)
1851 0 : pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
1852 : PROGRESS_GIN_PHASE_MERGE_1);
1853 :
1854 : /*
1855 : * Read the GIN tuples from the shared tuplesort, sorted by the key, and
1856 : * merge them into larger chunks for the leader to combine.
1857 : */
1858 0 : while ((tup = tuplesort_getgintuple(worker_sort, &tuplen, true)) != NULL)
1859 : {
1860 :
1861 0 : CHECK_FOR_INTERRUPTS();
1862 :
1863 : /*
1864 : * If the buffer can accept the new GIN tuple, just store it there and
1865            : 		 * we're done. If it's a different key (or there's too much data), flush
1866            : 		 * the current contents into the shared tuplesort first.
1867 : */
1868 0 : if (!GinBufferCanAddKey(buffer, tup))
1869 : {
1870 : GinTuple *ntup;
1871 : Size ntuplen;
1872 :
1873 : /*
1874 : * Buffer is not empty and it's storing a different key - flush
1875            : 			 * Buffer is not empty and it's storing a different key - write the
1876            : 			 * accumulated data into the shared tuplesort, and start a new entry
1877            : 			 * for the current GinTuple.
1878 0 : AssertCheckItemPointers(buffer);
1879 :
1880 0 : ntup = _gin_build_tuple(buffer->attnum, buffer->category,
1881 0 : buffer->key, buffer->typlen, buffer->typbyval,
1882 0 : buffer->items, buffer->nitems, &ntuplen);
1883 :
1884 0 : tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
1885 0 : state->bs_numtuples++;
1886 :
1887 0 : pfree(ntup);
1888 :
1889 : /* discard the existing data */
1890 0 : GinBufferReset(buffer);
1891 : }
1892 :
1893 : /*
1894 : * We're about to add a GIN tuple to the buffer - check the memory
1895            : 		 * limit first, and maybe write out some of the data into the shared
1896            : 		 * tuplesort, if needed (and possible). We only flush the part of the TID
1897 : * list that we know won't change, and only if there's enough data for
1898 : * compression to work well.
1899 : */
1900 0 : if (GinBufferShouldTrim(buffer, tup))
1901 : {
1902 : GinTuple *ntup;
1903 : Size ntuplen;
1904 :
1905 : Assert(buffer->nfrozen > 0);
1906 :
1907 : /*
1908            : 			 * The buffer holds the same key, but adding the new TIDs would
1909            : 			 * exceed the memory limit - write the frozen part of the TID list
1910            : 			 * into the shared tuplesort, and keep only the unfrozen tail.
1911 : */
1912 0 : AssertCheckItemPointers(buffer);
1913 :
1914 0 : ntup = _gin_build_tuple(buffer->attnum, buffer->category,
1915 0 : buffer->key, buffer->typlen, buffer->typbyval,
1916 0 : buffer->items, buffer->nfrozen, &ntuplen);
1917 :
1918 0 : tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
1919 :
1920 0 : pfree(ntup);
1921 :
1922 : /* truncate the data we've just discarded */
1923 0 : GinBufferTrim(buffer);
1924 : }
1925 :
1926 : /*
1927 : * Remember data for the current tuple (either remember the new key,
1928            : 		 * or append it to the existing data).
1929 : */
1930 0 : GinBufferStoreTuple(buffer, tup);
1931 : }
1932 :
1933 : /* flush data remaining in the buffer (for the last key) */
1934 0 : if (!GinBufferIsEmpty(buffer))
1935 : {
1936 : GinTuple *ntup;
1937 : Size ntuplen;
1938 :
1939 0 : AssertCheckItemPointers(buffer);
1940 :
1941 0 : ntup = _gin_build_tuple(buffer->attnum, buffer->category,
1942 0 : buffer->key, buffer->typlen, buffer->typbyval,
1943 0 : buffer->items, buffer->nitems, &ntuplen);
1944 :
1945 0 : tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
1946 0 : state->bs_numtuples++;
1947 :
1948 0 : pfree(ntup);
1949 :
1950 : /* discard the existing data */
1951 0 : GinBufferReset(buffer);
1952 : }
1953 :
1954            : 	/* release all the memory */
1955 0 : GinBufferFree(buffer);
1956 :
1957 0 : tuplesort_end(worker_sort);
1958 0 : }
1959 :
1960 : /*
1961 : * Perform a worker's portion of a parallel GIN index build sort.
1962 : *
1963 : * This generates a tuplesort for the worker portion of the table.
1964 : *
1965 : * sortmem is the amount of working memory to use within each worker,
1966 : * expressed in KBs.
1967 : *
1968 : * When this returns, workers are done, and need only release resources.
1969 : *
1970 : * Before feeding data into a shared tuplesort (for the leader process),
1971 : * the workers process data in two phases.
1972 : *
1973 : * 1) A worker reads a portion of rows from the table, accumulates entries
1974 : * in memory, and flushes them into a private tuplesort (e.g. because of
1975 : * using too much memory).
1976 : *
1977 : * 2) The private tuplesort gets sorted (by key and TID), the worker reads
1978 : * the data again, and combines the entries as much as possible. This has
1979 : * to happen eventually, and this way it's done in workers in parallel.
1980 : *
1981 : * Finally, the combined entries are written into the shared tuplesort, so
1982 : * that the leader can process them.
1983 : *
1984 : * How well this works (compared to just writing entries into the shared
1985 : * tuplesort) depends on the data set. For large tables with many distinct
1986 : * keys this helps a lot. With many distinct keys it's likely the buffers has
1987            : * keys this helps a lot: the buffer has to be flushed often, generating
1988            : * many entries with the same key and short TID lists. These entries need
1989            : * to be sorted and merged at some point, before writing them to the
1990            : * index. The merging is quite expensive (it can easily be ~50% of a
1991            : * serial build), and doing as much of it in the workers means it's
1992            : * parallelized. The leader still has to merge results from the workers,
1993            : * but it's much more efficient to merge a few large entries than many
1994            : * tiny ones.
1995 : * This also reduces the amount of data the workers pass to the leader through
1996 : * the shared tuplesort. OTOH the workers need more space for the private sort,
1997            : * possibly up to 2x the data, if no entries get merged in a worker. But this
1998 : * is very unlikely, and the only consequence is inefficiency, so we ignore it.
1999 : */
2000 : static void
2001 0 : _gin_parallel_scan_and_build(GinBuildState *state,
2002 : GinBuildShared *ginshared, Sharedsort *sharedsort,
2003 : Relation heap, Relation index,
2004 : int sortmem, bool progress)
2005 : {
2006 : SortCoordinate coordinate;
2007 : TableScanDesc scan;
2008 : double reltuples;
2009 : IndexInfo *indexInfo;
2010 :
2011 : /* Initialize local tuplesort coordination state */
2012 0 : coordinate = palloc0(sizeof(SortCoordinateData));
2013 0 : coordinate->isWorker = true;
2014 0 : coordinate->nParticipants = -1;
2015 0 : coordinate->sharedsort = sharedsort;
2016 :
2017 : /* remember how much space is allowed for the accumulated entries */
2018 0 : state->work_mem = (sortmem / 2);
2019 :
2020 : /* Begin "partial" tuplesort */
2021 0 : state->bs_sortstate = tuplesort_begin_index_gin(heap, index,
2022 : state->work_mem,
2023 : coordinate,
2024 : TUPLESORT_NONE);
2025 :
2026 : /* Local per-worker sort of raw-data */
2027 0 : state->bs_worker_sort = tuplesort_begin_index_gin(heap, index,
2028 : state->work_mem,
2029 : NULL,
2030 : TUPLESORT_NONE);
2031 :
2032 : /* Join parallel scan */
2033 0 : indexInfo = BuildIndexInfo(index);
2034 0 : indexInfo->ii_Concurrent = ginshared->isconcurrent;
2035 :
2036 0 : scan = table_beginscan_parallel(heap,
2037 : ParallelTableScanFromGinBuildShared(ginshared));
2038 :
2039 0 : reltuples = table_index_build_scan(heap, index, indexInfo, true, progress,
2040 : ginBuildCallbackParallel, state, scan);
2041 :
2042 : /* write remaining accumulated entries */
2043 0 : ginFlushBuildState(state, index);
2044 :
2045 : /*
2046 : * Do the first phase of in-worker processing - sort the data produced by
2047            : 	 * the callback, combine it into much larger chunks, and place those
2048            : 	 * into the shared tuplesort for the leader to process.
2049 : */
2050 0 : _gin_process_worker_data(state, state->bs_worker_sort, progress);
2051 :
2052 : /* sort the GIN tuples built by this worker */
2053 0 : tuplesort_performsort(state->bs_sortstate);
2054 :
2055 0 : state->bs_reltuples += reltuples;
2056 :
2057 : /*
2058 : * Done. Record ambuild statistics.
2059 : */
2060 0 : SpinLockAcquire(&ginshared->mutex);
2061 0 : ginshared->nparticipantsdone++;
2062 0 : ginshared->reltuples += state->bs_reltuples;
2063 0 : ginshared->indtuples += state->bs_numtuples;
2064 0 : SpinLockRelease(&ginshared->mutex);
2065 :
2066 : /* Notify leader */
2067 0 : ConditionVariableSignal(&ginshared->workersdonecv);
2068 :
2069 0 : tuplesort_end(state->bs_sortstate);
2070 0 : }
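
/*
 * Worked example with hypothetical numbers: with maintenance_work_mem set
 * to 256MB and 4 participants, the caller passes sortmem = 256MB / 4 = 64MB
 * to _gin_parallel_scan_and_build.  The function then allows half of that
 * (32MB, kept in state->work_mem) for the entries accumulated by
 * ginBuildCallbackParallel, and passes the same 32MB figure as the sort
 * memory for each of the two tuplesorts created above.
 */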
2071 :
2072 : /*
2073 : * Perform work within a launched parallel process.
2074 : */
2075 : void
2076 0 : _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
2077 : {
2078 : char *sharedquery;
2079 : GinBuildShared *ginshared;
2080 : Sharedsort *sharedsort;
2081 : GinBuildState buildstate;
2082 : Relation heapRel;
2083 : Relation indexRel;
2084 : LOCKMODE heapLockmode;
2085 : LOCKMODE indexLockmode;
2086 : WalUsage *walusage;
2087 : BufferUsage *bufferusage;
2088 : int sortmem;
2089 :
2090 : /*
2091            : 	 * The only possible status flag that can be set for the parallel worker is
2092 : * PROC_IN_SAFE_IC.
2093 : */
2094 : Assert((MyProc->statusFlags == 0) ||
2095 : (MyProc->statusFlags == PROC_IN_SAFE_IC));
2096 :
2097 : /* Set debug_query_string for individual workers first */
2098 0 : sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true);
2099 0 : debug_query_string = sharedquery;
2100 :
2101 : /* Report the query string from leader */
2102 0 : pgstat_report_activity(STATE_RUNNING, debug_query_string);
2103 :
2104 : /* Look up gin shared state */
2105 0 : ginshared = shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false);
2106 :
2107 : /* Open relations using lock modes known to be obtained by index.c */
2108 0 : if (!ginshared->isconcurrent)
2109 : {
2110 0 : heapLockmode = ShareLock;
2111 0 : indexLockmode = AccessExclusiveLock;
2112 : }
2113 : else
2114 : {
2115 0 : heapLockmode = ShareUpdateExclusiveLock;
2116 0 : indexLockmode = RowExclusiveLock;
2117 : }
2118 :
2119 : /* Open relations within worker */
2120 0 : heapRel = table_open(ginshared->heaprelid, heapLockmode);
2121 0 : indexRel = index_open(ginshared->indexrelid, indexLockmode);
2122 :
2123 : /* initialize the GIN build state */
2124 0 : initGinState(&buildstate.ginstate, indexRel);
2125 0 : buildstate.indtuples = 0;
2126 0 : memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
2127 0 : memset(&buildstate.tid, 0, sizeof(ItemPointerData));
2128 :
2129 : /*
2130 : * create a temporary memory context that is used to hold data not yet
2131 : * dumped out to the index
2132 : */
2133 0 : buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
2134 : "Gin build temporary context",
2135 : ALLOCSET_DEFAULT_SIZES);
2136 :
2137 : /*
2138 : * create a temporary memory context that is used for calling
2139 : * ginExtractEntries(), and can be reset after each tuple
2140 : */
2141 0 : buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
2142 : "Gin build temporary context for user-defined function",
2143 : ALLOCSET_DEFAULT_SIZES);
2144 :
2145 0 : buildstate.accum.ginstate = &buildstate.ginstate;
2146 0 : ginInitBA(&buildstate.accum);
2147 :
2148 :
2149 : /* Look up shared state private to tuplesort.c */
2150 0 : sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
2151 0 : tuplesort_attach_shared(sharedsort, seg);
2152 :
2153 : /* Prepare to track buffer usage during parallel execution */
2154 0 : InstrStartParallelQuery();
2155 :
2156 : /*
2157            : 	 * Might as well use a reliable figure when doling out maintenance_work_mem
2158            : 	 * (when the requested number of workers was not launched, this will be
2159 : * somewhat higher than it is for other workers).
2160 : */
2161 0 : sortmem = maintenance_work_mem / ginshared->scantuplesortstates;
2162 :
2163 0 : _gin_parallel_scan_and_build(&buildstate, ginshared, sharedsort,
2164 : heapRel, indexRel, sortmem, false);
2165 :
2166 : /* Report WAL/buffer usage during parallel execution */
2167 0 : bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
2168 0 : walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
2169 0 : InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber],
2170 0 : &walusage[ParallelWorkerNumber]);
2171 :
2172 0 : index_close(indexRel, indexLockmode);
2173 0 : table_close(heapRel, heapLockmode);
2174 0 : }
2175 :
2176 : /*
2177 : * Used to keep track of compressed TID lists when building a GIN tuple.
2178 : */
2179 : typedef struct
2180 : {
2181 : dlist_node node; /* linked list pointers */
2182 : GinPostingList *seg;
2183 : } GinSegmentInfo;
2184 :
2185 : /*
2186 : * _gin_build_tuple
2187 : * Serialize the state for an index key into a tuple for tuplesort.
2188 : *
2189 : * The tuple has a number of scalar fields (mostly matching the build state),
2190 : * and then a data array that stores the key first, and then the TID list.
2191 : *
2192 : * For by-reference data types, we store the actual data. For by-val types
2193 : * we simply copy the whole Datum, so that we don't have to care about stuff
2194            : * like endianness etc. We could make it a little bit smaller, but it's not
2195            : * worth it - it's a tiny fraction of the data, and we need to align the
2196 : * start of the TID list anyway. So we wouldn't save anything. (This would
2197 : * not be a good idea for the permanent in-index data, since we'd prefer
2198 : * that that not depend on sizeof(Datum). But this is just a transient
2199 : * representation to use while sorting the data.)
2200 : *
2201            : * The TID list is stored in compressed form - it's highly compressible, and
2202 : * we already have ginCompressPostingList for this purpose. The list may be
2203 : * pretty long, so we compress it into multiple segments and then copy all
2204 : * of that into the GIN tuple.
2205 : */
2206 : static GinTuple *
2207 0 : _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
2208 : Datum key, int16 typlen, bool typbyval,
2209 : ItemPointerData *items, uint32 nitems,
2210 : Size *len)
2211 : {
2212 : GinTuple *tuple;
2213 : char *ptr;
2214 :
2215 : Size tuplen;
2216 : int keylen;
2217 :
2218 : dlist_mutable_iter iter;
2219 : dlist_head segments;
2220 : int ncompressed;
2221 : Size compresslen;
2222 :
2223 : /*
2224            : 	 * Calculate how long the key value is. Only keys with GIN_CAT_NORM_KEY
2225            : 	 * have an actual non-empty key. We include varlena headers and \0 bytes for
2226 : * strings, to make it easier to access the data in-line.
2227 : *
2228 : * For byval types we simply copy the whole Datum. We could store just the
2229 : * necessary bytes, but this is simpler to work with and not worth the
2230            : 	 * extra complexity. Moreover we still need to align the data to allow
2231            : 	 * direct access to the item pointers.
2232 : *
2233 : * XXX Note that for byval types we store the whole datum, no matter what
2234 : * the typlen value is.
2235 : */
2236 0 : if (category != GIN_CAT_NORM_KEY)
2237 0 : keylen = 0;
2238 0 : else if (typbyval)
2239 0 : keylen = sizeof(Datum);
2240 0 : else if (typlen > 0)
2241 0 : keylen = typlen;
2242 0 : else if (typlen == -1)
2243 0 : keylen = VARSIZE_ANY(DatumGetPointer(key));
2244 0 : else if (typlen == -2)
2245 0 : keylen = strlen(DatumGetPointer(key)) + 1;
2246 : else
2247 0 : elog(ERROR, "unexpected typlen value (%d)", typlen);
2248 :
2249 : /* compress the item pointers */
2250 0 : ncompressed = 0;
2251 0 : compresslen = 0;
2252 0 : dlist_init(&segments);
2253 :
2254 : /* generate compressed segments of TID list chunks */
2255 0 : while (ncompressed < nitems)
2256 : {
2257 : int cnt;
2258 0 : GinSegmentInfo *seginfo = palloc(sizeof(GinSegmentInfo));
2259 :
2260 0 : seginfo->seg = ginCompressPostingList(&items[ncompressed],
2261 0 : (nitems - ncompressed),
2262 : UINT16_MAX,
2263 : &cnt);
2264 :
2265 0 : ncompressed += cnt;
2266 0 : compresslen += SizeOfGinPostingList(seginfo->seg);
2267 :
2268 0 : dlist_push_tail(&segments, &seginfo->node);
2269 : }
2270 :
2271 : /*
2272 : * Determine GIN tuple length with all the data included. Be careful about
2273 : * alignment, to allow direct access to compressed segments (those require
2274 : * only SHORTALIGN).
2275 : */
2276 0 : tuplen = SHORTALIGN(offsetof(GinTuple, data) + keylen) + compresslen;
2277 :
2278 0 : *len = tuplen;
2279 :
2280 : /*
2281 : * Allocate space for the whole GIN tuple.
2282 : *
2283 : * The palloc0 is needed - writetup_index_gin will write the whole tuple
2284 : * to disk, so we need to make sure the padding bytes are defined
2285 : * (otherwise valgrind would report this).
2286 : */
2287 0 : tuple = palloc0(tuplen);
2288 :
2289 0 : tuple->tuplen = tuplen;
2290 0 : tuple->attrnum = attrnum;
2291 0 : tuple->category = category;
2292 0 : tuple->keylen = keylen;
2293 0 : tuple->nitems = nitems;
2294 :
2295 : /* key type info */
2296 0 : tuple->typlen = typlen;
2297 0 : tuple->typbyval = typbyval;
2298 :
2299 : /*
2300 : * Copy the key and items into the tuple. First the key value, which we
2301 : * can simply copy right at the beginning of the data array.
2302 : */
2303 0 : if (category == GIN_CAT_NORM_KEY)
2304 : {
2305 0 : if (typbyval)
2306 : {
2307 0 : memcpy(tuple->data, &key, sizeof(Datum));
2308 : }
2309 0 : else if (typlen > 0) /* byref, fixed length */
2310 : {
2311 0 : memcpy(tuple->data, DatumGetPointer(key), typlen);
2312 : }
2313 0 : else if (typlen == -1)
2314 : {
2315 0 : memcpy(tuple->data, DatumGetPointer(key), keylen);
2316 : }
2317 0 : else if (typlen == -2)
2318 : {
2319 0 : memcpy(tuple->data, DatumGetPointer(key), keylen);
2320 : }
2321 : }
2322 :
2323 : /* finally, copy the TIDs into the array */
2324 0 : ptr = (char *) tuple + SHORTALIGN(offsetof(GinTuple, data) + keylen);
2325 :
2326 : /* copy in the compressed data, and free the segments */
2327 0 : dlist_foreach_modify(iter, &segments)
2328 : {
2329 0 : GinSegmentInfo *seginfo = dlist_container(GinSegmentInfo, node, iter.cur);
2330 :
2331 0 : memcpy(ptr, seginfo->seg, SizeOfGinPostingList(seginfo->seg));
2332 :
2333 0 : ptr += SizeOfGinPostingList(seginfo->seg);
2334 :
2335 0 : dlist_delete(&seginfo->node);
2336 :
2337 0 : pfree(seginfo->seg);
2338 0 : pfree(seginfo);
2339 : }
2340 :
2341 0 : return tuple;
2342 : }
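
/*
 * Illustrative sketch only (guarded out): the serialized layout produced by
 * _gin_build_tuple is
 *
 *   [ GinTuple fixed fields | key (keylen bytes) | pad to SHORTALIGN |
 *     GinPostingList segments, stored back to back ]
 *
 * so the compressed segments can be walked with the arithmetic below, which
 * is the same arithmetic _gin_parse_tuple_items uses.  The helper name is
 * hypothetical; it relies only on fields and macros already used above.
 */
#ifdef GIN_TUPLE_LAYOUT_EXAMPLE
static void
example_walk_segments(GinTuple *tup)
{
	char	   *ptr = (char *) tup + SHORTALIGN(offsetof(GinTuple, data) + tup->keylen);
	char	   *end = (char *) tup + tup->tuplen;

	while (ptr < end)
	{
		GinPostingList *seg = (GinPostingList *) ptr;

		/* each segment is a self-describing compressed block of TIDs */
		ptr += SizeOfGinPostingList(seg);
	}
}
#endif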
2343 :
2344 : /*
2345 : * _gin_parse_tuple_key
2346 : * Return a Datum representing the key stored in the tuple.
2347 : *
2348            : * Most of the tuple fields are directly accessible; the only parts that
2349            : * need more care are the key and the TID list.
2350 : *
2351 : * For the key, this returns a regular Datum representing it. It's either the
2352 : * actual key value, or a pointer to the beginning of the data array (which is
2353 : * where the data was copied by _gin_build_tuple).
2354 : */
2355 : static Datum
2356 0 : _gin_parse_tuple_key(GinTuple *a)
2357 : {
2358 : Datum key;
2359 :
2360 0 : if (a->category != GIN_CAT_NORM_KEY)
2361 0 : return (Datum) 0;
2362 :
2363 0 : if (a->typbyval)
2364 : {
2365 0 : memcpy(&key, a->data, a->keylen);
2366 0 : return key;
2367 : }
2368 :
2369 0 : return PointerGetDatum(a->data);
2370 : }
2371 :
2372 : /*
2373 : * _gin_parse_tuple_items
2374            : * Return a palloc'd array of ItemPointers decompressed from the tuple.
2375 : */
2376 : static ItemPointer
2377 0 : _gin_parse_tuple_items(GinTuple *a)
2378 : {
2379 : int len;
2380 : char *ptr;
2381 : int ndecoded;
2382 : ItemPointer items;
2383 :
2384 0 : len = a->tuplen - SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
2385 0 : ptr = (char *) a + SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
2386 :
2387 0 : items = ginPostingListDecodeAllSegments((GinPostingList *) ptr, len, &ndecoded);
2388 :
2389 : Assert(ndecoded == a->nitems);
2390 :
2391 0 : return (ItemPointer) items;
2392 : }
2393 :
2394 : /*
2395 : * _gin_compare_tuples
2396 : * Compare GIN tuples, used by tuplesort during parallel index build.
2397 : *
2398 : * The scalar fields (attrnum, category) are compared first, the key value is
2399 : * compared last. The comparisons are done using type-specific sort support
2400 : * functions.
2401 : *
2402 : * If the key value matches, we compare the first TID value in the TID list,
2403 : * which means the tuples are merged in an order in which they are most
2404 : * likely to be simply concatenated. (This "first" TID will also allow us
2405 : * to determine a point up to which the list is fully determined and can be
2406 : * written into the index to enforce a memory limit etc.)
2407 : */
2408 : int
2409 0 : _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup)
2410 : {
2411 : int r;
2412 : Datum keya,
2413 : keyb;
2414 :
2415 0 : if (a->attrnum < b->attrnum)
2416 0 : return -1;
2417 :
2418 0 : if (a->attrnum > b->attrnum)
2419 0 : return 1;
2420 :
2421 0 : if (a->category < b->category)
2422 0 : return -1;
2423 :
2424 0 : if (a->category > b->category)
2425 0 : return 1;
2426 :
2427 0 : if (a->category == GIN_CAT_NORM_KEY)
2428 : {
2429 0 : keya = _gin_parse_tuple_key(a);
2430 0 : keyb = _gin_parse_tuple_key(b);
2431 :
2432 0 : r = ApplySortComparator(keya, false,
2433 : keyb, false,
2434 0 : &ssup[a->attrnum - 1]);
2435 :
2436 : /* if the key is the same, consider the first TID in the array */
2437 0 : return (r != 0) ? r : ItemPointerCompare(GinTupleGetFirst(a),
2438 : GinTupleGetFirst(b));
2439 : }
2440 :
2441 0 : return ItemPointerCompare(GinTupleGetFirst(a),
2442 : GinTupleGetFirst(b));
2443 : }
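
/*
 * Worked example with hypothetical values: tuples compare on (attrnum,
 * category, key, first TID), so a sort of single-column entries might
 * produce
 *
 *   (attnum 1, GIN_CAT_NORM_KEY, 'abc', first TID (10,1))
 *   (attnum 1, GIN_CAT_NORM_KEY, 'abc', first TID (37,4))
 *   (attnum 1, GIN_CAT_NORM_KEY, 'abd', first TID (2,8))
 *   (attnum 1, GIN_CAT_NULL_ITEM,       first TID (5,3))
 *
 * i.e. all entries for the same key become adjacent, ordered by their first
 * TID, so the merge loops can usually just concatenate the decompressed
 * lists.
 */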
|