Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * gininsert.c
4 : * insert routines for the postgres inverted index access method.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/gininsert.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gin_private.h"
18 : #include "access/tableam.h"
19 : #include "access/xloginsert.h"
20 : #include "miscadmin.h"
21 : #include "nodes/execnodes.h"
22 : #include "storage/bufmgr.h"
23 : #include "storage/predicate.h"
24 : #include "utils/memutils.h"
25 : #include "utils/rel.h"
26 :
27 : typedef struct
28 : {
29 : GinState ginstate;
30 : double indtuples;
31 : GinStatsData buildStats;
32 : MemoryContext tmpCtx;
33 : MemoryContext funcCtx;
34 : BuildAccumulator accum;
35 : } GinBuildState;
36 :
37 :
38 : /*
39 : * Adds array of item pointers to tuple's posting list, or
40 : * creates posting tree and tuple pointing to tree in case
41 : * of not enough space. Max size of tuple is defined in
42 : * GinFormTuple(). Returns a new, modified index tuple.
43 : * items[] must be in sorted order with no duplicates.
44 : */
45 : static IndexTuple
46 32674 : addItemPointersToLeafTuple(GinState *ginstate,
47 : IndexTuple old,
48 : ItemPointerData *items, uint32 nitem,
49 : GinStatsData *buildStats, Buffer buffer)
50 : {
51 : OffsetNumber attnum;
52 : Datum key;
53 : GinNullCategory category;
54 : IndexTuple res;
55 : ItemPointerData *newItems,
56 : *oldItems;
57 : int oldNPosting,
58 : newNPosting;
59 : GinPostingList *compressedList;
60 :
61 : Assert(!GinIsPostingTree(old));
62 :
63 32674 : attnum = gintuple_get_attrnum(ginstate, old);
64 32674 : key = gintuple_get_key(ginstate, old, &category);
65 :
66 : /* merge the old and new posting lists */
67 32674 : oldItems = ginReadTuple(ginstate, attnum, old, &oldNPosting);
68 :
69 32674 : newItems = ginMergeItemPointers(items, nitem,
70 : oldItems, oldNPosting,
71 : &newNPosting);
72 :
73 : /* Compress the posting list, and try to a build tuple with room for it */
74 32674 : res = NULL;
75 32674 : compressedList = ginCompressPostingList(newItems, newNPosting, GinMaxItemSize,
76 : NULL);
77 32674 : pfree(newItems);
78 32674 : if (compressedList)
79 : {
80 32674 : res = GinFormTuple(ginstate, attnum, key, category,
81 : (char *) compressedList,
82 32674 : SizeOfGinPostingList(compressedList),
83 : newNPosting,
84 : false);
85 32674 : pfree(compressedList);
86 : }
87 32674 : if (!res)
88 : {
89 : /* posting list would be too big, convert to posting tree */
90 : BlockNumber postingRoot;
91 :
92 : /*
93 : * Initialize posting tree with the old tuple's posting list. It's
94 : * surely small enough to fit on one posting-tree page, and should
95 : * already be in order with no duplicates.
96 : */
97 16 : postingRoot = createPostingTree(ginstate->index,
98 : oldItems,
99 : oldNPosting,
100 : buildStats,
101 : buffer);
102 :
103 : /* Now insert the TIDs-to-be-added into the posting tree */
104 16 : ginInsertItemPointers(ginstate->index, postingRoot,
105 : items, nitem,
106 : buildStats);
107 :
108 : /* And build a new posting-tree-only result tuple */
109 16 : res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
110 16 : GinSetPostingTree(res, postingRoot);
111 : }
112 32674 : pfree(oldItems);
113 :
114 32674 : return res;
115 : }
116 :
117 : /*
118 : * Build a fresh leaf tuple, either posting-list or posting-tree format
119 : * depending on whether the given items list will fit.
120 : * items[] must be in sorted order with no duplicates.
121 : *
122 : * This is basically the same logic as in addItemPointersToLeafTuple,
123 : * but working from slightly different input.
124 : */
125 : static IndexTuple
126 689416 : buildFreshLeafTuple(GinState *ginstate,
127 : OffsetNumber attnum, Datum key, GinNullCategory category,
128 : ItemPointerData *items, uint32 nitem,
129 : GinStatsData *buildStats, Buffer buffer)
130 : {
131 689416 : IndexTuple res = NULL;
132 : GinPostingList *compressedList;
133 :
134 : /* try to build a posting list tuple with all the items */
135 689416 : compressedList = ginCompressPostingList(items, nitem, GinMaxItemSize, NULL);
136 689416 : if (compressedList)
137 : {
138 689416 : res = GinFormTuple(ginstate, attnum, key, category,
139 : (char *) compressedList,
140 689416 : SizeOfGinPostingList(compressedList),
141 : nitem, false);
142 689416 : pfree(compressedList);
143 : }
144 689416 : if (!res)
145 : {
146 : /* posting list would be too big, build posting tree */
147 : BlockNumber postingRoot;
148 :
149 : /*
150 : * Build posting-tree-only result tuple. We do this first so as to
151 : * fail quickly if the key is too big.
152 : */
153 100 : res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
154 :
155 : /*
156 : * Initialize a new posting tree with the TIDs.
157 : */
158 100 : postingRoot = createPostingTree(ginstate->index, items, nitem,
159 : buildStats, buffer);
160 :
161 : /* And save the root link in the result tuple */
162 100 : GinSetPostingTree(res, postingRoot);
163 : }
164 :
165 689416 : return res;
166 : }
167 :
168 : /*
169 : * Insert one or more heap TIDs associated with the given key value.
170 : * This will either add a single key entry, or enlarge a pre-existing entry.
171 : *
172 : * During an index build, buildStats is non-null and the counters
173 : * it contains should be incremented as needed.
174 : */
175 : void
176 771494 : ginEntryInsert(GinState *ginstate,
177 : OffsetNumber attnum, Datum key, GinNullCategory category,
178 : ItemPointerData *items, uint32 nitem,
179 : GinStatsData *buildStats)
180 : {
181 : GinBtreeData btree;
182 : GinBtreeEntryInsertData insertdata;
183 : GinBtreeStack *stack;
184 : IndexTuple itup;
185 : Page page;
186 :
187 771494 : insertdata.isDelete = false;
188 :
189 771494 : ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
190 771494 : btree.isBuild = (buildStats != NULL);
191 :
192 771494 : stack = ginFindLeafPage(&btree, false, false);
193 771494 : page = BufferGetPage(stack->buffer);
194 :
195 771494 : if (btree.findItem(&btree, stack))
196 : {
197 : /* found pre-existing entry */
198 82074 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
199 :
200 82074 : if (GinIsPostingTree(itup))
201 : {
202 : /* add entries to existing posting tree */
203 49392 : BlockNumber rootPostingTree = GinGetPostingTree(itup);
204 :
205 : /* release all stack */
206 49392 : LockBuffer(stack->buffer, GIN_UNLOCK);
207 49392 : freeGinBtreeStack(stack);
208 :
209 : /* insert into posting tree */
210 49392 : ginInsertItemPointers(ginstate->index, rootPostingTree,
211 : items, nitem,
212 : buildStats);
213 49388 : return;
214 : }
215 :
216 32682 : CheckForSerializableConflictIn(ginstate->index, NULL,
217 : BufferGetBlockNumber(stack->buffer));
218 : /* modify an existing leaf entry */
219 32674 : itup = addItemPointersToLeafTuple(ginstate, itup,
220 : items, nitem, buildStats, stack->buffer);
221 :
222 32674 : insertdata.isDelete = true;
223 : }
224 : else
225 : {
226 689420 : CheckForSerializableConflictIn(ginstate->index, NULL,
227 : BufferGetBlockNumber(stack->buffer));
228 : /* no match, so construct a new leaf entry */
229 689416 : itup = buildFreshLeafTuple(ginstate, attnum, key, category,
230 : items, nitem, buildStats, stack->buffer);
231 :
232 : /*
233 : * nEntries counts leaf tuples, so increment it only when we make a
234 : * new one.
235 : */
236 689416 : if (buildStats)
237 149146 : buildStats->nEntries++;
238 : }
239 :
240 : /* Insert the new or modified leaf tuple */
241 722090 : insertdata.entry = itup;
242 722090 : ginInsertValue(&btree, stack, &insertdata, buildStats);
243 722086 : pfree(itup);
244 : }
245 :
246 : /*
247 : * Extract index entries for a single indexable item, and add them to the
248 : * BuildAccumulator's state.
249 : *
250 : * This function is used only during initial index creation.
251 : */
252 : static void
253 869230 : ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum,
254 : Datum value, bool isNull,
255 : ItemPointer heapptr)
256 : {
257 : Datum *entries;
258 : GinNullCategory *categories;
259 : int32 nentries;
260 : MemoryContext oldCtx;
261 :
262 869230 : oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
263 869230 : entries = ginExtractEntries(buildstate->accum.ginstate, attnum,
264 : value, isNull,
265 : &nentries, &categories);
266 869230 : MemoryContextSwitchTo(oldCtx);
267 :
268 869230 : ginInsertBAEntries(&buildstate->accum, heapptr, attnum,
269 : entries, categories, nentries);
270 :
271 869230 : buildstate->indtuples += nentries;
272 :
273 869230 : MemoryContextReset(buildstate->funcCtx);
274 869230 : }
275 :
276 : static void
277 868612 : ginBuildCallback(Relation index, ItemPointer tid, Datum *values,
278 : bool *isnull, bool tupleIsAlive, void *state)
279 : {
280 868612 : GinBuildState *buildstate = (GinBuildState *) state;
281 : MemoryContext oldCtx;
282 : int i;
283 :
284 868612 : oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
285 :
286 1737842 : for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
287 869230 : ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
288 869230 : values[i], isnull[i], tid);
289 :
290 : /* If we've maxed out our available memory, dump everything to the index */
291 868612 : if (buildstate->accum.allocatedMemory >= (Size) maintenance_work_mem * 1024L)
292 : {
293 : ItemPointerData *list;
294 : Datum key;
295 : GinNullCategory category;
296 : uint32 nlist;
297 : OffsetNumber attnum;
298 :
299 0 : ginBeginBAScan(&buildstate->accum);
300 0 : while ((list = ginGetBAEntry(&buildstate->accum,
301 : &attnum, &key, &category, &nlist)) != NULL)
302 : {
303 : /* there could be many entries, so be willing to abort here */
304 0 : CHECK_FOR_INTERRUPTS();
305 0 : ginEntryInsert(&buildstate->ginstate, attnum, key, category,
306 : list, nlist, &buildstate->buildStats);
307 : }
308 :
309 0 : MemoryContextReset(buildstate->tmpCtx);
310 0 : ginInitBA(&buildstate->accum);
311 : }
312 :
313 868612 : MemoryContextSwitchTo(oldCtx);
314 868612 : }
315 :
316 : IndexBuildResult *
317 314 : ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
318 : {
319 : IndexBuildResult *result;
320 : double reltuples;
321 : GinBuildState buildstate;
322 : Buffer RootBuffer,
323 : MetaBuffer;
324 : ItemPointerData *list;
325 : Datum key;
326 : GinNullCategory category;
327 : uint32 nlist;
328 : MemoryContext oldCtx;
329 : OffsetNumber attnum;
330 :
331 314 : if (RelationGetNumberOfBlocks(index) != 0)
332 0 : elog(ERROR, "index \"%s\" already contains data",
333 : RelationGetRelationName(index));
334 :
335 314 : initGinState(&buildstate.ginstate, index);
336 314 : buildstate.indtuples = 0;
337 314 : memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
338 :
339 : /* initialize the meta page */
340 314 : MetaBuffer = GinNewBuffer(index);
341 :
342 : /* initialize the root page */
343 314 : RootBuffer = GinNewBuffer(index);
344 :
345 314 : START_CRIT_SECTION();
346 314 : GinInitMetabuffer(MetaBuffer);
347 314 : MarkBufferDirty(MetaBuffer);
348 314 : GinInitBuffer(RootBuffer, GIN_LEAF);
349 314 : MarkBufferDirty(RootBuffer);
350 :
351 :
352 314 : UnlockReleaseBuffer(MetaBuffer);
353 314 : UnlockReleaseBuffer(RootBuffer);
354 314 : END_CRIT_SECTION();
355 :
356 : /* count the root as first entry page */
357 314 : buildstate.buildStats.nEntryPages++;
358 :
359 : /*
360 : * create a temporary memory context that is used to hold data not yet
361 : * dumped out to the index
362 : */
363 314 : buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
364 : "Gin build temporary context",
365 : ALLOCSET_DEFAULT_SIZES);
366 :
367 : /*
368 : * create a temporary memory context that is used for calling
369 : * ginExtractEntries(), and can be reset after each tuple
370 : */
371 314 : buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
372 : "Gin build temporary context for user-defined function",
373 : ALLOCSET_DEFAULT_SIZES);
374 :
375 314 : buildstate.accum.ginstate = &buildstate.ginstate;
376 314 : ginInitBA(&buildstate.accum);
377 :
378 : /*
379 : * Do the heap scan. We disallow sync scan here because dataPlaceToPage
380 : * prefers to receive tuples in TID order.
381 : */
382 314 : reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
383 : ginBuildCallback, (void *) &buildstate,
384 : NULL);
385 :
386 : /* dump remaining entries to the index */
387 314 : oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
388 314 : ginBeginBAScan(&buildstate.accum);
389 149460 : while ((list = ginGetBAEntry(&buildstate.accum,
390 : &attnum, &key, &category, &nlist)) != NULL)
391 : {
392 : /* there could be many entries, so be willing to abort here */
393 149146 : CHECK_FOR_INTERRUPTS();
394 149146 : ginEntryInsert(&buildstate.ginstate, attnum, key, category,
395 : list, nlist, &buildstate.buildStats);
396 : }
397 314 : MemoryContextSwitchTo(oldCtx);
398 :
399 314 : MemoryContextDelete(buildstate.funcCtx);
400 314 : MemoryContextDelete(buildstate.tmpCtx);
401 :
402 : /*
403 : * Update metapage stats
404 : */
405 314 : buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
406 314 : ginUpdateStats(index, &buildstate.buildStats, true);
407 :
408 : /*
409 : * We didn't write WAL records as we built the index, so if WAL-logging is
410 : * required, write all pages to the WAL now.
411 : */
412 314 : if (RelationNeedsWAL(index))
413 : {
414 190 : log_newpage_range(index, MAIN_FORKNUM,
415 : 0, RelationGetNumberOfBlocks(index),
416 : true);
417 : }
418 :
419 : /*
420 : * Return statistics
421 : */
422 314 : result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
423 :
424 314 : result->heap_tuples = reltuples;
425 314 : result->index_tuples = buildstate.indtuples;
426 :
427 314 : return result;
428 : }
429 :
430 : /*
431 : * ginbuildempty() -- build an empty gin index in the initialization fork
432 : */
433 : void
434 6 : ginbuildempty(Relation index)
435 : {
436 : Buffer RootBuffer,
437 : MetaBuffer;
438 :
439 : /* An empty GIN index has two pages. */
440 6 : MetaBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
441 : EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
442 6 : RootBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
443 : EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
444 :
445 : /* Initialize and xlog metabuffer and root buffer. */
446 6 : START_CRIT_SECTION();
447 6 : GinInitMetabuffer(MetaBuffer);
448 6 : MarkBufferDirty(MetaBuffer);
449 6 : log_newpage_buffer(MetaBuffer, true);
450 6 : GinInitBuffer(RootBuffer, GIN_LEAF);
451 6 : MarkBufferDirty(RootBuffer);
452 6 : log_newpage_buffer(RootBuffer, false);
453 6 : END_CRIT_SECTION();
454 :
455 : /* Unlock and release the buffers. */
456 6 : UnlockReleaseBuffer(MetaBuffer);
457 6 : UnlockReleaseBuffer(RootBuffer);
458 6 : }
459 :
460 : /*
461 : * Insert index entries for a single indexable item during "normal"
462 : * (non-fast-update) insertion
463 : */
464 : static void
465 33896 : ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
466 : Datum value, bool isNull,
467 : ItemPointer item)
468 : {
469 : Datum *entries;
470 : GinNullCategory *categories;
471 : int32 i,
472 : nentries;
473 :
474 33896 : entries = ginExtractEntries(ginstate, attnum, value, isNull,
475 : &nentries, &categories);
476 :
477 290142 : for (i = 0; i < nentries; i++)
478 256266 : ginEntryInsert(ginstate, attnum, entries[i], categories[i],
479 : item, 1, NULL);
480 33876 : }
481 :
482 : bool
483 298686 : gininsert(Relation index, Datum *values, bool *isnull,
484 : ItemPointer ht_ctid, Relation heapRel,
485 : IndexUniqueCheck checkUnique,
486 : bool indexUnchanged,
487 : IndexInfo *indexInfo)
488 : {
489 298686 : GinState *ginstate = (GinState *) indexInfo->ii_AmCache;
490 : MemoryContext oldCtx;
491 : MemoryContext insertCtx;
492 : int i;
493 :
494 : /* Initialize GinState cache if first call in this statement */
495 298686 : if (ginstate == NULL)
496 : {
497 2006 : oldCtx = MemoryContextSwitchTo(indexInfo->ii_Context);
498 2006 : ginstate = (GinState *) palloc(sizeof(GinState));
499 2006 : initGinState(ginstate, index);
500 2006 : indexInfo->ii_AmCache = (void *) ginstate;
501 2006 : MemoryContextSwitchTo(oldCtx);
502 : }
503 :
504 298686 : insertCtx = AllocSetContextCreate(CurrentMemoryContext,
505 : "Gin insert temporary context",
506 : ALLOCSET_DEFAULT_SIZES);
507 :
508 298686 : oldCtx = MemoryContextSwitchTo(insertCtx);
509 :
510 298686 : if (GinGetUseFastUpdate(index))
511 264784 : {
512 : GinTupleCollector collector;
513 :
514 264790 : memset(&collector, 0, sizeof(GinTupleCollector));
515 :
516 649658 : for (i = 0; i < ginstate->origTupdesc->natts; i++)
517 384868 : ginHeapTupleFastCollect(ginstate, &collector,
518 384868 : (OffsetNumber) (i + 1),
519 384868 : values[i], isnull[i],
520 : ht_ctid);
521 :
522 264790 : ginHeapTupleFastInsert(ginstate, &collector);
523 : }
524 : else
525 : {
526 67772 : for (i = 0; i < ginstate->origTupdesc->natts; i++)
527 33896 : ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
528 33896 : values[i], isnull[i],
529 : ht_ctid);
530 : }
531 :
532 298660 : MemoryContextSwitchTo(oldCtx);
533 298660 : MemoryContextDelete(insertCtx);
534 :
535 298660 : return false;
536 : }
|