Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * gindatapage.c
4 : * routines for handling GIN posting tree pages.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/gindatapage.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gin_private.h"
18 : #include "access/ginxlog.h"
19 : #include "access/xloginsert.h"
20 : #include "lib/ilist.h"
21 : #include "miscadmin.h"
22 : #include "storage/predicate.h"
23 : #include "utils/rel.h"
24 :
/*
 * Size thresholds, in bytes, for the posting lists (segments) kept on a
 * leaf page.
 *
 * Any segment size is handled correctly, but a collection of smaller lists
 * gives cheaper random access than one big list.  A posting list that
 * insertions would push past the Max size is split in two; a posting list
 * that would fall below the Min size is merged with the one following it.
 */
#define GinPostingListSegmentMaxSize 384
#define GinPostingListSegmentTargetSize 256
#define GinPostingListSegmentMinSize 128

/*
 * Lower bound on the number of items that fit into a segment of
 * GinPostingListSegmentMaxSize bytes.  Used when estimating the minimum
 * amount of space required for N items.
 */
#define MinTuplesPerSegment ((GinPostingListSegmentMaxSize - 2) / 6)
44 :
45 : /*
46 : * A working struct for manipulating a posting tree leaf page.
47 : */
48 : typedef struct
49 : {
50 : dlist_head segments; /* a list of leafSegmentInfos */
51 :
52 : /*
53 : * The following fields represent how the segments are split across pages,
54 : * if a page split is required. Filled in by leafRepackItems.
55 : */
56 : dlist_node *lastleft; /* last segment on left page */
57 : int lsize; /* total size on left page */
58 : int rsize; /* total size on right page */
59 :
60 : bool oldformat; /* page is in pre-9.4 format on disk */
61 :
62 : /*
63 : * If we need WAL data representing the reconstructed leaf page, it's
64 : * stored here by computeLeafRecompressWALData.
65 : */
66 : char *walinfo; /* buffer start */
67 : int walinfolen; /* and length */
68 : } disassembledLeaf;
69 :
70 : typedef struct
71 : {
72 : dlist_node node; /* linked list pointers */
73 :
74 : /*-------------
75 : * 'action' indicates the status of this in-memory segment, compared to
76 : * what's on disk. It is one of the GIN_SEGMENT_* action codes:
77 : *
78 : * UNMODIFIED no changes
79 : * DELETE the segment is to be removed. 'seg' and 'items' are
80 : * ignored
81 : * INSERT this is a completely new segment
82 : * REPLACE this replaces an existing segment with new content
83 : * ADDITEMS like REPLACE, but no items have been removed, and we track
84 : * in detail what items have been added to this segment, in
85 : * 'modifieditems'
86 : *-------------
87 : */
88 : char action;
89 :
90 : ItemPointerData *modifieditems;
91 : uint16 nmodifieditems;
92 :
93 : /*
94 : * The following fields represent the items in this segment. If 'items' is
95 : * not NULL, it contains a palloc'd array of the items in this segment. If
96 : * 'seg' is not NULL, it contains the items in an already-compressed
97 : * format. It can point to an on-disk page (!modified), or a palloc'd
98 : * segment in memory. If both are set, they must represent the same items.
99 : */
100 : GinPostingList *seg;
101 : ItemPointer items;
102 : int nitems; /* # of items in 'items', if items != NULL */
103 : } leafSegmentInfo;
104 :
105 : static ItemPointer dataLeafPageGetUncompressed(Page page, int *nitems);
106 : static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
107 : GinBtreeStack *stack,
108 : void *insertdata, BlockNumber updateblkno,
109 : Page *newlpage, Page *newrpage);
110 :
111 : static disassembledLeaf *disassembleLeaf(Page page);
112 : static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
113 : static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
114 : int nNewItems);
115 :
116 : static void computeLeafRecompressWALData(disassembledLeaf *leaf);
117 : static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
118 : static void dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
119 : ItemPointerData lbound, ItemPointerData rbound,
120 : Page lpage, Page rpage);
121 :
122 : /*
123 : * Read TIDs from leaf data page to single uncompressed array. The TIDs are
124 : * returned in ascending order.
125 : *
126 : * advancePast is a hint, indicating that the caller is only interested in
127 : * TIDs > advancePast. To return all items, use ItemPointerSetMin.
128 : *
129 : * Note: This function can still return items smaller than advancePast that
130 : * are in the same posting list as the items of interest, so the caller must
131 : * still check all the returned items. But passing it allows this function to
132 : * skip whole posting lists.
133 : */
134 : ItemPointer
135 164 : GinDataLeafPageGetItems(Page page, int *nitems, ItemPointerData advancePast)
136 : {
137 : ItemPointer result;
138 :
139 164 : if (GinPageIsCompressed(page))
140 : {
141 164 : GinPostingList *seg = GinDataLeafPageGetPostingList(page);
142 164 : Size len = GinDataLeafPageGetPostingListSize(page);
143 164 : Pointer endptr = ((Pointer) seg) + len;
144 : GinPostingList *next;
145 :
146 : /* Skip to the segment containing advancePast+1 */
147 164 : if (ItemPointerIsValid(&advancePast))
148 : {
149 94 : next = GinNextPostingListSegment(seg);
150 178 : while ((Pointer) next < endptr &&
151 78 : ginCompareItemPointers(&next->first, &advancePast) <= 0)
152 : {
153 6 : seg = next;
154 6 : next = GinNextPostingListSegment(seg);
155 : }
156 94 : len = endptr - (Pointer) seg;
157 : }
158 :
159 164 : if (len > 0)
160 152 : result = ginPostingListDecodeAllSegments(seg, len, nitems);
161 : else
162 : {
163 12 : result = NULL;
164 12 : *nitems = 0;
165 : }
166 : }
167 : else
168 : {
169 0 : ItemPointer tmp = dataLeafPageGetUncompressed(page, nitems);
170 :
171 0 : result = palloc((*nitems) * sizeof(ItemPointerData));
172 0 : memcpy(result, tmp, (*nitems) * sizeof(ItemPointerData));
173 : }
174 :
175 164 : return result;
176 : }
177 :
178 : /*
179 : * Places all TIDs from leaf data page to bitmap.
180 : */
181 : int
182 30 : GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm)
183 : {
184 : ItemPointer uncompressed;
185 : int nitems;
186 :
187 30 : if (GinPageIsCompressed(page))
188 : {
189 30 : GinPostingList *segment = GinDataLeafPageGetPostingList(page);
190 30 : Size len = GinDataLeafPageGetPostingListSize(page);
191 :
192 30 : nitems = ginPostingListDecodeAllSegmentsToTbm(segment, len, tbm);
193 : }
194 : else
195 : {
196 0 : uncompressed = dataLeafPageGetUncompressed(page, &nitems);
197 :
198 0 : if (nitems > 0)
199 0 : tbm_add_tuples(tbm, uncompressed, nitems, false);
200 : }
201 :
202 30 : return nitems;
203 : }
204 :
205 : /*
206 : * Get pointer to the uncompressed array of items on a pre-9.4 format
207 : * uncompressed leaf page. The number of items in the array is returned in
208 : * *nitems.
209 : */
210 : static ItemPointer
211 0 : dataLeafPageGetUncompressed(Page page, int *nitems)
212 : {
213 : ItemPointer items;
214 :
215 : Assert(!GinPageIsCompressed(page));
216 :
217 : /*
218 : * In the old pre-9.4 page format, the whole page content is used for
219 : * uncompressed items, and the number of items is stored in 'maxoff'
220 : */
221 0 : items = (ItemPointer) GinDataPageGetData(page);
222 0 : *nitems = GinPageGetOpaque(page)->maxoff;
223 :
224 0 : return items;
225 : }
226 :
227 : /*
228 : * Check if we should follow the right link to find the item we're searching
229 : * for.
230 : *
231 : * Compares inserting item pointer with the right bound of the current page.
232 : */
233 : static bool
234 26616 : dataIsMoveRight(GinBtree btree, Page page)
235 : {
236 26616 : ItemPointer iptr = GinDataPageGetRightBound(page);
237 :
238 26616 : if (GinPageRightMost(page))
239 26616 : return false;
240 :
241 0 : if (GinPageIsDeleted(page))
242 0 : return true;
243 :
244 0 : return (ginCompareItemPointers(&btree->itemptr, iptr) > 0);
245 : }
246 :
247 : /*
248 : * Find correct PostingItem in non-leaf page. It is assumed that this is
249 : * the correct page, and the searched value SHOULD be on the page.
250 : */
251 : static BlockNumber
252 26692 : dataLocateItem(GinBtree btree, GinBtreeStack *stack)
253 : {
254 : OffsetNumber low,
255 : high,
256 : maxoff;
257 26692 : PostingItem *pitem = NULL;
258 : int result;
259 26692 : Page page = BufferGetPage(stack->buffer);
260 :
261 : Assert(!GinPageIsLeaf(page));
262 : Assert(GinPageIsData(page));
263 :
264 26692 : if (btree->fullScan)
265 : {
266 76 : stack->off = FirstOffsetNumber;
267 76 : stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
268 76 : return btree->getLeftMostChild(btree, page);
269 : }
270 :
271 26616 : low = FirstOffsetNumber;
272 26616 : maxoff = high = GinPageGetOpaque(page)->maxoff;
273 : Assert(high >= low);
274 :
275 26616 : high++;
276 :
277 79848 : while (high > low)
278 : {
279 53232 : OffsetNumber mid = low + ((high - low) / 2);
280 :
281 53232 : pitem = GinDataPageGetPostingItem(page, mid);
282 :
283 53232 : if (mid == maxoff)
284 : {
285 : /*
286 : * Right infinity, page already correctly chosen with a help of
287 : * dataIsMoveRight
288 : */
289 26616 : result = -1;
290 : }
291 : else
292 : {
293 26616 : pitem = GinDataPageGetPostingItem(page, mid);
294 26616 : result = ginCompareItemPointers(&btree->itemptr, &(pitem->key));
295 : }
296 :
297 53232 : if (result == 0)
298 : {
299 0 : stack->off = mid;
300 0 : return PostingItemGetBlockNumber(pitem);
301 : }
302 53232 : else if (result > 0)
303 26616 : low = mid + 1;
304 : else
305 26616 : high = mid;
306 : }
307 :
308 : Assert(high >= FirstOffsetNumber && high <= maxoff);
309 :
310 26616 : stack->off = high;
311 26616 : pitem = GinDataPageGetPostingItem(page, high);
312 26616 : return PostingItemGetBlockNumber(pitem);
313 : }
314 :
315 : /*
316 : * Find link to blkno on non-leaf page, returns offset of PostingItem
317 : */
318 : static OffsetNumber
319 46 : dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
320 : {
321 : OffsetNumber i,
322 46 : maxoff = GinPageGetOpaque(page)->maxoff;
323 : PostingItem *pitem;
324 :
325 : Assert(!GinPageIsLeaf(page));
326 : Assert(GinPageIsData(page));
327 :
328 : /* if page isn't changed, we return storedOff */
329 46 : if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
330 : {
331 46 : pitem = GinDataPageGetPostingItem(page, storedOff);
332 46 : if (PostingItemGetBlockNumber(pitem) == blkno)
333 46 : return storedOff;
334 :
335 : /*
336 : * we hope, that needed pointer goes to right. It's true if there
337 : * wasn't a deletion
338 : */
339 0 : for (i = storedOff + 1; i <= maxoff; i++)
340 : {
341 0 : pitem = GinDataPageGetPostingItem(page, i);
342 0 : if (PostingItemGetBlockNumber(pitem) == blkno)
343 0 : return i;
344 : }
345 :
346 0 : maxoff = storedOff - 1;
347 : }
348 :
349 : /* last chance */
350 0 : for (i = FirstOffsetNumber; i <= maxoff; i++)
351 : {
352 0 : pitem = GinDataPageGetPostingItem(page, i);
353 0 : if (PostingItemGetBlockNumber(pitem) == blkno)
354 0 : return i;
355 : }
356 :
357 0 : return InvalidOffsetNumber;
358 : }
359 :
360 : /*
361 : * Return blkno of leftmost child
362 : */
363 : static BlockNumber
364 76 : dataGetLeftMostPage(GinBtree btree, Page page)
365 : {
366 : PostingItem *pitem;
367 :
368 : Assert(!GinPageIsLeaf(page));
369 : Assert(GinPageIsData(page));
370 : Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
371 :
372 76 : pitem = GinDataPageGetPostingItem(page, FirstOffsetNumber);
373 76 : return PostingItemGetBlockNumber(pitem);
374 : }
375 :
376 : /*
377 : * Add PostingItem to a non-leaf page.
378 : */
379 : void
380 258 : GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset)
381 : {
382 258 : OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
383 : char *ptr;
384 :
385 : Assert(PostingItemGetBlockNumber(data) != InvalidBlockNumber);
386 : Assert(!GinPageIsLeaf(page));
387 :
388 258 : if (offset == InvalidOffsetNumber)
389 : {
390 208 : ptr = (char *) GinDataPageGetPostingItem(page, maxoff + 1);
391 : }
392 : else
393 : {
394 50 : ptr = (char *) GinDataPageGetPostingItem(page, offset);
395 50 : if (offset != maxoff + 1)
396 50 : memmove(ptr + sizeof(PostingItem),
397 : ptr,
398 50 : (maxoff - offset + 1) * sizeof(PostingItem));
399 : }
400 258 : memcpy(ptr, data, sizeof(PostingItem));
401 :
402 258 : maxoff++;
403 258 : GinPageGetOpaque(page)->maxoff = maxoff;
404 :
405 : /*
406 : * Also set pd_lower to the end of the posting items, to follow the
407 : * "standard" page layout, so that we can squeeze out the unused space
408 : * from full-page images.
409 : */
410 258 : GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
411 258 : }
412 :
413 : /*
414 : * Delete posting item from non-leaf page
415 : */
416 : void
417 12 : GinPageDeletePostingItem(Page page, OffsetNumber offset)
418 : {
419 12 : OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
420 :
421 : Assert(!GinPageIsLeaf(page));
422 : Assert(offset >= FirstOffsetNumber && offset <= maxoff);
423 :
424 12 : if (offset != maxoff)
425 12 : memmove(GinDataPageGetPostingItem(page, offset),
426 12 : GinDataPageGetPostingItem(page, offset + 1),
427 12 : sizeof(PostingItem) * (maxoff - offset));
428 :
429 12 : maxoff--;
430 12 : GinPageGetOpaque(page)->maxoff = maxoff;
431 :
432 12 : GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
433 12 : }
434 :
435 : /*
436 : * Prepare to insert data on a leaf data page.
437 : *
438 : * If it will fit, return GPTP_INSERT after doing whatever setup is needed
439 : * before we enter the insertion critical section. *ptp_workspace can be
440 : * set to pass information along to the execPlaceToPage function.
441 : *
442 : * If it won't fit, perform a page split and return two temporary page
443 : * images into *newlpage and *newrpage, with result GPTP_SPLIT.
444 : *
445 : * In neither case should the given page buffer be modified here.
446 : */
447 : static GinPlaceToPageRC
448 49556 : dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
449 : void *insertdata,
450 : void **ptp_workspace,
451 : Page *newlpage, Page *newrpage)
452 : {
453 49556 : GinBtreeDataLeafInsertData *items = insertdata;
454 49556 : ItemPointer newItems = &items->items[items->curitem];
455 49556 : int maxitems = items->nitem - items->curitem;
456 49556 : Page page = BufferGetPage(buf);
457 : int i;
458 : ItemPointerData rbound;
459 : ItemPointerData lbound;
460 : bool needsplit;
461 : bool append;
462 : int segsize;
463 : Size freespace;
464 : disassembledLeaf *leaf;
465 : leafSegmentInfo *lastleftinfo;
466 : ItemPointerData maxOldItem;
467 : ItemPointerData remaining;
468 :
469 49556 : rbound = *GinDataPageGetRightBound(page);
470 :
471 : /*
472 : * Count how many of the new items belong to this page.
473 : */
474 49556 : if (!GinPageRightMost(page))
475 : {
476 0 : for (i = 0; i < maxitems; i++)
477 : {
478 0 : if (ginCompareItemPointers(&newItems[i], &rbound) > 0)
479 : {
480 : /*
481 : * This needs to go to some other location in the tree. (The
482 : * caller should've chosen the insert location so that at
483 : * least the first item goes here.)
484 : */
485 : Assert(i > 0);
486 0 : break;
487 : }
488 : }
489 0 : maxitems = i;
490 : }
491 :
492 : /* Disassemble the data on the page */
493 49556 : leaf = disassembleLeaf(page);
494 :
495 : /*
496 : * Are we appending to the end of the page? IOW, are all the new items
497 : * larger than any of the existing items.
498 : */
499 49556 : if (!dlist_is_empty(&leaf->segments))
500 : {
501 49556 : lastleftinfo = dlist_container(leafSegmentInfo, node,
502 : dlist_tail_node(&leaf->segments));
503 49556 : if (!lastleftinfo->items)
504 49556 : lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg,
505 : &lastleftinfo->nitems);
506 49556 : maxOldItem = lastleftinfo->items[lastleftinfo->nitems - 1];
507 49556 : if (ginCompareItemPointers(&newItems[0], &maxOldItem) >= 0)
508 49556 : append = true;
509 : else
510 0 : append = false;
511 : }
512 : else
513 : {
514 0 : ItemPointerSetMin(&maxOldItem);
515 0 : append = true;
516 : }
517 :
518 : /*
519 : * If we're appending to the end of the page, we will append as many items
520 : * as we can fit (after splitting), and stop when the pages becomes full.
521 : * Otherwise we have to limit the number of new items to insert, because
522 : * once we start packing we can't just stop when we run out of space,
523 : * because we must make sure that all the old items still fit.
524 : */
525 49556 : if (GinPageIsCompressed(page))
526 49556 : freespace = GinDataLeafPageGetFreeSpace(page);
527 : else
528 0 : freespace = 0;
529 49556 : if (append)
530 : {
531 : /*
532 : * Even when appending, trying to append more items than will fit is
533 : * not completely free, because we will merge the new items and old
534 : * items into an array below. In the best case, every new item fits in
535 : * a single byte, and we can use all the free space on the old page as
536 : * well as the new page. For simplicity, ignore segment overhead etc.
537 : */
538 49556 : maxitems = Min(maxitems, freespace + GinDataPageMaxDataSize);
539 : }
540 : else
541 : {
542 : /*
543 : * Calculate a conservative estimate of how many new items we can fit
544 : * on the two pages after splitting.
545 : *
546 : * We can use any remaining free space on the old page to store full
547 : * segments, as well as the new page. Each full-sized segment can hold
548 : * at least MinTuplesPerSegment items
549 : */
550 : int nnewsegments;
551 :
552 0 : nnewsegments = freespace / GinPostingListSegmentMaxSize;
553 0 : nnewsegments += GinDataPageMaxDataSize / GinPostingListSegmentMaxSize;
554 0 : maxitems = Min(maxitems, nnewsegments * MinTuplesPerSegment);
555 : }
556 :
557 : /* Add the new items to the segment list */
558 49556 : if (!addItemsToLeaf(leaf, newItems, maxitems))
559 : {
560 : /* all items were duplicates, we have nothing to do */
561 0 : items->curitem += maxitems;
562 :
563 0 : return GPTP_NO_WORK;
564 : }
565 :
566 : /*
567 : * Pack the items back to compressed segments, ready for writing to disk.
568 : */
569 49556 : needsplit = leafRepackItems(leaf, &remaining);
570 :
571 : /*
572 : * Did all the new items fit?
573 : *
574 : * If we're appending, it's OK if they didn't. But as a sanity check,
575 : * verify that all the old items fit.
576 : */
577 49556 : if (ItemPointerIsValid(&remaining))
578 : {
579 40 : if (!append || ItemPointerCompare(&maxOldItem, &remaining) >= 0)
580 0 : elog(ERROR, "could not split GIN page; all old items didn't fit");
581 :
582 : /* Count how many of the new items did fit. */
583 304842 : for (i = 0; i < maxitems; i++)
584 : {
585 304842 : if (ginCompareItemPointers(&newItems[i], &remaining) >= 0)
586 40 : break;
587 : }
588 40 : if (i == 0)
589 0 : elog(ERROR, "could not split GIN page; no new items fit");
590 40 : maxitems = i;
591 : }
592 :
593 49556 : if (!needsplit)
594 : {
595 : /*
596 : * Great, all the items fit on a single page. If needed, prepare data
597 : * for a WAL record describing the changes we'll make.
598 : */
599 49406 : if (RelationNeedsWAL(btree->index) && !btree->isBuild)
600 49406 : computeLeafRecompressWALData(leaf);
601 :
602 : /*
603 : * We're ready to enter the critical section, but
604 : * dataExecPlaceToPageLeaf will need access to the "leaf" data.
605 : */
606 49406 : *ptp_workspace = leaf;
607 :
608 49406 : if (append)
609 49406 : elog(DEBUG2, "appended %d new items to block %u; %d bytes (%d to go)",
610 : maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize,
611 : items->nitem - items->curitem - maxitems);
612 : else
613 0 : elog(DEBUG2, "inserted %d new items to block %u; %d bytes (%d to go)",
614 : maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize,
615 : items->nitem - items->curitem - maxitems);
616 : }
617 : else
618 : {
619 : /*
620 : * Have to split.
621 : *
622 : * leafRepackItems already divided the segments between the left and
623 : * the right page. It filled the left page as full as possible, and
624 : * put the rest to the right page. When building a new index, that's
625 : * good, because the table is scanned from beginning to end and there
626 : * won't be any more insertions to the left page during the build.
627 : * This packs the index as tight as possible. But otherwise, split
628 : * 50/50, by moving segments from the left page to the right page
629 : * until they're balanced.
630 : *
631 : * As a further heuristic, when appending items to the end of the
632 : * page, try to make the left page 75% full, on the assumption that
633 : * subsequent insertions will probably also go to the end. This packs
634 : * the index somewhat tighter when appending to a table, which is very
635 : * common.
636 : */
637 150 : if (!btree->isBuild)
638 : {
639 260 : while (dlist_has_prev(&leaf->segments, leaf->lastleft))
640 : {
641 260 : lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft);
642 :
643 : /* ignore deleted segments */
644 260 : if (lastleftinfo->action != GIN_SEGMENT_DELETE)
645 : {
646 260 : segsize = SizeOfGinPostingList(lastleftinfo->seg);
647 :
648 : /*
649 : * Note that we check that the right page doesn't become
650 : * more full than the left page even when appending. It's
651 : * possible that we added enough items to make both pages
652 : * more than 75% full.
653 : */
654 260 : if ((leaf->lsize - segsize) - (leaf->rsize + segsize) < 0)
655 48 : break;
656 212 : if (append)
657 : {
658 212 : if ((leaf->lsize - segsize) < (BLCKSZ * 3) / 4)
659 10 : break;
660 : }
661 :
662 202 : leaf->lsize -= segsize;
663 202 : leaf->rsize += segsize;
664 : }
665 202 : leaf->lastleft = dlist_prev_node(&leaf->segments, leaf->lastleft);
666 : }
667 : }
668 : Assert(leaf->lsize <= GinDataPageMaxDataSize);
669 : Assert(leaf->rsize <= GinDataPageMaxDataSize);
670 :
671 : /*
672 : * Fetch the max item in the left page's last segment; it becomes the
673 : * right bound of the page.
674 : */
675 150 : lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft);
676 150 : if (!lastleftinfo->items)
677 150 : lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg,
678 : &lastleftinfo->nitems);
679 150 : lbound = lastleftinfo->items[lastleftinfo->nitems - 1];
680 :
681 : /*
682 : * Now allocate a couple of temporary page images, and fill them.
683 : */
684 150 : *newlpage = palloc(BLCKSZ);
685 150 : *newrpage = palloc(BLCKSZ);
686 :
687 150 : dataPlaceToPageLeafSplit(leaf, lbound, rbound,
688 : *newlpage, *newrpage);
689 :
690 : Assert(GinPageRightMost(page) ||
691 : ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
692 : GinDataPageGetRightBound(*newrpage)) < 0);
693 :
694 150 : if (append)
695 150 : elog(DEBUG2, "appended %d items to block %u; split %d/%d (%d to go)",
696 : maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, (int) leaf->rsize,
697 : items->nitem - items->curitem - maxitems);
698 : else
699 0 : elog(DEBUG2, "inserted %d items to block %u; split %d/%d (%d to go)",
700 : maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, (int) leaf->rsize,
701 : items->nitem - items->curitem - maxitems);
702 : }
703 :
704 49556 : items->curitem += maxitems;
705 :
706 49556 : return needsplit ? GPTP_SPLIT : GPTP_INSERT;
707 : }
708 :
709 : /*
710 : * Perform data insertion after beginPlaceToPage has decided it will fit.
711 : *
712 : * This is invoked within a critical section, and XLOG record creation (if
713 : * needed) is already started. The target buffer is registered in slot 0.
714 : */
715 : static void
716 49406 : dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
717 : void *insertdata, void *ptp_workspace)
718 : {
719 49406 : disassembledLeaf *leaf = (disassembledLeaf *) ptp_workspace;
720 :
721 : /* Apply changes to page */
722 49406 : dataPlaceToPageLeafRecompress(buf, leaf);
723 :
724 49406 : MarkBufferDirty(buf);
725 :
726 : /* If needed, register WAL data built by computeLeafRecompressWALData */
727 49406 : if (RelationNeedsWAL(btree->index) && !btree->isBuild)
728 : {
729 49406 : XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
730 49406 : XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
731 : }
732 49406 : }
733 :
734 : /*
735 : * Vacuum a posting tree leaf page.
736 : */
737 : void
738 54 : ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
739 : {
740 54 : Page page = BufferGetPage(buffer);
741 : disassembledLeaf *leaf;
742 54 : bool removedsomething = false;
743 : dlist_iter iter;
744 :
745 54 : leaf = disassembleLeaf(page);
746 :
747 : /* Vacuum each segment. */
748 1062 : dlist_foreach(iter, &leaf->segments)
749 : {
750 1008 : leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur);
751 : int oldsegsize;
752 : ItemPointer cleaned;
753 : int ncleaned;
754 :
755 1008 : if (!seginfo->items)
756 1008 : seginfo->items = ginPostingListDecode(seginfo->seg,
757 : &seginfo->nitems);
758 1008 : if (seginfo->seg)
759 1008 : oldsegsize = SizeOfGinPostingList(seginfo->seg);
760 : else
761 0 : oldsegsize = GinDataPageMaxDataSize;
762 :
763 1008 : cleaned = ginVacuumItemPointers(gvs,
764 : seginfo->items,
765 : seginfo->nitems,
766 : &ncleaned);
767 1008 : pfree(seginfo->items);
768 1008 : seginfo->items = NULL;
769 1008 : seginfo->nitems = 0;
770 1008 : if (cleaned)
771 : {
772 936 : if (ncleaned > 0)
773 : {
774 : int npacked;
775 :
776 36 : seginfo->seg = ginCompressPostingList(cleaned,
777 : ncleaned,
778 : oldsegsize,
779 : &npacked);
780 : /* Removing an item never increases the size of the segment */
781 36 : if (npacked != ncleaned)
782 0 : elog(ERROR, "could not fit vacuumed posting list");
783 36 : seginfo->action = GIN_SEGMENT_REPLACE;
784 : }
785 : else
786 : {
787 900 : seginfo->seg = NULL;
788 900 : seginfo->items = NULL;
789 900 : seginfo->action = GIN_SEGMENT_DELETE;
790 : }
791 936 : seginfo->nitems = ncleaned;
792 :
793 936 : removedsomething = true;
794 : }
795 : }
796 :
797 : /*
798 : * If we removed any items, reconstruct the page from the pieces.
799 : *
800 : * We don't try to re-encode the segments here, even though some of them
801 : * might be really small now that we've removed some items from them. It
802 : * seems like a waste of effort, as there isn't really any benefit from
803 : * larger segments per se; larger segments only help to pack more items in
804 : * the same space. We might as well delay doing that until the next
805 : * insertion, which will need to re-encode at least part of the page
806 : * anyway.
807 : *
808 : * Also note if the page was in uncompressed, pre-9.4 format before, it is
809 : * now represented as one huge segment that contains all the items. It
810 : * might make sense to split that, to speed up random access, but we don't
811 : * bother. You'll have to REINDEX anyway if you want the full gain of the
812 : * new tighter index format.
813 : */
814 54 : if (removedsomething)
815 : {
816 : bool modified;
817 :
818 : /*
819 : * Make sure we have a palloc'd copy of all segments, after the first
820 : * segment that is modified. (dataPlaceToPageLeafRecompress requires
821 : * this).
822 : */
823 54 : modified = false;
824 1062 : dlist_foreach(iter, &leaf->segments)
825 : {
826 1008 : leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
827 : iter.cur);
828 :
829 1008 : if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
830 936 : modified = true;
831 1008 : if (modified && seginfo->action != GIN_SEGMENT_DELETE)
832 : {
833 108 : int segsize = SizeOfGinPostingList(seginfo->seg);
834 108 : GinPostingList *tmp = (GinPostingList *) palloc(segsize);
835 :
836 108 : memcpy(tmp, seginfo->seg, segsize);
837 108 : seginfo->seg = tmp;
838 : }
839 : }
840 :
841 54 : if (RelationNeedsWAL(indexrel))
842 18 : computeLeafRecompressWALData(leaf);
843 :
844 : /* Apply changes to page */
845 54 : START_CRIT_SECTION();
846 :
847 54 : dataPlaceToPageLeafRecompress(buffer, leaf);
848 :
849 54 : MarkBufferDirty(buffer);
850 :
851 54 : if (RelationNeedsWAL(indexrel))
852 : {
853 : XLogRecPtr recptr;
854 :
855 18 : XLogBeginInsert();
856 18 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
857 18 : XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
858 18 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE);
859 18 : PageSetLSN(page, recptr);
860 : }
861 :
862 54 : END_CRIT_SECTION();
863 : }
864 54 : }
865 :
866 : /*
867 : * Construct a ginxlogRecompressDataLeaf record representing the changes
868 : * in *leaf. (Because this requires a palloc, we have to do it before
869 : * we enter the critical section that actually updates the page.)
870 : */
871 : static void
872 49424 : computeLeafRecompressWALData(disassembledLeaf *leaf)
873 : {
874 49424 : int nmodified = 0;
875 : char *walbufbegin;
876 : char *walbufend;
877 : dlist_iter iter;
878 : int segno;
879 : ginxlogRecompressDataLeaf *recompress_xlog;
880 :
881 : /* Count the modified segments */
882 892054 : dlist_foreach(iter, &leaf->segments)
883 : {
884 842630 : leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
885 : iter.cur);
886 :
887 842630 : if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
888 49568 : nmodified++;
889 : }
890 :
891 : walbufbegin =
892 49424 : palloc(sizeof(ginxlogRecompressDataLeaf) +
893 : BLCKSZ + /* max size needed to hold the segment data */
894 49424 : nmodified * 2 /* (segno + action) per action */
895 : );
896 49424 : walbufend = walbufbegin;
897 :
898 49424 : recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
899 49424 : walbufend += sizeof(ginxlogRecompressDataLeaf);
900 :
901 49424 : recompress_xlog->nactions = nmodified;
902 :
903 49424 : segno = 0;
904 892054 : dlist_foreach(iter, &leaf->segments)
905 : {
906 842630 : leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
907 : iter.cur);
908 842630 : int segsize = 0;
909 : int datalen;
910 842630 : uint8 action = seginfo->action;
911 :
912 842630 : if (action == GIN_SEGMENT_UNMODIFIED)
913 : {
914 793062 : segno++;
915 793062 : continue;
916 : }
917 :
918 49568 : if (action != GIN_SEGMENT_DELETE)
919 49508 : segsize = SizeOfGinPostingList(seginfo->seg);
920 :
921 : /*
922 : * If storing the uncompressed list of added item pointers would take
923 : * more space than storing the compressed segment as is, do that
924 : * instead.
925 : */
926 49568 : if (action == GIN_SEGMENT_ADDITEMS &&
927 49160 : seginfo->nmodifieditems * sizeof(ItemPointerData) > segsize)
928 : {
929 0 : action = GIN_SEGMENT_REPLACE;
930 : }
931 :
932 49568 : *((uint8 *) (walbufend++)) = segno;
933 49568 : *(walbufend++) = action;
934 :
935 49568 : switch (action)
936 : {
937 60 : case GIN_SEGMENT_DELETE:
938 60 : datalen = 0;
939 60 : break;
940 :
941 49160 : case GIN_SEGMENT_ADDITEMS:
942 49160 : datalen = seginfo->nmodifieditems * sizeof(ItemPointerData);
943 49160 : memcpy(walbufend, &seginfo->nmodifieditems, sizeof(uint16));
944 49160 : memcpy(walbufend + sizeof(uint16), seginfo->modifieditems, datalen);
945 49160 : datalen += sizeof(uint16);
946 49160 : break;
947 :
948 348 : case GIN_SEGMENT_INSERT:
949 : case GIN_SEGMENT_REPLACE:
950 348 : datalen = SHORTALIGN(segsize);
951 348 : memcpy(walbufend, seginfo->seg, segsize);
952 348 : break;
953 :
954 0 : default:
955 0 : elog(ERROR, "unexpected GIN leaf action %d", action);
956 : }
957 49568 : walbufend += datalen;
958 :
959 49568 : if (action != GIN_SEGMENT_INSERT)
960 49268 : segno++;
961 : }
962 :
963 : /* Pass back the constructed info via *leaf */
964 49424 : leaf->walinfo = walbufbegin;
965 49424 : leaf->walinfolen = walbufend - walbufbegin;
966 49424 : }
967 :
968 : /*
969 : * Assemble a disassembled posting tree leaf page back to a buffer.
970 : *
971 : * This just updates the target buffer; WAL stuff is caller's responsibility.
972 : *
973 : * NOTE: The segment pointers must not point directly to the same buffer,
974 : * except for segments that have not been modified and whose preceding
975 : * segments have not been modified either.
976 : */
977 : static void
978 49460 : dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
979 : {
980 49460 : Page page = BufferGetPage(buf);
981 : char *ptr;
982 : int newsize;
983 49460 : bool modified = false;
984 : dlist_iter iter;
985 : int segsize;
986 :
987 : /*
988 : * If the page was in pre-9.4 format before, convert the header, and force
989 : * all segments to be copied to the page whether they were modified or
990 : * not.
991 : */
992 49460 : if (!GinPageIsCompressed(page))
993 : {
994 : Assert(leaf->oldformat);
995 0 : GinPageSetCompressed(page);
996 0 : GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
997 0 : modified = true;
998 : }
999 :
1000 49460 : ptr = (char *) GinDataLeafPageGetPostingList(page);
1001 49460 : newsize = 0;
1002 892936 : dlist_foreach(iter, &leaf->segments)
1003 : {
1004 843476 : leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur);
1005 :
1006 843476 : if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
1007 50414 : modified = true;
1008 :
1009 843476 : if (seginfo->action != GIN_SEGMENT_DELETE)
1010 : {
1011 842576 : segsize = SizeOfGinPostingList(seginfo->seg);
1012 :
1013 842576 : if (modified)
1014 49586 : memcpy(ptr, seginfo->seg, segsize);
1015 :
1016 842576 : ptr += segsize;
1017 842576 : newsize += segsize;
1018 : }
1019 : }
1020 :
1021 : Assert(newsize <= GinDataPageMaxDataSize);
1022 49460 : GinDataPageSetDataSize(page, newsize);
1023 49460 : }
1024 :
1025 : /*
1026 : * Like dataPlaceToPageLeafRecompress, but writes the disassembled leaf
1027 : * segments to two pages instead of one.
1028 : *
1029 : * This is different from the non-split cases in that this does not modify
1030 : * the original page directly, but writes to temporary in-memory copies of
1031 : * the new left and right pages.
1032 : */
1033 : static void
1034 150 : dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
1035 : ItemPointerData lbound, ItemPointerData rbound,
1036 : Page lpage, Page rpage)
1037 : {
1038 : char *ptr;
1039 : int segsize;
1040 : int lsize;
1041 : int rsize;
1042 : dlist_node *node;
1043 : dlist_node *firstright;
1044 : leafSegmentInfo *seginfo;
1045 :
1046 : /* Initialize temporary pages to hold the new left and right pages */
1047 150 : GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
1048 150 : GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
1049 :
1050 : /*
1051 : * Copy the segments that go to the left page.
1052 : *
1053 : * XXX: We should skip copying the unmodified part of the left page, like
1054 : * we do when recompressing.
1055 : */
1056 150 : lsize = 0;
1057 150 : ptr = (char *) GinDataLeafPageGetPostingList(lpage);
1058 150 : firstright = dlist_next_node(&leaf->segments, leaf->lastleft);
1059 3586 : for (node = dlist_head_node(&leaf->segments);
1060 : node != firstright;
1061 3436 : node = dlist_next_node(&leaf->segments, node))
1062 : {
1063 3436 : seginfo = dlist_container(leafSegmentInfo, node, node);
1064 :
1065 3436 : if (seginfo->action != GIN_SEGMENT_DELETE)
1066 : {
1067 3436 : segsize = SizeOfGinPostingList(seginfo->seg);
1068 3436 : memcpy(ptr, seginfo->seg, segsize);
1069 3436 : ptr += segsize;
1070 3436 : lsize += segsize;
1071 : }
1072 : }
1073 : Assert(lsize == leaf->lsize);
1074 150 : GinDataPageSetDataSize(lpage, lsize);
1075 150 : *GinDataPageGetRightBound(lpage) = lbound;
1076 :
1077 : /* Copy the segments that go to the right page */
1078 150 : ptr = (char *) GinDataLeafPageGetPostingList(rpage);
1079 150 : rsize = 0;
1080 150 : for (node = firstright;
1081 : ;
1082 1944 : node = dlist_next_node(&leaf->segments, node))
1083 : {
1084 2094 : seginfo = dlist_container(leafSegmentInfo, node, node);
1085 :
1086 2094 : if (seginfo->action != GIN_SEGMENT_DELETE)
1087 : {
1088 2094 : segsize = SizeOfGinPostingList(seginfo->seg);
1089 2094 : memcpy(ptr, seginfo->seg, segsize);
1090 2094 : ptr += segsize;
1091 2094 : rsize += segsize;
1092 : }
1093 :
1094 2094 : if (!dlist_has_next(&leaf->segments, node))
1095 150 : break;
1096 : }
1097 : Assert(rsize == leaf->rsize);
1098 150 : GinDataPageSetDataSize(rpage, rsize);
1099 150 : *GinDataPageGetRightBound(rpage) = rbound;
1100 150 : }
1101 :
1102 : /*
1103 : * Prepare to insert data on an internal data page.
1104 : *
1105 : * If it will fit, return GPTP_INSERT after doing whatever setup is needed
1106 : * before we enter the insertion critical section. *ptp_workspace can be
1107 : * set to pass information along to the execPlaceToPage function.
1108 : *
1109 : * If it won't fit, perform a page split and return two temporary page
1110 : * images into *newlpage and *newrpage, with result GPTP_SPLIT.
1111 : *
1112 : * In neither case should the given page buffer be modified here.
1113 : *
1114 : * Note: on insertion to an internal node, in addition to inserting the given
1115 : * item, the downlink of the existing item at stack->off will be updated to
1116 : * point to updateblkno.
1117 : */
1118 : static GinPlaceToPageRC
1119 46 : dataBeginPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
1120 : void *insertdata, BlockNumber updateblkno,
1121 : void **ptp_workspace,
1122 : Page *newlpage, Page *newrpage)
1123 : {
1124 46 : Page page = BufferGetPage(buf);
1125 :
1126 : /* If it doesn't fit, deal with split case */
1127 46 : if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
1128 : {
1129 0 : dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
1130 : newlpage, newrpage);
1131 0 : return GPTP_SPLIT;
1132 : }
1133 :
1134 : /* Else, we're ready to proceed with insertion */
1135 46 : return GPTP_INSERT;
1136 : }
1137 :
1138 : /*
1139 : * Perform data insertion after beginPlaceToPage has decided it will fit.
1140 : *
1141 : * This is invoked within a critical section, and XLOG record creation (if
1142 : * needed) is already started. The target buffer is registered in slot 0.
1143 : */
1144 : static void
1145 46 : dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
1146 : void *insertdata, BlockNumber updateblkno,
1147 : void *ptp_workspace)
1148 : {
1149 46 : Page page = BufferGetPage(buf);
1150 46 : OffsetNumber off = stack->off;
1151 : PostingItem *pitem;
1152 :
1153 : /* Update existing downlink to point to next page (on internal page) */
1154 46 : pitem = GinDataPageGetPostingItem(page, off);
1155 46 : PostingItemSetBlockNumber(pitem, updateblkno);
1156 :
1157 : /* Add new item */
1158 46 : pitem = (PostingItem *) insertdata;
1159 46 : GinDataPageAddPostingItem(page, pitem, off);
1160 :
1161 46 : MarkBufferDirty(buf);
1162 :
1163 46 : if (RelationNeedsWAL(btree->index) && !btree->isBuild)
1164 : {
1165 : /*
1166 : * This must be static, because it has to survive until XLogInsert,
1167 : * and we can't palloc here. Ugly, but the XLogInsert infrastructure
1168 : * isn't reentrant anyway.
1169 : */
1170 : static ginxlogInsertDataInternal data;
1171 :
1172 18 : data.offset = off;
1173 18 : data.newitem = *pitem;
1174 :
1175 18 : XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
1176 18 : XLogRegisterBufData(0, (char *) &data,
1177 : sizeof(ginxlogInsertDataInternal));
1178 : }
1179 46 : }
1180 :
1181 : /*
1182 : * Prepare to insert data on a posting-tree data page.
1183 : *
1184 : * If it will fit, return GPTP_INSERT after doing whatever setup is needed
1185 : * before we enter the insertion critical section. *ptp_workspace can be
1186 : * set to pass information along to the execPlaceToPage function.
1187 : *
1188 : * If it won't fit, perform a page split and return two temporary page
1189 : * images into *newlpage and *newrpage, with result GPTP_SPLIT.
1190 : *
1191 : * In neither case should the given page buffer be modified here.
1192 : *
1193 : * Note: on insertion to an internal node, in addition to inserting the given
1194 : * item, the downlink of the existing item at stack->off will be updated to
1195 : * point to updateblkno.
1196 : *
1197 : * Calls relevant function for internal or leaf page because they are handled
1198 : * very differently.
1199 : */
1200 : static GinPlaceToPageRC
1201 49602 : dataBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
1202 : void *insertdata, BlockNumber updateblkno,
1203 : void **ptp_workspace,
1204 : Page *newlpage, Page *newrpage)
1205 : {
1206 49602 : Page page = BufferGetPage(buf);
1207 :
1208 : Assert(GinPageIsData(page));
1209 :
1210 49602 : if (GinPageIsLeaf(page))
1211 49556 : return dataBeginPlaceToPageLeaf(btree, buf, stack, insertdata,
1212 : ptp_workspace,
1213 : newlpage, newrpage);
1214 : else
1215 46 : return dataBeginPlaceToPageInternal(btree, buf, stack,
1216 : insertdata, updateblkno,
1217 : ptp_workspace,
1218 : newlpage, newrpage);
1219 : }
1220 :
1221 : /*
1222 : * Perform data insertion after beginPlaceToPage has decided it will fit.
1223 : *
1224 : * This is invoked within a critical section, and XLOG record creation (if
1225 : * needed) is already started. The target buffer is registered in slot 0.
1226 : *
1227 : * Calls relevant function for internal or leaf page because they are handled
1228 : * very differently.
1229 : */
1230 : static void
1231 49452 : dataExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
1232 : void *insertdata, BlockNumber updateblkno,
1233 : void *ptp_workspace)
1234 : {
1235 49452 : Page page = BufferGetPage(buf);
1236 :
1237 49452 : if (GinPageIsLeaf(page))
1238 49406 : dataExecPlaceToPageLeaf(btree, buf, stack, insertdata,
1239 : ptp_workspace);
1240 : else
1241 46 : dataExecPlaceToPageInternal(btree, buf, stack, insertdata,
1242 : updateblkno, ptp_workspace);
1243 49452 : }
1244 :
1245 : /*
1246 : * Split internal page and insert new data.
1247 : *
1248 : * Returns new temp pages to *newlpage and *newrpage.
1249 : * The original buffer is left untouched.
1250 : */
1251 : static void
1252 0 : dataSplitPageInternal(GinBtree btree, Buffer origbuf,
1253 : GinBtreeStack *stack,
1254 : void *insertdata, BlockNumber updateblkno,
1255 : Page *newlpage, Page *newrpage)
1256 : {
1257 0 : Page oldpage = BufferGetPage(origbuf);
1258 0 : OffsetNumber off = stack->off;
1259 0 : int nitems = GinPageGetOpaque(oldpage)->maxoff;
1260 : int nleftitems;
1261 : int nrightitems;
1262 0 : Size pageSize = PageGetPageSize(oldpage);
1263 0 : ItemPointerData oldbound = *GinDataPageGetRightBound(oldpage);
1264 : ItemPointer bound;
1265 : Page lpage;
1266 : Page rpage;
1267 : OffsetNumber separator;
1268 : PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
1269 :
1270 0 : lpage = PageGetTempPage(oldpage);
1271 0 : rpage = PageGetTempPage(oldpage);
1272 0 : GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
1273 0 : GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
1274 :
1275 : /*
1276 : * First construct a new list of PostingItems, which includes all the old
1277 : * items, and the new item.
1278 : */
1279 0 : memcpy(allitems, GinDataPageGetPostingItem(oldpage, FirstOffsetNumber),
1280 0 : (off - 1) * sizeof(PostingItem));
1281 :
1282 0 : allitems[off - 1] = *((PostingItem *) insertdata);
1283 0 : memcpy(&allitems[off], GinDataPageGetPostingItem(oldpage, off),
1284 0 : (nitems - (off - 1)) * sizeof(PostingItem));
1285 0 : nitems++;
1286 :
1287 : /* Update existing downlink to point to next page */
1288 0 : PostingItemSetBlockNumber(&allitems[off], updateblkno);
1289 :
1290 : /*
1291 : * When creating a new index, fit as many tuples as possible on the left
1292 : * page, on the assumption that the table is scanned from beginning to
1293 : * end. This packs the index as tight as possible.
1294 : */
1295 0 : if (btree->isBuild && GinPageRightMost(oldpage))
1296 0 : separator = GinNonLeafDataPageGetFreeSpace(rpage) / sizeof(PostingItem);
1297 : else
1298 0 : separator = nitems / 2;
1299 0 : nleftitems = separator;
1300 0 : nrightitems = nitems - separator;
1301 :
1302 0 : memcpy(GinDataPageGetPostingItem(lpage, FirstOffsetNumber),
1303 : allitems,
1304 : nleftitems * sizeof(PostingItem));
1305 0 : GinPageGetOpaque(lpage)->maxoff = nleftitems;
1306 0 : memcpy(GinDataPageGetPostingItem(rpage, FirstOffsetNumber),
1307 0 : &allitems[separator],
1308 : nrightitems * sizeof(PostingItem));
1309 0 : GinPageGetOpaque(rpage)->maxoff = nrightitems;
1310 :
1311 : /*
1312 : * Also set pd_lower for both pages, like GinDataPageAddPostingItem does.
1313 : */
1314 0 : GinDataPageSetDataSize(lpage, nleftitems * sizeof(PostingItem));
1315 0 : GinDataPageSetDataSize(rpage, nrightitems * sizeof(PostingItem));
1316 :
1317 : /* set up right bound for left page */
1318 0 : bound = GinDataPageGetRightBound(lpage);
1319 0 : *bound = GinDataPageGetPostingItem(lpage, nleftitems)->key;
1320 :
1321 : /* set up right bound for right page */
1322 0 : *GinDataPageGetRightBound(rpage) = oldbound;
1323 :
1324 : /* return temp pages to caller */
1325 0 : *newlpage = lpage;
1326 0 : *newrpage = rpage;
1327 0 : }
1328 :
1329 : /*
1330 : * Construct insertion payload for inserting the downlink for given buffer.
1331 : */
1332 : static void *
1333 46 : dataPrepareDownlink(GinBtree btree, Buffer lbuf)
1334 : {
1335 46 : PostingItem *pitem = palloc(sizeof(PostingItem));
1336 46 : Page lpage = BufferGetPage(lbuf);
1337 :
1338 46 : PostingItemSetBlockNumber(pitem, BufferGetBlockNumber(lbuf));
1339 46 : pitem->key = *GinDataPageGetRightBound(lpage);
1340 :
1341 46 : return pitem;
1342 : }
1343 :
1344 : /*
1345 : * Fills new root by right bound values from child.
1346 : * Also called from ginxlog, should not use btree
1347 : */
1348 : void
1349 104 : ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage)
1350 : {
1351 : PostingItem li,
1352 : ri;
1353 :
1354 104 : li.key = *GinDataPageGetRightBound(lpage);
1355 104 : PostingItemSetBlockNumber(&li, lblkno);
1356 104 : GinDataPageAddPostingItem(root, &li, InvalidOffsetNumber);
1357 :
1358 104 : ri.key = *GinDataPageGetRightBound(rpage);
1359 104 : PostingItemSetBlockNumber(&ri, rblkno);
1360 104 : GinDataPageAddPostingItem(root, &ri, InvalidOffsetNumber);
1361 104 : }
1362 :
1363 :
1364 : /*** Functions to work with disassembled leaf pages ***/
1365 :
1366 : /*
1367 : * Disassemble page into a disassembledLeaf struct.
1368 : */
1369 : static disassembledLeaf *
1370 49610 : disassembleLeaf(Page page)
1371 : {
1372 : disassembledLeaf *leaf;
1373 : GinPostingList *seg;
1374 : Pointer segbegin;
1375 : Pointer segend;
1376 :
1377 49610 : leaf = palloc0(sizeof(disassembledLeaf));
1378 49610 : dlist_init(&leaf->segments);
1379 :
1380 49610 : if (GinPageIsCompressed(page))
1381 : {
1382 : /*
1383 : * Create a leafSegmentInfo entry for each segment.
1384 : */
1385 49610 : seg = GinDataLeafPageGetPostingList(page);
1386 49610 : segbegin = (Pointer) seg;
1387 49610 : segend = segbegin + GinDataLeafPageGetPostingListSize(page);
1388 896434 : while ((Pointer) seg < segend)
1389 : {
1390 846824 : leafSegmentInfo *seginfo = palloc(sizeof(leafSegmentInfo));
1391 :
1392 846824 : seginfo->action = GIN_SEGMENT_UNMODIFIED;
1393 846824 : seginfo->seg = seg;
1394 846824 : seginfo->items = NULL;
1395 846824 : seginfo->nitems = 0;
1396 846824 : dlist_push_tail(&leaf->segments, &seginfo->node);
1397 :
1398 846824 : seg = GinNextPostingListSegment(seg);
1399 : }
1400 49610 : leaf->oldformat = false;
1401 : }
1402 : else
1403 : {
1404 : /*
1405 : * A pre-9.4 format uncompressed page is represented by a single
1406 : * segment, with an array of items. The corner case is uncompressed
1407 : * page containing no items, which is represented as no segments.
1408 : */
1409 : ItemPointer uncompressed;
1410 : int nuncompressed;
1411 : leafSegmentInfo *seginfo;
1412 :
1413 0 : uncompressed = dataLeafPageGetUncompressed(page, &nuncompressed);
1414 :
1415 0 : if (nuncompressed > 0)
1416 : {
1417 0 : seginfo = palloc(sizeof(leafSegmentInfo));
1418 :
1419 0 : seginfo->action = GIN_SEGMENT_REPLACE;
1420 0 : seginfo->seg = NULL;
1421 0 : seginfo->items = palloc(nuncompressed * sizeof(ItemPointerData));
1422 0 : memcpy(seginfo->items, uncompressed, nuncompressed * sizeof(ItemPointerData));
1423 0 : seginfo->nitems = nuncompressed;
1424 :
1425 0 : dlist_push_tail(&leaf->segments, &seginfo->node);
1426 : }
1427 :
1428 0 : leaf->oldformat = true;
1429 : }
1430 :
1431 49610 : return leaf;
1432 : }
1433 :
1434 : /*
1435 : * Distribute newItems to the segments.
1436 : *
1437 : * Any segments that acquire new items are decoded, and the new items are
1438 : * merged with the old items.
1439 : *
1440 : * Returns true if any new items were added. False means they were all
1441 : * duplicates of existing items on the page.
1442 : */
1443 : static bool
1444 49556 : addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, int nNewItems)
1445 : {
1446 : dlist_iter iter;
1447 49556 : ItemPointer nextnew = newItems;
1448 49556 : int newleft = nNewItems;
1449 49556 : bool modified = false;
1450 : leafSegmentInfo *newseg;
1451 :
1452 : /*
1453 : * If the page is completely empty, just construct one new segment to hold
1454 : * all the new items.
1455 : */
1456 49556 : if (dlist_is_empty(&leaf->segments))
1457 : {
1458 0 : newseg = palloc(sizeof(leafSegmentInfo));
1459 0 : newseg->seg = NULL;
1460 0 : newseg->items = newItems;
1461 0 : newseg->nitems = nNewItems;
1462 0 : newseg->action = GIN_SEGMENT_INSERT;
1463 0 : dlist_push_tail(&leaf->segments, &newseg->node);
1464 0 : return true;
1465 : }
1466 :
1467 845816 : dlist_foreach(iter, &leaf->segments)
1468 : {
1469 845816 : leafSegmentInfo *cur = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node, iter.cur);
1470 : int nthis;
1471 : ItemPointer tmpitems;
1472 : int ntmpitems;
1473 :
1474 : /*
1475 : * How many of the new items fall into this segment?
1476 : */
1477 845816 : if (!dlist_has_next(&leaf->segments, iter.cur))
1478 49556 : nthis = newleft;
1479 : else
1480 : {
1481 : leafSegmentInfo *next;
1482 : ItemPointerData next_first;
1483 :
1484 796260 : next = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node,
1485 : dlist_next_node(&leaf->segments, iter.cur));
1486 796260 : if (next->items)
1487 49544 : next_first = next->items[0];
1488 : else
1489 : {
1490 : Assert(next->seg != NULL);
1491 746716 : next_first = next->seg->first;
1492 : }
1493 :
1494 796260 : nthis = 0;
1495 796260 : while (nthis < newleft && ginCompareItemPointers(&nextnew[nthis], &next_first) < 0)
1496 0 : nthis++;
1497 : }
1498 845816 : if (nthis == 0)
1499 796260 : continue;
1500 :
1501 : /* Merge the new items with the existing items. */
1502 49556 : if (!cur->items)
1503 0 : cur->items = ginPostingListDecode(cur->seg, &cur->nitems);
1504 :
1505 : /*
1506 : * Fast path for the important special case that we're appending to
1507 : * the end of the page: don't let the last segment on the page grow
1508 : * larger than the target, create a new segment before that happens.
1509 : */
1510 99112 : if (!dlist_has_next(&leaf->segments, iter.cur) &&
1511 49556 : ginCompareItemPointers(&cur->items[cur->nitems - 1], &nextnew[0]) < 0 &&
1512 49556 : cur->seg != NULL &&
1513 49556 : SizeOfGinPostingList(cur->seg) >= GinPostingListSegmentTargetSize)
1514 : {
1515 368 : newseg = palloc(sizeof(leafSegmentInfo));
1516 368 : newseg->seg = NULL;
1517 368 : newseg->items = nextnew;
1518 368 : newseg->nitems = nthis;
1519 368 : newseg->action = GIN_SEGMENT_INSERT;
1520 368 : dlist_push_tail(&leaf->segments, &newseg->node);
1521 368 : modified = true;
1522 49556 : break;
1523 : }
1524 :
1525 49188 : tmpitems = ginMergeItemPointers(cur->items, cur->nitems,
1526 : nextnew, nthis,
1527 : &ntmpitems);
1528 49188 : if (ntmpitems != cur->nitems)
1529 : {
1530 : /*
1531 : * If there are no duplicates, track the added items so that we
1532 : * can emit a compact ADDITEMS WAL record later on. (it doesn't
1533 : * seem worth re-checking which items were duplicates, if there
1534 : * were any)
1535 : */
1536 49188 : if (ntmpitems == nthis + cur->nitems &&
1537 49188 : cur->action == GIN_SEGMENT_UNMODIFIED)
1538 : {
1539 49188 : cur->action = GIN_SEGMENT_ADDITEMS;
1540 49188 : cur->modifieditems = nextnew;
1541 49188 : cur->nmodifieditems = nthis;
1542 : }
1543 : else
1544 0 : cur->action = GIN_SEGMENT_REPLACE;
1545 :
1546 49188 : cur->items = tmpitems;
1547 49188 : cur->nitems = ntmpitems;
1548 49188 : cur->seg = NULL;
1549 49188 : modified = true;
1550 : }
1551 :
1552 49188 : nextnew += nthis;
1553 49188 : newleft -= nthis;
1554 49188 : if (newleft == 0)
1555 49188 : break;
1556 : }
1557 :
1558 49556 : return modified;
1559 : }
1560 :
1561 : /*
1562 : * Recompresses all segments that have been modified.
1563 : *
1564 : * If not all the items fit on two pages (ie. after split), we store as
1565 : * many items as fit, and set *remaining to the first item that didn't fit.
1566 : * If all items fit, *remaining is set to invalid.
1567 : *
1568 : * Returns true if the page has to be split.
1569 : */
1570 : static bool
1571 49556 : leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
1572 : {
1573 49556 : int pgused = 0;
1574 49556 : bool needsplit = false;
1575 : dlist_iter iter;
1576 : int segsize;
1577 : leafSegmentInfo *nextseg;
1578 : int npacked;
1579 : bool modified;
1580 : dlist_node *cur_node;
1581 : dlist_node *next_node;
1582 :
1583 49556 : ItemPointerSetInvalid(remaining);
1584 :
1585 : /*
1586 : * cannot use dlist_foreach_modify here because we insert adjacent items
1587 : * while iterating.
1588 : */
1589 897554 : for (cur_node = dlist_head_node(&leaf->segments);
1590 : cur_node != NULL;
1591 847998 : cur_node = next_node)
1592 : {
1593 848038 : leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
1594 : cur_node);
1595 :
1596 848038 : if (dlist_has_next(&leaf->segments, cur_node))
1597 796628 : next_node = dlist_next_node(&leaf->segments, cur_node);
1598 : else
1599 51410 : next_node = NULL;
1600 :
1601 : /* Compress the posting list, if necessary */
1602 848038 : if (seginfo->action != GIN_SEGMENT_DELETE)
1603 : {
1604 848038 : if (seginfo->seg == NULL)
1605 : {
1606 51410 : if (seginfo->nitems > GinPostingListSegmentMaxSize)
1607 1888 : npacked = 0; /* no chance that it would fit. */
1608 : else
1609 : {
1610 49522 : seginfo->seg = ginCompressPostingList(seginfo->items,
1611 : seginfo->nitems,
1612 : GinPostingListSegmentMaxSize,
1613 : &npacked);
1614 : }
1615 51410 : if (npacked != seginfo->nitems)
1616 : {
1617 : /*
1618 : * Too large. Compress again to the target size, and
1619 : * create a new segment to represent the remaining items.
1620 : * The new segment is inserted after this one, so it will
1621 : * be processed in the next iteration of this loop.
1622 : */
1623 1894 : if (seginfo->seg)
1624 6 : pfree(seginfo->seg);
1625 1894 : seginfo->seg = ginCompressPostingList(seginfo->items,
1626 : seginfo->nitems,
1627 : GinPostingListSegmentTargetSize,
1628 : &npacked);
1629 1894 : if (seginfo->action != GIN_SEGMENT_INSERT)
1630 18 : seginfo->action = GIN_SEGMENT_REPLACE;
1631 :
1632 1894 : nextseg = palloc(sizeof(leafSegmentInfo));
1633 1894 : nextseg->action = GIN_SEGMENT_INSERT;
1634 1894 : nextseg->seg = NULL;
1635 1894 : nextseg->items = &seginfo->items[npacked];
1636 1894 : nextseg->nitems = seginfo->nitems - npacked;
1637 1894 : next_node = &nextseg->node;
1638 1894 : dlist_insert_after(cur_node, next_node);
1639 : }
1640 : }
1641 :
1642 : /*
1643 : * If the segment is very small, merge it with the next segment.
1644 : */
1645 848038 : if (SizeOfGinPostingList(seginfo->seg) < GinPostingListSegmentMinSize && next_node)
1646 : {
1647 : int nmerged;
1648 :
1649 0 : nextseg = dlist_container(leafSegmentInfo, node, next_node);
1650 :
1651 0 : if (seginfo->items == NULL)
1652 0 : seginfo->items = ginPostingListDecode(seginfo->seg,
1653 : &seginfo->nitems);
1654 0 : if (nextseg->items == NULL)
1655 0 : nextseg->items = ginPostingListDecode(nextseg->seg,
1656 : &nextseg->nitems);
1657 0 : nextseg->items =
1658 0 : ginMergeItemPointers(seginfo->items, seginfo->nitems,
1659 0 : nextseg->items, nextseg->nitems,
1660 : &nmerged);
1661 : Assert(nmerged == seginfo->nitems + nextseg->nitems);
1662 0 : nextseg->nitems = nmerged;
1663 0 : nextseg->seg = NULL;
1664 :
1665 0 : nextseg->action = GIN_SEGMENT_REPLACE;
1666 0 : nextseg->modifieditems = NULL;
1667 0 : nextseg->nmodifieditems = 0;
1668 :
1669 0 : if (seginfo->action == GIN_SEGMENT_INSERT)
1670 : {
1671 0 : dlist_delete(cur_node);
1672 0 : continue;
1673 : }
1674 : else
1675 : {
1676 0 : seginfo->action = GIN_SEGMENT_DELETE;
1677 0 : seginfo->seg = NULL;
1678 : }
1679 : }
1680 :
1681 848038 : seginfo->items = NULL;
1682 848038 : seginfo->nitems = 0;
1683 : }
1684 :
1685 848038 : if (seginfo->action == GIN_SEGMENT_DELETE)
1686 0 : continue;
1687 :
1688 : /*
1689 : * OK, we now have a compressed version of this segment ready for
1690 : * copying to the page. Did we exceed the size that fits on one page?
1691 : */
1692 848038 : segsize = SizeOfGinPostingList(seginfo->seg);
1693 848038 : if (pgused + segsize > GinDataPageMaxDataSize)
1694 : {
1695 190 : if (!needsplit)
1696 : {
1697 : /* switch to right page */
1698 : Assert(pgused > 0);
1699 150 : leaf->lastleft = dlist_prev_node(&leaf->segments, cur_node);
1700 150 : needsplit = true;
1701 150 : leaf->lsize = pgused;
1702 150 : pgused = 0;
1703 : }
1704 : else
1705 : {
1706 : /*
1707 : * Filled both pages. The last segment we constructed did not
1708 : * fit.
1709 : */
1710 40 : *remaining = seginfo->seg->first;
1711 :
1712 : /*
1713 : * remove all segments that did not fit from the list.
1714 : */
1715 80 : while (dlist_has_next(&leaf->segments, cur_node))
1716 40 : dlist_delete(dlist_next_node(&leaf->segments, cur_node));
1717 40 : dlist_delete(cur_node);
1718 40 : break;
1719 : }
1720 : }
1721 :
1722 847998 : pgused += segsize;
1723 : }
1724 :
1725 49556 : if (!needsplit)
1726 : {
1727 49406 : leaf->lsize = pgused;
1728 49406 : leaf->rsize = 0;
1729 : }
1730 : else
1731 150 : leaf->rsize = pgused;
1732 :
1733 : Assert(leaf->lsize <= GinDataPageMaxDataSize);
1734 : Assert(leaf->rsize <= GinDataPageMaxDataSize);
1735 :
1736 : /*
1737 : * Make a palloc'd copy of every segment after the first modified one,
1738 : * because as we start copying items to the original page, we might
1739 : * overwrite an existing segment.
1740 : */
1741 49556 : modified = false;
1742 897554 : dlist_foreach(iter, &leaf->segments)
1743 : {
1744 847998 : leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
1745 : iter.cur);
1746 :
1747 847998 : if (!modified && seginfo->action != GIN_SEGMENT_UNMODIFIED)
1748 : {
1749 49556 : modified = true;
1750 : }
1751 798442 : else if (modified && seginfo->action == GIN_SEGMENT_UNMODIFIED)
1752 : {
1753 : GinPostingList *tmp;
1754 :
1755 0 : segsize = SizeOfGinPostingList(seginfo->seg);
1756 0 : tmp = palloc(segsize);
1757 0 : memcpy(tmp, seginfo->seg, segsize);
1758 0 : seginfo->seg = tmp;
1759 : }
1760 : }
1761 :
1762 49556 : return needsplit;
1763 : }
1764 :
1765 :
1766 : /*** Functions that are exported to the rest of the GIN code ***/
1767 :
1768 : /*
1769 : * Creates new posting tree containing the given TIDs. Returns the page
1770 : * number of the root of the new posting tree.
1771 : *
1772 : * items[] must be in sorted order with no duplicates.
1773 : */
1774 : BlockNumber
1775 128 : createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
1776 : GinStatsData *buildStats, Buffer entrybuffer)
1777 : {
1778 : BlockNumber blkno;
1779 : Buffer buffer;
1780 : Page tmppage;
1781 : Page page;
1782 : Pointer ptr;
1783 : int nrootitems;
1784 : int rootsize;
1785 128 : bool is_build = (buildStats != NULL);
1786 :
1787 : /* Construct the new root page in memory first. */
1788 128 : tmppage = (Page) palloc(BLCKSZ);
1789 128 : GinInitPage(tmppage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
1790 128 : GinPageGetOpaque(tmppage)->rightlink = InvalidBlockNumber;
1791 :
1792 : /*
1793 : * Write as many of the items to the root page as fit. In segments of max
1794 : * GinPostingListSegmentMaxSize bytes each.
1795 : */
1796 128 : nrootitems = 0;
1797 128 : rootsize = 0;
1798 128 : ptr = (Pointer) GinDataLeafPageGetPostingList(tmppage);
1799 2416 : while (nrootitems < nitems)
1800 : {
1801 : GinPostingList *segment;
1802 : int npacked;
1803 : int segsize;
1804 :
1805 2388 : segment = ginCompressPostingList(&items[nrootitems],
1806 2388 : nitems - nrootitems,
1807 : GinPostingListSegmentMaxSize,
1808 : &npacked);
1809 2388 : segsize = SizeOfGinPostingList(segment);
1810 2388 : if (rootsize + segsize > GinDataPageMaxDataSize)
1811 100 : break;
1812 :
1813 2288 : memcpy(ptr, segment, segsize);
1814 2288 : ptr += segsize;
1815 2288 : rootsize += segsize;
1816 2288 : nrootitems += npacked;
1817 2288 : pfree(segment);
1818 : }
1819 128 : GinDataPageSetDataSize(tmppage, rootsize);
1820 :
1821 : /*
1822 : * All set. Get a new physical page, and copy the in-memory page to it.
1823 : */
1824 128 : buffer = GinNewBuffer(index);
1825 128 : page = BufferGetPage(buffer);
1826 128 : blkno = BufferGetBlockNumber(buffer);
1827 :
1828 : /*
1829 : * Copy any predicate locks from the entry tree leaf (containing posting
1830 : * list) to the posting tree.
1831 : */
1832 128 : PredicateLockPageSplit(index, BufferGetBlockNumber(entrybuffer), blkno);
1833 :
1834 128 : START_CRIT_SECTION();
1835 :
1836 128 : PageRestoreTempPage(tmppage, page);
1837 128 : MarkBufferDirty(buffer);
1838 :
1839 128 : if (RelationNeedsWAL(index) && !is_build)
1840 : {
1841 : XLogRecPtr recptr;
1842 : ginxlogCreatePostingTree data;
1843 :
1844 40 : data.size = rootsize;
1845 :
1846 40 : XLogBeginInsert();
1847 40 : XLogRegisterData((char *) &data, sizeof(ginxlogCreatePostingTree));
1848 :
1849 40 : XLogRegisterData((char *) GinDataLeafPageGetPostingList(page),
1850 : rootsize);
1851 40 : XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
1852 :
1853 40 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE);
1854 40 : PageSetLSN(page, recptr);
1855 : }
1856 :
1857 128 : UnlockReleaseBuffer(buffer);
1858 :
1859 128 : END_CRIT_SECTION();
1860 :
1861 : /* During index build, count the newly-added data page */
1862 128 : if (buildStats)
1863 76 : buildStats->nDataPages++;
1864 :
1865 128 : elog(DEBUG2, "created GIN posting tree with %d items", nrootitems);
1866 :
1867 : /*
1868 : * Add any remaining TIDs to the newly-created posting tree.
1869 : */
1870 128 : if (nitems > nrootitems)
1871 : {
1872 100 : ginInsertItemPointers(index, blkno,
1873 100 : items + nrootitems,
1874 : nitems - nrootitems,
1875 : buildStats);
1876 : }
1877 :
1878 128 : return blkno;
1879 : }
1880 :
1881 : static void
1882 49596 : ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno)
1883 : {
1884 49596 : memset(btree, 0, sizeof(GinBtreeData));
1885 :
1886 49596 : btree->index = index;
1887 49596 : btree->rootBlkno = rootBlkno;
1888 :
1889 49596 : btree->findChildPage = dataLocateItem;
1890 49596 : btree->getLeftMostChild = dataGetLeftMostPage;
1891 49596 : btree->isMoveRight = dataIsMoveRight;
1892 49596 : btree->findItem = NULL;
1893 49596 : btree->findChildPtr = dataFindChildPtr;
1894 49596 : btree->beginPlaceToPage = dataBeginPlaceToPage;
1895 49596 : btree->execPlaceToPage = dataExecPlaceToPage;
1896 49596 : btree->fillRoot = ginDataFillRoot;
1897 49596 : btree->prepareDownlink = dataPrepareDownlink;
1898 :
1899 49596 : btree->isData = true;
1900 49596 : btree->fullScan = false;
1901 49596 : btree->isBuild = false;
1902 49596 : }
1903 :
1904 : /*
1905 : * Inserts array of item pointers, may execute several tree scan (very rare)
1906 : */
1907 : void
1908 49520 : ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
1909 : ItemPointerData *items, uint32 nitem,
1910 : GinStatsData *buildStats)
1911 : {
1912 : GinBtreeData btree;
1913 : GinBtreeDataLeafInsertData insertdata;
1914 : GinBtreeStack *stack;
1915 :
1916 49520 : ginPrepareDataScan(&btree, index, rootBlkno);
1917 49520 : btree.isBuild = (buildStats != NULL);
1918 49520 : insertdata.items = items;
1919 49520 : insertdata.nitem = nitem;
1920 49520 : insertdata.curitem = 0;
1921 :
1922 99076 : while (insertdata.curitem < insertdata.nitem)
1923 : {
1924 : /* search for the leaf page where the first item should go to */
1925 49560 : btree.itemptr = insertdata.items[insertdata.curitem];
1926 49560 : stack = ginFindLeafPage(&btree, false, true);
1927 :
1928 49556 : ginInsertValue(&btree, stack, &insertdata, buildStats);
1929 : }
1930 49516 : }
1931 :
1932 : /*
1933 : * Starts a new scan on a posting tree.
1934 : */
1935 : GinBtreeStack *
1936 76 : ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno)
1937 : {
1938 : GinBtreeStack *stack;
1939 :
1940 76 : ginPrepareDataScan(btree, index, rootBlkno);
1941 :
1942 76 : btree->fullScan = true;
1943 :
1944 76 : stack = ginFindLeafPage(btree, true, false);
1945 :
1946 76 : return stack;
1947 : }
|