LCOV - code coverage report
Current view: top level - src/backend/access/gin - gindatapage.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 80.4 % 663 533
Test Date: 2026-03-01 19:14:57 Functions: 93.1 % 29 27
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * gindatapage.c
       4              :  *    routines for handling GIN posting tree pages.
       5              :  *
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  * Portions Copyright (c) 1994, Regents of the University of California
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *          src/backend/access/gin/gindatapage.c
      12              :  *-------------------------------------------------------------------------
      13              :  */
      14              : 
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/gin_private.h"
      18              : #include "access/ginxlog.h"
      19              : #include "access/xloginsert.h"
      20              : #include "lib/ilist.h"
      21              : #include "miscadmin.h"
      22              : #include "storage/predicate.h"
      23              : #include "utils/rel.h"
      24              : 
      25              : /*
      26              :  * Min, Max and Target size of posting lists stored on leaf pages, in bytes.
      27              :  *
      28              :  * The code can deal with any size, but random access is more efficient when
      29              :  * a number of smaller lists are stored, rather than one big list. If a
      30              :  * posting list would become larger than Max size as a result of insertions,
      31              :  * it is split into two. If a posting list would be smaller than minimum
      32              :  * size, it is merged with the next posting list.
      33              :  */
      34              : #define GinPostingListSegmentMaxSize 384
      35              : #define GinPostingListSegmentTargetSize 256
      36              : #define GinPostingListSegmentMinSize 128
      37              : 
      38              : /*
      39              :  * At least this many items fit in a GinPostingListSegmentMaxSize-bytes
      40              :  * long segment. This is used when estimating how much space is required
      41              :  * for N items, at minimum.
      42              :  */
      43              : #define MinTuplesPerSegment ((GinPostingListSegmentMaxSize - 2) / 6)
      44              : 
      45              : /*
      46              :  * A working struct for manipulating a posting tree leaf page.
      47              :  */
      48              : typedef struct
      49              : {
      50              :     dlist_head  segments;       /* a list of leafSegmentInfos */
      51              : 
      52              :     /*
      53              :      * The following fields represent how the segments are split across pages,
      54              :      * if a page split is required. Filled in by leafRepackItems.
      55              :      */
      56              :     dlist_node *lastleft;       /* last segment on left page */
      57              :     int         lsize;          /* total size on left page */
      58              :     int         rsize;          /* total size on right page */
      59              : 
      60              :     bool        oldformat;      /* page is in pre-9.4 format on disk */
      61              : 
      62              :     /*
      63              :      * If we need WAL data representing the reconstructed leaf page, it's
      64              :      * stored here by computeLeafRecompressWALData.
      65              :      */
      66              :     void       *walinfo;        /* buffer start */
      67              :     int         walinfolen;     /* and length */
      68              : } disassembledLeaf;
      69              : 
      70              : typedef struct
      71              : {
      72              :     dlist_node  node;           /* linked list pointers */
      73              : 
      74              :     /*-------------
      75              :      * 'action' indicates the status of this in-memory segment, compared to
      76              :      * what's on disk. It is one of the GIN_SEGMENT_* action codes:
      77              :      *
      78              :      * UNMODIFIED   no changes
      79              :      * DELETE       the segment is to be removed. 'seg' and 'items' are
      80              :      *              ignored
      81              :      * INSERT       this is a completely new segment
      82              :      * REPLACE      this replaces an existing segment with new content
      83              :      * ADDITEMS     like REPLACE, but no items have been removed, and we track
      84              :      *              in detail what items have been added to this segment, in
      85              :      *              'modifieditems'
      86              :      *-------------
      87              :      */
      88              :     char        action;
      89              : 
      90              :     ItemPointerData *modifieditems;
      91              :     uint16      nmodifieditems;
      92              : 
      93              :     /*
      94              :      * The following fields represent the items in this segment. If 'items' is
      95              :      * not NULL, it contains a palloc'd array of the items in this segment. If
      96              :      * 'seg' is not NULL, it contains the items in an already-compressed
      97              :      * format. It can point to an on-disk page (!modified), or a palloc'd
      98              :      * segment in memory. If both are set, they must represent the same items.
      99              :      */
     100              :     GinPostingList *seg;
     101              :     ItemPointer items;
     102              :     int         nitems;         /* # of items in 'items', if items != NULL */
     103              : } leafSegmentInfo;
     104              : 
     105              : static ItemPointer dataLeafPageGetUncompressed(Page page, int *nitems);
     106              : static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
     107              :                                   GinBtreeStack *stack,
     108              :                                   void *insertdata, BlockNumber updateblkno,
     109              :                                   Page *newlpage, Page *newrpage);
     110              : 
     111              : static disassembledLeaf *disassembleLeaf(Page page);
     112              : static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
     113              : static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
     114              :                            int nNewItems);
     115              : 
     116              : static void computeLeafRecompressWALData(disassembledLeaf *leaf);
     117              : static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
     118              : static void dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
     119              :                                      ItemPointerData lbound, ItemPointerData rbound,
     120              :                                      Page lpage, Page rpage);
     121              : 
     122              : /*
     123              :  * Read TIDs from leaf data page to single uncompressed array. The TIDs are
     124              :  * returned in ascending order.
     125              :  *
     126              :  * advancePast is a hint, indicating that the caller is only interested in
     127              :  * TIDs > advancePast. To return all items, use ItemPointerSetMin.
     128              :  *
     129              :  * Note: This function can still return items smaller than advancePast that
     130              :  * are in the same posting list as the items of interest, so the caller must
     131              :  * still check all the returned items. But passing it allows this function to
     132              :  * skip whole posting lists.
     133              :  */
     134              : ItemPointer
     135           82 : GinDataLeafPageGetItems(Page page, int *nitems, ItemPointerData advancePast)
     136              : {
     137              :     ItemPointer result;
     138              : 
     139           82 :     if (GinPageIsCompressed(page))
     140              :     {
     141           82 :         GinPostingList *seg = GinDataLeafPageGetPostingList(page);
     142           82 :         Size        len = GinDataLeafPageGetPostingListSize(page);
     143           82 :         char       *endptr = (char *) seg + len;
     144              :         GinPostingList *next;
     145              : 
     146              :         /* Skip to the segment containing advancePast+1 */
     147           82 :         if (ItemPointerIsValid(&advancePast))
     148              :         {
     149           47 :             next = GinNextPostingListSegment(seg);
     150           89 :             while ((char *) next < endptr &&
     151           39 :                    ginCompareItemPointers(&next->first, &advancePast) <= 0)
     152              :             {
     153            3 :                 seg = next;
     154            3 :                 next = GinNextPostingListSegment(seg);
     155              :             }
     156           47 :             len = endptr - (char *) seg;
     157              :         }
     158              : 
     159           82 :         if (len > 0)
     160           76 :             result = ginPostingListDecodeAllSegments(seg, len, nitems);
     161              :         else
     162              :         {
     163            6 :             result = NULL;
     164            6 :             *nitems = 0;
     165              :         }
     166              :     }
     167              :     else
     168              :     {
     169            0 :         ItemPointer tmp = dataLeafPageGetUncompressed(page, nitems);
     170              : 
     171            0 :         result = palloc((*nitems) * sizeof(ItemPointerData));
     172            0 :         memcpy(result, tmp, (*nitems) * sizeof(ItemPointerData));
     173              :     }
     174              : 
     175           82 :     return result;
     176              : }
     177              : 
     178              : /*
     179              :  * Places all TIDs from leaf data page to bitmap.
     180              :  */
     181              : int
     182           15 : GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm)
     183              : {
     184              :     ItemPointer uncompressed;
     185              :     int         nitems;
     186              : 
     187           15 :     if (GinPageIsCompressed(page))
     188              :     {
     189           15 :         GinPostingList *segment = GinDataLeafPageGetPostingList(page);
     190           15 :         Size        len = GinDataLeafPageGetPostingListSize(page);
     191              : 
     192           15 :         nitems = ginPostingListDecodeAllSegmentsToTbm(segment, len, tbm);
     193              :     }
     194              :     else
     195              :     {
     196            0 :         uncompressed = dataLeafPageGetUncompressed(page, &nitems);
     197              : 
     198            0 :         if (nitems > 0)
     199            0 :             tbm_add_tuples(tbm, uncompressed, nitems, false);
     200              :     }
     201              : 
     202           15 :     return nitems;
     203              : }
     204              : 
     205              : /*
     206              :  * Get pointer to the uncompressed array of items on a pre-9.4 format
     207              :  * uncompressed leaf page. The number of items in the array is returned in
     208              :  * *nitems.
     209              :  */
     210              : static ItemPointer
     211            0 : dataLeafPageGetUncompressed(Page page, int *nitems)
     212              : {
     213              :     ItemPointer items;
     214              : 
     215              :     Assert(!GinPageIsCompressed(page));
     216              : 
     217              :     /*
     218              :      * In the old pre-9.4 page format, the whole page content is used for
     219              :      * uncompressed items, and the number of items is stored in 'maxoff'
     220              :      */
     221            0 :     items = (ItemPointer) GinDataPageGetData(page);
     222            0 :     *nitems = GinPageGetOpaque(page)->maxoff;
     223              : 
     224            0 :     return items;
     225              : }
     226              : 
     227              : /*
     228              :  * Check if we should follow the right link to find the item we're searching
     229              :  * for.
     230              :  *
     231              :  * Compares inserting item pointer with the right bound of the current page.
     232              :  */
     233              : static bool
     234        13308 : dataIsMoveRight(GinBtree btree, Page page)
     235              : {
     236        13308 :     ItemPointer iptr = GinDataPageGetRightBound(page);
     237              : 
     238        13308 :     if (GinPageRightMost(page))
     239        13308 :         return false;
     240              : 
     241            0 :     if (GinPageIsDeleted(page))
     242            0 :         return true;
     243              : 
     244            0 :     return (ginCompareItemPointers(&btree->itemptr, iptr) > 0);
     245              : }
     246              : 
     247              : /*
     248              :  * Find correct PostingItem in non-leaf page. It is assumed that this is
     249              :  * the correct page, and the searched value SHOULD be on the page.
     250              :  */
     251              : static BlockNumber
     252        13346 : dataLocateItem(GinBtree btree, GinBtreeStack *stack)
     253              : {
     254              :     OffsetNumber low,
     255              :                 high,
     256              :                 maxoff;
     257        13346 :     PostingItem *pitem = NULL;
     258              :     int         result;
     259        13346 :     Page        page = BufferGetPage(stack->buffer);
     260              : 
     261              :     Assert(!GinPageIsLeaf(page));
     262              :     Assert(GinPageIsData(page));
     263              : 
     264        13346 :     if (btree->fullScan)
     265              :     {
     266           38 :         stack->off = FirstOffsetNumber;
     267           38 :         stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
     268           38 :         return btree->getLeftMostChild(btree, page);
     269              :     }
     270              : 
     271        13308 :     low = FirstOffsetNumber;
     272        13308 :     maxoff = high = GinPageGetOpaque(page)->maxoff;
     273              :     Assert(high >= low);
     274              : 
     275        13308 :     high++;
     276              : 
     277        39924 :     while (high > low)
     278              :     {
     279        26616 :         OffsetNumber mid = low + ((high - low) / 2);
     280              : 
     281        26616 :         pitem = GinDataPageGetPostingItem(page, mid);
     282              : 
     283        26616 :         if (mid == maxoff)
     284              :         {
     285              :             /*
     286              :              * Right infinity, page already correctly chosen with a help of
     287              :              * dataIsMoveRight
     288              :              */
     289        13308 :             result = -1;
     290              :         }
     291              :         else
     292              :         {
     293        13308 :             pitem = GinDataPageGetPostingItem(page, mid);
     294        13308 :             result = ginCompareItemPointers(&btree->itemptr, &(pitem->key));
     295              :         }
     296              : 
     297        26616 :         if (result == 0)
     298              :         {
     299            0 :             stack->off = mid;
     300            0 :             return PostingItemGetBlockNumber(pitem);
     301              :         }
     302        26616 :         else if (result > 0)
     303        13308 :             low = mid + 1;
     304              :         else
     305        13308 :             high = mid;
     306              :     }
     307              : 
     308              :     Assert(high >= FirstOffsetNumber && high <= maxoff);
     309              : 
     310        13308 :     stack->off = high;
     311        13308 :     pitem = GinDataPageGetPostingItem(page, high);
     312        13308 :     return PostingItemGetBlockNumber(pitem);
     313              : }
     314              : 
     315              : /*
     316              :  * Find link to blkno on non-leaf page, returns offset of PostingItem
     317              :  */
     318              : static OffsetNumber
     319           23 : dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
     320              : {
     321              :     OffsetNumber i,
     322           23 :                 maxoff = GinPageGetOpaque(page)->maxoff;
     323              :     PostingItem *pitem;
     324              : 
     325              :     Assert(!GinPageIsLeaf(page));
     326              :     Assert(GinPageIsData(page));
     327              : 
     328              :     /* if page isn't changed, we return storedOff */
     329           23 :     if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
     330              :     {
     331           23 :         pitem = GinDataPageGetPostingItem(page, storedOff);
     332           23 :         if (PostingItemGetBlockNumber(pitem) == blkno)
     333           23 :             return storedOff;
     334              : 
     335              :         /*
     336              :          * we hope, that needed pointer goes to right. It's true if there
     337              :          * wasn't a deletion
     338              :          */
     339            0 :         for (i = storedOff + 1; i <= maxoff; i++)
     340              :         {
     341            0 :             pitem = GinDataPageGetPostingItem(page, i);
     342            0 :             if (PostingItemGetBlockNumber(pitem) == blkno)
     343            0 :                 return i;
     344              :         }
     345              : 
     346            0 :         maxoff = storedOff - 1;
     347              :     }
     348              : 
     349              :     /* last chance */
     350            0 :     for (i = FirstOffsetNumber; i <= maxoff; i++)
     351              :     {
     352            0 :         pitem = GinDataPageGetPostingItem(page, i);
     353            0 :         if (PostingItemGetBlockNumber(pitem) == blkno)
     354            0 :             return i;
     355              :     }
     356              : 
     357            0 :     return InvalidOffsetNumber;
     358              : }
     359              : 
     360              : /*
     361              :  * Return blkno of leftmost child
     362              :  */
     363              : static BlockNumber
     364           38 : dataGetLeftMostPage(GinBtree btree, Page page)
     365              : {
     366              :     PostingItem *pitem;
     367              : 
     368              :     Assert(!GinPageIsLeaf(page));
     369              :     Assert(GinPageIsData(page));
     370              :     Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
     371              : 
     372           38 :     pitem = GinDataPageGetPostingItem(page, FirstOffsetNumber);
     373           38 :     return PostingItemGetBlockNumber(pitem);
     374              : }
     375              : 
     376              : /*
     377              :  * Add PostingItem to a non-leaf page.
     378              :  */
     379              : void
     380          131 : GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset)
     381              : {
     382          131 :     OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
     383              :     char       *ptr;
     384              : 
     385              :     Assert(PostingItemGetBlockNumber(data) != InvalidBlockNumber);
     386              :     Assert(!GinPageIsLeaf(page));
     387              : 
     388          131 :     if (offset == InvalidOffsetNumber)
     389              :     {
     390          106 :         ptr = (char *) GinDataPageGetPostingItem(page, maxoff + 1);
     391              :     }
     392              :     else
     393              :     {
     394           25 :         ptr = (char *) GinDataPageGetPostingItem(page, offset);
     395           25 :         if (offset != maxoff + 1)
     396           25 :             memmove(ptr + sizeof(PostingItem),
     397              :                     ptr,
     398           25 :                     (maxoff - offset + 1) * sizeof(PostingItem));
     399              :     }
     400          131 :     memcpy(ptr, data, sizeof(PostingItem));
     401              : 
     402          131 :     maxoff++;
     403          131 :     GinPageGetOpaque(page)->maxoff = maxoff;
     404              : 
     405              :     /*
     406              :      * Also set pd_lower to the end of the posting items, to follow the
     407              :      * "standard" page layout, so that we can squeeze out the unused space
     408              :      * from full-page images.
     409              :      */
     410          131 :     GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
     411          131 : }
     412              : 
     413              : /*
     414              :  * Delete posting item from non-leaf page
     415              :  */
     416              : void
     417            6 : GinPageDeletePostingItem(Page page, OffsetNumber offset)
     418              : {
     419            6 :     OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
     420              : 
     421              :     Assert(!GinPageIsLeaf(page));
     422              :     Assert(offset >= FirstOffsetNumber && offset <= maxoff);
     423              : 
     424            6 :     if (offset != maxoff)
     425            6 :         memmove(GinDataPageGetPostingItem(page, offset),
     426            6 :                 GinDataPageGetPostingItem(page, offset + 1),
     427            6 :                 sizeof(PostingItem) * (maxoff - offset));
     428              : 
     429            6 :     maxoff--;
     430            6 :     GinPageGetOpaque(page)->maxoff = maxoff;
     431              : 
     432            6 :     GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
     433            6 : }
     434              : 
     435              : /*
     436              :  * Prepare to insert data on a leaf data page.
     437              :  *
     438              :  * If it will fit, return GPTP_INSERT after doing whatever setup is needed
     439              :  * before we enter the insertion critical section.  *ptp_workspace can be
     440              :  * set to pass information along to the execPlaceToPage function.
     441              :  *
     442              :  * If it won't fit, perform a page split and return two temporary page
     443              :  * images into *newlpage and *newrpage, with result GPTP_SPLIT.
     444              :  *
     445              :  * In neither case should the given page buffer be modified here.
     446              :  */
     447              : static GinPlaceToPageRC
     448        24779 : dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
     449              :                          void *insertdata,
     450              :                          void **ptp_workspace,
     451              :                          Page *newlpage, Page *newrpage)
     452              : {
     453        24779 :     GinBtreeDataLeafInsertData *items = insertdata;
     454        24779 :     ItemPointer newItems = &items->items[items->curitem];
     455        24779 :     int         maxitems = items->nitem - items->curitem;
     456        24779 :     Page        page = BufferGetPage(buf);
     457              :     int         i;
     458              :     ItemPointerData rbound;
     459              :     ItemPointerData lbound;
     460              :     bool        needsplit;
     461              :     bool        append;
     462              :     int         segsize;
     463              :     Size        freespace;
     464              :     disassembledLeaf *leaf;
     465              :     leafSegmentInfo *lastleftinfo;
     466              :     ItemPointerData maxOldItem;
     467              :     ItemPointerData remaining;
     468              : 
     469        24779 :     rbound = *GinDataPageGetRightBound(page);
     470              : 
     471              :     /*
     472              :      * Count how many of the new items belong to this page.
     473              :      */
     474        24779 :     if (!GinPageRightMost(page))
     475              :     {
     476            0 :         for (i = 0; i < maxitems; i++)
     477              :         {
     478            0 :             if (ginCompareItemPointers(&newItems[i], &rbound) > 0)
     479              :             {
     480              :                 /*
     481              :                  * This needs to go to some other location in the tree. (The
     482              :                  * caller should've chosen the insert location so that at
     483              :                  * least the first item goes here.)
     484              :                  */
     485              :                 Assert(i > 0);
     486            0 :                 break;
     487              :             }
     488              :         }
     489            0 :         maxitems = i;
     490              :     }
     491              : 
     492              :     /* Disassemble the data on the page */
     493        24779 :     leaf = disassembleLeaf(page);
     494              : 
     495              :     /*
     496              :      * Are we appending to the end of the page? IOW, are all the new items
     497              :      * larger than any of the existing items.
     498              :      */
     499        24779 :     if (!dlist_is_empty(&leaf->segments))
     500              :     {
     501        24779 :         lastleftinfo = dlist_container(leafSegmentInfo, node,
     502              :                                        dlist_tail_node(&leaf->segments));
     503        24779 :         if (!lastleftinfo->items)
     504        24779 :             lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg,
     505              :                                                        &lastleftinfo->nitems);
     506        24779 :         maxOldItem = lastleftinfo->items[lastleftinfo->nitems - 1];
     507        24779 :         if (ginCompareItemPointers(&newItems[0], &maxOldItem) >= 0)
     508        24779 :             append = true;
     509              :         else
     510            0 :             append = false;
     511              :     }
     512              :     else
     513              :     {
     514            0 :         ItemPointerSetMin(&maxOldItem);
     515            0 :         append = true;
     516              :     }
     517              : 
     518              :     /*
     519              :      * If we're appending to the end of the page, we will append as many items
     520              :      * as we can fit (after splitting), and stop when the pages becomes full.
     521              :      * Otherwise we have to limit the number of new items to insert, because
     522              :      * once we start packing we can't just stop when we run out of space,
     523              :      * because we must make sure that all the old items still fit.
     524              :      */
     525        24779 :     if (GinPageIsCompressed(page))
     526        24779 :         freespace = GinDataLeafPageGetFreeSpace(page);
     527              :     else
     528            0 :         freespace = 0;
     529        24779 :     if (append)
     530              :     {
     531              :         /*
     532              :          * Even when appending, trying to append more items than will fit is
     533              :          * not completely free, because we will merge the new items and old
     534              :          * items into an array below. In the best case, every new item fits in
     535              :          * a single byte, and we can use all the free space on the old page as
     536              :          * well as the new page. For simplicity, ignore segment overhead etc.
     537              :          */
     538        24779 :         maxitems = Min(maxitems, freespace + GinDataPageMaxDataSize);
     539              :     }
     540              :     else
     541              :     {
     542              :         /*
     543              :          * Calculate a conservative estimate of how many new items we can fit
     544              :          * on the two pages after splitting.
     545              :          *
     546              :          * We can use any remaining free space on the old page to store full
     547              :          * segments, as well as the new page. Each full-sized segment can hold
     548              :          * at least MinTuplesPerSegment items
     549              :          */
     550              :         int         nnewsegments;
     551              : 
     552            0 :         nnewsegments = freespace / GinPostingListSegmentMaxSize;
     553            0 :         nnewsegments += GinDataPageMaxDataSize / GinPostingListSegmentMaxSize;
     554            0 :         maxitems = Min(maxitems, nnewsegments * MinTuplesPerSegment);
     555              :     }
     556              : 
     557              :     /* Add the new items to the segment list */
     558        24779 :     if (!addItemsToLeaf(leaf, newItems, maxitems))
     559              :     {
     560              :         /* all items were duplicates, we have nothing to do */
     561            0 :         items->curitem += maxitems;
     562              : 
     563            0 :         return GPTP_NO_WORK;
     564              :     }
     565              : 
     566              :     /*
     567              :      * Pack the items back to compressed segments, ready for writing to disk.
     568              :      */
     569        24779 :     needsplit = leafRepackItems(leaf, &remaining);
     570              : 
     571              :     /*
     572              :      * Did all the new items fit?
     573              :      *
     574              :      * If we're appending, it's OK if they didn't. But as a sanity check,
     575              :      * verify that all the old items fit.
     576              :      */
     577        24779 :     if (ItemPointerIsValid(&remaining))
     578              :     {
     579           20 :         if (!append || ItemPointerCompare(&maxOldItem, &remaining) >= 0)
     580            0 :             elog(ERROR, "could not split GIN page; all old items didn't fit");
     581              : 
     582              :         /* Count how many of the new items did fit. */
     583       152421 :         for (i = 0; i < maxitems; i++)
     584              :         {
     585       152421 :             if (ginCompareItemPointers(&newItems[i], &remaining) >= 0)
     586           20 :                 break;
     587              :         }
     588           20 :         if (i == 0)
     589            0 :             elog(ERROR, "could not split GIN page; no new items fit");
     590           20 :         maxitems = i;
     591              :     }
     592              : 
     593        24779 :     if (!needsplit)
     594              :     {
     595              :         /*
     596              :          * Great, all the items fit on a single page.  If needed, prepare data
     597              :          * for a WAL record describing the changes we'll make.
     598              :          */
     599        24703 :         if (RelationNeedsWAL(btree->index) && !btree->isBuild)
     600        24703 :             computeLeafRecompressWALData(leaf);
     601              : 
     602              :         /*
     603              :          * We're ready to enter the critical section, but
     604              :          * dataExecPlaceToPageLeaf will need access to the "leaf" data.
     605              :          */
     606        24703 :         *ptp_workspace = leaf;
     607              : 
     608        24703 :         if (append)
     609        24703 :             elog(DEBUG2, "appended %d new items to block %u; %d bytes (%d to go)",
     610              :                  maxitems, BufferGetBlockNumber(buf), leaf->lsize,
     611              :                  items->nitem - items->curitem - maxitems);
     612              :         else
     613            0 :             elog(DEBUG2, "inserted %d new items to block %u; %d bytes (%d to go)",
     614              :                  maxitems, BufferGetBlockNumber(buf), leaf->lsize,
     615              :                  items->nitem - items->curitem - maxitems);
     616              :     }
     617              :     else
     618              :     {
     619              :         /*
     620              :          * Have to split.
     621              :          *
     622              :          * leafRepackItems already divided the segments between the left and
     623              :          * the right page. It filled the left page as full as possible, and
     624              :          * put the rest to the right page. When building a new index, that's
     625              :          * good, because the table is scanned from beginning to end and there
     626              :          * won't be any more insertions to the left page during the build.
     627              :          * This packs the index as tight as possible. But otherwise, split
     628              :          * 50/50, by moving segments from the left page to the right page
     629              :          * until they're balanced.
     630              :          *
     631              :          * As a further heuristic, when appending items to the end of the
     632              :          * page, try to make the left page 75% full, on the assumption that
     633              :          * subsequent insertions will probably also go to the end. This packs
     634              :          * the index somewhat tighter when appending to a table, which is very
     635              :          * common.
     636              :          */
     637           76 :         if (!btree->isBuild)
     638              :         {
     639          130 :             while (dlist_has_prev(&leaf->segments, leaf->lastleft))
     640              :             {
     641          130 :                 lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft);
     642              : 
     643              :                 /* ignore deleted segments */
     644          130 :                 if (lastleftinfo->action != GIN_SEGMENT_DELETE)
     645              :                 {
     646          130 :                     segsize = SizeOfGinPostingList(lastleftinfo->seg);
     647              : 
     648              :                     /*
     649              :                      * Note that we check that the right page doesn't become
     650              :                      * more full than the left page even when appending. It's
     651              :                      * possible that we added enough items to make both pages
     652              :                      * more than 75% full.
     653              :                      */
     654          130 :                     if ((leaf->lsize - segsize) - (leaf->rsize + segsize) < 0)
     655           24 :                         break;
     656          106 :                     if (append)
     657              :                     {
     658          106 :                         if ((leaf->lsize - segsize) < (BLCKSZ * 3) / 4)
     659            5 :                             break;
     660              :                     }
     661              : 
     662          101 :                     leaf->lsize -= segsize;
     663          101 :                     leaf->rsize += segsize;
     664              :                 }
     665          101 :                 leaf->lastleft = dlist_prev_node(&leaf->segments, leaf->lastleft);
     666              :             }
     667              :         }
     668              :         Assert(leaf->lsize <= GinDataPageMaxDataSize);
     669              :         Assert(leaf->rsize <= GinDataPageMaxDataSize);
     670              : 
     671              :         /*
     672              :          * Fetch the max item in the left page's last segment; it becomes the
     673              :          * right bound of the page.
     674              :          */
     675           76 :         lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft);
     676           76 :         if (!lastleftinfo->items)
     677           76 :             lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg,
     678              :                                                        &lastleftinfo->nitems);
     679           76 :         lbound = lastleftinfo->items[lastleftinfo->nitems - 1];
     680              : 
     681              :         /*
     682              :          * Now allocate a couple of temporary page images, and fill them.
     683              :          */
     684           76 :         *newlpage = palloc(BLCKSZ);
     685           76 :         *newrpage = palloc(BLCKSZ);
     686              : 
     687           76 :         dataPlaceToPageLeafSplit(leaf, lbound, rbound,
     688              :                                  *newlpage, *newrpage);
     689              : 
     690              :         Assert(GinPageRightMost(page) ||
     691              :                ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
     692              :                                       GinDataPageGetRightBound(*newrpage)) < 0);
     693              : 
     694           76 :         if (append)
     695           76 :             elog(DEBUG2, "appended %d items to block %u; split %d/%d (%d to go)",
     696              :                  maxitems, BufferGetBlockNumber(buf), leaf->lsize, leaf->rsize,
     697              :                  items->nitem - items->curitem - maxitems);
     698              :         else
     699            0 :             elog(DEBUG2, "inserted %d items to block %u; split %d/%d (%d to go)",
     700              :                  maxitems, BufferGetBlockNumber(buf), leaf->lsize, leaf->rsize,
     701              :                  items->nitem - items->curitem - maxitems);
     702              :     }
     703              : 
     704        24779 :     items->curitem += maxitems;
     705              : 
     706        24779 :     return needsplit ? GPTP_SPLIT : GPTP_INSERT;
     707              : }
     708              : 
     709              : /*
     710              :  * Perform data insertion after beginPlaceToPage has decided it will fit.
     711              :  *
     712              :  * This is invoked within a critical section, and XLOG record creation (if
     713              :  * needed) is already started.  The target buffer is registered in slot 0.
     714              :  */
     715              : static void
     716        24703 : dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
     717              :                         void *insertdata, void *ptp_workspace)
     718              : {
     719        24703 :     disassembledLeaf *leaf = (disassembledLeaf *) ptp_workspace;
     720              : 
     721              :     /* Apply changes to page */
     722        24703 :     dataPlaceToPageLeafRecompress(buf, leaf);
     723              : 
     724        24703 :     MarkBufferDirty(buf);
     725              : 
     726              :     /* If needed, register WAL data built by computeLeafRecompressWALData */
     727        24703 :     if (RelationNeedsWAL(btree->index) && !btree->isBuild)
     728              :     {
     729        24703 :         XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
     730        24703 :         XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
     731              :     }
     732        24703 : }
     733              : 
     734              : /*
     735              :  * Vacuum a posting tree leaf page.
     736              :  */
     737              : void
     738           27 : ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
     739              : {
     740           27 :     Page        page = BufferGetPage(buffer);
     741              :     disassembledLeaf *leaf;
     742           27 :     bool        removedsomething = false;
     743              :     dlist_iter  iter;
     744              : 
     745           27 :     leaf = disassembleLeaf(page);
     746              : 
     747              :     /* Vacuum each segment. */
     748          531 :     dlist_foreach(iter, &leaf->segments)
     749              :     {
     750          504 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur);
     751              :         int         oldsegsize;
     752              :         ItemPointer cleaned;
     753              :         int         ncleaned;
     754              : 
     755          504 :         if (!seginfo->items)
     756          504 :             seginfo->items = ginPostingListDecode(seginfo->seg,
     757              :                                                   &seginfo->nitems);
     758          504 :         if (seginfo->seg)
     759          504 :             oldsegsize = SizeOfGinPostingList(seginfo->seg);
     760              :         else
     761            0 :             oldsegsize = GinDataPageMaxDataSize;
     762              : 
     763          504 :         cleaned = ginVacuumItemPointers(gvs,
     764              :                                         seginfo->items,
     765              :                                         seginfo->nitems,
     766              :                                         &ncleaned);
     767          504 :         pfree(seginfo->items);
     768          504 :         seginfo->items = NULL;
     769          504 :         seginfo->nitems = 0;
     770          504 :         if (cleaned)
     771              :         {
     772          468 :             if (ncleaned > 0)
     773              :             {
     774              :                 int         npacked;
     775              : 
     776           18 :                 seginfo->seg = ginCompressPostingList(cleaned,
     777              :                                                       ncleaned,
     778              :                                                       oldsegsize,
     779              :                                                       &npacked);
     780              :                 /* Removing an item never increases the size of the segment */
     781           18 :                 if (npacked != ncleaned)
     782            0 :                     elog(ERROR, "could not fit vacuumed posting list");
     783           18 :                 seginfo->action = GIN_SEGMENT_REPLACE;
     784              :             }
     785              :             else
     786              :             {
     787          450 :                 seginfo->seg = NULL;
     788          450 :                 seginfo->items = NULL;
     789          450 :                 seginfo->action = GIN_SEGMENT_DELETE;
     790              :             }
     791          468 :             seginfo->nitems = ncleaned;
     792              : 
     793          468 :             removedsomething = true;
     794              :         }
     795              :     }
     796              : 
     797              :     /*
     798              :      * If we removed any items, reconstruct the page from the pieces.
     799              :      *
     800              :      * We don't try to re-encode the segments here, even though some of them
     801              :      * might be really small now that we've removed some items from them. It
     802              :      * seems like a waste of effort, as there isn't really any benefit from
     803              :      * larger segments per se; larger segments only help to pack more items in
     804              :      * the same space. We might as well delay doing that until the next
     805              :      * insertion, which will need to re-encode at least part of the page
     806              :      * anyway.
     807              :      *
     808              :      * Also note if the page was in uncompressed, pre-9.4 format before, it is
     809              :      * now represented as one huge segment that contains all the items. It
     810              :      * might make sense to split that, to speed up random access, but we don't
     811              :      * bother. You'll have to REINDEX anyway if you want the full gain of the
     812              :      * new tighter index format.
     813              :      */
     814           27 :     if (removedsomething)
     815              :     {
     816              :         bool        modified;
     817              : 
     818              :         /*
     819              :          * Make sure we have a palloc'd copy of all segments, after the first
     820              :          * segment that is modified. (dataPlaceToPageLeafRecompress requires
     821              :          * this).
     822              :          */
     823           27 :         modified = false;
     824          531 :         dlist_foreach(iter, &leaf->segments)
     825              :         {
     826          504 :             leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
     827              :                                                        iter.cur);
     828              : 
     829          504 :             if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
     830          468 :                 modified = true;
     831          504 :             if (modified && seginfo->action != GIN_SEGMENT_DELETE)
     832              :             {
     833           54 :                 int         segsize = SizeOfGinPostingList(seginfo->seg);
     834           54 :                 GinPostingList *tmp = (GinPostingList *) palloc(segsize);
     835              : 
     836           54 :                 memcpy(tmp, seginfo->seg, segsize);
     837           54 :                 seginfo->seg = tmp;
     838              :             }
     839              :         }
     840              : 
     841           27 :         if (RelationNeedsWAL(indexrel))
     842            9 :             computeLeafRecompressWALData(leaf);
     843              : 
     844              :         /* Apply changes to page */
     845           27 :         START_CRIT_SECTION();
     846              : 
     847           27 :         dataPlaceToPageLeafRecompress(buffer, leaf);
     848              : 
     849           27 :         MarkBufferDirty(buffer);
     850              : 
     851           27 :         if (RelationNeedsWAL(indexrel))
     852              :         {
     853              :             XLogRecPtr  recptr;
     854              : 
     855            9 :             XLogBeginInsert();
     856            9 :             XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
     857            9 :             XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
     858            9 :             recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE);
     859            9 :             PageSetLSN(page, recptr);
     860              :         }
     861              : 
     862           27 :         END_CRIT_SECTION();
     863              :     }
     864           27 : }
     865              : 
     866              : /*
     867              :  * Construct a ginxlogRecompressDataLeaf record representing the changes
     868              :  * in *leaf.  (Because this requires a palloc, we have to do it before
     869              :  * we enter the critical section that actually updates the page.)
     870              :  */
     871              : static void
     872        24712 : computeLeafRecompressWALData(disassembledLeaf *leaf)
     873              : {
     874        24712 :     int         nmodified = 0;
     875              :     char       *walbufbegin;
     876              :     char       *walbufend;
     877              :     dlist_iter  iter;
     878              :     int         segno;
     879              :     ginxlogRecompressDataLeaf *recompress_xlog;
     880              : 
     881              :     /* Count the modified segments */
     882       446027 :     dlist_foreach(iter, &leaf->segments)
     883              :     {
     884       421315 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
     885              :                                                    iter.cur);
     886              : 
     887       421315 :         if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
     888        24784 :             nmodified++;
     889              :     }
     890              : 
     891              :     walbufbegin =
     892        24712 :         palloc(sizeof(ginxlogRecompressDataLeaf) +
     893              :                BLCKSZ +         /* max size needed to hold the segment data */
     894        24712 :                nmodified * 2    /* (segno + action) per action */
     895              :         );
     896        24712 :     walbufend = walbufbegin;
     897              : 
     898        24712 :     recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
     899        24712 :     walbufend += sizeof(ginxlogRecompressDataLeaf);
     900              : 
     901        24712 :     recompress_xlog->nactions = nmodified;
     902              : 
     903        24712 :     segno = 0;
     904       446027 :     dlist_foreach(iter, &leaf->segments)
     905              :     {
     906       421315 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
     907              :                                                    iter.cur);
     908       421315 :         int         segsize = 0;
     909              :         int         datalen;
     910       421315 :         uint8       action = seginfo->action;
     911              : 
     912       421315 :         if (action == GIN_SEGMENT_UNMODIFIED)
     913              :         {
     914       396531 :             segno++;
     915       396531 :             continue;
     916              :         }
     917              : 
     918        24784 :         if (action != GIN_SEGMENT_DELETE)
     919        24754 :             segsize = SizeOfGinPostingList(seginfo->seg);
     920              : 
     921              :         /*
     922              :          * If storing the uncompressed list of added item pointers would take
     923              :          * more space than storing the compressed segment as is, do that
     924              :          * instead.
     925              :          */
     926        24784 :         if (action == GIN_SEGMENT_ADDITEMS &&
     927        24580 :             seginfo->nmodifieditems * sizeof(ItemPointerData) > segsize)
     928              :         {
     929            0 :             action = GIN_SEGMENT_REPLACE;
     930              :         }
     931              : 
     932        24784 :         *((uint8 *) (walbufend++)) = segno;
     933        24784 :         *(walbufend++) = action;
     934              : 
     935        24784 :         switch (action)
     936              :         {
     937           30 :             case GIN_SEGMENT_DELETE:
     938           30 :                 datalen = 0;
     939           30 :                 break;
     940              : 
     941        24580 :             case GIN_SEGMENT_ADDITEMS:
     942        24580 :                 datalen = seginfo->nmodifieditems * sizeof(ItemPointerData);
     943        24580 :                 memcpy(walbufend, &seginfo->nmodifieditems, sizeof(uint16));
     944        24580 :                 memcpy(walbufend + sizeof(uint16), seginfo->modifieditems, datalen);
     945        24580 :                 datalen += sizeof(uint16);
     946        24580 :                 break;
     947              : 
     948          174 :             case GIN_SEGMENT_INSERT:
     949              :             case GIN_SEGMENT_REPLACE:
     950          174 :                 datalen = SHORTALIGN(segsize);
     951          174 :                 memcpy(walbufend, seginfo->seg, segsize);
     952          174 :                 break;
     953              : 
     954            0 :             default:
     955            0 :                 elog(ERROR, "unexpected GIN leaf action %d", action);
     956              :         }
     957        24784 :         walbufend += datalen;
     958              : 
     959        24784 :         if (action != GIN_SEGMENT_INSERT)
     960        24634 :             segno++;
     961              :     }
     962              : 
     963              :     /* Pass back the constructed info via *leaf */
     964        24712 :     leaf->walinfo = walbufbegin;
     965        24712 :     leaf->walinfolen = walbufend - walbufbegin;
     966        24712 : }
     967              : 
     968              : /*
     969              :  * Assemble a disassembled posting tree leaf page back to a buffer.
     970              :  *
     971              :  * This just updates the target buffer; WAL stuff is caller's responsibility.
     972              :  *
     973              :  * NOTE: The segment pointers must not point directly to the same buffer,
     974              :  * except for segments that have not been modified and whose preceding
     975              :  * segments have not been modified either.
     976              :  */
     977              : static void
     978        24730 : dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
     979              : {
     980        24730 :     Page        page = BufferGetPage(buf);
     981              :     char       *ptr;
     982              :     int         newsize;
     983        24730 :     bool        modified = false;
     984              :     dlist_iter  iter;
     985              :     int         segsize;
     986              : 
     987              :     /*
     988              :      * If the page was in pre-9.4 format before, convert the header, and force
     989              :      * all segments to be copied to the page whether they were modified or
     990              :      * not.
     991              :      */
     992        24730 :     if (!GinPageIsCompressed(page))
     993              :     {
     994              :         Assert(leaf->oldformat);
     995            0 :         GinPageSetCompressed(page);
     996            0 :         GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
     997            0 :         modified = true;
     998              :     }
     999              : 
    1000        24730 :     ptr = (char *) GinDataLeafPageGetPostingList(page);
    1001        24730 :     newsize = 0;
    1002       446468 :     dlist_foreach(iter, &leaf->segments)
    1003              :     {
    1004       421738 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur);
    1005              : 
    1006       421738 :         if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
    1007        25207 :             modified = true;
    1008              : 
    1009       421738 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1010              :         {
    1011       421288 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1012              : 
    1013       421288 :             if (modified)
    1014        24793 :                 memcpy(ptr, seginfo->seg, segsize);
    1015              : 
    1016       421288 :             ptr += segsize;
    1017       421288 :             newsize += segsize;
    1018              :         }
    1019              :     }
    1020              : 
    1021              :     Assert(newsize <= GinDataPageMaxDataSize);
    1022        24730 :     GinDataPageSetDataSize(page, newsize);
    1023        24730 : }
    1024              : 
    1025              : /*
    1026              :  * Like dataPlaceToPageLeafRecompress, but writes the disassembled leaf
    1027              :  * segments to two pages instead of one.
    1028              :  *
    1029              :  * This is different from the non-split cases in that this does not modify
    1030              :  * the original page directly, but writes to temporary in-memory copies of
    1031              :  * the new left and right pages.
    1032              :  */
    1033              : static void
    1034           76 : dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
    1035              :                          ItemPointerData lbound, ItemPointerData rbound,
    1036              :                          Page lpage, Page rpage)
    1037              : {
    1038              :     char       *ptr;
    1039              :     int         segsize;
    1040              :     int         lsize;
    1041              :     int         rsize;
    1042              :     dlist_node *node;
    1043              :     dlist_node *firstright;
    1044              :     leafSegmentInfo *seginfo;
    1045              : 
    1046              :     /* Initialize temporary pages to hold the new left and right pages */
    1047           76 :     GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
    1048           76 :     GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
    1049              : 
    1050              :     /*
    1051              :      * Copy the segments that go to the left page.
    1052              :      *
    1053              :      * XXX: We should skip copying the unmodified part of the left page, like
    1054              :      * we do when recompressing.
    1055              :      */
    1056           76 :     lsize = 0;
    1057           76 :     ptr = (char *) GinDataLeafPageGetPostingList(lpage);
    1058           76 :     firstright = dlist_next_node(&leaf->segments, leaf->lastleft);
    1059           76 :     for (node = dlist_head_node(&leaf->segments);
    1060         1815 :          node != firstright;
    1061         1739 :          node = dlist_next_node(&leaf->segments, node))
    1062              :     {
    1063         1739 :         seginfo = dlist_container(leafSegmentInfo, node, node);
    1064              : 
    1065         1739 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1066              :         {
    1067         1739 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1068         1739 :             memcpy(ptr, seginfo->seg, segsize);
    1069         1739 :             ptr += segsize;
    1070         1739 :             lsize += segsize;
    1071              :         }
    1072              :     }
    1073              :     Assert(lsize == leaf->lsize);
    1074           76 :     GinDataPageSetDataSize(lpage, lsize);
    1075           76 :     *GinDataPageGetRightBound(lpage) = lbound;
    1076              : 
    1077              :     /* Copy the segments that go to the right page */
    1078           76 :     ptr = (char *) GinDataLeafPageGetPostingList(rpage);
    1079           76 :     rsize = 0;
    1080           76 :     for (node = firstright;
    1081              :          ;
    1082          980 :          node = dlist_next_node(&leaf->segments, node))
    1083              :     {
    1084         1056 :         seginfo = dlist_container(leafSegmentInfo, node, node);
    1085              : 
    1086         1056 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1087              :         {
    1088         1056 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1089         1056 :             memcpy(ptr, seginfo->seg, segsize);
    1090         1056 :             ptr += segsize;
    1091         1056 :             rsize += segsize;
    1092              :         }
    1093              : 
    1094         1056 :         if (!dlist_has_next(&leaf->segments, node))
    1095           76 :             break;
    1096              :     }
    1097              :     Assert(rsize == leaf->rsize);
    1098           76 :     GinDataPageSetDataSize(rpage, rsize);
    1099           76 :     *GinDataPageGetRightBound(rpage) = rbound;
    1100           76 : }
    1101              : 
    1102              : /*
    1103              :  * Prepare to insert data on an internal data page.
    1104              :  *
    1105              :  * If it will fit, return GPTP_INSERT after doing whatever setup is needed
    1106              :  * before we enter the insertion critical section.  *ptp_workspace can be
    1107              :  * set to pass information along to the execPlaceToPage function.
    1108              :  *
    1109              :  * If it won't fit, perform a page split and return two temporary page
    1110              :  * images into *newlpage and *newrpage, with result GPTP_SPLIT.
    1111              :  *
    1112              :  * In neither case should the given page buffer be modified here.
    1113              :  *
    1114              :  * Note: on insertion to an internal node, in addition to inserting the given
    1115              :  * item, the downlink of the existing item at stack->off will be updated to
    1116              :  * point to updateblkno.
    1117              :  */
    1118              : static GinPlaceToPageRC
    1119           23 : dataBeginPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1120              :                              void *insertdata, BlockNumber updateblkno,
    1121              :                              void **ptp_workspace,
    1122              :                              Page *newlpage, Page *newrpage)
    1123              : {
    1124           23 :     Page        page = BufferGetPage(buf);
    1125              : 
    1126              :     /* If it doesn't fit, deal with split case */
    1127           23 :     if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
    1128              :     {
    1129            0 :         dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
    1130              :                               newlpage, newrpage);
    1131            0 :         return GPTP_SPLIT;
    1132              :     }
    1133              : 
    1134              :     /* Else, we're ready to proceed with insertion */
    1135           23 :     return GPTP_INSERT;
    1136              : }
    1137              : 
    1138              : /*
    1139              :  * Perform data insertion after beginPlaceToPage has decided it will fit.
    1140              :  *
    1141              :  * This is invoked within a critical section, and XLOG record creation (if
    1142              :  * needed) is already started.  The target buffer is registered in slot 0.
    1143              :  */
    1144              : static void
    1145           23 : dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1146              :                             void *insertdata, BlockNumber updateblkno,
    1147              :                             void *ptp_workspace)
    1148              : {
    1149           23 :     Page        page = BufferGetPage(buf);
    1150           23 :     OffsetNumber off = stack->off;
    1151              :     PostingItem *pitem;
    1152              : 
    1153              :     /* Update existing downlink to point to next page (on internal page) */
    1154           23 :     pitem = GinDataPageGetPostingItem(page, off);
    1155           23 :     PostingItemSetBlockNumber(pitem, updateblkno);
    1156              : 
    1157              :     /* Add new item */
    1158           23 :     pitem = (PostingItem *) insertdata;
    1159           23 :     GinDataPageAddPostingItem(page, pitem, off);
    1160              : 
    1161           23 :     MarkBufferDirty(buf);
    1162              : 
    1163           23 :     if (RelationNeedsWAL(btree->index) && !btree->isBuild)
    1164              :     {
    1165              :         /*
    1166              :          * This must be static, because it has to survive until XLogInsert,
    1167              :          * and we can't palloc here.  Ugly, but the XLogInsert infrastructure
    1168              :          * isn't reentrant anyway.
    1169              :          */
    1170              :         static ginxlogInsertDataInternal data;
    1171              : 
    1172            9 :         data.offset = off;
    1173            9 :         data.newitem = *pitem;
    1174              : 
    1175            9 :         XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
    1176            9 :         XLogRegisterBufData(0, &data,
    1177              :                             sizeof(ginxlogInsertDataInternal));
    1178              :     }
    1179           23 : }
    1180              : 
    1181              : /*
    1182              :  * Prepare to insert data on a posting-tree data page.
    1183              :  *
    1184              :  * If it will fit, return GPTP_INSERT after doing whatever setup is needed
    1185              :  * before we enter the insertion critical section.  *ptp_workspace can be
    1186              :  * set to pass information along to the execPlaceToPage function.
    1187              :  *
    1188              :  * If it won't fit, perform a page split and return two temporary page
    1189              :  * images into *newlpage and *newrpage, with result GPTP_SPLIT.
    1190              :  *
    1191              :  * In neither case should the given page buffer be modified here.
    1192              :  *
    1193              :  * Note: on insertion to an internal node, in addition to inserting the given
    1194              :  * item, the downlink of the existing item at stack->off will be updated to
    1195              :  * point to updateblkno.
    1196              :  *
    1197              :  * Calls relevant function for internal or leaf page because they are handled
    1198              :  * very differently.
    1199              :  */
    1200              : static GinPlaceToPageRC
    1201        24802 : dataBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1202              :                      void *insertdata, BlockNumber updateblkno,
    1203              :                      void **ptp_workspace,
    1204              :                      Page *newlpage, Page *newrpage)
    1205              : {
    1206        24802 :     Page        page = BufferGetPage(buf);
    1207              : 
    1208              :     Assert(GinPageIsData(page));
    1209              : 
    1210        24802 :     if (GinPageIsLeaf(page))
    1211        24779 :         return dataBeginPlaceToPageLeaf(btree, buf, stack, insertdata,
    1212              :                                         ptp_workspace,
    1213              :                                         newlpage, newrpage);
    1214              :     else
    1215           23 :         return dataBeginPlaceToPageInternal(btree, buf, stack,
    1216              :                                             insertdata, updateblkno,
    1217              :                                             ptp_workspace,
    1218              :                                             newlpage, newrpage);
    1219              : }
    1220              : 
    1221              : /*
    1222              :  * Perform data insertion after beginPlaceToPage has decided it will fit.
    1223              :  *
    1224              :  * This is invoked within a critical section, and XLOG record creation (if
    1225              :  * needed) is already started.  The target buffer is registered in slot 0.
    1226              :  *
    1227              :  * Calls relevant function for internal or leaf page because they are handled
    1228              :  * very differently.
    1229              :  */
    1230              : static void
    1231        24726 : dataExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1232              :                     void *insertdata, BlockNumber updateblkno,
    1233              :                     void *ptp_workspace)
    1234              : {
    1235        24726 :     Page        page = BufferGetPage(buf);
    1236              : 
    1237        24726 :     if (GinPageIsLeaf(page))
    1238        24703 :         dataExecPlaceToPageLeaf(btree, buf, stack, insertdata,
    1239              :                                 ptp_workspace);
    1240              :     else
    1241           23 :         dataExecPlaceToPageInternal(btree, buf, stack, insertdata,
    1242              :                                     updateblkno, ptp_workspace);
    1243        24726 : }
    1244              : 
    1245              : /*
    1246              :  * Split internal page and insert new data.
    1247              :  *
    1248              :  * Returns new temp pages to *newlpage and *newrpage.
    1249              :  * The original buffer is left untouched.
    1250              :  */
    1251              : static void
    1252            0 : dataSplitPageInternal(GinBtree btree, Buffer origbuf,
    1253              :                       GinBtreeStack *stack,
    1254              :                       void *insertdata, BlockNumber updateblkno,
    1255              :                       Page *newlpage, Page *newrpage)
    1256              : {
    1257            0 :     Page        oldpage = BufferGetPage(origbuf);
    1258            0 :     OffsetNumber off = stack->off;
    1259            0 :     int         nitems = GinPageGetOpaque(oldpage)->maxoff;
    1260              :     int         nleftitems;
    1261              :     int         nrightitems;
    1262            0 :     Size        pageSize = PageGetPageSize(oldpage);
    1263            0 :     ItemPointerData oldbound = *GinDataPageGetRightBound(oldpage);
    1264              :     ItemPointer bound;
    1265              :     Page        lpage;
    1266              :     Page        rpage;
    1267              :     OffsetNumber separator;
    1268              :     PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
    1269              : 
    1270            0 :     lpage = PageGetTempPage(oldpage);
    1271            0 :     rpage = PageGetTempPage(oldpage);
    1272            0 :     GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
    1273            0 :     GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
    1274              : 
    1275              :     /*
    1276              :      * First construct a new list of PostingItems, which includes all the old
    1277              :      * items, and the new item.
    1278              :      */
    1279            0 :     memcpy(allitems, GinDataPageGetPostingItem(oldpage, FirstOffsetNumber),
    1280            0 :            (off - 1) * sizeof(PostingItem));
    1281              : 
    1282            0 :     allitems[off - 1] = *((PostingItem *) insertdata);
    1283            0 :     memcpy(&allitems[off], GinDataPageGetPostingItem(oldpage, off),
    1284            0 :            (nitems - (off - 1)) * sizeof(PostingItem));
    1285            0 :     nitems++;
    1286              : 
    1287              :     /* Update existing downlink to point to next page */
    1288            0 :     PostingItemSetBlockNumber(&allitems[off], updateblkno);
    1289              : 
    1290              :     /*
    1291              :      * When creating a new index, fit as many tuples as possible on the left
    1292              :      * page, on the assumption that the table is scanned from beginning to
    1293              :      * end. This packs the index as tight as possible.
    1294              :      */
    1295            0 :     if (btree->isBuild && GinPageRightMost(oldpage))
    1296            0 :         separator = GinNonLeafDataPageGetFreeSpace(rpage) / sizeof(PostingItem);
    1297              :     else
    1298            0 :         separator = nitems / 2;
    1299            0 :     nleftitems = separator;
    1300            0 :     nrightitems = nitems - separator;
    1301              : 
    1302            0 :     memcpy(GinDataPageGetPostingItem(lpage, FirstOffsetNumber),
    1303              :            allitems,
    1304              :            nleftitems * sizeof(PostingItem));
    1305            0 :     GinPageGetOpaque(lpage)->maxoff = nleftitems;
    1306            0 :     memcpy(GinDataPageGetPostingItem(rpage, FirstOffsetNumber),
    1307            0 :            &allitems[separator],
    1308              :            nrightitems * sizeof(PostingItem));
    1309            0 :     GinPageGetOpaque(rpage)->maxoff = nrightitems;
    1310              : 
    1311              :     /*
    1312              :      * Also set pd_lower for both pages, like GinDataPageAddPostingItem does.
    1313              :      */
    1314            0 :     GinDataPageSetDataSize(lpage, nleftitems * sizeof(PostingItem));
    1315            0 :     GinDataPageSetDataSize(rpage, nrightitems * sizeof(PostingItem));
    1316              : 
    1317              :     /* set up right bound for left page */
    1318            0 :     bound = GinDataPageGetRightBound(lpage);
    1319            0 :     *bound = GinDataPageGetPostingItem(lpage, nleftitems)->key;
    1320              : 
    1321              :     /* set up right bound for right page */
    1322            0 :     *GinDataPageGetRightBound(rpage) = oldbound;
    1323              : 
    1324              :     /* return temp pages to caller */
    1325            0 :     *newlpage = lpage;
    1326            0 :     *newrpage = rpage;
    1327            0 : }
    1328              : 
    1329              : /*
    1330              :  * Construct insertion payload for inserting the downlink for given buffer.
    1331              :  */
    1332              : static void *
    1333           23 : dataPrepareDownlink(GinBtree btree, Buffer lbuf)
    1334              : {
    1335           23 :     PostingItem *pitem = palloc_object(PostingItem);
    1336           23 :     Page        lpage = BufferGetPage(lbuf);
    1337              : 
    1338           23 :     PostingItemSetBlockNumber(pitem, BufferGetBlockNumber(lbuf));
    1339           23 :     pitem->key = *GinDataPageGetRightBound(lpage);
    1340              : 
    1341           23 :     return pitem;
    1342              : }
    1343              : 
    1344              : /*
    1345              :  * Fills new root by right bound values from child.
    1346              :  * Also called from ginxlog, should not use btree
    1347              :  */
    1348              : void
    1349           53 : ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage)
    1350              : {
    1351              :     PostingItem li,
    1352              :                 ri;
    1353              : 
    1354           53 :     li.key = *GinDataPageGetRightBound(lpage);
    1355           53 :     PostingItemSetBlockNumber(&li, lblkno);
    1356           53 :     GinDataPageAddPostingItem(root, &li, InvalidOffsetNumber);
    1357              : 
    1358           53 :     ri.key = *GinDataPageGetRightBound(rpage);
    1359           53 :     PostingItemSetBlockNumber(&ri, rblkno);
    1360           53 :     GinDataPageAddPostingItem(root, &ri, InvalidOffsetNumber);
    1361           53 : }
    1362              : 
    1363              : 
    1364              : /*** Functions to work with disassembled leaf pages ***/
    1365              : 
    1366              : /*
    1367              :  * Disassemble page into a disassembledLeaf struct.
    1368              :  */
    1369              : static disassembledLeaf *
    1370        24806 : disassembleLeaf(Page page)
    1371              : {
    1372              :     disassembledLeaf *leaf;
    1373              :     GinPostingList *seg;
    1374              :     char       *segbegin;
    1375              :     char       *segend;
    1376              : 
    1377        24806 :     leaf = palloc0_object(disassembledLeaf);
    1378        24806 :     dlist_init(&leaf->segments);
    1379              : 
    1380        24806 :     if (GinPageIsCompressed(page))
    1381              :     {
    1382              :         /*
    1383              :          * Create a leafSegmentInfo entry for each segment.
    1384              :          */
    1385        24806 :         seg = GinDataLeafPageGetPostingList(page);
    1386        24806 :         segbegin = (char *) seg;
    1387        24806 :         segend = segbegin + GinDataLeafPageGetPostingListSize(page);
    1388       448239 :         while ((char *) seg < segend)
    1389              :         {
    1390       423433 :             leafSegmentInfo *seginfo = palloc_object(leafSegmentInfo);
    1391              : 
    1392       423433 :             seginfo->action = GIN_SEGMENT_UNMODIFIED;
    1393       423433 :             seginfo->seg = seg;
    1394       423433 :             seginfo->items = NULL;
    1395       423433 :             seginfo->nitems = 0;
    1396       423433 :             dlist_push_tail(&leaf->segments, &seginfo->node);
    1397              : 
    1398       423433 :             seg = GinNextPostingListSegment(seg);
    1399              :         }
    1400        24806 :         leaf->oldformat = false;
    1401              :     }
    1402              :     else
    1403              :     {
    1404              :         /*
    1405              :          * A pre-9.4 format uncompressed page is represented by a single
    1406              :          * segment, with an array of items.  The corner case is uncompressed
    1407              :          * page containing no items, which is represented as no segments.
    1408              :          */
    1409              :         ItemPointer uncompressed;
    1410              :         int         nuncompressed;
    1411              :         leafSegmentInfo *seginfo;
    1412              : 
    1413            0 :         uncompressed = dataLeafPageGetUncompressed(page, &nuncompressed);
    1414              : 
    1415            0 :         if (nuncompressed > 0)
    1416              :         {
    1417            0 :             seginfo = palloc_object(leafSegmentInfo);
    1418              : 
    1419            0 :             seginfo->action = GIN_SEGMENT_REPLACE;
    1420            0 :             seginfo->seg = NULL;
    1421            0 :             seginfo->items = palloc(nuncompressed * sizeof(ItemPointerData));
    1422            0 :             memcpy(seginfo->items, uncompressed, nuncompressed * sizeof(ItemPointerData));
    1423            0 :             seginfo->nitems = nuncompressed;
    1424              : 
    1425            0 :             dlist_push_tail(&leaf->segments, &seginfo->node);
    1426              :         }
    1427              : 
    1428            0 :         leaf->oldformat = true;
    1429              :     }
    1430              : 
    1431        24806 :     return leaf;
    1432              : }
    1433              : 
    1434              : /*
    1435              :  * Distribute newItems to the segments.
    1436              :  *
    1437              :  * Any segments that acquire new items are decoded, and the new items are
    1438              :  * merged with the old items.
    1439              :  *
    1440              :  * Returns true if any new items were added. False means they were all
    1441              :  * duplicates of existing items on the page.
    1442              :  */
    1443              : static bool
    1444        24779 : addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, int nNewItems)
    1445              : {
    1446              :     dlist_iter  iter;
    1447        24779 :     ItemPointer nextnew = newItems;
    1448        24779 :     int         newleft = nNewItems;
    1449        24779 :     bool        modified = false;
    1450              :     leafSegmentInfo *newseg;
    1451              : 
    1452              :     /*
    1453              :      * If the page is completely empty, just construct one new segment to hold
    1454              :      * all the new items.
    1455              :      */
    1456        24779 :     if (dlist_is_empty(&leaf->segments))
    1457              :     {
    1458            0 :         newseg = palloc_object(leafSegmentInfo);
    1459            0 :         newseg->seg = NULL;
    1460            0 :         newseg->items = newItems;
    1461            0 :         newseg->nitems = nNewItems;
    1462            0 :         newseg->action = GIN_SEGMENT_INSERT;
    1463            0 :         dlist_push_tail(&leaf->segments, &newseg->node);
    1464            0 :         return true;
    1465              :     }
    1466              : 
    1467       422929 :     dlist_foreach(iter, &leaf->segments)
    1468              :     {
    1469       422929 :         leafSegmentInfo *cur = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node, iter.cur);
    1470              :         int         nthis;
    1471              :         ItemPointer tmpitems;
    1472              :         int         ntmpitems;
    1473              : 
    1474              :         /*
    1475              :          * How many of the new items fall into this segment?
    1476              :          */
    1477       422929 :         if (!dlist_has_next(&leaf->segments, iter.cur))
    1478        24779 :             nthis = newleft;
    1479              :         else
    1480              :         {
    1481              :             leafSegmentInfo *next;
    1482              :             ItemPointerData next_first;
    1483              : 
    1484       398150 :             next = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node,
    1485              :                                                        dlist_next_node(&leaf->segments, iter.cur));
    1486       398150 :             if (next->items)
    1487        24773 :                 next_first = next->items[0];
    1488              :             else
    1489              :             {
    1490              :                 Assert(next->seg != NULL);
    1491       373377 :                 next_first = next->seg->first;
    1492              :             }
    1493              : 
    1494       398150 :             nthis = 0;
    1495       398150 :             while (nthis < newleft && ginCompareItemPointers(&nextnew[nthis], &next_first) < 0)
    1496            0 :                 nthis++;
    1497              :         }
    1498       422929 :         if (nthis == 0)
    1499       398150 :             continue;
    1500              : 
    1501              :         /* Merge the new items with the existing items. */
    1502        24779 :         if (!cur->items)
    1503            0 :             cur->items = ginPostingListDecode(cur->seg, &cur->nitems);
    1504              : 
    1505              :         /*
    1506              :          * Fast path for the important special case that we're appending to
    1507              :          * the end of the page: don't let the last segment on the page grow
    1508              :          * larger than the target, create a new segment before that happens.
    1509              :          */
    1510        49558 :         if (!dlist_has_next(&leaf->segments, iter.cur) &&
    1511        24779 :             ginCompareItemPointers(&cur->items[cur->nitems - 1], &nextnew[0]) < 0 &&
    1512        24779 :             cur->seg != NULL &&
    1513        24779 :             SizeOfGinPostingList(cur->seg) >= GinPostingListSegmentTargetSize)
    1514              :         {
    1515          185 :             newseg = palloc_object(leafSegmentInfo);
    1516          185 :             newseg->seg = NULL;
    1517          185 :             newseg->items = nextnew;
    1518          185 :             newseg->nitems = nthis;
    1519          185 :             newseg->action = GIN_SEGMENT_INSERT;
    1520          185 :             dlist_push_tail(&leaf->segments, &newseg->node);
    1521          185 :             modified = true;
    1522        24779 :             break;
    1523              :         }
    1524              : 
    1525        24594 :         tmpitems = ginMergeItemPointers(cur->items, cur->nitems,
    1526              :                                         nextnew, nthis,
    1527              :                                         &ntmpitems);
    1528        24594 :         if (ntmpitems != cur->nitems)
    1529              :         {
    1530              :             /*
    1531              :              * If there are no duplicates, track the added items so that we
    1532              :              * can emit a compact ADDITEMS WAL record later on. (it doesn't
    1533              :              * seem worth re-checking which items were duplicates, if there
    1534              :              * were any)
    1535              :              */
    1536        24594 :             if (ntmpitems == nthis + cur->nitems &&
    1537        24594 :                 cur->action == GIN_SEGMENT_UNMODIFIED)
    1538              :             {
    1539        24594 :                 cur->action = GIN_SEGMENT_ADDITEMS;
    1540        24594 :                 cur->modifieditems = nextnew;
    1541        24594 :                 cur->nmodifieditems = nthis;
    1542              :             }
    1543              :             else
    1544            0 :                 cur->action = GIN_SEGMENT_REPLACE;
    1545              : 
    1546        24594 :             cur->items = tmpitems;
    1547        24594 :             cur->nitems = ntmpitems;
    1548        24594 :             cur->seg = NULL;
    1549        24594 :             modified = true;
    1550              :         }
    1551              : 
    1552        24594 :         nextnew += nthis;
    1553        24594 :         newleft -= nthis;
    1554        24594 :         if (newleft == 0)
    1555        24594 :             break;
    1556              :     }
    1557              : 
    1558        24779 :     return modified;
    1559              : }
    1560              : 
    1561              : /*
    1562              :  * Recompresses all segments that have been modified.
    1563              :  *
    1564              :  * If not all the items fit on two pages (ie. after split), we store as
    1565              :  * many items as fit, and set *remaining to the first item that didn't fit.
    1566              :  * If all items fit, *remaining is set to invalid.
    1567              :  *
    1568              :  * Returns true if the page has to be split.
    1569              :  */
    1570              : static bool
    1571        24779 : leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
    1572              : {
    1573        24779 :     int         pgused = 0;
    1574        24779 :     bool        needsplit = false;
    1575              :     dlist_iter  iter;
    1576              :     int         segsize;
    1577              :     leafSegmentInfo *nextseg;
    1578              :     int         npacked;
    1579              :     bool        modified;
    1580              :     dlist_node *cur_node;
    1581              :     dlist_node *next_node;
    1582              : 
    1583        24779 :     ItemPointerSetInvalid(remaining);
    1584              : 
    1585              :     /*
    1586              :      * cannot use dlist_foreach_modify here because we insert adjacent items
    1587              :      * while iterating.
    1588              :      */
    1589        24779 :     for (cur_node = dlist_head_node(&leaf->segments);
    1590       448808 :          cur_node != NULL;
    1591       424029 :          cur_node = next_node)
    1592              :     {
    1593       424049 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
    1594              :                                                    cur_node);
    1595              : 
    1596       424049 :         if (dlist_has_next(&leaf->segments, cur_node))
    1597       398335 :             next_node = dlist_next_node(&leaf->segments, cur_node);
    1598              :         else
    1599        25714 :             next_node = NULL;
    1600              : 
    1601              :         /* Compress the posting list, if necessary */
    1602       424049 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1603              :         {
    1604       424049 :             if (seginfo->seg == NULL)
    1605              :             {
    1606        25714 :                 if (seginfo->nitems > GinPostingListSegmentMaxSize)
    1607          952 :                     npacked = 0;    /* no chance that it would fit. */
    1608              :                 else
    1609              :                 {
    1610        24762 :                     seginfo->seg = ginCompressPostingList(seginfo->items,
    1611              :                                                           seginfo->nitems,
    1612              :                                                           GinPostingListSegmentMaxSize,
    1613              :                                                           &npacked);
    1614              :                 }
    1615        25714 :                 if (npacked != seginfo->nitems)
    1616              :                 {
    1617              :                     /*
    1618              :                      * Too large. Compress again to the target size, and
    1619              :                      * create a new segment to represent the remaining items.
    1620              :                      * The new segment is inserted after this one, so it will
    1621              :                      * be processed in the next iteration of this loop.
    1622              :                      */
    1623          955 :                     if (seginfo->seg)
    1624            3 :                         pfree(seginfo->seg);
    1625          955 :                     seginfo->seg = ginCompressPostingList(seginfo->items,
    1626              :                                                           seginfo->nitems,
    1627              :                                                           GinPostingListSegmentTargetSize,
    1628              :                                                           &npacked);
    1629          955 :                     if (seginfo->action != GIN_SEGMENT_INSERT)
    1630            9 :                         seginfo->action = GIN_SEGMENT_REPLACE;
    1631              : 
    1632          955 :                     nextseg = palloc_object(leafSegmentInfo);
    1633          955 :                     nextseg->action = GIN_SEGMENT_INSERT;
    1634          955 :                     nextseg->seg = NULL;
    1635          955 :                     nextseg->items = &seginfo->items[npacked];
    1636          955 :                     nextseg->nitems = seginfo->nitems - npacked;
    1637          955 :                     next_node = &nextseg->node;
    1638          955 :                     dlist_insert_after(cur_node, next_node);
    1639              :                 }
    1640              :             }
    1641              : 
    1642              :             /*
    1643              :              * If the segment is very small, merge it with the next segment.
    1644              :              */
    1645       424049 :             if (SizeOfGinPostingList(seginfo->seg) < GinPostingListSegmentMinSize && next_node)
    1646              :             {
    1647              :                 int         nmerged;
    1648              : 
    1649            0 :                 nextseg = dlist_container(leafSegmentInfo, node, next_node);
    1650              : 
    1651            0 :                 if (seginfo->items == NULL)
    1652            0 :                     seginfo->items = ginPostingListDecode(seginfo->seg,
    1653              :                                                           &seginfo->nitems);
    1654            0 :                 if (nextseg->items == NULL)
    1655            0 :                     nextseg->items = ginPostingListDecode(nextseg->seg,
    1656              :                                                           &nextseg->nitems);
    1657            0 :                 nextseg->items =
    1658            0 :                     ginMergeItemPointers(seginfo->items, seginfo->nitems,
    1659            0 :                                          nextseg->items, nextseg->nitems,
    1660              :                                          &nmerged);
    1661              :                 Assert(nmerged == seginfo->nitems + nextseg->nitems);
    1662            0 :                 nextseg->nitems = nmerged;
    1663            0 :                 nextseg->seg = NULL;
    1664              : 
    1665            0 :                 nextseg->action = GIN_SEGMENT_REPLACE;
    1666            0 :                 nextseg->modifieditems = NULL;
    1667            0 :                 nextseg->nmodifieditems = 0;
    1668              : 
    1669            0 :                 if (seginfo->action == GIN_SEGMENT_INSERT)
    1670              :                 {
    1671            0 :                     dlist_delete(cur_node);
    1672            0 :                     continue;
    1673              :                 }
    1674              :                 else
    1675              :                 {
    1676            0 :                     seginfo->action = GIN_SEGMENT_DELETE;
    1677            0 :                     seginfo->seg = NULL;
    1678              :                 }
    1679              :             }
    1680              : 
    1681       424049 :             seginfo->items = NULL;
    1682       424049 :             seginfo->nitems = 0;
    1683              :         }
    1684              : 
    1685       424049 :         if (seginfo->action == GIN_SEGMENT_DELETE)
    1686            0 :             continue;
    1687              : 
    1688              :         /*
    1689              :          * OK, we now have a compressed version of this segment ready for
    1690              :          * copying to the page. Did we exceed the size that fits on one page?
    1691              :          */
    1692       424049 :         segsize = SizeOfGinPostingList(seginfo->seg);
    1693       424049 :         if (pgused + segsize > GinDataPageMaxDataSize)
    1694              :         {
    1695           96 :             if (!needsplit)
    1696              :             {
    1697              :                 /* switch to right page */
    1698              :                 Assert(pgused > 0);
    1699           76 :                 leaf->lastleft = dlist_prev_node(&leaf->segments, cur_node);
    1700           76 :                 needsplit = true;
    1701           76 :                 leaf->lsize = pgused;
    1702           76 :                 pgused = 0;
    1703              :             }
    1704              :             else
    1705              :             {
    1706              :                 /*
    1707              :                  * Filled both pages. The last segment we constructed did not
    1708              :                  * fit.
    1709              :                  */
    1710           20 :                 *remaining = seginfo->seg->first;
    1711              : 
    1712              :                 /*
    1713              :                  * remove all segments that did not fit from the list.
    1714              :                  */
    1715           40 :                 while (dlist_has_next(&leaf->segments, cur_node))
    1716           20 :                     dlist_delete(dlist_next_node(&leaf->segments, cur_node));
    1717           20 :                 dlist_delete(cur_node);
    1718           20 :                 break;
    1719              :             }
    1720              :         }
    1721              : 
    1722       424029 :         pgused += segsize;
    1723              :     }
    1724              : 
    1725        24779 :     if (!needsplit)
    1726              :     {
    1727        24703 :         leaf->lsize = pgused;
    1728        24703 :         leaf->rsize = 0;
    1729              :     }
    1730              :     else
    1731           76 :         leaf->rsize = pgused;
    1732              : 
    1733              :     Assert(leaf->lsize <= GinDataPageMaxDataSize);
    1734              :     Assert(leaf->rsize <= GinDataPageMaxDataSize);
    1735              : 
    1736              :     /*
    1737              :      * Make a palloc'd copy of every segment after the first modified one,
    1738              :      * because as we start copying items to the original page, we might
    1739              :      * overwrite an existing segment.
    1740              :      */
    1741        24779 :     modified = false;
    1742       448808 :     dlist_foreach(iter, &leaf->segments)
    1743              :     {
    1744       424029 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
    1745              :                                                    iter.cur);
    1746              : 
    1747       424029 :         if (!modified && seginfo->action != GIN_SEGMENT_UNMODIFIED)
    1748              :         {
    1749        24779 :             modified = true;
    1750              :         }
    1751       399250 :         else if (modified && seginfo->action == GIN_SEGMENT_UNMODIFIED)
    1752              :         {
    1753              :             GinPostingList *tmp;
    1754              : 
    1755            0 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1756            0 :             tmp = palloc(segsize);
    1757            0 :             memcpy(tmp, seginfo->seg, segsize);
    1758            0 :             seginfo->seg = tmp;
    1759              :         }
    1760              :     }
    1761              : 
    1762        24779 :     return needsplit;
    1763              : }
    1764              : 
    1765              : 
    1766              : /*** Functions that are exported to the rest of the GIN code ***/
    1767              : 
    1768              : /*
    1769              :  * Creates new posting tree containing the given TIDs. Returns the page
    1770              :  * number of the root of the new posting tree.
    1771              :  *
    1772              :  * items[] must be in sorted order with no duplicates.
    1773              :  */
    1774              : BlockNumber
    1775           65 : createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
    1776              :                   GinStatsData *buildStats, Buffer entrybuffer)
    1777              : {
    1778              :     BlockNumber blkno;
    1779              :     Buffer      buffer;
    1780              :     Page        tmppage;
    1781              :     Page        page;
    1782              :     char       *ptr;
    1783              :     int         nrootitems;
    1784              :     int         rootsize;
    1785           65 :     bool        is_build = (buildStats != NULL);
    1786              : 
    1787              :     /* Construct the new root page in memory first. */
    1788           65 :     tmppage = (Page) palloc(BLCKSZ);
    1789           65 :     GinInitPage(tmppage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
    1790           65 :     GinPageGetOpaque(tmppage)->rightlink = InvalidBlockNumber;
    1791              : 
    1792              :     /*
    1793              :      * Write as many of the items to the root page as fit. In segments of max
    1794              :      * GinPostingListSegmentMaxSize bytes each.
    1795              :      */
    1796           65 :     nrootitems = 0;
    1797           65 :     rootsize = 0;
    1798           65 :     ptr = (char *) GinDataLeafPageGetPostingList(tmppage);
    1799         1230 :     while (nrootitems < nitems)
    1800              :     {
    1801              :         GinPostingList *segment;
    1802              :         int         npacked;
    1803              :         int         segsize;
    1804              : 
    1805         1216 :         segment = ginCompressPostingList(&items[nrootitems],
    1806         1216 :                                          nitems - nrootitems,
    1807              :                                          GinPostingListSegmentMaxSize,
    1808              :                                          &npacked);
    1809         1216 :         segsize = SizeOfGinPostingList(segment);
    1810         1216 :         if (rootsize + segsize > GinDataPageMaxDataSize)
    1811           51 :             break;
    1812              : 
    1813         1165 :         memcpy(ptr, segment, segsize);
    1814         1165 :         ptr += segsize;
    1815         1165 :         rootsize += segsize;
    1816         1165 :         nrootitems += npacked;
    1817         1165 :         pfree(segment);
    1818              :     }
    1819           65 :     GinDataPageSetDataSize(tmppage, rootsize);
    1820              : 
    1821              :     /*
    1822              :      * All set. Get a new physical page, and copy the in-memory page to it.
    1823              :      */
    1824           65 :     buffer = GinNewBuffer(index);
    1825           65 :     page = BufferGetPage(buffer);
    1826           65 :     blkno = BufferGetBlockNumber(buffer);
    1827              : 
    1828              :     /*
    1829              :      * Copy any predicate locks from the entry tree leaf (containing posting
    1830              :      * list) to the posting tree.
    1831              :      */
    1832           65 :     PredicateLockPageSplit(index, BufferGetBlockNumber(entrybuffer), blkno);
    1833              : 
    1834           65 :     START_CRIT_SECTION();
    1835              : 
    1836           65 :     PageRestoreTempPage(tmppage, page);
    1837           65 :     MarkBufferDirty(buffer);
    1838              : 
    1839           65 :     if (RelationNeedsWAL(index) && !is_build)
    1840              :     {
    1841              :         XLogRecPtr  recptr;
    1842              :         ginxlogCreatePostingTree data;
    1843              : 
    1844           20 :         data.size = rootsize;
    1845              : 
    1846           20 :         XLogBeginInsert();
    1847           20 :         XLogRegisterData(&data, sizeof(ginxlogCreatePostingTree));
    1848              : 
    1849           20 :         XLogRegisterData(GinDataLeafPageGetPostingList(page),
    1850              :                          rootsize);
    1851           20 :         XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
    1852              : 
    1853           20 :         recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE);
    1854           20 :         PageSetLSN(page, recptr);
    1855              :     }
    1856              : 
    1857           65 :     END_CRIT_SECTION();
    1858              : 
    1859           65 :     UnlockReleaseBuffer(buffer);
    1860              : 
    1861              :     /* During index build, count the newly-added data page */
    1862           65 :     if (buildStats)
    1863           39 :         buildStats->nDataPages++;
    1864              : 
    1865           65 :     elog(DEBUG2, "created GIN posting tree with %d items", nrootitems);
    1866              : 
    1867              :     /*
    1868              :      * Add any remaining TIDs to the newly-created posting tree.
    1869              :      */
    1870           65 :     if (nitems > nrootitems)
    1871              :     {
    1872           51 :         ginInsertItemPointers(index, blkno,
    1873           51 :                               items + nrootitems,
    1874              :                               nitems - nrootitems,
    1875              :                               buildStats);
    1876              :     }
    1877              : 
    1878           65 :     return blkno;
    1879              : }
    1880              : 
    1881              : static void
    1882        24799 : ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno)
    1883              : {
    1884        24799 :     memset(btree, 0, sizeof(GinBtreeData));
    1885              : 
    1886        24799 :     btree->index = index;
    1887        24799 :     btree->rootBlkno = rootBlkno;
    1888              : 
    1889        24799 :     btree->findChildPage = dataLocateItem;
    1890        24799 :     btree->getLeftMostChild = dataGetLeftMostPage;
    1891        24799 :     btree->isMoveRight = dataIsMoveRight;
    1892        24799 :     btree->findItem = NULL;
    1893        24799 :     btree->findChildPtr = dataFindChildPtr;
    1894        24799 :     btree->beginPlaceToPage = dataBeginPlaceToPage;
    1895        24799 :     btree->execPlaceToPage = dataExecPlaceToPage;
    1896        24799 :     btree->fillRoot = ginDataFillRoot;
    1897        24799 :     btree->prepareDownlink = dataPrepareDownlink;
    1898              : 
    1899        24799 :     btree->isData = true;
    1900        24799 :     btree->fullScan = false;
    1901        24799 :     btree->isBuild = false;
    1902        24799 : }
    1903              : 
    1904              : /*
    1905              :  * Inserts array of item pointers, may execute several tree scan (very rare)
    1906              :  */
    1907              : void
    1908        24761 : ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
    1909              :                       ItemPointerData *items, uint32 nitem,
    1910              :                       GinStatsData *buildStats)
    1911              : {
    1912              :     GinBtreeData btree;
    1913              :     GinBtreeDataLeafInsertData insertdata;
    1914              :     GinBtreeStack *stack;
    1915              : 
    1916        24761 :     ginPrepareDataScan(&btree, index, rootBlkno);
    1917        24761 :     btree.isBuild = (buildStats != NULL);
    1918        24761 :     insertdata.items = items;
    1919        24761 :     insertdata.nitem = nitem;
    1920        24761 :     insertdata.curitem = 0;
    1921              : 
    1922        49540 :     while (insertdata.curitem < insertdata.nitem)
    1923              :     {
    1924              :         /* search for the leaf page where the first item should go to */
    1925        24781 :         btree.itemptr = insertdata.items[insertdata.curitem];
    1926        24781 :         stack = ginFindLeafPage(&btree, false, true);
    1927              : 
    1928        24779 :         ginInsertValue(&btree, stack, &insertdata, buildStats);
    1929              :     }
    1930        24759 : }
    1931              : 
    1932              : /*
    1933              :  * Starts a new scan on a posting tree.
    1934              :  */
    1935              : GinBtreeStack *
    1936           38 : ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno)
    1937              : {
    1938              :     GinBtreeStack *stack;
    1939              : 
    1940           38 :     ginPrepareDataScan(btree, index, rootBlkno);
    1941              : 
    1942           38 :     btree->fullScan = true;
    1943              : 
    1944           38 :     stack = ginFindLeafPage(btree, true, false);
    1945              : 
    1946           38 :     return stack;
    1947              : }
        

Generated by: LCOV version 2.0-1