LCOV - code coverage report
Current view: top level - src/backend/access/gin - gindatapage.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 531 661 80.3 %
Date: 2025-01-18 04:15:08 Functions: 27 29 93.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * gindatapage.c
       4             :  *    routines for handling GIN posting tree pages.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *          src/backend/access/gin/gindatapage.c
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/gin_private.h"
      18             : #include "access/ginxlog.h"
      19             : #include "access/xloginsert.h"
      20             : #include "lib/ilist.h"
      21             : #include "miscadmin.h"
      22             : #include "storage/predicate.h"
      23             : #include "utils/rel.h"
      24             : 
      25             : /*
      26             :  * Min, Max and Target size of posting lists stored on leaf pages, in bytes.
      27             :  *
      28             :  * The code can deal with any size, but random access is more efficient when
      29             :  * a number of smaller lists are stored, rather than one big list. If a
      30             :  * posting list would become larger than Max size as a result of insertions,
      31             :  * it is split into two. If a posting list would be smaller than minimum
      32             :  * size, it is merged with the next posting list.
      33             :  */
      34             : #define GinPostingListSegmentMaxSize 384
      35             : #define GinPostingListSegmentTargetSize 256
      36             : #define GinPostingListSegmentMinSize 128
      37             : 
      38             : /*
      39             :  * At least this many items fit in a GinPostingListSegmentMaxSize-bytes
      40             :  * long segment. This is used when estimating how much space is required
      41             :  * for N items, at minimum.
      42             :  */
      43             : #define MinTuplesPerSegment ((GinPostingListSegmentMaxSize - 2) / 6)
      44             : 
      45             : /*
      46             :  * A working struct for manipulating a posting tree leaf page.
      47             :  */
      48             : typedef struct
      49             : {
      50             :     dlist_head  segments;       /* a list of leafSegmentInfos */
      51             : 
      52             :     /*
      53             :      * The following fields represent how the segments are split across pages,
      54             :      * if a page split is required. Filled in by leafRepackItems.
      55             :      */
      56             :     dlist_node *lastleft;       /* last segment on left page */
      57             :     int         lsize;          /* total size on left page */
      58             :     int         rsize;          /* total size on right page */
      59             : 
      60             :     bool        oldformat;      /* page is in pre-9.4 format on disk */
      61             : 
      62             :     /*
      63             :      * If we need WAL data representing the reconstructed leaf page, it's
      64             :      * stored here by computeLeafRecompressWALData.
      65             :      */
      66             :     char       *walinfo;        /* buffer start */
      67             :     int         walinfolen;     /* and length */
      68             : } disassembledLeaf;
      69             : 
      70             : typedef struct
      71             : {
      72             :     dlist_node  node;           /* linked list pointers */
      73             : 
      74             :     /*-------------
      75             :      * 'action' indicates the status of this in-memory segment, compared to
      76             :      * what's on disk. It is one of the GIN_SEGMENT_* action codes:
      77             :      *
      78             :      * UNMODIFIED   no changes
      79             :      * DELETE       the segment is to be removed. 'seg' and 'items' are
      80             :      *              ignored
      81             :      * INSERT       this is a completely new segment
      82             :      * REPLACE      this replaces an existing segment with new content
      83             :      * ADDITEMS     like REPLACE, but no items have been removed, and we track
      84             :      *              in detail what items have been added to this segment, in
      85             :      *              'modifieditems'
      86             :      *-------------
      87             :      */
      88             :     char        action;
      89             : 
      90             :     ItemPointerData *modifieditems;
      91             :     uint16      nmodifieditems;
      92             : 
      93             :     /*
      94             :      * The following fields represent the items in this segment. If 'items' is
      95             :      * not NULL, it contains a palloc'd array of the items in this segment. If
      96             :      * 'seg' is not NULL, it contains the items in an already-compressed
      97             :      * format. It can point to an on-disk page (!modified), or a palloc'd
      98             :      * segment in memory. If both are set, they must represent the same items.
      99             :      */
     100             :     GinPostingList *seg;
     101             :     ItemPointer items;
     102             :     int         nitems;         /* # of items in 'items', if items != NULL */
     103             : } leafSegmentInfo;
     104             : 
     105             : static ItemPointer dataLeafPageGetUncompressed(Page page, int *nitems);
     106             : static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
     107             :                                   GinBtreeStack *stack,
     108             :                                   void *insertdata, BlockNumber updateblkno,
     109             :                                   Page *newlpage, Page *newrpage);
     110             : 
     111             : static disassembledLeaf *disassembleLeaf(Page page);
     112             : static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
     113             : static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
     114             :                            int nNewItems);
     115             : 
     116             : static void computeLeafRecompressWALData(disassembledLeaf *leaf);
     117             : static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
     118             : static void dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
     119             :                                      ItemPointerData lbound, ItemPointerData rbound,
     120             :                                      Page lpage, Page rpage);
     121             : 
     122             : /*
     123             :  * Read TIDs from leaf data page to single uncompressed array. The TIDs are
     124             :  * returned in ascending order.
     125             :  *
     126             :  * advancePast is a hint, indicating that the caller is only interested in
     127             :  * TIDs > advancePast. To return all items, use ItemPointerSetMin.
     128             :  *
     129             :  * Note: This function can still return items smaller than advancePast that
     130             :  * are in the same posting list as the items of interest, so the caller must
     131             :  * still check all the returned items. But passing it allows this function to
     132             :  * skip whole posting lists.
     133             :  */
     134             : ItemPointer
     135         164 : GinDataLeafPageGetItems(Page page, int *nitems, ItemPointerData advancePast)
     136             : {
     137             :     ItemPointer result;
     138             : 
     139         164 :     if (GinPageIsCompressed(page))
     140             :     {
     141         164 :         GinPostingList *seg = GinDataLeafPageGetPostingList(page);
     142         164 :         Size        len = GinDataLeafPageGetPostingListSize(page);
     143         164 :         Pointer     endptr = ((Pointer) seg) + len;
     144             :         GinPostingList *next;
     145             : 
     146             :         /* Skip to the segment containing advancePast+1 */
     147         164 :         if (ItemPointerIsValid(&advancePast))
     148             :         {
     149          94 :             next = GinNextPostingListSegment(seg);
     150         178 :             while ((Pointer) next < endptr &&
     151          78 :                    ginCompareItemPointers(&next->first, &advancePast) <= 0)
     152             :             {
     153           6 :                 seg = next;
     154           6 :                 next = GinNextPostingListSegment(seg);
     155             :             }
     156          94 :             len = endptr - (Pointer) seg;
     157             :         }
     158             : 
     159         164 :         if (len > 0)
     160         152 :             result = ginPostingListDecodeAllSegments(seg, len, nitems);
     161             :         else
     162             :         {
     163          12 :             result = NULL;
     164          12 :             *nitems = 0;
     165             :         }
     166             :     }
     167             :     else
     168             :     {
     169           0 :         ItemPointer tmp = dataLeafPageGetUncompressed(page, nitems);
     170             : 
     171           0 :         result = palloc((*nitems) * sizeof(ItemPointerData));
     172           0 :         memcpy(result, tmp, (*nitems) * sizeof(ItemPointerData));
     173             :     }
     174             : 
     175         164 :     return result;
     176             : }
     177             : 
     178             : /*
     179             :  * Adds all TIDs from a leaf data page to the bitmap.
     180             :  */
     181             : int
     182          30 : GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm)
     183             : {
     184             :     ItemPointer uncompressed;
     185             :     int         nitems;
     186             : 
     187          30 :     if (GinPageIsCompressed(page))
     188             :     {
     189          30 :         GinPostingList *segment = GinDataLeafPageGetPostingList(page);
     190          30 :         Size        len = GinDataLeafPageGetPostingListSize(page);
     191             : 
     192          30 :         nitems = ginPostingListDecodeAllSegmentsToTbm(segment, len, tbm);
     193             :     }
     194             :     else
     195             :     {
     196           0 :         uncompressed = dataLeafPageGetUncompressed(page, &nitems);
     197             : 
     198           0 :         if (nitems > 0)
     199           0 :             tbm_add_tuples(tbm, uncompressed, nitems, false);
     200             :     }
     201             : 
     202          30 :     return nitems;
     203             : }
     204             : 
     205             : /*
     206             :  * Get pointer to the uncompressed array of items on a pre-9.4 format
     207             :  * uncompressed leaf page. The number of items in the array is returned in
     208             :  * *nitems.
     209             :  */
     210             : static ItemPointer
     211           0 : dataLeafPageGetUncompressed(Page page, int *nitems)
     212             : {
     213             :     ItemPointer items;
     214             : 
     215             :     Assert(!GinPageIsCompressed(page));
     216             : 
     217             :     /*
     218             :      * In the old pre-9.4 page format, the whole page content is used for
     219             :      * uncompressed items, and the number of items is stored in 'maxoff'
     220             :      */
     221           0 :     items = (ItemPointer) GinDataPageGetData(page);
     222           0 :     *nitems = GinPageGetOpaque(page)->maxoff;
     223             : 
     224           0 :     return items;
     225             : }
     226             : 
     227             : /*
     228             :  * Check if we should follow the right link to find the item we're searching
     229             :  * for.
     230             :  *
     231             :  * Compares inserting item pointer with the right bound of the current page.
     232             :  */
     233             : static bool
     234       26616 : dataIsMoveRight(GinBtree btree, Page page)
     235             : {
     236       26616 :     ItemPointer iptr = GinDataPageGetRightBound(page);
     237             : 
     238       26616 :     if (GinPageRightMost(page))
     239       26616 :         return false;
     240             : 
     241           0 :     if (GinPageIsDeleted(page))
     242           0 :         return true;
     243             : 
     244           0 :     return (ginCompareItemPointers(&btree->itemptr, iptr) > 0);
     245             : }
     246             : 
     247             : /*
     248             :  * Find correct PostingItem in non-leaf page. It is assumed that this is
     249             :  * the correct page, and the searched value SHOULD be on the page.
     250             :  */
     251             : static BlockNumber
     252       26692 : dataLocateItem(GinBtree btree, GinBtreeStack *stack)
     253             : {
     254             :     OffsetNumber low,
     255             :                 high,
     256             :                 maxoff;
     257       26692 :     PostingItem *pitem = NULL;
     258             :     int         result;
     259       26692 :     Page        page = BufferGetPage(stack->buffer);
     260             : 
     261             :     Assert(!GinPageIsLeaf(page));
     262             :     Assert(GinPageIsData(page));
     263             : 
     264       26692 :     if (btree->fullScan)
     265             :     {
     266          76 :         stack->off = FirstOffsetNumber;
     267          76 :         stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
     268          76 :         return btree->getLeftMostChild(btree, page);
     269             :     }
     270             : 
     271       26616 :     low = FirstOffsetNumber;
     272       26616 :     maxoff = high = GinPageGetOpaque(page)->maxoff;
     273             :     Assert(high >= low);
     274             : 
     275       26616 :     high++;
     276             : 
     277       79848 :     while (high > low)
     278             :     {
     279       53232 :         OffsetNumber mid = low + ((high - low) / 2);
     280             : 
     281       53232 :         pitem = GinDataPageGetPostingItem(page, mid);
     282             : 
     283       53232 :         if (mid == maxoff)
     284             :         {
     285             :             /*
     286             :              * Right infinity: the page was already chosen correctly with the
     287             :              * help of dataIsMoveRight.
     288             :              */
     289       26616 :             result = -1;
     290             :         }
     291             :         else
     292             :         {
     293       26616 :             pitem = GinDataPageGetPostingItem(page, mid);
     294       26616 :             result = ginCompareItemPointers(&btree->itemptr, &(pitem->key));
     295             :         }
     296             : 
     297       53232 :         if (result == 0)
     298             :         {
     299           0 :             stack->off = mid;
     300           0 :             return PostingItemGetBlockNumber(pitem);
     301             :         }
     302       53232 :         else if (result > 0)
     303       26616 :             low = mid + 1;
     304             :         else
     305       26616 :             high = mid;
     306             :     }
     307             : 
     308             :     Assert(high >= FirstOffsetNumber && high <= maxoff);
     309             : 
     310       26616 :     stack->off = high;
     311       26616 :     pitem = GinDataPageGetPostingItem(page, high);
     312       26616 :     return PostingItemGetBlockNumber(pitem);
     313             : }
     314             : 
     315             : /*
     316             :  * Find link to blkno on non-leaf page, returns offset of PostingItem
     317             :  */
     318             : static OffsetNumber
     319          46 : dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
     320             : {
     321             :     OffsetNumber i,
     322          46 :                 maxoff = GinPageGetOpaque(page)->maxoff;
     323             :     PostingItem *pitem;
     324             : 
     325             :     Assert(!GinPageIsLeaf(page));
     326             :     Assert(GinPageIsData(page));
     327             : 
     328             :     /* if page isn't changed, we return storedOff */
     329          46 :     if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
     330             :     {
     331          46 :         pitem = GinDataPageGetPostingItem(page, storedOff);
     332          46 :         if (PostingItemGetBlockNumber(pitem) == blkno)
     333          46 :             return storedOff;
     334             : 
     335             :         /*
     336             :          * We hope that the needed pointer has moved to the right. That is
     337             :          * true if there wasn't a deletion.
     338             :          */
     339           0 :         for (i = storedOff + 1; i <= maxoff; i++)
     340             :         {
     341           0 :             pitem = GinDataPageGetPostingItem(page, i);
     342           0 :             if (PostingItemGetBlockNumber(pitem) == blkno)
     343           0 :                 return i;
     344             :         }
     345             : 
     346           0 :         maxoff = storedOff - 1;
     347             :     }
     348             : 
     349             :     /* last chance */
     350           0 :     for (i = FirstOffsetNumber; i <= maxoff; i++)
     351             :     {
     352           0 :         pitem = GinDataPageGetPostingItem(page, i);
     353           0 :         if (PostingItemGetBlockNumber(pitem) == blkno)
     354           0 :             return i;
     355             :     }
     356             : 
     357           0 :     return InvalidOffsetNumber;
     358             : }
     359             : 
     360             : /*
     361             :  * Return blkno of leftmost child
     362             :  */
     363             : static BlockNumber
     364          76 : dataGetLeftMostPage(GinBtree btree, Page page)
     365             : {
     366             :     PostingItem *pitem;
     367             : 
     368             :     Assert(!GinPageIsLeaf(page));
     369             :     Assert(GinPageIsData(page));
     370             :     Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
     371             : 
     372          76 :     pitem = GinDataPageGetPostingItem(page, FirstOffsetNumber);
     373          76 :     return PostingItemGetBlockNumber(pitem);
     374             : }
     375             : 
     376             : /*
     377             :  * Add PostingItem to a non-leaf page.
     378             :  */
     379             : void
     380         258 : GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset)
     381             : {
     382         258 :     OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
     383             :     char       *ptr;
     384             : 
     385             :     Assert(PostingItemGetBlockNumber(data) != InvalidBlockNumber);
     386             :     Assert(!GinPageIsLeaf(page));
     387             : 
     388         258 :     if (offset == InvalidOffsetNumber)
     389             :     {
     390         208 :         ptr = (char *) GinDataPageGetPostingItem(page, maxoff + 1);
     391             :     }
     392             :     else
     393             :     {
     394          50 :         ptr = (char *) GinDataPageGetPostingItem(page, offset);
     395          50 :         if (offset != maxoff + 1)
     396          50 :             memmove(ptr + sizeof(PostingItem),
     397             :                     ptr,
     398          50 :                     (maxoff - offset + 1) * sizeof(PostingItem));
     399             :     }
     400         258 :     memcpy(ptr, data, sizeof(PostingItem));
     401             : 
     402         258 :     maxoff++;
     403         258 :     GinPageGetOpaque(page)->maxoff = maxoff;
     404             : 
     405             :     /*
     406             :      * Also set pd_lower to the end of the posting items, to follow the
     407             :      * "standard" page layout, so that we can squeeze out the unused space
     408             :      * from full-page images.
     409             :      */
     410         258 :     GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
     411         258 : }
     412             : 
     413             : /*
     414             :  * Delete posting item from non-leaf page
     415             :  */
     416             : void
     417          12 : GinPageDeletePostingItem(Page page, OffsetNumber offset)
     418             : {
     419          12 :     OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
     420             : 
     421             :     Assert(!GinPageIsLeaf(page));
     422             :     Assert(offset >= FirstOffsetNumber && offset <= maxoff);
     423             : 
     424          12 :     if (offset != maxoff)
     425          12 :         memmove(GinDataPageGetPostingItem(page, offset),
     426          12 :                 GinDataPageGetPostingItem(page, offset + 1),
     427          12 :                 sizeof(PostingItem) * (maxoff - offset));
     428             : 
     429          12 :     maxoff--;
     430          12 :     GinPageGetOpaque(page)->maxoff = maxoff;
     431             : 
     432          12 :     GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
     433          12 : }
     434             : 
     435             : /*
     436             :  * Prepare to insert data on a leaf data page.
     437             :  *
     438             :  * If it will fit, return GPTP_INSERT after doing whatever setup is needed
     439             :  * before we enter the insertion critical section.  *ptp_workspace can be
     440             :  * set to pass information along to the execPlaceToPage function.
     441             :  *
     442             :  * If it won't fit, perform a page split and return two temporary page
     443             :  * images into *newlpage and *newrpage, with result GPTP_SPLIT.
     444             :  *
     445             :  * In neither case should the given page buffer be modified here.
     446             :  */
     447             : static GinPlaceToPageRC
     448       49556 : dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
     449             :                          void *insertdata,
     450             :                          void **ptp_workspace,
     451             :                          Page *newlpage, Page *newrpage)
     452             : {
     453       49556 :     GinBtreeDataLeafInsertData *items = insertdata;
     454       49556 :     ItemPointer newItems = &items->items[items->curitem];
     455       49556 :     int         maxitems = items->nitem - items->curitem;
     456       49556 :     Page        page = BufferGetPage(buf);
     457             :     int         i;
     458             :     ItemPointerData rbound;
     459             :     ItemPointerData lbound;
     460             :     bool        needsplit;
     461             :     bool        append;
     462             :     int         segsize;
     463             :     Size        freespace;
     464             :     disassembledLeaf *leaf;
     465             :     leafSegmentInfo *lastleftinfo;
     466             :     ItemPointerData maxOldItem;
     467             :     ItemPointerData remaining;
     468             : 
     469       49556 :     rbound = *GinDataPageGetRightBound(page);
     470             : 
     471             :     /*
     472             :      * Count how many of the new items belong to this page.
     473             :      */
     474       49556 :     if (!GinPageRightMost(page))
     475             :     {
     476           0 :         for (i = 0; i < maxitems; i++)
     477             :         {
     478           0 :             if (ginCompareItemPointers(&newItems[i], &rbound) > 0)
     479             :             {
     480             :                 /*
     481             :                  * This needs to go to some other location in the tree. (The
     482             :                  * caller should've chosen the insert location so that at
     483             :                  * least the first item goes here.)
     484             :                  */
     485             :                 Assert(i > 0);
     486           0 :                 break;
     487             :             }
     488             :         }
     489           0 :         maxitems = i;
     490             :     }
     491             : 
     492             :     /* Disassemble the data on the page */
     493       49556 :     leaf = disassembleLeaf(page);
     494             : 
     495             :     /*
     496             :      * Are we appending to the end of the page? IOW, are all the new items
     497             :      * larger than any of the existing items.
     498             :      */
     499       49556 :     if (!dlist_is_empty(&leaf->segments))
     500             :     {
     501       49556 :         lastleftinfo = dlist_container(leafSegmentInfo, node,
     502             :                                        dlist_tail_node(&leaf->segments));
     503       49556 :         if (!lastleftinfo->items)
     504       49556 :             lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg,
     505             :                                                        &lastleftinfo->nitems);
     506       49556 :         maxOldItem = lastleftinfo->items[lastleftinfo->nitems - 1];
     507       49556 :         if (ginCompareItemPointers(&newItems[0], &maxOldItem) >= 0)
     508       49556 :             append = true;
     509             :         else
     510           0 :             append = false;
     511             :     }
     512             :     else
     513             :     {
     514           0 :         ItemPointerSetMin(&maxOldItem);
     515           0 :         append = true;
     516             :     }
     517             : 
     518             :     /*
     519             :      * If we're appending to the end of the page, we will append as many items
     520             :      * as we can fit (after splitting), and stop when the pages become full.
     521             :      * Otherwise we have to limit the number of new items to insert, because
     522             :      * once we start packing we can't just stop when we run out of space,
     523             :      * because we must make sure that all the old items still fit.
     524             :      */
     525       49556 :     if (GinPageIsCompressed(page))
     526       49556 :         freespace = GinDataLeafPageGetFreeSpace(page);
     527             :     else
     528           0 :         freespace = 0;
     529       49556 :     if (append)
     530             :     {
     531             :         /*
     532             :          * Even when appending, trying to append more items than will fit is
     533             :          * not completely free, because we will merge the new items and old
     534             :          * items into an array below. In the best case, every new item fits in
     535             :          * a single byte, and we can use all the free space on the old page as
     536             :          * well as the new page. For simplicity, ignore segment overhead etc.
     537             :          */
     538       49556 :         maxitems = Min(maxitems, freespace + GinDataPageMaxDataSize);
     539             :     }
     540             :     else
     541             :     {
     542             :         /*
     543             :          * Calculate a conservative estimate of how many new items we can fit
     544             :          * on the two pages after splitting.
     545             :          *
     546             :          * We can use any remaining free space on the old page to store full
     547             :          * segments, as well as the new page. Each full-sized segment can hold
     548             :          * at least MinTuplesPerSegment items
     549             :          */
     550             :         int         nnewsegments;
     551             : 
     552           0 :         nnewsegments = freespace / GinPostingListSegmentMaxSize;
     553           0 :         nnewsegments += GinDataPageMaxDataSize / GinPostingListSegmentMaxSize;
     554           0 :         maxitems = Min(maxitems, nnewsegments * MinTuplesPerSegment);
     555             :     }
     556             : 
     557             :     /* Add the new items to the segment list */
     558       49556 :     if (!addItemsToLeaf(leaf, newItems, maxitems))
     559             :     {
     560             :         /* all items were duplicates, we have nothing to do */
     561           0 :         items->curitem += maxitems;
     562             : 
     563           0 :         return GPTP_NO_WORK;
     564             :     }
     565             : 
     566             :     /*
     567             :      * Pack the items back to compressed segments, ready for writing to disk.
     568             :      */
     569       49556 :     needsplit = leafRepackItems(leaf, &remaining);
     570             : 
     571             :     /*
     572             :      * Did all the new items fit?
     573             :      *
     574             :      * If we're appending, it's OK if they didn't. But as a sanity check,
     575             :      * verify that all the old items fit.
     576             :      */
     577       49556 :     if (ItemPointerIsValid(&remaining))
     578             :     {
     579          40 :         if (!append || ItemPointerCompare(&maxOldItem, &remaining) >= 0)
     580           0 :             elog(ERROR, "could not split GIN page; all old items didn't fit");
     581             : 
     582             :         /* Count how many of the new items did fit. */
     583      304842 :         for (i = 0; i < maxitems; i++)
     584             :         {
     585      304842 :             if (ginCompareItemPointers(&newItems[i], &remaining) >= 0)
     586          40 :                 break;
     587             :         }
     588          40 :         if (i == 0)
     589           0 :             elog(ERROR, "could not split GIN page; no new items fit");
     590          40 :         maxitems = i;
     591             :     }
     592             : 
     593       49556 :     if (!needsplit)
     594             :     {
     595             :         /*
     596             :          * Great, all the items fit on a single page.  If needed, prepare data
     597             :          * for a WAL record describing the changes we'll make.
     598             :          */
     599       49406 :         if (RelationNeedsWAL(btree->index) && !btree->isBuild)
     600       49406 :             computeLeafRecompressWALData(leaf);
     601             : 
     602             :         /*
     603             :          * We're ready to enter the critical section, but
     604             :          * dataExecPlaceToPageLeaf will need access to the "leaf" data.
     605             :          */
     606       49406 :         *ptp_workspace = leaf;
     607             : 
     608       49406 :         if (append)
     609       49406 :             elog(DEBUG2, "appended %d new items to block %u; %d bytes (%d to go)",
     610             :                  maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize,
     611             :                  items->nitem - items->curitem - maxitems);
     612             :         else
     613           0 :             elog(DEBUG2, "inserted %d new items to block %u; %d bytes (%d to go)",
     614             :                  maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize,
     615             :                  items->nitem - items->curitem - maxitems);
     616             :     }
     617             :     else
     618             :     {
     619             :         /*
     620             :          * Have to split.
     621             :          *
     622             :          * leafRepackItems already divided the segments between the left and
     623             :          * the right page. It filled the left page as full as possible, and
     624             :          * put the rest to the right page. When building a new index, that's
     625             :          * good, because the table is scanned from beginning to end and there
     626             :          * won't be any more insertions to the left page during the build.
     627             :          * This packs the index as tight as possible. But otherwise, split
     628             :          * 50/50, by moving segments from the left page to the right page
     629             :          * until they're balanced.
     630             :          *
     631             :          * As a further heuristic, when appending items to the end of the
     632             :          * page, try to make the left page 75% full, on the assumption that
     633             :          * subsequent insertions will probably also go to the end. This packs
     634             :          * the index somewhat tighter when appending to a table, which is very
     635             :          * common.
     636             :          */
     637         150 :         if (!btree->isBuild)
     638             :         {
     639         260 :             while (dlist_has_prev(&leaf->segments, leaf->lastleft))
     640             :             {
     641         260 :                 lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft);
     642             : 
     643             :                 /* ignore deleted segments */
     644         260 :                 if (lastleftinfo->action != GIN_SEGMENT_DELETE)
     645             :                 {
     646         260 :                     segsize = SizeOfGinPostingList(lastleftinfo->seg);
     647             : 
     648             :                     /*
     649             :                      * Note that we check that the right page doesn't become
     650             :                      * more full than the left page even when appending. It's
     651             :                      * possible that we added enough items to make both pages
     652             :                      * more than 75% full.
     653             :                      */
     654         260 :                     if ((leaf->lsize - segsize) - (leaf->rsize + segsize) < 0)
     655          48 :                         break;
     656         212 :                     if (append)
     657             :                     {
     658         212 :                         if ((leaf->lsize - segsize) < (BLCKSZ * 3) / 4)
     659          10 :                             break;
     660             :                     }
     661             : 
     662         202 :                     leaf->lsize -= segsize;
     663         202 :                     leaf->rsize += segsize;
     664             :                 }
     665         202 :                 leaf->lastleft = dlist_prev_node(&leaf->segments, leaf->lastleft);
     666             :             }
     667             :         }
     668             :         Assert(leaf->lsize <= GinDataPageMaxDataSize);
     669             :         Assert(leaf->rsize <= GinDataPageMaxDataSize);
     670             : 
     671             :         /*
     672             :          * Fetch the max item in the left page's last segment; it becomes the
     673             :          * right bound of the page.
     674             :          */
     675         150 :         lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft);
     676         150 :         if (!lastleftinfo->items)
     677         150 :             lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg,
     678             :                                                        &lastleftinfo->nitems);
     679         150 :         lbound = lastleftinfo->items[lastleftinfo->nitems - 1];
     680             : 
     681             :         /*
     682             :          * Now allocate a couple of temporary page images, and fill them.
     683             :          */
     684         150 :         *newlpage = palloc(BLCKSZ);
     685         150 :         *newrpage = palloc(BLCKSZ);
     686             : 
     687         150 :         dataPlaceToPageLeafSplit(leaf, lbound, rbound,
     688             :                                  *newlpage, *newrpage);
     689             : 
     690             :         Assert(GinPageRightMost(page) ||
     691             :                ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
     692             :                                       GinDataPageGetRightBound(*newrpage)) < 0);
     693             : 
     694         150 :         if (append)
     695         150 :             elog(DEBUG2, "appended %d items to block %u; split %d/%d (%d to go)",
     696             :                  maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, (int) leaf->rsize,
     697             :                  items->nitem - items->curitem - maxitems);
     698             :         else
     699           0 :             elog(DEBUG2, "inserted %d items to block %u; split %d/%d (%d to go)",
     700             :                  maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, (int) leaf->rsize,
     701             :                  items->nitem - items->curitem - maxitems);
     702             :     }
     703             : 
     704       49556 :     items->curitem += maxitems;
     705             : 
     706       49556 :     return needsplit ? GPTP_SPLIT : GPTP_INSERT;
     707             : }
     708             : 
     709             : /*
     710             :  * Perform data insertion after beginPlaceToPage has decided it will fit.
     711             :  *
     712             :  * This is invoked within a critical section, and XLOG record creation (if
     713             :  * needed) is already started.  When WAL is needed, this function itself
                     :  * registers the target buffer in slot 0 and attaches the prepared WAL
                     :  * data to it.
                     :  *
                     :  * ptp_workspace is treated as the disassembledLeaf describing the new page
                     :  * contents (the preparation step stores it in *ptp_workspace); its walinfo
                     :  * and walinfolen fields supply the WAL payload.  stack and insertdata are
                     :  * not used here.
     714             :  */
     715             : static void
     716       49406 : dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
     717             :                         void *insertdata, void *ptp_workspace)
     718             : {
     719       49406 :     disassembledLeaf *leaf = (disassembledLeaf *) ptp_workspace;
     720             : 
     721             :     /* Apply changes to page */
     722       49406 :     dataPlaceToPageLeafRecompress(buf, leaf);
     723             : 
     724       49406 :     MarkBufferDirty(buf);
     725             : 
     726             :     /* If needed, register WAL data built by computeLeafRecompressWALData */
     727       49406 :     if (RelationNeedsWAL(btree->index) && !btree->isBuild)
     728             :     {
     729       49406 :         XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
     730       49406 :         XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
     731             :     }
     732       49406 : }
     733             : 
     734             : /*
     735             :  * Vacuum a posting tree leaf page.
                     :  *
                     :  * Every segment on the page is decoded and passed through
                     :  * ginVacuumItemPointers().  A segment for which that call returns a
                     :  * non-NULL array is either re-compressed from the returned items
                     :  * (GIN_SEGMENT_REPLACE) or, when no items are left, dropped
                     :  * (GIN_SEGMENT_DELETE).  If any segment changed, the page is rewritten
                     :  * inside a critical section and, when the index needs WAL, an
                     :  * XLOG_GIN_VACUUM_DATA_LEAF_PAGE record is inserted.
                     :  *
                     :  * indexrel is only consulted to decide whether WAL is needed; buffer holds
                     :  * the leaf page; gvs is handed through to ginVacuumItemPointers().
                     :  *
                     :  * NOTE(review): the locking requirements on "buffer" are not visible in
                     :  * this function -- confirm against the callers.
     736             :  */
     737             : void
     738          54 : ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
     739             : {
     740          54 :     Page        page = BufferGetPage(buffer);
     741             :     disassembledLeaf *leaf;
     742          54 :     bool        removedsomething = false;
     743             :     dlist_iter  iter;
     744             : 
     745          54 :     leaf = disassembleLeaf(page);
     746             : 
     747             :     /* Vacuum each segment. */
     748        1062 :     dlist_foreach(iter, &leaf->segments)
     749             :     {
     750        1008 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur);
     751             :         int         oldsegsize;
     752             :         ItemPointer cleaned;
     753             :         int         ncleaned;
     754             : 
     755        1008 :         if (!seginfo->items)
     756        1008 :             seginfo->items = ginPostingListDecode(seginfo->seg,
     757             :                                                   &seginfo->nitems);
     758        1008 :         if (seginfo->seg)
     759        1008 :             oldsegsize = SizeOfGinPostingList(seginfo->seg);
     760             :         else
     761           0 :             oldsegsize = GinDataPageMaxDataSize; /* no compressed segment: use the page data limit as the size passed to ginCompressPostingList below */
     762             : 
     763        1008 :         cleaned = ginVacuumItemPointers(gvs,
     764             :                                         seginfo->items,
     765             :                                         seginfo->nitems,
     766             :                                         &ncleaned);
     767        1008 :         pfree(seginfo->items);
     768        1008 :         seginfo->items = NULL;
     769        1008 :         seginfo->nitems = 0;
     770        1008 :         if (cleaned) /* non-NULL result is treated as "this segment changed" */
     771             :         {
     772         936 :             if (ncleaned > 0)
     773             :             {
     774             :                 int         npacked;
     775             : 
     776          36 :                 seginfo->seg = ginCompressPostingList(cleaned,
     777             :                                                       ncleaned,
     778             :                                                       oldsegsize,
     779             :                                                       &npacked);
     780             :                 /* Removing an item never increases the size of the segment */
     781          36 :                 if (npacked != ncleaned)
     782           0 :                     elog(ERROR, "could not fit vacuumed posting list");
     783          36 :                 seginfo->action = GIN_SEGMENT_REPLACE;
     784             :             }
     785             :             else
     786             :             {
     787         900 :                 seginfo->seg = NULL;
     788         900 :                 seginfo->items = NULL; /* already NULL, reset just above */
     789         900 :                 seginfo->action = GIN_SEGMENT_DELETE;
     790             :             }
     791         936 :             seginfo->nitems = ncleaned;
     792             : 
     793         936 :             removedsomething = true;
     794             :         }
     795             :     }
     796             : 
     797             :     /*
     798             :      * If we removed any items, reconstruct the page from the pieces.
     799             :      *
     800             :      * We don't try to re-encode the segments here, even though some of them
     801             :      * might be really small now that we've removed some items from them. It
     802             :      * seems like a waste of effort, as there isn't really any benefit from
     803             :      * larger segments per se; larger segments only help to pack more items in
     804             :      * the same space. We might as well delay doing that until the next
     805             :      * insertion, which will need to re-encode at least part of the page
     806             :      * anyway.
     807             :      *
     808             :      * Also note that if the page was in the uncompressed, pre-9.4 format
     809             :      * before, it is now represented as one huge segment that contains all
     810             :      * the items. It might make sense to split that, to speed up random
     811             :      * access, but we don't bother. You'll have to REINDEX anyway if you want
     812             :      * the full gain of the new tighter index format.
     813             :      */
     814          54 :     if (removedsomething)
     815             :     {
     816             :         bool        modified;
     817             : 
     818             :         /*
     819             :          * Make sure we have a palloc'd copy of all segments, after the first
     820             :          * segment that is modified. (dataPlaceToPageLeafRecompress requires
     821             :          * this).
                     :          *
                     :          * Deleted segments are skipped: they have no data to copy.
     822             :          */
     823          54 :         modified = false;
     824        1062 :         dlist_foreach(iter, &leaf->segments)
     825             :         {
     826        1008 :             leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
     827             :                                                        iter.cur);
     828             : 
     829        1008 :             if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
     830         936 :                 modified = true;
     831        1008 :             if (modified && seginfo->action != GIN_SEGMENT_DELETE)
     832             :             {
     833         108 :                 int         segsize = SizeOfGinPostingList(seginfo->seg);
     834         108 :                 GinPostingList *tmp = (GinPostingList *) palloc(segsize);
     835             : 
     836         108 :                 memcpy(tmp, seginfo->seg, segsize);
     837         108 :                 seginfo->seg = tmp;
     838             :             }
     839             :         }
     840             : 
     841          54 :         if (RelationNeedsWAL(indexrel))
     842          18 :             computeLeafRecompressWALData(leaf); /* allocates, so done before the critical section */
     843             : 
     844             :         /* Apply changes to page */
     845          54 :         START_CRIT_SECTION();
     846             : 
     847          54 :         dataPlaceToPageLeafRecompress(buffer, leaf);
     848             : 
     849          54 :         MarkBufferDirty(buffer);
     850             : 
     851          54 :         if (RelationNeedsWAL(indexrel))
     852             :         {
     853             :             XLogRecPtr  recptr;
     854             : 
     855          18 :             XLogBeginInsert();
     856          18 :             XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
     857          18 :             XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
     858          18 :             recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE);
     859          18 :             PageSetLSN(page, recptr);
     860             :         }
     861             : 
     862          54 :         END_CRIT_SECTION();
     863             :     }
     864          54 : }
     865             : 
     866             : /*
     867             :  * Construct a ginxlogRecompressDataLeaf record representing the changes
     868             :  * in *leaf.  (Because this requires a palloc, we have to do it before
     869             :  * we enter the critical section that actually updates the page.)
                     :  *
                     :  * The buffer built here starts with a ginxlogRecompressDataLeaf header
                     :  * whose nactions field is the number of segments whose action is not
                     :  * GIN_SEGMENT_UNMODIFIED.  For each such segment it then holds one byte of
                     :  * segment number, one byte of action, and an action-specific payload:
                     :  *
                     :  *   DELETE           - no payload
                     :  *   ADDITEMS         - uint16 item count, then that many ItemPointerData
                     :  *   INSERT, REPLACE  - the compressed segment, padded to SHORTALIGN
                     :  *
                     :  * The segment number is advanced past every segment except those logged
                     :  * as INSERT.  NOTE(review): presumably the numbering refers to segments
                     :  * already present on the page -- confirm against the redo routine.
                     :  *
                     :  * The result is passed back in leaf->walinfo and leaf->walinfolen.
     870             :  */
     871             : static void
     872       49424 : computeLeafRecompressWALData(disassembledLeaf *leaf)
     873             : {
     874       49424 :     int         nmodified = 0;
     875             :     char       *walbufbegin;
     876             :     char       *walbufend;
     877             :     dlist_iter  iter;
     878             :     int         segno;
     879             :     ginxlogRecompressDataLeaf *recompress_xlog;
     880             : 
     881             :     /* Count the modified segments */
     882      892054 :     dlist_foreach(iter, &leaf->segments)
     883             :     {
     884      842630 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
     885             :                                                    iter.cur);
     886             : 
     887      842630 :         if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
     888       49568 :             nmodified++;
     889             :     }
     890             : 
     891             :     walbufbegin =
     892       49424 :         palloc(sizeof(ginxlogRecompressDataLeaf) +
     893             :                BLCKSZ +         /* max size needed to hold the segment data */
     894       49424 :                nmodified * 2    /* (segno + action) per action */
     895             :         );
     896       49424 :     walbufend = walbufbegin;
     897             : 
     898       49424 :     recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
     899       49424 :     walbufend += sizeof(ginxlogRecompressDataLeaf);
     900             : 
     901       49424 :     recompress_xlog->nactions = nmodified;
     902             : 
     903       49424 :     segno = 0;
     904      892054 :     dlist_foreach(iter, &leaf->segments)
     905             :     {
     906      842630 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
     907             :                                                    iter.cur);
     908      842630 :         int         segsize = 0;
     909             :         int         datalen;
     910      842630 :         uint8       action = seginfo->action;
     911             : 
     912      842630 :         if (action == GIN_SEGMENT_UNMODIFIED)
     913             :         {
     914      793062 :             segno++;
     915      793062 :             continue;
     916             :         }
     917             : 
     918       49568 :         if (action != GIN_SEGMENT_DELETE)
     919       49508 :             segsize = SizeOfGinPostingList(seginfo->seg);
     920             : 
     921             :         /*
     922             :          * If storing the uncompressed list of added item pointers would take
     923             :          * more space than storing the compressed segment as is, log the
     924             :          * whole compressed segment instead, by switching to a REPLACE action.
     925             :          */
     926       49568 :         if (action == GIN_SEGMENT_ADDITEMS &&
     927       49160 :             seginfo->nmodifieditems * sizeof(ItemPointerData) > segsize)
     928             :         {
     929           0 :             action = GIN_SEGMENT_REPLACE;
     930             :         }
     931             : 
     932       49568 :         *((uint8 *) (walbufend++)) = segno; /* segment number is stored in a single byte */
     933       49568 :         *(walbufend++) = action;
     934             : 
     935       49568 :         switch (action)
     936             :         {
     937          60 :             case GIN_SEGMENT_DELETE:
     938          60 :                 datalen = 0;
     939          60 :                 break;
     940             : 
     941       49160 :             case GIN_SEGMENT_ADDITEMS:
     942       49160 :                 datalen = seginfo->nmodifieditems * sizeof(ItemPointerData);
     943       49160 :                 memcpy(walbufend, &seginfo->nmodifieditems, sizeof(uint16));
     944       49160 :                 memcpy(walbufend + sizeof(uint16), seginfo->modifieditems, datalen);
     945       49160 :                 datalen += sizeof(uint16);
     946       49160 :                 break;
     947             : 
     948         348 :             case GIN_SEGMENT_INSERT:
     949             :             case GIN_SEGMENT_REPLACE:
     950         348 :                 datalen = SHORTALIGN(segsize);
     951         348 :                 memcpy(walbufend, seginfo->seg, segsize);
     952         348 :                 break;
     953             : 
     954           0 :             default:
     955           0 :                 elog(ERROR, "unexpected GIN leaf action %d", action);
     956             :         }
     957       49568 :         walbufend += datalen;
     958             : 
     959       49568 :         if (action != GIN_SEGMENT_INSERT)
     960       49268 :             segno++;
     961             :     }
     962             : 
     963             :     /* Pass back the constructed info via *leaf */
     964       49424 :     leaf->walinfo = walbufbegin;
     965       49424 :     leaf->walinfolen = walbufend - walbufbegin;
     966       49424 : }
     967             : 
     968             : /*
     969             :  * Assemble a disassembled posting tree leaf page back to a buffer.
     970             :  *
     971             :  * This just updates the target buffer; WAL stuff is caller's responsibility.
     972             :  *
     973             :  * NOTE: The segment pointers must not point directly to the same buffer,
     974             :  * except for segments that have not been modified and whose preceding
     975             :  * segments have not been modified either.
                     :  *
                     :  * Segments are laid out back-to-back starting at the page's posting list
                     :  * area.  Leading unmodified segments are not copied; only the write
                     :  * position is advanced past them.  From the first modified segment on,
                     :  * every remaining segment is copied.  Segments marked GIN_SEGMENT_DELETE
                     :  * are skipped.  Finally the page's data size is set to the total size of
                     :  * the segments that were kept.
     976             :  */
     977             : static void
     978       49460 : dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
     979             : {
     980       49460 :     Page        page = BufferGetPage(buf);
     981             :     char       *ptr;
     982             :     int         newsize;
     983       49460 :     bool        modified = false;
     984             :     dlist_iter  iter;
     985             :     int         segsize;
     986             : 
     987             :     /*
     988             :      * If the page was in pre-9.4 format before, convert the header, and force
     989             :      * all segments to be copied to the page whether they were modified or
     990             :      * not.
     991             :      */
     992       49460 :     if (!GinPageIsCompressed(page))
     993             :     {
     994             :         Assert(leaf->oldformat);
     995           0 :         GinPageSetCompressed(page);
     996           0 :         GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
     997           0 :         modified = true;
     998             :     }
     999             : 
    1000       49460 :     ptr = (char *) GinDataLeafPageGetPostingList(page);
    1001       49460 :     newsize = 0;
    1002      892936 :     dlist_foreach(iter, &leaf->segments)
    1003             :     {
    1004      843476 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur);
    1005             : 
    1006      843476 :         if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
    1007       50414 :             modified = true; /* sticky: stays set for all later segments */
    1008             : 
    1009      843476 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1010             :         {
    1011      842576 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1012             : 
    1013      842576 :             if (modified)
    1014       49586 :                 memcpy(ptr, seginfo->seg, segsize);
    1015             : 
    1016      842576 :             ptr += segsize;
    1017      842576 :             newsize += segsize;
    1018             :         }
    1019             :     }
    1020             : 
    1021             :     Assert(newsize <= GinDataPageMaxDataSize);
    1022       49460 :     GinDataPageSetDataSize(page, newsize);
    1023       49460 : }
    1024             : 
    1025             : /*
    1026             :  * Like dataPlaceToPageLeafRecompress, but writes the disassembled leaf
    1027             :  * segments to two pages instead of one.
    1028             :  *
    1029             :  * This is different from the non-split cases in that this does not modify
    1030             :  * the original page directly, but writes to temporary in-memory copies of
    1031             :  * the new left and right pages.
                     :  *
                     :  * Segments from the head of leaf->segments up to and including
                     :  * leaf->lastleft are copied to lpage; all following segments are copied to
                     :  * rpage.  Segments marked GIN_SEGMENT_DELETE are skipped on both sides.
                     :  * lbound and rbound are stored as the right bounds of lpage and rpage
                     :  * respectively.  Both pages are initialized here as compressed data leaf
                     :  * pages of BLCKSZ bytes, so the caller only needs to supply the memory.
                     :  *
                     :  * The byte totals written are cross-checked (in assert-enabled builds)
                     :  * against leaf->lsize and leaf->rsize.
                     :  *
                     :  * NOTE(review): the right-hand loop uses the node after lastleft before
                     :  * testing for the end of the list, so lastleft is assumed not to be the
                     :  * last segment -- confirm that the caller guarantees this.
    1032             :  */
    1033             : static void
    1034         150 : dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
    1035             :                          ItemPointerData lbound, ItemPointerData rbound,
    1036             :                          Page lpage, Page rpage)
    1037             : {
    1038             :     char       *ptr;
    1039             :     int         segsize;
    1040             :     int         lsize;
    1041             :     int         rsize;
    1042             :     dlist_node *node;
    1043             :     dlist_node *firstright;
    1044             :     leafSegmentInfo *seginfo;
    1045             : 
    1046             :     /* Initialize temporary pages to hold the new left and right pages */
    1047         150 :     GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
    1048         150 :     GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
    1049             : 
    1050             :     /*
    1051             :      * Copy the segments that go to the left page.
    1052             :      *
    1053             :      * XXX: We should skip copying the unmodified part of the left page, like
    1054             :      * we do when recompressing.
    1055             :      */
    1056         150 :     lsize = 0;
    1057         150 :     ptr = (char *) GinDataLeafPageGetPostingList(lpage);
    1058         150 :     firstright = dlist_next_node(&leaf->segments, leaf->lastleft);
    1059        3586 :     for (node = dlist_head_node(&leaf->segments);
    1060             :          node != firstright;
    1061        3436 :          node = dlist_next_node(&leaf->segments, node))
    1062             :     {
    1063        3436 :         seginfo = dlist_container(leafSegmentInfo, node, node);
    1064             : 
    1065        3436 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1066             :         {
    1067        3436 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1068        3436 :             memcpy(ptr, seginfo->seg, segsize);
    1069        3436 :             ptr += segsize;
    1070        3436 :             lsize += segsize;
    1071             :         }
    1072             :     }
    1073             :     Assert(lsize == leaf->lsize);
    1074         150 :     GinDataPageSetDataSize(lpage, lsize);
    1075         150 :     *GinDataPageGetRightBound(lpage) = lbound;
    1076             : 
    1077             :     /* Copy the segments that go to the right page */
    1078         150 :     ptr = (char *) GinDataLeafPageGetPostingList(rpage);
    1079         150 :     rsize = 0;
    1080         150 :     for (node = firstright;
    1081             :          ;
    1082        1944 :          node = dlist_next_node(&leaf->segments, node))
    1083             :     {
    1084        2094 :         seginfo = dlist_container(leafSegmentInfo, node, node);
    1085             : 
    1086        2094 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1087             :         {
    1088        2094 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1089        2094 :             memcpy(ptr, seginfo->seg, segsize);
    1090        2094 :             ptr += segsize;
    1091        2094 :             rsize += segsize;
    1092             :         }
    1093             : 
    1094        2094 :         if (!dlist_has_next(&leaf->segments, node)) /* loop exit: last segment handled */
    1095         150 :             break;
    1096             :     }
    1097             :     Assert(rsize == leaf->rsize);
    1098         150 :     GinDataPageSetDataSize(rpage, rsize);
    1099         150 :     *GinDataPageGetRightBound(rpage) = rbound;
    1100         150 : }
    1101             : 
    1102             : /*
    1103             :  * Prepare to insert data on an internal data page.
    1104             :  *
    1105             :  * If it will fit, return GPTP_INSERT after doing whatever setup is needed
    1106             :  * before we enter the insertion critical section.  *ptp_workspace can be
    1107             :  * set to pass information along to the execPlaceToPage function.
    1108             :  *
    1109             :  * If it won't fit, perform a page split and return two temporary page
    1110             :  * images into *newlpage and *newrpage, with result GPTP_SPLIT.
    1111             :  *
    1112             :  * In neither case should the given page buffer be modified here.
    1113             :  *
    1114             :  * Note: on insertion to an internal node, in addition to inserting the given
    1115             :  * item, the downlink of the existing item at stack->off will be updated to
    1116             :  * point to updateblkno.
                     :  *
                     :  * "Fits" here means the page's free space, as reported by
                     :  * GinNonLeafDataPageGetFreeSpace, is at least sizeof(PostingItem).  This
                     :  * function itself never stores anything in *ptp_workspace, and the split
                     :  * itself is delegated to dataSplitPageInternal.
    1117             :  */
    1118             : static GinPlaceToPageRC
    1119          46 : dataBeginPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1120             :                              void *insertdata, BlockNumber updateblkno,
    1121             :                              void **ptp_workspace,
    1122             :                              Page *newlpage, Page *newrpage)
    1123             : {
    1124          46 :     Page        page = BufferGetPage(buf);
    1125             : 
    1126             :     /* If it doesn't fit, deal with split case */
    1127          46 :     if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
    1128             :     {
    1129           0 :         dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
    1130             :                               newlpage, newrpage);
    1131           0 :         return GPTP_SPLIT;
    1132             :     }
    1133             : 
    1134             :     /* Else, we're ready to proceed with insertion */
    1135          46 :     return GPTP_INSERT;
    1136             : }
    1137             : 
    1138             : /*
    1139             :  * Perform data insertion after beginPlaceToPage has decided it will fit.
    1140             :  *
    1141             :  * This is invoked within a critical section, and XLOG record creation (if
    1142             :  * needed) is already started.  The target buffer is registered in slot 0.
                     :  *
                     :  * Internal-page variant.  Two changes are made to the page in buf:
                     :  *   1. the existing PostingItem at stack->off gets its block number
                     :  *      replaced by updateblkno;
                     :  *   2. the PostingItem that insertdata points to is added at stack->off
                     :  *      via GinDataPageAddPostingItem.
                     :  * The buffer is then marked dirty.  If the index needs WAL and this is not
                     :  * an index build, this function registers buf as block 0 with
                     :  * REGBUF_STANDARD and attaches the offset and the new item to it as
                     :  * buffer data.
                     :  *
                     :  * ptp_workspace is not used here.
    1143             :  */
    1144             : static void
    1145          46 : dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1146             :                             void *insertdata, BlockNumber updateblkno,
    1147             :                             void *ptp_workspace)
    1148             : {
    1149          46 :     Page        page = BufferGetPage(buf);
    1150          46 :     OffsetNumber off = stack->off;
    1151             :     PostingItem *pitem;
    1152             : 
    1153             :     /* Update existing downlink to point to next page (on internal page) */
    1154          46 :     pitem = GinDataPageGetPostingItem(page, off);
    1155          46 :     PostingItemSetBlockNumber(pitem, updateblkno);
    1156             : 
    1157             :     /* Add new item (pitem is reused to point at the caller's item) */
    1158          46 :     pitem = (PostingItem *) insertdata;
    1159          46 :     GinDataPageAddPostingItem(page, pitem, off);
    1160             : 
    1161          46 :     MarkBufferDirty(buf);
    1162             : 
    1163          46 :     if (RelationNeedsWAL(btree->index) && !btree->isBuild)
    1164             :     {
    1165             :         /*
    1166             :          * This must be static, because it has to survive until XLogInsert,
    1167             :          * and we can't palloc here.  Ugly, but the XLogInsert infrastructure
    1168             :          * isn't reentrant anyway.
    1169             :          */
    1170             :         static ginxlogInsertDataInternal data;
    1171             : 
    1172          18 :         data.offset = off;
    1173          18 :         data.newitem = *pitem;
    1174             : 
    1175          18 :         XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
    1176          18 :         XLogRegisterBufData(0, (char *) &data,
    1177             :                             sizeof(ginxlogInsertDataInternal));
    1178             :     }
    1179          46 : }
    1180             : 
    1181             : /*
    1182             :  * Prepare to insert data on a posting-tree data page.
    1183             :  *
    1184             :  * If it will fit, return GPTP_INSERT after doing whatever setup is needed
    1185             :  * before we enter the insertion critical section.  *ptp_workspace can be
    1186             :  * set to pass information along to the execPlaceToPage function.
    1187             :  *
    1188             :  * If it won't fit, perform a page split and return two temporary page
    1189             :  * images into *newlpage and *newrpage, with result GPTP_SPLIT.
    1190             :  *
    1191             :  * In neither case should the given page buffer be modified here.
    1192             :  *
    1193             :  * Note: on insertion to an internal node, in addition to inserting the given
    1194             :  * item, the downlink of the existing item at stack->off will be updated to
    1195             :  * point to updateblkno.
    1196             :  *
    1197             :  * Calls relevant function for internal or leaf page because they are handled
    1198             :  * very differently.
                     :  *
                     :  * The page in buf must be a GIN data page (asserted).  This function only
                     :  * dispatches on GinPageIsLeaf and returns the callee's result unchanged.
                     :  * updateblkno is forwarded to the internal-page variant only; the leaf
                     :  * variant is called without it.
    1199             :  */
    1200             : static GinPlaceToPageRC
    1201       49602 : dataBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1202             :                      void *insertdata, BlockNumber updateblkno,
    1203             :                      void **ptp_workspace,
    1204             :                      Page *newlpage, Page *newrpage)
    1205             : {
    1206       49602 :     Page        page = BufferGetPage(buf);
    1207             : 
    1208             :     Assert(GinPageIsData(page));
    1209             : 
    1210       49602 :     if (GinPageIsLeaf(page))
    1211       49556 :         return dataBeginPlaceToPageLeaf(btree, buf, stack, insertdata,
    1212             :                                         ptp_workspace,
    1213             :                                         newlpage, newrpage);
    1214             :     else
    1215          46 :         return dataBeginPlaceToPageInternal(btree, buf, stack,
    1216             :                                             insertdata, updateblkno,
    1217             :                                             ptp_workspace,
    1218             :                                             newlpage, newrpage);
    1219             : }
    1220             : 
    1221             : /*
    1222             :  * Perform data insertion after beginPlaceToPage has decided it will fit.
    1223             :  *
    1224             :  * This is invoked within a critical section, and XLOG record creation (if
    1225             :  * needed) is already started.  The target buffer is registered in slot 0.
    1226             :  *
    1227             :  * Calls relevant function for internal or leaf page because they are handled
    1228             :  * very differently.
                     :  *
                     :  * This function only dispatches on GinPageIsLeaf.  ptp_workspace is handed
                     :  * to whichever variant is called; updateblkno is forwarded to the
                     :  * internal-page variant only.
    1229             :  */
    1230             : static void
    1231       49452 : dataExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1232             :                     void *insertdata, BlockNumber updateblkno,
    1233             :                     void *ptp_workspace)
    1234             : {
    1235       49452 :     Page        page = BufferGetPage(buf);
    1236             : 
    1237       49452 :     if (GinPageIsLeaf(page))
    1238       49406 :         dataExecPlaceToPageLeaf(btree, buf, stack, insertdata,
    1239             :                                 ptp_workspace);
    1240             :     else
    1241          46 :         dataExecPlaceToPageInternal(btree, buf, stack, insertdata,
    1242             :                                     updateblkno, ptp_workspace);
    1243       49452 : }
    1244             : 
    1245             : /*
    1246             :  * Split internal page and insert new data.
    1247             :  *
    1248             :  * Returns new temp pages to *newlpage and *newrpage.
    1249             :  * The original buffer is left untouched.
                     :  *
                     :  * insertdata points to the PostingItem to insert at offset stack->off.  The
                     :  * item that used to be at that offset ends up directly after the new item,
                     :  * and its block number is replaced with updateblkno.
                     :  *
                     :  * Both temp pages are initialized with the flags and page size of the
                     :  * original page.  The left page's right bound is set to the key of its last
                     :  * item; the right page inherits the original page's right bound.
    1250             :  */
    1251             : static void
    1252           0 : dataSplitPageInternal(GinBtree btree, Buffer origbuf,
    1253             :                       GinBtreeStack *stack,
    1254             :                       void *insertdata, BlockNumber updateblkno,
    1255             :                       Page *newlpage, Page *newrpage)
    1256             : {
    1257           0 :     Page        oldpage = BufferGetPage(origbuf);
    1258           0 :     OffsetNumber off = stack->off;
    1259           0 :     int         nitems = GinPageGetOpaque(oldpage)->maxoff;
    1260             :     int         nleftitems;
    1261             :     int         nrightitems;
    1262           0 :     Size        pageSize = PageGetPageSize(oldpage);
    1263           0 :     ItemPointerData oldbound = *GinDataPageGetRightBound(oldpage);
    1264             :     ItemPointer bound;
    1265             :     Page        lpage;
    1266             :     Page        rpage;
    1267             :     OffsetNumber separator;
    1268             :     PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
    1269             : 
    1270           0 :     lpage = PageGetTempPage(oldpage);
    1271           0 :     rpage = PageGetTempPage(oldpage);
    1272           0 :     GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
    1273           0 :     GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
    1274             : 
    1275             :     /*
    1276             :      * First construct a new list of PostingItems, which includes all the old
    1277             :      * items, and the new item.
                     :      *
                     :      * allitems is 0-based: the item at FirstOffsetNumber is copied to
                     :      * allitems[0], and the new item goes to allitems[off - 1], i.e. in
                     :      * front of the item that currently sits at offset 'off'.  The old
                     :      * items from offset 'off' onwards follow, starting at allitems[off].
    1278             :      */
    1279           0 :     memcpy(allitems, GinDataPageGetPostingItem(oldpage, FirstOffsetNumber),
    1280           0 :            (off - 1) * sizeof(PostingItem));
    1281             : 
    1282           0 :     allitems[off - 1] = *((PostingItem *) insertdata);
    1283           0 :     memcpy(&allitems[off], GinDataPageGetPostingItem(oldpage, off),
    1284           0 :            (nitems - (off - 1)) * sizeof(PostingItem));
    1285           0 :     nitems++;
    1286             : 
    1287             :     /*
                     :      * Update existing downlink to point to next page.  The item that was
                     :      * at offset 'off' on the old page is now allitems[off], directly after
                     :      * the new item.
                     :      */
    1288           0 :     PostingItemSetBlockNumber(&allitems[off], updateblkno);
    1289             : 
    1290             :     /*
    1291             :      * When creating a new index, fit as many tuples as possible on the left
    1292             :      * page, on the assumption that the table is scanned from beginning to
    1293             :      * end. This packs the index as tight as possible.
                     :      *
                     :      * rpage was just initialized and holds no items yet, so its free space
                     :      * divided by the item size is used as the left page's item count.
                     :      * In all other cases the left page gets nitems / 2 items and the right
                     :      * page gets the rest.
    1294             :      */
    1295           0 :     if (btree->isBuild && GinPageRightMost(oldpage))
    1296           0 :         separator = GinNonLeafDataPageGetFreeSpace(rpage) / sizeof(PostingItem);
    1297             :     else
    1298           0 :         separator = nitems / 2;
    1299           0 :     nleftitems = separator;
    1300           0 :     nrightitems = nitems - separator;
    1301             : 
    1302           0 :     memcpy(GinDataPageGetPostingItem(lpage, FirstOffsetNumber),
    1303             :            allitems,
    1304             :            nleftitems * sizeof(PostingItem));
    1305           0 :     GinPageGetOpaque(lpage)->maxoff = nleftitems;
    1306           0 :     memcpy(GinDataPageGetPostingItem(rpage, FirstOffsetNumber),
    1307           0 :            &allitems[separator],
    1308             :            nrightitems * sizeof(PostingItem));
    1309           0 :     GinPageGetOpaque(rpage)->maxoff = nrightitems;
    1310             : 
    1311             :     /*
    1312             :      * Also set pd_lower for both pages, like GinDataPageAddPostingItem does.
    1313             :      */
    1314           0 :     GinDataPageSetDataSize(lpage, nleftitems * sizeof(PostingItem));
    1315           0 :     GinDataPageSetDataSize(rpage, nrightitems * sizeof(PostingItem));
    1316             : 
    1317             :     /* set up right bound for left page: the key of its last item */
    1318           0 :     bound = GinDataPageGetRightBound(lpage);
    1319           0 :     *bound = GinDataPageGetPostingItem(lpage, nleftitems)->key;
    1320             : 
    1321             :     /* set up right bound for right page: same as the old page's bound */
    1322           0 :     *GinDataPageGetRightBound(rpage) = oldbound;
    1323             : 
    1324             :     /* return temp pages to caller */
    1325           0 :     *newlpage = lpage;
    1326           0 :     *newrpage = rpage;
    1327           0 : }
    1328             : 
    1329             : /*
    1330             :  * Construct insertion payload for inserting the downlink for given buffer.
                     :  *
                     :  * Returns a palloc'd PostingItem whose block number is lbuf's block number
                     :  * and whose key is the right bound stored on lbuf's page.  The item is not
                     :  * freed here.  btree is not used.
    1331             :  */
    1332             : static void *
    1333          46 : dataPrepareDownlink(GinBtree btree, Buffer lbuf)
    1334             : {
    1335          46 :     PostingItem *pitem = palloc(sizeof(PostingItem));
    1336          46 :     Page        lpage = BufferGetPage(lbuf);
    1337             : 
    1338          46 :     PostingItemSetBlockNumber(pitem, BufferGetBlockNumber(lbuf));
    1339          46 :     pitem->key = *GinDataPageGetRightBound(lpage);
    1340             : 
    1341          46 :     return pitem;
    1342             : }
    1343             : 
    1344             : /*
    1345             :  * Fills new root by right bound values from child.
    1346             :  * Also called from ginxlog, should not use btree
                     :  *
                     :  * Adds two PostingItems to root: first one for the left child (key = right
                     :  * bound of lpage, block number = lblkno), then one for the right child
                     :  * (key = right bound of rpage, block number = rblkno).
                     :  *
                     :  * Both items are added with InvalidOffsetNumber as the offset.
                     :  * NOTE(review): presumably that means "append at the end"; confirm against
                     :  * GinDataPageAddPostingItem.
    1347             :  */
    1348             : void
    1349         104 : ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage)
    1350             : {
    1351             :     PostingItem li,
    1352             :                 ri;
    1353             : 
    1354         104 :     li.key = *GinDataPageGetRightBound(lpage);
    1355         104 :     PostingItemSetBlockNumber(&li, lblkno);
    1356         104 :     GinDataPageAddPostingItem(root, &li, InvalidOffsetNumber);
    1357             : 
    1358         104 :     ri.key = *GinDataPageGetRightBound(rpage);
    1359         104 :     PostingItemSetBlockNumber(&ri, rblkno);
    1360         104 :     GinDataPageAddPostingItem(root, &ri, InvalidOffsetNumber);
    1361         104 : }
    1362             : 
    1363             : 
    1364             : /*** Functions to work with disassembled leaf pages ***/
    1365             : 
    1366             : /*
    1367             :  * Disassemble page into a disassembledLeaf struct.
                     :  *
                     :  * The returned struct is palloc0'd and carries a list (leaf->segments) of
                     :  * palloc'd leafSegmentInfo entries:
                     :  *
                     :  * - Compressed page: one entry per posting list segment, in page order.
                     :  *   Each entry has action GIN_SEGMENT_UNMODIFIED, 'seg' set to the segment
                     :  *   pointer obtained while walking the page (no copy is made here), and no
                     :  *   decoded items (items = NULL, nitems = 0).
                     :  *
                     :  * - Uncompressed page: at most one entry, with action GIN_SEGMENT_REPLACE,
                     :  *   seg = NULL, and 'items' holding a palloc'd copy of the page's item
                     :  *   pointers.  A page without items yields an empty segment list.
                     :  *
                     :  * leaf->oldformat records which of the two cases applied.
                     :  *
                     :  * The entries are allocated with plain palloc, so only the fields assigned
                     :  * below (action, seg, items, nitems, and the list node) are initialized.
    1368             :  */
    1369             : static disassembledLeaf *
    1370       49610 : disassembleLeaf(Page page)
    1371             : {
    1372             :     disassembledLeaf *leaf;
    1373             :     GinPostingList *seg;
    1374             :     Pointer     segbegin;
    1375             :     Pointer     segend;
    1376             : 
    1377       49610 :     leaf = palloc0(sizeof(disassembledLeaf));
    1378       49610 :     dlist_init(&leaf->segments);
    1379             : 
    1380       49610 :     if (GinPageIsCompressed(page))
    1381             :     {
    1382             :         /*
    1383             :          * Create a leafSegmentInfo entry for each segment.
                     :          *
                     :          * Walk from the first posting list to the end of the posting list
                     :          * area, stepping with GinNextPostingListSegment.
    1384             :          */
    1385       49610 :         seg = GinDataLeafPageGetPostingList(page);
    1386       49610 :         segbegin = (Pointer) seg;
    1387       49610 :         segend = segbegin + GinDataLeafPageGetPostingListSize(page);
    1388      896434 :         while ((Pointer) seg < segend)
    1389             :         {
    1390      846824 :             leafSegmentInfo *seginfo = palloc(sizeof(leafSegmentInfo));
    1391             : 
    1392      846824 :             seginfo->action = GIN_SEGMENT_UNMODIFIED;
    1393      846824 :             seginfo->seg = seg;
    1394      846824 :             seginfo->items = NULL;
    1395      846824 :             seginfo->nitems = 0;
    1396      846824 :             dlist_push_tail(&leaf->segments, &seginfo->node);
    1397             : 
    1398      846824 :             seg = GinNextPostingListSegment(seg);
    1399             :         }
    1400       49610 :         leaf->oldformat = false;
    1401             :     }
    1402             :     else
    1403             :     {
    1404             :         /*
    1405             :          * A pre-9.4 format uncompressed page is represented by a single
    1406             :          * segment, with an array of items.  The corner case is uncompressed
    1407             :          * page containing no items, which is represented as no segments.
    1408             :          */
    1409             :         ItemPointer uncompressed;
    1410             :         int         nuncompressed;
    1411             :         leafSegmentInfo *seginfo;
    1412             : 
    1413           0 :         uncompressed = dataLeafPageGetUncompressed(page, &nuncompressed);
    1414             : 
    1415           0 :         if (nuncompressed > 0)
    1416             :         {
    1417           0 :             seginfo = palloc(sizeof(leafSegmentInfo));
    1418             : 
    1419           0 :             seginfo->action = GIN_SEGMENT_REPLACE;
    1420           0 :             seginfo->seg = NULL;
    1421           0 :             seginfo->items = palloc(nuncompressed * sizeof(ItemPointerData));
    1422           0 :             memcpy(seginfo->items, uncompressed, nuncompressed * sizeof(ItemPointerData));
    1423           0 :             seginfo->nitems = nuncompressed;
    1424             : 
    1425           0 :             dlist_push_tail(&leaf->segments, &seginfo->node);
    1426             :         }
    1427             : 
    1428           0 :         leaf->oldformat = true;
    1429             :     }
    1430             : 
    1431       49610 :     return leaf;
    1432             : }
    1433             : 
    1434             : /*
    1435             :  * Distribute newItems to the segments.
    1436             :  *
    1437             :  * Any segments that acquire new items are decoded, and the new items are
    1438             :  * merged with the old items.
    1439             :  *
    1440             :  * Returns true if any new items were added. False means they were all
    1441             :  * duplicates of existing items on the page.
                     :  *
                     :  * The newItems array is not copied: newly created segments use it (or a
                     :  * position inside it) as their 'items' array, and segments marked
                     :  * GIN_SEGMENT_ADDITEMS keep a pointer into it in 'modifieditems'.
                     :  *
                     :  * NOTE(review): items are assigned to a segment by counting the leading
                     :  * new items that compare less than the next segment's first item, so
                     :  * newItems is presumably expected to be sorted -- confirm against callers.
    1442             :  */
    1443             : static bool
    1444       49556 : addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, int nNewItems)
    1445             : {
    1446             :     dlist_iter  iter;
    1447       49556 :     ItemPointer nextnew = newItems;
    1448       49556 :     int         newleft = nNewItems;
    1449       49556 :     bool        modified = false;
    1450             :     leafSegmentInfo *newseg;
    1451             : 
    1452             :     /*
    1453             :      * If the page is completely empty, just construct one new segment to hold
    1454             :      * all the new items.
    1455             :      */
    1456       49556 :     if (dlist_is_empty(&leaf->segments))
    1457             :     {
    1458           0 :         newseg = palloc(sizeof(leafSegmentInfo));
    1459           0 :         newseg->seg = NULL;
    1460           0 :         newseg->items = newItems;
    1461           0 :         newseg->nitems = nNewItems;
    1462           0 :         newseg->action = GIN_SEGMENT_INSERT;
    1463           0 :         dlist_push_tail(&leaf->segments, &newseg->node);
    1464           0 :         return true;
    1465             :     }
    1466             : 
    1467      845816 :     dlist_foreach(iter, &leaf->segments)
    1468             :     {
    1469      845816 :         leafSegmentInfo *cur = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node, iter.cur);
    1470             :         int         nthis;
    1471             :         ItemPointer tmpitems;
    1472             :         int         ntmpitems;
    1473             : 
    1474             :         /*
    1475             :          * How many of the new items fall into this segment?
                     :          *
                     :          * The last segment takes everything that is left.  For any other
                     :          * segment, count the leading remaining items that compare less than
                     :          * the first item of the next segment (taken from its decoded array
                     :          * if it has one, else from its compressed header).
    1476             :          */
    1477      845816 :         if (!dlist_has_next(&leaf->segments, iter.cur))
    1478       49556 :             nthis = newleft;
    1479             :         else
    1480             :         {
    1481             :             leafSegmentInfo *next;
    1482             :             ItemPointerData next_first;
    1483             : 
    1484      796260 :             next = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node,
    1485             :                                                        dlist_next_node(&leaf->segments, iter.cur));
    1486      796260 :             if (next->items)
    1487       49544 :                 next_first = next->items[0];
    1488             :             else
    1489             :             {
    1490             :                 Assert(next->seg != NULL);
    1491      746716 :                 next_first = next->seg->first;
    1492             :             }
    1493             : 
    1494      796260 :             nthis = 0;
    1495      796260 :             while (nthis < newleft && ginCompareItemPointers(&nextnew[nthis], &next_first) < 0)
    1496           0 :                 nthis++;
    1497             :         }
    1498      845816 :         if (nthis == 0)
    1499      796260 :             continue;
    1500             : 
    1501             :         /*
                     :          * Merge the new items with the existing items.  The segment is
                     :          * decoded first if it has no decoded item array yet.
                     :          */
    1502       49556 :         if (!cur->items)
    1503           0 :             cur->items = ginPostingListDecode(cur->seg, &cur->nitems);
    1504             : 
    1505             :         /*
    1506             :          * Fast path for the important special case that we're appending to
    1507             :          * the end of the page: don't let the last segment on the page grow
    1508             :          * larger than the target, create a new segment before that happens.
                     :          *
                     :          * Since this is the last segment, nthis equals newleft here, so the
                     :          * new segment receives all remaining items and the loop can stop.
    1509             :          */
    1510       99112 :         if (!dlist_has_next(&leaf->segments, iter.cur) &&
    1511       49556 :             ginCompareItemPointers(&cur->items[cur->nitems - 1], &nextnew[0]) < 0 &&
    1512       49556 :             cur->seg != NULL &&
    1513       49556 :             SizeOfGinPostingList(cur->seg) >= GinPostingListSegmentTargetSize)
    1514             :         {
    1515         368 :             newseg = palloc(sizeof(leafSegmentInfo));
    1516         368 :             newseg->seg = NULL;
    1517         368 :             newseg->items = nextnew;
    1518         368 :             newseg->nitems = nthis;
    1519         368 :             newseg->action = GIN_SEGMENT_INSERT;
    1520         368 :             dlist_push_tail(&leaf->segments, &newseg->node);
    1521         368 :             modified = true;
    1522       49556 :             break;
    1523             :         }
    1524             : 
    1525       49188 :         tmpitems = ginMergeItemPointers(cur->items, cur->nitems,
    1526             :                                         nextnew, nthis,
    1527             :                                         &ntmpitems);
    1528       49188 :         if (ntmpitems != cur->nitems)
    1529             :         {
    1530             :             /*
    1531             :              * If there are no duplicates, track the added items so that we
    1532             :              * can emit a compact ADDITEMS WAL record later on. (it doesn't
    1533             :              * seem worth re-checking which items were duplicates, if there
    1534             :              * were any)
                     :              *
                     :              * "No duplicates" means the merged count equals the sum of both
                     :              * inputs.  A segment that was already modified, or a merge that
                     :              * dropped duplicates, is marked GIN_SEGMENT_REPLACE instead.
    1535             :              */
    1536       49188 :             if (ntmpitems == nthis + cur->nitems &&
    1537       49188 :                 cur->action == GIN_SEGMENT_UNMODIFIED)
    1538             :             {
    1539       49188 :                 cur->action = GIN_SEGMENT_ADDITEMS;
    1540       49188 :                 cur->modifieditems = nextnew;
    1541       49188 :                 cur->nmodifieditems = nthis;
    1542             :             }
    1543             :             else
    1544           0 :                 cur->action = GIN_SEGMENT_REPLACE;
    1545             : 
    1546       49188 :             cur->items = tmpitems;
    1547       49188 :             cur->nitems = ntmpitems;
    1548       49188 :             cur->seg = NULL;
    1549       49188 :             modified = true;
    1550             :         }
    1551             : 
    1552       49188 :         nextnew += nthis;
    1553       49188 :         newleft -= nthis;
    1554       49188 :         if (newleft == 0)
    1555       49188 :             break;
    1556             :     }
    1557             : 
    1558       49556 :     return modified;
    1559             : }
    1560             : 
    1561             : /*
    1562             :  * Recompresses all segments that have been modified.
    1563             :  *
    1564             :  * If not all the items fit on two pages (ie. after split), we store as
    1565             :  * many items as fit, and set *remaining to the first item that didn't fit.
    1566             :  * If all items fit, *remaining is set to invalid.
    1567             :  *
    1568             :  * Returns true if the page has to be split.
    1569             :  */
    1570             : static bool
    1571       49556 : leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
    1572             : {
    1573       49556 :     int         pgused = 0;
    1574       49556 :     bool        needsplit = false;
    1575             :     dlist_iter  iter;
    1576             :     int         segsize;
    1577             :     leafSegmentInfo *nextseg;
    1578             :     int         npacked;
    1579             :     bool        modified;
    1580             :     dlist_node *cur_node;
    1581             :     dlist_node *next_node;
    1582             : 
    1583       49556 :     ItemPointerSetInvalid(remaining);
    1584             : 
    1585             :     /*
    1586             :      * cannot use dlist_foreach_modify here because we insert adjacent items
    1587             :      * while iterating.
    1588             :      */
    1589      897554 :     for (cur_node = dlist_head_node(&leaf->segments);
    1590             :          cur_node != NULL;
    1591      847998 :          cur_node = next_node)
    1592             :     {
    1593      848038 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
    1594             :                                                    cur_node);
    1595             : 
    1596      848038 :         if (dlist_has_next(&leaf->segments, cur_node))
    1597      796628 :             next_node = dlist_next_node(&leaf->segments, cur_node);
    1598             :         else
    1599       51410 :             next_node = NULL;
    1600             : 
    1601             :         /* Compress the posting list, if necessary */
    1602      848038 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1603             :         {
    1604      848038 :             if (seginfo->seg == NULL)
    1605             :             {
    1606       51410 :                 if (seginfo->nitems > GinPostingListSegmentMaxSize)
    1607        1888 :                     npacked = 0;    /* no chance that it would fit. */
    1608             :                 else
    1609             :                 {
    1610       49522 :                     seginfo->seg = ginCompressPostingList(seginfo->items,
    1611             :                                                           seginfo->nitems,
    1612             :                                                           GinPostingListSegmentMaxSize,
    1613             :                                                           &npacked);
    1614             :                 }
    1615       51410 :                 if (npacked != seginfo->nitems)
    1616             :                 {
    1617             :                     /*
    1618             :                      * Too large. Compress again to the target size, and
    1619             :                      * create a new segment to represent the remaining items.
    1620             :                      * The new segment is inserted after this one, so it will
    1621             :                      * be processed in the next iteration of this loop.
    1622             :                      */
    1623        1894 :                     if (seginfo->seg)
    1624           6 :                         pfree(seginfo->seg);
    1625        1894 :                     seginfo->seg = ginCompressPostingList(seginfo->items,
    1626             :                                                           seginfo->nitems,
    1627             :                                                           GinPostingListSegmentTargetSize,
    1628             :                                                           &npacked);
    1629        1894 :                     if (seginfo->action != GIN_SEGMENT_INSERT)
    1630          18 :                         seginfo->action = GIN_SEGMENT_REPLACE;
    1631             : 
    1632        1894 :                     nextseg = palloc(sizeof(leafSegmentInfo));
    1633        1894 :                     nextseg->action = GIN_SEGMENT_INSERT;
    1634        1894 :                     nextseg->seg = NULL;
    1635        1894 :                     nextseg->items = &seginfo->items[npacked];
    1636        1894 :                     nextseg->nitems = seginfo->nitems - npacked;
    1637        1894 :                     next_node = &nextseg->node;
    1638        1894 :                     dlist_insert_after(cur_node, next_node);
    1639             :                 }
    1640             :             }
    1641             : 
    1642             :             /*
    1643             :              * If the segment is very small, merge it with the next segment.
    1644             :              */
    1645      848038 :             if (SizeOfGinPostingList(seginfo->seg) < GinPostingListSegmentMinSize && next_node)
    1646             :             {
    1647             :                 int         nmerged;
    1648             : 
    1649           0 :                 nextseg = dlist_container(leafSegmentInfo, node, next_node);
    1650             : 
    1651           0 :                 if (seginfo->items == NULL)
    1652           0 :                     seginfo->items = ginPostingListDecode(seginfo->seg,
    1653             :                                                           &seginfo->nitems);
    1654           0 :                 if (nextseg->items == NULL)
    1655           0 :                     nextseg->items = ginPostingListDecode(nextseg->seg,
    1656             :                                                           &nextseg->nitems);
    1657           0 :                 nextseg->items =
    1658           0 :                     ginMergeItemPointers(seginfo->items, seginfo->nitems,
    1659           0 :                                          nextseg->items, nextseg->nitems,
    1660             :                                          &nmerged);
    1661             :                 Assert(nmerged == seginfo->nitems + nextseg->nitems);
    1662           0 :                 nextseg->nitems = nmerged;
    1663           0 :                 nextseg->seg = NULL;
    1664             : 
    1665           0 :                 nextseg->action = GIN_SEGMENT_REPLACE;
    1666           0 :                 nextseg->modifieditems = NULL;
    1667           0 :                 nextseg->nmodifieditems = 0;
    1668             : 
    1669           0 :                 if (seginfo->action == GIN_SEGMENT_INSERT)
    1670             :                 {
    1671           0 :                     dlist_delete(cur_node);
    1672           0 :                     continue;
    1673             :                 }
    1674             :                 else
    1675             :                 {
    1676           0 :                     seginfo->action = GIN_SEGMENT_DELETE;
    1677           0 :                     seginfo->seg = NULL;
    1678             :                 }
    1679             :             }
    1680             : 
    1681      848038 :             seginfo->items = NULL;
    1682      848038 :             seginfo->nitems = 0;
    1683             :         }
    1684             : 
    1685      848038 :         if (seginfo->action == GIN_SEGMENT_DELETE)
    1686           0 :             continue;
    1687             : 
    1688             :         /*
    1689             :          * OK, we now have a compressed version of this segment ready for
    1690             :          * copying to the page. Did we exceed the size that fits on one page?
    1691             :          */
    1692      848038 :         segsize = SizeOfGinPostingList(seginfo->seg);
    1693      848038 :         if (pgused + segsize > GinDataPageMaxDataSize)
    1694             :         {
    1695         190 :             if (!needsplit)
    1696             :             {
    1697             :                 /* switch to right page */
    1698             :                 Assert(pgused > 0);
    1699         150 :                 leaf->lastleft = dlist_prev_node(&leaf->segments, cur_node);
    1700         150 :                 needsplit = true;
    1701         150 :                 leaf->lsize = pgused;
    1702         150 :                 pgused = 0;
    1703             :             }
    1704             :             else
    1705             :             {
    1706             :                 /*
    1707             :                  * Filled both pages. The last segment we constructed did not
    1708             :                  * fit.
    1709             :                  */
    1710          40 :                 *remaining = seginfo->seg->first;
    1711             : 
    1712             :                 /*
    1713             :                  * remove all segments that did not fit from the list.
    1714             :                  */
    1715          80 :                 while (dlist_has_next(&leaf->segments, cur_node))
    1716          40 :                     dlist_delete(dlist_next_node(&leaf->segments, cur_node));
    1717          40 :                 dlist_delete(cur_node);
    1718          40 :                 break;
    1719             :             }
    1720             :         }
    1721             : 
    1722      847998 :         pgused += segsize;
    1723             :     }
    1724             : 
    1725       49556 :     if (!needsplit)
    1726             :     {
    1727       49406 :         leaf->lsize = pgused;
    1728       49406 :         leaf->rsize = 0;
    1729             :     }
    1730             :     else
    1731         150 :         leaf->rsize = pgused;
    1732             : 
    1733             :     Assert(leaf->lsize <= GinDataPageMaxDataSize);
    1734             :     Assert(leaf->rsize <= GinDataPageMaxDataSize);
    1735             : 
    1736             :     /*
    1737             :      * Make a palloc'd copy of every segment after the first modified one,
    1738             :      * because as we start copying items to the original page, we might
    1739             :      * overwrite an existing segment.
    1740             :      */
    1741       49556 :     modified = false;
    1742      897554 :     dlist_foreach(iter, &leaf->segments)
    1743             :     {
    1744      847998 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
    1745             :                                                    iter.cur);
    1746             : 
    1747      847998 :         if (!modified && seginfo->action != GIN_SEGMENT_UNMODIFIED)
    1748             :         {
    1749       49556 :             modified = true;
    1750             :         }
    1751      798442 :         else if (modified && seginfo->action == GIN_SEGMENT_UNMODIFIED)
    1752             :         {
    1753             :             GinPostingList *tmp;
    1754             : 
    1755           0 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1756           0 :             tmp = palloc(segsize);
    1757           0 :             memcpy(tmp, seginfo->seg, segsize);
    1758           0 :             seginfo->seg = tmp;
    1759             :         }
    1760             :     }
    1761             : 
    1762       49556 :     return needsplit;
    1763             : }
    1764             : 
    1765             : 
    1766             : /*** Functions that are exported to the rest of the GIN code ***/
    1767             : 
    1768             : /*
    1769             :  * Creates new posting tree containing the given TIDs. Returns the page
    1770             :  * number of the root of the new posting tree.
    1771             :  *
    1772             :  * items[] must be in sorted order with no duplicates.
    1773             :  */
    1774             : BlockNumber
    1775         128 : createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
    1776             :                   GinStatsData *buildStats, Buffer entrybuffer)
    1777             : {
    1778             :     BlockNumber blkno;
    1779             :     Buffer      buffer;
    1780             :     Page        tmppage;
    1781             :     Page        page;
    1782             :     Pointer     ptr;
    1783             :     int         nrootitems;
    1784             :     int         rootsize;
    1785         128 :     bool        is_build = (buildStats != NULL);
    1786             : 
    1787             :     /* Construct the new root page in memory first. */
    1788         128 :     tmppage = (Page) palloc(BLCKSZ);
    1789         128 :     GinInitPage(tmppage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
    1790         128 :     GinPageGetOpaque(tmppage)->rightlink = InvalidBlockNumber;
    1791             : 
    1792             :     /*
    1793             :      * Write as many of the items to the root page as fit. In segments of max
    1794             :      * GinPostingListSegmentMaxSize bytes each.
    1795             :      */
    1796         128 :     nrootitems = 0;
    1797         128 :     rootsize = 0;
    1798         128 :     ptr = (Pointer) GinDataLeafPageGetPostingList(tmppage);
    1799        2416 :     while (nrootitems < nitems)
    1800             :     {
    1801             :         GinPostingList *segment;
    1802             :         int         npacked;
    1803             :         int         segsize;
    1804             : 
    1805        2388 :         segment = ginCompressPostingList(&items[nrootitems],
    1806        2388 :                                          nitems - nrootitems,
    1807             :                                          GinPostingListSegmentMaxSize,
    1808             :                                          &npacked);
    1809        2388 :         segsize = SizeOfGinPostingList(segment);
    1810        2388 :         if (rootsize + segsize > GinDataPageMaxDataSize)
    1811         100 :             break;
    1812             : 
    1813        2288 :         memcpy(ptr, segment, segsize);
    1814        2288 :         ptr += segsize;
    1815        2288 :         rootsize += segsize;
    1816        2288 :         nrootitems += npacked;
    1817        2288 :         pfree(segment);
    1818             :     }
    1819         128 :     GinDataPageSetDataSize(tmppage, rootsize);
    1820             : 
    1821             :     /*
    1822             :      * All set. Get a new physical page, and copy the in-memory page to it.
    1823             :      */
    1824         128 :     buffer = GinNewBuffer(index);
    1825         128 :     page = BufferGetPage(buffer);
    1826         128 :     blkno = BufferGetBlockNumber(buffer);
    1827             : 
    1828             :     /*
    1829             :      * Copy any predicate locks from the entry tree leaf (containing posting
    1830             :      * list) to the posting tree.
    1831             :      */
    1832         128 :     PredicateLockPageSplit(index, BufferGetBlockNumber(entrybuffer), blkno);
    1833             : 
    1834         128 :     START_CRIT_SECTION();
    1835             : 
    1836         128 :     PageRestoreTempPage(tmppage, page);
    1837         128 :     MarkBufferDirty(buffer);
    1838             : 
    1839         128 :     if (RelationNeedsWAL(index) && !is_build)
    1840             :     {
    1841             :         XLogRecPtr  recptr;
    1842             :         ginxlogCreatePostingTree data;
    1843             : 
    1844          40 :         data.size = rootsize;
    1845             : 
    1846          40 :         XLogBeginInsert();
    1847          40 :         XLogRegisterData((char *) &data, sizeof(ginxlogCreatePostingTree));
    1848             : 
    1849          40 :         XLogRegisterData((char *) GinDataLeafPageGetPostingList(page),
    1850             :                          rootsize);
    1851          40 :         XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
    1852             : 
    1853          40 :         recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE);
    1854          40 :         PageSetLSN(page, recptr);
    1855             :     }
    1856             : 
    1857         128 :     UnlockReleaseBuffer(buffer);
    1858             : 
    1859         128 :     END_CRIT_SECTION();
    1860             : 
    1861             :     /* During index build, count the newly-added data page */
    1862         128 :     if (buildStats)
    1863          76 :         buildStats->nDataPages++;
    1864             : 
    1865         128 :     elog(DEBUG2, "created GIN posting tree with %d items", nrootitems);
    1866             : 
    1867             :     /*
    1868             :      * Add any remaining TIDs to the newly-created posting tree.
    1869             :      */
    1870         128 :     if (nitems > nrootitems)
    1871             :     {
    1872         100 :         ginInsertItemPointers(index, blkno,
    1873         100 :                               items + nrootitems,
    1874             :                               nitems - nrootitems,
    1875             :                               buildStats);
    1876             :     }
    1877             : 
    1878         128 :     return blkno;
    1879             : }
    1880             : 
    1881             : static void
    1882       49596 : ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno)
    1883             : {
    1884       49596 :     memset(btree, 0, sizeof(GinBtreeData));
    1885             : 
    1886       49596 :     btree->index = index;
    1887       49596 :     btree->rootBlkno = rootBlkno;
    1888             : 
    1889       49596 :     btree->findChildPage = dataLocateItem;
    1890       49596 :     btree->getLeftMostChild = dataGetLeftMostPage;
    1891       49596 :     btree->isMoveRight = dataIsMoveRight;
    1892       49596 :     btree->findItem = NULL;
    1893       49596 :     btree->findChildPtr = dataFindChildPtr;
    1894       49596 :     btree->beginPlaceToPage = dataBeginPlaceToPage;
    1895       49596 :     btree->execPlaceToPage = dataExecPlaceToPage;
    1896       49596 :     btree->fillRoot = ginDataFillRoot;
    1897       49596 :     btree->prepareDownlink = dataPrepareDownlink;
    1898             : 
    1899       49596 :     btree->isData = true;
    1900       49596 :     btree->fullScan = false;
    1901       49596 :     btree->isBuild = false;
    1902       49596 : }
    1903             : 
    1904             : /*
    1905             :  * Inserts array of item pointers, may execute several tree scan (very rare)
    1906             :  */
    1907             : void
    1908       49520 : ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
    1909             :                       ItemPointerData *items, uint32 nitem,
    1910             :                       GinStatsData *buildStats)
    1911             : {
    1912             :     GinBtreeData btree;
    1913             :     GinBtreeDataLeafInsertData insertdata;
    1914             :     GinBtreeStack *stack;
    1915             : 
    1916       49520 :     ginPrepareDataScan(&btree, index, rootBlkno);
    1917       49520 :     btree.isBuild = (buildStats != NULL);
    1918       49520 :     insertdata.items = items;
    1919       49520 :     insertdata.nitem = nitem;
    1920       49520 :     insertdata.curitem = 0;
    1921             : 
    1922       99076 :     while (insertdata.curitem < insertdata.nitem)
    1923             :     {
    1924             :         /* search for the leaf page where the first item should go to */
    1925       49560 :         btree.itemptr = insertdata.items[insertdata.curitem];
    1926       49560 :         stack = ginFindLeafPage(&btree, false, true);
    1927             : 
    1928       49556 :         ginInsertValue(&btree, stack, &insertdata, buildStats);
    1929             :     }
    1930       49516 : }
    1931             : 
    1932             : /*
    1933             :  * Starts a new scan on a posting tree.
    1934             :  */
    1935             : GinBtreeStack *
    1936          76 : ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno)
    1937             : {
    1938             :     GinBtreeStack *stack;
    1939             : 
    1940          76 :     ginPrepareDataScan(btree, index, rootBlkno);
    1941             : 
    1942          76 :     btree->fullScan = true;
    1943             : 
    1944          76 :     stack = ginFindLeafPage(btree, true, false);
    1945             : 
    1946          76 :     return stack;
    1947             : }

Generated by: LCOV version 1.14