LCOV - code coverage report
Current view: top level - src/backend/access/nbtree - nbtsplitloc.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 261 271 96.3 %
Date: 2025-01-18 04:15:08 Functions: 13 13 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * nbtsplitloc.c
       4             :  *    Choose split point code for Postgres btree implementation.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/nbtree/nbtsplitloc.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/nbtree.h"
      18             : #include "common/int.h"
      19             : 
      20             : typedef enum
      21             : {
      22             :     /* strategy for searching through materialized list of split points */
      23             :     SPLIT_DEFAULT,              /* give some weight to truncation */
      24             :     SPLIT_MANY_DUPLICATES,      /* find minimally distinguishing point */
      25             :     SPLIT_SINGLE_VALUE,         /* leave left page almost full */
      26             : } FindSplitStrat;
      27             : 
      28             : typedef struct
      29             : {
      30             :     /* details of free space left by split */
      31             :     int16       curdelta;       /* current leftfree/rightfree delta */
      32             :     int16       leftfree;       /* space left on left page post-split */
      33             :     int16       rightfree;      /* space left on right page post-split */
      34             : 
      35             :     /* split point identifying fields (returned by _bt_findsplitloc) */
      36             :     OffsetNumber firstrightoff; /* first origpage item on rightpage */
      37             :     bool        newitemonleft;  /* new item goes on left, or right? */
      38             : } SplitPoint;
      39             : 
      40             : typedef struct
      41             : {
      42             :     /* context data for _bt_recsplitloc */
      43             :     Relation    rel;            /* index relation */
      44             :     Page        origpage;       /* page undergoing split */
      45             :     IndexTuple  newitem;        /* new item (cause of page split) */
      46             :     Size        newitemsz;      /* size of newitem (includes line pointer) */
      47             :     bool        is_leaf;        /* T if splitting a leaf page */
      48             :     bool        is_rightmost;   /* T if splitting rightmost page on level */
      49             :     OffsetNumber newitemoff;    /* where the new item is to be inserted */
      50             :     int         leftspace;      /* space available for items on left page */
      51             :     int         rightspace;     /* space available for items on right page */
      52             :     int         olddataitemstotal;  /* space taken by old items */
      53             :     Size        minfirstrightsz;    /* smallest firstright size */
      54             : 
      55             :     /* candidate split point data */
      56             :     int         maxsplits;      /* maximum number of splits */
      57             :     int         nsplits;        /* current number of splits */
      58             :     SplitPoint *splits;         /* all candidate split points for page */
      59             :     int         interval;       /* current range of acceptable split points */
      60             : } FindSplitData;
      61             : 
      62             : static void _bt_recsplitloc(FindSplitData *state,
      63             :                             OffsetNumber firstrightoff, bool newitemonleft,
      64             :                             int olddataitemstoleft,
      65             :                             Size firstrightofforigpagetuplesz);
      66             : static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
      67             :                                 bool usemult);
      68             : static int  _bt_splitcmp(const void *arg1, const void *arg2);
      69             : static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
      70             :                                 int leaffillfactor, bool *usemult);
      71             : static bool _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid);
      72             : static OffsetNumber _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
      73             :                                      bool *newitemonleft, FindSplitStrat strategy);
      74             : static int  _bt_defaultinterval(FindSplitData *state);
      75             : static int  _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
      76             :                          SplitPoint *rightpage, FindSplitStrat *strategy);
      77             : static void _bt_interval_edges(FindSplitData *state,
      78             :                                SplitPoint **leftinterval, SplitPoint **rightinterval);
      79             : static inline int _bt_split_penalty(FindSplitData *state, SplitPoint *split);
      80             : static inline IndexTuple _bt_split_lastleft(FindSplitData *state,
      81             :                                             SplitPoint *split);
      82             : static inline IndexTuple _bt_split_firstright(FindSplitData *state,
      83             :                                               SplitPoint *split);
      84             : 
      85             : 
      86             : /*
      87             :  *  _bt_findsplitloc() -- find an appropriate place to split a page.
      88             :  *
      89             :  * The main goal here is to equalize the free space that will be on each
      90             :  * split page, *after accounting for the inserted tuple*.  (If we fail to
      91             :  * account for it, we might find ourselves with too little room on the page
      92             :  * that it needs to go into!)
      93             :  *
      94             :  * If the page is the rightmost page on its level, we instead try to arrange
      95             :  * to leave the left split page fillfactor% full.  In this way, when we are
      96             :  * inserting successively increasing keys (consider sequences, timestamps,
      97             :  * etc) we will end up with a tree whose pages are about fillfactor% full,
      98             :  * instead of the 50% full result that we'd get without this special case.
      99             :  * This is the same as nbtsort.c produces for a newly-created tree.  Note
     100             :  * that leaf and nonleaf pages use different fillfactors.  Note also that
     101             :  * there are a number of further special cases where fillfactor is not
     102             :  * applied in the standard way.
     103             :  *
     104             :  * We are passed the intended insert position of the new tuple, expressed as
     105             :  * the offsetnumber of the tuple it must go in front of (this could be
     106             :  * maxoff+1 if the tuple is to go at the end).  The new tuple itself is also
     107             :  * passed, since it's needed to give some weight to how effective suffix
     108             :  * truncation will be.  The implementation picks the split point that
     109             :  * maximizes the effectiveness of suffix truncation from a small list of
     110             :  * alternative candidate split points that leave each side of the split with
     111             :  * about the same share of free space.  Suffix truncation is secondary to
     112             :  * equalizing free space, except in cases with large numbers of duplicates.
     113             :  * Note that it is always assumed that caller goes on to perform truncation,
     114             :  * even with pg_upgrade'd indexes where that isn't actually the case
     115             :  * (!heapkeyspace indexes).  See nbtree/README for more information about
     116             :  * suffix truncation.
     117             :  *
     118             :  * We return the index of the first existing tuple that should go on the
     119             :  * righthand page (which is called firstrightoff), plus a boolean
     120             :  * indicating whether the new tuple goes on the left or right page.  You
     121             :  * can think of the returned state as a point _between_ two adjacent data
     122             :  * items (lastleft and firstright data items) on an imaginary version of
     123             :  * origpage that already includes newitem.  The bool is necessary to
     124             :  * disambiguate the case where firstrightoff == newitemoff (i.e. it is
     125             :  * sometimes needed to determine if the firstright tuple for the split is
     126             :  * newitem rather than the tuple from origpage at offset firstrightoff).
     127             :  */
     128             : OffsetNumber
     129       21584 : _bt_findsplitloc(Relation rel,
     130             :                  Page origpage,
     131             :                  OffsetNumber newitemoff,
     132             :                  Size newitemsz,
     133             :                  IndexTuple newitem,
     134             :                  bool *newitemonleft)
     135             : {
     136             :     BTPageOpaque opaque;
     137             :     int         leftspace,
     138             :                 rightspace,
     139             :                 olddataitemstotal,
     140             :                 olddataitemstoleft,
     141             :                 perfectpenalty,
     142             :                 leaffillfactor;
     143             :     FindSplitData state;
     144             :     FindSplitStrat strategy;
     145             :     ItemId      itemid;
     146             :     OffsetNumber offnum,
     147             :                 maxoff,
     148             :                 firstrightoff;
     149             :     double      fillfactormult;
     150             :     bool        usemult;
     151             :     SplitPoint  leftpage,
     152             :                 rightpage;
     153             : 
     154       21584 :     opaque = BTPageGetOpaque(origpage);
     155       21584 :     maxoff = PageGetMaxOffsetNumber(origpage);
     156             : 
     157             :     /* Total free space available on a btree page, after fixed overhead */
     158       21584 :     leftspace = rightspace =
     159       21584 :         PageGetPageSize(origpage) - SizeOfPageHeaderData -
     160             :         MAXALIGN(sizeof(BTPageOpaqueData));
     161             : 
     162             :     /* The right page will have the same high key as the old page */
     163       21584 :     if (!P_RIGHTMOST(opaque))
     164             :     {
     165        9210 :         itemid = PageGetItemId(origpage, P_HIKEY);
     166        9210 :         rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
     167             :                              sizeof(ItemIdData));
     168             :     }
     169             : 
     170             :     /* Count up total space in data items before actually scanning 'em */
     171       21584 :     olddataitemstotal = rightspace - (int) PageGetExactFreeSpace(origpage);
     172       21584 :     leaffillfactor = BTGetFillFactor(rel);
     173             : 
     174             :     /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
     175       21584 :     newitemsz += sizeof(ItemIdData);
     176       21584 :     state.rel = rel;
     177       21584 :     state.origpage = origpage;
     178       21584 :     state.newitem = newitem;
     179       21584 :     state.newitemsz = newitemsz;
     180       21584 :     state.is_leaf = P_ISLEAF(opaque);
     181       21584 :     state.is_rightmost = P_RIGHTMOST(opaque);
     182       21584 :     state.leftspace = leftspace;
     183       21584 :     state.rightspace = rightspace;
     184       21584 :     state.olddataitemstotal = olddataitemstotal;
     185       21584 :     state.minfirstrightsz = SIZE_MAX;
     186       21584 :     state.newitemoff = newitemoff;
     187             : 
     188             :     /* newitem cannot be a posting list item */
     189             :     Assert(!BTreeTupleIsPosting(newitem));
     190             : 
     191             :     /*
     192             :      * nsplits should never exceed maxoff because there will be at most as
     193             :      * many candidate split points as there are points _between_ tuples, once
     194             :      * you imagine that the new item is already on the original page (the
     195             :      * final number of splits may be slightly lower because not all points
     196             :      * between tuples will be legal).
     197             :      */
     198       21584 :     state.maxsplits = maxoff;
     199       21584 :     state.splits = palloc(sizeof(SplitPoint) * state.maxsplits);
     200       21584 :     state.nsplits = 0;
     201             : 
     202             :     /*
     203             :      * Scan through the data items and calculate space usage for a split at
     204             :      * each possible position
     205             :      */
     206       21584 :     olddataitemstoleft = 0;
     207             : 
     208     6534820 :     for (offnum = P_FIRSTDATAKEY(opaque);
     209             :          offnum <= maxoff;
     210     6513236 :          offnum = OffsetNumberNext(offnum))
     211             :     {
     212             :         Size        itemsz;
     213             : 
     214     6513236 :         itemid = PageGetItemId(origpage, offnum);
     215     6513236 :         itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
     216             : 
     217             :         /*
     218             :          * When item offset number is not newitemoff, neither side of the
     219             :          * split can be newitem.  Record a split after the previous data item
     220             :          * from original page, but before the current data item from original
     221             :          * page. (_bt_recsplitloc() will reject the split when there are no
     222             :          * previous items, which we rely on.)
     223             :          */
     224     6513236 :         if (offnum < newitemoff)
     225     5683064 :             _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
     226      830172 :         else if (offnum > newitemoff)
     227      818808 :             _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
     228             :         else
     229             :         {
     230             :             /*
     231             :              * Record a split after all "offnum < newitemoff" original page
     232             :              * data items, but before newitem
     233             :              */
     234       11364 :             _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
     235             : 
     236             :             /*
     237             :              * Record a split after newitem, but before data item from
     238             :              * original page at offset newitemoff/current offset
     239             :              */
     240       11364 :             _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
     241             :         }
     242             : 
     243     6513236 :         olddataitemstoleft += itemsz;
     244             :     }
     245             : 
     246             :     /*
     247             :      * Record a split after all original page data items, but before newitem.
     248             :      * (Though only when it's possible that newitem will end up alone on new
     249             :      * right page.)
     250             :      */
     251             :     Assert(olddataitemstoleft == olddataitemstotal);
     252       21584 :     if (newitemoff > maxoff)
     253       10220 :         _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0);
     254             : 
     255             :     /*
     256             :      * I believe it is not possible to fail to find a feasible split, but just
     257             :      * in case ...
     258             :      */
     259       21584 :     if (state.nsplits == 0)
     260           0 :         elog(ERROR, "could not find a feasible split point for index \"%s\"",
     261             :              RelationGetRelationName(rel));
     262             : 
     263             :     /*
     264             :      * Start search for a split point among list of legal split points.  Give
     265             :      * primary consideration to equalizing available free space in each half
     266             :      * of the split initially (start with default strategy), while applying
     267             :      * rightmost and split-after-new-item optimizations where appropriate.
     268             :      * Either of the two other fallback strategies may be required for cases
     269             :      * with a large number of duplicates around the original/space-optimal
     270             :      * split point.
     271             :      *
     272             :      * Default strategy gives some weight to suffix truncation in deciding a
     273             :      * split point on leaf pages.  It attempts to select a split point where a
     274             :      * distinguishing attribute appears earlier in the new high key for the
     275             :      * left side of the split, in order to maximize the number of trailing
     276             :      * attributes that can be truncated away.  Only candidate split points
     277             :      * that imply an acceptable balance of free space on each side are
     278             :      * considered.  See _bt_defaultinterval().
     279             :      */
     280       21584 :     if (!state.is_leaf)
     281             :     {
     282             :         /* fillfactormult only used on rightmost page */
     283         202 :         usemult = state.is_rightmost;
     284         202 :         fillfactormult = BTREE_NONLEAF_FILLFACTOR / 100.0;
     285             :     }
     286       21382 :     else if (state.is_rightmost)
     287             :     {
     288             :         /* Rightmost leaf page --  fillfactormult always used */
     289       12222 :         usemult = true;
     290       12222 :         fillfactormult = leaffillfactor / 100.0;
     291             :     }
     292        9160 :     else if (_bt_afternewitemoff(&state, maxoff, leaffillfactor, &usemult))
     293             :     {
     294             :         /*
     295             :          * New item inserted at rightmost point among a localized grouping on
     296             :          * a leaf page -- apply "split after new item" optimization, either by
     297             :          * applying leaf fillfactor multiplier, or by choosing the exact split
     298             :          * point that leaves newitem as lastleft. (usemult is set for us.)
     299             :          */
     300         756 :         if (usemult)
     301             :         {
     302             :             /* fillfactormult should be set based on leaf fillfactor */
     303         582 :             fillfactormult = leaffillfactor / 100.0;
     304             :         }
     305             :         else
     306             :         {
     307             :             /* find precise split point after newitemoff */
     308       42854 :             for (int i = 0; i < state.nsplits; i++)
     309             :             {
     310       42854 :                 SplitPoint *split = state.splits + i;
     311             : 
     312       42854 :                 if (split->newitemonleft &&
     313         174 :                     newitemoff == split->firstrightoff)
     314             :                 {
     315         174 :                     pfree(state.splits);
     316         174 :                     *newitemonleft = true;
     317         174 :                     return newitemoff;
     318             :                 }
     319             :             }
     320             : 
     321             :             /*
     322             :              * Cannot legally split after newitemoff; proceed with split
     323             :              * without using fillfactor multiplier.  This is defensive, and
     324             :              * should never be needed in practice.
     325             :              */
     326           0 :             fillfactormult = 0.50;
     327             :         }
     328             :     }
     329             :     else
     330             :     {
     331             :         /* Other leaf page.  50:50 page split. */
     332        8404 :         usemult = false;
     333             :         /* fillfactormult not used, but be tidy */
     334        8404 :         fillfactormult = 0.50;
     335             :     }
     336             : 
     337             :     /*
     338             :      * Save leftmost and rightmost splits for page before original ordinal
     339             :      * sort order is lost by delta/fillfactormult sort
     340             :      */
     341       21410 :     leftpage = state.splits[0];
     342       21410 :     rightpage = state.splits[state.nsplits - 1];
     343             : 
     344             :     /* Give split points a fillfactormult-wise delta, and sort on deltas */
     345       21410 :     _bt_deltasortsplits(&state, fillfactormult, usemult);
     346             : 
     347             :     /* Determine split interval for default strategy */
     348       21410 :     state.interval = _bt_defaultinterval(&state);
     349             : 
     350             :     /*
     351             :      * Determine if default strategy/split interval will produce a
     352             :      * sufficiently distinguishing split, or if we should change strategies.
     353             :      * Alternative strategies change the range of split points that are
     354             :      * considered acceptable (split interval), and possibly change
     355             :      * fillfactormult, in order to deal with pages with a large number of
     356             :      * duplicates gracefully.
     357             :      *
     358             :      * Pass low and high splits for the entire page (actually, they're for an
     359             :      * imaginary version of the page that includes newitem).  These are used
     360             :      * when the initial split interval encloses split points that are full of
     361             :      * duplicates, and we need to consider if it's even possible to avoid
     362             :      * appending a heap TID.
     363             :      */
     364       21410 :     perfectpenalty = _bt_strategy(&state, &leftpage, &rightpage, &strategy);
     365             : 
     366       21410 :     if (strategy == SPLIT_DEFAULT)
     367             :     {
     368             :         /*
     369             :          * Default strategy worked out (always works out with internal page).
     370             :          * Original split interval still stands.
     371             :          */
     372             :     }
     373             : 
     374             :     /*
     375             :      * Many duplicates strategy is used when a heap TID would otherwise be
     376             :      * appended, but the page isn't completely full of logical duplicates.
     377             :      *
     378             :      * The split interval is widened to include all legal candidate split
     379             :      * points.  There might be a few as two distinct values in the whole-page
     380             :      * split interval, though it's also possible that most of the values on
     381             :      * the page are unique.  The final split point will either be to the
     382             :      * immediate left or to the immediate right of the group of duplicate
     383             :      * tuples that enclose the first/delta-optimal split point (perfect
     384             :      * penalty was set so that the lowest delta split point that avoids
     385             :      * appending a heap TID will be chosen).  Maximizing the number of
     386             :      * attributes that can be truncated away is not a goal of the many
     387             :      * duplicates strategy.
     388             :      *
     389             :      * Single value strategy is used when it is impossible to avoid appending
     390             :      * a heap TID.  It arranges to leave the left page very full.  This
     391             :      * maximizes space utilization in cases where tuples with the same
     392             :      * attribute values span many pages.  Newly inserted duplicates will tend
     393             :      * to have higher heap TID values, so we'll end up splitting to the right
     394             :      * consistently.  (Single value strategy is harmless though not
     395             :      * particularly useful with !heapkeyspace indexes.)
     396             :      */
     397         418 :     else if (strategy == SPLIT_MANY_DUPLICATES)
     398             :     {
     399             :         Assert(state.is_leaf);
     400             :         /* Shouldn't try to truncate away extra user attributes */
     401             :         Assert(perfectpenalty ==
     402             :                IndexRelationGetNumberOfKeyAttributes(state.rel));
     403             :         /* No need to resort splits -- no change in fillfactormult/deltas */
     404         132 :         state.interval = state.nsplits;
     405             :     }
     406         286 :     else if (strategy == SPLIT_SINGLE_VALUE)
     407             :     {
     408             :         Assert(state.is_leaf);
     409             :         /* Split near the end of the page */
     410         286 :         usemult = true;
     411         286 :         fillfactormult = BTREE_SINGLEVAL_FILLFACTOR / 100.0;
     412             :         /* Resort split points with new delta */
     413         286 :         _bt_deltasortsplits(&state, fillfactormult, usemult);
     414             :         /* Appending a heap TID is unavoidable, so interval of 1 is fine */
     415         286 :         state.interval = 1;
     416             :     }
     417             : 
     418             :     /*
     419             :      * Search among acceptable split points (using final split interval) for
     420             :      * the entry that has the lowest penalty, and is therefore expected to
     421             :      * maximize fan-out.  Sets *newitemonleft for us.
     422             :      */
     423       21410 :     firstrightoff = _bt_bestsplitloc(&state, perfectpenalty, newitemonleft,
     424             :                                      strategy);
     425       21410 :     pfree(state.splits);
     426             : 
     427       21410 :     return firstrightoff;
     428             : }
     429             : 
     430             : /*
     431             :  * Subroutine to record a particular point between two tuples (possibly the
     432             :  * new item) on page (ie, combination of firstrightoff and newitemonleft
     433             :  * settings) in *state for later analysis.  This is also a convenient point to
     434             :  * check if the split is legal (if it isn't, it won't be recorded).
     435             :  *
     436             :  * firstrightoff is the offset of the first item on the original page that
     437             :  * goes to the right page, and firstrightofforigpagetuplesz is the size of
     438             :  * that tuple.  firstrightoff can be > max offset, which means that all the
     439             :  * old items go to the left page and only the new item goes to the right page.
     440             :  * We don't actually use firstrightofforigpagetuplesz in that case (actually,
     441             :  * we don't use it for _any_ split where the firstright tuple happens to be
     442             :  * newitem).
     443             :  *
     444             :  * olddataitemstoleft is the total size of all old items to the left of the
     445             :  * split point that is recorded here when legal.  Should not include
     446             :  * newitemsz, since that is handled here.
     447             :  */
     448             : static void
     449     6534820 : _bt_recsplitloc(FindSplitData *state,
     450             :                 OffsetNumber firstrightoff,
     451             :                 bool newitemonleft,
     452             :                 int olddataitemstoleft,
     453             :                 Size firstrightofforigpagetuplesz)
     454             : {
     455             :     int16       leftfree,
     456             :                 rightfree;
     457             :     Size        firstrightsz;
     458     6534820 :     Size        postingsz = 0;
     459             :     bool        newitemisfirstright;
     460             : 
     461             :     /* Is the new item going to be split point's firstright tuple? */
     462     6567768 :     newitemisfirstright = (firstrightoff == state->newitemoff &&
     463       32948 :                            !newitemonleft);
     464             : 
     465     6534820 :     if (newitemisfirstright)
     466       21584 :         firstrightsz = state->newitemsz;
     467             :     else
     468             :     {
     469     6513236 :         firstrightsz = firstrightofforigpagetuplesz;
     470             : 
     471             :         /*
     472             :          * Calculate suffix truncation space saving when firstright tuple is a
     473             :          * posting list tuple, though only when the tuple is over 64 bytes
     474             :          * including line pointer overhead (arbitrary).  This avoids accessing
     475             :          * the tuple in cases where its posting list must be very small (if
     476             :          * tuple has one at all).
     477             :          *
     478             :          * Note: We don't do this in the case where firstright tuple is
     479             :          * newitem, since newitem cannot have a posting list.
     480             :          */
     481     6513236 :         if (state->is_leaf && firstrightsz > 64)
     482             :         {
     483             :             ItemId      itemid;
     484             :             IndexTuple  newhighkey;
     485             : 
     486       46874 :             itemid = PageGetItemId(state->origpage, firstrightoff);
     487       46874 :             newhighkey = (IndexTuple) PageGetItem(state->origpage, itemid);
     488             : 
     489       46874 :             if (BTreeTupleIsPosting(newhighkey))
     490       44344 :                 postingsz = IndexTupleSize(newhighkey) -
     491       22172 :                     BTreeTupleGetPostingOffset(newhighkey);
     492             :         }
     493             :     }
     494             : 
     495             :     /* Account for all the old tuples */
     496     6534820 :     leftfree = state->leftspace - olddataitemstoleft;
     497     6534820 :     rightfree = state->rightspace -
     498     6534820 :         (state->olddataitemstotal - olddataitemstoleft);
     499             : 
     500             :     /*
     501             :      * The first item on the right page becomes the high key of the left page;
     502             :      * therefore it counts against left space as well as right space (we
     503             :      * cannot assume that suffix truncation will make it any smaller).  When
     504             :      * index has included attributes, then those attributes of left page high
     505             :      * key will be truncated leaving that page with slightly more free space.
     506             :      * However, that shouldn't affect our ability to find valid split
     507             :      * location, since we err in the direction of being pessimistic about free
     508             :      * space on the left half.  Besides, even when suffix truncation of
     509             :      * non-TID attributes occurs, the new high key often won't even be a
     510             :      * single MAXALIGN() quantum smaller than the firstright tuple it's based
     511             :      * on.
     512             :      *
     513             :      * If we are on the leaf level, assume that suffix truncation cannot avoid
     514             :      * adding a heap TID to the left half's new high key when splitting at the
     515             :      * leaf level.  In practice the new high key will often be smaller and
     516             :      * will rarely be larger, but conservatively assume the worst case.  We do
     517             :      * go to the trouble of subtracting away posting list overhead, though
     518             :      * only when it looks like it will make an appreciable difference.
     519             :      * (Posting lists are the only case where truncation will typically make
     520             :      * the final high key far smaller than firstright, so being a bit more
     521             :      * precise there noticeably improves the balance of free space.)
     522             :      */
     523     6534820 :     if (state->is_leaf)
     524     6531038 :         leftfree -= (int16) (firstrightsz +
     525     6531038 :                              MAXALIGN(sizeof(ItemPointerData)) -
     526             :                              postingsz);
     527             :     else
     528        3782 :         leftfree -= (int16) firstrightsz;
     529             : 
     530             :     /* account for the new item */
     531     6534820 :     if (newitemonleft)
     532      830172 :         leftfree -= (int16) state->newitemsz;
     533             :     else
     534     5704648 :         rightfree -= (int16) state->newitemsz;
     535             : 
     536             :     /*
     537             :      * If we are not on the leaf level, we will be able to discard the key
     538             :      * data from the first item that winds up on the right page.
     539             :      */
     540     6534820 :     if (!state->is_leaf)
     541        3782 :         rightfree += (int16) firstrightsz -
     542             :             (int16) (MAXALIGN(sizeof(IndexTupleData)) + sizeof(ItemIdData));
     543             : 
     544             :     /* Record split if legal */
     545     6534820 :     if (leftfree >= 0 && rightfree >= 0)
     546             :     {
     547             :         Assert(state->nsplits < state->maxsplits);
     548             : 
     549             :         /* Determine smallest firstright tuple size among legal splits */
     550     6494454 :         state->minfirstrightsz = Min(state->minfirstrightsz, firstrightsz);
     551             : 
     552     6494454 :         state->splits[state->nsplits].curdelta = 0;
     553     6494454 :         state->splits[state->nsplits].leftfree = leftfree;
     554     6494454 :         state->splits[state->nsplits].rightfree = rightfree;
     555     6494454 :         state->splits[state->nsplits].firstrightoff = firstrightoff;
     556     6494454 :         state->splits[state->nsplits].newitemonleft = newitemonleft;
     557     6494454 :         state->nsplits++;
     558             :     }
     559     6534820 : }
     560             : 
     561             : /*
     562             :  * Subroutine to assign space deltas to materialized array of candidate split
     563             :  * points based on current fillfactor, and to sort array using that fillfactor
     564             :  */
     565             : static void
     566       21696 : _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
     567             :                     bool usemult)
     568             : {
     569     6455388 :     for (int i = 0; i < state->nsplits; i++)
     570             :     {
     571     6433692 :         SplitPoint *split = state->splits + i;
     572             :         int16       delta;
     573             : 
     574     6433692 :         if (usemult)
     575     4234774 :             delta = fillfactormult * split->leftfree -
     576     4234774 :                 (1.0 - fillfactormult) * split->rightfree;
     577             :         else
     578     2198918 :             delta = split->leftfree - split->rightfree;
     579             : 
     580     6433692 :         if (delta < 0)
     581     1527662 :             delta = -delta;
     582             : 
     583             :         /* Save delta */
     584     6433692 :         split->curdelta = delta;
     585             :     }
     586             : 
     587       21696 :     qsort(state->splits, state->nsplits, sizeof(SplitPoint), _bt_splitcmp);
     588       21696 : }
     589             : 
     590             : /*
     591             :  * qsort-style comparator used by _bt_deltasortsplits()
     592             :  */
     593             : static int
     594    69452584 : _bt_splitcmp(const void *arg1, const void *arg2)
     595             : {
     596    69452584 :     SplitPoint *split1 = (SplitPoint *) arg1;
     597    69452584 :     SplitPoint *split2 = (SplitPoint *) arg2;
     598             : 
     599    69452584 :     return pg_cmp_s16(split1->curdelta, split2->curdelta);
     600             : }
     601             : 
     602             : /*
     603             :  * Subroutine to determine whether or not a non-rightmost leaf page should be
     604             :  * split immediately after the would-be original page offset for the
     605             :  * new/incoming tuple (or should have leaf fillfactor applied when new item is
     606             :  * to the right on original page).  This is appropriate when there is a
     607             :  * pattern of localized monotonically increasing insertions into a composite
     608             :  * index, where leading attribute values form local groupings, and we
     609             :  * anticipate further insertions of the same/current grouping (new item's
     610             :  * grouping) in the near future.  This can be thought of as a variation on
     611             :  * applying leaf fillfactor during rightmost leaf page splits, since cases
     612             :  * that benefit will converge on packing leaf pages leaffillfactor% full over
     613             :  * time.
     614             :  *
     615             :  * We may leave extra free space remaining on the rightmost page of a "most
     616             :  * significant column" grouping of tuples if that grouping never ends up
     617             :  * having future insertions that use the free space.  That effect is
     618             :  * self-limiting; a future grouping that becomes the "nearest on the right"
     619             :  * grouping of the affected grouping usually puts the extra free space to good
     620             :  * use.
     621             :  *
     622             :  * Caller uses optimization when routine returns true, though the exact action
     623             :  * taken by caller varies.  Caller uses original leaf page fillfactor in
     624             :  * standard way rather than using the new item offset directly when *usemult
     625             :  * was also set to true here.  Otherwise, caller applies optimization by
     626             :  * locating the legal split point that makes the new tuple the lastleft tuple
     627             :  * for the split.
     628             :  */
     629             : static bool
     630        9160 : _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
     631             :                     int leaffillfactor, bool *usemult)
     632             : {
     633             :     int16       nkeyatts;
     634             :     ItemId      itemid;
     635             :     IndexTuple  tup;
     636             :     int         keepnatts;
     637             : 
     638             :     Assert(state->is_leaf && !state->is_rightmost);
     639             : 
     640        9160 :     nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
     641             : 
     642             :     /* Single key indexes not considered here */
     643        9160 :     if (nkeyatts == 1)
     644        1184 :         return false;
     645             : 
     646             :     /* Ascending insertion pattern never inferred when new item is first */
     647        7976 :     if (state->newitemoff == P_FIRSTKEY)
     648           0 :         return false;
     649             : 
     650             :     /*
     651             :      * Only apply optimization on pages with equisized tuples, since ordinal
     652             :      * keys are likely to be fixed-width.  Testing if the new tuple is
     653             :      * variable width directly might also work, but that fails to apply the
     654             :      * optimization to indexes with a numeric_ops attribute.
     655             :      *
     656             :      * Conclude that page has equisized tuples when the new item is the same
     657             :      * width as the smallest item observed during pass over page, and other
     658             :      * non-pivot tuples must be the same width as well.  (Note that the
     659             :      * possibly-truncated existing high key isn't counted in
     660             :      * olddataitemstotal, and must be subtracted from maxoff.)
     661             :      */
     662        7976 :     if (state->newitemsz != state->minfirstrightsz)
     663        2468 :         return false;
     664        5508 :     if (state->newitemsz * (maxoff - 1) != state->olddataitemstotal)
     665        4072 :         return false;
     666             : 
     667             :     /*
     668             :      * Avoid applying optimization when tuples are wider than a tuple
     669             :      * consisting of two non-NULL int8/int64 attributes (or four non-NULL
     670             :      * int4/int32 attributes)
     671             :      */
     672        1436 :     if (state->newitemsz >
     673             :         MAXALIGN(sizeof(IndexTupleData) + sizeof(int64) * 2) +
     674             :         sizeof(ItemIdData))
     675           0 :         return false;
     676             : 
     677             :     /*
     678             :      * At least the first attribute's value must be equal to the corresponding
     679             :      * value in previous tuple to apply optimization.  New item cannot be a
     680             :      * duplicate, either.
     681             :      *
     682             :      * Handle case where new item is to the right of all items on the existing
     683             :      * page.  This is suggestive of monotonically increasing insertions in
     684             :      * itself, so the "heap TID adjacency" test is not applied here.
     685             :      */
     686        1436 :     if (state->newitemoff > maxoff)
     687             :     {
     688         542 :         itemid = PageGetItemId(state->origpage, maxoff);
     689         542 :         tup = (IndexTuple) PageGetItem(state->origpage, itemid);
     690         542 :         keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
     691             : 
     692         542 :         if (keepnatts > 1 && keepnatts <= nkeyatts)
     693             :         {
     694         542 :             *usemult = true;
     695         542 :             return true;
     696             :         }
     697             : 
     698           0 :         return false;
     699             :     }
     700             : 
     701             :     /*
     702             :      * "Low cardinality leading column, high cardinality suffix column"
     703             :      * indexes with a random insertion pattern (e.g., an index with a boolean
     704             :      * column, such as an index on '(book_is_in_print, book_isbn)') present us
     705             :      * with a risk of consistently misapplying the optimization.  We're
     706             :      * willing to accept very occasional misapplication of the optimization,
     707             :      * provided the cases where we get it wrong are rare and self-limiting.
     708             :      *
     709             :      * Heap TID adjacency strongly suggests that the item just to the left was
     710             :      * inserted very recently, which limits overapplication of the
     711             :      * optimization.  Besides, all inappropriate cases triggered here will
     712             :      * still split in the middle of the page on average.
     713             :      */
     714         894 :     itemid = PageGetItemId(state->origpage, OffsetNumberPrev(state->newitemoff));
     715         894 :     tup = (IndexTuple) PageGetItem(state->origpage, itemid);
     716             :     /* Do cheaper test first */
     717         894 :     if (BTreeTupleIsPosting(tup) ||
     718         894 :         !_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid))
     719         560 :         return false;
     720             :     /* Check same conditions as rightmost item case, too */
     721         334 :     keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
     722             : 
     723         334 :     if (keepnatts > 1 && keepnatts <= nkeyatts)
     724             :     {
     725         214 :         double      interp = (double) state->newitemoff / ((double) maxoff + 1);
     726         214 :         double      leaffillfactormult = (double) leaffillfactor / 100.0;
     727             : 
     728             :         /*
     729             :          * Don't allow caller to split after a new item when it will result in
     730             :          * a split point to the right of the point that a leaf fillfactor
     731             :          * split would use -- have caller apply leaf fillfactor instead
     732             :          */
     733         214 :         *usemult = interp > leaffillfactormult;
     734             : 
     735         214 :         return true;
     736             :     }
     737             : 
     738         120 :     return false;
     739             : }
     740             : 
     741             : /*
     742             :  * Subroutine for determining if two heap TIDS are "adjacent".
     743             :  *
     744             :  * Adjacent means that the high TID is very likely to have been inserted into
     745             :  * heap relation immediately after the low TID, probably during the current
     746             :  * transaction.
     747             :  */
     748             : static bool
     749         894 : _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid)
     750             : {
     751             :     BlockNumber lowblk,
     752             :                 highblk;
     753             : 
     754         894 :     lowblk = ItemPointerGetBlockNumber(lowhtid);
     755         894 :     highblk = ItemPointerGetBlockNumber(highhtid);
     756             : 
     757             :     /* Make optimistic assumption of adjacency when heap blocks match */
     758         894 :     if (lowblk == highblk)
     759         330 :         return true;
     760             : 
     761             :     /* When heap block one up, second offset should be FirstOffsetNumber */
     762         760 :     if (lowblk + 1 == highblk &&
     763         196 :         ItemPointerGetOffsetNumber(highhtid) == FirstOffsetNumber)
     764           4 :         return true;
     765             : 
     766         560 :     return false;
     767             : }
     768             : 
     769             : /*
     770             :  * Subroutine to find the "best" split point among candidate split points.
     771             :  * The best split point is the split point with the lowest penalty among split
     772             :  * points that fall within current/final split interval.  Penalty is an
     773             :  * abstract score, with a definition that varies depending on whether we're
     774             :  * splitting a leaf page or an internal page.  See _bt_split_penalty() for
     775             :  * details.
     776             :  *
     777             :  * "perfectpenalty" is assumed to be the lowest possible penalty among
     778             :  * candidate split points.  This allows us to return early without wasting
     779             :  * cycles on calculating the first differing attribute for all candidate
     780             :  * splits when that clearly cannot improve our choice (or when we only want a
     781             :  * minimally distinguishing split point, and don't want to make the split any
     782             :  * more unbalanced than is necessary).
     783             :  *
     784             :  * We return the index of the first existing tuple that should go on the right
     785             :  * page, plus a boolean indicating if new item is on left of split point.
     786             :  */
     787             : static OffsetNumber
     788       21410 : _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
     789             :                  bool *newitemonleft, FindSplitStrat strategy)
     790             : {
     791             :     int         bestpenalty,
     792             :                 lowsplit;
     793       21410 :     int         highsplit = Min(state->interval, state->nsplits);
     794             :     SplitPoint *final;
     795             : 
     796       21410 :     bestpenalty = INT_MAX;
     797       21410 :     lowsplit = 0;
     798       50606 :     for (int i = lowsplit; i < highsplit; i++)
     799             :     {
     800             :         int         penalty;
     801             : 
     802       50564 :         penalty = _bt_split_penalty(state, state->splits + i);
     803             : 
     804       50564 :         if (penalty < bestpenalty)
     805             :         {
     806       27230 :             bestpenalty = penalty;
     807       27230 :             lowsplit = i;
     808             :         }
     809             : 
     810       50564 :         if (penalty <= perfectpenalty)
     811       21368 :             break;
     812             :     }
     813             : 
     814       21410 :     final = &state->splits[lowsplit];
     815             : 
     816             :     /*
     817             :      * There is a risk that the "many duplicates" strategy will repeatedly do
     818             :      * the wrong thing when there are monotonically decreasing insertions to
     819             :      * the right of a large group of duplicates.   Repeated splits could leave
     820             :      * a succession of right half pages with free space that can never be
     821             :      * used.  This must be avoided.
     822             :      *
     823             :      * Consider the example of the leftmost page in a single integer attribute
     824             :      * NULLS FIRST index which is almost filled with NULLs.  Monotonically
     825             :      * decreasing integer insertions might cause the same leftmost page to
     826             :      * split repeatedly at the same point.  Each split derives its new high
     827             :      * key from the lowest current value to the immediate right of the large
     828             :      * group of NULLs, which will always be higher than all future integer
     829             :      * insertions, directing all future integer insertions to the same
     830             :      * leftmost page.
     831             :      */
     832       21410 :     if (strategy == SPLIT_MANY_DUPLICATES && !state->is_rightmost &&
     833         118 :         !final->newitemonleft && final->firstrightoff >= state->newitemoff &&
     834           0 :         final->firstrightoff < state->newitemoff + 9)
     835             :     {
     836             :         /*
     837             :          * Avoid the problem by performing a 50:50 split when the new item is
     838             :          * just to the right of the would-be "many duplicates" split point.
     839             :          * (Note that the test used for an insert that is "just to the right"
     840             :          * of the split point is conservative.)
     841             :          */
     842           0 :         final = &state->splits[0];
     843             :     }
     844             : 
     845       21410 :     *newitemonleft = final->newitemonleft;
     846       21410 :     return final->firstrightoff;
     847             : }
     848             : 
     849             : #define LEAF_SPLIT_DISTANCE         0.050
     850             : #define INTERNAL_SPLIT_DISTANCE     0.075
     851             : 
     852             : /*
     853             :  * Return a split interval to use for the default strategy.  This is a limit
     854             :  * on the number of candidate split points to give further consideration to.
     855             :  * Only a fraction of all candidate splits points (those located at the start
     856             :  * of the now-sorted splits array) fall within the split interval.  Split
     857             :  * interval is applied within _bt_bestsplitloc().
     858             :  *
     859             :  * Split interval represents an acceptable range of split points -- those that
     860             :  * have leftfree and rightfree values that are acceptably balanced.  The final
     861             :  * split point chosen is the split point with the lowest "penalty" among split
     862             :  * points in this split interval (unless we change our entire strategy, in
     863             :  * which case the interval also changes -- see _bt_strategy()).
     864             :  *
     865             :  * The "Prefix B-Trees" paper calls split interval sigma l for leaf splits,
     866             :  * and sigma b for internal ("branch") splits.  It's hard to provide a
     867             :  * theoretical justification for the size of the split interval, though it's
     868             :  * clear that a small split interval can make tuples on level L+1 much smaller
     869             :  * on average, without noticeably affecting space utilization on level L.
     870             :  * (Note that the way that we calculate split interval might need to change if
     871             :  * suffix truncation is taught to truncate tuples "within" the last
     872             :  * attribute/datum for data types like text, which is more or less how it is
     873             :  * assumed to work in the paper.)
     874             :  */
     875             : static int
     876       21410 : _bt_defaultinterval(FindSplitData *state)
     877             : {
     878             :     SplitPoint *spaceoptimal;
     879             :     int16       tolerance,
     880             :                 lowleftfree,
     881             :                 lowrightfree,
     882             :                 highleftfree,
     883             :                 highrightfree;
     884             : 
     885             :     /*
     886             :      * Determine leftfree and rightfree values that are higher and lower than
     887             :      * we're willing to tolerate.  Note that the final split interval will be
     888             :      * about 10% of nsplits in the common case where all non-pivot tuples
     889             :      * (data items) from a leaf page are uniformly sized.  We're a bit more
     890             :      * aggressive when splitting internal pages.
     891             :      */
     892       21410 :     if (state->is_leaf)
     893       21208 :         tolerance = state->olddataitemstotal * LEAF_SPLIT_DISTANCE;
     894             :     else
     895         202 :         tolerance = state->olddataitemstotal * INTERNAL_SPLIT_DISTANCE;
     896             : 
     897             :     /* First candidate split point is the most evenly balanced */
     898       21410 :     spaceoptimal = state->splits;
     899       21410 :     lowleftfree = spaceoptimal->leftfree - tolerance;
     900       21410 :     lowrightfree = spaceoptimal->rightfree - tolerance;
     901       21410 :     highleftfree = spaceoptimal->leftfree + tolerance;
     902       21410 :     highrightfree = spaceoptimal->rightfree + tolerance;
     903             : 
     904             :     /*
     905             :      * Iterate through split points, starting from the split immediately after
     906             :      * 'spaceoptimal'.  Find the first split point that divides free space so
     907             :      * unevenly that including it in the split interval would be unacceptable.
     908             :      */
     909      646110 :     for (int i = 1; i < state->nsplits; i++)
     910             :     {
     911      646110 :         SplitPoint *split = state->splits + i;
     912             : 
     913             :         /* Cannot use curdelta here, since its value is often weighted */
     914      646110 :         if (split->leftfree < lowleftfree || split->rightfree < lowrightfree ||
     915      625596 :             split->leftfree > highleftfree || split->rightfree > highrightfree)
     916       21410 :             return i;
     917             :     }
     918             : 
     919           0 :     return state->nsplits;
     920             : }
     921             : 
     922             : /*
     923             :  * Subroutine to decide whether split should use default strategy/initial
     924             :  * split interval, or whether it should finish splitting the page using
     925             :  * alternative strategies (this is only possible with leaf pages).
     926             :  *
     927             :  * Caller uses alternative strategy (or sticks with default strategy) based
     928             :  * on how *strategy is set here.  Return value is "perfect penalty", which is
     929             :  * passed to _bt_bestsplitloc() as a final constraint on how far caller is
     930             :  * willing to go to avoid appending a heap TID when using the many duplicates
     931             :  * strategy (it also saves _bt_bestsplitloc() useless cycles).
     932             :  */
     933             : static int
     934       21410 : _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
     935             :              SplitPoint *rightpage, FindSplitStrat *strategy)
     936             : {
     937             :     IndexTuple  leftmost,
     938             :                 rightmost;
     939             :     SplitPoint *leftinterval,
     940             :                *rightinterval;
     941             :     int         perfectpenalty;
     942       21410 :     int         indnkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
     943             : 
     944             :     /* Assume that alternative strategy won't be used for now */
     945       21410 :     *strategy = SPLIT_DEFAULT;
     946             : 
     947             :     /*
     948             :      * Use smallest observed firstright item size for entire page (actually,
     949             :      * entire imaginary version of page that includes newitem) as perfect
     950             :      * penalty on internal pages.  This can save cycles in the common case
     951             :      * where most or all splits (not just splits within interval) have
     952             :      * firstright tuples that are the same size.
     953             :      */
     954       21410 :     if (!state->is_leaf)
     955         202 :         return state->minfirstrightsz;
     956             : 
     957             :     /*
     958             :      * Use leftmost and rightmost tuples from leftmost and rightmost splits in
     959             :      * current split interval
     960             :      */
     961       21208 :     _bt_interval_edges(state, &leftinterval, &rightinterval);
     962       21208 :     leftmost = _bt_split_lastleft(state, leftinterval);
     963       21208 :     rightmost = _bt_split_firstright(state, rightinterval);
     964             : 
     965             :     /*
     966             :      * If initial split interval can produce a split point that will at least
     967             :      * avoid appending a heap TID in new high key, we're done.  Finish split
     968             :      * with default strategy and initial split interval.
     969             :      */
     970       21208 :     perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
     971       21208 :     if (perfectpenalty <= indnkeyatts)
     972       20744 :         return perfectpenalty;
     973             : 
     974             :     /*
     975             :      * Work out how caller should finish split when even their "perfect"
     976             :      * penalty for initial/default split interval indicates that the interval
     977             :      * does not contain even a single split that avoids appending a heap TID.
     978             :      *
     979             :      * Use the leftmost split's lastleft tuple and the rightmost split's
     980             :      * firstright tuple to assess every possible split.
     981             :      */
     982         464 :     leftmost = _bt_split_lastleft(state, leftpage);
     983         464 :     rightmost = _bt_split_firstright(state, rightpage);
     984             : 
     985             :     /*
     986             :      * If page (including new item) has many duplicates but is not entirely
     987             :      * full of duplicates, a many duplicates strategy split will be performed.
     988             :      * If page is entirely full of duplicates, a single value strategy split
     989             :      * will be performed.
     990             :      */
     991         464 :     perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
     992         464 :     if (perfectpenalty <= indnkeyatts)
     993             :     {
     994         132 :         *strategy = SPLIT_MANY_DUPLICATES;
     995             : 
     996             :         /*
     997             :          * Many duplicates strategy should split at either side the group of
     998             :          * duplicates that enclose the delta-optimal split point.  Return
     999             :          * indnkeyatts rather than the true perfect penalty to make that
    1000             :          * happen.  (If perfectpenalty was returned here then low cardinality
    1001             :          * composite indexes could have continual unbalanced splits.)
    1002             :          *
    1003             :          * Note that caller won't go through with a many duplicates split in
    1004             :          * rare cases where it looks like there are ever-decreasing insertions
    1005             :          * to the immediate right of the split point.  This must happen just
    1006             :          * before a final decision is made, within _bt_bestsplitloc().
    1007             :          */
    1008         132 :         return indnkeyatts;
    1009             :     }
    1010             : 
    1011             :     /*
    1012             :      * Single value strategy is only appropriate with ever-increasing heap
    1013             :      * TIDs; otherwise, original default strategy split should proceed to
    1014             :      * avoid pathological performance.  Use page high key to infer if this is
    1015             :      * the rightmost page among pages that store the same duplicate value.
    1016             :      * This should not prevent insertions of heap TIDs that are slightly out
    1017             :      * of order from using single value strategy, since that's expected with
    1018             :      * concurrent inserters of the same duplicate value.
    1019             :      */
    1020         332 :     else if (state->is_rightmost)
    1021         244 :         *strategy = SPLIT_SINGLE_VALUE;
    1022             :     else
    1023             :     {
    1024             :         ItemId      itemid;
    1025             :         IndexTuple  hikey;
    1026             : 
    1027          88 :         itemid = PageGetItemId(state->origpage, P_HIKEY);
    1028          88 :         hikey = (IndexTuple) PageGetItem(state->origpage, itemid);
    1029          88 :         perfectpenalty = _bt_keep_natts_fast(state->rel, hikey,
    1030             :                                              state->newitem);
    1031          88 :         if (perfectpenalty <= indnkeyatts)
    1032          42 :             *strategy = SPLIT_SINGLE_VALUE;
    1033             :         else
    1034             :         {
    1035             :             /*
    1036             :              * Have caller finish split using default strategy, since page
    1037             :              * does not appear to be the rightmost page for duplicates of the
    1038             :              * value the page is filled with
    1039             :              */
    1040             :         }
    1041             :     }
    1042             : 
    1043         332 :     return perfectpenalty;
    1044             : }
    1045             : 
    1046             : /*
    1047             :  * Subroutine to locate leftmost and rightmost splits for current/default
    1048             :  * split interval.  Note that it will be the same split iff there is only one
    1049             :  * split in interval.
    1050             :  */
    1051             : static void
    1052       21208 : _bt_interval_edges(FindSplitData *state, SplitPoint **leftinterval,
    1053             :                    SplitPoint **rightinterval)
    1054             : {
    1055       21208 :     int         highsplit = Min(state->interval, state->nsplits);
    1056             :     SplitPoint *deltaoptimal;
    1057             : 
    1058       21208 :     deltaoptimal = state->splits;
    1059       21208 :     *leftinterval = NULL;
    1060       21208 :     *rightinterval = NULL;
    1061             : 
    1062             :     /*
    1063             :      * Delta is an absolute distance to optimal split point, so both the
    1064             :      * leftmost and rightmost split point will usually be at the end of the
    1065             :      * array
    1066             :      */
    1067       42658 :     for (int i = highsplit - 1; i >= 0; i--)
    1068             :     {
    1069       42658 :         SplitPoint *distant = state->splits + i;
    1070             : 
    1071       42658 :         if (distant->firstrightoff < deltaoptimal->firstrightoff)
    1072             :         {
    1073       21040 :             if (*leftinterval == NULL)
    1074       20726 :                 *leftinterval = distant;
    1075             :         }
    1076       21618 :         else if (distant->firstrightoff > deltaoptimal->firstrightoff)
    1077             :         {
    1078       21116 :             if (*rightinterval == NULL)
    1079       20794 :                 *rightinterval = distant;
    1080             :         }
    1081         502 :         else if (!distant->newitemonleft && deltaoptimal->newitemonleft)
    1082             :         {
    1083             :             /*
    1084             :              * "incoming tuple will become firstright" (distant) is to the
    1085             :              * left of "incoming tuple will become lastleft" (delta-optimal)
    1086             :              */
    1087             :             Assert(distant->firstrightoff == state->newitemoff);
    1088           0 :             if (*leftinterval == NULL)
    1089           0 :                 *leftinterval = distant;
    1090             :         }
    1091         502 :         else if (distant->newitemonleft && !deltaoptimal->newitemonleft)
    1092             :         {
    1093             :             /*
    1094             :              * "incoming tuple will become lastleft" (distant) is to the right
    1095             :              * of "incoming tuple will become firstright" (delta-optimal)
    1096             :              */
    1097             :             Assert(distant->firstrightoff == state->newitemoff);
    1098          10 :             if (*rightinterval == NULL)
    1099           8 :                 *rightinterval = distant;
    1100             :         }
    1101             :         else
    1102             :         {
    1103             :             /* There was only one or two splits in initial split interval */
    1104             :             Assert(distant == deltaoptimal);
    1105         492 :             if (*leftinterval == NULL)
    1106         482 :                 *leftinterval = distant;
    1107         492 :             if (*rightinterval == NULL)
    1108         406 :                 *rightinterval = distant;
    1109             :         }
    1110             : 
    1111       42658 :         if (*leftinterval && *rightinterval)
    1112       21208 :             return;
    1113             :     }
    1114             : 
    1115             :     Assert(false);
    1116             : }
    1117             : 
    1118             : /*
    1119             :  * Subroutine to find penalty for caller's candidate split point.
    1120             :  *
    1121             :  * On leaf pages, penalty is the attribute number that distinguishes each side
    1122             :  * of a split.  It's the last attribute that needs to be included in new high
    1123             :  * key for left page.  It can be greater than the number of key attributes in
    1124             :  * cases where a heap TID will need to be appended during truncation.
    1125             :  *
    1126             :  * On internal pages, penalty is simply the size of the firstright tuple for
    1127             :  * the split (including line pointer overhead).  This tuple will become the
    1128             :  * new high key for the left page.
    1129             :  */
    1130             : static inline int
    1131       50564 : _bt_split_penalty(FindSplitData *state, SplitPoint *split)
    1132             : {
    1133             :     IndexTuple  lastleft;
    1134             :     IndexTuple  firstright;
    1135             : 
    1136       50564 :     if (!state->is_leaf)
    1137             :     {
    1138             :         ItemId      itemid;
    1139             : 
    1140         202 :         if (!split->newitemonleft &&
    1141         202 :             split->firstrightoff == state->newitemoff)
    1142          12 :             return state->newitemsz;
    1143             : 
    1144         190 :         itemid = PageGetItemId(state->origpage, split->firstrightoff);
    1145             : 
    1146         190 :         return MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
    1147             :     }
    1148             : 
    1149       50362 :     lastleft = _bt_split_lastleft(state, split);
    1150       50362 :     firstright = _bt_split_firstright(state, split);
    1151             : 
    1152       50362 :     return _bt_keep_natts_fast(state->rel, lastleft, firstright);
    1153             : }
    1154             : 
    1155             : /*
    1156             :  * Subroutine to get a lastleft IndexTuple for a split point
    1157             :  */
    1158             : static inline IndexTuple
    1159       72034 : _bt_split_lastleft(FindSplitData *state, SplitPoint *split)
    1160             : {
    1161             :     ItemId      itemid;
    1162             : 
    1163       72034 :     if (split->newitemonleft && split->firstrightoff == state->newitemoff)
    1164         382 :         return state->newitem;
    1165             : 
    1166       71652 :     itemid = PageGetItemId(state->origpage,
    1167       71652 :                            OffsetNumberPrev(split->firstrightoff));
    1168       71652 :     return (IndexTuple) PageGetItem(state->origpage, itemid);
    1169             : }
    1170             : 
    1171             : /*
    1172             :  * Subroutine to get a firstright IndexTuple for a split point
    1173             :  */
    1174             : static inline IndexTuple
    1175       72034 : _bt_split_firstright(FindSplitData *state, SplitPoint *split)
    1176             : {
    1177             :     ItemId      itemid;
    1178             : 
    1179       72034 :     if (!split->newitemonleft && split->firstrightoff == state->newitemoff)
    1180         242 :         return state->newitem;
    1181             : 
    1182       71792 :     itemid = PageGetItemId(state->origpage, split->firstrightoff);
    1183       71792 :     return (IndexTuple) PageGetItem(state->origpage, itemid);
    1184             : }

Generated by: LCOV version 1.14