LCOV - code coverage report
Current view: top level - src/backend/access/nbtree - nbtsplitloc.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 265 272 97.4 %
Date: 2025-08-31 07:17:45 Functions: 13 13 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * nbtsplitloc.c
       4             :  *    Choose split point code for Postgres btree implementation.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/nbtree/nbtsplitloc.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/nbtree.h"
      18             : #include "access/tableam.h"
      19             : #include "common/int.h"
      20             : 
      21             : typedef enum
      22             : {
      23             :     /* strategy for searching through materialized list of split points */
      24             :     SPLIT_DEFAULT,              /* give some weight to truncation */
      25             :     SPLIT_MANY_DUPLICATES,      /* find minimally distinguishing point */
      26             :     SPLIT_SINGLE_VALUE,         /* leave left page almost full */
      27             : } FindSplitStrat;
      28             : 
      29             : typedef struct
      30             : {
      31             :     /* details of free space left by split */
      32             :     int16       curdelta;       /* current leftfree/rightfree delta */
      33             :     int16       leftfree;       /* space left on left page post-split */
      34             :     int16       rightfree;      /* space left on right page post-split */
      35             : 
      36             :     /* split point identifying fields (returned by _bt_findsplitloc) */
      37             :     OffsetNumber firstrightoff; /* first origpage item on rightpage */
      38             :     bool        newitemonleft;  /* new item goes on left, or right? */
      39             : } SplitPoint;
      40             : 
      41             : typedef struct
      42             : {
      43             :     /* context data for _bt_recsplitloc */
      44             :     Relation    rel;            /* index relation */
      45             :     Page        origpage;       /* page undergoing split */
      46             :     IndexTuple  newitem;        /* new item (cause of page split) */
      47             :     Size        newitemsz;      /* size of newitem (includes line pointer) */
      48             :     bool        is_leaf;        /* T if splitting a leaf page */
      49             :     bool        is_rightmost;   /* T if splitting rightmost page on level */
      50             :     OffsetNumber newitemoff;    /* where the new item is to be inserted */
      51             :     int         leftspace;      /* space available for items on left page */
      52             :     int         rightspace;     /* space available for items on right page */
      53             :     int         olddataitemstotal;  /* space taken by old items */
      54             :     Size        minfirstrightsz;    /* smallest firstright size */
      55             : 
      56             :     /* candidate split point data */
      57             :     int         maxsplits;      /* maximum number of splits */
      58             :     int         nsplits;        /* current number of splits */
      59             :     SplitPoint *splits;         /* all candidate split points for page */
      60             :     int         interval;       /* current range of acceptable split points */
      61             : } FindSplitData;
      62             : 
      63             : static void _bt_recsplitloc(FindSplitData *state,
      64             :                             OffsetNumber firstrightoff, bool newitemonleft,
      65             :                             int olddataitemstoleft,
      66             :                             Size firstrightofforigpagetuplesz);
      67             : static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
      68             :                                 bool usemult);
      69             : static int  _bt_splitcmp(const void *arg1, const void *arg2);
      70             : static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
      71             :                                 int leaffillfactor, bool *usemult);
      72             : static bool _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid);
      73             : static OffsetNumber _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
      74             :                                      bool *newitemonleft, FindSplitStrat strategy);
      75             : static int  _bt_defaultinterval(FindSplitData *state);
      76             : static int  _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
      77             :                          SplitPoint *rightpage, FindSplitStrat *strategy);
      78             : static void _bt_interval_edges(FindSplitData *state,
      79             :                                SplitPoint **leftinterval, SplitPoint **rightinterval);
      80             : static inline int _bt_split_penalty(FindSplitData *state, SplitPoint *split);
      81             : static inline IndexTuple _bt_split_lastleft(FindSplitData *state,
      82             :                                             SplitPoint *split);
      83             : static inline IndexTuple _bt_split_firstright(FindSplitData *state,
      84             :                                               SplitPoint *split);
      85             : 
      86             : 
      87             : /*
      88             :  *  _bt_findsplitloc() -- find an appropriate place to split a page.
      89             :  *
      90             :  * The main goal here is to equalize the free space that will be on each
      91             :  * split page, *after accounting for the inserted tuple*.  (If we fail to
      92             :  * account for it, we might find ourselves with too little room on the page
      93             :  * that it needs to go into!)
      94             :  *
      95             :  * If the page is the rightmost page on its level, we instead try to arrange
      96             :  * to leave the left split page fillfactor% full.  In this way, when we are
      97             :  * inserting successively increasing keys (consider sequences, timestamps,
      98             :  * etc) we will end up with a tree whose pages are about fillfactor% full,
      99             :  * instead of the 50% full result that we'd get without this special case.
     100             :  * This is the same as nbtsort.c produces for a newly-created tree.  Note
     101             :  * that leaf and nonleaf pages use different fillfactors.  Note also that
     102             :  * there are a number of further special cases where fillfactor is not
     103             :  * applied in the standard way.
     104             :  *
     105             :  * We are passed the intended insert position of the new tuple, expressed as
     106             :  * the offsetnumber of the tuple it must go in front of (this could be
     107             :  * maxoff+1 if the tuple is to go at the end).  The new tuple itself is also
     108             :  * passed, since it's needed to give some weight to how effective suffix
     109             :  * truncation will be.  The implementation picks the split point that
     110             :  * maximizes the effectiveness of suffix truncation from a small list of
     111             :  * alternative candidate split points that leave each side of the split with
     112             :  * about the same share of free space.  Suffix truncation is secondary to
     113             :  * equalizing free space, except in cases with large numbers of duplicates.
     114             :  * Note that it is always assumed that caller goes on to perform truncation,
     115             :  * even with pg_upgrade'd indexes where that isn't actually the case
     116             :  * (!heapkeyspace indexes).  See nbtree/README for more information about
     117             :  * suffix truncation.
     118             :  *
     119             :  * We return the index of the first existing tuple that should go on the
     120             :  * righthand page (which is called firstrightoff), plus a boolean
     121             :  * indicating whether the new tuple goes on the left or right page.  You
     122             :  * can think of the returned state as a point _between_ two adjacent data
     123             :  * items (lastleft and firstright data items) on an imaginary version of
     124             :  * origpage that already includes newitem.  The bool is necessary to
     125             :  * disambiguate the case where firstrightoff == newitemoff (i.e. it is
     126             :  * sometimes needed to determine if the firstright tuple for the split is
     127             :  * newitem rather than the tuple from origpage at offset firstrightoff).
     128             :  */
     129             : OffsetNumber
     130       22298 : _bt_findsplitloc(Relation rel,
     131             :                  Page origpage,
     132             :                  OffsetNumber newitemoff,
     133             :                  Size newitemsz,
     134             :                  IndexTuple newitem,
     135             :                  bool *newitemonleft)
     136             : {
     137             :     BTPageOpaque opaque;
     138             :     int         leftspace,
     139             :                 rightspace,
     140             :                 olddataitemstotal,
     141             :                 olddataitemstoleft,
     142             :                 perfectpenalty,
     143             :                 leaffillfactor;
     144             :     FindSplitData state;
     145             :     FindSplitStrat strategy;
     146             :     ItemId      itemid;
     147             :     OffsetNumber offnum,
     148             :                 maxoff,
     149             :                 firstrightoff;
     150             :     double      fillfactormult;
     151             :     bool        usemult;
     152             :     SplitPoint  leftpage,
     153             :                 rightpage;
     154             : 
     155       22298 :     opaque = BTPageGetOpaque(origpage);
     156       22298 :     maxoff = PageGetMaxOffsetNumber(origpage);
     157             : 
     158             :     /* Total free space available on a btree page, after fixed overhead */
     159       22298 :     leftspace = rightspace =
     160       22298 :         PageGetPageSize(origpage) - SizeOfPageHeaderData -
     161             :         MAXALIGN(sizeof(BTPageOpaqueData));
     162             : 
     163             :     /* The right page will have the same high key as the old page */
     164       22298 :     if (!P_RIGHTMOST(opaque))
     165             :     {
     166        9784 :         itemid = PageGetItemId(origpage, P_HIKEY);
     167        9784 :         rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
     168             :                              sizeof(ItemIdData));
     169             :     }
     170             : 
     171             :     /* Count up total space in data items before actually scanning 'em */
     172       22298 :     olddataitemstotal = rightspace - (int) PageGetExactFreeSpace(origpage);
     173       22298 :     leaffillfactor = BTGetFillFactor(rel);
     174             : 
     175             :     /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
     176       22298 :     newitemsz += sizeof(ItemIdData);
     177       22298 :     state.rel = rel;
     178       22298 :     state.origpage = origpage;
     179       22298 :     state.newitem = newitem;
     180       22298 :     state.newitemsz = newitemsz;
     181       22298 :     state.is_leaf = P_ISLEAF(opaque);
     182       22298 :     state.is_rightmost = P_RIGHTMOST(opaque);
     183       22298 :     state.leftspace = leftspace;
     184       22298 :     state.rightspace = rightspace;
     185       22298 :     state.olddataitemstotal = olddataitemstotal;
     186       22298 :     state.minfirstrightsz = SIZE_MAX;
     187       22298 :     state.newitemoff = newitemoff;
     188             : 
     189             :     /* newitem cannot be a posting list item */
     190             :     Assert(!BTreeTupleIsPosting(newitem));
     191             : 
     192             :     /*
     193             :      * nsplits should never exceed maxoff because there will be at most as
     194             :      * many candidate split points as there are points _between_ tuples, once
     195             :      * you imagine that the new item is already on the original page (the
     196             :      * final number of splits may be slightly lower because not all points
     197             :      * between tuples will be legal).
     198             :      */
     199       22298 :     state.maxsplits = maxoff;
     200       22298 :     state.splits = palloc(sizeof(SplitPoint) * state.maxsplits);
     201       22298 :     state.nsplits = 0;
     202             : 
     203             :     /*
     204             :      * Scan through the data items and calculate space usage for a split at
     205             :      * each possible position
     206             :      */
     207       22298 :     olddataitemstoleft = 0;
     208             : 
     209       22298 :     for (offnum = P_FIRSTDATAKEY(opaque);
     210     6763692 :          offnum <= maxoff;
     211     6741394 :          offnum = OffsetNumberNext(offnum))
     212             :     {
     213             :         Size        itemsz;
     214             : 
     215     6741394 :         itemid = PageGetItemId(origpage, offnum);
     216     6741394 :         itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
     217             : 
     218             :         /*
     219             :          * When item offset number is not newitemoff, neither side of the
     220             :          * split can be newitem.  Record a split after the previous data item
     221             :          * from original page, but before the current data item from original
     222             :          * page. (_bt_recsplitloc() will reject the split when there are no
     223             :          * previous items, which we rely on.)
     224             :          */
     225     6741394 :         if (offnum < newitemoff)
     226     5848966 :             _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
     227      892428 :         else if (offnum > newitemoff)
     228      879576 :             _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
     229             :         else
     230             :         {
     231             :             /*
     232             :              * Record a split after all "offnum < newitemoff" original page
     233             :              * data items, but before newitem
     234             :              */
     235       12852 :             _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
     236             : 
     237             :             /*
     238             :              * Record a split after newitem, but before data item from
     239             :              * original page at offset newitemoff/current offset
     240             :              */
     241       12852 :             _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
     242             :         }
     243             : 
     244     6741394 :         olddataitemstoleft += itemsz;
     245             :     }
     246             : 
     247             :     /*
     248             :      * Record a split after all original page data items, but before newitem.
     249             :      * (Though only when it's possible that newitem will end up alone on new
     250             :      * right page.)
     251             :      */
     252             :     Assert(olddataitemstoleft == olddataitemstotal);
     253       22298 :     if (newitemoff > maxoff)
     254        9446 :         _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0);
     255             : 
     256             :     /*
     257             :      * I believe it is not possible to fail to find a feasible split, but just
     258             :      * in case ...
     259             :      */
     260       22298 :     if (state.nsplits == 0)
     261           0 :         elog(ERROR, "could not find a feasible split point for index \"%s\"",
     262             :              RelationGetRelationName(rel));
     263             : 
     264             :     /*
     265             :      * Start search for a split point among list of legal split points.  Give
     266             :      * primary consideration to equalizing available free space in each half
     267             :      * of the split initially (start with default strategy), while applying
     268             :      * rightmost and split-after-new-item optimizations where appropriate.
     269             :      * Either of the two other fallback strategies may be required for cases
     270             :      * with a large number of duplicates around the original/space-optimal
     271             :      * split point.
     272             :      *
     273             :      * Default strategy gives some weight to suffix truncation in deciding a
     274             :      * split point on leaf pages.  It attempts to select a split point where a
     275             :      * distinguishing attribute appears earlier in the new high key for the
     276             :      * left side of the split, in order to maximize the number of trailing
     277             :      * attributes that can be truncated away.  Only candidate split points
     278             :      * that imply an acceptable balance of free space on each side are
     279             :      * considered.  See _bt_defaultinterval().
     280             :      */
     281       22298 :     if (!state.is_leaf)
     282             :     {
     283             :         /* fillfactormult only used on rightmost page */
     284         258 :         usemult = state.is_rightmost;
     285         258 :         fillfactormult = BTREE_NONLEAF_FILLFACTOR / 100.0;
     286             :     }
     287       22040 :     else if (state.is_rightmost)
     288             :     {
     289             :         /* Rightmost leaf page --  fillfactormult always used */
     290       12356 :         usemult = true;
     291       12356 :         fillfactormult = leaffillfactor / 100.0;
     292             :     }
     293        9684 :     else if (_bt_afternewitemoff(&state, maxoff, leaffillfactor, &usemult))
     294             :     {
     295             :         /*
     296             :          * New item inserted at rightmost point among a localized grouping on
     297             :          * a leaf page -- apply "split after new item" optimization, either by
     298             :          * applying leaf fillfactor multiplier, or by choosing the exact split
     299             :          * point that leaves newitem as lastleft. (usemult is set for us.)
     300             :          */
     301         626 :         if (usemult)
     302             :         {
     303             :             /* fillfactormult should be set based on leaf fillfactor */
     304         452 :             fillfactormult = leaffillfactor / 100.0;
     305             :         }
     306             :         else
     307             :         {
     308             :             /* find precise split point after newitemoff */
     309       38284 :             for (int i = 0; i < state.nsplits; i++)
     310             :             {
     311       38284 :                 SplitPoint *split = state.splits + i;
     312             : 
     313       38284 :                 if (split->newitemonleft &&
     314         174 :                     newitemoff == split->firstrightoff)
     315             :                 {
     316         174 :                     pfree(state.splits);
     317         174 :                     *newitemonleft = true;
     318         174 :                     return newitemoff;
     319             :                 }
     320             :             }
     321             : 
     322             :             /*
     323             :              * Cannot legally split after newitemoff; proceed with split
     324             :              * without using fillfactor multiplier.  This is defensive, and
     325             :              * should never be needed in practice.
     326             :              */
     327           0 :             fillfactormult = 0.50;
     328             :         }
     329             :     }
     330             :     else
     331             :     {
     332             :         /* Other leaf page.  50:50 page split. */
     333        9058 :         usemult = false;
     334             :         /* fillfactormult not used, but be tidy */
     335        9058 :         fillfactormult = 0.50;
     336             :     }
     337             : 
     338             :     /*
     339             :      * Save leftmost and rightmost splits for page before original ordinal
     340             :      * sort order is lost by delta/fillfactormult sort
     341             :      */
     342       22124 :     leftpage = state.splits[0];
     343       22124 :     rightpage = state.splits[state.nsplits - 1];
     344             : 
     345             :     /* Give split points a fillfactormult-wise delta, and sort on deltas */
     346       22124 :     _bt_deltasortsplits(&state, fillfactormult, usemult);
     347             : 
     348             :     /* Determine split interval for default strategy */
     349       22124 :     state.interval = _bt_defaultinterval(&state);
     350             : 
     351             :     /*
     352             :      * Determine if default strategy/split interval will produce a
     353             :      * sufficiently distinguishing split, or if we should change strategies.
     354             :      * Alternative strategies change the range of split points that are
     355             :      * considered acceptable (split interval), and possibly change
     356             :      * fillfactormult, in order to deal with pages with a large number of
     357             :      * duplicates gracefully.
     358             :      *
     359             :      * Pass low and high splits for the entire page (actually, they're for an
     360             :      * imaginary version of the page that includes newitem).  These are used
     361             :      * when the initial split interval encloses split points that are full of
     362             :      * duplicates, and we need to consider if it's even possible to avoid
     363             :      * appending a heap TID.
     364             :      */
     365       22124 :     perfectpenalty = _bt_strategy(&state, &leftpage, &rightpage, &strategy);
     366             : 
     367       22124 :     if (strategy == SPLIT_DEFAULT)
     368             :     {
     369             :         /*
     370             :          * Default strategy worked out (always works out with internal page).
     371             :          * Original split interval still stands.
     372             :          */
     373             :     }
     374             : 
     375             :     /*
     376             :      * Many duplicates strategy is used when a heap TID would otherwise be
     377             :      * appended, but the page isn't completely full of logical duplicates.
     378             :      *
     379             :      * The split interval is widened to include all legal candidate split
     380             :      * points.  There might be a few as two distinct values in the whole-page
     381             :      * split interval, though it's also possible that most of the values on
     382             :      * the page are unique.  The final split point will either be to the
     383             :      * immediate left or to the immediate right of the group of duplicate
     384             :      * tuples that enclose the first/delta-optimal split point (perfect
     385             :      * penalty was set so that the lowest delta split point that avoids
     386             :      * appending a heap TID will be chosen).  Maximizing the number of
     387             :      * attributes that can be truncated away is not a goal of the many
     388             :      * duplicates strategy.
     389             :      *
     390             :      * Single value strategy is used when it is impossible to avoid appending
     391             :      * a heap TID.  It arranges to leave the left page very full.  This
     392             :      * maximizes space utilization in cases where tuples with the same
     393             :      * attribute values span many pages.  Newly inserted duplicates will tend
     394             :      * to have higher heap TID values, so we'll end up splitting to the right
     395             :      * consistently.  (Single value strategy is harmless though not
     396             :      * particularly useful with !heapkeyspace indexes.)
     397             :      */
     398         424 :     else if (strategy == SPLIT_MANY_DUPLICATES)
     399             :     {
     400             :         Assert(state.is_leaf);
     401             :         /* Shouldn't try to truncate away extra user attributes */
     402             :         Assert(perfectpenalty ==
     403             :                IndexRelationGetNumberOfKeyAttributes(state.rel));
     404             :         /* No need to resort splits -- no change in fillfactormult/deltas */
     405         146 :         state.interval = state.nsplits;
     406             :     }
     407         278 :     else if (strategy == SPLIT_SINGLE_VALUE)
     408             :     {
     409             :         Assert(state.is_leaf);
     410             :         /* Split near the end of the page */
     411         278 :         usemult = true;
     412         278 :         fillfactormult = BTREE_SINGLEVAL_FILLFACTOR / 100.0;
     413             :         /* Resort split points with new delta */
     414         278 :         _bt_deltasortsplits(&state, fillfactormult, usemult);
     415             :         /* Appending a heap TID is unavoidable, so interval of 1 is fine */
     416         278 :         state.interval = 1;
     417             :     }
     418             : 
     419             :     /*
     420             :      * Search among acceptable split points (using final split interval) for
     421             :      * the entry that has the lowest penalty, and is therefore expected to
     422             :      * maximize fan-out.  Sets *newitemonleft for us.
     423             :      */
     424       22124 :     firstrightoff = _bt_bestsplitloc(&state, perfectpenalty, newitemonleft,
     425             :                                      strategy);
     426       22124 :     pfree(state.splits);
     427             : 
     428       22124 :     return firstrightoff;
     429             : }
     430             : 
     431             : /*
     432             :  * Subroutine to record a particular point between two tuples (possibly the
     433             :  * new item) on page (ie, combination of firstrightoff and newitemonleft
     434             :  * settings) in *state for later analysis.  This is also a convenient point to
     435             :  * check if the split is legal (if it isn't, it won't be recorded).
     436             :  *
     437             :  * firstrightoff is the offset of the first item on the original page that
     438             :  * goes to the right page, and firstrightofforigpagetuplesz is the size of
     439             :  * that tuple.  firstrightoff can be > max offset, which means that all the
     440             :  * old items go to the left page and only the new item goes to the right page.
     441             :  * We don't actually use firstrightofforigpagetuplesz in that case (actually,
     442             :  * we don't use it for _any_ split where the firstright tuple happens to be
     443             :  * newitem).
     444             :  *
     445             :  * olddataitemstoleft is the total size of all old items to the left of the
     446             :  * split point that is recorded here when legal.  Should not include
     447             :  * newitemsz, since that is handled here.
     448             :  */
     449             : static void
     450     6763692 : _bt_recsplitloc(FindSplitData *state,
     451             :                 OffsetNumber firstrightoff,
     452             :                 bool newitemonleft,
     453             :                 int olddataitemstoleft,
     454             :                 Size firstrightofforigpagetuplesz)
     455             : {
     456             :     int16       leftfree,
     457             :                 rightfree;
     458             :     Size        firstrightsz;
     459     6763692 :     Size        postingsz = 0;
     460             :     bool        newitemisfirstright;
     461             : 
     462             :     /* Is the new item going to be split point's firstright tuple? */
     463     6798842 :     newitemisfirstright = (firstrightoff == state->newitemoff &&
     464       35150 :                            !newitemonleft);
     465             : 
     466     6763692 :     if (newitemisfirstright)
     467       22298 :         firstrightsz = state->newitemsz;
     468             :     else
     469             :     {
     470     6741394 :         firstrightsz = firstrightofforigpagetuplesz;
     471             : 
     472             :         /*
     473             :          * Calculate suffix truncation space saving when firstright tuple is a
     474             :          * posting list tuple, though only when the tuple is over 64 bytes
     475             :          * including line pointer overhead (arbitrary).  This avoids accessing
     476             :          * the tuple in cases where its posting list must be very small (if
     477             :          * tuple has one at all).
     478             :          *
     479             :          * Note: We don't do this in the case where firstright tuple is
     480             :          * newitem, since newitem cannot have a posting list.
     481             :          */
     482     6741394 :         if (state->is_leaf && firstrightsz > 64)
     483             :         {
     484             :             ItemId      itemid;
     485             :             IndexTuple  newhighkey;
     486             : 
     487       52386 :             itemid = PageGetItemId(state->origpage, firstrightoff);
     488       52386 :             newhighkey = (IndexTuple) PageGetItem(state->origpage, itemid);
     489             : 
     490       52386 :             if (BTreeTupleIsPosting(newhighkey))
     491       27638 :                 postingsz = IndexTupleSize(newhighkey) -
     492       27638 :                     BTreeTupleGetPostingOffset(newhighkey);
     493             :         }
     494             :     }
     495             : 
     496             :     /* Account for all the old tuples */
     497     6763692 :     leftfree = state->leftspace - olddataitemstoleft;
     498     6763692 :     rightfree = state->rightspace -
     499     6763692 :         (state->olddataitemstotal - olddataitemstoleft);
     500             : 
     501             :     /*
     502             :      * The first item on the right page becomes the high key of the left page;
     503             :      * therefore it counts against left space as well as right space (we
     504             :      * cannot assume that suffix truncation will make it any smaller).  When
     505             :      * index has included attributes, then those attributes of left page high
     506             :      * key will be truncated leaving that page with slightly more free space.
     507             :      * However, that shouldn't affect our ability to find valid split
     508             :      * location, since we err in the direction of being pessimistic about free
     509             :      * space on the left half.  Besides, even when suffix truncation of
     510             :      * non-TID attributes occurs, the new high key often won't even be a
     511             :      * single MAXALIGN() quantum smaller than the firstright tuple it's based
     512             :      * on.
     513             :      *
     514             :      * If we are on the leaf level, assume that suffix truncation cannot avoid
     515             :      * adding a heap TID to the left half's new high key when splitting at the
     516             :      * leaf level.  In practice the new high key will often be smaller and
     517             :      * will rarely be larger, but conservatively assume the worst case.  We do
     518             :      * go to the trouble of subtracting away posting list overhead, though
     519             :      * only when it looks like it will make an appreciable difference.
     520             :      * (Posting lists are the only case where truncation will typically make
     521             :      * the final high key far smaller than firstright, so being a bit more
     522             :      * precise there noticeably improves the balance of free space.)
     523             :      */
     524     6763692 :     if (state->is_leaf)
     525     6759740 :         leftfree -= (int16) (firstrightsz +
     526     6759740 :                              MAXALIGN(sizeof(ItemPointerData)) -
     527             :                              postingsz);
     528             :     else
     529        3952 :         leftfree -= (int16) firstrightsz;
     530             : 
     531             :     /* account for the new item */
     532     6763692 :     if (newitemonleft)
     533      892428 :         leftfree -= (int16) state->newitemsz;
     534             :     else
     535     5871264 :         rightfree -= (int16) state->newitemsz;
     536             : 
     537             :     /*
     538             :      * If we are not on the leaf level, we will be able to discard the key
     539             :      * data from the first item that winds up on the right page.
     540             :      */
     541     6763692 :     if (!state->is_leaf)
     542        3952 :         rightfree += (int16) firstrightsz -
     543             :             (int16) (MAXALIGN(sizeof(IndexTupleData)) + sizeof(ItemIdData));
     544             : 
     545             :     /* Record split if legal */
     546     6763692 :     if (leftfree >= 0 && rightfree >= 0)
     547             :     {
     548             :         Assert(state->nsplits < state->maxsplits);
     549             : 
     550             :         /* Determine smallest firstright tuple size among legal splits */
     551     6722130 :         state->minfirstrightsz = Min(state->minfirstrightsz, firstrightsz);
     552             : 
     553     6722130 :         state->splits[state->nsplits].curdelta = 0;
     554     6722130 :         state->splits[state->nsplits].leftfree = leftfree;
     555     6722130 :         state->splits[state->nsplits].rightfree = rightfree;
     556     6722130 :         state->splits[state->nsplits].firstrightoff = firstrightoff;
     557     6722130 :         state->splits[state->nsplits].newitemonleft = newitemonleft;
     558     6722130 :         state->nsplits++;
     559             :     }
     560     6763692 : }
     561             : 
     562             : /*
     563             :  * Subroutine to assign space deltas to materialized array of candidate split
     564             :  * points based on current fillfactor, and to sort array using that fillfactor
     565             :  */
     566             : static void
     567       22402 : _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
     568             :                     bool usemult)
     569             : {
     570     6684074 :     for (int i = 0; i < state->nsplits; i++)
     571             :     {
     572     6661672 :         SplitPoint *split = state->splits + i;
     573             :         int16       delta;
     574             : 
     575     6661672 :         if (usemult)
     576     4297192 :             delta = fillfactormult * split->leftfree -
     577     4297192 :                 (1.0 - fillfactormult) * split->rightfree;
     578             :         else
     579     2364480 :             delta = split->leftfree - split->rightfree;
     580             : 
     581     6661672 :         if (delta < 0)
     582     1613236 :             delta = -delta;
     583             : 
     584             :         /* Save delta */
     585     6661672 :         split->curdelta = delta;
     586             :     }
     587             : 
     588       22402 :     qsort(state->splits, state->nsplits, sizeof(SplitPoint), _bt_splitcmp);
     589       22402 : }
     590             : 
     591             : /*
     592             :  * qsort-style comparator used by _bt_deltasortsplits()
     593             :  */
     594             : static int
     595    71721600 : _bt_splitcmp(const void *arg1, const void *arg2)
     596             : {
     597    71721600 :     SplitPoint *split1 = (SplitPoint *) arg1;
     598    71721600 :     SplitPoint *split2 = (SplitPoint *) arg2;
     599             : 
     600    71721600 :     return pg_cmp_s16(split1->curdelta, split2->curdelta);
     601             : }
     602             : 
     603             : /*
     604             :  * Subroutine to determine whether or not a non-rightmost leaf page should be
     605             :  * split immediately after the would-be original page offset for the
     606             :  * new/incoming tuple (or should have leaf fillfactor applied when new item is
     607             :  * to the right on original page).  This is appropriate when there is a
     608             :  * pattern of localized monotonically increasing insertions into a composite
     609             :  * index, where leading attribute values form local groupings, and we
     610             :  * anticipate further insertions of the same/current grouping (new item's
     611             :  * grouping) in the near future.  This can be thought of as a variation on
     612             :  * applying leaf fillfactor during rightmost leaf page splits, since cases
     613             :  * that benefit will converge on packing leaf pages leaffillfactor% full over
     614             :  * time.
     615             :  *
     616             :  * We may leave extra free space remaining on the rightmost page of a "most
     617             :  * significant column" grouping of tuples if that grouping never ends up
     618             :  * having future insertions that use the free space.  That effect is
     619             :  * self-limiting; a future grouping that becomes the "nearest on the right"
     620             :  * grouping of the affected grouping usually puts the extra free space to good
     621             :  * use.
     622             :  *
     623             :  * Caller uses optimization when routine returns true, though the exact action
     624             :  * taken by caller varies.  Caller uses original leaf page fillfactor in
     625             :  * standard way rather than using the new item offset directly when *usemult
     626             :  * was also set to true here.  Otherwise, caller applies optimization by
     627             :  * locating the legal split point that makes the new tuple the lastleft tuple
     628             :  * for the split.
     629             :  */
     630             : static bool
     631        9684 : _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
     632             :                     int leaffillfactor, bool *usemult)
     633             : {
     634             :     int16       nkeyatts;
     635             :     ItemId      itemid;
     636             :     IndexTuple  tup;
     637             :     int         keepnatts;
     638             : 
     639             :     Assert(state->is_leaf && !state->is_rightmost);
     640             : 
     641        9684 :     nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
     642             : 
     643             :     /* Single key indexes not considered here */
     644        9684 :     if (nkeyatts == 1)
     645        1272 :         return false;
     646             : 
     647             :     /* Ascending insertion pattern never inferred when new item is first */
     648        8412 :     if (state->newitemoff == P_FIRSTKEY)
     649           4 :         return false;
     650             : 
     651             :     /*
     652             :      * Only apply optimization on pages with equisized tuples, since ordinal
     653             :      * keys are likely to be fixed-width.  Testing if the new tuple is
     654             :      * variable width directly might also work, but that fails to apply the
     655             :      * optimization to indexes with a numeric_ops attribute.
     656             :      *
     657             :      * Conclude that page has equisized tuples when the new item is the same
     658             :      * width as the smallest item observed during pass over page, and other
     659             :      * non-pivot tuples must be the same width as well.  (Note that the
     660             :      * possibly-truncated existing high key isn't counted in
     661             :      * olddataitemstotal, and must be subtracted from maxoff.)
     662             :      */
     663        8408 :     if (state->newitemsz != state->minfirstrightsz)
     664        2574 :         return false;
     665        5834 :     if (state->newitemsz * (maxoff - 1) != state->olddataitemstotal)
     666        4442 :         return false;
     667             : 
     668             :     /*
     669             :      * Avoid applying optimization when tuples are wider than a tuple
     670             :      * consisting of two non-NULL int8/int64 attributes (or four non-NULL
     671             :      * int4/int32 attributes)
     672             :      */
     673        1392 :     if (state->newitemsz >
     674             :         MAXALIGN(sizeof(IndexTupleData) + sizeof(int64) * 2) +
     675             :         sizeof(ItemIdData))
     676           0 :         return false;
     677             : 
     678             :     /*
     679             :      * At least the first attribute's value must be equal to the corresponding
     680             :      * value in previous tuple to apply optimization.  New item cannot be a
     681             :      * duplicate, either.
     682             :      *
     683             :      * Handle case where new item is to the right of all items on the existing
     684             :      * page.  This is suggestive of monotonically increasing insertions in
     685             :      * itself, so the "heap TID adjacency" test is not applied here.
     686             :      */
     687        1392 :     if (state->newitemoff > maxoff)
     688             :     {
     689         420 :         itemid = PageGetItemId(state->origpage, maxoff);
     690         420 :         tup = (IndexTuple) PageGetItem(state->origpage, itemid);
     691         420 :         keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
     692             : 
     693         420 :         if (keepnatts > 1 && keepnatts <= nkeyatts)
     694             :         {
     695         418 :             *usemult = true;
     696         418 :             return true;
     697             :         }
     698             : 
     699           2 :         return false;
     700             :     }
     701             : 
     702             :     /*
     703             :      * "Low cardinality leading column, high cardinality suffix column"
     704             :      * indexes with a random insertion pattern (e.g., an index with a boolean
     705             :      * column, such as an index on '(book_is_in_print, book_isbn)') present us
     706             :      * with a risk of consistently misapplying the optimization.  We're
     707             :      * willing to accept very occasional misapplication of the optimization,
     708             :      * provided the cases where we get it wrong are rare and self-limiting.
     709             :      *
     710             :      * Heap TID adjacency strongly suggests that the item just to the left was
     711             :      * inserted very recently, which limits overapplication of the
     712             :      * optimization.  Besides, all inappropriate cases triggered here will
     713             :      * still split in the middle of the page on average.
     714             :      */
     715         972 :     itemid = PageGetItemId(state->origpage, OffsetNumberPrev(state->newitemoff));
     716         972 :     tup = (IndexTuple) PageGetItem(state->origpage, itemid);
     717             :     /* Do cheaper test first */
     718         972 :     if (BTreeTupleIsPosting(tup) ||
     719         972 :         !_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid))
     720         722 :         return false;
     721             :     /* Check same conditions as rightmost item case, too */
     722         250 :     keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
     723             : 
     724         250 :     if (keepnatts > 1 && keepnatts <= nkeyatts)
     725             :     {
     726         208 :         double      interp = (double) state->newitemoff / ((double) maxoff + 1);
     727         208 :         double      leaffillfactormult = (double) leaffillfactor / 100.0;
     728             : 
     729             :         /*
     730             :          * Don't allow caller to split after a new item when it will result in
     731             :          * a split point to the right of the point that a leaf fillfactor
     732             :          * split would use -- have caller apply leaf fillfactor instead
     733             :          */
     734         208 :         *usemult = interp > leaffillfactormult;
     735             : 
     736         208 :         return true;
     737             :     }
     738             : 
     739          42 :     return false;
     740             : }
     741             : 
     742             : /*
     743             :  * Subroutine for determining if two heap TIDS are "adjacent".
     744             :  *
     745             :  * Adjacent means that the high TID is very likely to have been inserted into
     746             :  * heap relation immediately after the low TID, probably during the current
     747             :  * transaction.
     748             :  */
     749             : static bool
     750         972 : _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid)
     751             : {
     752             :     BlockNumber lowblk,
     753             :                 highblk;
     754             : 
     755         972 :     lowblk = ItemPointerGetBlockNumber(lowhtid);
     756         972 :     highblk = ItemPointerGetBlockNumber(highhtid);
     757             : 
     758             :     /* Make optimistic assumption of adjacency when heap blocks match */
     759         972 :     if (lowblk == highblk)
     760         248 :         return true;
     761             : 
     762             :     /* When heap block one up, second offset should be FirstOffsetNumber */
     763         934 :     if (lowblk + 1 == highblk &&
     764         210 :         ItemPointerGetOffsetNumber(highhtid) == FirstOffsetNumber)
     765           2 :         return true;
     766             : 
     767         722 :     return false;
     768             : }
     769             : 
     770             : /*
     771             :  * Subroutine to find the "best" split point among candidate split points.
     772             :  * The best split point is the split point with the lowest penalty among split
     773             :  * points that fall within current/final split interval.  Penalty is an
     774             :  * abstract score, with a definition that varies depending on whether we're
     775             :  * splitting a leaf page or an internal page.  See _bt_split_penalty() for
     776             :  * details.
     777             :  *
     778             :  * "perfectpenalty" is assumed to be the lowest possible penalty among
     779             :  * candidate split points.  This allows us to return early without wasting
     780             :  * cycles on calculating the first differing attribute for all candidate
     781             :  * splits when that clearly cannot improve our choice (or when we only want a
     782             :  * minimally distinguishing split point, and don't want to make the split any
     783             :  * more unbalanced than is necessary).
     784             :  *
     785             :  * We return the index of the first existing tuple that should go on the right
     786             :  * page, plus a boolean indicating if new item is on left of split point.
     787             :  */
     788             : static OffsetNumber
     789       22124 : _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
     790             :                  bool *newitemonleft, FindSplitStrat strategy)
     791             : {
     792             :     int         bestpenalty,
     793             :                 lowsplit;
     794       22124 :     int         highsplit = Min(state->interval, state->nsplits);
     795             :     SplitPoint *final;
     796             : 
     797       22124 :     bestpenalty = INT_MAX;
     798       22124 :     lowsplit = 0;
     799       50824 :     for (int i = lowsplit; i < highsplit; i++)
     800             :     {
     801             :         int         penalty;
     802             : 
     803       50776 :         penalty = _bt_split_penalty(state, state->splits + i);
     804             : 
     805       50776 :         if (penalty < bestpenalty)
     806             :         {
     807       28082 :             bestpenalty = penalty;
     808       28082 :             lowsplit = i;
     809             :         }
     810             : 
     811       50776 :         if (penalty <= perfectpenalty)
     812       22076 :             break;
     813             :     }
     814             : 
     815       22124 :     final = &state->splits[lowsplit];
     816             : 
     817             :     /*
     818             :      * There is a risk that the "many duplicates" strategy will repeatedly do
     819             :      * the wrong thing when there are monotonically decreasing insertions to
     820             :      * the right of a large group of duplicates.   Repeated splits could leave
     821             :      * a succession of right half pages with free space that can never be
     822             :      * used.  This must be avoided.
     823             :      *
     824             :      * Consider the example of the leftmost page in a single integer attribute
     825             :      * NULLS FIRST index which is almost filled with NULLs.  Monotonically
     826             :      * decreasing integer insertions might cause the same leftmost page to
     827             :      * split repeatedly at the same point.  Each split derives its new high
     828             :      * key from the lowest current value to the immediate right of the large
     829             :      * group of NULLs, which will always be higher than all future integer
     830             :      * insertions, directing all future integer insertions to the same
     831             :      * leftmost page.
     832             :      */
     833       22124 :     if (strategy == SPLIT_MANY_DUPLICATES && !state->is_rightmost &&
     834         132 :         !final->newitemonleft && final->firstrightoff >= state->newitemoff &&
     835           0 :         final->firstrightoff < state->newitemoff + 9)
     836             :     {
     837             :         /*
     838             :          * Avoid the problem by performing a 50:50 split when the new item is
     839             :          * just to the right of the would-be "many duplicates" split point.
     840             :          * (Note that the test used for an insert that is "just to the right"
     841             :          * of the split point is conservative.)
     842             :          */
     843           0 :         final = &state->splits[0];
     844             :     }
     845             : 
     846       22124 :     *newitemonleft = final->newitemonleft;
     847       22124 :     return final->firstrightoff;
     848             : }
     849             : 
     850             : #define LEAF_SPLIT_DISTANCE         0.050
     851             : #define INTERNAL_SPLIT_DISTANCE     0.075
     852             : 
     853             : /*
     854             :  * Return a split interval to use for the default strategy.  This is a limit
     855             :  * on the number of candidate split points to give further consideration to.
     856             :  * Only a fraction of all candidate splits points (those located at the start
     857             :  * of the now-sorted splits array) fall within the split interval.  Split
     858             :  * interval is applied within _bt_bestsplitloc().
     859             :  *
     860             :  * Split interval represents an acceptable range of split points -- those that
     861             :  * have leftfree and rightfree values that are acceptably balanced.  The final
     862             :  * split point chosen is the split point with the lowest "penalty" among split
     863             :  * points in this split interval (unless we change our entire strategy, in
     864             :  * which case the interval also changes -- see _bt_strategy()).
     865             :  *
     866             :  * The "Prefix B-Trees" paper calls split interval sigma l for leaf splits,
     867             :  * and sigma b for internal ("branch") splits.  It's hard to provide a
     868             :  * theoretical justification for the size of the split interval, though it's
     869             :  * clear that a small split interval can make tuples on level L+1 much smaller
     870             :  * on average, without noticeably affecting space utilization on level L.
     871             :  * (Note that the way that we calculate split interval might need to change if
     872             :  * suffix truncation is taught to truncate tuples "within" the last
     873             :  * attribute/datum for data types like text, which is more or less how it is
     874             :  * assumed to work in the paper.)
     875             :  */
     876             : static int
     877       22124 : _bt_defaultinterval(FindSplitData *state)
     878             : {
     879             :     SplitPoint *spaceoptimal;
     880             :     int16       tolerance,
     881             :                 lowleftfree,
     882             :                 lowrightfree,
     883             :                 highleftfree,
     884             :                 highrightfree;
     885             : 
     886             :     /*
     887             :      * Determine leftfree and rightfree values that are higher and lower than
     888             :      * we're willing to tolerate.  Note that the final split interval will be
     889             :      * about 10% of nsplits in the common case where all non-pivot tuples
     890             :      * (data items) from a leaf page are uniformly sized.  We're a bit more
     891             :      * aggressive when splitting internal pages.
     892             :      */
     893       22124 :     if (state->is_leaf)
     894       21866 :         tolerance = state->olddataitemstotal * LEAF_SPLIT_DISTANCE;
     895             :     else
     896         258 :         tolerance = state->olddataitemstotal * INTERNAL_SPLIT_DISTANCE;
     897             : 
     898             :     /* First candidate split point is the most evenly balanced */
     899       22124 :     spaceoptimal = state->splits;
     900       22124 :     lowleftfree = spaceoptimal->leftfree - tolerance;
     901       22124 :     lowrightfree = spaceoptimal->rightfree - tolerance;
     902       22124 :     highleftfree = spaceoptimal->leftfree + tolerance;
     903       22124 :     highrightfree = spaceoptimal->rightfree + tolerance;
     904             : 
     905             :     /*
     906             :      * Iterate through split points, starting from the split immediately after
     907             :      * 'spaceoptimal'.  Find the first split point that divides free space so
     908             :      * unevenly that including it in the split interval would be unacceptable.
     909             :      */
     910      672028 :     for (int i = 1; i < state->nsplits; i++)
     911             :     {
     912      672028 :         SplitPoint *split = state->splits + i;
     913             : 
     914             :         /* Cannot use curdelta here, since its value is often weighted */
     915      672028 :         if (split->leftfree < lowleftfree || split->rightfree < lowrightfree ||
     916      650760 :             split->leftfree > highleftfree || split->rightfree > highrightfree)
     917       22124 :             return i;
     918             :     }
     919             : 
     920           0 :     return state->nsplits;
     921             : }
     922             : 
     923             : /*
     924             :  * Subroutine to decide whether split should use default strategy/initial
     925             :  * split interval, or whether it should finish splitting the page using
     926             :  * alternative strategies (this is only possible with leaf pages).
     927             :  *
     928             :  * Caller uses alternative strategy (or sticks with default strategy) based
     929             :  * on how *strategy is set here.  Return value is "perfect penalty", which is
     930             :  * passed to _bt_bestsplitloc() as a final constraint on how far caller is
     931             :  * willing to go to avoid appending a heap TID when using the many duplicates
     932             :  * strategy (it also saves _bt_bestsplitloc() useless cycles).
     933             :  */
     934             : static int
     935       22124 : _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
     936             :              SplitPoint *rightpage, FindSplitStrat *strategy)
     937             : {
     938             :     IndexTuple  leftmost,
     939             :                 rightmost;
     940             :     SplitPoint *leftinterval,
     941             :                *rightinterval;
     942             :     int         perfectpenalty;
     943       22124 :     int         indnkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
     944             : 
     945             :     /* Assume that alternative strategy won't be used for now */
     946       22124 :     *strategy = SPLIT_DEFAULT;
     947             : 
     948             :     /*
     949             :      * Use smallest observed firstright item size for entire page (actually,
     950             :      * entire imaginary version of page that includes newitem) as perfect
     951             :      * penalty on internal pages.  This can save cycles in the common case
     952             :      * where most or all splits (not just splits within interval) have
     953             :      * firstright tuples that are the same size.
     954             :      */
     955       22124 :     if (!state->is_leaf)
     956         258 :         return state->minfirstrightsz;
     957             : 
     958             :     /*
     959             :      * Use leftmost and rightmost tuples from leftmost and rightmost splits in
     960             :      * current split interval
     961             :      */
     962       21866 :     _bt_interval_edges(state, &leftinterval, &rightinterval);
     963       21866 :     leftmost = _bt_split_lastleft(state, leftinterval);
     964       21866 :     rightmost = _bt_split_firstright(state, rightinterval);
     965             : 
     966             :     /*
     967             :      * If initial split interval can produce a split point that will at least
     968             :      * avoid appending a heap TID in new high key, we're done.  Finish split
     969             :      * with default strategy and initial split interval.
     970             :      */
     971       21866 :     perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
     972       21866 :     if (perfectpenalty <= indnkeyatts)
     973       21360 :         return perfectpenalty;
     974             : 
     975             :     /*
     976             :      * Work out how caller should finish split when even their "perfect"
     977             :      * penalty for initial/default split interval indicates that the interval
     978             :      * does not contain even a single split that avoids appending a heap TID.
     979             :      *
     980             :      * Use the leftmost split's lastleft tuple and the rightmost split's
     981             :      * firstright tuple to assess every possible split.
     982             :      */
     983         506 :     leftmost = _bt_split_lastleft(state, leftpage);
     984         506 :     rightmost = _bt_split_firstright(state, rightpage);
     985             : 
     986             :     /*
     987             :      * If page (including new item) has many duplicates but is not entirely
     988             :      * full of duplicates, a many duplicates strategy split will be performed.
     989             :      * If page is entirely full of duplicates, a single value strategy split
     990             :      * will be performed.
     991             :      */
     992         506 :     perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
     993         506 :     if (perfectpenalty <= indnkeyatts)
     994             :     {
     995         146 :         *strategy = SPLIT_MANY_DUPLICATES;
     996             : 
     997             :         /*
     998             :          * Many duplicates strategy should split at either side the group of
     999             :          * duplicates that enclose the delta-optimal split point.  Return
    1000             :          * indnkeyatts rather than the true perfect penalty to make that
    1001             :          * happen.  (If perfectpenalty was returned here then low cardinality
    1002             :          * composite indexes could have continual unbalanced splits.)
    1003             :          *
    1004             :          * Note that caller won't go through with a many duplicates split in
    1005             :          * rare cases where it looks like there are ever-decreasing insertions
    1006             :          * to the immediate right of the split point.  This must happen just
    1007             :          * before a final decision is made, within _bt_bestsplitloc().
    1008             :          */
    1009         146 :         return indnkeyatts;
    1010             :     }
    1011             : 
    1012             :     /*
    1013             :      * Single value strategy is only appropriate with ever-increasing heap
    1014             :      * TIDs; otherwise, original default strategy split should proceed to
    1015             :      * avoid pathological performance.  Use page high key to infer if this is
    1016             :      * the rightmost page among pages that store the same duplicate value.
    1017             :      * This should not prevent insertions of heap TIDs that are slightly out
    1018             :      * of order from using single value strategy, since that's expected with
    1019             :      * concurrent inserters of the same duplicate value.
    1020             :      */
    1021         360 :     else if (state->is_rightmost)
    1022         230 :         *strategy = SPLIT_SINGLE_VALUE;
    1023             :     else
    1024             :     {
    1025             :         ItemId      itemid;
    1026             :         IndexTuple  hikey;
    1027             : 
    1028         130 :         itemid = PageGetItemId(state->origpage, P_HIKEY);
    1029         130 :         hikey = (IndexTuple) PageGetItem(state->origpage, itemid);
    1030         130 :         perfectpenalty = _bt_keep_natts_fast(state->rel, hikey,
    1031             :                                              state->newitem);
    1032         130 :         if (perfectpenalty <= indnkeyatts)
    1033          48 :             *strategy = SPLIT_SINGLE_VALUE;
    1034             :         else
    1035             :         {
    1036             :             /*
    1037             :              * Have caller finish split using default strategy, since page
    1038             :              * does not appear to be the rightmost page for duplicates of the
    1039             :              * value the page is filled with
    1040             :              */
    1041             :         }
    1042             :     }
    1043             : 
    1044         360 :     return perfectpenalty;
    1045             : }
    1046             : 
    1047             : /*
    1048             :  * Subroutine to locate leftmost and rightmost splits for current/default
    1049             :  * split interval.  Note that it will be the same split iff there is only one
    1050             :  * split in interval.
    1051             :  */
    1052             : static void
    1053       21866 : _bt_interval_edges(FindSplitData *state, SplitPoint **leftinterval,
    1054             :                    SplitPoint **rightinterval)
    1055             : {
    1056       21866 :     int         highsplit = Min(state->interval, state->nsplits);
    1057             :     SplitPoint *deltaoptimal;
    1058             : 
    1059       21866 :     deltaoptimal = state->splits;
    1060       21866 :     *leftinterval = NULL;
    1061       21866 :     *rightinterval = NULL;
    1062             : 
    1063             :     /*
    1064             :      * Delta is an absolute distance to optimal split point, so both the
    1065             :      * leftmost and rightmost split point will usually be at the end of the
    1066             :      * array
    1067             :      */
    1068       44122 :     for (int i = highsplit - 1; i >= 0; i--)
    1069             :     {
    1070       44122 :         SplitPoint *distant = state->splits + i;
    1071             : 
    1072       44122 :         if (distant->firstrightoff < deltaoptimal->firstrightoff)
    1073             :         {
    1074       21694 :             if (*leftinterval == NULL)
    1075       21346 :                 *leftinterval = distant;
    1076             :         }
    1077       22428 :         else if (distant->firstrightoff > deltaoptimal->firstrightoff)
    1078             :         {
    1079       21872 :             if (*rightinterval == NULL)
    1080       21418 :                 *rightinterval = distant;
    1081             :         }
    1082         556 :         else if (!distant->newitemonleft && deltaoptimal->newitemonleft)
    1083             :         {
    1084             :             /*
    1085             :              * "incoming tuple will become firstright" (distant) is to the
    1086             :              * left of "incoming tuple will become lastleft" (delta-optimal)
    1087             :              */
    1088             :             Assert(distant->firstrightoff == state->newitemoff);
    1089           2 :             if (*leftinterval == NULL)
    1090           0 :                 *leftinterval = distant;
    1091             :         }
    1092         554 :         else if (distant->newitemonleft && !deltaoptimal->newitemonleft)
    1093             :         {
    1094             :             /*
    1095             :              * "incoming tuple will become lastleft" (distant) is to the right
    1096             :              * of "incoming tuple will become firstright" (delta-optimal)
    1097             :              */
    1098             :             Assert(distant->firstrightoff == state->newitemoff);
    1099          16 :             if (*rightinterval == NULL)
    1100           8 :                 *rightinterval = distant;
    1101             :         }
    1102             :         else
    1103             :         {
    1104             :             /* There was only one or two splits in initial split interval */
    1105             :             Assert(distant == deltaoptimal);
    1106         538 :             if (*leftinterval == NULL)
    1107         520 :                 *leftinterval = distant;
    1108         538 :             if (*rightinterval == NULL)
    1109         440 :                 *rightinterval = distant;
    1110             :         }
    1111             : 
    1112       44122 :         if (*leftinterval && *rightinterval)
    1113       21866 :             return;
    1114             :     }
    1115             : 
    1116             :     Assert(false);
    1117             : }
    1118             : 
    1119             : /*
    1120             :  * Subroutine to find penalty for caller's candidate split point.
    1121             :  *
    1122             :  * On leaf pages, penalty is the attribute number that distinguishes each side
    1123             :  * of a split.  It's the last attribute that needs to be included in new high
    1124             :  * key for left page.  It can be greater than the number of key attributes in
    1125             :  * cases where a heap TID will need to be appended during truncation.
    1126             :  *
    1127             :  * On internal pages, penalty is simply the size of the firstright tuple for
    1128             :  * the split (including line pointer overhead).  This tuple will become the
    1129             :  * new high key for the left page.
    1130             :  */
    1131             : static inline int
    1132       50776 : _bt_split_penalty(FindSplitData *state, SplitPoint *split)
    1133             : {
    1134             :     IndexTuple  lastleft;
    1135             :     IndexTuple  firstright;
    1136             : 
    1137       50776 :     if (!state->is_leaf)
    1138             :     {
    1139             :         ItemId      itemid;
    1140             : 
    1141         266 :         if (!split->newitemonleft &&
    1142         266 :             split->firstrightoff == state->newitemoff)
    1143          24 :             return state->newitemsz;
    1144             : 
    1145         242 :         itemid = PageGetItemId(state->origpage, split->firstrightoff);
    1146             : 
    1147         242 :         return MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
    1148             :     }
    1149             : 
    1150       50510 :     lastleft = _bt_split_lastleft(state, split);
    1151       50510 :     firstright = _bt_split_firstright(state, split);
    1152             : 
    1153       50510 :     return _bt_keep_natts_fast(state->rel, lastleft, firstright);
    1154             : }
    1155             : 
    1156             : /*
    1157             :  * Subroutine to get a lastleft IndexTuple for a split point
    1158             :  */
    1159             : static inline IndexTuple
    1160       72882 : _bt_split_lastleft(FindSplitData *state, SplitPoint *split)
    1161             : {
    1162             :     ItemId      itemid;
    1163             : 
    1164       72882 :     if (split->newitemonleft && split->firstrightoff == state->newitemoff)
    1165         414 :         return state->newitem;
    1166             : 
    1167       72468 :     itemid = PageGetItemId(state->origpage,
    1168       72468 :                            OffsetNumberPrev(split->firstrightoff));
    1169       72468 :     return (IndexTuple) PageGetItem(state->origpage, itemid);
    1170             : }
    1171             : 
    1172             : /*
    1173             :  * Subroutine to get a firstright IndexTuple for a split point
    1174             :  */
    1175             : static inline IndexTuple
    1176       72882 : _bt_split_firstright(FindSplitData *state, SplitPoint *split)
    1177             : {
    1178             :     ItemId      itemid;
    1179             : 
    1180       72882 :     if (!split->newitemonleft && split->firstrightoff == state->newitemoff)
    1181         196 :         return state->newitem;
    1182             : 
    1183       72686 :     itemid = PageGetItemId(state->origpage, split->firstrightoff);
    1184       72686 :     return (IndexTuple) PageGetItem(state->origpage, itemid);
    1185             : }

Generated by: LCOV version 1.16