LCOV - code coverage report
Current view: top level - src/backend/access/nbtree - nbtsplitloc.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 96.3 % 272 262
Test Date: 2026-03-04 00:14:49 Functions: 100.0 % 13 13
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * nbtsplitloc.c
       4              :  *    Choose split point code for Postgres btree implementation.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/access/nbtree/nbtsplitloc.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/nbtree.h"
      18              : #include "access/tableam.h"
      19              : #include "common/int.h"
      20              : 
      21              : typedef enum
      22              : {
      23              :     /* strategy for searching through materialized list of split points */
      24              :     SPLIT_DEFAULT,              /* give some weight to truncation */
      25              :     SPLIT_MANY_DUPLICATES,      /* find minimally distinguishing point */
      26              :     SPLIT_SINGLE_VALUE,         /* leave left page almost full */
      27              : } FindSplitStrat;
      28              : 
      29              : typedef struct
      30              : {
      31              :     /* details of free space left by split */
      32              :     int16       curdelta;       /* current leftfree/rightfree delta */
      33              :     int16       leftfree;       /* space left on left page post-split */
      34              :     int16       rightfree;      /* space left on right page post-split */
      35              : 
      36              :     /* split point identifying fields (returned by _bt_findsplitloc) */
      37              :     OffsetNumber firstrightoff; /* first origpage item on rightpage */
      38              :     bool        newitemonleft;  /* new item goes on left, or right? */
      39              : } SplitPoint;
      40              : 
      41              : typedef struct
      42              : {
      43              :     /* context data for _bt_recsplitloc */
      44              :     Relation    rel;            /* index relation */
      45              :     Page        origpage;       /* page undergoing split */
      46              :     IndexTuple  newitem;        /* new item (cause of page split) */
      47              :     Size        newitemsz;      /* size of newitem (includes line pointer) */
      48              :     bool        is_leaf;        /* T if splitting a leaf page */
      49              :     bool        is_rightmost;   /* T if splitting rightmost page on level */
      50              :     OffsetNumber newitemoff;    /* where the new item is to be inserted */
      51              :     int         leftspace;      /* space available for items on left page */
      52              :     int         rightspace;     /* space available for items on right page */
      53              :     int         olddataitemstotal;  /* space taken by old items */
      54              :     Size        minfirstrightsz;    /* smallest firstright size */
      55              : 
      56              :     /* candidate split point data */
      57              :     int         maxsplits;      /* maximum number of splits */
      58              :     int         nsplits;        /* current number of splits */
      59              :     SplitPoint *splits;         /* all candidate split points for page */
      60              :     int         interval;       /* current range of acceptable split points */
      61              : } FindSplitData;
      62              : 
      63              : static void _bt_recsplitloc(FindSplitData *state,
      64              :                             OffsetNumber firstrightoff, bool newitemonleft,
      65              :                             int olddataitemstoleft,
      66              :                             Size firstrightofforigpagetuplesz);
      67              : static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
      68              :                                 bool usemult);
      69              : static int  _bt_splitcmp(const void *arg1, const void *arg2);
      70              : static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
      71              :                                 int leaffillfactor, bool *usemult);
      72              : static bool _bt_adjacenthtid(const ItemPointerData *lowhtid, const ItemPointerData *highhtid);
      73              : static OffsetNumber _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
      74              :                                      bool *newitemonleft, FindSplitStrat strategy);
      75              : static int  _bt_defaultinterval(FindSplitData *state);
      76              : static int  _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
      77              :                          SplitPoint *rightpage, FindSplitStrat *strategy);
      78              : static void _bt_interval_edges(FindSplitData *state,
      79              :                                SplitPoint **leftinterval, SplitPoint **rightinterval);
      80              : static inline int _bt_split_penalty(FindSplitData *state, SplitPoint *split);
      81              : static inline IndexTuple _bt_split_lastleft(FindSplitData *state,
      82              :                                             SplitPoint *split);
      83              : static inline IndexTuple _bt_split_firstright(FindSplitData *state,
      84              :                                               SplitPoint *split);
      85              : 
      86              : 
      87              : /*
      88              :  *  _bt_findsplitloc() -- find an appropriate place to split a page.
      89              :  *
      90              :  * The main goal here is to equalize the free space that will be on each
      91              :  * split page, *after accounting for the inserted tuple*.  (If we fail to
      92              :  * account for it, we might find ourselves with too little room on the page
      93              :  * that it needs to go into!)
      94              :  *
      95              :  * If the page is the rightmost page on its level, we instead try to arrange
      96              :  * to leave the left split page fillfactor% full.  In this way, when we are
      97              :  * inserting successively increasing keys (consider sequences, timestamps,
      98              :  * etc) we will end up with a tree whose pages are about fillfactor% full,
      99              :  * instead of the 50% full result that we'd get without this special case.
     100              :  * This is the same as nbtsort.c produces for a newly-created tree.  Note
     101              :  * that leaf and nonleaf pages use different fillfactors.  Note also that
     102              :  * there are a number of further special cases where fillfactor is not
     103              :  * applied in the standard way.
     104              :  *
     105              :  * We are passed the intended insert position of the new tuple, expressed as
     106              :  * the offsetnumber of the tuple it must go in front of (this could be
     107              :  * maxoff+1 if the tuple is to go at the end).  The new tuple itself is also
     108              :  * passed, since it's needed to give some weight to how effective suffix
     109              :  * truncation will be.  The implementation picks the split point that
     110              :  * maximizes the effectiveness of suffix truncation from a small list of
     111              :  * alternative candidate split points that leave each side of the split with
     112              :  * about the same share of free space.  Suffix truncation is secondary to
     113              :  * equalizing free space, except in cases with large numbers of duplicates.
     114              :  * Note that it is always assumed that caller goes on to perform truncation,
     115              :  * even with pg_upgrade'd indexes where that isn't actually the case
     116              :  * (!heapkeyspace indexes).  See nbtree/README for more information about
     117              :  * suffix truncation.
     118              :  *
     119              :  * We return the index of the first existing tuple that should go on the
     120              :  * righthand page (which is called firstrightoff), plus a boolean
     121              :  * indicating whether the new tuple goes on the left or right page.  You
     122              :  * can think of the returned state as a point _between_ two adjacent data
     123              :  * items (lastleft and firstright data items) on an imaginary version of
     124              :  * origpage that already includes newitem.  The bool is necessary to
     125              :  * disambiguate the case where firstrightoff == newitemoff (i.e. it is
     126              :  * sometimes needed to determine if the firstright tuple for the split is
     127              :  * newitem rather than the tuple from origpage at offset firstrightoff).
     128              :  */
     129              : OffsetNumber
     130        11561 : _bt_findsplitloc(Relation rel,
     131              :                  Page origpage,
     132              :                  OffsetNumber newitemoff,
     133              :                  Size newitemsz,
     134              :                  IndexTuple newitem,
     135              :                  bool *newitemonleft)
     136              : {
     137              :     BTPageOpaque opaque;
     138              :     int         leftspace,
     139              :                 rightspace,
     140              :                 olddataitemstotal,
     141              :                 olddataitemstoleft,
     142              :                 perfectpenalty,
     143              :                 leaffillfactor;
     144              :     FindSplitData state;
     145              :     FindSplitStrat strategy;
     146              :     ItemId      itemid;
     147              :     OffsetNumber offnum,
     148              :                 maxoff,
     149              :                 firstrightoff;
     150              :     double      fillfactormult;
     151              :     bool        usemult;
     152              :     SplitPoint  leftpage,
     153              :                 rightpage;
     154              : 
     155        11561 :     opaque = BTPageGetOpaque(origpage);
     156        11561 :     maxoff = PageGetMaxOffsetNumber(origpage);
     157              : 
     158              :     /* Total free space available on a btree page, after fixed overhead */
     159        11561 :     leftspace = rightspace =
     160        11561 :         PageGetPageSize(origpage) - SizeOfPageHeaderData -
     161              :         MAXALIGN(sizeof(BTPageOpaqueData));
     162              : 
     163              :     /* The right page will have the same high key as the old page */
     164        11561 :     if (!P_RIGHTMOST(opaque))
     165              :     {
     166         4999 :         itemid = PageGetItemId(origpage, P_HIKEY);
     167         4999 :         rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
     168              :                              sizeof(ItemIdData));
     169              :     }
     170              : 
     171              :     /* Count up total space in data items before actually scanning 'em */
     172        11561 :     olddataitemstotal = rightspace - (int) PageGetExactFreeSpace(origpage);
     173        11561 :     leaffillfactor = BTGetFillFactor(rel);
     174              : 
     175              :     /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
     176        11561 :     newitemsz += sizeof(ItemIdData);
     177        11561 :     state.rel = rel;
     178        11561 :     state.origpage = origpage;
     179        11561 :     state.newitem = newitem;
     180        11561 :     state.newitemsz = newitemsz;
     181        11561 :     state.is_leaf = P_ISLEAF(opaque);
     182        11561 :     state.is_rightmost = P_RIGHTMOST(opaque);
     183        11561 :     state.leftspace = leftspace;
     184        11561 :     state.rightspace = rightspace;
     185        11561 :     state.olddataitemstotal = olddataitemstotal;
     186        11561 :     state.minfirstrightsz = SIZE_MAX;
     187        11561 :     state.newitemoff = newitemoff;
     188              : 
     189              :     /* newitem cannot be a posting list item */
     190              :     Assert(!BTreeTupleIsPosting(newitem));
     191              : 
     192              :     /*
     193              :      * nsplits should never exceed maxoff because there will be at most as
     194              :      * many candidate split points as there are points _between_ tuples, once
     195              :      * you imagine that the new item is already on the original page (the
     196              :      * final number of splits may be slightly lower because not all points
     197              :      * between tuples will be legal).
     198              :      */
     199        11561 :     state.maxsplits = maxoff;
     200        11561 :     state.splits = palloc_array(SplitPoint, state.maxsplits);
     201        11561 :     state.nsplits = 0;
     202              : 
     203              :     /*
     204              :      * Scan through the data items and calculate space usage for a split at
     205              :      * each possible position
     206              :      */
     207        11561 :     olddataitemstoleft = 0;
     208              : 
     209        11561 :     for (offnum = P_FIRSTDATAKEY(opaque);
     210      3499298 :          offnum <= maxoff;
     211      3487737 :          offnum = OffsetNumberNext(offnum))
     212              :     {
     213              :         Size        itemsz;
     214              : 
     215      3487737 :         itemid = PageGetItemId(origpage, offnum);
     216      3487737 :         itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
     217              : 
     218              :         /*
     219              :          * When item offset number is not newitemoff, neither side of the
     220              :          * split can be newitem.  Record a split after the previous data item
     221              :          * from original page, but before the current data item from original
     222              :          * page. (_bt_recsplitloc() will reject the split when there are no
     223              :          * previous items, which we rely on.)
     224              :          */
     225      3487737 :         if (offnum < newitemoff)
     226      3033870 :             _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
     227       453867 :         else if (offnum > newitemoff)
     228       447309 :             _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
     229              :         else
     230              :         {
     231              :             /*
     232              :              * Record a split after all "offnum < newitemoff" original page
     233              :              * data items, but before newitem
     234              :              */
     235         6558 :             _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
     236              : 
     237              :             /*
     238              :              * Record a split after newitem, but before data item from
     239              :              * original page at offset newitemoff/current offset
     240              :              */
     241         6558 :             _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
     242              :         }
     243              : 
     244      3487737 :         olddataitemstoleft += itemsz;
     245              :     }
     246              : 
     247              :     /*
     248              :      * Record a split after all original page data items, but before newitem.
     249              :      * (Though only when it's possible that newitem will end up alone on new
     250              :      * right page.)
     251              :      */
     252              :     Assert(olddataitemstoleft == olddataitemstotal);
     253        11561 :     if (newitemoff > maxoff)
     254         5003 :         _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0);
     255              : 
     256              :     /*
     257              :      * I believe it is not possible to fail to find a feasible split, but just
     258              :      * in case ...
     259              :      */
     260        11561 :     if (state.nsplits == 0)
     261            0 :         elog(ERROR, "could not find a feasible split point for index \"%s\"",
     262              :              RelationGetRelationName(rel));
     263              : 
     264              :     /*
     265              :      * Start search for a split point among list of legal split points.  Give
     266              :      * primary consideration to equalizing available free space in each half
     267              :      * of the split initially (start with default strategy), while applying
     268              :      * rightmost and split-after-new-item optimizations where appropriate.
     269              :      * Either of the two other fallback strategies may be required for cases
     270              :      * with a large number of duplicates around the original/space-optimal
     271              :      * split point.
     272              :      *
     273              :      * Default strategy gives some weight to suffix truncation in deciding a
     274              :      * split point on leaf pages.  It attempts to select a split point where a
     275              :      * distinguishing attribute appears earlier in the new high key for the
     276              :      * left side of the split, in order to maximize the number of trailing
     277              :      * attributes that can be truncated away.  Only candidate split points
     278              :      * that imply an acceptable balance of free space on each side are
     279              :      * considered.  See _bt_defaultinterval().
     280              :      */
     281        11561 :     if (!state.is_leaf)
     282              :     {
     283              :         /* fillfactormult only used on rightmost page */
     284          129 :         usemult = state.is_rightmost;
     285          129 :         fillfactormult = BTREE_NONLEAF_FILLFACTOR / 100.0;
     286              :     }
     287        11432 :     else if (state.is_rightmost)
     288              :     {
     289              :         /* Rightmost leaf page --  fillfactormult always used */
     290         6483 :         usemult = true;
     291         6483 :         fillfactormult = leaffillfactor / 100.0;
     292              :     }
     293         4949 :     else if (_bt_afternewitemoff(&state, maxoff, leaffillfactor, &usemult))
     294              :     {
     295              :         /*
     296              :          * New item inserted at rightmost point among a localized grouping on
     297              :          * a leaf page -- apply "split after new item" optimization, either by
     298              :          * applying leaf fillfactor multiplier, or by choosing the exact split
     299              :          * point that leaves newitem as lastleft. (usemult is set for us.)
     300              :          */
     301          331 :         if (usemult)
     302              :         {
     303              :             /* fillfactormult should be set based on leaf fillfactor */
     304          256 :             fillfactormult = leaffillfactor / 100.0;
     305              :         }
     306              :         else
     307              :         {
     308              :             /* find precise split point after newitemoff */
     309        19760 :             for (int i = 0; i < state.nsplits; i++)
     310              :             {
     311        19760 :                 SplitPoint *split = state.splits + i;
     312              : 
     313        19760 :                 if (split->newitemonleft &&
     314           75 :                     newitemoff == split->firstrightoff)
     315              :                 {
     316           75 :                     pfree(state.splits);
     317           75 :                     *newitemonleft = true;
     318           75 :                     return newitemoff;
     319              :                 }
     320              :             }
     321              : 
     322              :             /*
     323              :              * Cannot legally split after newitemoff; proceed with split
     324              :              * without using fillfactor multiplier.  This is defensive, and
     325              :              * should never be needed in practice.
     326              :              */
     327            0 :             fillfactormult = 0.50;
     328              :         }
     329              :     }
     330              :     else
     331              :     {
     332              :         /* Other leaf page.  50:50 page split. */
     333         4618 :         usemult = false;
     334              :         /* fillfactormult not used, but be tidy */
     335         4618 :         fillfactormult = 0.50;
     336              :     }
     337              : 
     338              :     /*
     339              :      * Save leftmost and rightmost splits for page before original ordinal
     340              :      * sort order is lost by delta/fillfactormult sort
     341              :      */
     342        11486 :     leftpage = state.splits[0];
     343        11486 :     rightpage = state.splits[state.nsplits - 1];
     344              : 
     345              :     /* Give split points a fillfactormult-wise delta, and sort on deltas */
     346        11486 :     _bt_deltasortsplits(&state, fillfactormult, usemult);
     347              : 
     348              :     /* Determine split interval for default strategy */
     349        11486 :     state.interval = _bt_defaultinterval(&state);
     350              : 
     351              :     /*
     352              :      * Determine if default strategy/split interval will produce a
     353              :      * sufficiently distinguishing split, or if we should change strategies.
     354              :      * Alternative strategies change the range of split points that are
     355              :      * considered acceptable (split interval), and possibly change
     356              :      * fillfactormult, in order to deal with pages with a large number of
     357              :      * duplicates gracefully.
     358              :      *
     359              :      * Pass low and high splits for the entire page (actually, they're for an
     360              :      * imaginary version of the page that includes newitem).  These are used
     361              :      * when the initial split interval encloses split points that are full of
     362              :      * duplicates, and we need to consider if it's even possible to avoid
     363              :      * appending a heap TID.
     364              :      */
     365        11486 :     perfectpenalty = _bt_strategy(&state, &leftpage, &rightpage, &strategy);
     366              : 
     367        11486 :     if (strategy == SPLIT_DEFAULT)
     368              :     {
     369              :         /*
     370              :          * Default strategy worked out (always works out with internal page).
     371              :          * Original split interval still stands.
     372              :          */
     373              :     }
     374              : 
     375              :     /*
     376              :      * Many duplicates strategy is used when a heap TID would otherwise be
     377              :      * appended, but the page isn't completely full of logical duplicates.
     378              :      *
     379              :      * The split interval is widened to include all legal candidate split
     380              :      * points.  There might be a few as two distinct values in the whole-page
     381              :      * split interval, though it's also possible that most of the values on
     382              :      * the page are unique.  The final split point will either be to the
     383              :      * immediate left or to the immediate right of the group of duplicate
     384              :      * tuples that enclose the first/delta-optimal split point (perfect
     385              :      * penalty was set so that the lowest delta split point that avoids
     386              :      * appending a heap TID will be chosen).  Maximizing the number of
     387              :      * attributes that can be truncated away is not a goal of the many
     388              :      * duplicates strategy.
     389              :      *
     390              :      * Single value strategy is used when it is impossible to avoid appending
     391              :      * a heap TID.  It arranges to leave the left page very full.  This
     392              :      * maximizes space utilization in cases where tuples with the same
     393              :      * attribute values span many pages.  Newly inserted duplicates will tend
     394              :      * to have higher heap TID values, so we'll end up splitting to the right
     395              :      * consistently.  (Single value strategy is harmless though not
     396              :      * particularly useful with !heapkeyspace indexes.)
     397              :      */
     398          213 :     else if (strategy == SPLIT_MANY_DUPLICATES)
     399              :     {
     400              :         Assert(state.is_leaf);
     401              :         /* Shouldn't try to truncate away extra user attributes */
     402              :         Assert(perfectpenalty ==
     403              :                IndexRelationGetNumberOfKeyAttributes(state.rel));
     404              :         /* No need to resort splits -- no change in fillfactormult/deltas */
     405           72 :         state.interval = state.nsplits;
     406              :     }
     407          141 :     else if (strategy == SPLIT_SINGLE_VALUE)
     408              :     {
     409              :         Assert(state.is_leaf);
     410              :         /* Split near the end of the page */
     411          141 :         usemult = true;
     412          141 :         fillfactormult = BTREE_SINGLEVAL_FILLFACTOR / 100.0;
     413              :         /* Resort split points with new delta */
     414          141 :         _bt_deltasortsplits(&state, fillfactormult, usemult);
     415              :         /* Appending a heap TID is unavoidable, so interval of 1 is fine */
     416          141 :         state.interval = 1;
     417              :     }
     418              : 
     419              :     /*
     420              :      * Search among acceptable split points (using final split interval) for
     421              :      * the entry that has the lowest penalty, and is therefore expected to
     422              :      * maximize fan-out.  Sets *newitemonleft for us.
     423              :      */
     424        11486 :     firstrightoff = _bt_bestsplitloc(&state, perfectpenalty, newitemonleft,
     425              :                                      strategy);
     426        11486 :     pfree(state.splits);
     427              : 
     428        11486 :     return firstrightoff;
     429              : }
     430              : 
     431              : /*
     432              :  * Subroutine to record a particular point between two tuples (possibly the
     433              :  * new item) on page (ie, combination of firstrightoff and newitemonleft
     434              :  * settings) in *state for later analysis.  This is also a convenient point to
     435              :  * check if the split is legal (if it isn't, it won't be recorded).
     436              :  *
     437              :  * firstrightoff is the offset of the first item on the original page that
     438              :  * goes to the right page, and firstrightofforigpagetuplesz is the size of
     439              :  * that tuple.  firstrightoff can be > max offset, which means that all the
     440              :  * old items go to the left page and only the new item goes to the right page.
     441              :  * We don't actually use firstrightofforigpagetuplesz in that case (actually,
     442              :  * we don't use it for _any_ split where the firstright tuple happens to be
     443              :  * newitem).
     444              :  *
     445              :  * olddataitemstoleft is the total size of all old items to the left of the
     446              :  * split point that is recorded here when legal.  Should not include
     447              :  * newitemsz, since that is handled here.
     448              :  */
     449              : static void
     450      3499298 : _bt_recsplitloc(FindSplitData *state,
     451              :                 OffsetNumber firstrightoff,
     452              :                 bool newitemonleft,
     453              :                 int olddataitemstoleft,
     454              :                 Size firstrightofforigpagetuplesz)
     455              : {
     456              :     int16       leftfree,
     457              :                 rightfree;
     458              :     Size        firstrightsz;
     459      3499298 :     Size        postingsz = 0;
     460              :     bool        newitemisfirstright;
     461              : 
     462              :     /* Is the new item going to be split point's firstright tuple? */
     463      3517417 :     newitemisfirstright = (firstrightoff == state->newitemoff &&
     464        18119 :                            !newitemonleft);
     465              : 
     466      3499298 :     if (newitemisfirstright)
     467        11561 :         firstrightsz = state->newitemsz;
     468              :     else
     469              :     {
     470      3487737 :         firstrightsz = firstrightofforigpagetuplesz;
     471              : 
     472              :         /*
     473              :          * Calculate suffix truncation space saving when firstright tuple is a
     474              :          * posting list tuple, though only when the tuple is over 64 bytes
     475              :          * including line pointer overhead (arbitrary).  This avoids accessing
     476              :          * the tuple in cases where its posting list must be very small (if
     477              :          * tuple has one at all).
     478              :          *
     479              :          * Note: We don't do this in the case where firstright tuple is
     480              :          * newitem, since newitem cannot have a posting list.
     481              :          */
     482      3487737 :         if (state->is_leaf && firstrightsz > 64)
     483              :         {
     484              :             ItemId      itemid;
     485              :             IndexTuple  newhighkey;
     486              : 
     487        26864 :             itemid = PageGetItemId(state->origpage, firstrightoff);
     488        26864 :             newhighkey = (IndexTuple) PageGetItem(state->origpage, itemid);
     489              : 
     490        26864 :             if (BTreeTupleIsPosting(newhighkey))
     491        14833 :                 postingsz = IndexTupleSize(newhighkey) -
     492        14833 :                     BTreeTupleGetPostingOffset(newhighkey);
     493              :         }
     494              :     }
     495              : 
     496              :     /* Account for all the old tuples */
     497      3499298 :     leftfree = state->leftspace - olddataitemstoleft;
     498      3499298 :     rightfree = state->rightspace -
     499      3499298 :         (state->olddataitemstotal - olddataitemstoleft);
     500              : 
     501              :     /*
     502              :      * The first item on the right page becomes the high key of the left page;
     503              :      * therefore it counts against left space as well as right space (we
     504              :      * cannot assume that suffix truncation will make it any smaller).  When
     505              :      * index has included attributes, then those attributes of left page high
     506              :      * key will be truncated leaving that page with slightly more free space.
     507              :      * However, that shouldn't affect our ability to find valid split
     508              :      * location, since we err in the direction of being pessimistic about free
     509              :      * space on the left half.  Besides, even when suffix truncation of
     510              :      * non-TID attributes occurs, the new high key often won't even be a
     511              :      * single MAXALIGN() quantum smaller than the firstright tuple it's based
     512              :      * on.
     513              :      *
     514              :      * If we are on the leaf level, assume that suffix truncation cannot avoid
     515              :      * adding a heap TID to the left half's new high key when splitting at the
     516              :      * leaf level.  In practice the new high key will often be smaller and
     517              :      * will rarely be larger, but conservatively assume the worst case.  We do
     518              :      * go to the trouble of subtracting away posting list overhead, though
     519              :      * only when it looks like it will make an appreciable difference.
     520              :      * (Posting lists are the only case where truncation will typically make
     521              :      * the final high key far smaller than firstright, so being a bit more
     522              :      * precise there noticeably improves the balance of free space.)
     523              :      */
     524      3499298 :     if (state->is_leaf)
     525      3497321 :         leftfree -= (int16) (firstrightsz +
     526      3497321 :                              MAXALIGN(sizeof(ItemPointerData)) -
     527              :                              postingsz);
     528              :     else
     529         1977 :         leftfree -= (int16) firstrightsz;
     530              : 
     531              :     /* account for the new item */
     532      3499298 :     if (newitemonleft)
     533       453867 :         leftfree -= (int16) state->newitemsz;
     534              :     else
     535      3045431 :         rightfree -= (int16) state->newitemsz;
     536              : 
     537              :     /*
     538              :      * If we are not on the leaf level, we will be able to discard the key
     539              :      * data from the first item that winds up on the right page.
     540              :      */
     541      3499298 :     if (!state->is_leaf)
     542         1977 :         rightfree += (int16) firstrightsz -
     543              :             (int16) (MAXALIGN(sizeof(IndexTupleData)) + sizeof(ItemIdData));
     544              : 
     545              :     /* Record split if legal */
     546      3499298 :     if (leftfree >= 0 && rightfree >= 0)
     547              :     {
     548              :         Assert(state->nsplits < state->maxsplits);
     549              : 
     550              :         /* Determine smallest firstright tuple size among legal splits */
     551      3477691 :         state->minfirstrightsz = Min(state->minfirstrightsz, firstrightsz);
     552              : 
     553      3477691 :         state->splits[state->nsplits].curdelta = 0;
     554      3477691 :         state->splits[state->nsplits].leftfree = leftfree;
     555      3477691 :         state->splits[state->nsplits].rightfree = rightfree;
     556      3477691 :         state->splits[state->nsplits].firstrightoff = firstrightoff;
     557      3477691 :         state->splits[state->nsplits].newitemonleft = newitemonleft;
     558      3477691 :         state->nsplits++;
     559              :     }
     560      3499298 : }
     561              : 
     562              : /*
     563              :  * Subroutine to assign space deltas to materialized array of candidate split
     564              :  * points based on current fillfactor, and to sort array using that fillfactor
     565              :  */
     566              : static void
     567        11627 : _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
     568              :                     bool usemult)
     569              : {
     570      3464236 :     for (int i = 0; i < state->nsplits; i++)
     571              :     {
     572      3452609 :         SplitPoint *split = state->splits + i;
     573              :         int16       delta;
     574              : 
     575      3452609 :         if (usemult)
     576      2257547 :             delta = fillfactormult * split->leftfree -
     577      2257547 :                 (1.0 - fillfactormult) * split->rightfree;
     578              :         else
     579      1195062 :             delta = split->leftfree - split->rightfree;
     580              : 
     581      3452609 :         if (delta < 0)
     582       822912 :             delta = -delta;
     583              : 
     584              :         /* Save delta */
     585      3452609 :         split->curdelta = delta;
     586              :     }
     587              : 
     588        11627 :     qsort(state->splits, state->nsplits, sizeof(SplitPoint), _bt_splitcmp);
     589        11627 : }
     590              : 
     591              : /*
     592              :  * qsort-style comparator used by _bt_deltasortsplits()
     593              :  */
     594              : static int
     595     37195507 : _bt_splitcmp(const void *arg1, const void *arg2)
     596              : {
     597     37195507 :     const SplitPoint *split1 = arg1;
     598     37195507 :     const SplitPoint *split2 = arg2;
     599              : 
     600     37195507 :     return pg_cmp_s16(split1->curdelta, split2->curdelta);
     601              : }
     602              : 
     603              : /*
     604              :  * Subroutine to determine whether or not a non-rightmost leaf page should be
     605              :  * split immediately after the would-be original page offset for the
     606              :  * new/incoming tuple (or should have leaf fillfactor applied when new item is
     607              :  * to the right on original page).  This is appropriate when there is a
     608              :  * pattern of localized monotonically increasing insertions into a composite
     609              :  * index, where leading attribute values form local groupings, and we
     610              :  * anticipate further insertions of the same/current grouping (new item's
     611              :  * grouping) in the near future.  This can be thought of as a variation on
     612              :  * applying leaf fillfactor during rightmost leaf page splits, since cases
     613              :  * that benefit will converge on packing leaf pages leaffillfactor% full over
     614              :  * time.
     615              :  *
     616              :  * We may leave extra free space remaining on the rightmost page of a "most
     617              :  * significant column" grouping of tuples if that grouping never ends up
     618              :  * having future insertions that use the free space.  That effect is
     619              :  * self-limiting; a future grouping that becomes the "nearest on the right"
     620              :  * grouping of the affected grouping usually puts the extra free space to good
     621              :  * use.
     622              :  *
     623              :  * Caller uses optimization when routine returns true, though the exact action
     624              :  * taken by caller varies.  Caller uses original leaf page fillfactor in
     625              :  * standard way rather than using the new item offset directly when *usemult
     626              :  * was also set to true here.  Otherwise, caller applies optimization by
     627              :  * locating the legal split point that makes the new tuple the lastleft tuple
     628              :  * for the split.
     629              :  */
     630              : static bool
     631         4949 : _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
     632              :                     int leaffillfactor, bool *usemult)
     633              : {
     634              :     int16       nkeyatts;
     635              :     ItemId      itemid;
     636              :     IndexTuple  tup;
     637              :     int         keepnatts;
     638              : 
     639              :     Assert(state->is_leaf && !state->is_rightmost);
     640              : 
     641         4949 :     nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
     642              : 
     643              :     /* Single key indexes not considered here */
     644         4949 :     if (nkeyatts == 1)
     645          612 :         return false;
     646              : 
     647              :     /* Ascending insertion pattern never inferred when new item is first */
     648         4337 :     if (state->newitemoff == P_FIRSTKEY)
     649            1 :         return false;
     650              : 
     651              :     /*
     652              :      * Only apply optimization on pages with equisized tuples, since ordinal
     653              :      * keys are likely to be fixed-width.  Testing if the new tuple is
     654              :      * variable width directly might also work, but that fails to apply the
     655              :      * optimization to indexes with a numeric_ops attribute.
     656              :      *
     657              :      * Conclude that page has equisized tuples when the new item is the same
     658              :      * width as the smallest item observed during pass over page, and other
     659              :      * non-pivot tuples must be the same width as well.  (Note that the
     660              :      * possibly-truncated existing high key isn't counted in
     661              :      * olddataitemstotal, and must be subtracted from maxoff.)
     662              :      */
     663         4336 :     if (state->newitemsz != state->minfirstrightsz)
     664         1265 :         return false;
     665         3071 :     if (state->newitemsz * (maxoff - 1) != state->olddataitemstotal)
     666         2350 :         return false;
     667              : 
     668              :     /*
     669              :      * Avoid applying optimization when tuples are wider than a tuple
     670              :      * consisting of two non-NULL int8/int64 attributes (or four non-NULL
     671              :      * int4/int32 attributes)
     672              :      */
     673          721 :     if (state->newitemsz >
     674              :         MAXALIGN(sizeof(IndexTupleData) + sizeof(int64) * 2) +
     675              :         sizeof(ItemIdData))
     676            0 :         return false;
     677              : 
     678              :     /*
     679              :      * At least the first attribute's value must be equal to the corresponding
     680              :      * value in previous tuple to apply optimization.  New item cannot be a
     681              :      * duplicate, either.
     682              :      *
     683              :      * Handle case where new item is to the right of all items on the existing
     684              :      * page.  This is suggestive of monotonically increasing insertions in
     685              :      * itself, so the "heap TID adjacency" test is not applied here.
     686              :      */
     687          721 :     if (state->newitemoff > maxoff)
     688              :     {
     689          223 :         itemid = PageGetItemId(state->origpage, maxoff);
     690          223 :         tup = (IndexTuple) PageGetItem(state->origpage, itemid);
     691          223 :         keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
     692              : 
     693          223 :         if (keepnatts > 1 && keepnatts <= nkeyatts)
     694              :         {
     695          223 :             *usemult = true;
     696          223 :             return true;
     697              :         }
     698              : 
     699            0 :         return false;
     700              :     }
     701              : 
     702              :     /*
     703              :      * "Low cardinality leading column, high cardinality suffix column"
     704              :      * indexes with a random insertion pattern (e.g., an index with a boolean
     705              :      * column, such as an index on '(book_is_in_print, book_isbn)') present us
     706              :      * with a risk of consistently misapplying the optimization.  We're
     707              :      * willing to accept very occasional misapplication of the optimization,
     708              :      * provided the cases where we get it wrong are rare and self-limiting.
     709              :      *
     710              :      * Heap TID adjacency strongly suggests that the item just to the left was
     711              :      * inserted very recently, which limits overapplication of the
     712              :      * optimization.  Besides, all inappropriate cases triggered here will
     713              :      * still split in the middle of the page on average.
     714              :      */
     715          498 :     itemid = PageGetItemId(state->origpage, OffsetNumberPrev(state->newitemoff));
     716          498 :     tup = (IndexTuple) PageGetItem(state->origpage, itemid);
     717              :     /* Do cheaper test first */
     718          498 :     if (BTreeTupleIsPosting(tup) ||
     719          498 :         !_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid))
     720          318 :         return false;
     721              :     /* Check same conditions as rightmost item case, too */
     722          180 :     keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
     723              : 
     724          180 :     if (keepnatts > 1 && keepnatts <= nkeyatts)
     725              :     {
     726          108 :         double      interp = (double) state->newitemoff / ((double) maxoff + 1);
     727          108 :         double      leaffillfactormult = (double) leaffillfactor / 100.0;
     728              : 
     729              :         /*
     730              :          * Don't allow caller to split after a new item when it will result in
     731              :          * a split point to the right of the point that a leaf fillfactor
     732              :          * split would use -- have caller apply leaf fillfactor instead
     733              :          */
     734          108 :         *usemult = interp > leaffillfactormult;
     735              : 
     736          108 :         return true;
     737              :     }
     738              : 
     739           72 :     return false;
     740              : }
     741              : 
     742              : /*
     743              :  * Subroutine for determining if two heap TIDS are "adjacent".
     744              :  *
     745              :  * Adjacent means that the high TID is very likely to have been inserted into
     746              :  * heap relation immediately after the low TID, probably during the current
     747              :  * transaction.
     748              :  */
     749              : static bool
     750          498 : _bt_adjacenthtid(const ItemPointerData *lowhtid, const ItemPointerData *highhtid)
     751              : {
     752              :     BlockNumber lowblk,
     753              :                 highblk;
     754              : 
     755          498 :     lowblk = ItemPointerGetBlockNumber(lowhtid);
     756          498 :     highblk = ItemPointerGetBlockNumber(highhtid);
     757              : 
     758              :     /* Make optimistic assumption of adjacency when heap blocks match */
     759          498 :     if (lowblk == highblk)
     760          180 :         return true;
     761              : 
     762              :     /* When heap block one up, second offset should be FirstOffsetNumber */
     763          426 :     if (lowblk + 1 == highblk &&
     764          108 :         ItemPointerGetOffsetNumber(highhtid) == FirstOffsetNumber)
     765            0 :         return true;
     766              : 
     767          318 :     return false;
     768              : }
     769              : 
     770              : /*
     771              :  * Subroutine to find the "best" split point among candidate split points.
     772              :  * The best split point is the split point with the lowest penalty among split
     773              :  * points that fall within current/final split interval.  Penalty is an
     774              :  * abstract score, with a definition that varies depending on whether we're
     775              :  * splitting a leaf page or an internal page.  See _bt_split_penalty() for
     776              :  * details.
     777              :  *
     778              :  * "perfectpenalty" is assumed to be the lowest possible penalty among
     779              :  * candidate split points.  This allows us to return early without wasting
     780              :  * cycles on calculating the first differing attribute for all candidate
     781              :  * splits when that clearly cannot improve our choice (or when we only want a
     782              :  * minimally distinguishing split point, and don't want to make the split any
     783              :  * more unbalanced than is necessary).
     784              :  *
     785              :  * We return the index of the first existing tuple that should go on the right
     786              :  * page, plus a boolean indicating if new item is on left of split point.
     787              :  */
     788              : static OffsetNumber
     789        11486 : _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
     790              :                  bool *newitemonleft, FindSplitStrat strategy)
     791              : {
     792              :     int         bestpenalty,
     793              :                 lowsplit;
     794        11486 :     int         highsplit = Min(state->interval, state->nsplits);
     795              :     SplitPoint *final;
     796              : 
     797        11486 :     bestpenalty = INT_MAX;
     798        11486 :     lowsplit = 0;
     799        29616 :     for (int i = lowsplit; i < highsplit; i++)
     800              :     {
     801              :         int         penalty;
     802              : 
     803        29590 :         penalty = _bt_split_penalty(state, state->splits + i);
     804              : 
     805        29590 :         if (penalty < bestpenalty)
     806              :         {
     807        14708 :             bestpenalty = penalty;
     808        14708 :             lowsplit = i;
     809              :         }
     810              : 
     811        29590 :         if (penalty <= perfectpenalty)
     812        11460 :             break;
     813              :     }
     814              : 
     815        11486 :     final = &state->splits[lowsplit];
     816              : 
     817              :     /*
     818              :      * There is a risk that the "many duplicates" strategy will repeatedly do
     819              :      * the wrong thing when there are monotonically decreasing insertions to
     820              :      * the right of a large group of duplicates.   Repeated splits could leave
     821              :      * a succession of right half pages with free space that can never be
     822              :      * used.  This must be avoided.
     823              :      *
     824              :      * Consider the example of the leftmost page in a single integer attribute
     825              :      * NULLS FIRST index which is almost filled with NULLs.  Monotonically
     826              :      * decreasing integer insertions might cause the same leftmost page to
     827              :      * split repeatedly at the same point.  Each split derives its new high
     828              :      * key from the lowest current value to the immediate right of the large
     829              :      * group of NULLs, which will always be higher than all future integer
     830              :      * insertions, directing all future integer insertions to the same
     831              :      * leftmost page.
     832              :      */
     833        11486 :     if (strategy == SPLIT_MANY_DUPLICATES && !state->is_rightmost &&
     834           66 :         !final->newitemonleft && final->firstrightoff >= state->newitemoff &&
     835            0 :         final->firstrightoff < state->newitemoff + 9)
     836              :     {
     837              :         /*
     838              :          * Avoid the problem by performing a 50:50 split when the new item is
     839              :          * just to the right of the would-be "many duplicates" split point.
     840              :          * (Note that the test used for an insert that is "just to the right"
     841              :          * of the split point is conservative.)
     842              :          */
     843            0 :         final = &state->splits[0];
     844              :     }
     845              : 
     846        11486 :     *newitemonleft = final->newitemonleft;
     847        11486 :     return final->firstrightoff;
     848              : }
     849              : 
     850              : #define LEAF_SPLIT_DISTANCE         0.050
     851              : #define INTERNAL_SPLIT_DISTANCE     0.075
     852              : 
     853              : /*
     854              :  * Return a split interval to use for the default strategy.  This is a limit
     855              :  * on the number of candidate split points to give further consideration to.
     856              :  * Only a fraction of all candidate splits points (those located at the start
     857              :  * of the now-sorted splits array) fall within the split interval.  Split
     858              :  * interval is applied within _bt_bestsplitloc().
     859              :  *
     860              :  * Split interval represents an acceptable range of split points -- those that
     861              :  * have leftfree and rightfree values that are acceptably balanced.  The final
     862              :  * split point chosen is the split point with the lowest "penalty" among split
     863              :  * points in this split interval (unless we change our entire strategy, in
     864              :  * which case the interval also changes -- see _bt_strategy()).
     865              :  *
     866              :  * The "Prefix B-Trees" paper calls split interval sigma l for leaf splits,
     867              :  * and sigma b for internal ("branch") splits.  It's hard to provide a
     868              :  * theoretical justification for the size of the split interval, though it's
     869              :  * clear that a small split interval can make tuples on level L+1 much smaller
     870              :  * on average, without noticeably affecting space utilization on level L.
     871              :  * (Note that the way that we calculate split interval might need to change if
     872              :  * suffix truncation is taught to truncate tuples "within" the last
     873              :  * attribute/datum for data types like text, which is more or less how it is
     874              :  * assumed to work in the paper.)
     875              :  */
     876              : static int
     877        11486 : _bt_defaultinterval(FindSplitData *state)
     878              : {
     879              :     SplitPoint *spaceoptimal;
     880              :     int16       tolerance,
     881              :                 lowleftfree,
     882              :                 lowrightfree,
     883              :                 highleftfree,
     884              :                 highrightfree;
     885              : 
     886              :     /*
     887              :      * Determine leftfree and rightfree values that are higher and lower than
     888              :      * we're willing to tolerate.  Note that the final split interval will be
     889              :      * about 10% of nsplits in the common case where all non-pivot tuples
     890              :      * (data items) from a leaf page are uniformly sized.  We're a bit more
     891              :      * aggressive when splitting internal pages.
     892              :      */
     893        11486 :     if (state->is_leaf)
     894        11357 :         tolerance = state->olddataitemstotal * LEAF_SPLIT_DISTANCE;
     895              :     else
     896          129 :         tolerance = state->olddataitemstotal * INTERNAL_SPLIT_DISTANCE;
     897              : 
     898              :     /* First candidate split point is the most evenly balanced */
     899        11486 :     spaceoptimal = state->splits;
     900        11486 :     lowleftfree = spaceoptimal->leftfree - tolerance;
     901        11486 :     lowrightfree = spaceoptimal->rightfree - tolerance;
     902        11486 :     highleftfree = spaceoptimal->leftfree + tolerance;
     903        11486 :     highrightfree = spaceoptimal->rightfree + tolerance;
     904              : 
     905              :     /*
     906              :      * Iterate through split points, starting from the split immediately after
     907              :      * 'spaceoptimal'.  Find the first split point that divides free space so
     908              :      * unevenly that including it in the split interval would be unacceptable.
     909              :      */
     910       348372 :     for (int i = 1; i < state->nsplits; i++)
     911              :     {
     912       348372 :         SplitPoint *split = state->splits + i;
     913              : 
     914              :         /* Cannot use curdelta here, since its value is often weighted */
     915       348372 :         if (split->leftfree < lowleftfree || split->rightfree < lowrightfree ||
     916       337343 :             split->leftfree > highleftfree || split->rightfree > highrightfree)
     917        11486 :             return i;
     918              :     }
     919              : 
     920            0 :     return state->nsplits;
     921              : }
     922              : 
     923              : /*
     924              :  * Subroutine to decide whether split should use default strategy/initial
     925              :  * split interval, or whether it should finish splitting the page using
     926              :  * alternative strategies (this is only possible with leaf pages).
     927              :  *
     928              :  * Caller uses alternative strategy (or sticks with default strategy) based
     929              :  * on how *strategy is set here.  Return value is "perfect penalty", which is
     930              :  * passed to _bt_bestsplitloc() as a final constraint on how far caller is
     931              :  * willing to go to avoid appending a heap TID when using the many duplicates
     932              :  * strategy (it also saves _bt_bestsplitloc() useless cycles).
     933              :  */
     934              : static int
     935        11486 : _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
     936              :              SplitPoint *rightpage, FindSplitStrat *strategy)
     937              : {
     938              :     IndexTuple  leftmost,
     939              :                 rightmost;
     940              :     SplitPoint *leftinterval,
     941              :                *rightinterval;
     942              :     int         perfectpenalty;
     943        11486 :     int         indnkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
     944              : 
     945              :     /* Assume that alternative strategy won't be used for now */
     946        11486 :     *strategy = SPLIT_DEFAULT;
     947              : 
     948              :     /*
     949              :      * Use smallest observed firstright item size for entire page (actually,
     950              :      * entire imaginary version of page that includes newitem) as perfect
     951              :      * penalty on internal pages.  This can save cycles in the common case
     952              :      * where most or all splits (not just splits within interval) have
     953              :      * firstright tuples that are the same size.
     954              :      */
     955        11486 :     if (!state->is_leaf)
     956          129 :         return state->minfirstrightsz;
     957              : 
     958              :     /*
     959              :      * Use leftmost and rightmost tuples from leftmost and rightmost splits in
     960              :      * current split interval
     961              :      */
     962        11357 :     _bt_interval_edges(state, &leftinterval, &rightinterval);
     963        11357 :     leftmost = _bt_split_lastleft(state, leftinterval);
     964        11357 :     rightmost = _bt_split_firstright(state, rightinterval);
     965              : 
     966              :     /*
     967              :      * If initial split interval can produce a split point that will at least
     968              :      * avoid appending a heap TID in new high key, we're done.  Finish split
     969              :      * with default strategy and initial split interval.
     970              :      */
     971        11357 :     perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
     972        11357 :     if (perfectpenalty <= indnkeyatts)
     973        11102 :         return perfectpenalty;
     974              : 
     975              :     /*
     976              :      * Work out how caller should finish split when even their "perfect"
     977              :      * penalty for initial/default split interval indicates that the interval
     978              :      * does not contain even a single split that avoids appending a heap TID.
     979              :      *
     980              :      * Use the leftmost split's lastleft tuple and the rightmost split's
     981              :      * firstright tuple to assess every possible split.
     982              :      */
     983          255 :     leftmost = _bt_split_lastleft(state, leftpage);
     984          255 :     rightmost = _bt_split_firstright(state, rightpage);
     985              : 
     986              :     /*
     987              :      * If page (including new item) has many duplicates but is not entirely
     988              :      * full of duplicates, a many duplicates strategy split will be performed.
     989              :      * If page is entirely full of duplicates, a single value strategy split
     990              :      * will be performed.
     991              :      */
     992          255 :     perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
     993          255 :     if (perfectpenalty <= indnkeyatts)
     994              :     {
     995           72 :         *strategy = SPLIT_MANY_DUPLICATES;
     996              : 
     997              :         /*
     998              :          * Many duplicates strategy should split at either side the group of
     999              :          * duplicates that enclose the delta-optimal split point.  Return
    1000              :          * indnkeyatts rather than the true perfect penalty to make that
    1001              :          * happen.  (If perfectpenalty was returned here then low cardinality
    1002              :          * composite indexes could have continual unbalanced splits.)
    1003              :          *
    1004              :          * Note that caller won't go through with a many duplicates split in
    1005              :          * rare cases where it looks like there are ever-decreasing insertions
    1006              :          * to the immediate right of the split point.  This must happen just
    1007              :          * before a final decision is made, within _bt_bestsplitloc().
    1008              :          */
    1009           72 :         return indnkeyatts;
    1010              :     }
    1011              : 
    1012              :     /*
    1013              :      * Single value strategy is only appropriate with ever-increasing heap
    1014              :      * TIDs; otherwise, original default strategy split should proceed to
    1015              :      * avoid pathological performance.  Use page high key to infer if this is
    1016              :      * the rightmost page among pages that store the same duplicate value.
    1017              :      * This should not prevent insertions of heap TIDs that are slightly out
    1018              :      * of order from using single value strategy, since that's expected with
    1019              :      * concurrent inserters of the same duplicate value.
    1020              :      */
    1021          183 :     else if (state->is_rightmost)
    1022          115 :         *strategy = SPLIT_SINGLE_VALUE;
    1023              :     else
    1024              :     {
    1025              :         ItemId      itemid;
    1026              :         IndexTuple  hikey;
    1027              : 
    1028           68 :         itemid = PageGetItemId(state->origpage, P_HIKEY);
    1029           68 :         hikey = (IndexTuple) PageGetItem(state->origpage, itemid);
    1030           68 :         perfectpenalty = _bt_keep_natts_fast(state->rel, hikey,
    1031              :                                              state->newitem);
    1032           68 :         if (perfectpenalty <= indnkeyatts)
    1033           26 :             *strategy = SPLIT_SINGLE_VALUE;
    1034              :         else
    1035              :         {
    1036              :             /*
    1037              :              * Have caller finish split using default strategy, since page
    1038              :              * does not appear to be the rightmost page for duplicates of the
    1039              :              * value the page is filled with
    1040              :              */
    1041              :         }
    1042              :     }
    1043              : 
    1044          183 :     return perfectpenalty;
    1045              : }
    1046              : 
    1047              : /*
    1048              :  * Subroutine to locate leftmost and rightmost splits for current/default
    1049              :  * split interval.  Note that it will be the same split iff there is only one
    1050              :  * split in interval.
    1051              :  */
    1052              : static void
    1053        11357 : _bt_interval_edges(FindSplitData *state, SplitPoint **leftinterval,
    1054              :                    SplitPoint **rightinterval)
    1055              : {
    1056        11357 :     int         highsplit = Min(state->interval, state->nsplits);
    1057              :     SplitPoint *deltaoptimal;
    1058              : 
    1059        11357 :     deltaoptimal = state->splits;
    1060        11357 :     *leftinterval = NULL;
    1061        11357 :     *rightinterval = NULL;
    1062              : 
    1063              :     /*
    1064              :      * Delta is an absolute distance to optimal split point, so both the
    1065              :      * leftmost and rightmost split point will usually be at the end of the
    1066              :      * array
    1067              :      */
    1068        22802 :     for (int i = highsplit - 1; i >= 0; i--)
    1069              :     {
    1070        22802 :         SplitPoint *distant = state->splits + i;
    1071              : 
    1072        22802 :         if (distant->firstrightoff < deltaoptimal->firstrightoff)
    1073              :         {
    1074        11280 :             if (*leftinterval == NULL)
    1075        11097 :                 *leftinterval = distant;
    1076              :         }
    1077        11522 :         else if (distant->firstrightoff > deltaoptimal->firstrightoff)
    1078              :         {
    1079        11245 :             if (*rightinterval == NULL)
    1080        11127 :                 *rightinterval = distant;
    1081              :         }
    1082          277 :         else if (!distant->newitemonleft && deltaoptimal->newitemonleft)
    1083              :         {
    1084              :             /*
    1085              :              * "incoming tuple will become firstright" (distant) is to the
    1086              :              * left of "incoming tuple will become lastleft" (delta-optimal)
    1087              :              */
    1088              :             Assert(distant->firstrightoff == state->newitemoff);
    1089            0 :             if (*leftinterval == NULL)
    1090            0 :                 *leftinterval = distant;
    1091              :         }
    1092          277 :         else if (distant->newitemonleft && !deltaoptimal->newitemonleft)
    1093              :         {
    1094              :             /*
    1095              :              * "incoming tuple will become lastleft" (distant) is to the right
    1096              :              * of "incoming tuple will become firstright" (delta-optimal)
    1097              :              */
    1098              :             Assert(distant->firstrightoff == state->newitemoff);
    1099            5 :             if (*rightinterval == NULL)
    1100            3 :                 *rightinterval = distant;
    1101              :         }
    1102              :         else
    1103              :         {
    1104              :             /* There was only one or two splits in initial split interval */
    1105              :             Assert(distant == deltaoptimal);
    1106          272 :             if (*leftinterval == NULL)
    1107          260 :                 *leftinterval = distant;
    1108          272 :             if (*rightinterval == NULL)
    1109          227 :                 *rightinterval = distant;
    1110              :         }
    1111              : 
    1112        22802 :         if (*leftinterval && *rightinterval)
    1113        11357 :             return;
    1114              :     }
    1115              : 
    1116              :     Assert(false);
    1117              : }
    1118              : 
    1119              : /*
    1120              :  * Subroutine to find penalty for caller's candidate split point.
    1121              :  *
    1122              :  * On leaf pages, penalty is the attribute number that distinguishes each side
    1123              :  * of a split.  It's the last attribute that needs to be included in new high
    1124              :  * key for left page.  It can be greater than the number of key attributes in
    1125              :  * cases where a heap TID will need to be appended during truncation.
    1126              :  *
    1127              :  * On internal pages, penalty is simply the size of the firstright tuple for
    1128              :  * the split (including line pointer overhead).  This tuple will become the
    1129              :  * new high key for the left page.
    1130              :  */
    1131              : static inline int
    1132        29590 : _bt_split_penalty(FindSplitData *state, SplitPoint *split)
    1133              : {
    1134              :     IndexTuple  lastleft;
    1135              :     IndexTuple  firstright;
    1136              : 
    1137        29590 :     if (!state->is_leaf)
    1138              :     {
    1139              :         ItemId      itemid;
    1140              : 
    1141          140 :         if (!split->newitemonleft &&
    1142          140 :             split->firstrightoff == state->newitemoff)
    1143           12 :             return state->newitemsz;
    1144              : 
    1145          128 :         itemid = PageGetItemId(state->origpage, split->firstrightoff);
    1146              : 
    1147          128 :         return MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
    1148              :     }
    1149              : 
    1150        29450 :     lastleft = _bt_split_lastleft(state, split);
    1151        29450 :     firstright = _bt_split_firstright(state, split);
    1152              : 
    1153        29450 :     return _bt_keep_natts_fast(state->rel, lastleft, firstright);
    1154              : }
    1155              : 
    1156              : /*
    1157              :  * Subroutine to get a lastleft IndexTuple for a split point
    1158              :  */
    1159              : static inline IndexTuple
    1160        41062 : _bt_split_lastleft(FindSplitData *state, SplitPoint *split)
    1161              : {
    1162              :     ItemId      itemid;
    1163              : 
    1164        41062 :     if (split->newitemonleft && split->firstrightoff == state->newitemoff)
    1165          271 :         return state->newitem;
    1166              : 
    1167        40791 :     itemid = PageGetItemId(state->origpage,
    1168        40791 :                            OffsetNumberPrev(split->firstrightoff));
    1169        40791 :     return (IndexTuple) PageGetItem(state->origpage, itemid);
    1170              : }
    1171              : 
    1172              : /*
    1173              :  * Subroutine to get a firstright IndexTuple for a split point
    1174              :  */
    1175              : static inline IndexTuple
    1176        41062 : _bt_split_firstright(FindSplitData *state, SplitPoint *split)
    1177              : {
    1178              :     ItemId      itemid;
    1179              : 
    1180        41062 :     if (!split->newitemonleft && split->firstrightoff == state->newitemoff)
    1181          148 :         return state->newitem;
    1182              : 
    1183        40914 :     itemid = PageGetItemId(state->origpage, split->firstrightoff);
    1184        40914 :     return (IndexTuple) PageGetItem(state->origpage, itemid);
    1185              : }
        

Generated by: LCOV version 2.0-1