LCOV - code coverage report
Current view: top level - src/backend/access/nbtree - nbtsplitloc.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 265 274 96.7 %
Date: 2020-06-01 09:07:10 Functions: 13 13 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * nbtsplitloc.c
       4             :  *    Choose split point code for Postgres btree implementation.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/nbtree/nbtsplitloc.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/nbtree.h"
      18             : #include "storage/lmgr.h"
      19             : 
      20             : typedef enum
      21             : {
      22             :     /* strategy for searching through materialized list of split points */
      23             :     SPLIT_DEFAULT,              /* give some weight to truncation */
      24             :     SPLIT_MANY_DUPLICATES,      /* find minimally distinguishing point */
      25             :     SPLIT_SINGLE_VALUE          /* leave left page almost full */
      26             : } FindSplitStrat;
      27             : 
      28             : typedef struct
      29             : {
      30             :     /* details of free space left by split */
      31             :     int16       curdelta;       /* current leftfree/rightfree delta */
      32             :     int16       leftfree;       /* space left on left page post-split */
      33             :     int16       rightfree;      /* space left on right page post-split */
      34             : 
      35             :     /* split point identifying fields (returned by _bt_findsplitloc) */
      36             :     OffsetNumber firstrightoff; /* first origpage item on rightpage */
      37             :     bool        newitemonleft;  /* new item goes on left, or right? */
      38             : 
      39             : } SplitPoint;
      40             : 
      41             : typedef struct
      42             : {
      43             :     /* context data for _bt_recsplitloc */
      44             :     Relation    rel;            /* index relation */
      45             :     Page        origpage;       /* page undergoing split */
      46             :     IndexTuple  newitem;        /* new item (cause of page split) */
      47             :     Size        newitemsz;      /* size of newitem (includes line pointer) */
      48             :     bool        is_leaf;        /* T if splitting a leaf page */
      49             :     bool        is_rightmost;   /* T if splitting rightmost page on level */
      50             :     OffsetNumber newitemoff;    /* where the new item is to be inserted */
      51             :     int         leftspace;      /* space available for items on left page */
      52             :     int         rightspace;     /* space available for items on right page */
      53             :     int         olddataitemstotal;  /* space taken by old items */
      54             :     Size        minfirstrightsz;    /* smallest firstright size */
      55             : 
      56             :     /* candidate split point data */
      57             :     int         maxsplits;      /* maximum number of splits */
      58             :     int         nsplits;        /* current number of splits */
      59             :     SplitPoint *splits;         /* all candidate split points for page */
      60             :     int         interval;       /* current range of acceptable split points */
      61             : } FindSplitData;
      62             : 
      63             : static void _bt_recsplitloc(FindSplitData *state,
      64             :                             OffsetNumber firstrightoff, bool newitemonleft,
      65             :                             int olddataitemstoleft,
      66             :                             Size firstrightofforigpagetuplesz);
      67             : static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
      68             :                                 bool usemult);
      69             : static int  _bt_splitcmp(const void *arg1, const void *arg2);
      70             : static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
      71             :                                 int leaffillfactor, bool *usemult);
      72             : static bool _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid);
      73             : static OffsetNumber _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
      74             :                                      bool *newitemonleft, FindSplitStrat strategy);
      75             : static int  _bt_defaultinterval(FindSplitData *state);
      76             : static int  _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
      77             :                          SplitPoint *rightpage, FindSplitStrat *strategy);
      78             : static void _bt_interval_edges(FindSplitData *state,
      79             :                                SplitPoint **leftinterval, SplitPoint **rightinterval);
      80             : static inline int _bt_split_penalty(FindSplitData *state, SplitPoint *split);
      81             : static inline IndexTuple _bt_split_lastleft(FindSplitData *state,
      82             :                                             SplitPoint *split);
      83             : static inline IndexTuple _bt_split_firstright(FindSplitData *state,
      84             :                                               SplitPoint *split);
      85             : 
      86             : 
      87             : /*
      88             :  *  _bt_findsplitloc() -- find an appropriate place to split a page.
      89             :  *
      90             :  * The main goal here is to equalize the free space that will be on each
      91             :  * split page, *after accounting for the inserted tuple*.  (If we fail to
      92             :  * account for it, we might find ourselves with too little room on the page
      93             :  * that it needs to go into!)
      94             :  *
      95             :  * If the page is the rightmost page on its level, we instead try to arrange
      96             :  * to leave the left split page fillfactor% full.  In this way, when we are
      97             :  * inserting successively increasing keys (consider sequences, timestamps,
      98             :  * etc) we will end up with a tree whose pages are about fillfactor% full,
      99             :  * instead of the 50% full result that we'd get without this special case.
     100             :  * This is the same as nbtsort.c produces for a newly-created tree.  Note
     101             :  * that leaf and nonleaf pages use different fillfactors.  Note also that
     102             :  * there are a number of further special cases where fillfactor is not
     103             :  * applied in the standard way.
     104             :  *
     105             :  * We are passed the intended insert position of the new tuple, expressed as
     106             :  * the offsetnumber of the tuple it must go in front of (this could be
     107             :  * maxoff+1 if the tuple is to go at the end).  The new tuple itself is also
     108             :  * passed, since it's needed to give some weight to how effective suffix
     109             :  * truncation will be.  The implementation picks the split point that
     110             :  * maximizes the effectiveness of suffix truncation from a small list of
     111             :  * alternative candidate split points that leave each side of the split with
     112             :  * about the same share of free space.  Suffix truncation is secondary to
     113             :  * equalizing free space, except in cases with large numbers of duplicates.
     114             :  * Note that it is always assumed that caller goes on to perform truncation,
     115             :  * even with pg_upgrade'd indexes where that isn't actually the case
     116             :  * (!heapkeyspace indexes).  See nbtree/README for more information about
     117             :  * suffix truncation.
     118             :  *
     119             :  * We return the index of the first existing tuple that should go on the
     120             :  * righthand page (which is called firstrightoff), plus a boolean
     121             :  * indicating whether the new tuple goes on the left or right page.  You
     122             :  * can think of the returned state as a point _between_ two adjacent data
     123             :  * items (laftleft and firstright data items) on an imaginary version of
     124             :  * origpage that already includes newitem.  The bool is necessary to
     125             :  * disambiguate the case where firstrightoff == newitemoff (i.e. it is
     126             :  * sometimes needed to determine if the firstright tuple for the split is
     127             :  * newitem rather than the tuple from origpage at offset firstrightoff).
     128             :  */
     129             : OffsetNumber
     130       46348 : _bt_findsplitloc(Relation rel,
     131             :                  Page origpage,
     132             :                  OffsetNumber newitemoff,
     133             :                  Size newitemsz,
     134             :                  IndexTuple newitem,
     135             :                  bool *newitemonleft)
     136             : {
     137             :     BTPageOpaque opaque;
     138             :     int         leftspace,
     139             :                 rightspace,
     140             :                 olddataitemstotal,
     141             :                 olddataitemstoleft,
     142             :                 perfectpenalty,
     143             :                 leaffillfactor;
     144             :     FindSplitData state;
     145             :     FindSplitStrat strategy;
     146             :     ItemId      itemid;
     147             :     OffsetNumber offnum,
     148             :                 maxoff,
     149             :                 firstrightoff;
     150             :     double      fillfactormult;
     151             :     bool        usemult;
     152             :     SplitPoint  leftpage,
     153             :                 rightpage;
     154             : 
     155       46348 :     opaque = (BTPageOpaque) PageGetSpecialPointer(origpage);
     156       46348 :     maxoff = PageGetMaxOffsetNumber(origpage);
     157             : 
     158             :     /* Total free space available on a btree page, after fixed overhead */
     159       46348 :     leftspace = rightspace =
     160       46348 :         PageGetPageSize(origpage) - SizeOfPageHeaderData -
     161             :         MAXALIGN(sizeof(BTPageOpaqueData));
     162             : 
     163             :     /* The right page will have the same high key as the old page */
     164       46348 :     if (!P_RIGHTMOST(opaque))
     165             :     {
     166       25432 :         itemid = PageGetItemId(origpage, P_HIKEY);
     167       25432 :         rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
     168             :                              sizeof(ItemIdData));
     169             :     }
     170             : 
     171             :     /* Count up total space in data items before actually scanning 'em */
     172       46348 :     olddataitemstotal = rightspace - (int) PageGetExactFreeSpace(origpage);
     173       46348 :     leaffillfactor = BTGetFillFactor(rel);
     174             : 
     175             :     /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
     176       46348 :     newitemsz += sizeof(ItemIdData);
     177       46348 :     state.rel = rel;
     178       46348 :     state.origpage = origpage;
     179       46348 :     state.newitem = newitem;
     180       46348 :     state.newitemsz = newitemsz;
     181       46348 :     state.is_leaf = P_ISLEAF(opaque);
     182       46348 :     state.is_rightmost = P_RIGHTMOST(opaque);
     183       46348 :     state.leftspace = leftspace;
     184       46348 :     state.rightspace = rightspace;
     185       46348 :     state.olddataitemstotal = olddataitemstotal;
     186       46348 :     state.minfirstrightsz = SIZE_MAX;
     187       46348 :     state.newitemoff = newitemoff;
     188             : 
     189             :     /* newitem cannot be a posting list item */
     190             :     Assert(!BTreeTupleIsPosting(newitem));
     191             : 
     192             :     /*
     193             :      * maxsplits should never exceed maxoff because there will be at most as
     194             :      * many candidate split points as there are points _between_ tuples, once
     195             :      * you imagine that the new item is already on the original page (the
     196             :      * final number of splits may be slightly lower because not all points
     197             :      * between tuples will be legal).
     198             :      */
     199       46348 :     state.maxsplits = maxoff;
     200       46348 :     state.splits = palloc(sizeof(SplitPoint) * state.maxsplits);
     201       46348 :     state.nsplits = 0;
     202             : 
     203             :     /*
     204             :      * Scan through the data items and calculate space usage for a split at
     205             :      * each possible position
     206             :      */
     207       46348 :     olddataitemstoleft = 0;
     208             : 
     209    13940140 :     for (offnum = P_FIRSTDATAKEY(opaque);
     210             :          offnum <= maxoff;
     211    13893792 :          offnum = OffsetNumberNext(offnum))
     212             :     {
     213             :         Size        itemsz;
     214             : 
     215    13893792 :         itemid = PageGetItemId(origpage, offnum);
     216    13893792 :         itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
     217             : 
     218             :         /*
     219             :          * When item offset number is not newitemoff, neither side of the
     220             :          * split can be newitem.  Record a split after the previous data item
     221             :          * from original page, but before the current data item from original
     222             :          * page. (_bt_recsplitloc() will reject the split when there are no
     223             :          * previous items, which we rely on.)
     224             :          */
     225    13893792 :         if (offnum < newitemoff)
     226    11579100 :             _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
     227     2314692 :         else if (offnum > newitemoff)
     228     2287848 :             _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
     229             :         else
     230             :         {
     231             :             /*
     232             :              * Record a split after all "offnum < newitemoff" original page
     233             :              * data items, but before newitem
     234             :              */
     235       26844 :             _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
     236             : 
     237             :             /*
     238             :              * Record a split after newitem, but before data item from
     239             :              * original page at offset newitemoff/current offset
     240             :              */
     241       26844 :             _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
     242             :         }
     243             : 
     244    13893792 :         olddataitemstoleft += itemsz;
     245             :     }
     246             : 
     247             :     /*
     248             :      * Record a split after all original page data items, but before newitem.
     249             :      * (Though only when it's possible that newitem will end up alone on new
     250             :      * right page.)
     251             :      */
     252             :     Assert(olddataitemstoleft == olddataitemstotal);
     253       46348 :     if (newitemoff > maxoff)
     254       19504 :         _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0);
     255             : 
     256             :     /*
     257             :      * I believe it is not possible to fail to find a feasible split, but just
     258             :      * in case ...
     259             :      */
     260       46348 :     if (state.nsplits == 0)
     261           0 :         elog(ERROR, "could not find a feasible split point for index \"%s\"",
     262             :              RelationGetRelationName(rel));
     263             : 
     264             :     /*
     265             :      * Start search for a split point among list of legal split points.  Give
     266             :      * primary consideration to equalizing available free space in each half
     267             :      * of the split initially (start with default strategy), while applying
     268             :      * rightmost and split-after-new-item optimizations where appropriate.
     269             :      * Either of the two other fallback strategies may be required for cases
     270             :      * with a large number of duplicates around the original/space-optimal
     271             :      * split point.
     272             :      *
     273             :      * Default strategy gives some weight to suffix truncation in deciding a
     274             :      * split point on leaf pages.  It attempts to select a split point where a
     275             :      * distinguishing attribute appears earlier in the new high key for the
     276             :      * left side of the split, in order to maximize the number of trailing
     277             :      * attributes that can be truncated away.  Only candidate split points
     278             :      * that imply an acceptable balance of free space on each side are
     279             :      * considered.  See _bt_defaultinterval().
     280             :      */
     281       46348 :     if (!state.is_leaf)
     282             :     {
     283             :         /* fillfactormult only used on rightmost page */
     284         100 :         usemult = state.is_rightmost;
     285         100 :         fillfactormult = BTREE_NONLEAF_FILLFACTOR / 100.0;
     286             :     }
     287       46248 :     else if (state.is_rightmost)
     288             :     {
     289             :         /* Rightmost leaf page --  fillfactormult always used */
     290       20816 :         usemult = true;
     291       20816 :         fillfactormult = leaffillfactor / 100.0;
     292             :     }
     293       25432 :     else if (_bt_afternewitemoff(&state, maxoff, leaffillfactor, &usemult))
     294             :     {
     295             :         /*
     296             :          * New item inserted at rightmost point among a localized grouping on
     297             :          * a leaf page -- apply "split after new item" optimization, either by
     298             :          * applying leaf fillfactor multiplier, or by choosing the exact split
     299             :          * point that leaves newitem as lastleft. (usemult is set for us.)
     300             :          */
     301       11342 :         if (usemult)
     302             :         {
     303             :             /* fillfactormult should be set based on leaf fillfactor */
     304        6030 :             fillfactormult = leaffillfactor / 100.0;
     305             :         }
     306             :         else
     307             :         {
     308             :             /* find precise split point after newitemoff */
     309      713404 :             for (int i = 0; i < state.nsplits; i++)
     310             :             {
     311      713404 :                 SplitPoint *split = state.splits + i;
     312             : 
     313      713404 :                 if (split->newitemonleft &&
     314        5312 :                     newitemoff == split->firstrightoff)
     315             :                 {
     316        5312 :                     pfree(state.splits);
     317        5312 :                     *newitemonleft = true;
     318        5312 :                     return newitemoff;
     319             :                 }
     320             :             }
     321             : 
     322             :             /*
     323             :              * Cannot legally split after newitemoff; proceed with split
     324             :              * without using fillfactor multiplier.  This is defensive, and
     325             :              * should never be needed in practice.
     326             :              */
     327           0 :             fillfactormult = 0.50;
     328             :         }
     329             :     }
     330             :     else
     331             :     {
     332             :         /* Other leaf page.  50:50 page split. */
     333       14090 :         usemult = false;
     334             :         /* fillfactormult not used, but be tidy */
     335       14090 :         fillfactormult = 0.50;
     336             :     }
     337             : 
     338             :     /*
     339             :      * Save leftmost and rightmost splits for page before original ordinal
     340             :      * sort order is lost by delta/fillfactormult sort
     341             :      */
     342       41036 :     leftpage = state.splits[0];
     343       41036 :     rightpage = state.splits[state.nsplits - 1];
     344             : 
     345             :     /* Give split points a fillfactormult-wise delta, and sort on deltas */
     346       41036 :     _bt_deltasortsplits(&state, fillfactormult, usemult);
     347             : 
     348             :     /* Determine split interval for default strategy */
     349       41036 :     state.interval = _bt_defaultinterval(&state);
     350             : 
     351             :     /*
     352             :      * Determine if default strategy/split interval will produce a
     353             :      * sufficiently distinguishing split, or if we should change strategies.
     354             :      * Alternative strategies change the range of split points that are
     355             :      * considered acceptable (split interval), and possibly change
     356             :      * fillfactormult, in order to deal with pages with a large number of
     357             :      * duplicates gracefully.
     358             :      *
     359             :      * Pass low and high splits for the entire page (actually, they're for an
     360             :      * imaginary version of the page that includes newitem).  These are used
     361             :      * when the initial split interval encloses split points that are full of
     362             :      * duplicates, and we need to consider if it's even possible to avoid
     363             :      * appending a heap TID.
     364             :      */
     365       41036 :     perfectpenalty = _bt_strategy(&state, &leftpage, &rightpage, &strategy);
     366             : 
     367       41036 :     if (strategy == SPLIT_DEFAULT)
     368             :     {
     369             :         /*
     370             :          * Default strategy worked out (always works out with internal page).
     371             :          * Original split interval still stands.
     372             :          */
     373             :     }
     374             : 
     375             :     /*
     376             :      * Many duplicates strategy is used when a heap TID would otherwise be
     377             :      * appended, but the page isn't completely full of logical duplicates.
     378             :      *
     379             :      * The split interval is widened to include all legal candidate split
     380             :      * points.  There might be a few as two distinct values in the whole-page
     381             :      * split interval, though it's also possible that most of the values on
     382             :      * the page are unique.  The final split point will either be to the
     383             :      * immediate left or to the immediate right of the group of duplicate
     384             :      * tuples that enclose the first/delta-optimal split point (perfect
     385             :      * penalty was set so that the lowest delta split point that avoids
     386             :      * appending a heap TID will be chosen).  Maximizing the number of
     387             :      * attributes that can be truncated away is not a goal of the many
     388             :      * duplicates strategy.
     389             :      *
     390             :      * Single value strategy is used when it is impossible to avoid appending
     391             :      * a heap TID.  It arranges to leave the left page very full.  This
     392             :      * maximizes space utilization in cases where tuples with the same
     393             :      * attribute values span many pages.  Newly inserted duplicates will tend
     394             :      * to have higher heap TID values, so we'll end up splitting to the right
     395             :      * consistently.  (Single value strategy is harmless though not
     396             :      * particularly useful with !heapkeyspace indexes.)
     397             :      */
     398        9254 :     else if (strategy == SPLIT_MANY_DUPLICATES)
     399             :     {
     400             :         Assert(state.is_leaf);
     401             :         /* Shouldn't try to truncate away extra user attributes */
     402             :         Assert(perfectpenalty ==
     403             :                IndexRelationGetNumberOfKeyAttributes(state.rel));
     404             :         /* No need to resort splits -- no change in fillfactormult/deltas */
     405         814 :         state.interval = state.nsplits;
     406             :     }
     407        8440 :     else if (strategy == SPLIT_SINGLE_VALUE)
     408             :     {
     409             :         Assert(state.is_leaf);
     410             :         /* Split near the end of the page */
     411        8440 :         usemult = true;
     412        8440 :         fillfactormult = BTREE_SINGLEVAL_FILLFACTOR / 100.0;
     413             :         /* Resort split points with new delta */
     414        8440 :         _bt_deltasortsplits(&state, fillfactormult, usemult);
     415             :         /* Appending a heap TID is unavoidable, so interval of 1 is fine */
     416        8440 :         state.interval = 1;
     417             :     }
     418             : 
     419             :     /*
     420             :      * Search among acceptable split points (using final split interval) for
     421             :      * the entry that has the lowest penalty, and is therefore expected to
     422             :      * maximize fan-out.  Sets *newitemonleft for us.
     423             :      */
     424       41036 :     firstrightoff = _bt_bestsplitloc(&state, perfectpenalty, newitemonleft,
     425             :                                      strategy);
     426       41036 :     pfree(state.splits);
     427             : 
     428       41036 :     return firstrightoff;
     429             : }
     430             : 
     431             : /*
     432             :  * Subroutine to record a particular point between two tuples (possibly the
     433             :  * new item) on page (ie, combination of firstrightoff and newitemonleft
     434             :  * settings) in *state for later analysis.  This is also a convenient point to
     435             :  * check if the split is legal (if it isn't, it won't be recorded).
     436             :  *
     437             :  * firstrightoff is the offset of the first item on the original page that
     438             :  * goes to the right page, and firstrightofforigpagetuplesz is the size of
     439             :  * that tuple.  firstrightoff can be > max offset, which means that all the
     440             :  * old items go to the left page and only the new item goes to the right page.
     441             :  * We don't actually use firstrightofforigpagetuplesz in that case (actually,
     442             :  * we don't use it for _any_ split where the firstright tuple happens to be
     443             :  * newitem).
     444             :  *
     445             :  * olddataitemstoleft is the total size of all old items to the left of the
     446             :  * split point that is recorded here when legal.  Should not include
     447             :  * newitemsz, since that is handled here.
     448             :  */
     449             : static void
     450    13940140 : _bt_recsplitloc(FindSplitData *state,
     451             :                 OffsetNumber firstrightoff,
     452             :                 bool newitemonleft,
     453             :                 int olddataitemstoleft,
     454             :                 Size firstrightofforigpagetuplesz)
     455             : {
     456             :     int16       leftfree,
     457             :                 rightfree;
     458             :     Size        firstrightsz;
     459    13940140 :     Size        postingsz = 0;
     460             :     bool        newitemisfirstright;
     461             : 
     462             :     /* Is the new item going to be split point's firstright tuple? */
     463    14013332 :     newitemisfirstright = (firstrightoff == state->newitemoff &&
     464       73192 :                            !newitemonleft);
     465             : 
     466    13940140 :     if (newitemisfirstright)
     467       46348 :         firstrightsz = state->newitemsz;
     468             :     else
     469             :     {
     470    13893792 :         firstrightsz = firstrightofforigpagetuplesz;
     471             : 
     472             :         /*
     473             :          * Calculate suffix truncation space saving when firstright tuple is a
     474             :          * posting list tuple, though only when the tuple is over 64 bytes
     475             :          * including line pointer overhead (arbitrary).  This avoids accessing
     476             :          * the tuple in cases where its posting list must be very small (if
     477             :          * tuple has one at all).
     478             :          *
     479             :          * Note: We don't do this in the case where firstright tuple is
     480             :          * newitem, since newitem cannot have a posting list.
     481             :          */
     482    13893792 :         if (state->is_leaf && firstrightsz > 64)
     483             :         {
     484             :             ItemId      itemid;
     485             :             IndexTuple  newhighkey;
     486             : 
     487       74382 :             itemid = PageGetItemId(state->origpage, firstrightoff);
     488       74382 :             newhighkey = (IndexTuple) PageGetItem(state->origpage, itemid);
     489             : 
     490       74382 :             if (BTreeTupleIsPosting(newhighkey))
     491        5440 :                 postingsz = IndexTupleSize(newhighkey) -
     492        2720 :                     BTreeTupleGetPostingOffset(newhighkey);
     493             :         }
     494             :     }
     495             : 
     496             :     /* Account for all the old tuples */
     497    13940140 :     leftfree = state->leftspace - olddataitemstoleft;
     498    27880280 :     rightfree = state->rightspace -
     499    13940140 :         (state->olddataitemstotal - olddataitemstoleft);
     500             : 
     501             :     /*
     502             :      * The first item on the right page becomes the high key of the left page;
     503             :      * therefore it counts against left space as well as right space (we
     504             :      * cannot assume that suffix truncation will make it any smaller).  When
     505             :      * index has included attributes, then those attributes of left page high
     506             :      * key will be truncated leaving that page with slightly more free space.
     507             :      * However, that shouldn't affect our ability to find valid split
     508             :      * location, since we err in the direction of being pessimistic about free
     509             :      * space on the left half.  Besides, even when suffix truncation of
     510             :      * non-TID attributes occurs, the new high key often won't even be a
     511             :      * single MAXALIGN() quantum smaller than the firstright tuple it's based
     512             :      * on.
     513             :      *
     514             :      * If we are on the leaf level, assume that suffix truncation cannot avoid
     515             :      * adding a heap TID to the left half's new high key when splitting at the
     516             :      * leaf level.  In practice the new high key will often be smaller and
     517             :      * will rarely be larger, but conservatively assume the worst case.  We do
     518             :      * go to the trouble of subtracting away posting list overhead, though
     519             :      * only when it looks like it will make an appreciable difference.
     520             :      * (Posting lists are the only case where truncation will typically make
     521             :      * the final high key far smaller than firstright, so being a bit more
     522             :      * precise there noticeably improves the balance of free space.)
     523             :      */
     524    13940140 :     if (state->is_leaf)
     525    13936776 :         leftfree -= (int16) (firstrightsz +
     526    13936776 :                              MAXALIGN(sizeof(ItemPointerData)) -
     527             :                              postingsz);
     528             :     else
     529        3364 :         leftfree -= (int16) firstrightsz;
     530             : 
     531             :     /* account for the new item */
     532    13940140 :     if (newitemonleft)
     533     2314692 :         leftfree -= (int16) state->newitemsz;
     534             :     else
     535    11625448 :         rightfree -= (int16) state->newitemsz;
     536             : 
     537             :     /*
     538             :      * If we are not on the leaf level, we will be able to discard the key
     539             :      * data from the first item that winds up on the right page.
     540             :      */
     541    13940140 :     if (!state->is_leaf)
     542        3364 :         rightfree += (int16) firstrightsz -
     543             :             (int16) (MAXALIGN(sizeof(IndexTupleData)) + sizeof(ItemIdData));
     544             : 
     545             :     /* Record split if legal */
     546    13940140 :     if (leftfree >= 0 && rightfree >= 0)
     547             :     {
     548             :         Assert(state->nsplits < state->maxsplits);
     549             : 
     550             :         /* Determine smallest firstright tuple size among legal splits */
     551    13838394 :         state->minfirstrightsz = Min(state->minfirstrightsz, firstrightsz);
     552             : 
     553    13838394 :         state->splits[state->nsplits].curdelta = 0;
     554    13838394 :         state->splits[state->nsplits].leftfree = leftfree;
     555    13838394 :         state->splits[state->nsplits].rightfree = rightfree;
     556    13838394 :         state->splits[state->nsplits].firstrightoff = firstrightoff;
     557    13838394 :         state->splits[state->nsplits].newitemonleft = newitemonleft;
     558    13838394 :         state->nsplits++;
     559             :     }
     560    13940140 : }
     561             : 
     562             : /*
     563             :  * Subroutine to assign space deltas to materialized array of candidate split
     564             :  * points based on current fillfactor, and to sort array using that fillfactor
     565             :  */
     566             : static void
     567       49476 : _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
     568             :                     bool usemult)
     569             : {
     570    14738220 :     for (int i = 0; i < state->nsplits; i++)
     571             :     {
     572    14688744 :         SplitPoint *split = state->splits + i;
     573             :         int16       delta;
     574             : 
     575    14688744 :         if (usemult)
     576    21526192 :             delta = fillfactormult * split->leftfree -
     577    10763096 :                 (1.0 - fillfactormult) * split->rightfree;
     578             :         else
     579     3925648 :             delta = split->leftfree - split->rightfree;
     580             : 
     581    14688744 :         if (delta < 0)
     582     2894380 :             delta = -delta;
     583             : 
     584             :         /* Save delta */
     585    14688744 :         split->curdelta = delta;
     586             :     }
     587             : 
     588       49476 :     qsort(state->splits, state->nsplits, sizeof(SplitPoint), _bt_splitcmp);
     589       49476 : }
     590             : 
     591             : /*
     592             :  * qsort-style comparator used by _bt_deltasortsplits()
     593             :  */
     594             : static int
     595   148319432 : _bt_splitcmp(const void *arg1, const void *arg2)
     596             : {
     597   148319432 :     SplitPoint *split1 = (SplitPoint *) arg1;
     598   148319432 :     SplitPoint *split2 = (SplitPoint *) arg2;
     599             : 
     600   148319432 :     if (split1->curdelta > split2->curdelta)
     601    54044694 :         return 1;
     602    94274738 :     if (split1->curdelta < split2->curdelta)
     603    94190716 :         return -1;
     604             : 
     605       84022 :     return 0;
     606             : }
     607             : 
     608             : /*
     609             :  * Subroutine to determine whether or not a non-rightmost leaf page should be
     610             :  * split immediately after the would-be original page offset for the
     611             :  * new/incoming tuple (or should have leaf fillfactor applied when new item is
     612             :  * to the right on original page).  This is appropriate when there is a
     613             :  * pattern of localized monotonically increasing insertions into a composite
     614             :  * index, where leading attribute values form local groupings, and we
     615             :  * anticipate further insertions of the same/current grouping (new item's
     616             :  * grouping) in the near future.  This can be thought of as a variation on
     617             :  * applying leaf fillfactor during rightmost leaf page splits, since cases
     618             :  * that benefit will converge on packing leaf pages leaffillfactor% full over
     619             :  * time.
     620             :  *
     621             :  * We may leave extra free space remaining on the rightmost page of a "most
     622             :  * significant column" grouping of tuples if that grouping never ends up
     623             :  * having future insertions that use the free space.  That effect is
     624             :  * self-limiting; a future grouping that becomes the "nearest on the right"
     625             :  * grouping of the affected grouping usually puts the extra free space to good
     626             :  * use.
     627             :  *
     628             :  * Caller uses optimization when routine returns true, though the exact action
     629             :  * taken by caller varies.  Caller uses original leaf page fillfactor in
     630             :  * standard way rather than using the new item offset directly when *usemult
     631             :  * was also set to true here.  Otherwise, caller applies optimization by
     632             :  * locating the legal split point that makes the new tuple the lastleft tuple
     633             :  * for the split.
     634             :  */
     635             : static bool
     636       25432 : _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
     637             :                     int leaffillfactor, bool *usemult)
     638             : {
     639             :     int16       nkeyatts;
     640             :     ItemId      itemid;
     641             :     IndexTuple  tup;
     642             :     int         keepnatts;
     643             : 
     644             :     Assert(state->is_leaf && !state->is_rightmost);
     645             : 
     646       25432 :     nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
     647             : 
     648             :     /* Single key indexes not considered here */
     649       25432 :     if (nkeyatts == 1)
     650         954 :         return false;
     651             : 
     652             :     /* Ascending insertion pattern never inferred when new item is first */
     653       24478 :     if (state->newitemoff == P_FIRSTKEY)
     654           2 :         return false;
     655             : 
     656             :     /*
     657             :      * Only apply optimization on pages with equisized tuples, since ordinal
     658             :      * keys are likely to be fixed-width.  Testing if the new tuple is
     659             :      * variable width directly might also work, but that fails to apply the
     660             :      * optimization to indexes with a numeric_ops attribute.
     661             :      *
     662             :      * Conclude that page has equisized tuples when the new item is the same
     663             :      * width as the smallest item observed during pass over page, and other
     664             :      * non-pivot tuples must be the same width as well.  (Note that the
     665             :      * possibly-truncated existing high key isn't counted in
     666             :      * olddataitemstotal, and must be subtracted from maxoff.)
     667             :      */
     668       24476 :     if (state->newitemsz != state->minfirstrightsz)
     669        3996 :         return false;
     670       20480 :     if (state->newitemsz * (maxoff - 1) != state->olddataitemstotal)
     671         136 :         return false;
     672             : 
     673             :     /*
     674             :      * Avoid applying optimization when tuples are wider than a tuple
     675             :      * consisting of two non-NULL int8/int64 attributes (or four non-NULL
     676             :      * int4/int32 attributes)
     677             :      */
     678       20344 :     if (state->newitemsz >
     679             :         MAXALIGN(sizeof(IndexTupleData) + sizeof(int64) * 2) +
     680             :         sizeof(ItemIdData))
     681           0 :         return false;
     682             : 
     683             :     /*
     684             :      * At least the first attribute's value must be equal to the corresponding
     685             :      * value in previous tuple to apply optimization.  New item cannot be a
     686             :      * duplicate, either.
     687             :      *
     688             :      * Handle case where new item is to the right of all items on the existing
     689             :      * page.  This is suggestive of monotonically increasing insertions in
     690             :      * itself, so the "heap TID adjacency" test is not applied here.
     691             :      */
     692       20344 :     if (state->newitemoff > maxoff)
     693             :     {
     694        4270 :         itemid = PageGetItemId(state->origpage, maxoff);
     695        4270 :         tup = (IndexTuple) PageGetItem(state->origpage, itemid);
     696        4270 :         keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
     697             : 
     698        4270 :         if (keepnatts > 1 && keepnatts <= nkeyatts)
     699             :         {
     700        3622 :             *usemult = true;
     701        3622 :             return true;
     702             :         }
     703             : 
     704         648 :         return false;
     705             :     }
     706             : 
     707             :     /*
     708             :      * "Low cardinality leading column, high cardinality suffix column"
     709             :      * indexes with a random insertion pattern (e.g., an index with a boolean
     710             :      * column, such as an index on '(book_is_in_print, book_isbn)') present us
     711             :      * with a risk of consistently misapplying the optimization.  We're
     712             :      * willing to accept very occasional misapplication of the optimization,
     713             :      * provided the cases where we get it wrong are rare and self-limiting.
     714             :      *
     715             :      * Heap TID adjacency strongly suggests that the item just to the left was
     716             :      * inserted very recently, which limits overapplication of the
     717             :      * optimization.  Besides, all inappropriate cases triggered here will
     718             :      * still split in the middle of the page on average.
     719             :      */
     720       16074 :     itemid = PageGetItemId(state->origpage, OffsetNumberPrev(state->newitemoff));
     721       16074 :     tup = (IndexTuple) PageGetItem(state->origpage, itemid);
     722             :     /* Do cheaper test first */
     723       16074 :     if (BTreeTupleIsPosting(tup) ||
     724       16074 :         !_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid))
     725        3540 :         return false;
     726             :     /* Check same conditions as rightmost item case, too */
     727       12534 :     keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
     728             : 
     729       12534 :     if (keepnatts > 1 && keepnatts <= nkeyatts)
     730             :     {
     731        7720 :         double      interp = (double) state->newitemoff / ((double) maxoff + 1);
     732        7720 :         double      leaffillfactormult = (double) leaffillfactor / 100.0;
     733             : 
     734             :         /*
     735             :          * Don't allow caller to split after a new item when it will result in
     736             :          * a split point to the right of the point that a leaf fillfactor
     737             :          * split would use -- have caller apply leaf fillfactor instead
     738             :          */
     739        7720 :         *usemult = interp > leaffillfactormult;
     740             : 
     741        7720 :         return true;
     742             :     }
     743             : 
     744        4814 :     return false;
     745             : }
     746             : 
     747             : /*
     748             :  * Subroutine for determining if two heap TIDS are "adjacent".
     749             :  *
     750             :  * Adjacent means that the high TID is very likely to have been inserted into
     751             :  * heap relation immediately after the low TID, probably during the current
     752             :  * transaction.
     753             :  */
     754             : static bool
     755       16074 : _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid)
     756             : {
     757             :     BlockNumber lowblk,
     758             :                 highblk;
     759             : 
     760       16074 :     lowblk = ItemPointerGetBlockNumber(lowhtid);
     761       16074 :     highblk = ItemPointerGetBlockNumber(highhtid);
     762             : 
     763             :     /* Make optimistic assumption of adjacency when heap blocks match */
     764       16074 :     if (lowblk == highblk)
     765       12530 :         return true;
     766             : 
     767             :     /* When heap block one up, second offset should be FirstOffsetNumber */
     768        3544 :     if (lowblk + 1 == highblk &&
     769         768 :         ItemPointerGetOffsetNumber(highhtid) == FirstOffsetNumber)
     770           4 :         return true;
     771             : 
     772        3540 :     return false;
     773             : }
     774             : 
     775             : /*
     776             :  * Subroutine to find the "best" split point among candidate split points.
     777             :  * The best split point is the split point with the lowest penalty among split
     778             :  * points that fall within current/final split interval.  Penalty is an
     779             :  * abstract score, with a definition that varies depending on whether we're
     780             :  * splitting a leaf page or an internal page.  See _bt_split_penalty() for
     781             :  * details.
     782             :  *
     783             :  * "perfectpenalty" is assumed to be the lowest possible penalty among
     784             :  * candidate split points.  This allows us to return early without wasting
     785             :  * cycles on calculating the first differing attribute for all candidate
     786             :  * splits when that clearly cannot improve our choice (or when we only want a
     787             :  * minimally distinguishing split point, and don't want to make the split any
     788             :  * more unbalanced than is necessary).
     789             :  *
     790             :  * We return the index of the first existing tuple that should go on the right
     791             :  * page, plus a boolean indicating if new item is on left of split point.
     792             :  */
     793             : static OffsetNumber
     794       41036 : _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
     795             :                  bool *newitemonleft, FindSplitStrat strategy)
     796             : {
     797             :     int         bestpenalty,
     798             :                 lowsplit;
     799       41036 :     int         highsplit = Min(state->interval, state->nsplits);
     800             :     SplitPoint *final;
     801             : 
     802       41036 :     bestpenalty = INT_MAX;
     803       41036 :     lowsplit = 0;
     804      284994 :     for (int i = lowsplit; i < highsplit; i++)
     805             :     {
     806             :         int         penalty;
     807             : 
     808      284566 :         penalty = _bt_split_penalty(state, state->splits + i);
     809             : 
     810      284566 :         if (penalty < bestpenalty)
     811             :         {
     812       52628 :             bestpenalty = penalty;
     813       52628 :             lowsplit = i;
     814             :         }
     815             : 
     816      284566 :         if (penalty <= perfectpenalty)
     817       40608 :             break;
     818             :     }
     819             : 
     820       41036 :     final = &state->splits[lowsplit];
     821             : 
     822             :     /*
     823             :      * There is a risk that the "many duplicates" strategy will repeatedly do
     824             :      * the wrong thing when there are monotonically decreasing insertions to
     825             :      * the right of a large group of duplicates.   Repeated splits could leave
     826             :      * a succession of right half pages with free space that can never be
     827             :      * used.  This must be avoided.
     828             :      *
     829             :      * Consider the example of the leftmost page in a single integer attribute
     830             :      * NULLS FIRST index which is almost filled with NULLs.  Monotonically
     831             :      * decreasing integer insertions might cause the same leftmost page to
     832             :      * split repeatedly at the same point.  Each split derives its new high
     833             :      * key from the lowest current value to the immediate right of the large
     834             :      * group of NULLs, which will always be higher than all future integer
     835             :      * insertions, directing all future integer insertions to the same
     836             :      * leftmost page.
     837             :      */
     838       41036 :     if (strategy == SPLIT_MANY_DUPLICATES && !state->is_rightmost &&
     839         802 :         !final->newitemonleft && final->firstrightoff >= state->newitemoff &&
     840           0 :         final->firstrightoff < state->newitemoff + 9)
     841             :     {
     842             :         /*
     843             :          * Avoid the problem by performing a 50:50 split when the new item is
     844             :          * just to the right of the would-be "many duplicates" split point.
     845             :          * (Note that the test used for an insert that is "just to the right"
     846             :          * of the split point is conservative.)
     847             :          */
     848           0 :         final = &state->splits[0];
     849             :     }
     850             : 
     851       41036 :     *newitemonleft = final->newitemonleft;
     852       41036 :     return final->firstrightoff;
     853             : }
     854             : 
     855             : #define LEAF_SPLIT_DISTANCE         0.050
     856             : #define INTERNAL_SPLIT_DISTANCE     0.075
     857             : 
     858             : /*
     859             :  * Return a split interval to use for the default strategy.  This is a limit
     860             :  * on the number of candidate split points to give further consideration to.
     861             :  * Only a fraction of all candidate splits points (those located at the start
     862             :  * of the now-sorted splits array) fall within the split interval.  Split
     863             :  * interval is applied within _bt_bestsplitloc().
     864             :  *
     865             :  * Split interval represents an acceptable range of split points -- those that
     866             :  * have leftfree and rightfree values that are acceptably balanced.  The final
     867             :  * split point chosen is the split point with the lowest "penalty" among split
     868             :  * points in this split interval (unless we change our entire strategy, in
     869             :  * which case the interval also changes -- see _bt_strategy()).
     870             :  *
     871             :  * The "Prefix B-Trees" paper calls split interval sigma l for leaf splits,
     872             :  * and sigma b for internal ("branch") splits.  It's hard to provide a
     873             :  * theoretical justification for the size of the split interval, though it's
     874             :  * clear that a small split interval can make tuples on level L+1 much smaller
     875             :  * on average, without noticeably affecting space utilization on level L.
     876             :  * (Note that the way that we calculate split interval might need to change if
     877             :  * suffix truncation is taught to truncate tuples "within" the last
     878             :  * attribute/datum for data types like text, which is more or less how it is
     879             :  * assumed to work in the paper.)
     880             :  */
     881             : static int
     882       41036 : _bt_defaultinterval(FindSplitData *state)
     883             : {
     884             :     SplitPoint *spaceoptimal;
     885             :     int16       tolerance,
     886             :                 lowleftfree,
     887             :                 lowrightfree,
     888             :                 highleftfree,
     889             :                 highrightfree;
     890             : 
     891             :     /*
     892             :      * Determine leftfree and rightfree values that are higher and lower than
     893             :      * we're willing to tolerate.  Note that the final split interval will be
     894             :      * about 10% of nsplits in the common case where all non-pivot tuples
     895             :      * (data items) from a leaf page are uniformly sized.  We're a bit more
     896             :      * aggressive when splitting internal pages.
     897             :      */
     898       41036 :     if (state->is_leaf)
     899       40936 :         tolerance = state->olddataitemstotal * LEAF_SPLIT_DISTANCE;
     900             :     else
     901         100 :         tolerance = state->olddataitemstotal * INTERNAL_SPLIT_DISTANCE;
     902             : 
     903             :     /* First candidate split point is the most evenly balanced */
     904       41036 :     spaceoptimal = state->splits;
     905       41036 :     lowleftfree = spaceoptimal->leftfree - tolerance;
     906       41036 :     lowrightfree = spaceoptimal->rightfree - tolerance;
     907       41036 :     highleftfree = spaceoptimal->leftfree + tolerance;
     908       41036 :     highrightfree = spaceoptimal->rightfree + tolerance;
     909             : 
     910             :     /*
     911             :      * Iterate through split points, starting from the split immediately after
     912             :      * 'spaceoptimal'.  Find the first split point that divides free space so
     913             :      * unevenly that including it in the split interval would be unacceptable.
     914             :      */
     915     1232926 :     for (int i = 1; i < state->nsplits; i++)
     916             :     {
     917     1232926 :         SplitPoint *split = state->splits + i;
     918             : 
     919             :         /* Cannot use curdelta here, since its value is often weighted */
     920     1232926 :         if (split->leftfree < lowleftfree || split->rightfree < lowrightfree ||
     921     1192554 :             split->leftfree > highleftfree || split->rightfree > highrightfree)
     922       41036 :             return i;
     923             :     }
     924             : 
     925           0 :     return state->nsplits;
     926             : }
     927             : 
     928             : /*
     929             :  * Subroutine to decide whether split should use default strategy/initial
     930             :  * split interval, or whether it should finish splitting the page using
     931             :  * alternative strategies (this is only possible with leaf pages).
     932             :  *
     933             :  * Caller uses alternative strategy (or sticks with default strategy) based
     934             :  * on how *strategy is set here.  Return value is "perfect penalty", which is
     935             :  * passed to _bt_bestsplitloc() as a final constraint on how far caller is
     936             :  * willing to go to avoid appending a heap TID when using the many duplicates
     937             :  * strategy (it also saves _bt_bestsplitloc() useless cycles).
     938             :  */
     939             : static int
     940       41036 : _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
     941             :              SplitPoint *rightpage, FindSplitStrat *strategy)
     942             : {
     943             :     IndexTuple  leftmost,
     944             :                 rightmost;
     945             :     SplitPoint *leftinterval,
     946             :                *rightinterval;
     947             :     int         perfectpenalty;
     948       41036 :     int         indnkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
     949             : 
     950             :     /* Assume that alternative strategy won't be used for now */
     951       41036 :     *strategy = SPLIT_DEFAULT;
     952             : 
     953             :     /*
     954             :      * Use smallest observed firstright item size for entire page (actually,
     955             :      * entire imaginary version of page that includes newitem) as perfect
     956             :      * penalty on internal pages.  This can save cycles in the common case
     957             :      * where most or all splits (not just splits within interval) have
     958             :      * firstright tuples that are the same size.
     959             :      */
     960       41036 :     if (!state->is_leaf)
     961         100 :         return state->minfirstrightsz;
     962             : 
     963             :     /*
     964             :      * Use leftmost and rightmost tuples from leftmost and rightmost splits in
     965             :      * current split interval
     966             :      */
     967       40936 :     _bt_interval_edges(state, &leftinterval, &rightinterval);
     968       40936 :     leftmost = _bt_split_lastleft(state, leftinterval);
     969       40936 :     rightmost = _bt_split_firstright(state, rightinterval);
     970             : 
     971             :     /*
     972             :      * If initial split interval can produce a split point that will at least
     973             :      * avoid appending a heap TID in new high key, we're done.  Finish split
     974             :      * with default strategy and initial split interval.
     975             :      */
     976       40936 :     perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
     977       40936 :     if (perfectpenalty <= indnkeyatts)
     978       31654 :         return perfectpenalty;
     979             : 
     980             :     /*
     981             :      * Work out how caller should finish split when even their "perfect"
     982             :      * penalty for initial/default split interval indicates that the interval
     983             :      * does not contain even a single split that avoids appending a heap TID.
     984             :      *
     985             :      * Use the leftmost split's lastleft tuple and the rightmost split's
     986             :      * firstright tuple to assess every possible split.
     987             :      */
     988        9282 :     leftmost = _bt_split_lastleft(state, leftpage);
     989        9282 :     rightmost = _bt_split_firstright(state, rightpage);
     990             : 
     991             :     /*
     992             :      * If page (including new item) has many duplicates but is not entirely
     993             :      * full of duplicates, a many duplicates strategy split will be performed.
     994             :      * If page is entirely full of duplicates, a single value strategy split
     995             :      * will be performed.
     996             :      */
     997        9282 :     perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
     998        9282 :     if (perfectpenalty <= indnkeyatts)
     999             :     {
    1000         814 :         *strategy = SPLIT_MANY_DUPLICATES;
    1001             : 
    1002             :         /*
    1003             :          * Many duplicates strategy should split at either side the group of
    1004             :          * duplicates that enclose the delta-optimal split point.  Return
    1005             :          * indnkeyatts rather than the true perfect penalty to make that
    1006             :          * happen.  (If perfectpenalty was returned here then low cardinality
    1007             :          * composite indexes could have continual unbalanced splits.)
    1008             :          *
    1009             :          * Note that caller won't go through with a many duplicates split in
    1010             :          * rare cases where it looks like there are ever-decreasing insertions
    1011             :          * to the immediate right of the split point.  This must happen just
    1012             :          * before a final decision is made, within _bt_bestsplitloc().
    1013             :          */
    1014         814 :         return indnkeyatts;
    1015             :     }
    1016             : 
    1017             :     /*
    1018             :      * Single value strategy is only appropriate with ever-increasing heap
    1019             :      * TIDs; otherwise, original default strategy split should proceed to
    1020             :      * avoid pathological performance.  Use page high key to infer if this is
    1021             :      * the rightmost page among pages that store the same duplicate value.
    1022             :      * This should not prevent insertions of heap TIDs that are slightly out
    1023             :      * of order from using single value strategy, since that's expected with
    1024             :      * concurrent inserters of the same duplicate value.
    1025             :      */
    1026        8468 :     else if (state->is_rightmost)
    1027        8012 :         *strategy = SPLIT_SINGLE_VALUE;
    1028             :     else
    1029             :     {
    1030             :         ItemId      itemid;
    1031             :         IndexTuple  hikey;
    1032             : 
    1033         456 :         itemid = PageGetItemId(state->origpage, P_HIKEY);
    1034         456 :         hikey = (IndexTuple) PageGetItem(state->origpage, itemid);
    1035         456 :         perfectpenalty = _bt_keep_natts_fast(state->rel, hikey,
    1036             :                                              state->newitem);
    1037         456 :         if (perfectpenalty <= indnkeyatts)
    1038         428 :             *strategy = SPLIT_SINGLE_VALUE;
    1039             :         else
    1040             :         {
    1041             :             /*
    1042             :              * Have caller finish split using default strategy, since page
    1043             :              * does not appear to be the rightmost page for duplicates of the
    1044             :              * value the page is filled with
    1045             :              */
    1046             :         }
    1047             :     }
    1048             : 
    1049        8468 :     return perfectpenalty;
    1050             : }
    1051             : 
    1052             : /*
    1053             :  * Subroutine to locate leftmost and rightmost splits for current/default
    1054             :  * split interval.  Note that it will be the same split iff there is only one
    1055             :  * split in interval.
    1056             :  */
    1057             : static void
    1058       40936 : _bt_interval_edges(FindSplitData *state, SplitPoint **leftinterval,
    1059             :                    SplitPoint **rightinterval)
    1060             : {
    1061       40936 :     int         highsplit = Min(state->interval, state->nsplits);
    1062             :     SplitPoint *deltaoptimal;
    1063             : 
    1064       40936 :     deltaoptimal = state->splits;
    1065       40936 :     *leftinterval = NULL;
    1066       40936 :     *rightinterval = NULL;
    1067             : 
    1068             :     /*
    1069             :      * Delta is an absolute distance to optimal split point, so both the
    1070             :      * leftmost and rightmost split point will usually be at the end of the
    1071             :      * array
    1072             :      */
    1073       81870 :     for (int i = highsplit - 1; i >= 0; i--)
    1074             :     {
    1075       81870 :         SplitPoint *distant = state->splits + i;
    1076             : 
    1077       81870 :         if (distant->firstrightoff < deltaoptimal->firstrightoff)
    1078             :         {
    1079       40848 :             if (*leftinterval == NULL)
    1080       40786 :                 *leftinterval = distant;
    1081             :         }
    1082       41022 :         else if (distant->firstrightoff > deltaoptimal->firstrightoff)
    1083             :         {
    1084       40864 :             if (*rightinterval == NULL)
    1085       40790 :                 *rightinterval = distant;
    1086             :         }
    1087         158 :         else if (!distant->newitemonleft && deltaoptimal->newitemonleft)
    1088             :         {
    1089             :             /*
    1090             :              * "incoming tuple will become firstright" (distant) is to the
    1091             :              * left of "incoming tuple will become lastleft" (delta-optimal)
    1092             :              */
    1093             :             Assert(distant->firstrightoff == state->newitemoff);
    1094           0 :             if (*leftinterval == NULL)
    1095           0 :                 *leftinterval = distant;
    1096             :         }
    1097         158 :         else if (distant->newitemonleft && !deltaoptimal->newitemonleft)
    1098             :         {
    1099             :             /*
    1100             :              * "incoming tuple will become lastleft" (distant) is to the right
    1101             :              * of "incoming tuple will become firstright" (delta-optimal)
    1102             :              */
    1103             :             Assert(distant->firstrightoff == state->newitemoff);
    1104          12 :             if (*rightinterval == NULL)
    1105           6 :                 *rightinterval = distant;
    1106             :         }
    1107             :         else
    1108             :         {
    1109             :             /* There was only one or two splits in initial split interval */
    1110             :             Assert(distant == deltaoptimal);
    1111         152 :             if (*leftinterval == NULL)
    1112         150 :                 *leftinterval = distant;
    1113         152 :             if (*rightinterval == NULL)
    1114         140 :                 *rightinterval = distant;
    1115             :         }
    1116             : 
    1117       81870 :         if (*leftinterval && *rightinterval)
    1118       40936 :             return;
    1119             :     }
    1120             : 
    1121             :     Assert(false);
    1122             : }
    1123             : 
    1124             : /*
    1125             :  * Subroutine to find penalty for caller's candidate split point.
    1126             :  *
    1127             :  * On leaf pages, penalty is the attribute number that distinguishes each side
    1128             :  * of a split.  It's the last attribute that needs to be included in new high
    1129             :  * key for left page.  It can be greater than the number of key attributes in
    1130             :  * cases where a heap TID will need to be appended during truncation.
    1131             :  *
    1132             :  * On internal pages, penalty is simply the size of the firstright tuple for
    1133             :  * the split (including line pointer overhead).  This tuple will become the
    1134             :  * new high key for the left page.
    1135             :  */
    1136             : static inline int
    1137      284566 : _bt_split_penalty(FindSplitData *state, SplitPoint *split)
    1138             : {
    1139             :     IndexTuple  lastleft;
    1140             :     IndexTuple  firstright;
    1141             : 
    1142      284566 :     if (!state->is_leaf)
    1143             :     {
    1144             :         ItemId      itemid;
    1145             : 
    1146         110 :         if (!split->newitemonleft &&
    1147         110 :             split->firstrightoff == state->newitemoff)
    1148           0 :             return state->newitemsz;
    1149             : 
    1150         110 :         itemid = PageGetItemId(state->origpage, split->firstrightoff);
    1151             : 
    1152         110 :         return MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
    1153             :     }
    1154             : 
    1155      284456 :     lastleft = _bt_split_lastleft(state, split);
    1156      284456 :     firstright = _bt_split_firstright(state, split);
    1157             : 
    1158      284456 :     return _bt_keep_natts_fast(state->rel, lastleft, firstright);
    1159             : }
    1160             : 
    1161             : /*
    1162             :  * Subroutine to get a lastleft IndexTuple for a split point
    1163             :  */
    1164             : static inline IndexTuple
    1165      334674 : _bt_split_lastleft(FindSplitData *state, SplitPoint *split)
    1166             : {
    1167             :     ItemId      itemid;
    1168             : 
    1169      334674 :     if (split->newitemonleft && split->firstrightoff == state->newitemoff)
    1170         542 :         return state->newitem;
    1171             : 
    1172      334132 :     itemid = PageGetItemId(state->origpage,
    1173             :                            OffsetNumberPrev(split->firstrightoff));
    1174      334132 :     return (IndexTuple) PageGetItem(state->origpage, itemid);
    1175             : }
    1176             : 
    1177             : /*
    1178             :  * Subroutine to get a firstright IndexTuple for a split point
    1179             :  */
    1180             : static inline IndexTuple
    1181      334674 : _bt_split_firstright(FindSplitData *state, SplitPoint *split)
    1182             : {
    1183             :     ItemId      itemid;
    1184             : 
    1185      334674 :     if (!split->newitemonleft && split->firstrightoff == state->newitemoff)
    1186         542 :         return state->newitem;
    1187             : 
    1188      334132 :     itemid = PageGetItemId(state->origpage, split->firstrightoff);
    1189      334132 :     return (IndexTuple) PageGetItem(state->origpage, itemid);
    1190             : }

Generated by: LCOV version 1.13