LCOV - code coverage report
Current view: top level - src/backend/access/gist - gistsplit.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 181 233 77.7 %
Date: 2024-03-28 12:10:52 Functions: 8 10 80.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * gistsplit.c
       4             :  *    Multi-column page splitting algorithm
       5             :  *
       6             :  * This file is concerned with making good page-split decisions in multi-column
       7             :  * GiST indexes.  The opclass-specific picksplit functions can only be expected
       8             :  * to produce answers based on a single column.  We first run the picksplit
       9             :  * function for column 1; then, if there are more columns, we check if any of
      10             :  * the tuples are "don't cares" so far as the column 1 split is concerned
      11             :  * (that is, they could go to either side for no additional penalty).  If so,
      12             :  * we try to redistribute those tuples on the basis of the next column.
      13             :  * Repeat till we're out of columns.
      14             :  *
      15             :  * gistSplitByKey() is the entry point to this file.
      16             :  *
      17             :  *
      18             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      19             :  * Portions Copyright (c) 1994, Regents of the University of California
      20             :  *
      21             :  * IDENTIFICATION
      22             :  *    src/backend/access/gist/gistsplit.c
      23             :  *
      24             :  *-------------------------------------------------------------------------
      25             :  */
      26             : #include "postgres.h"
      27             : 
      28             : #include "access/gist_private.h"
      29             : #include "utils/rel.h"
      30             : 
      31             : typedef struct
      32             : {
      33             :     OffsetNumber *entries;
      34             :     int         len;
      35             :     Datum      *attr;
      36             :     bool       *isnull;
      37             :     bool       *dontcare;
      38             : } GistSplitUnion;
      39             : 
      40             : 
      41             : /*
      42             :  * Form unions of subkeys in itvec[] entries listed in gsvp->entries[],
      43             :  * ignoring any tuples that are marked in gsvp->dontcare[].  Subroutine for
      44             :  * gistunionsubkey.
      45             :  */
      46             : static void
      47        5848 : gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
      48             :                    GistSplitUnion *gsvp)
      49             : {
      50             :     IndexTuple *cleanedItVec;
      51             :     int         i,
      52        5848 :                 cleanedLen = 0;
      53             : 
      54        5848 :     cleanedItVec = (IndexTuple *) palloc(sizeof(IndexTuple) * gsvp->len);
      55             : 
      56      281740 :     for (i = 0; i < gsvp->len; i++)
      57             :     {
      58      275892 :         if (gsvp->dontcare && gsvp->dontcare[gsvp->entries[i]])
      59           0 :             continue;
      60             : 
      61      275892 :         cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1];
      62             :     }
      63             : 
      64        5848 :     gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen,
      65             :                        gsvp->attr, gsvp->isnull);
      66             : 
      67        5848 :     pfree(cleanedItVec);
      68        5848 : }
      69             : 
      70             : /*
      71             :  * Recompute unions of left- and right-side subkeys after a page split,
      72             :  * ignoring any tuples that are marked in spl->spl_dontcare[].
      73             :  *
      74             :  * Note: we always recompute union keys for all index columns.  In some cases
      75             :  * this might represent duplicate work for the leftmost column(s), but it's
      76             :  * not safe to assume that "zero penalty to move a tuple" means "the union
      77             :  * key doesn't change at all".  Penalty functions aren't 100% accurate.
      78             :  */
      79             : static void
      80        2924 : gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl)
      81             : {
      82             :     GistSplitUnion gsvp;
      83             : 
      84        2924 :     gsvp.dontcare = spl->spl_dontcare;
      85             : 
      86        2924 :     gsvp.entries = spl->splitVector.spl_left;
      87        2924 :     gsvp.len = spl->splitVector.spl_nleft;
      88        2924 :     gsvp.attr = spl->spl_lattr;
      89        2924 :     gsvp.isnull = spl->spl_lisnull;
      90             : 
      91        2924 :     gistunionsubkeyvec(giststate, itvec, &gsvp);
      92             : 
      93        2924 :     gsvp.entries = spl->splitVector.spl_right;
      94        2924 :     gsvp.len = spl->splitVector.spl_nright;
      95        2924 :     gsvp.attr = spl->spl_rattr;
      96        2924 :     gsvp.isnull = spl->spl_risnull;
      97             : 
      98        2924 :     gistunionsubkeyvec(giststate, itvec, &gsvp);
      99        2924 : }
     100             : 
     101             : /*
     102             :  * Find tuples that are "don't cares", that is could be moved to the other
     103             :  * side of the split with zero penalty, so far as the attno column is
     104             :  * concerned.
     105             :  *
     106             :  * Don't-care tuples are marked by setting the corresponding entry in
     107             :  * spl->spl_dontcare[] to "true".  Caller must have initialized that array
     108             :  * to zeroes.
     109             :  *
     110             :  * Returns number of don't-cares found.
     111             :  */
     112             : static int
     113        2528 : findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec,
     114             :               GistSplitVector *spl, int attno)
     115             : {
     116             :     int         i;
     117             :     GISTENTRY   entry;
     118        2528 :     int         NumDontCare = 0;
     119             : 
     120             :     /*
     121             :      * First, search the left-side tuples to see if any have zero penalty to
     122             :      * be added to the right-side union key.
     123             :      *
     124             :      * attno column is known all-not-null (see gistSplitByKey), so we need not
     125             :      * check for nulls
     126             :      */
     127        2528 :     gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL,
     128             :                   (OffsetNumber) 0, false);
     129      124286 :     for (i = 0; i < spl->splitVector.spl_nleft; i++)
     130             :     {
     131      121758 :         int         j = spl->splitVector.spl_left[i];
     132      121758 :         float       penalty = gistpenalty(giststate, attno, &entry, false,
     133      121758 :                                           &valvec[j], false);
     134             : 
     135      121758 :         if (penalty == 0.0)
     136             :         {
     137         184 :             spl->spl_dontcare[j] = true;
     138         184 :             NumDontCare++;
     139             :         }
     140             :     }
     141             : 
     142             :     /* And conversely for the right-side tuples */
     143        2528 :     gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL,
     144             :                   (OffsetNumber) 0, false);
     145      126806 :     for (i = 0; i < spl->splitVector.spl_nright; i++)
     146             :     {
     147      124278 :         int         j = spl->splitVector.spl_right[i];
     148      124278 :         float       penalty = gistpenalty(giststate, attno, &entry, false,
     149      124278 :                                           &valvec[j], false);
     150             : 
     151      124278 :         if (penalty == 0.0)
     152             :         {
     153         184 :             spl->spl_dontcare[j] = true;
     154         184 :             NumDontCare++;
     155             :         }
     156             :     }
     157             : 
     158        2528 :     return NumDontCare;
     159             : }
     160             : 
     161             : /*
     162             :  * Remove tuples that are marked don't-cares from the tuple index array a[]
     163             :  * of length *len.  This is applied separately to the spl_left and spl_right
     164             :  * arrays.
     165             :  */
     166             : static void
     167           4 : removeDontCares(OffsetNumber *a, int *len, const bool *dontcare)
     168             : {
     169             :     int         origlen,
     170             :                 newlen,
     171             :                 i;
     172             :     OffsetNumber *curwpos;
     173             : 
     174           4 :     origlen = newlen = *len;
     175           4 :     curwpos = a;
     176         376 :     for (i = 0; i < origlen; i++)
     177             :     {
     178         372 :         OffsetNumber ai = a[i];
     179             : 
     180         372 :         if (dontcare[ai] == false)
     181             :         {
     182             :             /* re-emit item into a[] */
     183           4 :             *curwpos = ai;
     184           4 :             curwpos++;
     185             :         }
     186             :         else
     187         368 :             newlen--;
     188             :     }
     189             : 
     190           4 :     *len = newlen;
     191           4 : }
     192             : 
     193             : /*
     194             :  * Place a single don't-care tuple into either the left or right side of the
     195             :  * split, according to which has least penalty for merging the tuple into
     196             :  * the previously-computed union keys.  We need consider only columns starting
     197             :  * at attno.
     198             :  */
     199             : static void
     200           0 : placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v,
     201             :          IndexTuple itup, OffsetNumber off, int attno)
     202             : {
     203             :     GISTENTRY   identry[INDEX_MAX_KEYS];
     204             :     bool        isnull[INDEX_MAX_KEYS];
     205           0 :     bool        toLeft = true;
     206             : 
     207           0 :     gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0,
     208             :                       identry, isnull);
     209             : 
     210           0 :     for (; attno < giststate->nonLeafTupdesc->natts; attno++)
     211             :     {
     212             :         float       lpenalty,
     213             :                     rpenalty;
     214             :         GISTENTRY   entry;
     215             : 
     216           0 :         gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, false);
     217           0 :         lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno],
     218           0 :                                identry + attno, isnull[attno]);
     219           0 :         gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, false);
     220           0 :         rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno],
     221           0 :                                identry + attno, isnull[attno]);
     222             : 
     223           0 :         if (lpenalty != rpenalty)
     224             :         {
     225           0 :             if (lpenalty > rpenalty)
     226           0 :                 toLeft = false;
     227           0 :             break;
     228             :         }
     229             :     }
     230             : 
     231           0 :     if (toLeft)
     232           0 :         v->splitVector.spl_left[v->splitVector.spl_nleft++] = off;
     233             :     else
     234           0 :         v->splitVector.spl_right[v->splitVector.spl_nright++] = off;
     235           0 : }
     236             : 
     237             : #define SWAPVAR( s, d, t ) \
     238             : do {    \
     239             :     (t) = (s); \
     240             :     (s) = (d); \
     241             :     (d) = (t); \
     242             : } while(0)
     243             : 
     244             : /*
     245             :  * Clean up when we did a secondary split but the user-defined PickSplit
     246             :  * method didn't support it (leaving spl_ldatum_exists or spl_rdatum_exists
     247             :  * true).
     248             :  *
     249             :  * We consider whether to swap the left and right outputs of the secondary
     250             :  * split; this can be worthwhile if the penalty for merging those tuples into
     251             :  * the previously chosen sets is less that way.
     252             :  *
     253             :  * In any case we must update the union datums for the current column by
     254             :  * adding in the previous union keys (oldL/oldR), since the user-defined
     255             :  * PickSplit method didn't do so.
     256             :  */
     257             : static void
     258           2 : supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno,
     259             :                       GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
     260             : {
     261           2 :     bool        leaveOnLeft = true,
     262             :                 tmpBool;
     263             :     GISTENTRY   entryL,
     264             :                 entryR,
     265             :                 entrySL,
     266             :                 entrySR;
     267             : 
     268           2 :     gistentryinit(entryL, oldL, r, NULL, 0, false);
     269           2 :     gistentryinit(entryR, oldR, r, NULL, 0, false);
     270           2 :     gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false);
     271           2 :     gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false);
     272             : 
     273           2 :     if (sv->spl_ldatum_exists && sv->spl_rdatum_exists)
     274           2 :     {
     275             :         float       penalty1,
     276             :                     penalty2;
     277             : 
     278           2 :         penalty1 = gistpenalty(giststate, attno, &entryL, false, &entrySL, false) +
     279           2 :             gistpenalty(giststate, attno, &entryR, false, &entrySR, false);
     280           2 :         penalty2 = gistpenalty(giststate, attno, &entryL, false, &entrySR, false) +
     281           2 :             gistpenalty(giststate, attno, &entryR, false, &entrySL, false);
     282             : 
     283           2 :         if (penalty1 > penalty2)
     284           0 :             leaveOnLeft = false;
     285             :     }
     286             :     else
     287             :     {
     288           0 :         GISTENTRY  *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR;
     289             :         float       penalty1,
     290             :                     penalty2;
     291             : 
     292             :         /*
     293             :          * There is only one previously defined union, so we just choose swap
     294             :          * or not by lowest penalty for that side.  We can only get here if a
     295             :          * secondary split happened to have all NULLs in its column in the
     296             :          * tuples that the outer recursion level had assigned to one side.
     297             :          * (Note that the null checks in gistSplitByKey don't prevent the
     298             :          * case, because they'll only be checking tuples that were considered
     299             :          * don't-cares at the outer recursion level, not the tuples that went
     300             :          * into determining the passed-down left and right union keys.)
     301             :          */
     302           0 :         penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false);
     303           0 :         penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false);
     304             : 
     305           0 :         if (penalty1 < penalty2)
     306           0 :             leaveOnLeft = sv->spl_ldatum_exists;
     307             :         else
     308           0 :             leaveOnLeft = sv->spl_rdatum_exists;
     309             :     }
     310             : 
     311           2 :     if (leaveOnLeft == false)
     312             :     {
     313             :         /*
     314             :          * swap left and right
     315             :          */
     316             :         OffsetNumber *off,
     317             :                     noff;
     318             :         Datum       datum;
     319             : 
     320           0 :         SWAPVAR(sv->spl_left, sv->spl_right, off);
     321           0 :         SWAPVAR(sv->spl_nleft, sv->spl_nright, noff);
     322           0 :         SWAPVAR(sv->spl_ldatum, sv->spl_rdatum, datum);
     323           0 :         gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false);
     324           0 :         gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false);
     325             :     }
     326             : 
     327           2 :     if (sv->spl_ldatum_exists)
     328           2 :         gistMakeUnionKey(giststate, attno, &entryL, false, &entrySL, false,
     329             :                          &sv->spl_ldatum, &tmpBool);
     330             : 
     331           2 :     if (sv->spl_rdatum_exists)
     332           2 :         gistMakeUnionKey(giststate, attno, &entryR, false, &entrySR, false,
     333             :                          &sv->spl_rdatum, &tmpBool);
     334             : 
     335           2 :     sv->spl_ldatum_exists = sv->spl_rdatum_exists = false;
     336           2 : }
     337             : 
     338             : /*
     339             :  * Trivial picksplit implementation. Function called only
     340             :  * if user-defined picksplit puts all keys on the same side of the split.
     341             :  * That is a bug of user-defined picksplit but we don't want to fail.
     342             :  */
     343             : static void
     344          94 : genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC *v, int attno)
     345             : {
     346             :     OffsetNumber i,
     347             :                 maxoff;
     348             :     int         nbytes;
     349             :     GistEntryVector *evec;
     350             : 
     351          94 :     maxoff = entryvec->n - 1;
     352             : 
     353          94 :     nbytes = (maxoff + 2) * sizeof(OffsetNumber);
     354             : 
     355          94 :     v->spl_left = (OffsetNumber *) palloc(nbytes);
     356          94 :     v->spl_right = (OffsetNumber *) palloc(nbytes);
     357          94 :     v->spl_nleft = v->spl_nright = 0;
     358             : 
     359       26162 :     for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
     360             :     {
     361       26068 :         if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
     362             :         {
     363       13034 :             v->spl_left[v->spl_nleft] = i;
     364       13034 :             v->spl_nleft++;
     365             :         }
     366             :         else
     367             :         {
     368       13034 :             v->spl_right[v->spl_nright] = i;
     369       13034 :             v->spl_nright++;
     370             :         }
     371             :     }
     372             : 
     373             :     /*
     374             :      * Form union datums for each side
     375             :      */
     376          94 :     evec = palloc(sizeof(GISTENTRY) * entryvec->n + GEVHDRSZ);
     377             : 
     378          94 :     evec->n = v->spl_nleft;
     379          94 :     memcpy(evec->vector, entryvec->vector + FirstOffsetNumber,
     380          94 :            sizeof(GISTENTRY) * evec->n);
     381          94 :     v->spl_ldatum = FunctionCall2Coll(&giststate->unionFn[attno],
     382             :                                       giststate->supportCollation[attno],
     383             :                                       PointerGetDatum(evec),
     384             :                                       PointerGetDatum(&nbytes));
     385             : 
     386          94 :     evec->n = v->spl_nright;
     387          94 :     memcpy(evec->vector, entryvec->vector + FirstOffsetNumber + v->spl_nleft,
     388          94 :            sizeof(GISTENTRY) * evec->n);
     389          94 :     v->spl_rdatum = FunctionCall2Coll(&giststate->unionFn[attno],
     390             :                                       giststate->supportCollation[attno],
     391             :                                       PointerGetDatum(evec),
     392             :                                       PointerGetDatum(&nbytes));
     393          94 : }
     394             : 
     395             : /*
     396             :  * Calls user picksplit method for attno column to split tuples into
     397             :  * two vectors.
     398             :  *
     399             :  * Returns false if split is complete (there are no more index columns, or
     400             :  * there is no need to consider them because split is optimal already).
     401             :  *
     402             :  * Returns true and v->spl_dontcare = NULL if the picksplit result is
     403             :  * degenerate (all tuples seem to be don't-cares), so we should just
     404             :  * disregard this column and split on the next column(s) instead.
     405             :  *
     406             :  * Returns true and v->spl_dontcare != NULL if there are don't-care tuples
     407             :  * that could be relocated based on the next column(s).  The don't-care
     408             :  * tuples have been removed from the split and must be reinserted by caller.
     409             :  * There is at least one non-don't-care tuple on each side of the split,
     410             :  * and union keys for all columns are updated to include just those tuples.
     411             :  *
     412             :  * A true result implies there is at least one more index column.
     413             :  */
     414             : static bool
     415       26472 : gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v,
     416             :                   IndexTuple *itup, int len, GISTSTATE *giststate)
     417             : {
     418       26472 :     GIST_SPLITVEC *sv = &v->splitVector;
     419             : 
     420             :     /*
     421             :      * Prepare spl_ldatum/spl_rdatum/spl_ldatum_exists/spl_rdatum_exists in
     422             :      * case we are doing a secondary split (see comments in gist.h).
     423             :      */
     424       26472 :     sv->spl_ldatum_exists = !(v->spl_lisnull[attno]);
     425       26472 :     sv->spl_rdatum_exists = !(v->spl_risnull[attno]);
     426       26472 :     sv->spl_ldatum = v->spl_lattr[attno];
     427       26472 :     sv->spl_rdatum = v->spl_rattr[attno];
     428             : 
     429             :     /*
     430             :      * Let the opclass-specific PickSplit method do its thing.  Note that at
     431             :      * this point we know there are no null keys in the entryvec.
     432             :      */
     433       26472 :     FunctionCall2Coll(&giststate->picksplitFn[attno],
     434             :                       giststate->supportCollation[attno],
     435             :                       PointerGetDatum(entryvec),
     436             :                       PointerGetDatum(sv));
     437             : 
     438       26472 :     if (sv->spl_nleft == 0 || sv->spl_nright == 0)
     439             :     {
     440             :         /*
     441             :          * User-defined picksplit failed to create an actual split, ie it put
     442             :          * everything on the same side.  Complain but cope.
     443             :          */
     444          94 :         ereport(DEBUG1,
     445             :                 (errcode(ERRCODE_INTERNAL_ERROR),
     446             :                  errmsg("picksplit method for column %d of index \"%s\" failed",
     447             :                         attno + 1, RelationGetRelationName(r)),
     448             :                  errhint("The index is not optimal. To optimize it, contact a developer, or try to use the column as the second one in the CREATE INDEX command.")));
     449             : 
     450             :         /*
     451             :          * Reinit GIST_SPLITVEC. Although these fields are not used by
     452             :          * genericPickSplit(), set them up for further processing
     453             :          */
     454          94 :         sv->spl_ldatum_exists = !(v->spl_lisnull[attno]);
     455          94 :         sv->spl_rdatum_exists = !(v->spl_risnull[attno]);
     456          94 :         sv->spl_ldatum = v->spl_lattr[attno];
     457          94 :         sv->spl_rdatum = v->spl_rattr[attno];
     458             : 
     459             :         /* Do a generic split */
     460          94 :         genericPickSplit(giststate, entryvec, sv, attno);
     461             :     }
     462             :     else
     463             :     {
     464             :         /* hack for compatibility with old picksplit API */
     465       26378 :         if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber)
     466           0 :             sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1);
     467       26378 :         if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
     468           0 :             sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
     469             :     }
     470             : 
     471             :     /* Clean up if PickSplit didn't take care of a secondary split */
     472       26472 :     if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
     473           2 :         supportSecondarySplit(r, giststate, attno, sv,
     474             :                               v->spl_lattr[attno], v->spl_rattr[attno]);
     475             : 
     476             :     /* emit union datums computed by PickSplit back to v arrays */
     477       26472 :     v->spl_lattr[attno] = sv->spl_ldatum;
     478       26472 :     v->spl_rattr[attno] = sv->spl_rdatum;
     479       26472 :     v->spl_lisnull[attno] = false;
     480       26472 :     v->spl_risnull[attno] = false;
     481             : 
     482             :     /*
     483             :      * If index columns remain, then consider whether we can improve the split
     484             :      * by using them.
     485             :      */
     486       26472 :     v->spl_dontcare = NULL;
     487             : 
     488       26472 :     if (attno + 1 < giststate->nonLeafTupdesc->natts)
     489             :     {
     490             :         int         NumDontCare;
     491             : 
     492             :         /*
     493             :          * Make a quick check to see if left and right union keys are equal;
     494             :          * if so, the split is certainly degenerate, so tell caller to
     495             :          * re-split with the next column.
     496             :          */
     497        2564 :         if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum))
     498          36 :             return true;
     499             : 
     500             :         /*
     501             :          * Locate don't-care tuples, if any.  If there are none, the split is
     502             :          * optimal, so just fall out and return false.
     503             :          */
     504        2528 :         v->spl_dontcare = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
     505             : 
     506        2528 :         NumDontCare = findDontCares(r, giststate, entryvec->vector, v, attno);
     507             : 
     508        2528 :         if (NumDontCare > 0)
     509             :         {
     510             :             /*
     511             :              * Remove don't-cares from spl_left[] and spl_right[].
     512             :              */
     513           2 :             removeDontCares(sv->spl_left, &sv->spl_nleft, v->spl_dontcare);
     514           2 :             removeDontCares(sv->spl_right, &sv->spl_nright, v->spl_dontcare);
     515             : 
     516             :             /*
     517             :              * If all tuples on either side were don't-cares, the split is
     518             :              * degenerate, and we're best off to ignore it and split on the
     519             :              * next column.  (We used to try to press on with a secondary
     520             :              * split by forcing a random tuple on each side to be treated as
     521             :              * non-don't-care, but it seems unlikely that that technique
     522             :              * really gives a better result.  Note that we don't want to try a
     523             :              * secondary split with empty left or right primary split sides,
     524             :              * because then there is no union key on that side for the
     525             :              * PickSplit function to try to expand, so it can have no good
     526             :              * figure of merit for what it's doing.  Also note that this check
     527             :              * ensures we can't produce a bogus one-side-only split in the
     528             :              * NumDontCare == 1 special case below.)
     529             :              */
     530           2 :             if (sv->spl_nleft == 0 || sv->spl_nright == 0)
     531             :             {
     532           0 :                 v->spl_dontcare = NULL;
     533           0 :                 return true;
     534             :             }
     535             : 
     536             :             /*
     537             :              * Recompute union keys, considering only non-don't-care tuples.
     538             :              * NOTE: this will set union keys for remaining index columns,
     539             :              * which will cause later calls of gistUserPicksplit to pass those
     540             :              * values down to user-defined PickSplit methods with
     541             :              * spl_ldatum_exists/spl_rdatum_exists set true.
     542             :              */
     543           2 :             gistunionsubkey(giststate, itup, v);
     544             : 
     545           2 :             if (NumDontCare == 1)
     546             :             {
     547             :                 /*
     548             :                  * If there's only one don't-care tuple then we can't do a
     549             :                  * PickSplit on it, so just choose whether to send it left or
     550             :                  * right by comparing penalties.  We needed the
     551             :                  * gistunionsubkey step anyway so that we have appropriate
     552             :                  * union keys for figuring the penalties.
     553             :                  */
     554             :                 OffsetNumber toMove;
     555             : 
     556             :                 /* find it ... */
     557           0 :                 for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++)
     558             :                 {
     559           0 :                     if (v->spl_dontcare[toMove])
     560           0 :                         break;
     561             :                 }
     562             :                 Assert(toMove < entryvec->n);
     563             : 
     564             :                 /* ... and assign it to cheaper side */
     565           0 :                 placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
     566             : 
     567             :                 /*
     568             :                  * At this point the union keys are wrong, but we don't care
     569             :                  * because we're done splitting.  The outermost recursion
     570             :                  * level of gistSplitByKey will fix things before returning.
     571             :                  */
     572             :             }
     573             :             else
     574           2 :                 return true;
     575             :         }
     576             :     }
     577             : 
     578       26434 :     return false;
     579             : }
     580             : 
     581             : /*
     582             :  * simply split page in half
     583             :  */
     584             : static void
     585           0 : gistSplitHalf(GIST_SPLITVEC *v, int len)
     586             : {
     587             :     int         i;
     588             : 
     589           0 :     v->spl_nright = v->spl_nleft = 0;
     590           0 :     v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
     591           0 :     v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
     592           0 :     for (i = 1; i <= len; i++)
     593           0 :         if (i < len / 2)
     594           0 :             v->spl_right[v->spl_nright++] = i;
     595             :         else
     596           0 :             v->spl_left[v->spl_nleft++] = i;
     597             : 
     598             :     /* we need not compute union keys, caller took care of it */
     599           0 : }
     600             : 
     601             : /*
     602             :  * gistSplitByKey: main entry point for page-splitting algorithm
     603             :  *
     604             :  * r: index relation
     605             :  * page: page being split
     606             :  * itup: array of IndexTuples to be processed
     607             :  * len: number of IndexTuples to be processed (must be at least 2)
     608             :  * giststate: additional info about index
     609             :  * v: working state and output area
     610             :  * attno: column we are working on (zero-based index)
     611             :  *
     612             :  * Outside caller must initialize v->spl_lisnull and v->spl_risnull arrays
     613             :  * to all-true.  On return, spl_left/spl_nleft contain indexes of tuples
     614             :  * to go left, spl_right/spl_nright contain indexes of tuples to go right,
     615             :  * spl_lattr/spl_lisnull contain left-side union key values, and
     616             :  * spl_rattr/spl_risnull contain right-side union key values.  Other fields
     617             :  * in this struct are workspace for this file.
     618             :  *
     619             :  * Outside caller must pass zero for attno.  The function may internally
     620             :  * recurse to the next column by passing attno+1.
     621             :  */
     622             : void
     623       26830 : gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
     624             :                GISTSTATE *giststate, GistSplitVector *v, int attno)
     625             : {
     626             :     GistEntryVector *entryvec;
     627             :     OffsetNumber *offNullTuples;
     628       26830 :     int         nOffNullTuples = 0;
     629             :     int         i;
     630             : 
     631             :     /* generate the item array, and identify tuples with null keys */
     632             :     /* note that entryvec->vector[0] goes unused in this code */
     633       26830 :     entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
     634       26830 :     entryvec->n = len + 1;
     635       26830 :     offNullTuples = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
     636             : 
     637     2310738 :     for (i = 1; i <= len; i++)
     638             :     {
     639             :         Datum       datum;
     640             :         bool        IsNull;
     641             : 
     642     2283908 :         datum = index_getattr(itup[i - 1], attno + 1, giststate->leafTupdesc,
     643             :                               &IsNull);
     644     2283908 :         gistdentryinit(giststate, attno, &(entryvec->vector[i]),
     645             :                        datum, r, page, i,
     646             :                        false, IsNull);
     647     2283908 :         if (IsNull)
     648        1912 :             offNullTuples[nOffNullTuples++] = i;
     649             :     }
     650             : 
     651       26830 :     if (nOffNullTuples == len)
     652             :     {
     653             :         /*
     654             :          * Corner case: All keys in attno column are null, so just transfer
     655             :          * our attention to the next column.  If there's no next column, just
     656             :          * split page in half.
     657             :          */
     658           0 :         v->spl_risnull[attno] = v->spl_lisnull[attno] = true;
     659             : 
     660           0 :         if (attno + 1 < giststate->nonLeafTupdesc->natts)
     661           0 :             gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
     662             :         else
     663           0 :             gistSplitHalf(&v->splitVector, len);
     664             :     }
     665       26830 :     else if (nOffNullTuples > 0)
     666             :     {
     667         358 :         int         j = 0;
     668             : 
     669             :         /*
     670             :          * We don't want to mix NULL and not-NULL keys on one page, so split
     671             :          * nulls to right page and not-nulls to left.
     672             :          */
     673         358 :         v->splitVector.spl_right = offNullTuples;
     674         358 :         v->splitVector.spl_nright = nOffNullTuples;
     675         358 :         v->spl_risnull[attno] = true;
     676             : 
     677         358 :         v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
     678         358 :         v->splitVector.spl_nleft = 0;
     679       23514 :         for (i = 1; i <= len; i++)
     680       23156 :             if (j < v->splitVector.spl_nright && offNullTuples[j] == i)
     681        1912 :                 j++;
     682             :             else
     683       21244 :                 v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
     684             : 
     685             :         /* Compute union keys, unless outer recursion level will handle it */
     686         358 :         if (attno == 0 && giststate->nonLeafTupdesc->natts == 1)
     687             :         {
     688         356 :             v->spl_dontcare = NULL;
     689         356 :             gistunionsubkey(giststate, itup, v);
     690             :         }
     691             :     }
     692             :     else
     693             :     {
     694             :         /*
     695             :          * All keys are not-null, so apply user-defined PickSplit method
     696             :          */
     697       26472 :         if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate))
     698             :         {
     699             :             /*
     700             :              * Splitting on attno column is not optimal, so consider
     701             :              * redistributing don't-care tuples according to the next column
     702             :              */
     703             :             Assert(attno + 1 < giststate->nonLeafTupdesc->natts);
     704             : 
     705          38 :             if (v->spl_dontcare == NULL)
     706             :             {
     707             :                 /*
     708             :                  * This split was actually degenerate, so ignore it altogether
     709             :                  * and just split according to the next column.
     710             :                  */
     711          36 :                 gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
     712             :             }
     713             :             else
     714             :             {
     715             :                 /*
     716             :                  * Form an array of just the don't-care tuples to pass to a
     717             :                  * recursive invocation of this function for the next column.
     718             :                  */
     719           2 :                 IndexTuple *newitup = (IndexTuple *) palloc(len * sizeof(IndexTuple));
     720           2 :                 OffsetNumber *map = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
     721           2 :                 int         newlen = 0;
     722             :                 GIST_SPLITVEC backupSplit;
     723             : 
     724         374 :                 for (i = 0; i < len; i++)
     725             :                 {
     726         372 :                     if (v->spl_dontcare[i + 1])
     727             :                     {
     728         368 :                         newitup[newlen] = itup[i];
     729         368 :                         map[newlen] = i + 1;
     730         368 :                         newlen++;
     731             :                     }
     732             :                 }
     733             : 
     734             :                 Assert(newlen > 0);
     735             : 
     736             :                 /*
     737             :                  * Make a backup copy of v->splitVector, since the recursive
     738             :                  * call will overwrite that with its own result.
     739             :                  */
     740           2 :                 backupSplit = v->splitVector;
     741           2 :                 backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
     742           2 :                 memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft);
     743           2 :                 backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
     744           2 :                 memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright);
     745             : 
     746             :                 /* Recursively decide how to split the don't-care tuples */
     747           2 :                 gistSplitByKey(r, page, newitup, newlen, giststate, v, attno + 1);
     748             : 
     749             :                 /* Merge result of subsplit with non-don't-care tuples */
     750         186 :                 for (i = 0; i < v->splitVector.spl_nleft; i++)
     751         184 :                     backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
     752         186 :                 for (i = 0; i < v->splitVector.spl_nright; i++)
     753         184 :                     backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];
     754             : 
     755           2 :                 v->splitVector = backupSplit;
     756             :             }
     757             :         }
     758             :     }
     759             : 
     760             :     /*
     761             :      * If we're handling a multicolumn index, at the end of the recursion
     762             :      * recompute the left and right union datums for all index columns.  This
     763             :      * makes sure we hand back correct union datums in all corner cases,
     764             :      * including when we haven't processed all columns to start with, or when
     765             :      * a secondary split moved "don't care" tuples from one side to the other
     766             :      * (we really shouldn't assume that that didn't change the union datums).
     767             :      *
     768             :      * Note: when we're in an internal recursion (attno > 0), we do not worry
     769             :      * about whether the union datums we return with are sensible, since
     770             :      * calling levels won't care.  Also, in a single-column index, we expect
     771             :      * that PickSplit (or the special cases above) produced correct union
     772             :      * datums.
     773             :      */
     774       26830 :     if (attno == 0 && giststate->nonLeafTupdesc->natts > 1)
     775             :     {
     776        2566 :         v->spl_dontcare = NULL;
     777        2566 :         gistunionsubkey(giststate, itup, v);
     778             :     }
     779       26830 : }

Generated by: LCOV version 1.14