Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * gistsplit.c
4 : * Multi-column page splitting algorithm
5 : *
6 : * This file is concerned with making good page-split decisions in multi-column
7 : * GiST indexes. The opclass-specific picksplit functions can only be expected
8 : * to produce answers based on a single column. We first run the picksplit
9 : * function for column 1; then, if there are more columns, we check if any of
10 : * the tuples are "don't cares" so far as the column 1 split is concerned
11 : * (that is, they could go to either side for no additional penalty). If so,
12 : * we try to redistribute those tuples on the basis of the next column.
13 : * Repeat till we're out of columns.
14 : *
15 : * gistSplitByKey() is the entry point to this file.
16 : *
17 : *
18 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
19 : * Portions Copyright (c) 1994, Regents of the University of California
20 : *
21 : * IDENTIFICATION
22 : * src/backend/access/gist/gistsplit.c
23 : *
24 : *-------------------------------------------------------------------------
25 : */
26 : #include "postgres.h"
27 :
28 : #include "access/gist_private.h"
29 : #include "utils/rel.h"
30 :
31 : typedef struct
32 : {
33 : OffsetNumber *entries;
34 : int len;
35 : Datum *attr;
36 : bool *isnull;
37 : bool *dontcare;
38 : } GistSplitUnion;
39 :
40 :
41 : /*
42 : * Form unions of subkeys in itvec[] entries listed in gsvp->entries[],
43 : * ignoring any tuples that are marked in gsvp->dontcare[]. Subroutine for
44 : * gistunionsubkey.
45 : */
46 : static void
47 5516 : gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
48 : GistSplitUnion *gsvp)
49 : {
50 : IndexTuple *cleanedItVec;
51 : int i,
52 5516 : cleanedLen = 0;
53 :
54 5516 : cleanedItVec = (IndexTuple *) palloc(sizeof(IndexTuple) * gsvp->len);
55 :
56 279630 : for (i = 0; i < gsvp->len; i++)
57 : {
58 274114 : if (gsvp->dontcare && gsvp->dontcare[gsvp->entries[i]])
59 0 : continue;
60 :
61 274114 : cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1];
62 : }
63 :
64 5516 : gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen,
65 : gsvp->attr, gsvp->isnull);
66 :
67 5516 : pfree(cleanedItVec);
68 5516 : }
69 :
70 : /*
71 : * Recompute unions of left- and right-side subkeys after a page split,
72 : * ignoring any tuples that are marked in spl->spl_dontcare[].
73 : *
74 : * Note: we always recompute union keys for all index columns. In some cases
75 : * this might represent duplicate work for the leftmost column(s), but it's
76 : * not safe to assume that "zero penalty to move a tuple" means "the union
77 : * key doesn't change at all". Penalty functions aren't 100% accurate.
78 : */
79 : static void
80 2758 : gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl)
81 : {
82 : GistSplitUnion gsvp;
83 :
84 2758 : gsvp.dontcare = spl->spl_dontcare;
85 :
86 2758 : gsvp.entries = spl->splitVector.spl_left;
87 2758 : gsvp.len = spl->splitVector.spl_nleft;
88 2758 : gsvp.attr = spl->spl_lattr;
89 2758 : gsvp.isnull = spl->spl_lisnull;
90 :
91 2758 : gistunionsubkeyvec(giststate, itvec, &gsvp);
92 :
93 2758 : gsvp.entries = spl->splitVector.spl_right;
94 2758 : gsvp.len = spl->splitVector.spl_nright;
95 2758 : gsvp.attr = spl->spl_rattr;
96 2758 : gsvp.isnull = spl->spl_risnull;
97 :
98 2758 : gistunionsubkeyvec(giststate, itvec, &gsvp);
99 2758 : }
100 :
101 : /*
102 : * Find tuples that are "don't cares", that is could be moved to the other
103 : * side of the split with zero penalty, so far as the attno column is
104 : * concerned.
105 : *
106 : * Don't-care tuples are marked by setting the corresponding entry in
107 : * spl->spl_dontcare[] to "true". Caller must have initialized that array
108 : * to zeroes.
109 : *
110 : * Returns number of don't-cares found.
111 : */
112 : static int
113 2532 : findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec,
114 : GistSplitVector *spl, int attno)
115 : {
116 : int i;
117 : GISTENTRY entry;
118 2532 : int NumDontCare = 0;
119 :
120 : /*
121 : * First, search the left-side tuples to see if any have zero penalty to
122 : * be added to the right-side union key.
123 : *
124 : * attno column is known all-not-null (see gistSplitByKey), so we need not
125 : * check for nulls
126 : */
127 2532 : gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL,
128 : (OffsetNumber) 0, false);
129 124662 : for (i = 0; i < spl->splitVector.spl_nleft; i++)
130 : {
131 122130 : int j = spl->splitVector.spl_left[i];
132 122130 : float penalty = gistpenalty(giststate, attno, &entry, false,
133 122130 : &valvec[j], false);
134 :
135 122130 : if (penalty == 0.0)
136 : {
137 554 : spl->spl_dontcare[j] = true;
138 554 : NumDontCare++;
139 : }
140 : }
141 :
142 : /* And conversely for the right-side tuples */
143 2532 : gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL,
144 : (OffsetNumber) 0, false);
145 127182 : for (i = 0; i < spl->splitVector.spl_nright; i++)
146 : {
147 124650 : int j = spl->splitVector.spl_right[i];
148 124650 : float penalty = gistpenalty(giststate, attno, &entry, false,
149 124650 : &valvec[j], false);
150 :
151 124650 : if (penalty == 0.0)
152 : {
153 554 : spl->spl_dontcare[j] = true;
154 554 : NumDontCare++;
155 : }
156 : }
157 :
158 2532 : return NumDontCare;
159 : }
160 :
161 : /*
162 : * Remove tuples that are marked don't-cares from the tuple index array a[]
163 : * of length *len. This is applied separately to the spl_left and spl_right
164 : * arrays.
165 : */
166 : static void
167 12 : removeDontCares(OffsetNumber *a, int *len, const bool *dontcare)
168 : {
169 : int origlen,
170 : newlen,
171 : i;
172 : OffsetNumber *curwpos;
173 :
174 12 : origlen = newlen = *len;
175 12 : curwpos = a;
176 1128 : for (i = 0; i < origlen; i++)
177 : {
178 1116 : OffsetNumber ai = a[i];
179 :
180 1116 : if (dontcare[ai] == false)
181 : {
182 : /* re-emit item into a[] */
183 8 : *curwpos = ai;
184 8 : curwpos++;
185 : }
186 : else
187 1108 : newlen--;
188 : }
189 :
190 12 : *len = newlen;
191 12 : }
192 :
193 : /*
194 : * Place a single don't-care tuple into either the left or right side of the
195 : * split, according to which has least penalty for merging the tuple into
196 : * the previously-computed union keys. We need consider only columns starting
197 : * at attno.
198 : */
199 : static void
200 0 : placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v,
201 : IndexTuple itup, OffsetNumber off, int attno)
202 : {
203 : GISTENTRY identry[INDEX_MAX_KEYS];
204 : bool isnull[INDEX_MAX_KEYS];
205 0 : bool toLeft = true;
206 :
207 0 : gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0,
208 : identry, isnull);
209 :
210 0 : for (; attno < giststate->nonLeafTupdesc->natts; attno++)
211 : {
212 : float lpenalty,
213 : rpenalty;
214 : GISTENTRY entry;
215 :
216 0 : gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, false);
217 0 : lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno],
218 0 : identry + attno, isnull[attno]);
219 0 : gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, false);
220 0 : rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno],
221 0 : identry + attno, isnull[attno]);
222 :
223 0 : if (lpenalty != rpenalty)
224 : {
225 0 : if (lpenalty > rpenalty)
226 0 : toLeft = false;
227 0 : break;
228 : }
229 : }
230 :
231 0 : if (toLeft)
232 0 : v->splitVector.spl_left[v->splitVector.spl_nleft++] = off;
233 : else
234 0 : v->splitVector.spl_right[v->splitVector.spl_nright++] = off;
235 0 : }
236 :
237 : #define SWAPVAR( s, d, t ) \
238 : do { \
239 : (t) = (s); \
240 : (s) = (d); \
241 : (d) = (t); \
242 : } while(0)
243 :
244 : /*
245 : * Clean up when we did a secondary split but the user-defined PickSplit
246 : * method didn't support it (leaving spl_ldatum_exists or spl_rdatum_exists
247 : * true).
248 : *
249 : * We consider whether to swap the left and right outputs of the secondary
250 : * split; this can be worthwhile if the penalty for merging those tuples into
251 : * the previously chosen sets is less that way.
252 : *
253 : * In any case we must update the union datums for the current column by
254 : * adding in the previous union keys (oldL/oldR), since the user-defined
255 : * PickSplit method didn't do so.
256 : */
257 : static void
258 2 : supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno,
259 : GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
260 : {
261 2 : bool leaveOnLeft = true,
262 : tmpBool;
263 : GISTENTRY entryL,
264 : entryR,
265 : entrySL,
266 : entrySR;
267 :
268 2 : gistentryinit(entryL, oldL, r, NULL, 0, false);
269 2 : gistentryinit(entryR, oldR, r, NULL, 0, false);
270 2 : gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false);
271 2 : gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false);
272 :
273 2 : if (sv->spl_ldatum_exists && sv->spl_rdatum_exists)
274 2 : {
275 : float penalty1,
276 : penalty2;
277 :
278 2 : penalty1 = gistpenalty(giststate, attno, &entryL, false, &entrySL, false) +
279 2 : gistpenalty(giststate, attno, &entryR, false, &entrySR, false);
280 2 : penalty2 = gistpenalty(giststate, attno, &entryL, false, &entrySR, false) +
281 2 : gistpenalty(giststate, attno, &entryR, false, &entrySL, false);
282 :
283 2 : if (penalty1 > penalty2)
284 0 : leaveOnLeft = false;
285 : }
286 : else
287 : {
288 0 : GISTENTRY *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR;
289 : float penalty1,
290 : penalty2;
291 :
292 : /*
293 : * There is only one previously defined union, so we just choose swap
294 : * or not by lowest penalty for that side. We can only get here if a
295 : * secondary split happened to have all NULLs in its column in the
296 : * tuples that the outer recursion level had assigned to one side.
297 : * (Note that the null checks in gistSplitByKey don't prevent the
298 : * case, because they'll only be checking tuples that were considered
299 : * don't-cares at the outer recursion level, not the tuples that went
300 : * into determining the passed-down left and right union keys.)
301 : */
302 0 : penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false);
303 0 : penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false);
304 :
305 0 : if (penalty1 < penalty2)
306 0 : leaveOnLeft = sv->spl_ldatum_exists;
307 : else
308 0 : leaveOnLeft = sv->spl_rdatum_exists;
309 : }
310 :
311 2 : if (leaveOnLeft == false)
312 : {
313 : /*
314 : * swap left and right
315 : */
316 : OffsetNumber *off,
317 : noff;
318 : Datum datum;
319 :
320 0 : SWAPVAR(sv->spl_left, sv->spl_right, off);
321 0 : SWAPVAR(sv->spl_nleft, sv->spl_nright, noff);
322 0 : SWAPVAR(sv->spl_ldatum, sv->spl_rdatum, datum);
323 0 : gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false);
324 0 : gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false);
325 : }
326 :
327 2 : if (sv->spl_ldatum_exists)
328 2 : gistMakeUnionKey(giststate, attno, &entryL, false, &entrySL, false,
329 : &sv->spl_ldatum, &tmpBool);
330 :
331 2 : if (sv->spl_rdatum_exists)
332 2 : gistMakeUnionKey(giststate, attno, &entryR, false, &entrySR, false,
333 : &sv->spl_rdatum, &tmpBool);
334 :
335 2 : sv->spl_ldatum_exists = sv->spl_rdatum_exists = false;
336 2 : }
337 :
338 : /*
339 : * Trivial picksplit implementation. Function called only
340 : * if user-defined picksplit puts all keys on the same side of the split.
341 : * That is a bug of user-defined picksplit but we don't want to fail.
342 : */
343 : static void
344 92 : genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC *v, int attno)
345 : {
346 : OffsetNumber i,
347 : maxoff;
348 : int nbytes;
349 : GistEntryVector *evec;
350 :
351 92 : maxoff = entryvec->n - 1;
352 :
353 92 : nbytes = (maxoff + 2) * sizeof(OffsetNumber);
354 :
355 92 : v->spl_left = (OffsetNumber *) palloc(nbytes);
356 92 : v->spl_right = (OffsetNumber *) palloc(nbytes);
357 92 : v->spl_nleft = v->spl_nright = 0;
358 :
359 25636 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
360 : {
361 25544 : if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
362 : {
363 12772 : v->spl_left[v->spl_nleft] = i;
364 12772 : v->spl_nleft++;
365 : }
366 : else
367 : {
368 12772 : v->spl_right[v->spl_nright] = i;
369 12772 : v->spl_nright++;
370 : }
371 : }
372 :
373 : /*
374 : * Form union datums for each side
375 : */
376 92 : evec = palloc(sizeof(GISTENTRY) * entryvec->n + GEVHDRSZ);
377 :
378 92 : evec->n = v->spl_nleft;
379 92 : memcpy(evec->vector, entryvec->vector + FirstOffsetNumber,
380 92 : sizeof(GISTENTRY) * evec->n);
381 92 : v->spl_ldatum = FunctionCall2Coll(&giststate->unionFn[attno],
382 : giststate->supportCollation[attno],
383 : PointerGetDatum(evec),
384 : PointerGetDatum(&nbytes));
385 :
386 92 : evec->n = v->spl_nright;
387 92 : memcpy(evec->vector, entryvec->vector + FirstOffsetNumber + v->spl_nleft,
388 92 : sizeof(GISTENTRY) * evec->n);
389 92 : v->spl_rdatum = FunctionCall2Coll(&giststate->unionFn[attno],
390 : giststate->supportCollation[attno],
391 : PointerGetDatum(evec),
392 : PointerGetDatum(&nbytes));
393 92 : }
394 :
395 : /*
396 : * Calls user picksplit method for attno column to split tuples into
397 : * two vectors.
398 : *
399 : * Returns false if split is complete (there are no more index columns, or
400 : * there is no need to consider them because split is optimal already).
401 : *
402 : * Returns true and v->spl_dontcare = NULL if the picksplit result is
403 : * degenerate (all tuples seem to be don't-cares), so we should just
404 : * disregard this column and split on the next column(s) instead.
405 : *
406 : * Returns true and v->spl_dontcare != NULL if there are don't-care tuples
407 : * that could be relocated based on the next column(s). The don't-care
408 : * tuples have been removed from the split and must be reinserted by caller.
409 : * There is at least one non-don't-care tuple on each side of the split,
410 : * and union keys for all columns are updated to include just those tuples.
411 : *
412 : * A true result implies there is at least one more index column.
413 : */
414 : static bool
415 26354 : gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v,
416 : IndexTuple *itup, int len, GISTSTATE *giststate)
417 : {
418 26354 : GIST_SPLITVEC *sv = &v->splitVector;
419 :
420 : /*
421 : * Prepare spl_ldatum/spl_rdatum/spl_ldatum_exists/spl_rdatum_exists in
422 : * case we are doing a secondary split (see comments in gist.h).
423 : */
424 26354 : sv->spl_ldatum_exists = !(v->spl_lisnull[attno]);
425 26354 : sv->spl_rdatum_exists = !(v->spl_risnull[attno]);
426 26354 : sv->spl_ldatum = v->spl_lattr[attno];
427 26354 : sv->spl_rdatum = v->spl_rattr[attno];
428 :
429 : /*
430 : * Let the opclass-specific PickSplit method do its thing. Note that at
431 : * this point we know there are no null keys in the entryvec.
432 : */
433 26354 : FunctionCall2Coll(&giststate->picksplitFn[attno],
434 : giststate->supportCollation[attno],
435 : PointerGetDatum(entryvec),
436 : PointerGetDatum(sv));
437 :
438 26354 : if (sv->spl_nleft == 0 || sv->spl_nright == 0)
439 : {
440 : /*
441 : * User-defined picksplit failed to create an actual split, ie it put
442 : * everything on the same side. Complain but cope.
443 : */
444 92 : ereport(DEBUG1,
445 : (errcode(ERRCODE_INTERNAL_ERROR),
446 : errmsg("picksplit method for column %d of index \"%s\" failed",
447 : attno + 1, RelationGetRelationName(r)),
448 : errhint("The index is not optimal. To optimize it, contact a developer, or try to use the column as the second one in the CREATE INDEX command.")));
449 :
450 : /*
451 : * Reinit GIST_SPLITVEC. Although these fields are not used by
452 : * genericPickSplit(), set them up for further processing
453 : */
454 92 : sv->spl_ldatum_exists = !(v->spl_lisnull[attno]);
455 92 : sv->spl_rdatum_exists = !(v->spl_risnull[attno]);
456 92 : sv->spl_ldatum = v->spl_lattr[attno];
457 92 : sv->spl_rdatum = v->spl_rattr[attno];
458 :
459 : /* Do a generic split */
460 92 : genericPickSplit(giststate, entryvec, sv, attno);
461 : }
462 : else
463 : {
464 : /* hack for compatibility with old picksplit API */
465 26262 : if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber)
466 0 : sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1);
467 26262 : if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
468 0 : sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
469 : }
470 :
471 : /* Clean up if PickSplit didn't take care of a secondary split */
472 26354 : if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
473 2 : supportSecondarySplit(r, giststate, attno, sv,
474 : v->spl_lattr[attno], v->spl_rattr[attno]);
475 :
476 : /* emit union datums computed by PickSplit back to v arrays */
477 26354 : v->spl_lattr[attno] = sv->spl_ldatum;
478 26354 : v->spl_rattr[attno] = sv->spl_rdatum;
479 26354 : v->spl_lisnull[attno] = false;
480 26354 : v->spl_risnull[attno] = false;
481 :
482 : /*
483 : * If index columns remain, then consider whether we can improve the split
484 : * by using them.
485 : */
486 26354 : v->spl_dontcare = NULL;
487 :
488 26354 : if (attno + 1 < giststate->nonLeafTupdesc->natts)
489 : {
490 : int NumDontCare;
491 :
492 : /*
493 : * Make a quick check to see if left and right union keys are equal;
494 : * if so, the split is certainly degenerate, so tell caller to
495 : * re-split with the next column.
496 : */
497 2564 : if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum))
498 32 : return true;
499 :
500 : /*
501 : * Locate don't-care tuples, if any. If there are none, the split is
502 : * optimal, so just fall out and return false.
503 : */
504 2532 : v->spl_dontcare = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
505 :
506 2532 : NumDontCare = findDontCares(r, giststate, entryvec->vector, v, attno);
507 :
508 2532 : if (NumDontCare > 0)
509 : {
510 : /*
511 : * Remove don't-cares from spl_left[] and spl_right[].
512 : */
513 6 : removeDontCares(sv->spl_left, &sv->spl_nleft, v->spl_dontcare);
514 6 : removeDontCares(sv->spl_right, &sv->spl_nright, v->spl_dontcare);
515 :
516 : /*
517 : * If all tuples on either side were don't-cares, the split is
518 : * degenerate, and we're best off to ignore it and split on the
519 : * next column. (We used to try to press on with a secondary
520 : * split by forcing a random tuple on each side to be treated as
521 : * non-don't-care, but it seems unlikely that that technique
522 : * really gives a better result. Note that we don't want to try a
523 : * secondary split with empty left or right primary split sides,
524 : * because then there is no union key on that side for the
525 : * PickSplit function to try to expand, so it can have no good
526 : * figure of merit for what it's doing. Also note that this check
527 : * ensures we can't produce a bogus one-side-only split in the
528 : * NumDontCare == 1 special case below.)
529 : */
530 6 : if (sv->spl_nleft == 0 || sv->spl_nright == 0)
531 : {
532 4 : v->spl_dontcare = NULL;
533 4 : return true;
534 : }
535 :
536 : /*
537 : * Recompute union keys, considering only non-don't-care tuples.
538 : * NOTE: this will set union keys for remaining index columns,
539 : * which will cause later calls of gistUserPicksplit to pass those
540 : * values down to user-defined PickSplit methods with
541 : * spl_ldatum_exists/spl_rdatum_exists set true.
542 : */
543 2 : gistunionsubkey(giststate, itup, v);
544 :
545 2 : if (NumDontCare == 1)
546 : {
547 : /*
548 : * If there's only one don't-care tuple then we can't do a
549 : * PickSplit on it, so just choose whether to send it left or
550 : * right by comparing penalties. We needed the
551 : * gistunionsubkey step anyway so that we have appropriate
552 : * union keys for figuring the penalties.
553 : */
554 : OffsetNumber toMove;
555 :
556 : /* find it ... */
557 0 : for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++)
558 : {
559 0 : if (v->spl_dontcare[toMove])
560 0 : break;
561 : }
562 : Assert(toMove < entryvec->n);
563 :
564 : /* ... and assign it to cheaper side */
565 0 : placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
566 :
567 : /*
568 : * At this point the union keys are wrong, but we don't care
569 : * because we're done splitting. The outermost recursion
570 : * level of gistSplitByKey will fix things before returning.
571 : */
572 : }
573 : else
574 2 : return true;
575 : }
576 : }
577 :
578 26316 : return false;
579 : }
580 :
581 : /*
582 : * simply split page in half
583 : */
584 : static void
585 0 : gistSplitHalf(GIST_SPLITVEC *v, int len)
586 : {
587 : int i;
588 :
589 0 : v->spl_nright = v->spl_nleft = 0;
590 0 : v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
591 0 : v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
592 0 : for (i = 1; i <= len; i++)
593 0 : if (i < len / 2)
594 0 : v->spl_right[v->spl_nright++] = i;
595 : else
596 0 : v->spl_left[v->spl_nleft++] = i;
597 :
598 : /* we need not compute union keys, caller took care of it */
599 0 : }
600 :
601 : /*
602 : * gistSplitByKey: main entry point for page-splitting algorithm
603 : *
604 : * r: index relation
605 : * page: page being split
606 : * itup: array of IndexTuples to be processed
607 : * len: number of IndexTuples to be processed (must be at least 2)
608 : * giststate: additional info about index
609 : * v: working state and output area
610 : * attno: column we are working on (zero-based index)
611 : *
612 : * Outside caller must initialize v->spl_lisnull and v->spl_risnull arrays
613 : * to all-true. On return, spl_left/spl_nleft contain indexes of tuples
614 : * to go left, spl_right/spl_nright contain indexes of tuples to go right,
615 : * spl_lattr/spl_lisnull contain left-side union key values, and
616 : * spl_rattr/spl_risnull contain right-side union key values. Other fields
617 : * in this struct are workspace for this file.
618 : *
619 : * Outside caller must pass zero for attno. The function may internally
620 : * recurse to the next column by passing attno+1.
621 : */
622 : void
623 26546 : gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
624 : GISTSTATE *giststate, GistSplitVector *v, int attno)
625 : {
626 : GistEntryVector *entryvec;
627 : OffsetNumber *offNullTuples;
628 26546 : int nOffNullTuples = 0;
629 : int i;
630 :
631 : /* generate the item array, and identify tuples with null keys */
632 : /* note that entryvec->vector[0] goes unused in this code */
633 26546 : entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
634 26546 : entryvec->n = len + 1;
635 26546 : offNullTuples = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
636 :
637 2305200 : for (i = 1; i <= len; i++)
638 : {
639 : Datum datum;
640 : bool IsNull;
641 :
642 2278654 : datum = index_getattr(itup[i - 1], attno + 1, giststate->leafTupdesc,
643 : &IsNull);
644 2278654 : gistdentryinit(giststate, attno, &(entryvec->vector[i]),
645 : datum, r, page, i,
646 : false, IsNull);
647 2278654 : if (IsNull)
648 1682 : offNullTuples[nOffNullTuples++] = i;
649 : }
650 :
651 26546 : if (nOffNullTuples == len)
652 : {
653 : /*
654 : * Corner case: All keys in attno column are null, so just transfer
655 : * our attention to the next column. If there's no next column, just
656 : * split page in half.
657 : */
658 0 : v->spl_risnull[attno] = v->spl_lisnull[attno] = true;
659 :
660 0 : if (attno + 1 < giststate->nonLeafTupdesc->natts)
661 0 : gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
662 : else
663 0 : gistSplitHalf(&v->splitVector, len);
664 : }
665 26546 : else if (nOffNullTuples > 0)
666 : {
667 192 : int j = 0;
668 :
669 : /*
670 : * We don't want to mix NULL and not-NULL keys on one page, so split
671 : * nulls to right page and not-nulls to left.
672 : */
673 192 : v->splitVector.spl_right = offNullTuples;
674 192 : v->splitVector.spl_nright = nOffNullTuples;
675 192 : v->spl_risnull[attno] = true;
676 :
677 192 : v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
678 192 : v->splitVector.spl_nleft = 0;
679 21570 : for (i = 1; i <= len; i++)
680 21378 : if (j < v->splitVector.spl_nright && offNullTuples[j] == i)
681 1682 : j++;
682 : else
683 19696 : v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
684 :
685 : /* Compute union keys, unless outer recursion level will handle it */
686 192 : if (attno == 0 && giststate->nonLeafTupdesc->natts == 1)
687 : {
688 190 : v->spl_dontcare = NULL;
689 190 : gistunionsubkey(giststate, itup, v);
690 : }
691 : }
692 : else
693 : {
694 : /*
695 : * All keys are not-null, so apply user-defined PickSplit method
696 : */
697 26354 : if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate))
698 : {
699 : /*
700 : * Splitting on attno column is not optimal, so consider
701 : * redistributing don't-care tuples according to the next column
702 : */
703 : Assert(attno + 1 < giststate->nonLeafTupdesc->natts);
704 :
705 38 : if (v->spl_dontcare == NULL)
706 : {
707 : /*
708 : * This split was actually degenerate, so ignore it altogether
709 : * and just split according to the next column.
710 : */
711 36 : gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
712 : }
713 : else
714 : {
715 : /*
716 : * Form an array of just the don't-care tuples to pass to a
717 : * recursive invocation of this function for the next column.
718 : */
719 2 : IndexTuple *newitup = (IndexTuple *) palloc(len * sizeof(IndexTuple));
720 2 : OffsetNumber *map = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
721 2 : int newlen = 0;
722 : GIST_SPLITVEC backupSplit;
723 :
724 374 : for (i = 0; i < len; i++)
725 : {
726 372 : if (v->spl_dontcare[i + 1])
727 : {
728 368 : newitup[newlen] = itup[i];
729 368 : map[newlen] = i + 1;
730 368 : newlen++;
731 : }
732 : }
733 :
734 : Assert(newlen > 0);
735 :
736 : /*
737 : * Make a backup copy of v->splitVector, since the recursive
738 : * call will overwrite that with its own result.
739 : */
740 2 : backupSplit = v->splitVector;
741 2 : backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
742 2 : memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft);
743 2 : backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
744 2 : memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright);
745 :
746 : /* Recursively decide how to split the don't-care tuples */
747 2 : gistSplitByKey(r, page, newitup, newlen, giststate, v, attno + 1);
748 :
749 : /* Merge result of subsplit with non-don't-care tuples */
750 186 : for (i = 0; i < v->splitVector.spl_nleft; i++)
751 184 : backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
752 186 : for (i = 0; i < v->splitVector.spl_nright; i++)
753 184 : backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];
754 :
755 2 : v->splitVector = backupSplit;
756 : }
757 : }
758 : }
759 :
760 : /*
761 : * If we're handling a multicolumn index, at the end of the recursion
762 : * recompute the left and right union datums for all index columns. This
763 : * makes sure we hand back correct union datums in all corner cases,
764 : * including when we haven't processed all columns to start with, or when
765 : * a secondary split moved "don't care" tuples from one side to the other
766 : * (we really shouldn't assume that that didn't change the union datums).
767 : *
768 : * Note: when we're in an internal recursion (attno > 0), we do not worry
769 : * about whether the union datums we return with are sensible, since
770 : * calling levels won't care. Also, in a single-column index, we expect
771 : * that PickSplit (or the special cases above) produced correct union
772 : * datums.
773 : */
774 26546 : if (attno == 0 && giststate->nonLeafTupdesc->natts > 1)
775 : {
776 2566 : v->spl_dontcare = NULL;
777 2566 : gistunionsubkey(giststate, itup, v);
778 : }
779 26546 : }
|