Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * gist.c
4 : * interface routines for the postgres GiST index access method.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gist/gist.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/gist_private.h"
18 : #include "access/gistscan.h"
19 : #include "access/xloginsert.h"
20 : #include "catalog/pg_collation.h"
21 : #include "commands/vacuum.h"
22 : #include "miscadmin.h"
23 : #include "nodes/execnodes.h"
24 : #include "storage/predicate.h"
25 : #include "utils/fmgrprotos.h"
26 : #include "utils/index_selfuncs.h"
27 : #include "utils/memutils.h"
28 : #include "utils/rel.h"
29 :
30 : /* non-export function prototypes */
31 : static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate);
32 : static bool gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
33 : GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum);
34 : static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
35 : GISTSTATE *giststate,
36 : IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
37 : Buffer leftchild, Buffer rightchild,
38 : bool unlockbuf, bool unlockleftchild);
39 : static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
40 : GISTSTATE *giststate, List *splitinfo, bool unlockbuf);
41 : static void gistprunepage(Relation rel, Page page, Buffer buffer,
42 : Relation heapRel);
43 :
44 :
/*
 * Prepend a freshly zeroed SplitPageLayout node to the list (d); the new
 * node becomes the head.  The block/buffer fields start out invalid and are
 * filled in by the caller.  Wrapped in do/while(0) so the macro expands to
 * a single statement.
 */
#define ROTATEDIST(d) do { \
	SplitPageLayout *tmp = (SplitPageLayout *) palloc0(sizeof(SplitPageLayout)); \
	tmp->block.blkno = InvalidBlockNumber; \
	tmp->buffer = InvalidBuffer; \
	tmp->next = (d); \
	(d)=tmp; \
} while(0)
52 :
53 :
54 : /*
55 : * GiST handler function: return IndexAmRoutine with access method parameters
56 : * and callbacks.
57 : */
58 : Datum
59 10504 : gisthandler(PG_FUNCTION_ARGS)
60 : {
61 10504 : IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
62 :
63 10504 : amroutine->amstrategies = 0;
64 10504 : amroutine->amsupport = GISTNProcs;
65 10504 : amroutine->amoptsprocnum = GIST_OPTIONS_PROC;
66 10504 : amroutine->amcanorder = false;
67 10504 : amroutine->amcanorderbyop = true;
68 10504 : amroutine->amcanbackward = false;
69 10504 : amroutine->amcanunique = false;
70 10504 : amroutine->amcanmulticol = true;
71 10504 : amroutine->amoptionalkey = true;
72 10504 : amroutine->amsearcharray = false;
73 10504 : amroutine->amsearchnulls = true;
74 10504 : amroutine->amstorage = true;
75 10504 : amroutine->amclusterable = true;
76 10504 : amroutine->ampredlocks = true;
77 10504 : amroutine->amcanparallel = false;
78 10504 : amroutine->amcanbuildparallel = false;
79 10504 : amroutine->amcaninclude = true;
80 10504 : amroutine->amusemaintenanceworkmem = false;
81 10504 : amroutine->amsummarizing = false;
82 10504 : amroutine->amparallelvacuumoptions =
83 : VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
84 10504 : amroutine->amkeytype = InvalidOid;
85 :
86 10504 : amroutine->ambuild = gistbuild;
87 10504 : amroutine->ambuildempty = gistbuildempty;
88 10504 : amroutine->aminsert = gistinsert;
89 10504 : amroutine->aminsertcleanup = NULL;
90 10504 : amroutine->ambulkdelete = gistbulkdelete;
91 10504 : amroutine->amvacuumcleanup = gistvacuumcleanup;
92 10504 : amroutine->amcanreturn = gistcanreturn;
93 10504 : amroutine->amcostestimate = gistcostestimate;
94 10504 : amroutine->amgettreeheight = NULL;
95 10504 : amroutine->amoptions = gistoptions;
96 10504 : amroutine->amproperty = gistproperty;
97 10504 : amroutine->ambuildphasename = NULL;
98 10504 : amroutine->amvalidate = gistvalidate;
99 10504 : amroutine->amadjustmembers = gistadjustmembers;
100 10504 : amroutine->ambeginscan = gistbeginscan;
101 10504 : amroutine->amrescan = gistrescan;
102 10504 : amroutine->amgettuple = gistgettuple;
103 10504 : amroutine->amgetbitmap = gistgetbitmap;
104 10504 : amroutine->amendscan = gistendscan;
105 10504 : amroutine->ammarkpos = NULL;
106 10504 : amroutine->amrestrpos = NULL;
107 10504 : amroutine->amestimateparallelscan = NULL;
108 10504 : amroutine->aminitparallelscan = NULL;
109 10504 : amroutine->amparallelrescan = NULL;
110 10504 : amroutine->amtranslatestrategy = NULL;
111 10504 : amroutine->amtranslatecmptype = gisttranslatecmptype;
112 :
113 10504 : PG_RETURN_POINTER(amroutine);
114 : }
115 :
116 : /*
117 : * Create and return a temporary memory context for use by GiST. We
118 : * _always_ invoke user-provided methods in a temporary memory
119 : * context, so that memory leaks in those functions cannot cause
120 : * problems. Also, we use some additional temporary contexts in the
121 : * GiST code itself, to avoid the need to do some awkward manual
122 : * memory management.
123 : */
124 : MemoryContext
125 10710 : createTempGistContext(void)
126 : {
127 10710 : return AllocSetContextCreate(CurrentMemoryContext,
128 : "GiST temporary context",
129 : ALLOCSET_DEFAULT_SIZES);
130 : }
131 :
132 : /*
133 : * gistbuildempty() -- build an empty gist index in the initialization fork
134 : */
135 : void
136 10 : gistbuildempty(Relation index)
137 : {
138 : Buffer buffer;
139 :
140 : /* Initialize the root page */
141 10 : buffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
142 : EB_SKIP_EXTENSION_LOCK | EB_LOCK_FIRST);
143 :
144 : /* Initialize and xlog buffer */
145 10 : START_CRIT_SECTION();
146 10 : GISTInitBuffer(buffer, F_LEAF);
147 10 : MarkBufferDirty(buffer);
148 10 : log_newpage_buffer(buffer, true);
149 10 : END_CRIT_SECTION();
150 :
151 : /* Unlock and release the buffer */
152 10 : UnlockReleaseBuffer(buffer);
153 10 : }
154 :
/*
 * gistinsert -- wrapper for GiST tuple insertion.
 *
 * This is the public interface routine for tuple insertion in GiSTs.
 * It doesn't do any work; just locks the relation and passes the buck.
 */
bool
gistinsert(Relation r, Datum *values, bool *isnull,
		   ItemPointer ht_ctid, Relation heapRel,
		   IndexUniqueCheck checkUnique,
		   bool indexUnchanged,
		   IndexInfo *indexInfo)
{
	GISTSTATE  *giststate = (GISTSTATE *) indexInfo->ii_AmCache;
	IndexTuple	itup;
	MemoryContext oldCxt;

	/* Initialize GISTSTATE cache if first call in this statement */
	if (giststate == NULL)
	{
		/* cache lives in ii_Context so it survives across calls */
		oldCxt = MemoryContextSwitchTo(indexInfo->ii_Context);
		giststate = initGISTstate(r);
		giststate->tempCxt = createTempGistContext();
		indexInfo->ii_AmCache = giststate;
		MemoryContextSwitchTo(oldCxt);
	}

	/* do all per-tuple work in the short-lived context, reset afterwards */
	oldCxt = MemoryContextSwitchTo(giststate->tempCxt);

	itup = gistFormTuple(giststate, r, values, isnull, true);
	itup->t_tid = *ht_ctid;

	gistdoinsert(r, itup, 0, giststate, heapRel, false);

	/* cleanup */
	MemoryContextSwitchTo(oldCxt);
	MemoryContextReset(giststate->tempCxt);

	/* GiST never enforces uniqueness, so there's never a conflict to report */
	return false;
}
195 :
196 :
/*
 * Place tuples from 'itup' to 'buffer'. If 'oldoffnum' is valid, the tuple
 * at that offset is atomically removed along with inserting the new tuples.
 * This is used to replace a tuple with a new one.
 *
 * If 'leftchildbuf' is valid, we're inserting the downlink for the page
 * to the right of 'leftchildbuf', or updating the downlink for 'leftchildbuf'.
 * F_FOLLOW_RIGHT flag on 'leftchildbuf' is cleared and NSN is set.
 *
 * If 'markfollowright' is true and the page is split, the left child is
 * marked with F_FOLLOW_RIGHT flag. That is the normal case. During buffered
 * index build, however, there is no concurrent access and the page splitting
 * is done in a slightly simpler fashion, and false is passed.
 *
 * If there is not enough room on the page, it is split. All the split
 * pages are kept pinned and locked and returned in *splitinfo, the caller
 * is responsible for inserting the downlinks for them. However, if
 * 'buffer' is the root page and it needs to be split, gistplacetopage()
 * performs the split as one atomic operation, and *splitinfo is set to NIL.
 * In that case, we continue to hold the root page locked, and the child
 * pages are released; note that new tuple(s) are *not* on the root page
 * but in one of the new child pages.
 *
 * If 'newblkno' is not NULL, returns the block number of page the first
 * new/updated tuple was inserted to. Usually it's the given page, but could
 * be its right sibling if the page was split.
 *
 * Returns 'true' if the page was split, 'false' otherwise.
 */
bool
gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
				Buffer buffer,
				IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
				BlockNumber *newblkno,
				Buffer leftchildbuf,
				List **splitinfo,
				bool markfollowright,
				Relation heapRel,
				bool is_build)
{
	BlockNumber blkno = BufferGetBlockNumber(buffer);
	Page		page = BufferGetPage(buffer);
	bool		is_leaf = (GistPageIsLeaf(page)) ? true : false;
	XLogRecPtr	recptr;
	bool		is_split;

	/*
	 * Refuse to modify a page that's incompletely split. This should not
	 * happen because we finish any incomplete splits while we walk down the
	 * tree. However, it's remotely possible that another concurrent inserter
	 * splits a parent page, and errors out before completing the split. We
	 * will just throw an error in that case, and leave any split we had in
	 * progress unfinished too. The next insert that comes along will clean up
	 * the mess.
	 */
	if (GistFollowRight(page))
		elog(ERROR, "concurrent GiST page split was incomplete");

	/* should never try to insert to a deleted page */
	Assert(!GistPageIsDeleted(page));

	*splitinfo = NIL;

	/*
	 * if isupdate, remove old key: This node's key has been modified, either
	 * because a child split occurred or because we needed to adjust our key
	 * for an insert in a child node. Therefore, remove the old version of
	 * this node's key.
	 *
	 * for WAL replay, in the non-split case we handle this by setting up a
	 * one-element todelete array; in the split case, it's handled implicitly
	 * because the tuple vector passed to gistSplit won't include this tuple.
	 */
	is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);

	/*
	 * If leaf page is full, try at first to delete dead tuples. And then
	 * check again.
	 */
	if (is_split && GistPageIsLeaf(page) && GistPageHasGarbage(page))
	{
		gistprunepage(rel, page, buffer, heapRel);
		is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
	}

	if (is_split)
	{
		/* no space for insertion */
		IndexTuple *itvec;
		int			tlen;
		SplitPageLayout *dist = NULL,
				   *ptr;
		BlockNumber oldrlink = InvalidBlockNumber;
		GistNSN		oldnsn = 0;
		/* stack-allocated; only linked into 'dist' for the root-split case,
		 * and 'dist' is fully consumed before we return */
		SplitPageLayout rootpg;
		bool		is_rootsplit;
		int			npage;

		is_rootsplit = (blkno == GIST_ROOT_BLKNO);

		/*
		 * Form index tuples vector to split. If we're replacing an old tuple,
		 * remove the old version from the vector.
		 */
		itvec = gistextractpage(page, &tlen);
		if (OffsetNumberIsValid(oldoffnum))
		{
			/* on inner page we should remove old tuple */
			int			pos = oldoffnum - FirstOffsetNumber;

			tlen--;
			if (pos != tlen)
				memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
		}
		itvec = gistjoinvector(itvec, &tlen, itup, ntup);
		dist = gistSplit(rel, page, itvec, tlen, giststate);

		/*
		 * Check that split didn't produce too many pages.
		 */
		npage = 0;
		for (ptr = dist; ptr; ptr = ptr->next)
			npage++;
		/* in a root split, we'll add one more page to the list below */
		if (is_rootsplit)
			npage++;
		if (npage > GIST_MAX_SPLIT_PAGES)
			elog(ERROR, "GiST page split into too many halves (%d, maximum %d)",
				 npage, GIST_MAX_SPLIT_PAGES);

		/*
		 * Set up pages to work with. Allocate new buffers for all but the
		 * leftmost page. The original page becomes the new leftmost page, and
		 * is just replaced with the new contents.
		 *
		 * For a root-split, allocate new buffers for all child pages, the
		 * original page is overwritten with new root page containing
		 * downlinks to the new child pages.
		 */
		ptr = dist;
		if (!is_rootsplit)
		{
			/* save old rightlink and NSN */
			oldrlink = GistPageGetOpaque(page)->rightlink;
			oldnsn = GistPageGetNSN(page);

			dist->buffer = buffer;
			dist->block.blkno = BufferGetBlockNumber(buffer);
			dist->page = PageGetTempPageCopySpecial(BufferGetPage(buffer));

			/* clean all flags except F_LEAF */
			GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;

			ptr = ptr->next;
		}
		for (; ptr; ptr = ptr->next)
		{
			/* Allocate new page */
			ptr->buffer = gistNewBuffer(rel, heapRel);
			GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
			ptr->page = BufferGetPage(ptr->buffer);
			ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
			PredicateLockPageSplit(rel,
								   BufferGetBlockNumber(buffer),
								   BufferGetBlockNumber(ptr->buffer));
		}

		/*
		 * Now that we know which blocks the new pages go to, set up downlink
		 * tuples to point to them.
		 */
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
			GistTupleSetValid(ptr->itup);
		}

		/*
		 * If this is a root split, we construct the new root page with the
		 * downlinks here directly, instead of requiring the caller to insert
		 * them. Add the new root page to the list along with the child pages.
		 */
		if (is_rootsplit)
		{
			IndexTuple *downlinks;
			int			ndownlinks = 0;
			int			i;

			rootpg.buffer = buffer;
			rootpg.page = PageGetTempPageCopySpecial(BufferGetPage(rootpg.buffer));
			GistPageGetOpaque(rootpg.page)->flags = 0;

			/* Prepare a vector of all the downlinks */
			for (ptr = dist; ptr; ptr = ptr->next)
				ndownlinks++;
			downlinks = palloc(sizeof(IndexTuple) * ndownlinks);
			for (i = 0, ptr = dist; ptr; ptr = ptr->next)
				downlinks[i++] = ptr->itup;

			rootpg.block.blkno = GIST_ROOT_BLKNO;
			rootpg.block.num = ndownlinks;
			rootpg.list = gistfillitupvec(downlinks, ndownlinks,
										  &(rootpg.lenlist));
			rootpg.itup = NULL;

			rootpg.next = dist;
			dist = &rootpg;
		}
		else
		{
			/* Prepare split-info to be returned to caller */
			for (ptr = dist; ptr; ptr = ptr->next)
			{
				GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));

				si->buf = ptr->buffer;
				si->downlink = ptr->itup;
				*splitinfo = lappend(*splitinfo, si);
			}
		}

		/*
		 * Fill all pages. All the pages are new, ie. freshly allocated empty
		 * pages, or a temporary copy of the old page.
		 */
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			char	   *data = (char *) (ptr->list);

			/* ptr->list is a packed run of tuples; walk it tuple by tuple */
			for (int i = 0; i < ptr->block.num; i++)
			{
				IndexTuple	thistup = (IndexTuple) data;

				if (PageAddItem(ptr->page, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
					elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(rel));

				/*
				 * If this is the first inserted/updated tuple, let the caller
				 * know which page it landed on.
				 */
				if (newblkno && ItemPointerEquals(&thistup->t_tid, &(*itup)->t_tid))
					*newblkno = ptr->block.blkno;

				data += IndexTupleSize(thistup);
			}

			/* Set up rightlinks */
			if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO)
				GistPageGetOpaque(ptr->page)->rightlink =
					ptr->next->block.blkno;
			else
				GistPageGetOpaque(ptr->page)->rightlink = oldrlink;

			/*
			 * Mark the all but the right-most page with the follow-right
			 * flag. It will be cleared as soon as the downlink is inserted
			 * into the parent, but this ensures that if we error out before
			 * that, the index is still consistent. (in buffering build mode,
			 * any error will abort the index build anyway, so this is not
			 * needed.)
			 */
			if (ptr->next && !is_rootsplit && markfollowright)
				GistMarkFollowRight(ptr->page);
			else
				GistClearFollowRight(ptr->page);

			/*
			 * Copy the NSN of the original page to all pages. The
			 * F_FOLLOW_RIGHT flags ensure that scans will follow the
			 * rightlinks until the downlinks are inserted.
			 */
			GistPageSetNSN(ptr->page, oldnsn);
		}

		/*
		 * gistXLogSplit() needs to WAL log a lot of pages, prepare WAL
		 * insertion for that. NB: The number of pages and data segments
		 * specified here must match the calculations in gistXLogSplit()!
		 */
		if (!is_build && RelationNeedsWAL(rel))
			XLogEnsureRecordSpace(npage, 1 + npage * 2);

		START_CRIT_SECTION();

		/*
		 * Must mark buffers dirty before XLogInsert, even though we'll still
		 * be changing their opaque fields below.
		 */
		for (ptr = dist; ptr; ptr = ptr->next)
			MarkBufferDirty(ptr->buffer);
		if (BufferIsValid(leftchildbuf))
			MarkBufferDirty(leftchildbuf);

		/*
		 * The first page in the chain was a temporary working copy meant to
		 * replace the old page. Copy it over the old page.
		 */
		PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
		dist->page = BufferGetPage(dist->buffer);

		/*
		 * Write the WAL record.
		 *
		 * If we're building a new index, however, we don't WAL-log changes
		 * yet. The LSN-NSN interlock between parent and child requires that
		 * LSNs never move backwards, so set the LSNs to a value that's
		 * smaller than any real or fake unlogged LSN that might be generated
		 * later. (There can't be any concurrent scans during index build, so
		 * we don't need to be able to detect concurrent splits yet.)
		 */
		if (is_build)
			recptr = GistBuildLSN;
		else
		{
			if (RelationNeedsWAL(rel))
				recptr = gistXLogSplit(is_leaf,
									   dist, oldrlink, oldnsn, leftchildbuf,
									   markfollowright);
			else
				recptr = gistGetFakeLSN(rel);
		}

		for (ptr = dist; ptr; ptr = ptr->next)
			PageSetLSN(ptr->page, recptr);

		/*
		 * Return the new child buffers to the caller.
		 *
		 * If this was a root split, we've already inserted the downlink
		 * pointers, in the form of a new root page. Therefore we can release
		 * all the new buffers, and keep just the root page locked.
		 */
		if (is_rootsplit)
		{
			for (ptr = dist->next; ptr; ptr = ptr->next)
				UnlockReleaseBuffer(ptr->buffer);
		}
	}
	else
	{
		/*
		 * Enough space. We always get here if ntup==0.
		 */
		START_CRIT_SECTION();

		/*
		 * Delete old tuple if any, then insert new tuple(s) if any. If
		 * possible, use the fast path of PageIndexTupleOverwrite.
		 */
		if (OffsetNumberIsValid(oldoffnum))
		{
			if (ntup == 1)
			{
				/* One-for-one replacement, so use PageIndexTupleOverwrite */
				if (!PageIndexTupleOverwrite(page, oldoffnum, (Item) *itup,
											 IndexTupleSize(*itup)))
					elog(ERROR, "failed to add item to index page in \"%s\"",
						 RelationGetRelationName(rel));
			}
			else
			{
				/* Delete old, then append new tuple(s) to page */
				PageIndexTupleDelete(page, oldoffnum);
				gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
			}
		}
		else
		{
			/* Just append new tuples at the end of the page */
			gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
		}

		MarkBufferDirty(buffer);

		if (BufferIsValid(leftchildbuf))
			MarkBufferDirty(leftchildbuf);

		if (is_build)
			recptr = GistBuildLSN;
		else
		{
			if (RelationNeedsWAL(rel))
			{
				OffsetNumber ndeloffs = 0,
							deloffs[1];

				if (OffsetNumberIsValid(oldoffnum))
				{
					deloffs[0] = oldoffnum;
					ndeloffs = 1;
				}

				recptr = gistXLogUpdate(buffer,
										deloffs, ndeloffs, itup, ntup,
										leftchildbuf);
			}
			else
				recptr = gistGetFakeLSN(rel);
		}
		PageSetLSN(page, recptr);

		if (newblkno)
			*newblkno = blkno;
	}

	/*
	 * If we inserted the downlink for a child page, set NSN and clear
	 * F_FOLLOW_RIGHT flag on the left child, so that concurrent scans know to
	 * follow the rightlink if and only if they looked at the parent page
	 * before we inserted the downlink.
	 *
	 * Note that we do this *after* writing the WAL record. That means that
	 * the possible full page image in the WAL record does not include these
	 * changes, and they must be replayed even if the page is restored from
	 * the full page image. There's a chicken-and-egg problem: if we updated
	 * the child pages first, we wouldn't know the recptr of the WAL record
	 * we're about to write.
	 */
	if (BufferIsValid(leftchildbuf))
	{
		Page		leftpg = BufferGetPage(leftchildbuf);

		GistPageSetNSN(leftpg, recptr);
		GistClearFollowRight(leftpg);

		PageSetLSN(leftpg, recptr);
	}

	END_CRIT_SECTION();

	return is_split;
}
629 :
/*
 * Workhorse routine for doing insertion into a GiST index. Note that
 * this routine assumes it is invoked in a short-lived memory context,
 * so it does not bother releasing palloc'd allocations.
 */
void
gistdoinsert(Relation r, IndexTuple itup, Size freespace,
			 GISTSTATE *giststate, Relation heapRel, bool is_build)
{
	ItemId		iid;
	IndexTuple	idxtuple;
	GISTInsertStack firststack;
	GISTInsertStack *stack;
	GISTInsertState state;
	bool		xlocked = false;	/* do we hold GIST_EXCLUSIVE on stack->buffer? */

	memset(&state, 0, sizeof(GISTInsertState));
	state.freespace = freespace;
	state.r = r;
	state.heapRel = heapRel;
	state.is_build = is_build;

	/* Start from the root */
	firststack.blkno = GIST_ROOT_BLKNO;
	firststack.lsn = 0;
	firststack.retry_from_parent = false;
	firststack.parent = NULL;
	firststack.downlinkoffnum = InvalidOffsetNumber;
	state.stack = stack = &firststack;

	/*
	 * Walk down along the path of smallest penalty, updating the parent
	 * pointers with the key we're inserting as we go. If we crash in the
	 * middle, the tree is consistent, although the possible parent updates
	 * were a waste.
	 */
	for (;;)
	{
		/*
		 * If we split an internal page while descending the tree, we have to
		 * retry at the parent. (Normally, the LSN-NSN interlock below would
		 * also catch this and cause us to retry. But LSNs are not updated
		 * during index build.)
		 */
		while (stack->retry_from_parent)
		{
			if (xlocked)
				LockBuffer(stack->buffer, GIST_UNLOCK);
			xlocked = false;
			ReleaseBuffer(stack->buffer);
			state.stack = stack = stack->parent;
		}

		/* invalid lsn means this stack entry has no buffer pinned yet */
		if (XLogRecPtrIsInvalid(stack->lsn))
			stack->buffer = ReadBuffer(state.r, stack->blkno);

		/*
		 * Be optimistic and grab shared lock first. Swap it for an exclusive
		 * lock later if we need to update the page.
		 */
		if (!xlocked)
		{
			LockBuffer(stack->buffer, GIST_SHARE);
			gistcheckpage(state.r, stack->buffer);
		}

		stack->page = (Page) BufferGetPage(stack->buffer);
		stack->lsn = xlocked ?
			PageGetLSN(stack->page) : BufferGetLSNAtomic(stack->buffer);
		Assert(!RelationNeedsWAL(state.r) || !XLogRecPtrIsInvalid(stack->lsn));

		/*
		 * If this page was split but the downlink was never inserted to the
		 * parent because the inserting backend crashed before doing that, fix
		 * that now.
		 */
		if (GistFollowRight(stack->page))
		{
			if (!xlocked)
			{
				LockBuffer(stack->buffer, GIST_UNLOCK);
				LockBuffer(stack->buffer, GIST_EXCLUSIVE);
				xlocked = true;
				/* someone might've completed the split when we unlocked */
				if (!GistFollowRight(stack->page))
					continue;
			}
			gistfixsplit(&state, giststate);

			UnlockReleaseBuffer(stack->buffer);
			xlocked = false;
			state.stack = stack = stack->parent;
			continue;
		}

		if ((stack->blkno != GIST_ROOT_BLKNO &&
			 stack->parent->lsn < GistPageGetNSN(stack->page)) ||
			GistPageIsDeleted(stack->page))
		{
			/*
			 * Concurrent split or page deletion detected. There's no
			 * guarantee that the downlink for this page is consistent with
			 * the tuple we're inserting anymore, so go back to parent and
			 * rechoose the best child.
			 */
			UnlockReleaseBuffer(stack->buffer);
			xlocked = false;
			state.stack = stack = stack->parent;
			continue;
		}

		if (!GistPageIsLeaf(stack->page))
		{
			/*
			 * This is an internal page so continue to walk down the tree.
			 * Find the child node that has the minimum insertion penalty.
			 */
			BlockNumber childblkno;
			IndexTuple	newtup;
			GISTInsertStack *item;
			OffsetNumber downlinkoffnum;

			downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate);
			iid = PageGetItemId(stack->page, downlinkoffnum);
			idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
			childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));

			/*
			 * Check that it's not a leftover invalid tuple from pre-9.1
			 */
			if (GistTupleIsInvalid(idxtuple))
				ereport(ERROR,
						(errmsg("index \"%s\" contains an inner tuple marked as invalid",
								RelationGetRelationName(r)),
						 errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
						 errhint("Please REINDEX it.")));

			/*
			 * Check that the key representing the target child node is
			 * consistent with the key we're inserting. Update it if it's not.
			 */
			newtup = gistgetadjusted(state.r, idxtuple, itup, giststate);
			if (newtup)
			{
				/*
				 * Swap shared lock for an exclusive one. Beware, the page may
				 * change while we unlock/lock the page...
				 */
				if (!xlocked)
				{
					LockBuffer(stack->buffer, GIST_UNLOCK);
					LockBuffer(stack->buffer, GIST_EXCLUSIVE);
					xlocked = true;
					stack->page = (Page) BufferGetPage(stack->buffer);

					if (PageGetLSN(stack->page) != stack->lsn)
					{
						/* the page was changed while we unlocked it, retry */
						continue;
					}
				}

				/*
				 * Update the tuple.
				 *
				 * We still hold the lock after gistinserttuple(), but it
				 * might have to split the page to make the updated tuple fit.
				 * In that case the updated tuple might migrate to the other
				 * half of the split, so we have to go back to the parent and
				 * descend back to the half that's a better fit for the new
				 * tuple.
				 */
				if (gistinserttuple(&state, stack, giststate, newtup,
									downlinkoffnum))
				{
					/*
					 * If this was a root split, the root page continues to be
					 * the parent and the updated tuple went to one of the
					 * child pages, so we just need to retry from the root
					 * page.
					 */
					if (stack->blkno != GIST_ROOT_BLKNO)
					{
						UnlockReleaseBuffer(stack->buffer);
						xlocked = false;
						state.stack = stack = stack->parent;
					}
					continue;
				}
			}
			LockBuffer(stack->buffer, GIST_UNLOCK);
			xlocked = false;

			/* descend to the chosen child */
			item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
			item->blkno = childblkno;
			item->parent = stack;
			item->downlinkoffnum = downlinkoffnum;
			state.stack = stack = item;
		}
		else
		{
			/*
			 * Leaf page. Insert the new key. We've already updated all the
			 * parents on the way down, but we might have to split the page if
			 * it doesn't fit. gistinserttuple() will take care of that.
			 */

			/*
			 * Swap shared lock for an exclusive one. Be careful, the page may
			 * change while we unlock/lock the page...
			 */
			if (!xlocked)
			{
				LockBuffer(stack->buffer, GIST_UNLOCK);
				LockBuffer(stack->buffer, GIST_EXCLUSIVE);
				xlocked = true;
				stack->page = (Page) BufferGetPage(stack->buffer);
				stack->lsn = PageGetLSN(stack->page);

				if (stack->blkno == GIST_ROOT_BLKNO)
				{
					/*
					 * the only page that can become inner instead of leaf is
					 * the root page, so for root we should recheck it
					 */
					if (!GistPageIsLeaf(stack->page))
					{
						/*
						 * very rare situation: during unlock/lock index with
						 * number of pages = 1 was increased
						 */
						LockBuffer(stack->buffer, GIST_UNLOCK);
						xlocked = false;
						continue;
					}

					/*
					 * we don't need to check root split, because checking
					 * leaf/inner is enough to recognize split for root
					 */
				}
				else if ((GistFollowRight(stack->page) ||
						  stack->parent->lsn < GistPageGetNSN(stack->page)) ||
						 GistPageIsDeleted(stack->page))
				{
					/*
					 * The page was split or deleted while we momentarily
					 * unlocked the page. Go back to parent.
					 */
					UnlockReleaseBuffer(stack->buffer);
					xlocked = false;
					state.stack = stack = stack->parent;
					continue;
				}
			}

			/* now state.stack->(page, buffer and blkno) points to leaf page */

			gistinserttuple(&state, stack, giststate, itup,
							InvalidOffsetNumber);
			LockBuffer(stack->buffer, GIST_UNLOCK);

			/* Release any pins we might still hold before exiting */
			for (; stack; stack = stack->parent)
				ReleaseBuffer(stack->buffer);
			break;
		}
	}
}
900 :
901 : /*
902 : * Traverse the tree to find path from root page to specified "child" block.
903 : *
904 : * returns a new insertion stack, starting from the parent of "child", up
905 : * to the root. *downlinkoffnum is set to the offset of the downlink in the
906 : * direct parent of child.
907 : *
908 : * To prevent deadlocks, this should lock only one page at a time.
909 : */
910 : static GISTInsertStack *
911 0 : gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
912 : {
913 : Page page;
914 : Buffer buffer;
915 : OffsetNumber i,
916 : maxoff;
917 : ItemId iid;
918 : IndexTuple idxtuple;
919 : List *fifo;
920 : GISTInsertStack *top,
921 : *ptr;
922 : BlockNumber blkno;
923 :
924 0 : top = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
925 0 : top->blkno = GIST_ROOT_BLKNO;
926 0 : top->downlinkoffnum = InvalidOffsetNumber;
927 :
928 0 : fifo = list_make1(top); /* breadth-first queue of pages to visit */
929 0 : while (fifo != NIL)
930 : {
931 : /* Get next page to visit */
932 0 : top = linitial(fifo);
933 0 : fifo = list_delete_first(fifo);
934 :
935 0 : buffer = ReadBuffer(r, top->blkno);
936 0 : LockBuffer(buffer, GIST_SHARE);
937 0 : gistcheckpage(r, buffer);
938 0 : page = (Page) BufferGetPage(buffer);
939 :
940 0 : if (GistPageIsLeaf(page))
941 : {
942 : /*
943 : * Because we scan the index top-down, all the rest of the pages
944 : * in the queue must be leaf pages as well.
945 : */
946 0 : UnlockReleaseBuffer(buffer);
947 0 : break;
948 : }
949 :
950 : /* currently, internal pages are never deleted */
951 : Assert(!GistPageIsDeleted(page));
952 :
953 0 : top->lsn = BufferGetLSNAtomic(buffer); /* remember LSN so a later split of this page can be detected */
954 :
955 : /*
956 : * If F_FOLLOW_RIGHT is set, the page to the right doesn't have a
957 : * downlink. This should not normally happen..
958 : */
959 0 : if (GistFollowRight(page))
960 0 : elog(ERROR, "concurrent GiST page split was incomplete");
961 :
962 0 : if (top->parent && top->parent->lsn < GistPageGetNSN(page) &&
963 0 : GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
964 : {
965 : /*
966 : * Page was split while we looked elsewhere. We didn't see the
967 : * downlink to the right page when we scanned the parent, so add
968 : * it to the queue now.
969 : *
970 : * Put the right page ahead of the queue, so that we visit it
971 : * next. That's important, because if this is the lowest internal
972 : * level, just above leaves, we might already have queued up some
973 : * leaf pages, and we assume that there can't be any non-leaf
974 : * pages behind leaf pages.
975 : */
976 0 : ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
977 0 : ptr->blkno = GistPageGetOpaque(page)->rightlink;
978 0 : ptr->downlinkoffnum = InvalidOffsetNumber;
979 0 : ptr->parent = top->parent;
980 :
981 0 : fifo = lcons(ptr, fifo); /* push to the front: visit the right sibling next */
982 : }
983 :
984 0 : maxoff = PageGetMaxOffsetNumber(page);
985 :
986 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
987 : {
988 0 : iid = PageGetItemId(page, i);
989 0 : idxtuple = (IndexTuple) PageGetItem(page, iid);
990 0 : blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
991 0 : if (blkno == child)
992 : {
993 : /* Found it! */
994 0 : UnlockReleaseBuffer(buffer);
995 0 : *downlinkoffnum = i;
996 0 : return top;
997 : }
998 : else
999 : {
1000 : /* Append this child to the list of pages to visit later */
1001 0 : ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
1002 0 : ptr->blkno = blkno;
1003 0 : ptr->downlinkoffnum = i;
1004 0 : ptr->parent = top;
1005 :
1006 0 : fifo = lappend(fifo, ptr);
1007 : }
1008 : }
1009 :
1010 0 : UnlockReleaseBuffer(buffer); /* done with this page; only one page is ever locked at a time */
1011 : }
1012 :
1013 0 : elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",
1014 : RelationGetRelationName(r), child);
1015 : return NULL; /* keep compiler quiet */
1016 : }
1017 :
1018 : /*
1019 : * Updates the stack so that child->parent is the correct parent of the
1020 : * child. child->parent must be exclusively locked on entry, and will
1021 : * remain so at exit, but it might not be the same page anymore.
1022 : */
1023 : static void
1024 24470 : gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build)
1025 : {
1026 24470 : GISTInsertStack *parent = child->parent;
1027 : ItemId iid;
1028 : IndexTuple idxtuple;
1029 : OffsetNumber maxoff;
1030 : GISTInsertStack *ptr;
1031 :
1032 24470 : gistcheckpage(r, parent->buffer);
1033 24470 : parent->page = (Page) BufferGetPage(parent->buffer);
1034 24470 : maxoff = PageGetMaxOffsetNumber(parent->page);
1035 :
1036 : /* Check if the downlink is still where it was before */
1037 24470 : if (child->downlinkoffnum != InvalidOffsetNumber && child->downlinkoffnum <= maxoff)
1038 : {
1039 24448 : iid = PageGetItemId(parent->page, child->downlinkoffnum);
1040 24448 : idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
1041 24448 : if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
1042 24448 : return; /* still there */
1043 : }
1044 :
1045 : /*
1046 : * The page has changed since we looked. During normal operation, every
1047 : * update of a page changes its LSN, so the LSN we memorized should have
1048 : * changed too. During index build, however, we don't WAL-log the changes
1049 : * until we have built the index, so the LSN doesn't change. There is no
1050 : * concurrent activity during index build, but we might have changed the
1051 : * parent ourselves.
1052 : */
1053 : Assert(parent->lsn != PageGetLSN(parent->page) || is_build);
1054 :
1055 : /*
1056 : * Scan the page to re-find the downlink. If the page was split, it might
1057 : * have moved to a different page, so follow the right links until we find
1058 : * it.
1059 : */
1060 : while (true)
1061 14 : {
1062 : OffsetNumber i;
1063 :
1064 36 : maxoff = PageGetMaxOffsetNumber(parent->page);
1065 162 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1066 : {
1067 148 : iid = PageGetItemId(parent->page, i);
1068 148 : idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
1069 148 : if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
1070 : {
1071 : /* yes!!, found */
1072 22 : child->downlinkoffnum = i;
1073 22 : return;
1074 : }
1075 : }
1076 :
1077 14 : parent->blkno = GistPageGetOpaque(parent->page)->rightlink; /* not on this page: move right */
1078 14 : parent->downlinkoffnum = InvalidOffsetNumber;
1079 14 : UnlockReleaseBuffer(parent->buffer);
1080 14 : if (parent->blkno == InvalidBlockNumber)
1081 : {
1082 : /*
1083 : * End of chain and still didn't find parent. It's a very-very
1084 : * rare situation when the root was split.
1085 : */
1086 0 : break;
1087 : }
1088 14 : parent->buffer = ReadBuffer(r, parent->blkno);
1089 14 : LockBuffer(parent->buffer, GIST_EXCLUSIVE);
1090 14 : gistcheckpage(r, parent->buffer);
1091 14 : parent->page = (Page) BufferGetPage(parent->buffer);
1092 : }
1093 :
1094 : /*
1095 : * awful!!, we need search tree to find parent ... , but before we should
1096 : * release all old parent
1097 : */
1098 :
1099 0 : ptr = child->parent->parent; /* child->parent already released above */
1100 0 : while (ptr)
1101 : {
1102 0 : ReleaseBuffer(ptr->buffer);
1103 0 : ptr = ptr->parent;
1104 : }
1105 :
1106 : /* ok, find new path */
1107 0 : ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum);
1108 :
1109 : /* read all buffers as expected by caller */
1110 : /* note we don't lock them or gistcheckpage them here! */
1111 0 : while (ptr)
1112 : {
1113 0 : ptr->buffer = ReadBuffer(r, ptr->blkno);
1114 0 : ptr->page = (Page) BufferGetPage(ptr->buffer);
1115 0 : ptr = ptr->parent;
1116 : }
1117 :
1118 : /* install new chain of parents to stack */
1119 0 : child->parent = parent;
1120 :
1121 : /* make recursive call to normal processing */
1122 0 : LockBuffer(child->parent->buffer, GIST_EXCLUSIVE);
1123 0 : gistFindCorrectParent(r, child, is_build);
1124 : }
1125 :
1126 : /*
1127 : * Form a downlink pointer for the page in 'buf'.
1128 : */
1129 : static IndexTuple
1130 0 : gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate,
1131 : GISTInsertStack *stack, bool is_build)
1132 : {
1133 0 : Page page = BufferGetPage(buf);
1134 : OffsetNumber maxoff;
1135 : OffsetNumber offset;
1136 0 : IndexTuple downlink = NULL;
1137 :
1138 0 : maxoff = PageGetMaxOffsetNumber(page);
1139 0 : for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset))
1140 : {
1141 : IndexTuple ituple = (IndexTuple)
1142 0 : PageGetItem(page, PageGetItemId(page, offset));
1143 :
1144 0 : if (downlink == NULL)
1145 0 : downlink = CopyIndexTuple(ituple); /* first tuple seeds the union */
1146 : else
1147 : {
1148 : IndexTuple newdownlink;
1149 :
1150 0 : newdownlink = gistgetadjusted(rel, downlink, ituple,
1151 : giststate);
1152 0 : if (newdownlink)
1153 0 : downlink = newdownlink; /* union had to grow to cover this tuple */
1154 : }
1155 : }
1156 :
1157 : /*
1158 : * If the page is completely empty, we can't form a meaningful downlink
1159 : * for it. But we have to insert a downlink for the page. Any key will do,
1160 : * as long as its consistent with the downlink of parent page, so that we
1161 : * can legally insert it to the parent. A minimal one that matches as few
1162 : * scans as possible would be best, to keep scans from doing useless work,
1163 : * but we don't know how to construct that. So we just use the downlink of
1164 : * the original page that was split - that's as far from optimal as it can
1165 : * get but will do..
1166 : */
1167 0 : if (!downlink)
1168 : {
1169 : ItemId iid;
1170 :
1171 0 : LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
1172 0 : gistFindCorrectParent(rel, stack, is_build);
1173 0 : iid = PageGetItemId(stack->parent->page, stack->downlinkoffnum);
1174 0 : downlink = (IndexTuple) PageGetItem(stack->parent->page, iid);
1175 0 : downlink = CopyIndexTuple(downlink);
1176 0 : LockBuffer(stack->parent->buffer, GIST_UNLOCK);
1177 : }
1178 :
1179 0 : ItemPointerSetBlockNumber(&(downlink->t_tid), BufferGetBlockNumber(buf)); /* point the downlink at this page */
1180 0 : GistTupleSetValid(downlink);
1181 :
1182 0 : return downlink;
1183 : }
1184 :
1185 :
1186 : /*
1187 : * Complete the incomplete split of state->stack->page.
1188 : */
1189 : static void
1190 0 : gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
1191 : {
1192 0 : GISTInsertStack *stack = state->stack;
1193 : Buffer buf;
1194 : Page page;
1195 0 : List *splitinfo = NIL;
1196 :
1197 0 : ereport(LOG,
1198 : (errmsg("fixing incomplete split in index \"%s\", block %u",
1199 : RelationGetRelationName(state->r), stack->blkno)));
1200 :
1201 : Assert(GistFollowRight(stack->page));
1202 : Assert(OffsetNumberIsValid(stack->downlinkoffnum));
1203 :
1204 0 : buf = stack->buffer; /* start from the first page of the split chain */
1205 :
1206 : /*
1207 : * Read the chain of split pages, following the rightlinks. Construct a
1208 : * downlink tuple for each page.
1209 : */
1210 : for (;;)
1211 0 : {
1212 0 : GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));
1213 : IndexTuple downlink;
1214 :
1215 0 : page = BufferGetPage(buf);
1216 :
1217 : /* Form the new downlink tuples to insert to parent */
1218 0 : downlink = gistformdownlink(state->r, buf, giststate, stack, state->is_build);
1219 :
1220 0 : si->buf = buf;
1221 0 : si->downlink = downlink;
1222 :
1223 0 : splitinfo = lappend(splitinfo, si);
1224 :
1225 0 : if (GistFollowRight(page))
1226 : {
1227 : /* lock next page */
1228 0 : buf = ReadBuffer(state->r, GistPageGetOpaque(page)->rightlink);
1229 0 : LockBuffer(buf, GIST_EXCLUSIVE);
1230 : }
1231 : else
1232 0 : break; /* last page of the chain already has its downlink */
1233 : }
1234 :
1235 : /* Insert the downlinks */
1236 0 : gistfinishsplit(state, stack, giststate, splitinfo, false);
1237 : }
1238 :
1239 : /*
1240 : * Insert or replace a tuple in stack->buffer. If 'oldoffnum' is valid, the
1241 : * tuple at 'oldoffnum' is replaced, otherwise the tuple is inserted as new.
1242 : * 'stack' represents the path from the root to the page being updated.
1243 : *
1244 : * The caller must hold an exclusive lock on stack->buffer. The lock is still
1245 : * held on return, but the page might not contain the inserted tuple if the
1246 : * page was split. The function returns true if the page was split, false
1247 : * otherwise.
1248 : */
1249 : static bool
1250 1596282 : gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
1251 : GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
1252 : {
1253 1596282 : return gistinserttuples(state, stack, giststate, &tuple, 1, oldoffnum,
1254 : InvalidBuffer, InvalidBuffer, false, false); /* one tuple, no children, keep locks */
1255 : }
1256 :
1257 : /* ----------------
1258 : * An extended workhorse version of gistinserttuple(). This version allows
1259 : * inserting multiple tuples, or replacing a single tuple with multiple tuples.
1260 : * This is used to recursively update the downlinks in the parent when a page
1261 : * is split.
1262 : *
1263 : * If leftchild and rightchild are valid, we're inserting/replacing the
1264 : * downlink for rightchild, and leftchild is its left sibling. We clear the
1265 : * F_FOLLOW_RIGHT flag and update NSN on leftchild, atomically with the
1266 : * insertion of the downlink.
1267 : *
1268 : * To avoid holding locks for longer than necessary, when recursing up the
1269 : * tree to update the parents, the locking is a bit peculiar here. On entry,
1270 : * the caller must hold an exclusive lock on stack->buffer, as well as
1271 : * leftchild and rightchild if given. On return:
1272 : *
1273 : * - Lock on stack->buffer is released, if 'unlockbuf' is true. The page is
1274 : * always kept pinned, however.
1275 : * - Lock on 'leftchild' is released, if 'unlockleftchild' is true. The page
1276 : * is kept pinned.
1277 : * - Lock and pin on 'rightchild' are always released.
1278 : *
1279 : * Returns 'true' if the page had to be split. Note that if the page was
1280 : * split, the inserted/updated tuples might've been inserted to a right
1281 : * sibling of stack->buffer instead of stack->buffer itself.
1282 : */
1283 : static bool
1284 1620752 : gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
1285 : GISTSTATE *giststate,
1286 : IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
1287 : Buffer leftchild, Buffer rightchild,
1288 : bool unlockbuf, bool unlockleftchild)
1289 : {
1290 : List *splitinfo;
1291 : bool is_split;
1292 :
1293 : /*
1294 : * Check for any rw conflicts (in serializable isolation level) just
1295 : * before we intend to modify the page
1296 : */
1297 1620752 : CheckForSerializableConflictIn(state->r, NULL, BufferGetBlockNumber(stack->buffer));
1298 :
1299 : /* Insert the tuple(s) to the page, splitting the page if necessary */
1300 1620740 : is_split = gistplacetopage(state->r, state->freespace, giststate,
1301 : stack->buffer,
1302 : tuples, ntup,
1303 : oldoffnum, NULL,
1304 : leftchild,
1305 : &splitinfo,
1306 : true,
1307 : state->heapRel,
1308 1620740 : state->is_build);
1309 :
1310 : /*
1311 : * Before recursing up in case the page was split, release locks on the
1312 : * child pages. We don't need to keep them locked when updating the
1313 : * parent.
1314 : */
1315 1620740 : if (BufferIsValid(rightchild))
1316 24470 : UnlockReleaseBuffer(rightchild); /* per contract, rightchild's lock and pin are always dropped */
1317 1620740 : if (BufferIsValid(leftchild) && unlockleftchild)
1318 5228 : LockBuffer(leftchild, GIST_UNLOCK); /* leftchild stays pinned, only the lock is released */
1319 :
1320 : /*
1321 : * If we had to split, insert/update the downlinks in the parent. If the
1322 : * caller requested us to release the lock on stack->buffer, tell
1323 : * gistfinishsplit() to do that as soon as it's safe to do so. If we
1324 : * didn't have to split, release it ourselves.
1325 : */
1326 1620740 : if (splitinfo)
1327 24416 : gistfinishsplit(state, stack, giststate, splitinfo, unlockbuf);
1328 1596324 : else if (unlockbuf)
1329 19188 : LockBuffer(stack->buffer, GIST_UNLOCK);
1330 :
1331 1620740 : return is_split;
1332 : }
1333 :
1334 : /*
1335 : * Finish an incomplete split by inserting/updating the downlinks in parent
1336 : * page. 'splitinfo' contains all the child pages involved in the split,
1337 : * from left-to-right.
1338 : *
1339 : * On entry, the caller must hold a lock on stack->buffer and all the child
1340 : * pages in 'splitinfo'. If 'unlockbuf' is true, the lock on stack->buffer is
1341 : * released on return. The child pages are always unlocked and unpinned.
1342 : */
1343 : static void
1344 24416 : gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
1345 : GISTSTATE *giststate, List *splitinfo, bool unlockbuf)
1346 : {
1347 : GISTPageSplitInfo *right;
1348 : GISTPageSplitInfo *left;
1349 : IndexTuple tuples[2];
1350 :
1351 : /* A split always contains at least two halves */
1352 : Assert(list_length(splitinfo) >= 2);
1353 :
1354 : /*
1355 : * We need to insert downlinks for each new page, and update the downlink
1356 : * for the original (leftmost) page in the split. Begin at the rightmost
1357 : * page, inserting one downlink at a time until there's only two pages
1358 : * left. Finally insert the downlink for the last new page and update the
1359 : * downlink for the original page as one operation.
1360 : */
1361 24416 : LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
1362 :
1363 : /*
1364 : * Insert downlinks for the siblings from right to left, until there are
1365 : * only two siblings left.
1366 : */
1367 24470 : for (int pos = list_length(splitinfo) - 1; pos > 1; pos--)
1368 : {
1369 54 : right = (GISTPageSplitInfo *) list_nth(splitinfo, pos);
1370 54 : left = (GISTPageSplitInfo *) list_nth(splitinfo, pos - 1);
1371 :
1372 54 : gistFindCorrectParent(state->r, stack, state->is_build);
1373 54 : if (gistinserttuples(state, stack->parent, giststate,
1374 : &right->downlink, 1,
1375 : InvalidOffsetNumber,
1376 : left->buf, right->buf, false, false))
1377 : {
1378 : /*
1379 : * If the parent page was split, the existing downlink might have
1380 : * moved.
1381 : */
1382 4 : stack->downlinkoffnum = InvalidOffsetNumber;
1383 : }
1384 : /* gistinserttuples() released the lock on right->buf. */
1385 : }
1386 :
1387 24416 : right = (GISTPageSplitInfo *) lsecond(splitinfo); /* the two leftmost halves remain */
1388 24416 : left = (GISTPageSplitInfo *) linitial(splitinfo);
1389 :
1390 : /*
1391 : * Finally insert downlink for the remaining right page and update the
1392 : * downlink for the original page to not contain the tuples that were
1393 : * moved to the new pages.
1394 : */
1395 24416 : tuples[0] = left->downlink;
1396 24416 : tuples[1] = right->downlink;
1397 24416 : gistFindCorrectParent(state->r, stack, state->is_build);
1398 24416 : (void) gistinserttuples(state, stack->parent, giststate,
1399 : tuples, 2,
1400 24416 : stack->downlinkoffnum,
1401 : left->buf, right->buf,
1402 : true, /* Unlock parent */
1403 : unlockbuf /* Unlock stack->buffer if caller
1404 : * wants that */
1405 : );
1406 :
1407 : /*
1408 : * The downlink might have moved when we updated it. Even if the page
1409 : * wasn't split, because gistinserttuples() implements updating the old
1410 : * tuple by removing and re-inserting it!
1411 : */
1412 24416 : stack->downlinkoffnum = InvalidOffsetNumber;
1413 :
1414 : Assert(left->buf == stack->buffer);
1415 :
1416 : /*
1417 : * If we split the page because we had to adjust the downlink on an
1418 : * internal page, while descending the tree for inserting a new tuple,
1419 : * then this might no longer be the correct page for the new tuple. The
1420 : * downlink to this page might not cover the new tuple anymore, it might
1421 : * need to go to the newly-created right sibling instead. Tell the caller
1422 : * to walk back up the stack, to re-check at the parent which page to
1423 : * insert to.
1424 : *
1425 : * Normally, the LSN-NSN interlock during the tree descend would also
1426 : * detect that a concurrent split happened (by ourselves), and cause us to
1427 : * retry at the parent. But that mechanism doesn't work during index
1428 : * build, because we don't do WAL-logging, and don't update LSNs, during
1429 : * index build.
1430 : */
1431 24416 : stack->retry_from_parent = true;
1432 : }
1433 :
1434 : /*
1435 : * gistSplit -- split a page in the tree and fill struct
1436 : * used for XLOG and real writes buffers. Function is recursive, ie
1437 : * it will split page until keys will fit in every page.
1438 : */
SplitPageLayout *
1440 26796 : gistSplit(Relation r,
1441 : Page page,
1442 : IndexTuple *itup, /* contains compressed entry */
1443 : int len,
1444 : GISTSTATE *giststate)
1445 : {
1446 : IndexTuple *lvectup,
1447 : *rvectup;
1448 : GistSplitVector v;
1449 : int i;
1450 26796 : SplitPageLayout *res = NULL;
1451 :
1452 : /* this should never recurse very deeply, but better safe than sorry */
1453 26796 : check_stack_depth();
1454 :
1455 : /* there's no point in splitting an empty page */
1456 : Assert(len > 0);
1457 :
1458 : /*
1459 : * If a single tuple doesn't fit on a page, no amount of splitting will
1460 : * help.
1461 : */
1462 26796 : if (len == 1)
1463 0 : ereport(ERROR,
1464 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1465 : errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
1466 : IndexTupleSize(itup[0]), GiSTPageSize,
1467 : RelationGetRelationName(r))));
1468 :
1469 26796 : memset(v.spl_lisnull, true,
1470 26796 : sizeof(bool) * giststate->nonLeafTupdesc->natts);
1471 26796 : memset(v.spl_risnull, true,
1472 26796 : sizeof(bool) * giststate->nonLeafTupdesc->natts);
1473 26796 : gistSplitByKey(r, page, itup, len, giststate, &v, 0); /* opclass picksplit decides the partition */
1474 :
1475 : /* form left and right vector */
1476 26796 : lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
1477 26796 : rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
1478 :
1479 1099512 : for (i = 0; i < v.splitVector.spl_nleft; i++)
1480 1072716 : lvectup[i] = itup[v.splitVector.spl_left[i] - 1]; /* spl_left offsets are 1-based */
1481 :
1482 1230912 : for (i = 0; i < v.splitVector.spl_nright; i++)
1483 1204116 : rvectup[i] = itup[v.splitVector.spl_right[i] - 1];
1484 :
1485 : /* finalize splitting (may need another split) */
1486 26796 : if (!gistfitpage(rvectup, v.splitVector.spl_nright))
1487 : {
1488 592 : res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate); /* right half still too big: split again */
1489 : }
1490 : else
1491 : {
1492 26204 : ROTATEDIST(res);
1493 26204 : res->block.num = v.splitVector.spl_nright;
1494 26204 : res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &(res->lenlist));
1495 26204 : res->itup = gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false);
1496 : }
1497 :
1498 26796 : if (!gistfitpage(lvectup, v.splitVector.spl_nleft))
1499 : {
1500 : SplitPageLayout *resptr,
1501 : *subres;
1502 :
1503 324 : resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate);
1504 :
1505 : /* install on list's tail */
1506 810 : while (resptr->next)
1507 486 : resptr = resptr->next;
1508 :
1509 324 : resptr->next = res;
1510 324 : res = subres;
1511 : }
1512 : else
1513 : {
1514 26472 : ROTATEDIST(res);
1515 26472 : res->block.num = v.splitVector.spl_nleft;
1516 26472 : res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &(res->lenlist));
1517 26472 : res->itup = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false);
1518 : }
1519 :
1520 26796 : return res;
1521 : }
1522 :
1523 : /*
1524 : * Create a GISTSTATE and fill it with information about the index
1525 : */
GISTSTATE *
1527 10316 : initGISTstate(Relation index)
1528 : {
1529 : GISTSTATE *giststate;
1530 : MemoryContext scanCxt;
1531 : MemoryContext oldCxt;
1532 : int i;
1533 :
1534 : /* safety check to protect fixed-size arrays in GISTSTATE */
1535 10316 : if (index->rd_att->natts > INDEX_MAX_KEYS)
1536 0 : elog(ERROR, "numberOfAttributes %d > %d",
1537 : index->rd_att->natts, INDEX_MAX_KEYS);
1538 :
1539 : /* Create the memory context that will hold the GISTSTATE */
1540 10316 : scanCxt = AllocSetContextCreate(CurrentMemoryContext,
1541 : "GiST scan context",
1542 : ALLOCSET_DEFAULT_SIZES);
1543 10316 : oldCxt = MemoryContextSwitchTo(scanCxt); /* everything below is allocated in scanCxt */
1544 :
1545 : /* Create and fill in the GISTSTATE */
1546 10316 : giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
1547 :
1548 10316 : giststate->scanCxt = scanCxt;
1549 10316 : giststate->tempCxt = scanCxt; /* caller must change this if needed */
1550 10316 : giststate->leafTupdesc = index->rd_att;
1551 :
1552 : /*
1553 : * The truncated tupdesc for non-leaf index tuples, which doesn't contain
1554 : * the INCLUDE attributes.
1555 : *
1556 : * It is used to form tuples during tuple adjustment and page split.
1557 : * B-tree creates shortened tuple descriptor for every truncated tuple,
1558 : * because it is doing this less often: it does not have to form truncated
1559 : * tuples during page split. Also, B-tree is not adjusting tuples on
1560 : * internal pages the way GiST does.
1561 : */
1562 20632 : giststate->nonLeafTupdesc = CreateTupleDescTruncatedCopy(index->rd_att,
1563 10316 : IndexRelationGetNumberOfKeyAttributes(index));
1564 :
1565 27714 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(index); i++)
1566 : {
1567 17398 : fmgr_info_copy(&(giststate->consistentFn[i]),
1568 17398 : index_getprocinfo(index, i + 1, GIST_CONSISTENT_PROC),
1569 : scanCxt);
1570 17398 : fmgr_info_copy(&(giststate->unionFn[i]),
1571 17398 : index_getprocinfo(index, i + 1, GIST_UNION_PROC),
1572 : scanCxt);
1573 :
1574 : /* opclasses are not required to provide a Compress method */
1575 17398 : if (OidIsValid(index_getprocid(index, i + 1, GIST_COMPRESS_PROC)))
1576 5944 : fmgr_info_copy(&(giststate->compressFn[i]),
1577 5944 : index_getprocinfo(index, i + 1, GIST_COMPRESS_PROC),
1578 : scanCxt);
1579 : else
1580 11454 : giststate->compressFn[i].fn_oid = InvalidOid;
1581 :
1582 : /* opclasses are not required to provide a Decompress method */
1583 17398 : if (OidIsValid(index_getprocid(index, i + 1, GIST_DECOMPRESS_PROC)))
1584 1594 : fmgr_info_copy(&(giststate->decompressFn[i]),
1585 1594 : index_getprocinfo(index, i + 1, GIST_DECOMPRESS_PROC),
1586 : scanCxt);
1587 : else
1588 15804 : giststate->decompressFn[i].fn_oid = InvalidOid;
1589 :
1590 17398 : fmgr_info_copy(&(giststate->penaltyFn[i]),
1591 17398 : index_getprocinfo(index, i + 1, GIST_PENALTY_PROC),
1592 : scanCxt);
1593 17398 : fmgr_info_copy(&(giststate->picksplitFn[i]),
1594 17398 : index_getprocinfo(index, i + 1, GIST_PICKSPLIT_PROC),
1595 : scanCxt);
1596 17398 : fmgr_info_copy(&(giststate->equalFn[i]),
1597 17398 : index_getprocinfo(index, i + 1, GIST_EQUAL_PROC),
1598 : scanCxt);
1599 :
1600 : /* opclasses are not required to provide a Distance method */
1601 17398 : if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_PROC)))
1602 1894 : fmgr_info_copy(&(giststate->distanceFn[i]),
1603 1894 : index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC),
1604 : scanCxt);
1605 : else
1606 15504 : giststate->distanceFn[i].fn_oid = InvalidOid;
1607 :
1608 : /* opclasses are not required to provide a Fetch method */
1609 17398 : if (OidIsValid(index_getprocid(index, i + 1, GIST_FETCH_PROC)))
1610 1358 : fmgr_info_copy(&(giststate->fetchFn[i]),
1611 1358 : index_getprocinfo(index, i + 1, GIST_FETCH_PROC),
1612 : scanCxt);
1613 : else
1614 16040 : giststate->fetchFn[i].fn_oid = InvalidOid;
1615 :
1616 : /*
1617 : * If the index column has a specified collation, we should honor that
1618 : * while doing comparisons. However, we may have a collatable storage
1619 : * type for a noncollatable indexed data type. If there's no index
1620 : * collation then specify default collation in case the support
1621 : * functions need collation. This is harmless if the support
1622 : * functions don't care about collation, so we just do it
1623 : * unconditionally. (We could alternatively call get_typcollation,
1624 : * but that seems like expensive overkill --- there aren't going to be
1625 : * any cases where a GiST storage type has a nondefault collation.)
1626 : */
1627 17398 : if (OidIsValid(index->rd_indcollation[i]))
1628 194 : giststate->supportCollation[i] = index->rd_indcollation[i];
1629 : else
1630 17204 : giststate->supportCollation[i] = DEFAULT_COLLATION_OID;
1631 : }
1632 :
1633 : /* No opclass information for INCLUDE attributes */
1634 10818 : for (; i < index->rd_att->natts; i++)
1635 : {
1636 502 : giststate->consistentFn[i].fn_oid = InvalidOid;
1637 502 : giststate->unionFn[i].fn_oid = InvalidOid;
1638 502 : giststate->compressFn[i].fn_oid = InvalidOid;
1639 502 : giststate->decompressFn[i].fn_oid = InvalidOid;
1640 502 : giststate->penaltyFn[i].fn_oid = InvalidOid;
1641 502 : giststate->picksplitFn[i].fn_oid = InvalidOid;
1642 502 : giststate->equalFn[i].fn_oid = InvalidOid;
1643 502 : giststate->distanceFn[i].fn_oid = InvalidOid;
1644 502 : giststate->fetchFn[i].fn_oid = InvalidOid;
1645 502 : giststate->supportCollation[i] = InvalidOid;
1646 : }
1647 :
1648 10316 : MemoryContextSwitchTo(oldCxt);
1649 :
1650 10316 : return giststate;
1651 : }
1652 :
void
1654 7850 : freeGISTstate(GISTSTATE *giststate)
1655 : {
1656 : /* It's sufficient to delete the scanCxt */
1657 7850 : MemoryContextDelete(giststate->scanCxt); /* the GISTSTATE itself was allocated in scanCxt by initGISTstate() */
1658 7850 : }
1659 :
1660 : /*
1661 : * gistprunepage() -- try to remove LP_DEAD items from the given page.
1662 : * Function assumes that buffer is exclusively locked.
1663 : */
1664 : static void
1665 0 : gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel)
1666 : {
1667 : OffsetNumber deletable[MaxIndexTuplesPerPage];
1668 0 : int ndeletable = 0;
1669 : OffsetNumber offnum,
1670 : maxoff;
1671 :
1672 : Assert(GistPageIsLeaf(page));
1673 :
1674 : /*
1675 : * Scan over all items to see which ones need to be deleted according to
1676 : * LP_DEAD flags.
1677 : */
1678 0 : maxoff = PageGetMaxOffsetNumber(page);
1679 0 : for (offnum = FirstOffsetNumber;
1680 : offnum <= maxoff;
1681 0 : offnum = OffsetNumberNext(offnum))
1682 : {
1683 0 : ItemId itemId = PageGetItemId(page, offnum);
1684 :
1685 0 : if (ItemIdIsDead(itemId))
1686 0 : deletable[ndeletable++] = offnum; /* collect offsets of LP_DEAD tuples */
1687 : }
1688 :
1689 0 : if (ndeletable > 0)
1690 : {
1691 0 : TransactionId snapshotConflictHorizon = InvalidTransactionId;
1692 :
1693 0 : if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
1694 : snapshotConflictHorizon =
1695 0 : index_compute_xid_horizon_for_tuples(rel, heapRel, buffer,
1696 : deletable, ndeletable);
1697 :
1698 0 : START_CRIT_SECTION();
1699 :
1700 0 : PageIndexMultiDelete(page, deletable, ndeletable);
1701 :
1702 : /*
1703 : * Mark the page as not containing any LP_DEAD items. This is not
1704 : * certainly true (there might be some that have recently been marked,
1705 : * but weren't included in our target-item list), but it will almost
1706 : * always be true and it doesn't seem worth an additional page scan to
1707 : * check it. Remember that F_HAS_GARBAGE is only a hint anyway.
1708 : */
1709 0 : GistClearPageHasGarbage(page);
1710 :
1711 0 : MarkBufferDirty(buffer);
1712 :
1713 : /* XLOG stuff */
1714 0 : if (RelationNeedsWAL(rel))
1715 0 : {
1716 : XLogRecPtr recptr;
1717 :
1718 0 : recptr = gistXLogDelete(buffer,
1719 : deletable, ndeletable,
1720 : snapshotConflictHorizon,
1721 : heapRel);
1722 :
1723 0 : PageSetLSN(page, recptr);
1724 : }
1725 : else
1726 0 : PageSetLSN(page, gistGetFakeLSN(rel)); /* no WAL: still advance the page LSN with a fake one */
1727 :
1728 0 : END_CRIT_SECTION();
1729 : }
1730 :
1731 : /*
1732 : * Note: if we didn't find any LP_DEAD items, then the page's
1733 : * F_HAS_GARBAGE hint bit is falsely set. We do not bother expending a
1734 : * separate write to clear it, however. We will clear it when we split
1735 : * the page.
1736 : */
1737 0 : }
|