Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * gistbuildbuffers.c
4 : * node buffer management functions for GiST buffering build algorithm.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gist/gistbuildbuffers.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/gist_private.h"
18 : #include "storage/buffile.h"
19 : #include "storage/bufmgr.h"
20 : #include "utils/rel.h"
21 :
22 : static GISTNodeBufferPage *gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb);
23 : static void gistAddLoadedBuffer(GISTBuildBuffers *gfbb,
24 : GISTNodeBuffer *nodeBuffer);
25 : static void gistLoadNodeBuffer(GISTBuildBuffers *gfbb,
26 : GISTNodeBuffer *nodeBuffer);
27 : static void gistUnloadNodeBuffer(GISTBuildBuffers *gfbb,
28 : GISTNodeBuffer *nodeBuffer);
29 : static void gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer,
30 : IndexTuple itup);
31 : static void gistGetItupFromPage(GISTNodeBufferPage *pageBuffer,
32 : IndexTuple *itup);
33 : static long gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb);
34 : static void gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum);
35 :
36 : static void ReadTempFileBlock(BufFile *file, long blknum, void *ptr);
37 : static void WriteTempFileBlock(BufFile *file, long blknum, const void *ptr);
38 :
39 :
40 : /*
41 : * Initialize GiST build buffers.
42 : */
43 : GISTBuildBuffers *
44 6 : gistInitBuildBuffers(int pagesPerBuffer, int levelStep, int maxLevel)
45 : {
46 : GISTBuildBuffers *gfbb;
47 : HASHCTL hashCtl;
48 :
49 6 : gfbb = palloc_object(GISTBuildBuffers);
50 6 : gfbb->pagesPerBuffer = pagesPerBuffer;
51 6 : gfbb->levelStep = levelStep;
52 :
53 : /*
54 : * Create a temporary file to hold buffer pages that are swapped out of
55 : * memory.
56 : */
57 6 : gfbb->pfile = BufFileCreateTemp(false);
58 6 : gfbb->nFileBlocks = 0;
59 :
60 : /* Initialize free page management. */
61 6 : gfbb->nFreeBlocks = 0;
62 6 : gfbb->freeBlocksLen = 32;
63 6 : gfbb->freeBlocks = palloc_array(long, gfbb->freeBlocksLen);
64 :
65 : /*
66 : * Current memory context will be used for all in-memory data structures
67 : * of buffers which are persistent during buffering build.
68 : */
69 6 : gfbb->context = CurrentMemoryContext;
70 :
71 : /*
72 : * nodeBuffersTab hash is association between index blocks and it's
73 : * buffers.
74 : */
75 6 : hashCtl.keysize = sizeof(BlockNumber);
76 6 : hashCtl.entrysize = sizeof(GISTNodeBuffer);
77 6 : hashCtl.hcxt = CurrentMemoryContext;
78 6 : gfbb->nodeBuffersTab = hash_create("gistbuildbuffers",
79 : 1024,
80 : &hashCtl,
81 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
82 :
83 6 : gfbb->bufferEmptyingQueue = NIL;
84 :
85 : /*
86 : * Per-level node buffers lists for final buffers emptying process. Node
87 : * buffers are inserted here when they are created.
88 : */
89 6 : gfbb->buffersOnLevelsLen = 1;
90 6 : gfbb->buffersOnLevels = palloc_array(List *, gfbb->buffersOnLevelsLen);
91 6 : gfbb->buffersOnLevels[0] = NIL;
92 :
93 : /*
94 : * Block numbers of node buffers which last pages are currently loaded
95 : * into main memory.
96 : */
97 6 : gfbb->loadedBuffersLen = 32;
98 6 : gfbb->loadedBuffers = palloc_array(GISTNodeBuffer *, gfbb->loadedBuffersLen);
99 6 : gfbb->loadedBuffersCount = 0;
100 :
101 6 : gfbb->rootlevel = maxLevel;
102 :
103 6 : return gfbb;
104 : }
105 :
106 : /*
107 : * Returns a node buffer for given block. The buffer is created if it
108 : * doesn't exist yet.
109 : */
110 : GISTNodeBuffer *
111 34356 : gistGetNodeBuffer(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
112 : BlockNumber nodeBlocknum, int level)
113 : {
114 : GISTNodeBuffer *nodeBuffer;
115 : bool found;
116 :
117 : /* Find node buffer in hash table */
118 34356 : nodeBuffer = (GISTNodeBuffer *) hash_search(gfbb->nodeBuffersTab,
119 : &nodeBlocknum,
120 : HASH_ENTER,
121 : &found);
122 34356 : if (!found)
123 : {
124 : /*
125 : * Node buffer wasn't found. Initialize the new buffer as empty.
126 : */
127 18 : MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
128 :
129 : /* nodeBuffer->nodeBlocknum is the hash key and was filled in already */
130 18 : nodeBuffer->blocksCount = 0;
131 18 : nodeBuffer->pageBlocknum = InvalidBlockNumber;
132 18 : nodeBuffer->pageBuffer = NULL;
133 18 : nodeBuffer->queuedForEmptying = false;
134 18 : nodeBuffer->isTemp = false;
135 18 : nodeBuffer->level = level;
136 :
137 : /*
138 : * Add this buffer to the list of buffers on this level. Enlarge
139 : * buffersOnLevels array if needed.
140 : */
141 18 : if (level >= gfbb->buffersOnLevelsLen)
142 : {
143 : int i;
144 :
145 6 : gfbb->buffersOnLevels =
146 6 : (List **) repalloc(gfbb->buffersOnLevels,
147 6 : (level + 1) * sizeof(List *));
148 :
149 : /* initialize the enlarged portion */
150 12 : for (i = gfbb->buffersOnLevelsLen; i <= level; i++)
151 6 : gfbb->buffersOnLevels[i] = NIL;
152 6 : gfbb->buffersOnLevelsLen = level + 1;
153 : }
154 :
155 : /*
156 : * Prepend the new buffer to the list of buffers on this level. It's
157 : * not arbitrary that the new buffer is put to the beginning of the
158 : * list: in the final emptying phase we loop through all buffers at
159 : * each level, and flush them. If a page is split during the emptying,
160 : * it's more efficient to flush the new split pages first, before
161 : * moving on to pre-existing pages on the level. The buffers just
162 : * created during the page split are likely still in cache, so
163 : * flushing them immediately is more efficient than putting them to
164 : * the end of the queue.
165 : */
166 36 : gfbb->buffersOnLevels[level] = lcons(nodeBuffer,
167 18 : gfbb->buffersOnLevels[level]);
168 :
169 18 : MemoryContextSwitchTo(oldcxt);
170 : }
171 :
172 34356 : return nodeBuffer;
173 : }
174 :
175 : /*
176 : * Allocate memory for a buffer page.
177 : */
178 : static GISTNodeBufferPage *
179 48 : gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb)
180 : {
181 : GISTNodeBufferPage *pageBuffer;
182 :
183 48 : pageBuffer = (GISTNodeBufferPage *) MemoryContextAllocZero(gfbb->context,
184 : BLCKSZ);
185 48 : pageBuffer->prev = InvalidBlockNumber;
186 :
187 : /* Set page free space */
188 48 : PAGE_FREE_SPACE(pageBuffer) = BLCKSZ - BUFFER_PAGE_DATA_OFFSET;
189 48 : return pageBuffer;
190 : }
191 :
192 : /*
193 : * Add specified buffer into loadedBuffers array.
194 : */
195 : static void
196 48 : gistAddLoadedBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
197 : {
198 : /* Never add a temporary buffer to the array */
199 48 : if (nodeBuffer->isTemp)
200 0 : return;
201 :
202 : /* Enlarge the array if needed */
203 48 : if (gfbb->loadedBuffersCount >= gfbb->loadedBuffersLen)
204 : {
205 0 : gfbb->loadedBuffersLen *= 2;
206 0 : gfbb->loadedBuffers = (GISTNodeBuffer **)
207 0 : repalloc(gfbb->loadedBuffers,
208 0 : gfbb->loadedBuffersLen * sizeof(GISTNodeBuffer *));
209 : }
210 :
211 48 : gfbb->loadedBuffers[gfbb->loadedBuffersCount] = nodeBuffer;
212 48 : gfbb->loadedBuffersCount++;
213 : }
214 :
215 : /*
216 : * Load last page of node buffer into main memory.
217 : */
218 : static void
219 18 : gistLoadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
220 : {
221 : /* Check if we really should load something */
222 18 : if (!nodeBuffer->pageBuffer && nodeBuffer->blocksCount > 0)
223 : {
224 : /* Allocate memory for page */
225 18 : nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
226 :
227 : /* Read block from temporary file */
228 18 : ReadTempFileBlock(gfbb->pfile, nodeBuffer->pageBlocknum,
229 18 : nodeBuffer->pageBuffer);
230 :
231 : /* Mark file block as free */
232 18 : gistBuffersReleaseBlock(gfbb, nodeBuffer->pageBlocknum);
233 :
234 : /* Mark node buffer as loaded */
235 18 : gistAddLoadedBuffer(gfbb, nodeBuffer);
236 18 : nodeBuffer->pageBlocknum = InvalidBlockNumber;
237 : }
238 18 : }
239 :
240 : /*
241 : * Write last page of node buffer to the disk.
242 : */
243 : static void
244 42 : gistUnloadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
245 : {
246 : /* Check if we have something to write */
247 42 : if (nodeBuffer->pageBuffer)
248 : {
249 : BlockNumber blkno;
250 :
251 : /* Get free file block */
252 18 : blkno = gistBuffersGetFreeBlock(gfbb);
253 :
254 : /* Write block to the temporary file */
255 18 : WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
256 :
257 : /* Free memory of that page */
258 18 : pfree(nodeBuffer->pageBuffer);
259 18 : nodeBuffer->pageBuffer = NULL;
260 :
261 : /* Save block number */
262 18 : nodeBuffer->pageBlocknum = blkno;
263 : }
264 42 : }
265 :
266 : /*
267 : * Write last pages of all node buffers to the disk.
268 : */
269 : void
270 18 : gistUnloadNodeBuffers(GISTBuildBuffers *gfbb)
271 : {
272 : int i;
273 :
274 : /* Unload all the buffers that have a page loaded in memory. */
275 60 : for (i = 0; i < gfbb->loadedBuffersCount; i++)
276 42 : gistUnloadNodeBuffer(gfbb, gfbb->loadedBuffers[i]);
277 :
278 : /* Now there are no node buffers with loaded last page */
279 18 : gfbb->loadedBuffersCount = 0;
280 18 : }
281 :
282 : /*
283 : * Add index tuple to buffer page.
284 : */
285 : static void
286 64074 : gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer, IndexTuple itup)
287 : {
288 64074 : Size itupsz = IndexTupleSize(itup);
289 : char *ptr;
290 :
291 : /* There should be enough of space. */
292 : Assert(PAGE_FREE_SPACE(pageBuffer) >= MAXALIGN(itupsz));
293 :
294 : /* Reduce free space value of page to reserve a spot for the tuple. */
295 64074 : PAGE_FREE_SPACE(pageBuffer) -= MAXALIGN(itupsz);
296 :
297 : /* Get pointer to the spot we reserved (ie. end of free space). */
298 64074 : ptr = (char *) pageBuffer + BUFFER_PAGE_DATA_OFFSET
299 64074 : + PAGE_FREE_SPACE(pageBuffer);
300 :
301 : /* Copy the index tuple there. */
302 64074 : memcpy(ptr, itup, itupsz);
303 64074 : }
304 :
305 : /*
306 : * Get last item from buffer page and remove it from page.
307 : */
308 : static void
309 64074 : gistGetItupFromPage(GISTNodeBufferPage *pageBuffer, IndexTuple *itup)
310 : {
311 : IndexTuple ptr;
312 : Size itupsz;
313 :
314 : Assert(!PAGE_IS_EMPTY(pageBuffer)); /* Page shouldn't be empty */
315 :
316 : /* Get pointer to last index tuple */
317 64074 : ptr = (IndexTuple) ((char *) pageBuffer
318 : + BUFFER_PAGE_DATA_OFFSET
319 64074 : + PAGE_FREE_SPACE(pageBuffer));
320 64074 : itupsz = IndexTupleSize(ptr);
321 :
322 : /* Make a copy of the tuple */
323 64074 : *itup = (IndexTuple) palloc(itupsz);
324 64074 : memcpy(*itup, ptr, itupsz);
325 :
326 : /* Mark the space used by the tuple as free */
327 64074 : PAGE_FREE_SPACE(pageBuffer) += MAXALIGN(itupsz);
328 64074 : }
329 :
330 : /*
331 : * Push an index tuple to node buffer.
332 : */
333 : void
334 64074 : gistPushItupToNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
335 : IndexTuple itup)
336 : {
337 : /*
338 : * Most part of memory operations will be in buffering build persistent
339 : * context. So, let's switch to it.
340 : */
341 64074 : MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
342 :
343 : /*
344 : * If the buffer is currently empty, create the first page.
345 : */
346 64074 : if (nodeBuffer->blocksCount == 0)
347 : {
348 30 : nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
349 30 : nodeBuffer->blocksCount = 1;
350 30 : gistAddLoadedBuffer(gfbb, nodeBuffer);
351 : }
352 :
353 : /* Load last page of node buffer if it wasn't in memory already */
354 64074 : if (!nodeBuffer->pageBuffer)
355 0 : gistLoadNodeBuffer(gfbb, nodeBuffer);
356 :
357 : /*
358 : * Check if there is enough space on the last page for the tuple.
359 : */
360 64074 : if (PAGE_NO_SPACE(nodeBuffer->pageBuffer, itup))
361 : {
362 : /*
363 : * Nope. Swap previous block to disk and allocate a new one.
364 : */
365 : BlockNumber blkno;
366 :
367 : /* Write filled page to the disk */
368 306 : blkno = gistBuffersGetFreeBlock(gfbb);
369 306 : WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
370 :
371 : /*
372 : * Reset the in-memory page as empty, and link the previous block to
373 : * the new page by storing its block number in the prev-link.
374 : */
375 306 : PAGE_FREE_SPACE(nodeBuffer->pageBuffer) =
376 : BLCKSZ - MAXALIGN(offsetof(GISTNodeBufferPage, tupledata));
377 306 : nodeBuffer->pageBuffer->prev = blkno;
378 :
379 : /* We've just added one more page */
380 306 : nodeBuffer->blocksCount++;
381 : }
382 :
383 64074 : gistPlaceItupToPage(nodeBuffer->pageBuffer, itup);
384 :
385 : /*
386 : * If the buffer just overflowed, add it to the emptying queue.
387 : */
388 64074 : if (BUFFER_HALF_FILLED(nodeBuffer, gfbb) && !nodeBuffer->queuedForEmptying)
389 : {
390 0 : gfbb->bufferEmptyingQueue = lcons(nodeBuffer,
391 : gfbb->bufferEmptyingQueue);
392 0 : nodeBuffer->queuedForEmptying = true;
393 : }
394 :
395 : /* Restore memory context */
396 64074 : MemoryContextSwitchTo(oldcxt);
397 64074 : }
398 :
399 : /*
400 : * Removes one index tuple from node buffer. Returns true if success and false
401 : * if node buffer is empty.
402 : */
403 : bool
404 64104 : gistPopItupFromNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
405 : IndexTuple *itup)
406 : {
407 : /*
408 : * If node buffer is empty then return false.
409 : */
410 64104 : if (nodeBuffer->blocksCount <= 0)
411 30 : return false;
412 :
413 : /* Load last page of node buffer if needed */
414 64074 : if (!nodeBuffer->pageBuffer)
415 18 : gistLoadNodeBuffer(gfbb, nodeBuffer);
416 :
417 : /*
418 : * Get index tuple from last non-empty page.
419 : */
420 64074 : gistGetItupFromPage(nodeBuffer->pageBuffer, itup);
421 :
422 : /*
423 : * If we just removed the last tuple from the page, fetch previous page on
424 : * this node buffer (if any).
425 : */
426 64074 : if (PAGE_IS_EMPTY(nodeBuffer->pageBuffer))
427 : {
428 : BlockNumber prevblkno;
429 :
430 : /*
431 : * blocksCount includes the page in pageBuffer, so decrease it now.
432 : */
433 336 : nodeBuffer->blocksCount--;
434 :
435 : /*
436 : * If there's more pages, fetch previous one.
437 : */
438 336 : prevblkno = nodeBuffer->pageBuffer->prev;
439 336 : if (prevblkno != InvalidBlockNumber)
440 : {
441 : /* There is a previous page. Fetch it. */
442 : Assert(nodeBuffer->blocksCount > 0);
443 306 : ReadTempFileBlock(gfbb->pfile, prevblkno, nodeBuffer->pageBuffer);
444 :
445 : /*
446 : * Now that we've read the block in memory, we can release its
447 : * on-disk block for reuse.
448 : */
449 306 : gistBuffersReleaseBlock(gfbb, prevblkno);
450 : }
451 : else
452 : {
453 : /* No more pages. Free memory. */
454 : Assert(nodeBuffer->blocksCount == 0);
455 30 : pfree(nodeBuffer->pageBuffer);
456 30 : nodeBuffer->pageBuffer = NULL;
457 : }
458 : }
459 64074 : return true;
460 : }
461 :
462 : /*
463 : * Select a currently unused block for writing to.
464 : */
465 : static long
466 324 : gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb)
467 : {
468 : /*
469 : * If there are multiple free blocks, we select the one appearing last in
470 : * freeBlocks[]. If there are none, assign the next block at the end of
471 : * the file (causing the file to be extended).
472 : */
473 324 : if (gfbb->nFreeBlocks > 0)
474 150 : return gfbb->freeBlocks[--gfbb->nFreeBlocks];
475 : else
476 174 : return gfbb->nFileBlocks++;
477 : }
478 :
479 : /*
480 : * Return a block# to the freelist.
481 : */
482 : static void
483 324 : gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum)
484 : {
485 : int ndx;
486 :
487 : /* Enlarge freeBlocks array if full. */
488 324 : if (gfbb->nFreeBlocks >= gfbb->freeBlocksLen)
489 : {
490 0 : gfbb->freeBlocksLen *= 2;
491 0 : gfbb->freeBlocks = (long *) repalloc(gfbb->freeBlocks,
492 0 : gfbb->freeBlocksLen *
493 : sizeof(long));
494 : }
495 :
496 : /* Add blocknum to array */
497 324 : ndx = gfbb->nFreeBlocks++;
498 324 : gfbb->freeBlocks[ndx] = blocknum;
499 324 : }
500 :
501 : /*
502 : * Free buffering build data structure.
503 : */
504 : void
505 6 : gistFreeBuildBuffers(GISTBuildBuffers *gfbb)
506 : {
507 : /* Close buffers file. */
508 6 : BufFileClose(gfbb->pfile);
509 :
510 : /* All other things will be freed on memory context release */
511 6 : }
512 :
513 : /*
514 : * Data structure representing information about node buffer for index tuples
515 : * relocation from split node buffer.
516 : */
517 : typedef struct
518 : {
519 : GISTENTRY entry[INDEX_MAX_KEYS];
520 : bool isnull[INDEX_MAX_KEYS];
521 : GISTPageSplitInfo *splitinfo;
522 : GISTNodeBuffer *nodeBuffer;
523 : } RelocationBufferInfo;
524 :
525 : /*
526 : * At page split, distribute tuples from the buffer of the split page to
527 : * new buffers for the created page halves. This also adjusts the downlinks
528 : * in 'splitinfo' to include the tuples in the buffers.
529 : */
530 : void
531 768 : gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
532 : Relation r, int level,
533 : Buffer buffer, List *splitinfo)
534 : {
535 : RelocationBufferInfo *relocationBuffersInfos;
536 : bool found;
537 : GISTNodeBuffer *nodeBuffer;
538 : BlockNumber blocknum;
539 : IndexTuple itup;
540 768 : int splitPagesCount = 0;
541 : GISTENTRY entry[INDEX_MAX_KEYS];
542 : bool isnull[INDEX_MAX_KEYS];
543 : GISTNodeBuffer oldBuf;
544 : ListCell *lc;
545 :
546 : /* If the split page doesn't have buffers, we have nothing to do. */
547 768 : if (!LEVEL_HAS_BUFFERS(level, gfbb))
548 756 : return;
549 :
550 : /*
551 : * Get the node buffer of the split page.
552 : */
553 12 : blocknum = BufferGetBlockNumber(buffer);
554 12 : nodeBuffer = hash_search(gfbb->nodeBuffersTab, &blocknum,
555 : HASH_FIND, &found);
556 12 : if (!found)
557 : {
558 : /* The page has no buffer, so we have nothing to do. */
559 0 : return;
560 : }
561 :
562 : /*
563 : * Make a copy of the old buffer, as we're going reuse it as the buffer
564 : * for the new left page, which is on the same block as the old page.
565 : * That's not true for the root page, but that's fine because we never
566 : * have a buffer on the root page anyway. The original algorithm as
567 : * described by Arge et al did, but it's of no use, as you might as well
568 : * read the tuples straight from the heap instead of the root buffer.
569 : */
570 : Assert(blocknum != GIST_ROOT_BLKNO);
571 12 : memcpy(&oldBuf, nodeBuffer, sizeof(GISTNodeBuffer));
572 12 : oldBuf.isTemp = true;
573 :
574 : /* Reset the old buffer, used for the new left page from now on */
575 12 : nodeBuffer->blocksCount = 0;
576 12 : nodeBuffer->pageBuffer = NULL;
577 12 : nodeBuffer->pageBlocknum = InvalidBlockNumber;
578 :
579 : /*
580 : * Allocate memory for information about relocation buffers.
581 : */
582 12 : splitPagesCount = list_length(splitinfo);
583 12 : relocationBuffersInfos = palloc_array(RelocationBufferInfo, splitPagesCount);
584 :
585 : /*
586 : * Fill relocation buffers information for node buffers of pages produced
587 : * by split.
588 : */
589 36 : foreach(lc, splitinfo)
590 : {
591 24 : GISTPageSplitInfo *si = (GISTPageSplitInfo *) lfirst(lc);
592 : GISTNodeBuffer *newNodeBuffer;
593 24 : int i = foreach_current_index(lc);
594 :
595 : /* Decompress parent index tuple of node buffer page. */
596 24 : gistDeCompressAtt(giststate, r,
597 : si->downlink, NULL, (OffsetNumber) 0,
598 24 : relocationBuffersInfos[i].entry,
599 24 : relocationBuffersInfos[i].isnull);
600 :
601 : /*
602 : * Create a node buffer for the page. The leftmost half is on the same
603 : * block as the old page before split, so for the leftmost half this
604 : * will return the original buffer. The tuples on the original buffer
605 : * were relinked to the temporary buffer, so the original one is now
606 : * empty.
607 : */
608 24 : newNodeBuffer = gistGetNodeBuffer(gfbb, giststate, BufferGetBlockNumber(si->buf), level);
609 :
610 24 : relocationBuffersInfos[i].nodeBuffer = newNodeBuffer;
611 24 : relocationBuffersInfos[i].splitinfo = si;
612 : }
613 :
614 : /*
615 : * Loop through all index tuples in the buffer of the page being split,
616 : * moving them to buffers for the new pages. We try to move each tuple to
617 : * the page that will result in the lowest penalty for the leading column
618 : * or, in the case of a tie, the lowest penalty for the earliest column
619 : * that is not tied.
620 : *
621 : * The page searching logic is very similar to gistchoose().
622 : */
623 29754 : while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
624 : {
625 : float best_penalty[INDEX_MAX_KEYS];
626 : int i,
627 : which;
628 : IndexTuple newtup;
629 : RelocationBufferInfo *targetBufferInfo;
630 :
631 29742 : gistDeCompressAtt(giststate, r,
632 : itup, NULL, (OffsetNumber) 0, entry, isnull);
633 :
634 : /* default to using first page (shouldn't matter) */
635 29742 : which = 0;
636 :
637 : /*
638 : * best_penalty[j] is the best penalty we have seen so far for column
639 : * j, or -1 when we haven't yet examined column j. Array entries to
640 : * the right of the first -1 are undefined.
641 : */
642 29742 : best_penalty[0] = -1;
643 :
644 : /*
645 : * Loop over possible target pages, looking for one to move this tuple
646 : * to.
647 : */
648 89214 : for (i = 0; i < splitPagesCount; i++)
649 : {
650 59478 : RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
651 : bool zero_penalty;
652 : int j;
653 :
654 59478 : zero_penalty = true;
655 :
656 : /* Loop over index attributes. */
657 110580 : for (j = 0; j < IndexRelationGetNumberOfKeyAttributes(r); j++)
658 : {
659 : float usize;
660 :
661 : /* Compute penalty for this column. */
662 59478 : usize = gistpenalty(giststate, j,
663 : &splitPageInfo->entry[j],
664 59478 : splitPageInfo->isnull[j],
665 59478 : &entry[j], isnull[j]);
666 59478 : if (usize > 0)
667 59472 : zero_penalty = false;
668 :
669 59478 : if (best_penalty[j] < 0 || usize < best_penalty[j])
670 : {
671 : /*
672 : * New best penalty for column. Tentatively select this
673 : * page as the target, and record the best penalty. Then
674 : * reset the next column's penalty to "unknown" (and
675 : * indirectly, the same for all the ones to its right).
676 : * This will force us to adopt this page's penalty values
677 : * as the best for all the remaining columns during
678 : * subsequent loop iterations.
679 : */
680 51102 : which = i;
681 51102 : best_penalty[j] = usize;
682 :
683 51102 : if (j < IndexRelationGetNumberOfKeyAttributes(r) - 1)
684 0 : best_penalty[j + 1] = -1;
685 : }
686 8376 : else if (best_penalty[j] == usize)
687 : {
688 : /*
689 : * The current page is exactly as good for this column as
690 : * the best page seen so far. The next iteration of this
691 : * loop will compare the next column.
692 : */
693 : }
694 : else
695 : {
696 : /*
697 : * The current page is worse for this column than the best
698 : * page seen so far. Skip the remaining columns and move
699 : * on to the next page, if any.
700 : */
701 8376 : zero_penalty = false; /* so outer loop won't exit */
702 8376 : break;
703 : }
704 : }
705 :
706 : /*
707 : * If we find a page with zero penalty for all columns, there's no
708 : * need to examine remaining pages; just break out of the loop and
709 : * return it.
710 : */
711 59478 : if (zero_penalty)
712 6 : break;
713 : }
714 :
715 : /* OK, "which" is the page index to push the tuple to */
716 29742 : targetBufferInfo = &relocationBuffersInfos[which];
717 :
718 : /* Push item to selected node buffer */
719 29742 : gistPushItupToNodeBuffer(gfbb, targetBufferInfo->nodeBuffer, itup);
720 :
721 : /* Adjust the downlink for this page, if needed. */
722 29742 : newtup = gistgetadjusted(r, targetBufferInfo->splitinfo->downlink,
723 : itup, giststate);
724 29742 : if (newtup)
725 : {
726 29736 : gistDeCompressAtt(giststate, r,
727 : newtup, NULL, (OffsetNumber) 0,
728 29736 : targetBufferInfo->entry,
729 29736 : targetBufferInfo->isnull);
730 :
731 29736 : targetBufferInfo->splitinfo->downlink = newtup;
732 : }
733 : }
734 :
735 12 : pfree(relocationBuffersInfos);
736 : }
737 :
738 :
739 : /*
740 : * Wrappers around BufFile operations. The main difference is that these
741 : * wrappers report errors with ereport(), so that the callers don't need
742 : * to check the return code.
743 : */
744 :
745 : static void
746 324 : ReadTempFileBlock(BufFile *file, long blknum, void *ptr)
747 : {
748 324 : if (BufFileSeekBlock(file, blknum) != 0)
749 0 : elog(ERROR, "could not seek to block %ld in temporary file", blknum);
750 324 : BufFileReadExact(file, ptr, BLCKSZ);
751 324 : }
752 :
753 : static void
754 324 : WriteTempFileBlock(BufFile *file, long blknum, const void *ptr)
755 : {
756 324 : if (BufFileSeekBlock(file, blknum) != 0)
757 0 : elog(ERROR, "could not seek to block %ld in temporary file", blknum);
758 324 : BufFileWrite(file, ptr, BLCKSZ);
759 324 : }
|