1 : /*-------------------------------------------------------------------------
2 : *
3 : * bufpage.c
4 : * POSTGRES standard buffer page code.
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/storage/page/bufpage.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/htup_details.h"
18 : #include "access/itup.h"
19 : #include "access/xlog.h"
20 : #include "pgstat.h"
21 : #include "storage/checksum.h"
22 : #include "utils/memdebug.h"
23 : #include "utils/memutils.h"
24 :
25 :
26 : /* GUC variable */
27 : bool ignore_checksum_failure = false;
28 :
29 :
30 : /* ----------------------------------------------------------------
31 : * Page support functions
32 : * ----------------------------------------------------------------
33 : */
34 :
35 : /*
36 : * PageInit
37 : * Initializes the contents of a page.
38 : * Note that we don't calculate an initial checksum here; that's not done
39 : * until it's time to write.
40 : */
41 : void
42 703888 : PageInit(Page page, Size pageSize, Size specialSize)
43 : {
44 703888 : PageHeader p = (PageHeader) page;
45 :
46 703888 : specialSize = MAXALIGN(specialSize);
47 :
48 : Assert(pageSize == BLCKSZ);
49 : Assert(pageSize > specialSize + SizeOfPageHeaderData);
50 :
51 : /* Make sure all fields of page are zero, as well as unused space */
52 703888 : MemSet(p, 0, pageSize);
53 :
54 703888 : p->pd_flags = 0;
55 703888 : p->pd_lower = SizeOfPageHeaderData;
56 703888 : p->pd_upper = pageSize - specialSize;
57 703888 : p->pd_special = pageSize - specialSize;
58 703888 : PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
59 : /* p->pd_prune_xid = InvalidTransactionId; done by above MemSet */
60 703888 : }
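
/*
 * A minimal usage sketch (editorial, not part of PostgreSQL proper): how an
 * access method might lay out a fresh page with some special space.  The
 * ExampleOpaqueData struct and example_init_page() are hypothetical names.
 */
#ifdef NOT_USED
typedef struct ExampleOpaqueData
{
	uint16		flags;			/* hypothetical AM-specific state */
} ExampleOpaqueData;

static void
example_init_page(Page page)
{
	/* PageInit MAXALIGNs the special size and zeroes the whole page */
	PageInit(page, BLCKSZ, sizeof(ExampleOpaqueData));

	/* the opaque area now lives at pd_special */
	((ExampleOpaqueData *) PageGetSpecialPointer(page))->flags = 0;
}
#endif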
61 :
62 :
63 : /*
64 : * PageIsVerified
65 : * Check that the page header and checksum (if any) appear valid.
66 : *
67 : * This is called when a page has just been read in from disk. The idea is
68 : * to cheaply detect trashed pages before we go nuts following bogus line
69 : * pointers, testing invalid transaction identifiers, etc.
70 : *
71 : * It turns out to be necessary to allow zeroed pages here too. Even though
72 : * this routine is *not* called when deliberately adding a page to a relation,
73 : * there are scenarios in which a zeroed page might be found in a table.
74 : * (Example: a backend extends a relation, then crashes before it can write
75 : * any WAL entry about the new page. The kernel will already have the
76 : * zeroed page in the file, and it will stay that way after restart.) So we
77 : * allow zeroed pages here, and are careful that the page access macros
78 : * treat such a page as empty and without free space. Eventually, VACUUM
79 : * will clean up such a page and make it usable.
80 : *
81 : * If flag PIV_LOG_WARNING/PIV_LOG_LOG is set, a WARNING/LOG message is logged
82 : * in the event of a checksum failure.
83 : *
84 : * If flag PIV_IGNORE_CHECKSUM_FAILURE is set, checksum failures will cause a
85 : * message about the failure to be emitted, but will not cause
86 : * PageIsVerified() to return false.
87 : *
88 : * To allow the caller to report statistics about checksum failures,
89 : * *checksum_failure_p can be passed in. Note that there may be checksum
90 : * failures even if this function returns true, due to
91 : * PIV_IGNORE_CHECKSUM_FAILURE.
92 : */
93 : bool
94 2541290 : PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_failure_p)
95 : {
96 2541290 : const PageHeaderData *p = (const PageHeaderData *) page;
97 : size_t *pagebytes;
98 2541290 : bool checksum_failure = false;
99 2541290 : bool header_sane = false;
100 2541290 : uint16 checksum = 0;
101 :
102 2541290 : if (checksum_failure_p)
103 2541290 : *checksum_failure_p = false;
104 :
105 : /*
106 : * Don't verify page data unless the page passes a basic non-zero test
107 : */
108 2541290 : if (!PageIsNew(page))
109 : {
110 2532758 : if (DataChecksumsEnabled())
111 : {
112 2516308 : checksum = pg_checksum_page(page, blkno);
113 :
114 2516308 : if (checksum != p->pd_checksum)
115 : {
116 64 : checksum_failure = true;
117 64 : if (checksum_failure_p)
118 64 : *checksum_failure_p = true;
119 : }
120 : }
121 :
122 : /*
123 : * The following checks don't prove the header is correct, only that
124 : * it looks sane enough to allow into the buffer pool. Later usage of
125 : * the block can still reveal problems, which is why we offer the
126 : * checksum option.
127 : */
128 2532758 : if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
129 2532758 : p->pd_lower <= p->pd_upper &&
130 2532758 : p->pd_upper <= p->pd_special &&
131 2532758 : p->pd_special <= BLCKSZ &&
132 2532594 : p->pd_special == MAXALIGN(p->pd_special))
133 2532594 : header_sane = true;
134 :
135 2532758 : if (header_sane && !checksum_failure)
136 2532542 : return true;
137 : }
138 :
139 : /* Check all-zeroes case */
140 8748 : pagebytes = (size_t *) page;
141 :
142 8748 : if (pg_memory_is_all_zeros(pagebytes, BLCKSZ))
143 8532 : return true;
144 :
145 : /*
146 : * Throw a WARNING/LOG, as instructed by PIV_LOG_*, if the checksum fails,
147 : * but only after we've checked for the all-zeroes case.
148 : */
149 216 : if (checksum_failure)
150 : {
151 64 : if ((flags & (PIV_LOG_WARNING | PIV_LOG_LOG)) != 0)
152 64 : ereport(flags & PIV_LOG_WARNING ? WARNING : LOG,
153 : (errcode(ERRCODE_DATA_CORRUPTED),
154 : errmsg("page verification failed, calculated checksum %u but expected %u",
155 : checksum, p->pd_checksum)));
156 :
157 64 : if (header_sane && (flags & PIV_IGNORE_CHECKSUM_FAILURE))
158 24 : return true;
159 : }
160 :
161 192 : return false;
162 : }
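
/*
 * A minimal caller sketch (editorial, not part of PostgreSQL proper):
 * verifying a page just read from disk.  example_verify_after_read() is a
 * hypothetical name; the real callers live in the buffer manager and also
 * update checksum-failure statistics.
 */
#ifdef NOT_USED
static bool
example_verify_after_read(PageData *page, BlockNumber blkno)
{
	bool		checksum_failed;
	bool		ok;

	/* log a WARNING on failure; optionally tolerate bad checksums */
	ok = PageIsVerified(page, blkno,
						PIV_LOG_WARNING | PIV_IGNORE_CHECKSUM_FAILURE,
						&checksum_failed);

	if (checksum_failed)
	{
		/* a caller could bump its checksum-failure counters here */
	}
	return ok;
}
#endif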
163 :
164 :
165 : /*
166 : * PageAddItemExtended
167 : *
168 : * Add an item to a page. Return value is the offset at which it was
169 : * inserted, or InvalidOffsetNumber if the item is not inserted for any
170 : * reason. A WARNING is issued indicating the reason for the refusal.
171 : *
172 : * offsetNumber must be either InvalidOffsetNumber to specify finding a
173 : * free line pointer, or a value between FirstOffsetNumber and one past
174 : * the last existing item, to specify using that particular line pointer.
175 : *
176 : * If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
177 : * the item at the specified offsetNumber, which must be either a
178 : * currently-unused line pointer, or one past the last existing item.
179 : *
180 : * If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
181 : * the item at the specified offsetNumber, moving existing items later
182 : * in the array to make room.
183 : *
184 : * If offsetNumber is not valid, then assign a slot by finding the first
185 : * one that is both unused and deallocated.
186 : *
187 : * If flag PAI_IS_HEAP is set, we enforce that there can't be more than
188 : * MaxHeapTuplesPerPage line pointers on the page.
189 : *
190 : * !!! EREPORT(ERROR) IS DISALLOWED HERE !!!
191 : */
192 : OffsetNumber
193 70386594 : PageAddItemExtended(Page page,
194 : Item item,
195 : Size size,
196 : OffsetNumber offsetNumber,
197 : int flags)
198 : {
199 70386594 : PageHeader phdr = (PageHeader) page;
200 : Size alignedSize;
201 : int lower;
202 : int upper;
203 : ItemId itemId;
204 : OffsetNumber limit;
205 70386594 : bool needshuffle = false;
206 :
207 : /*
208 : * Be wary about corrupted page pointers
209 : */
210 70386594 : if (phdr->pd_lower < SizeOfPageHeaderData ||
211 70386594 : phdr->pd_lower > phdr->pd_upper ||
212 70386594 : phdr->pd_upper > phdr->pd_special ||
213 70386594 : phdr->pd_special > BLCKSZ)
214 0 : ereport(PANIC,
215 : (errcode(ERRCODE_DATA_CORRUPTED),
216 : errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
217 : phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
218 :
219 : /*
220 : * Select offsetNumber to place the new item at
221 : */
222 70386594 : limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));
223 :
224 : /* was offsetNumber passed in? */
225 70386594 : if (OffsetNumberIsValid(offsetNumber))
226 : {
227 : /* yes, check it */
228 45998958 : if ((flags & PAI_OVERWRITE) != 0)
229 : {
230 3148774 : if (offsetNumber < limit)
231 : {
232 23774 : itemId = PageGetItemId(page, offsetNumber);
233 23774 : if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
234 : {
235 0 : elog(WARNING, "will not overwrite a used ItemId");
236 0 : return InvalidOffsetNumber;
237 : }
238 : }
239 : }
240 : else
241 : {
242 42850184 : if (offsetNumber < limit)
243 6701986 : needshuffle = true; /* need to move existing linp's */
244 : }
245 : }
246 : else
247 : {
248 : /* offsetNumber was not passed in, so find a free slot */
249 : /* if no free slot, we'll put it at limit (1st open slot) */
250 24387636 : if (PageHasFreeLinePointers(page))
251 : {
252 : /*
253 : * Scan line pointer array to locate a "recyclable" (unused)
254 : * ItemId.
255 : *
256 : * Always use earlier items first. PageTruncateLinePointerArray
257 : * can only truncate unused items when they appear as a contiguous
258 : * group at the end of the line pointer array.
259 : */
260 18326526 : for (offsetNumber = FirstOffsetNumber;
261 : offsetNumber < limit; /* limit is maxoff+1 */
262 18050748 : offsetNumber++)
263 : {
264 18311246 : itemId = PageGetItemId(page, offsetNumber);
265 :
266 : /*
267 : * We check for no storage as well, just to be paranoid;
268 : * unused items should never have storage. Assert() that the
269 : * invariant is respected too.
270 : */
271 : Assert(ItemIdIsUsed(itemId) || !ItemIdHasStorage(itemId));
272 :
273 18311246 : if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
274 260498 : break;
275 : }
276 275778 : if (offsetNumber >= limit)
277 : {
278 : /* the hint is wrong, so reset it */
279 15280 : PageClearHasFreeLinePointers(page);
280 : }
281 : }
282 : else
283 : {
284 : /* don't bother searching if hint says there's no free slot */
285 24111858 : offsetNumber = limit;
286 : }
287 : }
288 :
289 : /* Reject placing items beyond the first unused line pointer */
290 70386594 : if (offsetNumber > limit)
291 : {
292 0 : elog(WARNING, "specified item offset is too large");
293 0 : return InvalidOffsetNumber;
294 : }
295 :
296 : /* Reject placing items beyond heap boundary, if heap */
297 70386594 : if ((flags & PAI_IS_HEAP) != 0 && offsetNumber > MaxHeapTuplesPerPage)
298 : {
299 0 : elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
300 0 : return InvalidOffsetNumber;
301 : }
302 :
303 : /*
304 : * Compute new lower and upper pointers for page, see if it'll fit.
305 : *
306 : * Note: do arithmetic as signed ints, to avoid mistakes if, say,
307 : * alignedSize > pd_upper.
308 : */
309 70386594 : if (offsetNumber == limit || needshuffle)
310 70102322 : lower = phdr->pd_lower + sizeof(ItemIdData);
311 : else
312 284272 : lower = phdr->pd_lower;
313 :
314 70386594 : alignedSize = MAXALIGN(size);
315 :
316 70386594 : upper = (int) phdr->pd_upper - (int) alignedSize;
317 :
318 70386594 : if (lower > upper)
319 0 : return InvalidOffsetNumber;
320 :
321 : /*
322 : * OK to insert the item. First, shuffle the existing pointers if needed.
323 : */
324 70386594 : itemId = PageGetItemId(page, offsetNumber);
325 :
326 70386594 : if (needshuffle)
327 6701986 : memmove(itemId + 1, itemId,
328 6701986 : (limit - offsetNumber) * sizeof(ItemIdData));
329 :
330 : /* set the line pointer */
331 70386594 : ItemIdSetNormal(itemId, upper, size);
332 :
333 : /*
334 : * Items normally contain no uninitialized bytes. Core bufpage consumers
335 : * conform, but this is not a necessary coding rule; a new index AM could
336 : * opt to depart from it. However, data type input functions and other
337 : * C-language functions that synthesize datums should initialize all
338 : * bytes; datumIsEqual() relies on this. Testing here, along with the
339 : * similar check in printtup(), helps to catch such mistakes.
340 : *
341 : * Values of the "name" type retrieved via index-only scans may contain
342 : * uninitialized bytes; see comment in btrescan(). Valgrind will report
343 : * this as an error, but it is safe to ignore.
344 : */
345 : VALGRIND_CHECK_MEM_IS_DEFINED(item, size);
346 :
347 : /* copy the item's data onto the page */
348 70386594 : memcpy((char *) page + upper, item, size);
349 :
350 : /* adjust page header */
351 70386594 : phdr->pd_lower = (LocationIndex) lower;
352 70386594 : phdr->pd_upper = (LocationIndex) upper;
353 :
354 70386594 : return offsetNumber;
355 : }
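
/*
 * A minimal usage sketch (editorial, not part of PostgreSQL proper):
 * appending a heap tuple through the PageAddItem() wrapper from bufpage.h,
 * letting the page code pick a free line pointer.  example_append_heap_tuple()
 * is a hypothetical name; real callers also dirty the buffer and write WAL.
 */
#ifdef NOT_USED
static bool
example_append_heap_tuple(Page page, HeapTuple tuple)
{
	OffsetNumber offnum;

	/* InvalidOffsetNumber => find a free slot; is_heap enforces the limit */
	offnum = PageAddItem(page, (Item) tuple->t_data, tuple->t_len,
						 InvalidOffsetNumber, false, true);
	return OffsetNumberIsValid(offnum);
}
#endif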
356 :
357 :
358 : /*
359 : * PageGetTempPage
360 : * Get a temporary page in local memory for special processing.
361 : * The returned page is not initialized at all; caller must do that.
362 : */
363 : Page
364 22608 : PageGetTempPage(const PageData *page)
365 : {
366 : Size pageSize;
367 : Page temp;
368 :
369 22608 : pageSize = PageGetPageSize(page);
370 22608 : temp = (Page) palloc(pageSize);
371 :
372 22608 : return temp;
373 : }
374 :
375 : /*
376 : * PageGetTempPageCopy
377 : * Get a temporary page in local memory for special processing.
378 : * The page is initialized by copying the contents of the given page.
379 : */
380 : Page
381 11826 : PageGetTempPageCopy(const PageData *page)
382 : {
383 : Size pageSize;
384 : Page temp;
385 :
386 11826 : pageSize = PageGetPageSize(page);
387 11826 : temp = (Page) palloc(pageSize);
388 :
389 11826 : memcpy(temp, page, pageSize);
390 :
391 11826 : return temp;
392 : }
393 :
394 : /*
395 : * PageGetTempPageCopySpecial
396 : * Get a temporary page in local memory for special processing.
397 : * The page is PageInit'd with the same special-space size as the
398 : * given page, and the special space is copied from the given page.
399 : */
400 : Page
401 60222 : PageGetTempPageCopySpecial(const PageData *page)
402 : {
403 : Size pageSize;
404 : Page temp;
405 :
406 60222 : pageSize = PageGetPageSize(page);
407 60222 : temp = (Page) palloc(pageSize);
408 :
409 60222 : PageInit(temp, pageSize, PageGetSpecialSize(page));
410 180666 : memcpy(PageGetSpecialPointer(temp),
411 60222 : PageGetSpecialPointer(page),
412 60222 : PageGetSpecialSize(page));
413 :
414 60222 : return temp;
415 : }
416 :
417 : /*
418 : * PageRestoreTempPage
419 : * Copy temporary page back to permanent page after special processing
420 : * and release the temporary page.
421 : */
422 : void
423 79578 : PageRestoreTempPage(Page tempPage, Page oldPage)
424 : {
425 : Size pageSize;
426 :
427 79578 : pageSize = PageGetPageSize(tempPage);
428 79578 : memcpy(oldPage, tempPage, pageSize);
429 :
430 79578 : pfree(tempPage);
431 79578 : }
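
/*
 * A minimal workflow sketch (editorial, not part of PostgreSQL proper): the
 * usual temp-page cycle, similar in spirit to what index AMs do when
 * rewriting a page.  example_rewrite_page() is a hypothetical name.
 */
#ifdef NOT_USED
static void
example_rewrite_page(Page page)
{
	/* new page with the same special area contents, empty item area */
	Page		temp = PageGetTempPageCopySpecial(page);

	/* ... PageAddItem() the surviving tuples into "temp" here ... */

	/* copy the rebuilt image back and free the temporary copy */
	PageRestoreTempPage(temp, page);
}
#endif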
432 :
433 : /*
434 : * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
435 : */
436 : typedef struct itemIdCompactData
437 : {
438 : uint16 offsetindex; /* linp array index */
439 : int16 itemoff; /* page offset of item data */
440 : uint16 alignedlen; /* MAXALIGN(item data len) */
441 : } itemIdCompactData;
442 : typedef itemIdCompactData *itemIdCompact;
443 :
444 : /*
445 : * After removing or marking some line pointers unused, move the tuples to
446 : * remove the gaps caused by the removed items and reorder them back into
447 : * reverse line pointer order in the page.
448 : *
449 : * This function can often be fairly hot, so it pays to take some measures to
450 : * make it as optimal as possible.
451 : *
452 : * Callers may pass 'presorted' as true if the 'itemidbase' array is sorted in
453 : * descending order of itemoff. When this is true we can just memmove()
454 : * tuples towards the end of the page. This is quite a common case as it's
455 : * the order that tuples are initially inserted into pages. Once this
456 : * function has defragmented the tuples in a page, any new line pointers
457 : * added to the page will keep that presorted order, so this case remains
458 : * very common for tables that are frequently updated.
459 : *
460 : * When the 'itemidbase' array is not presorted then we're unable to just
461 : * memmove() tuples around freely. Doing so could cause us to overwrite the
462 : * memory belonging to a tuple we've not moved yet. In this case, we copy all
463 : * the tuples that need to be moved into a temporary buffer. We can then
464 : * simply memcpy() out of that temp buffer back into the page at the correct
465 : * location. Tuples are copied back into the page in the same order as the
466 : * 'itemidbase' array, so we end up reordering the tuples back into reverse
467 : * line pointer order. This will increase the chances of hitting the
468 : * presorted case the next time around.
469 : *
470 : * Callers must ensure that nitems is > 0
471 : */
472 : static void
473 132176 : compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
474 : {
475 132176 : PageHeader phdr = (PageHeader) page;
476 : Offset upper;
477 : Offset copy_tail;
478 : Offset copy_head;
479 : itemIdCompact itemidptr;
480 : int i;
481 :
482 : /* Code within will not work correctly if nitems == 0 */
483 : Assert(nitems > 0);
484 :
485 132176 : if (presorted)
486 : {
487 :
488 : #ifdef USE_ASSERT_CHECKING
489 : {
490 : /*
491 : * Verify we've not gotten any new callers that are incorrectly
492 : * passing a true presorted value.
493 : */
494 : Offset lastoff = phdr->pd_special;
495 :
496 : for (i = 0; i < nitems; i++)
497 : {
498 : itemidptr = &itemidbase[i];
499 :
500 : Assert(lastoff > itemidptr->itemoff);
501 :
502 : lastoff = itemidptr->itemoff;
503 : }
504 : }
505 : #endif /* USE_ASSERT_CHECKING */
506 :
507 : /*
508 : * 'itemidbase' is already in the optimal order, i.e., lower item
509 : * pointers have a higher offset. This allows us to memmove() the
510 : * tuples up to the end of the page without having to worry about
511 : * overwriting other tuples that have not been moved yet.
512 : *
513 : * There's a good chance that there are tuples already right at the
514 : * end of the page that we can simply skip over because they're
515 : * already in the correct location within the page. We'll do that
516 : * first...
517 : */
518 99790 : upper = phdr->pd_special;
519 99790 : i = 0;
520 : do
521 : {
522 1535430 : itemidptr = &itemidbase[i];
523 1535430 : if (upper != itemidptr->itemoff + itemidptr->alignedlen)
524 90496 : break;
525 1444934 : upper -= itemidptr->alignedlen;
526 :
527 1444934 : i++;
528 1444934 : } while (i < nitems);
529 :
530 : /*
531 : * Now that we've found the first tuple that needs to be moved, we can
532 : * do the tuple compactification. We try to make the fewest
533 : * memmove() calls and only call memmove() when there's a gap. When
534 : * we see a gap we just move all tuples after the gap up until the
535 : * point of the last move operation.
536 : */
537 99790 : copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
538 2286618 : for (; i < nitems; i++)
539 : {
540 : ItemId lp;
541 :
542 2186828 : itemidptr = &itemidbase[i];
543 2186828 : lp = PageGetItemId(page, itemidptr->offsetindex + 1);
544 :
545 2186828 : if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
546 : {
547 264924 : memmove((char *) page + upper,
548 264924 : page + copy_head,
549 264924 : copy_tail - copy_head);
550 :
551 : /*
552 : * We've now moved all tuples already seen, but not the
553 : * current tuple, so we set the copy_tail to the end of this
554 : * tuple so it can be moved in another iteration of the loop.
555 : */
556 264924 : copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
557 : }
558 : /* shift the target offset down by the length of this tuple */
559 2186828 : upper -= itemidptr->alignedlen;
560 : /* point the copy_head to the start of this tuple */
561 2186828 : copy_head = itemidptr->itemoff;
562 :
563 : /* update the line pointer to reference the new offset */
564 2186828 : lp->lp_off = upper;
565 : }
566 :
567 : /* move the remaining tuples. */
568 99790 : memmove((char *) page + upper,
569 99790 : page + copy_head,
570 99790 : copy_tail - copy_head);
571 : }
572 : else
573 : {
574 : PGAlignedBlock scratch;
575 32386 : char *scratchptr = scratch.data;
576 :
577 : /*
578 : * Non-presorted case: The tuples in the itemidbase array may be in
579 : * any order. So, in order to move these to the end of the page we
580 : * must make a temp copy of each tuple that needs to be moved before
581 : * we copy them back into the page at the new offset.
582 : *
583 : * If a large percentage of tuples have been pruned (>75%) then we'll
584 : * copy these into the temp buffer tuple-by-tuple, otherwise, we'll
585 : * just do a single memcpy() for all tuples that need to be moved.
586 : * When so many tuples have been removed there's likely to be a lot of
587 : * gaps and it's unlikely that many non-movable tuples remain at the
588 : * end of the page.
589 : */
590 32386 : if (nitems < PageGetMaxOffsetNumber(page) / 4)
591 : {
592 1846 : i = 0;
593 : do
594 : {
595 34940 : itemidptr = &itemidbase[i];
596 34940 : memcpy(scratchptr + itemidptr->itemoff, page + itemidptr->itemoff,
597 34940 : itemidptr->alignedlen);
598 34940 : i++;
599 34940 : } while (i < nitems);
600 :
601 : /* Set things up for the compactification code below */
602 1846 : i = 0;
603 1846 : itemidptr = &itemidbase[0];
604 1846 : upper = phdr->pd_special;
605 : }
606 : else
607 : {
608 30540 : upper = phdr->pd_special;
609 :
610 : /*
611 : * Many tuples are likely to already be in the correct location.
612 : * There's no need to copy these into the temp buffer. Instead
613 : * we'll just skip forward in the itemidbase array to the position
614 : * that we do need to move tuples from so that the code below just
615 : * leaves these ones alone.
616 : */
617 30540 : i = 0;
618 : do
619 : {
620 774486 : itemidptr = &itemidbase[i];
621 774486 : if (upper != itemidptr->itemoff + itemidptr->alignedlen)
622 30540 : break;
623 743946 : upper -= itemidptr->alignedlen;
624 :
625 743946 : i++;
626 743946 : } while (i < nitems);
627 :
628 : /* Copy all tuples that need to be moved into the temp buffer */
629 30540 : memcpy(scratchptr + phdr->pd_upper,
630 30540 : page + phdr->pd_upper,
631 30540 : upper - phdr->pd_upper);
632 : }
633 :
634 : /*
635 : * Do the tuple compactification. itemidptr is already pointing to
636 : * the first tuple that we're going to move. Here we collapse the
637 : * memcpy calls for adjacent tuples into a single call. This is done
638 : * by delaying the memcpy call until we find a gap that needs to be
639 : * closed.
640 : */
641 32386 : copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
642 3489020 : for (; i < nitems; i++)
643 : {
644 : ItemId lp;
645 :
646 3456634 : itemidptr = &itemidbase[i];
647 3456634 : lp = PageGetItemId(page, itemidptr->offsetindex + 1);
648 :
649 : /* copy pending tuples when we detect a gap */
650 3456634 : if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
651 : {
652 947490 : memcpy((char *) page + upper,
653 947490 : scratchptr + copy_head,
654 947490 : copy_tail - copy_head);
655 :
656 : /*
657 : * We've now copied all tuples already seen, but not the
658 : * current tuple, so we set the copy_tail to the end of this
659 : * tuple.
660 : */
661 947490 : copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
662 : }
663 : /* shift the target offset down by the length of this tuple */
664 3456634 : upper -= itemidptr->alignedlen;
665 : /* point the copy_head to the start of this tuple */
666 3456634 : copy_head = itemidptr->itemoff;
667 :
668 : /* update the line pointer to reference the new offset */
669 3456634 : lp->lp_off = upper;
670 : }
671 :
672 : /* Copy the remaining chunk */
673 32386 : memcpy((char *) page + upper,
674 32386 : scratchptr + copy_head,
675 32386 : copy_tail - copy_head);
676 : }
677 :
678 132176 : phdr->pd_upper = upper;
679 132176 : }
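
/*
 * A worked illustration of the presorted path above (editorial, not part of
 * PostgreSQL proper): suppose pd_special = 8192 and two live tuples of
 * aligned length 64 sit at itemoff 8128 and 7936.  The first already ends
 * exactly at 8192, so the skip loop passes over it and leaves upper = 8128.
 * The second ends at 8000 != 8128, so it must move: its lp_off becomes 8064
 * and the final memmove() copies the 64 bytes at 7936 up to 8064, closing
 * the 128-byte gap.
 */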
680 :
681 : /*
682 : * PageRepairFragmentation
683 : *
684 : * Frees fragmented space on a heap page following pruning.
685 : *
686 : * This routine is usable for heap pages only, but see PageIndexMultiDelete.
687 : *
688 : * This routine removes unused line pointers from the end of the line pointer
689 : * array. This is possible when dead heap-only tuples get removed by pruning,
690 : * especially when there were HOT chains with several tuples each beforehand.
691 : *
692 : * Caller had better have a full cleanup lock on page's buffer. As a side
693 : * effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
694 : * needed. Caller might also need to account for a reduction in the length of
695 : * the line pointer array following array truncation.
696 : */
697 : void
698 118686 : PageRepairFragmentation(Page page)
699 : {
700 118686 : Offset pd_lower = ((PageHeader) page)->pd_lower;
701 118686 : Offset pd_upper = ((PageHeader) page)->pd_upper;
702 118686 : Offset pd_special = ((PageHeader) page)->pd_special;
703 : Offset last_offset;
704 : itemIdCompactData itemidbase[MaxHeapTuplesPerPage];
705 : itemIdCompact itemidptr;
706 : ItemId lp;
707 : int nline,
708 : nstorage,
709 : nunused;
710 118686 : OffsetNumber finalusedlp = InvalidOffsetNumber;
711 : int i;
712 : Size totallen;
713 118686 : bool presorted = true; /* For now */
714 :
715 : /*
716 : * It's worth the trouble to be more paranoid here than in most places,
717 : * because we are about to reshuffle data in (what is usually) a shared
718 : * disk buffer. If we aren't careful then corrupted pointers, lengths,
719 : * etc could cause us to clobber adjacent disk buffers, spreading the data
720 : * loss further. So, check everything.
721 : */
722 118686 : if (pd_lower < SizeOfPageHeaderData ||
723 118686 : pd_lower > pd_upper ||
724 118686 : pd_upper > pd_special ||
725 118686 : pd_special > BLCKSZ ||
726 118686 : pd_special != MAXALIGN(pd_special))
727 0 : ereport(ERROR,
728 : (errcode(ERRCODE_DATA_CORRUPTED),
729 : errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
730 : pd_lower, pd_upper, pd_special)));
731 :
732 : /*
733 : * Run through the line pointer array and collect data about live items.
734 : */
735 118686 : nline = PageGetMaxOffsetNumber(page);
736 118686 : itemidptr = itemidbase;
737 118686 : nunused = totallen = 0;
738 118686 : last_offset = pd_special;
739 9713500 : for (i = FirstOffsetNumber; i <= nline; i++)
740 : {
741 9594814 : lp = PageGetItemId(page, i);
742 9594814 : if (ItemIdIsUsed(lp))
743 : {
744 9265060 : if (ItemIdHasStorage(lp))
745 : {
746 3509360 : itemidptr->offsetindex = i - 1;
747 3509360 : itemidptr->itemoff = ItemIdGetOffset(lp);
748 :
749 3509360 : if (last_offset > itemidptr->itemoff)
750 2978426 : last_offset = itemidptr->itemoff;
751 : else
752 530934 : presorted = false;
753 :
754 3509360 : if (unlikely(itemidptr->itemoff < (int) pd_upper ||
755 : itemidptr->itemoff >= (int) pd_special))
756 0 : ereport(ERROR,
757 : (errcode(ERRCODE_DATA_CORRUPTED),
758 : errmsg("corrupted line pointer: %u",
759 : itemidptr->itemoff)));
760 3509360 : itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
761 3509360 : totallen += itemidptr->alignedlen;
762 3509360 : itemidptr++;
763 : }
764 :
765 9265060 : finalusedlp = i; /* Could be the final non-LP_UNUSED item */
766 : }
767 : else
768 : {
769 : /* Unused entries should have lp_len = 0, but make sure */
770 : Assert(!ItemIdHasStorage(lp));
771 329754 : ItemIdSetUnused(lp);
772 329754 : nunused++;
773 : }
774 : }
775 :
776 118686 : nstorage = itemidptr - itemidbase;
777 118686 : if (nstorage == 0)
778 : {
779 : /* Page is completely empty, so just reset it quickly */
780 22260 : ((PageHeader) page)->pd_upper = pd_special;
781 : }
782 : else
783 : {
784 : /* Need to compact the page the hard way */
785 96426 : if (totallen > (Size) (pd_special - pd_lower))
786 0 : ereport(ERROR,
787 : (errcode(ERRCODE_DATA_CORRUPTED),
788 : errmsg("corrupted item lengths: total %u, available space %u",
789 : (unsigned int) totallen, pd_special - pd_lower)));
790 :
791 96426 : compactify_tuples(itemidbase, nstorage, page, presorted);
792 : }
793 :
794 118686 : if (finalusedlp != nline)
795 : {
796 : /* The last line pointer is not the last used line pointer */
797 3606 : int nunusedend = nline - finalusedlp;
798 :
799 : Assert(nunused >= nunusedend && nunusedend > 0);
800 :
801 : /* remove trailing unused line pointers from the count */
802 3606 : nunused -= nunusedend;
803 : /* truncate the line pointer array */
804 3606 : ((PageHeader) page)->pd_lower -= (sizeof(ItemIdData) * nunusedend);
805 : }
806 :
807 : /* Set hint bit for PageAddItemExtended */
808 118686 : if (nunused > 0)
809 27630 : PageSetHasFreeLinePointers(page);
810 : else
811 91056 : PageClearHasFreeLinePointers(page);
812 118686 : }
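
/*
 * A minimal caller sketch (editorial, not part of PostgreSQL proper):
 * pruning-style usage.  example_prune_page() is a hypothetical name and
 * assumes storage/bufmgr.h for Buffer, BufferGetPage() and
 * MarkBufferDirty(); the caller must hold a cleanup lock on the buffer.
 */
#ifdef NOT_USED
static void
example_prune_page(Buffer buffer)
{
	Page		page = BufferGetPage(buffer);

	/* ... ItemIdSetUnused() the reapable line pointers first ... */

	PageRepairFragmentation(page);
	MarkBufferDirty(buffer);
}
#endif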
813 :
814 : /*
815 : * PageTruncateLinePointerArray
816 : *
817 : * Removes unused line pointers at the end of the line pointer array.
818 : *
819 : * This routine is usable for heap pages only. It is called by VACUUM during
820 : * its second pass over the heap. We expect at least one LP_UNUSED line
821 : * pointer on the page (if VACUUM didn't have an LP_DEAD item on the page that
822 : * it just set to LP_UNUSED then it should not call here).
823 : *
824 : * We avoid truncating the line pointer array to 0 items, if necessary by
825 : * leaving behind a single remaining LP_UNUSED item. This is a little
826 : * arbitrary, but it seems like a good idea to avoid leaving a PageIsEmpty()
827 : * page behind.
828 : *
829 : * Caller can have either an exclusive lock or a full cleanup lock on page's
830 : * buffer. The page's PD_HAS_FREE_LINES hint bit will be set or unset based
831 : * on whether or not we leave behind any remaining LP_UNUSED items.
832 : */
833 : void
834 24616 : PageTruncateLinePointerArray(Page page)
835 : {
836 24616 : PageHeader phdr = (PageHeader) page;
837 24616 : bool countdone = false,
838 24616 : sethint = false;
839 24616 : int nunusedend = 0;
840 :
841 : /* Scan line pointer array back-to-front */
842 1578504 : for (int i = PageGetMaxOffsetNumber(page); i >= FirstOffsetNumber; i--)
843 : {
844 1577666 : ItemId lp = PageGetItemId(page, i);
845 :
846 1577666 : if (!countdone && i > FirstOffsetNumber)
847 : {
848 : /*
849 : * Still determining which line pointers from the end of the array
850 : * will be truncated away. Either count another line pointer as
851 : * safe to truncate, or notice that it's not safe to truncate
852 : * additional line pointers (stop counting line pointers).
853 : */
854 1398164 : if (!ItemIdIsUsed(lp))
855 1385808 : nunusedend++;
856 : else
857 12356 : countdone = true;
858 : }
859 : else
860 : {
861 : /*
862 : * Once we've stopped counting we still need to figure out if
863 : * there are any remaining LP_UNUSED line pointers somewhere more
864 : * towards the front of the array.
865 : */
866 179502 : if (!ItemIdIsUsed(lp))
867 : {
868 : /*
869 : * This is an unused line pointer that we won't be truncating
870 : * away -- so there is at least one. Set hint on page.
871 : */
872 23778 : sethint = true;
873 23778 : break;
874 : }
875 : }
876 : }
877 :
878 24616 : if (nunusedend > 0)
879 : {
880 15076 : phdr->pd_lower -= sizeof(ItemIdData) * nunusedend;
881 :
882 : #ifdef CLOBBER_FREED_MEMORY
883 : memset((char *) page + phdr->pd_lower, 0x7F,
884 : sizeof(ItemIdData) * nunusedend);
885 : #endif
886 : }
887 : else
888 : Assert(sethint);
889 :
890 : /* Set hint bit for PageAddItemExtended */
891 24616 : if (sethint)
892 23778 : PageSetHasFreeLinePointers(page);
893 : else
894 838 : PageClearHasFreeLinePointers(page);
895 24616 : }
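
/*
 * A worked illustration of the scan above (editorial, not part of
 * PostgreSQL proper): with five line pointers [used, unused, used, unused,
 * unused], the back-to-front pass counts nunusedend = 2, stops counting at
 * the used slot 3, then keeps walking only until it sees the unused slot 2,
 * which sets the hint.  pd_lower shrinks by two ItemIdData, truncating
 * slots 4 and 5.
 */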
896 :
897 : /*
898 : * PageGetFreeSpace
899 : * Returns the size of the free (allocatable) space on a page,
900 : * reduced by the space needed for a new line pointer.
901 : *
902 : * Note: this should usually only be used on index pages. Use
903 : * PageGetHeapFreeSpace on heap pages.
904 : */
905 : Size
906 59086760 : PageGetFreeSpace(const PageData *page)
907 : {
908 59086760 : const PageHeaderData *phdr = (const PageHeaderData *) page;
909 : int space;
910 :
911 : /*
912 : * Use signed arithmetic here so that we behave sensibly if pd_lower >
913 : * pd_upper.
914 : */
915 59086760 : space = (int) phdr->pd_upper - (int) phdr->pd_lower;
916 :
917 59086760 : if (space < (int) sizeof(ItemIdData))
918 15406 : return 0;
919 59071354 : space -= sizeof(ItemIdData);
920 :
921 59071354 : return (Size) space;
922 : }
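
/*
 * A minimal usage sketch (editorial, not part of PostgreSQL proper): a fit
 * test for a new index tuple.  example_index_tuple_fits() is a hypothetical
 * name.
 */
#ifdef NOT_USED
static bool
example_index_tuple_fits(const PageData *page, Size itemsz)
{
	/* PageGetFreeSpace() already reserves room for one new ItemIdData */
	return MAXALIGN(itemsz) <= PageGetFreeSpace(page);
}
#endif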
923 :
924 : /*
925 : * PageGetFreeSpaceForMultipleTuples
926 : * Returns the size of the free (allocatable) space on a page,
927 : * reduced by the space needed for multiple new line pointers.
928 : *
929 : * Note: this should usually only be used on index pages. Use
930 : * PageGetHeapFreeSpace on heap pages.
931 : */
932 : Size
933 132432 : PageGetFreeSpaceForMultipleTuples(const PageData *page, int ntups)
934 : {
935 132432 : const PageHeaderData *phdr = (const PageHeaderData *) page;
936 : int space;
937 :
938 : /*
939 : * Use signed arithmetic here so that we behave sensibly if pd_lower >
940 : * pd_upper.
941 : */
942 132432 : space = (int) phdr->pd_upper - (int) phdr->pd_lower;
943 :
944 132432 : if (space < (int) (ntups * sizeof(ItemIdData)))
945 0 : return 0;
946 132432 : space -= ntups * sizeof(ItemIdData);
947 :
948 132432 : return (Size) space;
949 : }
950 :
951 : /*
952 : * PageGetExactFreeSpace
953 : * Returns the size of the free (allocatable) space on a page,
954 : * without any consideration for adding/removing line pointers.
955 : */
956 : Size
957 3817130 : PageGetExactFreeSpace(const PageData *page)
958 : {
959 3817130 : const PageHeaderData *phdr = (const PageHeaderData *) page;
960 : int space;
961 :
962 : /*
963 : * Use signed arithmetic here so that we behave sensibly if pd_lower >
964 : * pd_upper.
965 : */
966 3817130 : space = (int) phdr->pd_upper - (int) phdr->pd_lower;
967 :
968 3817130 : if (space < 0)
969 0 : return 0;
970 :
971 3817130 : return (Size) space;
972 : }
973 :
974 :
975 : /*
976 : * PageGetHeapFreeSpace
977 : * Returns the size of the free (allocatable) space on a page,
978 : * reduced by the space needed for a new line pointer.
979 : *
980 : * The difference between this and PageGetFreeSpace is that this will return
981 : * zero if there are already MaxHeapTuplesPerPage line pointers in the page
982 : * and none are free. We use this to enforce that no more than
983 : * MaxHeapTuplesPerPage line pointers are created on a heap page. (Although
984 : * no more tuples than that could fit anyway, in the presence of redirected
985 : * or dead line pointers it'd be possible to have too many line pointers.
986 : * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
987 : * on the number of line pointers, we make this extra check.)
988 : */
989 : Size
990 28583036 : PageGetHeapFreeSpace(const PageData *page)
991 : {
992 : Size space;
993 :
994 28583036 : space = PageGetFreeSpace(page);
995 28583036 : if (space > 0)
996 : {
997 : OffsetNumber offnum,
998 : nline;
999 :
1000 : /*
1001 : * Are there already MaxHeapTuplesPerPage line pointers in the page?
1002 : */
1003 28549512 : nline = PageGetMaxOffsetNumber(page);
1004 28549512 : if (nline >= MaxHeapTuplesPerPage)
1005 : {
1006 6902 : if (PageHasFreeLinePointers(page))
1007 : {
1008 : /*
1009 : * Since this is just a hint, we must confirm that there is
1010 : * indeed a free line pointer
1011 : */
1012 748932 : for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
1013 : {
1014 748724 : ItemId lp = PageGetItemId(unconstify(PageData *, page), offnum);
1015 :
1016 748724 : if (!ItemIdIsUsed(lp))
1017 3472 : break;
1018 : }
1019 :
1020 3680 : if (offnum > nline)
1021 : {
1022 : /*
1023 : * The hint is wrong, but we can't clear it here since we
1024 : * don't have the ability to mark the page dirty.
1025 : */
1026 208 : space = 0;
1027 : }
1028 : }
1029 : else
1030 : {
1031 : /*
1032 : * Although the hint might be wrong, PageAddItem will believe
1033 : * it anyway, so we must believe it too.
1034 : */
1035 3222 : space = 0;
1036 : }
1037 : }
1038 : }
1039 28583036 : return space;
1040 : }
1041 :
1042 :
1043 : /*
1044 : * PageIndexTupleDelete
1045 : *
1046 : * This routine does the work of removing a tuple from an index page.
1047 : *
1048 : * Unlike heap pages, we compact out the line pointer for the removed tuple.
1049 : */
1050 : void
1051 1100470 : PageIndexTupleDelete(Page page, OffsetNumber offnum)
1052 : {
1053 1100470 : PageHeader phdr = (PageHeader) page;
1054 : char *addr;
1055 : ItemId tup;
1056 : Size size;
1057 : unsigned offset;
1058 : int nbytes;
1059 : int offidx;
1060 : int nline;
1061 :
1062 : /*
1063 : * As with PageRepairFragmentation, paranoia seems justified.
1064 : */
1065 1100470 : if (phdr->pd_lower < SizeOfPageHeaderData ||
1066 1100470 : phdr->pd_lower > phdr->pd_upper ||
1067 1100470 : phdr->pd_upper > phdr->pd_special ||
1068 1100470 : phdr->pd_special > BLCKSZ ||
1069 1100470 : phdr->pd_special != MAXALIGN(phdr->pd_special))
1070 0 : ereport(ERROR,
1071 : (errcode(ERRCODE_DATA_CORRUPTED),
1072 : errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
1073 : phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
1074 :
1075 1100470 : nline = PageGetMaxOffsetNumber(page);
1076 1100470 : if ((int) offnum <= 0 || (int) offnum > nline)
1077 0 : elog(ERROR, "invalid index offnum: %u", offnum);
1078 :
1079 : /* change offset number to offset index */
1080 1100470 : offidx = offnum - 1;
1081 :
1082 1100470 : tup = PageGetItemId(page, offnum);
1083 : Assert(ItemIdHasStorage(tup));
1084 1100470 : size = ItemIdGetLength(tup);
1085 1100470 : offset = ItemIdGetOffset(tup);
1086 :
1087 1100470 : if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
1088 1100470 : offset != MAXALIGN(offset))
1089 0 : ereport(ERROR,
1090 : (errcode(ERRCODE_DATA_CORRUPTED),
1091 : errmsg("corrupted line pointer: offset = %u, size = %u",
1092 : offset, (unsigned int) size)));
1093 :
1094 : /* Amount of space to actually be deleted */
1095 1100470 : size = MAXALIGN(size);
1096 :
1097 : /*
1098 : * First, we want to get rid of the pd_linp entry for the index tuple. We
1099 : * copy all subsequent linp's back one slot in the array. We don't use
1100 : * PageGetItemId, because we are manipulating the _array_, not individual
1101 : * linp's.
1102 : */
1103 1100470 : nbytes = phdr->pd_lower -
1104 1100470 : ((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);
1105 :
1106 1100470 : if (nbytes > 0)
1107 1072216 : memmove(&(phdr->pd_linp[offidx]),
1108 1072216 : &(phdr->pd_linp[offidx + 1]),
1109 : nbytes);
1110 :
1111 : /*
1112 : * Now move everything between the old upper bound (beginning of tuple
1113 : * space) and the beginning of the deleted tuple forward, so that space in
1114 : * the middle of the page is left free. If we've just deleted the tuple
1115 : * at the beginning of tuple space, then there's no need to do the copy.
1116 : */
1117 :
1118 : /* beginning of tuple space */
1119 1100470 : addr = (char *) page + phdr->pd_upper;
1120 :
1121 1100470 : if (offset > phdr->pd_upper)
1122 1073792 : memmove(addr + size, addr, offset - phdr->pd_upper);
1123 :
1124 : /* adjust free space boundary pointers */
1125 1100470 : phdr->pd_upper += size;
1126 1100470 : phdr->pd_lower -= sizeof(ItemIdData);
1127 :
1128 : /*
1129 : * Finally, we need to adjust the linp entries that remain.
1130 : *
1131 : * Anything that used to be before the deleted tuple's data was moved
1132 : * forward by the size of the deleted tuple.
1133 : */
1134 1100470 : if (!PageIsEmpty(page))
1135 : {
1136 : int i;
1137 :
1138 1098768 : nline--; /* there's one less than when we started */
1139 167986522 : for (i = 1; i <= nline; i++)
1140 : {
1141 166887754 : ItemId ii = PageGetItemId(page, i);
1142 :
1143 : Assert(ItemIdHasStorage(ii));
1144 166887754 : if (ItemIdGetOffset(ii) <= offset)
1145 105063706 : ii->lp_off += size;
1146 : }
1147 : }
1148 1100470 : }
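
/*
 * A worked illustration (editorial, not part of PostgreSQL proper):
 * deleting a 32-byte tuple stored at offset 8000 from a page with
 * pd_upper = 7900 memmove()s the 100 bytes in [7900, 8000) up to
 * [7932, 8032), sets pd_upper = 7932, shrinks pd_lower by one ItemIdData,
 * and adds 32 to every remaining lp_off <= 8000.
 */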
1149 :
1150 :
1151 : /*
1152 : * PageIndexMultiDelete
1153 : *
1154 : * This routine handles the case of deleting multiple tuples from an
1155 : * index page at once. It is considerably faster than a loop around
1156 : * PageIndexTupleDelete ... however, the caller *must* supply the array
1157 : * of item numbers to be deleted in item number order!
1158 : */
1159 : void
1160 40292 : PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
1161 : {
1162 40292 : PageHeader phdr = (PageHeader) page;
1163 40292 : Offset pd_lower = phdr->pd_lower;
1164 40292 : Offset pd_upper = phdr->pd_upper;
1165 40292 : Offset pd_special = phdr->pd_special;
1166 : Offset last_offset;
1167 : itemIdCompactData itemidbase[MaxIndexTuplesPerPage];
1168 : ItemIdData newitemids[MaxIndexTuplesPerPage];
1169 : itemIdCompact itemidptr;
1170 : ItemId lp;
1171 : int nline,
1172 : nused;
1173 : Size totallen;
1174 : Size size;
1175 : unsigned offset;
1176 : int nextitm;
1177 : OffsetNumber offnum;
1178 40292 : bool presorted = true; /* For now */
1179 :
1180 : Assert(nitems <= MaxIndexTuplesPerPage);
1181 :
1182 : /*
1183 : * If there aren't very many items to delete, then retail
1184 : * PageIndexTupleDelete is the best way. Delete the items in reverse
1185 : * order so we don't have to think about adjusting item numbers for
1186 : * previous deletions.
1187 : *
1188 : * TODO: tune the magic number here
1189 : */
1190 40292 : if (nitems <= 2)
1191 : {
1192 9356 : while (--nitems >= 0)
1193 5322 : PageIndexTupleDelete(page, itemnos[nitems]);
1194 4034 : return;
1195 : }
1196 :
1197 : /*
1198 : * As with PageRepairFragmentation, paranoia seems justified.
1199 : */
1200 36258 : if (pd_lower < SizeOfPageHeaderData ||
1201 36258 : pd_lower > pd_upper ||
1202 36258 : pd_upper > pd_special ||
1203 36258 : pd_special > BLCKSZ ||
1204 36258 : pd_special != MAXALIGN(pd_special))
1205 0 : ereport(ERROR,
1206 : (errcode(ERRCODE_DATA_CORRUPTED),
1207 : errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
1208 : pd_lower, pd_upper, pd_special)));
1209 :
1210 : /*
1211 : * Scan the line pointer array and build a list of just the ones we are
1212 : * going to keep. Notice we do not modify the page yet, since we are
1213 : * still validity-checking.
1214 : */
1215 36258 : nline = PageGetMaxOffsetNumber(page);
1216 36258 : itemidptr = itemidbase;
1217 36258 : totallen = 0;
1218 36258 : nused = 0;
1219 36258 : nextitm = 0;
1220 36258 : last_offset = pd_special;
1221 8122902 : for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
1222 : {
1223 8086644 : lp = PageGetItemId(page, offnum);
1224 : Assert(ItemIdHasStorage(lp));
1225 8086644 : size = ItemIdGetLength(lp);
1226 8086644 : offset = ItemIdGetOffset(lp);
1227 8086644 : if (offset < pd_upper ||
1228 8086644 : (offset + size) > pd_special ||
1229 8086644 : offset != MAXALIGN(offset))
1230 0 : ereport(ERROR,
1231 : (errcode(ERRCODE_DATA_CORRUPTED),
1232 : errmsg("corrupted line pointer: offset = %u, size = %u",
1233 : offset, (unsigned int) size)));
1234 :
1235 8086644 : if (nextitm < nitems && offnum == itemnos[nextitm])
1236 : {
1237 : /* skip item to be deleted */
1238 3763662 : nextitm++;
1239 : }
1240 : else
1241 : {
1242 4322982 : itemidptr->offsetindex = nused; /* where it will go */
1243 4322982 : itemidptr->itemoff = offset;
1244 :
1245 4322982 : if (last_offset > itemidptr->itemoff)
1246 2242392 : last_offset = itemidptr->itemoff;
1247 : else
1248 2080590 : presorted = false;
1249 :
1250 4322982 : itemidptr->alignedlen = MAXALIGN(size);
1251 4322982 : totallen += itemidptr->alignedlen;
1252 4322982 : newitemids[nused] = *lp;
1253 4322982 : itemidptr++;
1254 4322982 : nused++;
1255 : }
1256 : }
1257 :
1258 : /* this will catch invalid or out-of-order itemnos[] */
1259 36258 : if (nextitm != nitems)
1260 0 : elog(ERROR, "incorrect index offsets supplied");
1261 :
1262 36258 : if (totallen > (Size) (pd_special - pd_lower))
1263 0 : ereport(ERROR,
1264 : (errcode(ERRCODE_DATA_CORRUPTED),
1265 : errmsg("corrupted item lengths: total %u, available space %u",
1266 : (unsigned int) totallen, pd_special - pd_lower)));
1267 :
1268 : /*
1269 : * Looks good. Overwrite the line pointers with the copy, from which we've
1270 : * removed all the unused items.
1271 : */
1272 36258 : memcpy(phdr->pd_linp, newitemids, nused * sizeof(ItemIdData));
1273 36258 : phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
1274 :
1275 : /* and compactify the tuple data */
1276 36258 : if (nused > 0)
1277 35750 : compactify_tuples(itemidbase, nused, page, presorted);
1278 : else
1279 508 : phdr->pd_upper = pd_special;
1280 : }
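
/*
 * A minimal usage sketch (editorial, not part of PostgreSQL proper):
 * callers gather the doomed offsets in ascending item number order before
 * calling in, much as btree vacuuming does.  example_delete_items() is a
 * hypothetical name.
 */
#ifdef NOT_USED
static void
example_delete_items(Page page, OffsetNumber *deletable, int ndeletable)
{
	/* itemnos[] must be sorted, or "incorrect index offsets supplied" */
	PageIndexMultiDelete(page, deletable, ndeletable);
}
#endif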
1281 :
1282 :
1283 : /*
1284 : * PageIndexTupleDeleteNoCompact
1285 : *
1286 : * Remove the specified tuple from an index page, but set its line pointer
1287 : * to "unused" instead of compacting it out, except that it can be removed
1288 : * if it's the last line pointer on the page.
1289 : *
1290 : * This is used for index AMs that require that existing TIDs of live tuples
1291 : * remain unchanged, and are willing to allow unused line pointers instead.
1292 : */
1293 : void
1294 676 : PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
1295 : {
1296 676 : PageHeader phdr = (PageHeader) page;
1297 : char *addr;
1298 : ItemId tup;
1299 : Size size;
1300 : unsigned offset;
1301 : int nline;
1302 :
1303 : /*
1304 : * As with PageRepairFragmentation, paranoia seems justified.
1305 : */
1306 676 : if (phdr->pd_lower < SizeOfPageHeaderData ||
1307 676 : phdr->pd_lower > phdr->pd_upper ||
1308 676 : phdr->pd_upper > phdr->pd_special ||
1309 676 : phdr->pd_special > BLCKSZ ||
1310 676 : phdr->pd_special != MAXALIGN(phdr->pd_special))
1311 0 : ereport(ERROR,
1312 : (errcode(ERRCODE_DATA_CORRUPTED),
1313 : errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
1314 : phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
1315 :
1316 676 : nline = PageGetMaxOffsetNumber(page);
1317 676 : if ((int) offnum <= 0 || (int) offnum > nline)
1318 0 : elog(ERROR, "invalid index offnum: %u", offnum);
1319 :
1320 676 : tup = PageGetItemId(page, offnum);
1321 : Assert(ItemIdHasStorage(tup));
1322 676 : size = ItemIdGetLength(tup);
1323 676 : offset = ItemIdGetOffset(tup);
1324 :
1325 676 : if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
1326 676 : offset != MAXALIGN(offset))
1327 0 : ereport(ERROR,
1328 : (errcode(ERRCODE_DATA_CORRUPTED),
1329 : errmsg("corrupted line pointer: offset = %u, size = %u",
1330 : offset, (unsigned int) size)));
1331 :
1332 : /* Amount of space to actually be deleted */
1333 676 : size = MAXALIGN(size);
1334 :
1335 : /*
1336 : * Either set the line pointer to "unused", or zap it if it's the last
1337 : * one. (Note: it's possible that the next-to-last one(s) are already
1338 : * unused, but we do not trouble to try to compact them out if so.)
1339 : */
1340 676 : if ((int) offnum < nline)
1341 608 : ItemIdSetUnused(tup);
1342 : else
1343 : {
1344 68 : phdr->pd_lower -= sizeof(ItemIdData);
1345 68 : nline--; /* there's one less than when we started */
1346 : }
1347 :
1348 : /*
1349 : * Now move everything between the old upper bound (beginning of tuple
1350 : * space) and the beginning of the deleted tuple forward, so that space in
1351 : * the middle of the page is left free. If we've just deleted the tuple
1352 : * at the beginning of tuple space, then there's no need to do the copy.
1353 : */
1354 :
1355 : /* beginning of tuple space */
1356 676 : addr = (char *) page + phdr->pd_upper;
1357 :
1358 676 : if (offset > phdr->pd_upper)
1359 608 : memmove(addr + size, addr, offset - phdr->pd_upper);
1360 :
1361 : /* adjust free space boundary pointer */
1362 676 : phdr->pd_upper += size;
1363 :
1364 : /*
1365 : * Finally, we need to adjust the linp entries that remain.
1366 : *
1367 : * Anything that used to be before the deleted tuple's data was moved
1368 : * forward by the size of the deleted tuple.
1369 : */
1370 676 : if (!PageIsEmpty(page))
1371 : {
1372 : int i;
1373 :
1374 173014 : for (i = 1; i <= nline; i++)
1375 : {
1376 172348 : ItemId ii = PageGetItemId(page, i);
1377 :
1378 172348 : if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
1379 84558 : ii->lp_off += size;
1380 : }
1381 : }
1382 676 : }
1383 :
1384 :
1385 : /*
1386 : * PageIndexTupleOverwrite
1387 : *
1388 : * Replace a specified tuple on an index page.
1389 : *
1390 : * The new tuple is placed exactly where the old one had been, shifting
1391 : * other tuples' data up or down as needed to keep the page compacted.
1392 : * This is better than deleting and reinserting the tuple, because it
1393 : * avoids any data shifting when the tuple size doesn't change; and
1394 : * even when it does, we avoid moving the line pointers around.
1395 : * This could be used by an index AM that doesn't want to unset the
1396 : * LP_DEAD bit when it happens to be set. It could conceivably also be
1397 : * used by an index AM that cares about the physical order of tuples as
1398 : * well as their logical/ItemId order.
1399 : *
1400 : * If there's insufficient space for the new tuple, return false. Other
1401 : * errors represent data-corruption problems, so we just elog.
1402 : */
1403 : bool
1404 887566 : PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
1405 : Item newtup, Size newsize)
1406 : {
1407 887566 : PageHeader phdr = (PageHeader) page;
1408 : ItemId tupid;
1409 : int oldsize;
1410 : unsigned offset;
1411 : Size alignednewsize;
1412 : int size_diff;
1413 : int itemcount;
1414 :
1415 : /*
1416 : * As with PageRepairFragmentation, paranoia seems justified.
1417 : */
1418 887566 : if (phdr->pd_lower < SizeOfPageHeaderData ||
1419 887566 : phdr->pd_lower > phdr->pd_upper ||
1420 887566 : phdr->pd_upper > phdr->pd_special ||
1421 887566 : phdr->pd_special > BLCKSZ ||
1422 887566 : phdr->pd_special != MAXALIGN(phdr->pd_special))
1423 0 : ereport(ERROR,
1424 : (errcode(ERRCODE_DATA_CORRUPTED),
1425 : errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
1426 : phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
1427 :
1428 887566 : itemcount = PageGetMaxOffsetNumber(page);
1429 887566 : if ((int) offnum <= 0 || (int) offnum > itemcount)
1430 0 : elog(ERROR, "invalid index offnum: %u", offnum);
1431 :
1432 887566 : tupid = PageGetItemId(page, offnum);
1433 : Assert(ItemIdHasStorage(tupid));
1434 887566 : oldsize = ItemIdGetLength(tupid);
1435 887566 : offset = ItemIdGetOffset(tupid);
1436 :
1437 887566 : if (offset < phdr->pd_upper || (offset + oldsize) > phdr->pd_special ||
1438 887566 : offset != MAXALIGN(offset))
1439 0 : ereport(ERROR,
1440 : (errcode(ERRCODE_DATA_CORRUPTED),
1441 : errmsg("corrupted line pointer: offset = %u, size = %u",
1442 : offset, (unsigned int) oldsize)));
1443 :
1444 : /*
1445 : * Determine actual change in space requirement, check for page overflow.
1446 : */
1447 887566 : oldsize = MAXALIGN(oldsize);
1448 887566 : alignednewsize = MAXALIGN(newsize);
1449 887566 : if (alignednewsize > oldsize + (phdr->pd_upper - phdr->pd_lower))
1450 0 : return false;
1451 :
1452 : /*
1453 : * Relocate existing data and update line pointers, unless the new tuple
1454 : * is the same size as the old (after alignment), in which case there's
1455 : * nothing to do. Notice that what we have to relocate is data before the
1456 : * target tuple, not data after, so it's convenient to express size_diff
1457 : * as the amount by which the tuple's size is decreasing, making it the
1458 : * delta to add to pd_upper and affected line pointers.
1459 : */
1460 887566 : size_diff = oldsize - (int) alignednewsize;
1461 887566 : if (size_diff != 0)
1462 : {
1463 90662 : char *addr = (char *) page + phdr->pd_upper;
1464 : int i;
1465 :
1466 : /* relocate all tuple data before the target tuple */
1467 90662 : memmove(addr + size_diff, addr, offset - phdr->pd_upper);
1468 :
1469 : /* adjust free space boundary pointer */
1470 90662 : phdr->pd_upper += size_diff;
1471 :
1472 : /* adjust affected line pointers too */
1473 14522754 : for (i = FirstOffsetNumber; i <= itemcount; i++)
1474 : {
1475 14432092 : ItemId ii = PageGetItemId(page, i);
1476 :
1477 : /* Allow items without storage; currently only BRIN needs that */
1478 14432092 : if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
1479 6840504 : ii->lp_off += size_diff;
1480 : }
1481 : }
1482 :
1483 : /* Update the item's tuple length without changing its lp_flags field */
1484 887566 : tupid->lp_off = offset + size_diff;
1485 887566 : tupid->lp_len = newsize;
1486 :
1487 : /* Copy new tuple data onto page */
1488 887566 : memcpy(PageGetItem(page, tupid), newtup, newsize);
1489 :
1490 887566 : return true;
1491 : }
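
/*
 * A minimal usage sketch (editorial, not part of PostgreSQL proper):
 * replacing an index tuple in place while preserving its line pointer
 * flags.  example_replace_index_tuple() is a hypothetical name.
 */
#ifdef NOT_USED
static bool
example_replace_index_tuple(Page page, OffsetNumber offnum,
							IndexTuple newtup)
{
	/* false means the new tuple did not fit; data errors elog instead */
	return PageIndexTupleOverwrite(page, offnum, (Item) newtup,
								   IndexTupleSize(newtup));
}
#endif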
1492 :
1493 :
1494 : /*
1495 : * Set checksum for a page in shared buffers.
1496 : *
1497 : * If checksums are disabled, or if the page is not initialized, just return
1498 : * the input. Otherwise, we must make a copy of the page before calculating
1499 : * the checksum, to prevent concurrent modifications (e.g. setting hint bits)
1500 : * from making the final checksum invalid. It doesn't matter if we include or
1501 : * exclude hints during the copy, as long as we write a valid page and
1502 : * associated checksum.
1503 : *
1504 : * Returns a pointer to the block-sized data that needs to be written. Uses
1505 : * statically-allocated memory, so the caller must immediately write the
1506 : * returned page and not refer to it again.
1507 : */
1508 : char *
1509 1040892 : PageSetChecksumCopy(Page page, BlockNumber blkno)
1510 : {
1511 : static char *pageCopy = NULL;
1512 :
1513 : /* If we don't need a checksum, just return the passed-in data */
1514 1040892 : if (PageIsNew(page) || !DataChecksumsEnabled())
1515 21468 : return page;
1516 :
1517 : /*
1518 : * We allocate the copy space once and use it over on each subsequent
1519 : * call. The point of palloc'ing here, rather than having a static char
1520 : * array, is first to ensure adequate alignment for the checksumming code
1521 : * and second to avoid wasting space in processes that never call this.
1522 : */
1523 1019424 : if (pageCopy == NULL)
1524 5308 : pageCopy = MemoryContextAllocAligned(TopMemoryContext,
1525 : BLCKSZ,
1526 : PG_IO_ALIGN_SIZE,
1527 : 0);
1528 :
1529 1019424 : memcpy(pageCopy, page, BLCKSZ);
1530 1019424 : ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno);
1531 1019424 : return pageCopy;
1532 : }
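
/*
 * A minimal write-path sketch (editorial, not part of PostgreSQL proper):
 * example_write_page() is a hypothetical name and assumes storage/smgr.h
 * for SMgrRelation, ForkNumber and smgrwrite().
 */
#ifdef NOT_USED
static void
example_write_page(SMgrRelation reln, ForkNumber forknum,
				   BlockNumber blkno, Page page)
{
	/* checksum a stable copy; the shared page may still be changing */
	char	   *towrite = PageSetChecksumCopy(page, blkno);

	smgrwrite(reln, forknum, blkno, towrite, false);
}
#endif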
1533 :
1534 : /*
1535 : * Set checksum for a page in private memory.
1536 : *
1537 : * This must only be used when we know that no other process can be modifying
1538 : * the page buffer.
1539 : */
1540 : void
1541 127600 : PageSetChecksumInplace(Page page, BlockNumber blkno)
1542 : {
1543 : /* If we don't need a checksum, just return */
1544 127600 : if (PageIsNew(page) || !DataChecksumsEnabled())
1545 3702 : return;
1546 :
1547 123898 : ((PageHeader) page)->pd_checksum = pg_checksum_page(page, blkno);
1548 : }