Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * bufpage.c
4 : * POSTGRES standard buffer page code.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/storage/page/bufpage.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/htup_details.h"
18 : #include "access/itup.h"
19 : #include "access/xlog.h"
20 : #include "pgstat.h"
21 : #include "storage/checksum.h"
22 : #include "utils/memdebug.h"
23 : #include "utils/memutils.h"
24 :
25 :
26 : /* GUC variable */
27 : bool ignore_checksum_failure = false;
28 :
29 :
30 : /* ----------------------------------------------------------------
31 : * Page support functions
32 : * ----------------------------------------------------------------
33 : */
34 :
35 : /*
36 : * PageInit
37 : * Initializes the contents of a page.
38 : * Note that we don't calculate an initial checksum here; that's not done
39 : * until it's time to write.
40 : */
41 : void
42 435667 : PageInit(Page page, Size pageSize, Size specialSize)
43 : {
44 435667 : PageHeader p = (PageHeader) page;
45 :
46 435667 : specialSize = MAXALIGN(specialSize);
47 :
48 : Assert(pageSize == BLCKSZ);
49 : Assert(pageSize > specialSize + SizeOfPageHeaderData);
50 :
51 : /* Make sure all fields of page are zero, as well as unused space */
52 435667 : MemSet(p, 0, pageSize);
53 :
54 435667 : p->pd_flags = 0;
55 435667 : p->pd_lower = SizeOfPageHeaderData;
56 435667 : p->pd_upper = pageSize - specialSize;
57 435667 : p->pd_special = pageSize - specialSize;
58 435667 : PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
59 : /* p->pd_prune_xid = InvalidTransactionId; done by above MemSet */
60 435667 : }
61 :
62 :
/*
 * PageIsVerified
 *		Check that the page header and checksum (if any) appear valid.
 *
 * This is called when a page has just been read in from disk.  The idea is
 * to cheaply detect trashed pages before we go nuts following bogus line
 * pointers, testing invalid transaction identifiers, etc.
 *
 * It turns out to be necessary to allow zeroed pages here too.  Even though
 * this routine is *not* called when deliberately adding a page to a relation,
 * there are scenarios in which a zeroed page might be found in a table.
 * (Example: a backend extends a relation, then crashes before it can write
 * any WAL entry about the new page.  The kernel will already have the
 * zeroed page in the file, and it will stay that way after restart.)  So we
 * allow zeroed pages here, and are careful that the page access macros
 * treat such a page as empty and without free space.  Eventually, VACUUM
 * will clean up such a page and make it usable.
 *
 * If flag PIV_LOG_WARNING/PIV_LOG_LOG is set, a WARNING/LOG message is logged
 * in the event of a checksum failure.
 *
 * If flag PIV_IGNORE_CHECKSUM_FAILURE is set, checksum failures will cause a
 * message about the failure to be emitted, but will not cause
 * PageIsVerified() to return false.
 *
 * To allow the caller to report statistics about checksum failures,
 * *checksum_failure_p can be passed in.  Note that there may be checksum
 * failures even if this function returns true, due to
 * PIV_IGNORE_CHECKSUM_FAILURE.
 */
bool
PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_failure_p)
{
	const PageHeaderData *p = (const PageHeaderData *) page;
	size_t	   *pagebytes;
	bool		checksum_failure = false;
	bool		header_sane = false;
	uint16		checksum = 0;

	/* Assume success until a failure is detected below */
	if (checksum_failure_p)
		*checksum_failure_p = false;

	/*
	 * Don't verify page data unless the page passes basic non-zero test
	 */
	if (!PageIsNew(page))
	{
		/*
		 * There shouldn't be any check for interrupt calls happening in this
		 * codepath, but just to be on the safe side we hold interrupts since
		 * if they did happen the data checksum state could change during
		 * verifying checksums, which could lead to incorrect verification
		 * results.
		 */
		HOLD_INTERRUPTS();
		if (DataChecksumsNeedVerify())
		{
			checksum = pg_checksum_page(page, blkno);

			if (checksum != p->pd_checksum)
			{
				checksum_failure = true;
				if (checksum_failure_p)
					*checksum_failure_p = true;
			}
		}
		RESUME_INTERRUPTS();

		/*
		 * The following checks don't prove the header is correct, only that
		 * it looks sane enough to allow into the buffer pool.  Later usage of
		 * the block can still reveal problems, which is why we offer the
		 * checksum option.
		 */
		if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
			p->pd_lower <= p->pd_upper &&
			p->pd_upper <= p->pd_special &&
			p->pd_special <= BLCKSZ &&
			p->pd_special == MAXALIGN(p->pd_special))
			header_sane = true;

		/* Common case: sane header and matching (or unverified) checksum */
		if (header_sane && !checksum_failure)
			return true;
	}

	/* Check all-zeroes case */
	pagebytes = (size_t *) page;

	if (pg_memory_is_all_zeros(pagebytes, BLCKSZ))
		return true;

	/*
	 * Throw a WARNING/LOG, as instructed by PIV_LOG_*, if the checksum fails,
	 * but only after we've checked for the all-zeroes case.
	 */
	if (checksum_failure)
	{
		if ((flags & (PIV_LOG_WARNING | PIV_LOG_LOG)) != 0)
			ereport(flags & PIV_LOG_WARNING ? WARNING : LOG,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("page verification failed, calculated checksum %u but expected %u%s",
							checksum, p->pd_checksum,
							(flags & PIV_ZERO_BUFFERS_ON_ERROR ? ", buffer will be zeroed" : ""))));

		/* Tolerate the bad checksum only if the header itself looked sane */
		if (header_sane && (flags & PIV_IGNORE_CHECKSUM_FAILURE))
			return true;
	}

	return false;
}
173 :
174 :
/*
 * PageAddItemExtended
 *
 * Add an item to a page.  Return value is the offset at which it was
 * inserted, or InvalidOffsetNumber if the item is not inserted for any
 * reason.  A WARNING is issued indicating the reason for the refusal.
 *
 * offsetNumber must be either InvalidOffsetNumber to specify finding a
 * free line pointer, or a value between FirstOffsetNumber and one past
 * the last existing item, to specify using that particular line pointer.
 *
 * If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
 * the item at the specified offsetNumber, which must be either a
 * currently-unused line pointer, or one past the last existing item.
 *
 * If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
 * the item at the specified offsetNumber, moving existing items later
 * in the array to make room.
 *
 * If offsetNumber is not valid, then assign a slot by finding the first
 * one that is both unused and deallocated.
 *
 * If flag PAI_IS_HEAP is set, we enforce that there can't be more than
 * MaxHeapTuplesPerPage line pointers on the page.
 *
 * !!! EREPORT(ERROR) IS DISALLOWED HERE !!!
 */
OffsetNumber
PageAddItemExtended(Page page,
					const void *item,
					Size size,
					OffsetNumber offsetNumber,
					int flags)
{
	PageHeader	phdr = (PageHeader) page;
	Size		alignedSize;
	int			lower;
	int			upper;
	ItemId		itemId;
	OffsetNumber limit;
	bool		needshuffle = false;

	/*
	 * Be wary about corrupted page pointers
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ)
		ereport(PANIC,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	/*
	 * Select offsetNumber to place the new item at
	 */
	limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));

	/* was offsetNumber passed in? */
	if (OffsetNumberIsValid(offsetNumber))
	{
		/* yes, check it */
		if ((flags & PAI_OVERWRITE) != 0)
		{
			if (offsetNumber < limit)
			{
				itemId = PageGetItemId(page, offsetNumber);
				if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
				{
					elog(WARNING, "will not overwrite a used ItemId");
					return InvalidOffsetNumber;
				}
			}
		}
		else
		{
			if (offsetNumber < limit)
				needshuffle = true; /* need to move existing linp's */
		}
	}
	else
	{
		/* offsetNumber was not passed in, so find a free slot */
		/* if no free slot, we'll put it at limit (1st open slot) */
		if (PageHasFreeLinePointers(page))
		{
			/*
			 * Scan line pointer array to locate a "recyclable" (unused)
			 * ItemId.
			 *
			 * Always use earlier items first.  PageTruncateLinePointerArray
			 * can only truncate unused items when they appear as a contiguous
			 * group at the end of the line pointer array.
			 */
			for (offsetNumber = FirstOffsetNumber;
				 offsetNumber < limit;	/* limit is maxoff+1 */
				 offsetNumber++)
			{
				itemId = PageGetItemId(page, offsetNumber);

				/*
				 * We check for no storage as well, just to be paranoid;
				 * unused items should never have storage.  Assert() that the
				 * invariant is respected too.
				 */
				Assert(ItemIdIsUsed(itemId) || !ItemIdHasStorage(itemId));

				if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
					break;
			}
			if (offsetNumber >= limit)
			{
				/* the hint is wrong, so reset it */
				PageClearHasFreeLinePointers(page);
			}
		}
		else
		{
			/* don't bother searching if hint says there's no free slot */
			offsetNumber = limit;
		}
	}

	/* Reject placing items beyond the first unused line pointer */
	if (offsetNumber > limit)
	{
		elog(WARNING, "specified item offset is too large");
		return InvalidOffsetNumber;
	}

	/* Reject placing items beyond heap boundary, if heap */
	if ((flags & PAI_IS_HEAP) != 0 && offsetNumber > MaxHeapTuplesPerPage)
	{
		elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
		return InvalidOffsetNumber;
	}

	/*
	 * Compute new lower and upper pointers for page, see if it'll fit.
	 *
	 * Note: do arithmetic as signed ints, to avoid mistakes if, say,
	 * alignedSize > pd_upper.
	 */
	if (offsetNumber == limit || needshuffle)
		lower = phdr->pd_lower + sizeof(ItemIdData);
	else
		lower = phdr->pd_lower;

	alignedSize = MAXALIGN(size);

	upper = (int) phdr->pd_upper - (int) alignedSize;

	/* Not enough room between line pointer array and tuple space */
	if (lower > upper)
		return InvalidOffsetNumber;

	/*
	 * OK to insert the item.  First, shuffle the existing pointers if needed.
	 */
	itemId = PageGetItemId(page, offsetNumber);

	if (needshuffle)
		memmove(itemId + 1, itemId,
				(limit - offsetNumber) * sizeof(ItemIdData));

	/* set the line pointer */
	ItemIdSetNormal(itemId, upper, size);

	/*
	 * Items normally contain no uninitialized bytes.  Core bufpage consumers
	 * conform, but this is not a necessary coding rule; a new index AM could
	 * opt to depart from it.  However, data type input functions and other
	 * C-language functions that synthesize datums should initialize all
	 * bytes; datumIsEqual() relies on this.  Testing here, along with the
	 * similar check in printtup(), helps to catch such mistakes.
	 *
	 * Values of the "name" type retrieved via index-only scans may contain
	 * uninitialized bytes; see comment in btrescan().  Valgrind will report
	 * this as an error, but it is safe to ignore.
	 */
	VALGRIND_CHECK_MEM_IS_DEFINED(item, size);

	/* copy the item's data onto the page */
	memcpy((char *) page + upper, item, size);

	/* adjust page header */
	phdr->pd_lower = (LocationIndex) lower;
	phdr->pd_upper = (LocationIndex) upper;

	return offsetNumber;
}
366 :
367 :
368 : /*
369 : * PageGetTempPage
370 : * Get a temporary page in local memory for special processing.
371 : * The returned page is not initialized at all; caller must do that.
372 : */
373 : Page
374 135 : PageGetTempPage(const PageData *page)
375 : {
376 : Size pageSize;
377 : Page temp;
378 :
379 135 : pageSize = PageGetPageSize(page);
380 135 : temp = (Page) palloc(pageSize);
381 :
382 135 : return temp;
383 : }
384 :
385 : /*
386 : * PageGetTempPageCopy
387 : * Get a temporary page in local memory for special processing.
388 : * The page is initialized by copying the contents of the given page.
389 : */
390 : Page
391 7225 : PageGetTempPageCopy(const PageData *page)
392 : {
393 : Size pageSize;
394 : Page temp;
395 :
396 7225 : pageSize = PageGetPageSize(page);
397 7225 : temp = (Page) palloc(pageSize);
398 :
399 7225 : memcpy(temp, page, pageSize);
400 :
401 7225 : return temp;
402 : }
403 :
404 : /*
405 : * PageGetTempPageCopySpecial
406 : * Get a temporary page in local memory for special processing.
407 : * The page is PageInit'd with the same special-space size as the
408 : * given page, and the special space is copied from the given page.
409 : */
410 : Page
411 38041 : PageGetTempPageCopySpecial(const PageData *page)
412 : {
413 : Size pageSize;
414 : Page temp;
415 :
416 38041 : pageSize = PageGetPageSize(page);
417 38041 : temp = (Page) palloc(pageSize);
418 :
419 38041 : PageInit(temp, pageSize, PageGetSpecialSize(page));
420 114123 : memcpy(PageGetSpecialPointer(temp),
421 38041 : PageGetSpecialPointer(page),
422 38041 : PageGetSpecialSize(page));
423 :
424 38041 : return temp;
425 : }
426 :
427 : /*
428 : * PageRestoreTempPage
429 : * Copy temporary page back to permanent page after special processing
430 : * and release the temporary page.
431 : */
432 : void
433 35945 : PageRestoreTempPage(Page tempPage, Page oldPage)
434 : {
435 : Size pageSize;
436 :
437 35945 : pageSize = PageGetPageSize(tempPage);
438 35945 : memcpy(oldPage, tempPage, pageSize);
439 :
440 35945 : pfree(tempPage);
441 35945 : }
442 :
/*
 * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
 *
 * One entry per tuple with storage; describes where the tuple currently
 * lives on the page and how much (MAXALIGN'd) space it occupies.
 */
typedef struct itemIdCompactData
{
	uint16		offsetindex;	/* linp array index */
	int16		itemoff;		/* page offset of item data */
	uint16		alignedlen;		/* MAXALIGN(item data len) */
} itemIdCompactData;
typedef itemIdCompactData *itemIdCompact;
453 :
/*
 * After removing or marking some line pointers unused, move the tuples to
 * remove the gaps caused by the removed items and reorder them back into
 * reverse line pointer order in the page.
 *
 * This function can often be fairly hot, so it pays to take some measures to
 * make it as optimal as possible.
 *
 * Callers may pass 'presorted' as true if the 'itemidbase' array is sorted in
 * descending order of itemoff.  When this is true we can just memmove()
 * tuples towards the end of the page.  This is quite a common case as it's
 * the order that tuples are initially inserted into pages.  When we call this
 * function to defragment the tuples in the page then any new line pointers
 * added to the page will keep that presorted order, so hitting this case is
 * still very common for tables that are commonly updated.
 *
 * When the 'itemidbase' array is not presorted then we're unable to just
 * memmove() tuples around freely.  Doing so could cause us to overwrite the
 * memory belonging to a tuple we've not moved yet.  In this case, we copy all
 * the tuples that need to be moved into a temporary buffer.  We can then
 * simply memcpy() out of that temp buffer back into the page at the correct
 * location.  Tuples are copied back into the page in the same order as the
 * 'itemidbase' array, so we end up reordering the tuples back into reverse
 * line pointer order.  This will increase the chances of hitting the
 * presorted case the next time around.
 *
 * Callers must ensure that nitems is > 0
 */
static void
compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
{
	PageHeader	phdr = (PageHeader) page;
	Offset		upper;
	Offset		copy_tail;
	Offset		copy_head;
	itemIdCompact itemidptr;
	int			i;

	/* Code within will not work correctly if nitems == 0 */
	Assert(nitems > 0);

	if (presorted)
	{

#ifdef USE_ASSERT_CHECKING
		{
			/*
			 * Verify we've not gotten any new callers that are incorrectly
			 * passing a true presorted value.
			 */
			Offset		lastoff = phdr->pd_special;

			for (i = 0; i < nitems; i++)
			{
				itemidptr = &itemidbase[i];

				Assert(lastoff > itemidptr->itemoff);

				lastoff = itemidptr->itemoff;
			}
		}
#endif							/* USE_ASSERT_CHECKING */

		/*
		 * 'itemidbase' is already in the optimal order, i.e, lower item
		 * pointers have a higher offset.  This allows us to memmove() the
		 * tuples up to the end of the page without having to worry about
		 * overwriting other tuples that have not been moved yet.
		 *
		 * There's a good chance that there are tuples already right at the
		 * end of the page that we can simply skip over because they're
		 * already in the correct location within the page.  We'll do that
		 * first...
		 */
		upper = phdr->pd_special;
		i = 0;
		do
		{
			itemidptr = &itemidbase[i];
			if (upper != itemidptr->itemoff + itemidptr->alignedlen)
				break;
			upper -= itemidptr->alignedlen;

			i++;
		} while (i < nitems);

		/*
		 * Now that we've found the first tuple that needs to be moved, we can
		 * do the tuple compactification.  We try and make the least number of
		 * memmove() calls and only call memmove() when there's a gap.  When
		 * we see a gap we just move all tuples after the gap up until the
		 * point of the last move operation.
		 */
		copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
		for (; i < nitems; i++)
		{
			ItemId		lp;

			itemidptr = &itemidbase[i];
			lp = PageGetItemId(page, itemidptr->offsetindex + 1);

			/* flush the pending run of tuples when we hit a gap */
			if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
			{
				memmove((char *) page + upper,
						page + copy_head,
						copy_tail - copy_head);

				/*
				 * We've now moved all tuples already seen, but not the
				 * current tuple, so we set the copy_tail to the end of this
				 * tuple so it can be moved in another iteration of the loop.
				 */
				copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
			}
			/* shift the target offset down by the length of this tuple */
			upper -= itemidptr->alignedlen;
			/* point the copy_head to the start of this tuple */
			copy_head = itemidptr->itemoff;

			/* update the line pointer to reference the new offset */
			lp->lp_off = upper;
		}

		/* move the remaining tuples. */
		memmove((char *) page + upper,
				page + copy_head,
				copy_tail - copy_head);
	}
	else
	{
		PGAlignedBlock scratch;
		char	   *scratchptr = scratch.data;

		/*
		 * Non-presorted case: The tuples in the itemidbase array may be in
		 * any order.  So, in order to move these to the end of the page we
		 * must make a temp copy of each tuple that needs to be moved before
		 * we copy them back into the page at the new offset.
		 *
		 * If a large percentage of tuples have been pruned (>75%) then we'll
		 * copy these into the temp buffer tuple-by-tuple, otherwise, we'll
		 * just do a single memcpy() for all tuples that need to be moved.
		 * When so many tuples have been removed there's likely to be a lot of
		 * gaps and it's unlikely that many non-movable tuples remain at the
		 * end of the page.
		 */
		if (nitems < PageGetMaxOffsetNumber(page) / 4)
		{
			/* sparse case: copy each surviving tuple individually */
			i = 0;
			do
			{
				itemidptr = &itemidbase[i];
				memcpy(scratchptr + itemidptr->itemoff, page + itemidptr->itemoff,
					   itemidptr->alignedlen);
				i++;
			} while (i < nitems);

			/* Set things up for the compactification code below */
			i = 0;
			itemidptr = &itemidbase[0];
			upper = phdr->pd_special;
		}
		else
		{
			upper = phdr->pd_special;

			/*
			 * Many tuples are likely to already be in the correct location.
			 * There's no need to copy these into the temp buffer.  Instead
			 * we'll just skip forward in the itemidbase array to the position
			 * that we do need to move tuples from so that the code below just
			 * leaves these ones alone.
			 */
			i = 0;
			do
			{
				itemidptr = &itemidbase[i];
				if (upper != itemidptr->itemoff + itemidptr->alignedlen)
					break;
				upper -= itemidptr->alignedlen;

				i++;
			} while (i < nitems);

			/* Copy all tuples that need to be moved into the temp buffer */
			memcpy(scratchptr + phdr->pd_upper,
				   page + phdr->pd_upper,
				   upper - phdr->pd_upper);
		}

		/*
		 * Do the tuple compactification.  itemidptr is already pointing to
		 * the first tuple that we're going to move.  Here we collapse the
		 * memcpy calls for adjacent tuples into a single call.  This is done
		 * by delaying the memcpy call until we find a gap that needs to be
		 * closed.
		 */
		copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
		for (; i < nitems; i++)
		{
			ItemId		lp;

			itemidptr = &itemidbase[i];
			lp = PageGetItemId(page, itemidptr->offsetindex + 1);

			/* copy pending tuples when we detect a gap */
			if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
			{
				memcpy((char *) page + upper,
					   scratchptr + copy_head,
					   copy_tail - copy_head);

				/*
				 * We've now copied all tuples already seen, but not the
				 * current tuple, so we set the copy_tail to the end of this
				 * tuple.
				 */
				copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
			}
			/* shift the target offset down by the length of this tuple */
			upper -= itemidptr->alignedlen;
			/* point the copy_head to the start of this tuple */
			copy_head = itemidptr->itemoff;

			/* update the line pointer to reference the new offset */
			lp->lp_off = upper;
		}

		/* Copy the remaining chunk */
		memcpy((char *) page + upper,
			   scratchptr + copy_head,
			   copy_tail - copy_head);
	}

	phdr->pd_upper = upper;
}
690 :
/*
 * PageRepairFragmentation
 *
 * Frees fragmented space on a heap page following pruning.
 *
 * This routine is usable for heap pages only, but see PageIndexMultiDelete.
 *
 * This routine removes unused line pointers from the end of the line pointer
 * array.  This is possible when dead heap-only tuples get removed by pruning,
 * especially when there were HOT chains with several tuples each beforehand.
 *
 * Caller had better have a full cleanup lock on page's buffer.  As a side
 * effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
 * needed.  Caller might also need to account for a reduction in the length of
 * the line pointer array following array truncation.
 */
void
PageRepairFragmentation(Page page)
{
	Offset		pd_lower = ((PageHeader) page)->pd_lower;
	Offset		pd_upper = ((PageHeader) page)->pd_upper;
	Offset		pd_special = ((PageHeader) page)->pd_special;
	Offset		last_offset;
	itemIdCompactData itemidbase[MaxHeapTuplesPerPage];
	itemIdCompact itemidptr;
	ItemId		lp;
	int			nline,
				nstorage,
				nunused;
	OffsetNumber finalusedlp = InvalidOffsetNumber;
	int			i;
	Size		totallen;
	bool		presorted = true;	/* For now */

	/*
	 * It's worth the trouble to be more paranoid here than in most places,
	 * because we are about to reshuffle data in (what is usually) a shared
	 * disk buffer.  If we aren't careful then corrupted pointers, lengths,
	 * etc could cause us to clobber adjacent disk buffers, spreading the data
	 * loss further.  So, check everything.
	 */
	if (pd_lower < SizeOfPageHeaderData ||
		pd_lower > pd_upper ||
		pd_upper > pd_special ||
		pd_special > BLCKSZ ||
		pd_special != MAXALIGN(pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						pd_lower, pd_upper, pd_special)));

	/*
	 * Run through the line pointer array and collect data about live items.
	 */
	nline = PageGetMaxOffsetNumber(page);
	itemidptr = itemidbase;
	nunused = totallen = 0;
	last_offset = pd_special;
	for (i = FirstOffsetNumber; i <= nline; i++)
	{
		lp = PageGetItemId(page, i);
		if (ItemIdIsUsed(lp))
		{
			if (ItemIdHasStorage(lp))
			{
				itemidptr->offsetindex = i - 1;
				itemidptr->itemoff = ItemIdGetOffset(lp);

				/* track whether item offsets are in descending order */
				if (last_offset > itemidptr->itemoff)
					last_offset = itemidptr->itemoff;
				else
					presorted = false;

				if (unlikely(itemidptr->itemoff < (int) pd_upper ||
							 itemidptr->itemoff >= (int) pd_special))
					ereport(ERROR,
							(errcode(ERRCODE_DATA_CORRUPTED),
							 errmsg("corrupted line pointer: %u",
									itemidptr->itemoff)));
				itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
				totallen += itemidptr->alignedlen;
				itemidptr++;
			}

			finalusedlp = i;	/* Could be the final non-LP_UNUSED item */
		}
		else
		{
			/* Unused entries should have lp_len = 0, but make sure */
			Assert(!ItemIdHasStorage(lp));
			ItemIdSetUnused(lp);
			nunused++;
		}
	}

	nstorage = itemidptr - itemidbase;
	if (nstorage == 0)
	{
		/* Page is completely empty, so just reset it quickly */
		((PageHeader) page)->pd_upper = pd_special;
	}
	else
	{
		/* Need to compact the page the hard way */
		if (totallen > (Size) (pd_special - pd_lower))
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("corrupted item lengths: total %zu, available space %u",
							totallen, pd_special - pd_lower)));

		compactify_tuples(itemidbase, nstorage, page, presorted);
	}

	if (finalusedlp != nline)
	{
		/* The last line pointer is not the last used line pointer */
		int			nunusedend = nline - finalusedlp;

		Assert(nunused >= nunusedend && nunusedend > 0);

		/* remove trailing unused line pointers from the count */
		nunused -= nunusedend;
		/* truncate the line pointer array */
		((PageHeader) page)->pd_lower -= (sizeof(ItemIdData) * nunusedend);
	}

	/* Set hint bit for PageAddItemExtended */
	if (nunused > 0)
		PageSetHasFreeLinePointers(page);
	else
		PageClearHasFreeLinePointers(page);
}
823 :
/*
 * PageTruncateLinePointerArray
 *
 * Removes unused line pointers at the end of the line pointer array.
 *
 * This routine is usable for heap pages only.  It is called by VACUUM during
 * its second pass over the heap.  We expect at least one LP_UNUSED line
 * pointer on the page (if VACUUM didn't have an LP_DEAD item on the page that
 * it just set to LP_UNUSED then it should not call here).
 *
 * We avoid truncating the line pointer array to 0 items, if necessary by
 * leaving behind a single remaining LP_UNUSED item.  This is a little
 * arbitrary, but it seems like a good idea to avoid leaving a PageIsEmpty()
 * page behind.
 *
 * Caller can have either an exclusive lock or a full cleanup lock on page's
 * buffer.  The page's PD_HAS_FREE_LINES hint bit will be set or unset based
 * on whether or not we leave behind any remaining LP_UNUSED items.
 */
void
PageTruncateLinePointerArray(Page page)
{
	PageHeader	phdr = (PageHeader) page;
	bool		countdone = false,
				sethint = false;
	int			nunusedend = 0;

	/* Scan line pointer array back-to-front */
	for (int i = PageGetMaxOffsetNumber(page); i >= FirstOffsetNumber; i--)
	{
		ItemId		lp = PageGetItemId(page, i);

		/* note: i > FirstOffsetNumber keeps at least one line pointer */
		if (!countdone && i > FirstOffsetNumber)
		{
			/*
			 * Still determining which line pointers from the end of the array
			 * will be truncated away.  Either count another line pointer as
			 * safe to truncate, or notice that it's not safe to truncate
			 * additional line pointers (stop counting line pointers).
			 */
			if (!ItemIdIsUsed(lp))
				nunusedend++;
			else
				countdone = true;
		}
		else
		{
			/*
			 * Once we've stopped counting we still need to figure out if
			 * there are any remaining LP_UNUSED line pointers somewhere more
			 * towards the front of the array.
			 */
			if (!ItemIdIsUsed(lp))
			{
				/*
				 * This is an unused line pointer that we won't be truncating
				 * away -- so there is at least one.  Set hint on page.
				 */
				sethint = true;
				break;
			}
		}
	}

	if (nunusedend > 0)
	{
		phdr->pd_lower -= sizeof(ItemIdData) * nunusedend;

#ifdef CLOBBER_FREED_MEMORY
		memset((char *) page + phdr->pd_lower, 0x7F,
			   sizeof(ItemIdData) * nunusedend);
#endif
	}
	else
		Assert(sethint);

	/* Set hint bit for PageAddItemExtended */
	if (sethint)
		PageSetHasFreeLinePointers(page);
	else
		PageClearHasFreeLinePointers(page);
}
906 :
907 : /*
908 : * PageGetFreeSpace
909 : * Returns the size of the free (allocatable) space on a page,
910 : * reduced by the space needed for a new line pointer.
911 : *
912 : * Note: this should usually only be used on index pages. Use
913 : * PageGetHeapFreeSpace on heap pages.
914 : */
915 : Size
916 42780920 : PageGetFreeSpace(const PageData *page)
917 : {
918 42780920 : const PageHeaderData *phdr = (const PageHeaderData *) page;
919 : int space;
920 :
921 : /*
922 : * Use signed arithmetic here so that we behave sensibly if pd_lower >
923 : * pd_upper.
924 : */
925 42780920 : space = (int) phdr->pd_upper - (int) phdr->pd_lower;
926 :
927 42780920 : if (space < (int) sizeof(ItemIdData))
928 9864 : return 0;
929 42771056 : space -= sizeof(ItemIdData);
930 :
931 42771056 : return (Size) space;
932 : }
933 :
934 : /*
935 : * PageGetFreeSpaceForMultipleTuples
936 : * Returns the size of the free (allocatable) space on a page,
937 : * reduced by the space needed for multiple new line pointers.
938 : *
939 : * Note: this should usually only be used on index pages. Use
940 : * PageGetHeapFreeSpace on heap pages.
941 : */
942 : Size
943 87607 : PageGetFreeSpaceForMultipleTuples(const PageData *page, int ntups)
944 : {
945 87607 : const PageHeaderData *phdr = (const PageHeaderData *) page;
946 : int space;
947 :
948 : /*
949 : * Use signed arithmetic here so that we behave sensibly if pd_lower >
950 : * pd_upper.
951 : */
952 87607 : space = (int) phdr->pd_upper - (int) phdr->pd_lower;
953 :
954 87607 : if (space < (int) (ntups * sizeof(ItemIdData)))
955 0 : return 0;
956 87607 : space -= ntups * sizeof(ItemIdData);
957 :
958 87607 : return (Size) space;
959 : }
960 :
961 : /*
962 : * PageGetExactFreeSpace
963 : * Returns the size of the free (allocatable) space on a page,
964 : * without any consideration for adding/removing line pointers.
965 : */
966 : Size
967 2147306 : PageGetExactFreeSpace(const PageData *page)
968 : {
969 2147306 : const PageHeaderData *phdr = (const PageHeaderData *) page;
970 : int space;
971 :
972 : /*
973 : * Use signed arithmetic here so that we behave sensibly if pd_lower >
974 : * pd_upper.
975 : */
976 2147306 : space = (int) phdr->pd_upper - (int) phdr->pd_lower;
977 :
978 2147306 : if (space < 0)
979 0 : return 0;
980 :
981 2147306 : return (Size) space;
982 : }
983 :
984 :
985 : /*
986 : * PageGetHeapFreeSpace
987 : * Returns the size of the free (allocatable) space on a page,
988 : * reduced by the space needed for a new line pointer.
989 : *
990 : * The difference between this and PageGetFreeSpace is that this will return
991 : * zero if there are already MaxHeapTuplesPerPage line pointers in the page
992 : * and none are free. We use this to enforce that no more than
993 : * MaxHeapTuplesPerPage line pointers are created on a heap page. (Although
994 : * no more tuples than that could fit anyway, in the presence of redirected
995 : * or dead line pointers it'd be possible to have too many line pointers.
996 : * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
997 : * on the number of line pointers, we make this extra check.)
998 : */
Size
PageGetHeapFreeSpace(const PageData *page)
{
	Size		space;

	space = PageGetFreeSpace(page);
	if (space > 0)
	{
		OffsetNumber offnum,
					nline;

		/*
		 * Are there already MaxHeapTuplesPerPage line pointers in the page?
		 */
		nline = PageGetMaxOffsetNumber(page);
		if (nline >= MaxHeapTuplesPerPage)
		{
			if (PageHasFreeLinePointers(page))
			{
				/*
				 * Since this is just a hint, we must confirm that there is
				 * indeed a free line pointer
				 */
				for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
				{
					/* unconstify: PageGetItemId wants a non-const Page */
					ItemId		lp = PageGetItemId(unconstify(PageData *, page), offnum);

					if (!ItemIdIsUsed(lp))
						break;	/* found a reusable slot; hint confirmed */
				}

				/* offnum > nline here means the scan found no unused slot */
				if (offnum > nline)
				{
					/*
					 * The hint is wrong, but we can't clear it here since we
					 * don't have the ability to mark the page dirty.
					 */
					space = 0;
				}
			}
			else
			{
				/*
				 * Although the hint might be wrong, PageAddItem will believe
				 * it anyway, so we must believe it too.
				 */
				space = 0;
			}
		}
	}
	return space;
}
1051 :
1052 :
1053 : /*
1054 : * PageIndexTupleDelete
1055 : *
1056 : * This routine does the work of removing a tuple from an index page.
1057 : *
1058 : * Unlike heap pages, we compact out the line pointer for the removed tuple.
1059 : */
void
PageIndexTupleDelete(Page page, OffsetNumber offnum)
{
	PageHeader	phdr = (PageHeader) page;
	char	   *addr;
	ItemId		tup;
	Size		size;			/* MAXALIGN'd size of the deleted tuple */
	unsigned	offset;			/* page offset of the deleted tuple's data */
	int			nbytes;			/* bytes of linp array after the target */
	int			offidx;			/* 0-based index of the target line pointer */
	int			nline;			/* number of line pointers on the page */

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ ||
		phdr->pd_special != MAXALIGN(phdr->pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	nline = PageGetMaxOffsetNumber(page);
	if ((int) offnum <= 0 || (int) offnum > nline)
		elog(ERROR, "invalid index offnum: %u", offnum);

	/* change offset number to offset index */
	offidx = offnum - 1;

	tup = PageGetItemId(page, offnum);
	Assert(ItemIdHasStorage(tup));
	size = ItemIdGetLength(tup);
	offset = ItemIdGetOffset(tup);

	/* the tuple's data must lie entirely within the tuple-space region */
	if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
		offset != MAXALIGN(offset))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted line pointer: offset = %u, size = %zu",
						offset, size)));

	/* Amount of space to actually be deleted */
	size = MAXALIGN(size);

	/*
	 * First, we want to get rid of the pd_linp entry for the index tuple. We
	 * copy all subsequent linp's back one slot in the array. We don't use
	 * PageGetItemId, because we are manipulating the _array_, not individual
	 * linp's.
	 */
	nbytes = phdr->pd_lower -
		((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);

	if (nbytes > 0)
		memmove(&(phdr->pd_linp[offidx]),
				&(phdr->pd_linp[offidx + 1]),
				nbytes);

	/*
	 * Now move everything between the old upper bound (beginning of tuple
	 * space) and the beginning of the deleted tuple forward, so that space in
	 * the middle of the page is left free.  If we've just deleted the tuple
	 * at the beginning of tuple space, then there's no need to do the copy.
	 */

	/* beginning of tuple space */
	addr = (char *) page + phdr->pd_upper;

	/* memmove, not memcpy: source and destination regions may overlap */
	if (offset > phdr->pd_upper)
		memmove(addr + size, addr, offset - phdr->pd_upper);

	/* adjust free space boundary pointers */
	phdr->pd_upper += size;
	phdr->pd_lower -= sizeof(ItemIdData);

	/*
	 * Finally, we need to adjust the linp entries that remain.
	 *
	 * Anything that used to be before the deleted tuple's data was moved
	 * forward by the size of the deleted tuple.
	 */
	if (!PageIsEmpty(page))
	{
		int			i;

		nline--;				/* there's one less than when we started */
		for (i = 1; i <= nline; i++)
		{
			ItemId		ii = PageGetItemId(page, i);

			Assert(ItemIdHasStorage(ii));
			if (ItemIdGetOffset(ii) <= offset)
				ii->lp_off += size;
		}
	}
}
1159 :
1160 :
1161 : /*
1162 : * PageIndexMultiDelete
1163 : *
1164 : * This routine handles the case of deleting multiple tuples from an
1165 : * index page at once. It is considerably faster than a loop around
1166 : * PageIndexTupleDelete ... however, the caller *must* supply the array
1167 : * of item numbers to be deleted in item number order!
1168 : */
void
PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
{
	PageHeader	phdr = (PageHeader) page;
	Offset		pd_lower = phdr->pd_lower;
	Offset		pd_upper = phdr->pd_upper;
	Offset		pd_special = phdr->pd_special;
	Offset		last_offset;	/* lowest tuple offset seen so far */
	itemIdCompactData itemidbase[MaxIndexTuplesPerPage];
	ItemIdData	newitemids[MaxIndexTuplesPerPage];
	itemIdCompact itemidptr;
	ItemId		lp;
	int			nline,			/* line pointers before deletion */
				nused;			/* line pointers we will keep */
	Size		totallen;		/* total aligned size of kept tuples */
	Size		size;
	unsigned	offset;
	int			nextitm;		/* index into itemnos[] */
	OffsetNumber offnum;
	bool		presorted = true;	/* For now */

	Assert(nitems <= MaxIndexTuplesPerPage);

	/*
	 * If there aren't very many items to delete, then retail
	 * PageIndexTupleDelete is the best way.  Delete the items in reverse
	 * order so we don't have to think about adjusting item numbers for
	 * previous deletions.
	 *
	 * TODO: tune the magic number here
	 */
	if (nitems <= 2)
	{
		while (--nitems >= 0)
			PageIndexTupleDelete(page, itemnos[nitems]);
		return;
	}

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (pd_lower < SizeOfPageHeaderData ||
		pd_lower > pd_upper ||
		pd_upper > pd_special ||
		pd_special > BLCKSZ ||
		pd_special != MAXALIGN(pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						pd_lower, pd_upper, pd_special)));

	/*
	 * Scan the line pointer array and build a list of just the ones we are
	 * going to keep.  Notice we do not modify the page yet, since we are
	 * still validity-checking.
	 */
	nline = PageGetMaxOffsetNumber(page);
	itemidptr = itemidbase;
	totallen = 0;
	nused = 0;
	nextitm = 0;
	last_offset = pd_special;
	for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
	{
		lp = PageGetItemId(page, offnum);
		Assert(ItemIdHasStorage(lp));
		size = ItemIdGetLength(lp);
		offset = ItemIdGetOffset(lp);
		if (offset < pd_upper ||
			(offset + size) > pd_special ||
			offset != MAXALIGN(offset))
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("corrupted line pointer: offset = %u, size = %zu",
							offset, size)));

		/* itemnos[] is sorted, so a single cursor suffices to match it */
		if (nextitm < nitems && offnum == itemnos[nextitm])
		{
			/* skip item to be deleted */
			nextitm++;
		}
		else
		{
			itemidptr->offsetindex = nused; /* where it will go */
			itemidptr->itemoff = offset;

			/* track whether kept tuples appear in descending-offset order */
			if (last_offset > itemidptr->itemoff)
				last_offset = itemidptr->itemoff;
			else
				presorted = false;

			itemidptr->alignedlen = MAXALIGN(size);
			totallen += itemidptr->alignedlen;
			newitemids[nused] = *lp;
			itemidptr++;
			nused++;
		}
	}

	/* this will catch invalid or out-of-order itemnos[] */
	if (nextitm != nitems)
		elog(ERROR, "incorrect index offsets supplied");

	if (totallen > (Size) (pd_special - pd_lower))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted item lengths: total %zu, available space %u",
						totallen, pd_special - pd_lower)));

	/*
	 * Looks good. Overwrite the line pointers with the copy, from which we've
	 * removed all the unused items.
	 */
	memcpy(phdr->pd_linp, newitemids, nused * sizeof(ItemIdData));
	phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);

	/* and compactify the tuple data */
	if (nused > 0)
		compactify_tuples(itemidbase, nused, page, presorted);
	else
		phdr->pd_upper = pd_special;	/* page is now empty of tuples */
}
1291 :
1292 :
1293 : /*
1294 : * PageIndexTupleDeleteNoCompact
1295 : *
1296 : * Remove the specified tuple from an index page, but set its line pointer
1297 : * to "unused" instead of compacting it out, except that it can be removed
1298 : * if it's the last line pointer on the page.
1299 : *
1300 : * This is used for index AMs that require that existing TIDs of live tuples
1301 : * remain unchanged, and are willing to allow unused line pointers instead.
1302 : */
void
PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
{
	PageHeader	phdr = (PageHeader) page;
	char	   *addr;
	ItemId		tup;
	Size		size;			/* MAXALIGN'd size of the deleted tuple */
	unsigned	offset;			/* page offset of the deleted tuple's data */
	int			nline;			/* number of line pointers on the page */

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ ||
		phdr->pd_special != MAXALIGN(phdr->pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	nline = PageGetMaxOffsetNumber(page);
	if ((int) offnum <= 0 || (int) offnum > nline)
		elog(ERROR, "invalid index offnum: %u", offnum);

	tup = PageGetItemId(page, offnum);
	Assert(ItemIdHasStorage(tup));
	size = ItemIdGetLength(tup);
	offset = ItemIdGetOffset(tup);

	/* the tuple's data must lie entirely within the tuple-space region */
	if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
		offset != MAXALIGN(offset))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted line pointer: offset = %u, size = %zu",
						offset, size)));

	/* Amount of space to actually be deleted */
	size = MAXALIGN(size);

	/*
	 * Either set the line pointer to "unused", or zap it if it's the last
	 * one.  (Note: it's possible that the next-to-last one(s) are already
	 * unused, but we do not trouble to try to compact them out if so.)
	 */
	if ((int) offnum < nline)
		ItemIdSetUnused(tup);
	else
	{
		phdr->pd_lower -= sizeof(ItemIdData);
		nline--;				/* there's one less than when we started */
	}

	/*
	 * Now move everything between the old upper bound (beginning of tuple
	 * space) and the beginning of the deleted tuple forward, so that space in
	 * the middle of the page is left free.  If we've just deleted the tuple
	 * at the beginning of tuple space, then there's no need to do the copy.
	 */

	/* beginning of tuple space */
	addr = (char *) page + phdr->pd_upper;

	/* memmove, not memcpy: source and destination regions may overlap */
	if (offset > phdr->pd_upper)
		memmove(addr + size, addr, offset - phdr->pd_upper);

	/* adjust free space boundary pointer */
	phdr->pd_upper += size;

	/*
	 * Finally, we need to adjust the linp entries that remain.
	 *
	 * Anything that used to be before the deleted tuple's data was moved
	 * forward by the size of the deleted tuple.
	 */
	if (!PageIsEmpty(page))
	{
		int			i;

		for (i = 1; i <= nline; i++)
		{
			ItemId		ii = PageGetItemId(page, i);

			/* skip storage-less items; the one we just unused is among them */
			if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
				ii->lp_off += size;
		}
	}
}
1393 :
1394 :
1395 : /*
1396 : * PageIndexTupleOverwrite
1397 : *
1398 : * Replace a specified tuple on an index page.
1399 : *
1400 : * The new tuple is placed exactly where the old one had been, shifting
1401 : * other tuples' data up or down as needed to keep the page compacted.
1402 : * This is better than deleting and reinserting the tuple, because it
1403 : * avoids any data shifting when the tuple size doesn't change; and
1404 : * even when it does, we avoid moving the line pointers around.
1405 : * This could be used by an index AM that doesn't want to unset the
1406 : * LP_DEAD bit when it happens to be set. It could conceivably also be
1407 : * used by an index AM that cares about the physical order of tuples as
1408 : * well as their logical/ItemId order.
1409 : *
1410 : * If there's insufficient space for the new tuple, return false. Other
1411 : * errors represent data-corruption problems, so we just elog.
1412 : */
bool
PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
						const void *newtup, Size newsize)
{
	PageHeader	phdr = (PageHeader) page;
	ItemId		tupid;
	int			oldsize;		/* old tuple's length, then MAXALIGN'd */
	unsigned	offset;			/* old tuple's data offset within page */
	Size		alignednewsize; /* MAXALIGN'd size of the replacement */
	int			size_diff;		/* amount tuple shrinks (may be negative) */
	int			itemcount;		/* number of line pointers on the page */

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ ||
		phdr->pd_special != MAXALIGN(phdr->pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	itemcount = PageGetMaxOffsetNumber(page);
	if ((int) offnum <= 0 || (int) offnum > itemcount)
		elog(ERROR, "invalid index offnum: %u", offnum);

	tupid = PageGetItemId(page, offnum);
	Assert(ItemIdHasStorage(tupid));
	oldsize = ItemIdGetLength(tupid);
	offset = ItemIdGetOffset(tupid);

	/* the old tuple's data must lie entirely within the tuple-space region */
	if (offset < phdr->pd_upper || (offset + oldsize) > phdr->pd_special ||
		offset != MAXALIGN(offset))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted line pointer: offset = %u, size = %d",
						offset, oldsize)));

	/*
	 * Determine actual change in space requirement, check for page overflow.
	 */
	oldsize = MAXALIGN(oldsize);
	alignednewsize = MAXALIGN(newsize);
	if (alignednewsize > oldsize + (phdr->pd_upper - phdr->pd_lower))
		return false;

	/*
	 * Relocate existing data and update line pointers, unless the new tuple
	 * is the same size as the old (after alignment), in which case there's
	 * nothing to do.  Notice that what we have to relocate is data before the
	 * target tuple, not data after, so it's convenient to express size_diff
	 * as the amount by which the tuple's size is decreasing, making it the
	 * delta to add to pd_upper and affected line pointers.
	 */
	size_diff = oldsize - (int) alignednewsize;
	if (size_diff != 0)
	{
		char	   *addr = (char *) page + phdr->pd_upper;
		int			i;

		/* relocate all tuple data before the target tuple */
		memmove(addr + size_diff, addr, offset - phdr->pd_upper);

		/* adjust free space boundary pointer */
		phdr->pd_upper += size_diff;

		/* adjust affected line pointers too */
		for (i = FirstOffsetNumber; i <= itemcount; i++)
		{
			ItemId		ii = PageGetItemId(page, i);

			/* Allow items without storage; currently only BRIN needs that */
			if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
				ii->lp_off += size_diff;
		}
	}

	/* Update the item's tuple length without changing its lp_flags field */
	tupid->lp_off = offset + size_diff;
	tupid->lp_len = newsize;

	/* Copy new tuple data onto page */
	memcpy(PageGetItem(page, tupid), newtup, newsize);

	return true;
}
1502 :
1503 :
1504 : /*
1505 : * Set checksum on a page.
1506 : *
1507 : * If the page is in shared buffers, it needs to be locked in at least
1508 : * share-exclusive mode.
1509 : *
1510 : * If checksums are disabled, or if the page is not initialized, just
1511 : * return. Otherwise compute and set the checksum.
1512 : *
1513 : * In the past this needed to be done on a copy of the page, due to the
1514 : * possibility of e.g., hint bits being set concurrently. However, this is not
1515 : * necessary anymore as hint bits won't be set while IO is going on.
1516 : */
1517 : void
1518 783140 : PageSetChecksum(Page page, BlockNumber blkno)
1519 : {
1520 783140 : HOLD_INTERRUPTS();
1521 : /* If we don't need a checksum, just return */
1522 783140 : if (PageIsNew(page) || !DataChecksumsNeedWrite())
1523 : {
1524 50632 : RESUME_INTERRUPTS();
1525 50632 : return;
1526 : }
1527 :
1528 732508 : ((PageHeader) page)->pd_checksum = pg_checksum_page(page, blkno);
1529 732508 : RESUME_INTERRUPTS();
1530 : }
|