Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * bufpage.c
4 : * POSTGRES standard buffer page code.
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/storage/page/bufpage.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/htup_details.h"
18 : #include "access/itup.h"
19 : #include "access/xlog.h"
20 : #include "pgstat.h"
21 : #include "storage/checksum.h"
22 : #include "utils/memdebug.h"
23 : #include "utils/memutils.h"
24 :
25 :
26 : /* GUC variable */
27 : bool ignore_checksum_failure = false;
28 :
29 :
30 : /* ----------------------------------------------------------------
31 : * Page support functions
32 : * ----------------------------------------------------------------
33 : */
34 :
35 : /*
36 : * PageInit
37 : * Initializes the contents of a page.
38 : * Note that we don't calculate an initial checksum here; that's not done
39 : * until it's time to write.
40 : */
41 : void
42 647200 : PageInit(Page page, Size pageSize, Size specialSize)
43 : {
44 647200 : PageHeader p = (PageHeader) page;
45 :
46 647200 : specialSize = MAXALIGN(specialSize);
47 :
48 : Assert(pageSize == BLCKSZ);
49 : Assert(pageSize > specialSize + SizeOfPageHeaderData);
50 :
51 : /* Make sure all fields of page are zero, as well as unused space */
52 647200 : MemSet(p, 0, pageSize);
53 :
54 647200 : p->pd_flags = 0;
55 647200 : p->pd_lower = SizeOfPageHeaderData;
56 647200 : p->pd_upper = pageSize - specialSize;
57 647200 : p->pd_special = pageSize - specialSize;
58 647200 : PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
59 : /* p->pd_prune_xid = InvalidTransactionId; done by above MemSet */
60 647200 : }
61 :
62 :
63 : /*
64 : * PageIsVerifiedExtended
65 : * Check that the page header and checksum (if any) appear valid.
66 : *
67 : * This is called when a page has just been read in from disk. The idea is
68 : * to cheaply detect trashed pages before we go nuts following bogus line
69 : * pointers, testing invalid transaction identifiers, etc.
70 : *
71 : * It turns out to be necessary to allow zeroed pages here too. Even though
72 : * this routine is *not* called when deliberately adding a page to a relation,
73 : * there are scenarios in which a zeroed page might be found in a table.
74 : * (Example: a backend extends a relation, then crashes before it can write
75 : * any WAL entry about the new page. The kernel will already have the
76 : * zeroed page in the file, and it will stay that way after restart.) So we
77 : * allow zeroed pages here, and are careful that the page access macros
78 : * treat such a page as empty and without free space. Eventually, VACUUM
79 : * will clean up such a page and make it usable.
80 : *
81 : * If flag PIV_LOG_WARNING is set, a WARNING is logged in the event of
82 : * a checksum failure.
83 : *
84 : * If flag PIV_REPORT_STAT is set, a checksum failure is reported directly
85 : * to pgstat.
86 : */
bool
PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
{
	PageHeader	p = (PageHeader) page;
	size_t	   *pagebytes;
	bool		checksum_failure = false;
	bool		header_sane = false;
	uint16		checksum = 0;

	/*
	 * Don't verify page data unless the page passes basic non-zero test
	 */
	if (!PageIsNew(page))
	{
		if (DataChecksumsEnabled())
		{
			checksum = pg_checksum_page((char *) page, blkno);

			if (checksum != p->pd_checksum)
				checksum_failure = true;
		}

		/*
		 * The following checks don't prove the header is correct, only that
		 * it looks sane enough to allow into the buffer pool. Later usage of
		 * the block can still reveal problems, which is why we offer the
		 * checksum option.
		 */
		if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
			p->pd_lower <= p->pd_upper &&
			p->pd_upper <= p->pd_special &&
			p->pd_special <= BLCKSZ &&
			p->pd_special == MAXALIGN(p->pd_special))
			header_sane = true;

		/* Fast path: sane header and matching (or disabled) checksum. */
		if (header_sane && !checksum_failure)
			return true;
	}

	/*
	 * Check all-zeroes case.  A completely zeroed page is considered valid
	 * (see the function header comment for why), so we must test this before
	 * reporting any checksum failure.
	 */
	pagebytes = (size_t *) page;

	if (pg_memory_is_all_zeros(pagebytes, BLCKSZ))
		return true;

	/*
	 * Throw a WARNING if the checksum fails, but only after we've checked for
	 * the all-zeroes case.
	 */
	if (checksum_failure)
	{
		if ((flags & PIV_LOG_WARNING) != 0)
			ereport(WARNING,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("page verification failed, calculated checksum %u but expected %u",
							checksum, p->pd_checksum)));

		if ((flags & PIV_REPORT_STAT) != 0)
			pgstat_report_checksum_failure();

		/*
		 * Only a checksum mismatch can be overridden by the
		 * ignore_checksum_failure GUC; a structurally insane header is
		 * always rejected.
		 */
		if (header_sane && ignore_checksum_failure)
			return true;
	}

	return false;
}
153 :
154 :
155 : /*
156 : * PageAddItemExtended
157 : *
158 : * Add an item to a page. Return value is the offset at which it was
159 : * inserted, or InvalidOffsetNumber if the item is not inserted for any
160 : * reason. A WARNING is issued indicating the reason for the refusal.
161 : *
162 : * offsetNumber must be either InvalidOffsetNumber to specify finding a
163 : * free line pointer, or a value between FirstOffsetNumber and one past
164 : * the last existing item, to specify using that particular line pointer.
165 : *
166 : * If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
167 : * the item at the specified offsetNumber, which must be either a
168 : * currently-unused line pointer, or one past the last existing item.
169 : *
170 : * If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
171 : * the item at the specified offsetNumber, moving existing items later
172 : * in the array to make room.
173 : *
174 : * If offsetNumber is not valid, then assign a slot by finding the first
175 : * one that is both unused and deallocated.
176 : *
177 : * If flag PAI_IS_HEAP is set, we enforce that there can't be more than
178 : * MaxHeapTuplesPerPage line pointers on the page.
179 : *
180 : * !!! EREPORT(ERROR) IS DISALLOWED HERE !!!
181 : */
OffsetNumber
PageAddItemExtended(Page page,
					Item item,
					Size size,
					OffsetNumber offsetNumber,
					int flags)
{
	PageHeader	phdr = (PageHeader) page;
	Size		alignedSize;
	int			lower;
	int			upper;
	ItemId		itemId;
	OffsetNumber limit;
	bool		needshuffle = false;

	/*
	 * Be wary about corrupted page pointers.  Since we're about to write
	 * into a (usually shared) buffer, PANIC rather than risk clobbering
	 * adjacent buffers.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ)
		ereport(PANIC,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	/*
	 * Select offsetNumber to place the new item at
	 */
	limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));

	/* was offsetNumber passed in? */
	if (OffsetNumberIsValid(offsetNumber))
	{
		/* yes, check it */
		if ((flags & PAI_OVERWRITE) != 0)
		{
			/* overwrite mode: the slot must be unused, or one past the end */
			if (offsetNumber < limit)
			{
				itemId = PageGetItemId(page, offsetNumber);
				if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
				{
					elog(WARNING, "will not overwrite a used ItemId");
					return InvalidOffsetNumber;
				}
			}
		}
		else
		{
			/* insert mode: inserting before the end requires a shuffle */
			if (offsetNumber < limit)
				needshuffle = true; /* need to move existing linp's */
		}
	}
	else
	{
		/* offsetNumber was not passed in, so find a free slot */
		/* if no free slot, we'll put it at limit (1st open slot) */
		if (PageHasFreeLinePointers(page))
		{
			/*
			 * Scan line pointer array to locate a "recyclable" (unused)
			 * ItemId.
			 *
			 * Always use earlier items first. PageTruncateLinePointerArray
			 * can only truncate unused items when they appear as a contiguous
			 * group at the end of the line pointer array.
			 */
			for (offsetNumber = FirstOffsetNumber;
				 offsetNumber < limit;	/* limit is maxoff+1 */
				 offsetNumber++)
			{
				itemId = PageGetItemId(page, offsetNumber);

				/*
				 * We check for no storage as well, just to be paranoid;
				 * unused items should never have storage. Assert() that the
				 * invariant is respected too.
				 */
				Assert(ItemIdIsUsed(itemId) || !ItemIdHasStorage(itemId));

				if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
					break;
			}
			if (offsetNumber >= limit)
			{
				/* the hint is wrong, so reset it */
				PageClearHasFreeLinePointers(page);
			}
		}
		else
		{
			/* don't bother searching if hint says there's no free slot */
			offsetNumber = limit;
		}
	}

	/* Reject placing items beyond the first unused line pointer */
	if (offsetNumber > limit)
	{
		elog(WARNING, "specified item offset is too large");
		return InvalidOffsetNumber;
	}

	/* Reject placing items beyond heap boundary, if heap */
	if ((flags & PAI_IS_HEAP) != 0 && offsetNumber > MaxHeapTuplesPerPage)
	{
		elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
		return InvalidOffsetNumber;
	}

	/*
	 * Compute new lower and upper pointers for page, see if it'll fit.
	 *
	 * Note: do arithmetic as signed ints, to avoid mistakes if, say,
	 * alignedSize > pd_upper.
	 */
	if (offsetNumber == limit || needshuffle)
		lower = phdr->pd_lower + sizeof(ItemIdData);
	else
		lower = phdr->pd_lower;	/* reusing an existing line pointer slot */

	alignedSize = MAXALIGN(size);

	upper = (int) phdr->pd_upper - (int) alignedSize;

	if (lower > upper)
		return InvalidOffsetNumber;	/* not enough free space on the page */

	/*
	 * OK to insert the item. First, shuffle the existing pointers if needed.
	 */
	itemId = PageGetItemId(page, offsetNumber);

	if (needshuffle)
		memmove(itemId + 1, itemId,
				(limit - offsetNumber) * sizeof(ItemIdData));

	/* set the line pointer */
	ItemIdSetNormal(itemId, upper, size);

	/*
	 * Items normally contain no uninitialized bytes. Core bufpage consumers
	 * conform, but this is not a necessary coding rule; a new index AM could
	 * opt to depart from it. However, data type input functions and other
	 * C-language functions that synthesize datums should initialize all
	 * bytes; datumIsEqual() relies on this. Testing here, along with the
	 * similar check in printtup(), helps to catch such mistakes.
	 *
	 * Values of the "name" type retrieved via index-only scans may contain
	 * uninitialized bytes; see comment in btrescan(). Valgrind will report
	 * this as an error, but it is safe to ignore.
	 */
	VALGRIND_CHECK_MEM_IS_DEFINED(item, size);

	/* copy the item's data onto the page */
	memcpy((char *) page + upper, item, size);

	/* adjust page header */
	phdr->pd_lower = (LocationIndex) lower;
	phdr->pd_upper = (LocationIndex) upper;

	return offsetNumber;
}
346 :
347 :
348 : /*
349 : * PageGetTempPage
350 : * Get a temporary page in local memory for special processing.
351 : * The returned page is not initialized at all; caller must do that.
352 : */
353 : Page
354 21872 : PageGetTempPage(Page page)
355 : {
356 : Size pageSize;
357 : Page temp;
358 :
359 21872 : pageSize = PageGetPageSize(page);
360 21872 : temp = (Page) palloc(pageSize);
361 :
362 21872 : return temp;
363 : }
364 :
365 : /*
366 : * PageGetTempPageCopy
367 : * Get a temporary page in local memory for special processing.
368 : * The page is initialized by copying the contents of the given page.
369 : */
370 : Page
371 11280 : PageGetTempPageCopy(Page page)
372 : {
373 : Size pageSize;
374 : Page temp;
375 :
376 11280 : pageSize = PageGetPageSize(page);
377 11280 : temp = (Page) palloc(pageSize);
378 :
379 11280 : memcpy(temp, page, pageSize);
380 :
381 11280 : return temp;
382 : }
383 :
384 : /*
385 : * PageGetTempPageCopySpecial
386 : * Get a temporary page in local memory for special processing.
387 : * The page is PageInit'd with the same special-space size as the
388 : * given page, and the special space is copied from the given page.
389 : */
390 : Page
391 57826 : PageGetTempPageCopySpecial(Page page)
392 : {
393 : Size pageSize;
394 : Page temp;
395 :
396 57826 : pageSize = PageGetPageSize(page);
397 57826 : temp = (Page) palloc(pageSize);
398 :
399 57826 : PageInit(temp, pageSize, PageGetSpecialSize(page));
400 57826 : memcpy(PageGetSpecialPointer(temp),
401 57826 : PageGetSpecialPointer(page),
402 57826 : PageGetSpecialSize(page));
403 :
404 57826 : return temp;
405 : }
406 :
407 : /*
408 : * PageRestoreTempPage
409 : * Copy temporary page back to permanent page after special processing
410 : * and release the temporary page.
411 : */
412 : void
413 76904 : PageRestoreTempPage(Page tempPage, Page oldPage)
414 : {
415 : Size pageSize;
416 :
417 76904 : pageSize = PageGetPageSize(tempPage);
418 76904 : memcpy((char *) oldPage, (char *) tempPage, pageSize);
419 :
420 76904 : pfree(tempPage);
421 76904 : }
422 :
423 : /*
424 : * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
425 : */
typedef struct itemIdCompactData
{
	uint16		offsetindex;	/* linp array index (0-based, i.e. offset - 1) */
	int16		itemoff;		/* page offset of item data */
	uint16		alignedlen;		/* MAXALIGN(item data len) */
} itemIdCompactData;
typedef itemIdCompactData *itemIdCompact;
433 :
434 : /*
435 : * After removing or marking some line pointers unused, move the tuples to
436 : * remove the gaps caused by the removed items and reorder them back into
437 : * reverse line pointer order in the page.
438 : *
439 : * This function can often be fairly hot, so it pays to take some measures to
440 : * make it as optimal as possible.
441 : *
442 : * Callers may pass 'presorted' as true if the 'itemidbase' array is sorted in
443 : * descending order of itemoff. When this is true we can just memmove()
444 : * tuples towards the end of the page. This is quite a common case as it's
445 : * the order that tuples are initially inserted into pages. When we call this
446 : * function to defragment the tuples in the page then any new line pointers
447 : * added to the page will keep that presorted order, so hitting this case is
448 : * still very common for tables that are commonly updated.
449 : *
450 : * When the 'itemidbase' array is not presorted then we're unable to just
451 : * memmove() tuples around freely. Doing so could cause us to overwrite the
452 : * memory belonging to a tuple we've not moved yet. In this case, we copy all
453 : * the tuples that need to be moved into a temporary buffer. We can then
454 : * simply memcpy() out of that temp buffer back into the page at the correct
455 : * location. Tuples are copied back into the page in the same order as the
456 : * 'itemidbase' array, so we end up reordering the tuples back into reverse
457 : * line pointer order. This will increase the chances of hitting the
458 : * presorted case the next time around.
459 : *
460 : * Callers must ensure that nitems is > 0
461 : */
static void
compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
{
	PageHeader	phdr = (PageHeader) page;
	Offset		upper;			/* next target offset; ends as new pd_upper */
	Offset		copy_tail;		/* end of the pending run of tuples to move */
	Offset		copy_head;		/* start of the pending run of tuples to move */
	itemIdCompact itemidptr;
	int			i;

	/* Code within will not work correctly if nitems == 0 */
	Assert(nitems > 0);

	if (presorted)
	{

#ifdef USE_ASSERT_CHECKING
		{
			/*
			 * Verify we've not gotten any new callers that are incorrectly
			 * passing a true presorted value.
			 */
			Offset		lastoff = phdr->pd_special;

			for (i = 0; i < nitems; i++)
			{
				itemidptr = &itemidbase[i];

				Assert(lastoff > itemidptr->itemoff);

				lastoff = itemidptr->itemoff;
			}
		}
#endif							/* USE_ASSERT_CHECKING */

		/*
		 * 'itemidbase' is already in the optimal order, i.e, lower item
		 * pointers have a higher offset. This allows us to memmove() the
		 * tuples up to the end of the page without having to worry about
		 * overwriting other tuples that have not been moved yet.
		 *
		 * There's a good chance that there are tuples already right at the
		 * end of the page that we can simply skip over because they're
		 * already in the correct location within the page. We'll do that
		 * first...
		 */
		upper = phdr->pd_special;
		i = 0;
		do
		{
			itemidptr = &itemidbase[i];
			if (upper != itemidptr->itemoff + itemidptr->alignedlen)
				break;			/* found the first gap */
			upper -= itemidptr->alignedlen;

			i++;
		} while (i < nitems);

		/*
		 * Now that we've found the first tuple that needs to be moved, we can
		 * do the tuple compactification. We try and make the least number of
		 * memmove() calls and only call memmove() when there's a gap. When
		 * we see a gap we just move all tuples after the gap up until the
		 * point of the last move operation.
		 */
		copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
		for (; i < nitems; i++)
		{
			ItemId		lp;

			itemidptr = &itemidbase[i];
			lp = PageGetItemId(page, itemidptr->offsetindex + 1);

			if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
			{
				memmove((char *) page + upper,
						page + copy_head,
						copy_tail - copy_head);

				/*
				 * We've now moved all tuples already seen, but not the
				 * current tuple, so we set the copy_tail to the end of this
				 * tuple so it can be moved in another iteration of the loop.
				 */
				copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
			}
			/* shift the target offset down by the length of this tuple */
			upper -= itemidptr->alignedlen;
			/* point the copy_head to the start of this tuple */
			copy_head = itemidptr->itemoff;

			/* update the line pointer to reference the new offset */
			lp->lp_off = upper;
		}

		/* move the remaining tuples. */
		memmove((char *) page + upper,
				page + copy_head,
				copy_tail - copy_head);
	}
	else
	{
		PGAlignedBlock scratch;	/* page-sized, suitably aligned temp buffer */
		char	   *scratchptr = scratch.data;

		/*
		 * Non-presorted case: The tuples in the itemidbase array may be in
		 * any order. So, in order to move these to the end of the page we
		 * must make a temp copy of each tuple that needs to be moved before
		 * we copy them back into the page at the new offset.
		 *
		 * If a large percentage of tuples have been pruned (>75%) then we'll
		 * copy these into the temp buffer tuple-by-tuple, otherwise, we'll
		 * just do a single memcpy() for all tuples that need to be moved.
		 * When so many tuples have been removed there's likely to be a lot of
		 * gaps and it's unlikely that many non-movable tuples remain at the
		 * end of the page.
		 */
		if (nitems < PageGetMaxOffsetNumber(page) / 4)
		{
			/* sparse case: copy each surviving tuple individually */
			i = 0;
			do
			{
				itemidptr = &itemidbase[i];
				memcpy(scratchptr + itemidptr->itemoff, page + itemidptr->itemoff,
					   itemidptr->alignedlen);
				i++;
			} while (i < nitems);

			/* Set things up for the compactification code below */
			i = 0;
			itemidptr = &itemidbase[0];
			upper = phdr->pd_special;
		}
		else
		{
			upper = phdr->pd_special;

			/*
			 * Many tuples are likely to already be in the correct location.
			 * There's no need to copy these into the temp buffer. Instead
			 * we'll just skip forward in the itemidbase array to the position
			 * that we do need to move tuples from so that the code below just
			 * leaves these ones alone.
			 */
			i = 0;
			do
			{
				itemidptr = &itemidbase[i];
				if (upper != itemidptr->itemoff + itemidptr->alignedlen)
					break;
				upper -= itemidptr->alignedlen;

				i++;
			} while (i < nitems);

			/* Copy all tuples that need to be moved into the temp buffer */
			memcpy(scratchptr + phdr->pd_upper,
				   page + phdr->pd_upper,
				   upper - phdr->pd_upper);
		}

		/*
		 * Do the tuple compactification. itemidptr is already pointing to
		 * the first tuple that we're going to move. Here we collapse the
		 * memcpy calls for adjacent tuples into a single call. This is done
		 * by delaying the memcpy call until we find a gap that needs to be
		 * closed.
		 */
		copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
		for (; i < nitems; i++)
		{
			ItemId		lp;

			itemidptr = &itemidbase[i];
			lp = PageGetItemId(page, itemidptr->offsetindex + 1);

			/* copy pending tuples when we detect a gap */
			if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
			{
				memcpy((char *) page + upper,
					   scratchptr + copy_head,
					   copy_tail - copy_head);

				/*
				 * We've now copied all tuples already seen, but not the
				 * current tuple, so we set the copy_tail to the end of this
				 * tuple.
				 */
				copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
			}
			/* shift the target offset down by the length of this tuple */
			upper -= itemidptr->alignedlen;
			/* point the copy_head to the start of this tuple */
			copy_head = itemidptr->itemoff;

			/* update the line pointer to reference the new offset */
			lp->lp_off = upper;
		}

		/* Copy the remaining chunk */
		memcpy((char *) page + upper,
			   scratchptr + copy_head,
			   copy_tail - copy_head);
	}

	/* all tuples are now packed against pd_special */
	phdr->pd_upper = upper;
}
670 :
671 : /*
672 : * PageRepairFragmentation
673 : *
674 : * Frees fragmented space on a heap page following pruning.
675 : *
676 : * This routine is usable for heap pages only, but see PageIndexMultiDelete.
677 : *
678 : * This routine removes unused line pointers from the end of the line pointer
679 : * array. This is possible when dead heap-only tuples get removed by pruning,
680 : * especially when there were HOT chains with several tuples each beforehand.
681 : *
682 : * Caller had better have a full cleanup lock on page's buffer. As a side
683 : * effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
684 : * needed. Caller might also need to account for a reduction in the length of
685 : * the line pointer array following array truncation.
686 : */
void
PageRepairFragmentation(Page page)
{
	Offset		pd_lower = ((PageHeader) page)->pd_lower;
	Offset		pd_upper = ((PageHeader) page)->pd_upper;
	Offset		pd_special = ((PageHeader) page)->pd_special;
	Offset		last_offset;
	itemIdCompactData itemidbase[MaxHeapTuplesPerPage];
	itemIdCompact itemidptr;
	ItemId		lp;
	int			nline,
				nstorage,
				nunused;
	OffsetNumber finalusedlp = InvalidOffsetNumber;
	int			i;
	Size		totallen;
	bool		presorted = true;	/* For now */

	/*
	 * It's worth the trouble to be more paranoid here than in most places,
	 * because we are about to reshuffle data in (what is usually) a shared
	 * disk buffer. If we aren't careful then corrupted pointers, lengths,
	 * etc could cause us to clobber adjacent disk buffers, spreading the data
	 * loss further. So, check everything.
	 */
	if (pd_lower < SizeOfPageHeaderData ||
		pd_lower > pd_upper ||
		pd_upper > pd_special ||
		pd_special > BLCKSZ ||
		pd_special != MAXALIGN(pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						pd_lower, pd_upper, pd_special)));

	/*
	 * Run through the line pointer array and collect data about live items.
	 */
	nline = PageGetMaxOffsetNumber(page);
	itemidptr = itemidbase;
	nunused = totallen = 0;
	last_offset = pd_special;
	for (i = FirstOffsetNumber; i <= nline; i++)
	{
		lp = PageGetItemId(page, i);
		if (ItemIdIsUsed(lp))
		{
			if (ItemIdHasStorage(lp))
			{
				itemidptr->offsetindex = i - 1;
				itemidptr->itemoff = ItemIdGetOffset(lp);

				/*
				 * Track whether item offsets strictly decrease; if so the
				 * array is presorted and compactify_tuples can take its
				 * cheap memmove path.
				 */
				if (last_offset > itemidptr->itemoff)
					last_offset = itemidptr->itemoff;
				else
					presorted = false;

				if (unlikely(itemidptr->itemoff < (int) pd_upper ||
							 itemidptr->itemoff >= (int) pd_special))
					ereport(ERROR,
							(errcode(ERRCODE_DATA_CORRUPTED),
							 errmsg("corrupted line pointer: %u",
									itemidptr->itemoff)));
				itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
				totallen += itemidptr->alignedlen;
				itemidptr++;
			}

			finalusedlp = i;	/* Could be the final non-LP_UNUSED item */
		}
		else
		{
			/* Unused entries should have lp_len = 0, but make sure */
			Assert(!ItemIdHasStorage(lp));
			ItemIdSetUnused(lp);
			nunused++;
		}
	}

	nstorage = itemidptr - itemidbase;
	if (nstorage == 0)
	{
		/* Page is completely empty, so just reset it quickly */
		((PageHeader) page)->pd_upper = pd_special;
	}
	else
	{
		/* Need to compact the page the hard way */
		if (totallen > (Size) (pd_special - pd_lower))
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("corrupted item lengths: total %u, available space %u",
							(unsigned int) totallen, pd_special - pd_lower)));

		compactify_tuples(itemidbase, nstorage, page, presorted);
	}

	if (finalusedlp != nline)
	{
		/* The last line pointer is not the last used line pointer */
		int			nunusedend = nline - finalusedlp;

		Assert(nunused >= nunusedend && nunusedend > 0);

		/* remove trailing unused line pointers from the count */
		nunused -= nunusedend;
		/* truncate the line pointer array */
		((PageHeader) page)->pd_lower -= (sizeof(ItemIdData) * nunusedend);
	}

	/* Set hint bit for PageAddItemExtended */
	if (nunused > 0)
		PageSetHasFreeLinePointers(page);
	else
		PageClearHasFreeLinePointers(page);
}
803 :
804 : /*
805 : * PageTruncateLinePointerArray
806 : *
807 : * Removes unused line pointers at the end of the line pointer array.
808 : *
809 : * This routine is usable for heap pages only. It is called by VACUUM during
810 : * its second pass over the heap. We expect at least one LP_UNUSED line
811 : * pointer on the page (if VACUUM didn't have an LP_DEAD item on the page that
812 : * it just set to LP_UNUSED then it should not call here).
813 : *
814 : * We avoid truncating the line pointer array to 0 items, if necessary by
815 : * leaving behind a single remaining LP_UNUSED item. This is a little
816 : * arbitrary, but it seems like a good idea to avoid leaving a PageIsEmpty()
817 : * page behind.
818 : *
819 : * Caller can have either an exclusive lock or a full cleanup lock on page's
820 : * buffer. The page's PD_HAS_FREE_LINES hint bit will be set or unset based
821 : * on whether or not we leave behind any remaining LP_UNUSED items.
822 : */
void
PageTruncateLinePointerArray(Page page)
{
	PageHeader	phdr = (PageHeader) page;
	bool		countdone = false,	/* done counting truncatable entries? */
				sethint = false;	/* any LP_UNUSED entries left behind? */
	int			nunusedend = 0;		/* contiguous unused entries at the end */

	/* Scan line pointer array back-to-front */
	for (int i = PageGetMaxOffsetNumber(page); i >= FirstOffsetNumber; i--)
	{
		ItemId		lp = PageGetItemId(page, i);

		/*
		 * Note the "i > FirstOffsetNumber" condition: we never truncate the
		 * first line pointer, so an all-unused page keeps one LP_UNUSED item
		 * (see function header comment).
		 */
		if (!countdone && i > FirstOffsetNumber)
		{
			/*
			 * Still determining which line pointers from the end of the array
			 * will be truncated away. Either count another line pointer as
			 * safe to truncate, or notice that it's not safe to truncate
			 * additional line pointers (stop counting line pointers).
			 */
			if (!ItemIdIsUsed(lp))
				nunusedend++;
			else
				countdone = true;
		}
		else
		{
			/*
			 * Once we've stopped counting we still need to figure out if
			 * there are any remaining LP_UNUSED line pointers somewhere more
			 * towards the front of the array.
			 */
			if (!ItemIdIsUsed(lp))
			{
				/*
				 * This is an unused line pointer that we won't be truncating
				 * away -- so there is at least one. Set hint on page.
				 */
				sethint = true;
				break;
			}
		}
	}

	if (nunusedend > 0)
	{
		/* shrink the line pointer array by the truncated amount */
		phdr->pd_lower -= sizeof(ItemIdData) * nunusedend;

#ifdef CLOBBER_FREED_MEMORY
		memset((char *) page + phdr->pd_lower, 0x7F,
			   sizeof(ItemIdData) * nunusedend);
#endif
	}
	else
		Assert(sethint);	/* caller promised at least one LP_UNUSED item */

	/* Set hint bit for PageAddItemExtended */
	if (sethint)
		PageSetHasFreeLinePointers(page);
	else
		PageClearHasFreeLinePointers(page);
}
886 :
887 : /*
888 : * PageGetFreeSpace
889 : * Returns the size of the free (allocatable) space on a page,
890 : * reduced by the space needed for a new line pointer.
891 : *
892 : * Note: this should usually only be used on index pages. Use
893 : * PageGetHeapFreeSpace on heap pages.
894 : */
895 : Size
896 54137808 : PageGetFreeSpace(Page page)
897 : {
898 : int space;
899 :
900 : /*
901 : * Use signed arithmetic here so that we behave sensibly if pd_lower >
902 : * pd_upper.
903 : */
904 54137808 : space = (int) ((PageHeader) page)->pd_upper -
905 54137808 : (int) ((PageHeader) page)->pd_lower;
906 :
907 54137808 : if (space < (int) sizeof(ItemIdData))
908 11800 : return 0;
909 54126008 : space -= sizeof(ItemIdData);
910 :
911 54126008 : return (Size) space;
912 : }
913 :
914 : /*
915 : * PageGetFreeSpaceForMultipleTuples
916 : * Returns the size of the free (allocatable) space on a page,
917 : * reduced by the space needed for multiple new line pointers.
918 : *
919 : * Note: this should usually only be used on index pages. Use
920 : * PageGetHeapFreeSpace on heap pages.
921 : */
922 : Size
923 130384 : PageGetFreeSpaceForMultipleTuples(Page page, int ntups)
924 : {
925 : int space;
926 :
927 : /*
928 : * Use signed arithmetic here so that we behave sensibly if pd_lower >
929 : * pd_upper.
930 : */
931 130384 : space = (int) ((PageHeader) page)->pd_upper -
932 130384 : (int) ((PageHeader) page)->pd_lower;
933 :
934 130384 : if (space < (int) (ntups * sizeof(ItemIdData)))
935 0 : return 0;
936 130384 : space -= ntups * sizeof(ItemIdData);
937 :
938 130384 : return (Size) space;
939 : }
940 :
941 : /*
942 : * PageGetExactFreeSpace
943 : * Returns the size of the free (allocatable) space on a page,
944 : * without any consideration for adding/removing line pointers.
945 : */
946 : Size
947 3241928 : PageGetExactFreeSpace(Page page)
948 : {
949 : int space;
950 :
951 : /*
952 : * Use signed arithmetic here so that we behave sensibly if pd_lower >
953 : * pd_upper.
954 : */
955 3241928 : space = (int) ((PageHeader) page)->pd_upper -
956 3241928 : (int) ((PageHeader) page)->pd_lower;
957 :
958 3241928 : if (space < 0)
959 0 : return 0;
960 :
961 3241928 : return (Size) space;
962 : }
963 :
964 :
965 : /*
966 : * PageGetHeapFreeSpace
967 : * Returns the size of the free (allocatable) space on a page,
968 : * reduced by the space needed for a new line pointer.
969 : *
970 : * The difference between this and PageGetFreeSpace is that this will return
971 : * zero if there are already MaxHeapTuplesPerPage line pointers in the page
972 : * and none are free. We use this to enforce that no more than
973 : * MaxHeapTuplesPerPage line pointers are created on a heap page. (Although
974 : * no more tuples than that could fit anyway, in the presence of redirected
975 : * or dead line pointers it'd be possible to have too many line pointers.
976 : * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
977 : * on the number of line pointers, we make this extra check.)
978 : */
979 : Size
980 25620368 : PageGetHeapFreeSpace(Page page)
981 : {
982 : Size space;
983 :
984 25620368 : space = PageGetFreeSpace(page);
985 25620368 : if (space > 0)
986 : {
987 : OffsetNumber offnum,
988 : nline;
989 :
990 : /*
991 : * Are there already MaxHeapTuplesPerPage line pointers in the page?
992 : */
993 25592178 : nline = PageGetMaxOffsetNumber(page);
994 25592178 : if (nline >= MaxHeapTuplesPerPage)
995 : {
996 6558 : if (PageHasFreeLinePointers(page))
997 : {
998 : /*
999 : * Since this is just a hint, we must confirm that there is
1000 : * indeed a free line pointer
1001 : */
1002 685116 : for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
1003 : {
1004 684916 : ItemId lp = PageGetItemId(page, offnum);
1005 :
1006 684916 : if (!ItemIdIsUsed(lp))
1007 3356 : break;
1008 : }
1009 :
1010 3556 : if (offnum > nline)
1011 : {
1012 : /*
1013 : * The hint is wrong, but we can't clear it here since we
1014 : * don't have the ability to mark the page dirty.
1015 : */
1016 200 : space = 0;
1017 : }
1018 : }
1019 : else
1020 : {
1021 : /*
1022 : * Although the hint might be wrong, PageAddItem will believe
1023 : * it anyway, so we must believe it too.
1024 : */
1025 3002 : space = 0;
1026 : }
1027 : }
1028 : }
1029 25620368 : return space;
1030 : }
1031 :
1032 :
/*
 * PageIndexTupleDelete
 *
 * This routine does the work of removing a tuple from an index page.
 *
 * Unlike heap pages, we compact out the line pointer for the removed tuple.
 */
void
PageIndexTupleDelete(Page page, OffsetNumber offnum)
{
	PageHeader	phdr = (PageHeader) page;
	char	   *addr;
	ItemId		tup;
	Size		size;
	unsigned	offset;
	int			nbytes;
	int			offidx;
	int			nline;

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ ||
		phdr->pd_special != MAXALIGN(phdr->pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	nline = PageGetMaxOffsetNumber(page);
	if ((int) offnum <= 0 || (int) offnum > nline)
		elog(ERROR, "invalid index offnum: %u", offnum);

	/* change offset number to offset index */
	offidx = offnum - 1;

	tup = PageGetItemId(page, offnum);
	Assert(ItemIdHasStorage(tup));
	size = ItemIdGetLength(tup);
	offset = ItemIdGetOffset(tup);

	/* The target's data must lie entirely within the tuple-storage area. */
	if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
		offset != MAXALIGN(offset))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted line pointer: offset = %u, size = %u",
						offset, (unsigned int) size)));

	/* Amount of space to actually be deleted */
	size = MAXALIGN(size);

	/*
	 * First, we want to get rid of the pd_linp entry for the index tuple. We
	 * copy all subsequent linp's back one slot in the array. We don't use
	 * PageGetItemId, because we are manipulating the _array_, not individual
	 * linp's.
	 */
	/* nbytes = bytes occupied by line pointers after the one being removed */
	nbytes = phdr->pd_lower -
		((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);

	if (nbytes > 0)
		memmove((char *) &(phdr->pd_linp[offidx]),
				(char *) &(phdr->pd_linp[offidx + 1]),
				nbytes);

	/*
	 * Now move everything between the old upper bound (beginning of tuple
	 * space) and the beginning of the deleted tuple forward, so that space in
	 * the middle of the page is left free.  If we've just deleted the tuple
	 * at the beginning of tuple space, then there's no need to do the copy.
	 */

	/* beginning of tuple space */
	addr = (char *) page + phdr->pd_upper;

	if (offset > phdr->pd_upper)
		memmove(addr + size, addr, offset - phdr->pd_upper);

	/* adjust free space boundary pointers */
	phdr->pd_upper += size;
	phdr->pd_lower -= sizeof(ItemIdData);

	/*
	 * Finally, we need to adjust the linp entries that remain.
	 *
	 * Anything that used to be before the deleted tuple's data was moved
	 * forward by the size of the deleted tuple.
	 */
	if (!PageIsEmpty(page))
	{
		int			i;

		nline--;				/* there's one less than when we started */
		for (i = 1; i <= nline; i++)
		{
			ItemId		ii = PageGetItemId(page, i);

			Assert(ItemIdHasStorage(ii));
			/* tuples stored at or below the deleted one shifted up by size */
			if (ItemIdGetOffset(ii) <= offset)
				ii->lp_off += size;
		}
	}
}
1139 :
1140 :
/*
 * PageIndexMultiDelete
 *
 * This routine handles the case of deleting multiple tuples from an
 * index page at once.  It is considerably faster than a loop around
 * PageIndexTupleDelete ... however, the caller *must* supply the array
 * of item numbers to be deleted in item number order!
 */
void
PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
{
	PageHeader	phdr = (PageHeader) page;
	Offset		pd_lower = phdr->pd_lower;
	Offset		pd_upper = phdr->pd_upper;
	Offset		pd_special = phdr->pd_special;
	Offset		last_offset;
	itemIdCompactData itemidbase[MaxIndexTuplesPerPage];
	ItemIdData	newitemids[MaxIndexTuplesPerPage];
	itemIdCompact itemidptr;
	ItemId		lp;
	int			nline,
				nused;
	Size		totallen;
	Size		size;
	unsigned	offset;
	int			nextitm;
	OffsetNumber offnum;
	bool		presorted = true;	/* For now */

	Assert(nitems <= MaxIndexTuplesPerPage);

	/*
	 * If there aren't very many items to delete, then retail
	 * PageIndexTupleDelete is the best way.  Delete the items in reverse
	 * order so we don't have to think about adjusting item numbers for
	 * previous deletions.
	 *
	 * TODO: tune the magic number here
	 */
	if (nitems <= 2)
	{
		while (--nitems >= 0)
			PageIndexTupleDelete(page, itemnos[nitems]);
		return;
	}

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (pd_lower < SizeOfPageHeaderData ||
		pd_lower > pd_upper ||
		pd_upper > pd_special ||
		pd_special > BLCKSZ ||
		pd_special != MAXALIGN(pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						pd_lower, pd_upper, pd_special)));

	/*
	 * Scan the line pointer array and build a list of just the ones we are
	 * going to keep.  Notice we do not modify the page yet, since we are
	 * still validity-checking.
	 */
	nline = PageGetMaxOffsetNumber(page);
	itemidptr = itemidbase;
	totallen = 0;
	nused = 0;
	nextitm = 0;
	last_offset = pd_special;
	for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
	{
		lp = PageGetItemId(page, offnum);
		Assert(ItemIdHasStorage(lp));
		size = ItemIdGetLength(lp);
		offset = ItemIdGetOffset(lp);
		if (offset < pd_upper ||
			(offset + size) > pd_special ||
			offset != MAXALIGN(offset))
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("corrupted line pointer: offset = %u, size = %u",
							offset, (unsigned int) size)));

		/* itemnos[] is sorted, so a single cursor suffices to match it */
		if (nextitm < nitems && offnum == itemnos[nextitm])
		{
			/* skip item to be deleted */
			nextitm++;
		}
		else
		{
			itemidptr->offsetindex = nused; /* where it will go */
			itemidptr->itemoff = offset;

			/*
			 * Track whether the survivors' data offsets are monotonically
			 * descending; if so, compactify_tuples can use a faster path.
			 */
			if (last_offset > itemidptr->itemoff)
				last_offset = itemidptr->itemoff;
			else
				presorted = false;

			itemidptr->alignedlen = MAXALIGN(size);
			totallen += itemidptr->alignedlen;
			newitemids[nused] = *lp;	/* keep a copy of the line pointer */
			itemidptr++;
			nused++;
		}
	}

	/* this will catch invalid or out-of-order itemnos[] */
	if (nextitm != nitems)
		elog(ERROR, "incorrect index offsets supplied");

	if (totallen > (Size) (pd_special - pd_lower))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted item lengths: total %u, available space %u",
						(unsigned int) totallen, pd_special - pd_lower)));

	/*
	 * Looks good. Overwrite the line pointers with the copy, from which we've
	 * removed all the unused items.
	 */
	memcpy(phdr->pd_linp, newitemids, nused * sizeof(ItemIdData));
	phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);

	/* and compactify the tuple data */
	if (nused > 0)
		compactify_tuples(itemidbase, nused, page, presorted);
	else
		phdr->pd_upper = pd_special;	/* nothing kept: all space is free */
}
1271 :
1272 :
/*
 * PageIndexTupleDeleteNoCompact
 *
 * Remove the specified tuple from an index page, but set its line pointer
 * to "unused" instead of compacting it out, except that it can be removed
 * if it's the last line pointer on the page.
 *
 * This is used for index AMs that require that existing TIDs of live tuples
 * remain unchanged, and are willing to allow unused line pointers instead.
 */
void
PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
{
	PageHeader	phdr = (PageHeader) page;
	char	   *addr;
	ItemId		tup;
	Size		size;
	unsigned	offset;
	int			nline;

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ ||
		phdr->pd_special != MAXALIGN(phdr->pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	nline = PageGetMaxOffsetNumber(page);
	if ((int) offnum <= 0 || (int) offnum > nline)
		elog(ERROR, "invalid index offnum: %u", offnum);

	tup = PageGetItemId(page, offnum);
	Assert(ItemIdHasStorage(tup));
	size = ItemIdGetLength(tup);
	offset = ItemIdGetOffset(tup);

	/* The target's data must lie entirely within the tuple-storage area. */
	if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
		offset != MAXALIGN(offset))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted line pointer: offset = %u, size = %u",
						offset, (unsigned int) size)));

	/* Amount of space to actually be deleted */
	size = MAXALIGN(size);

	/*
	 * Either set the line pointer to "unused", or zap it if it's the last
	 * one.  (Note: it's possible that the next-to-last one(s) are already
	 * unused, but we do not trouble to try to compact them out if so.)
	 */
	if ((int) offnum < nline)
		ItemIdSetUnused(tup);
	else
	{
		phdr->pd_lower -= sizeof(ItemIdData);
		nline--;				/* there's one less than when we started */
	}

	/*
	 * Now move everything between the old upper bound (beginning of tuple
	 * space) and the beginning of the deleted tuple forward, so that space in
	 * the middle of the page is left free.  If we've just deleted the tuple
	 * at the beginning of tuple space, then there's no need to do the copy.
	 */

	/* beginning of tuple space */
	addr = (char *) page + phdr->pd_upper;

	if (offset > phdr->pd_upper)
		memmove(addr + size, addr, offset - phdr->pd_upper);

	/* adjust free space boundary pointer */
	phdr->pd_upper += size;

	/*
	 * Finally, we need to adjust the linp entries that remain.
	 *
	 * Anything that used to be before the deleted tuple's data was moved
	 * forward by the size of the deleted tuple.
	 */
	if (!PageIsEmpty(page))
	{
		int			i;

		for (i = 1; i <= nline; i++)
		{
			ItemId		ii = PageGetItemId(page, i);

			/* unused entries (no storage) carry no offset to adjust */
			if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
				ii->lp_off += size;
		}
	}
}
1373 :
1374 :
/*
 * PageIndexTupleOverwrite
 *
 * Replace a specified tuple on an index page.
 *
 * The new tuple is placed exactly where the old one had been, shifting
 * other tuples' data up or down as needed to keep the page compacted.
 * This is better than deleting and reinserting the tuple, because it
 * avoids any data shifting when the tuple size doesn't change; and
 * even when it does, we avoid moving the line pointers around.
 * This could be used by an index AM that doesn't want to unset the
 * LP_DEAD bit when it happens to be set.  It could conceivably also be
 * used by an index AM that cares about the physical order of tuples as
 * well as their logical/ItemId order.
 *
 * If there's insufficient space for the new tuple, return false.  Other
 * errors represent data-corruption problems, so we just elog.
 */
bool
PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
						Item newtup, Size newsize)
{
	PageHeader	phdr = (PageHeader) page;
	ItemId		tupid;
	int			oldsize;
	unsigned	offset;
	Size		alignednewsize;
	int			size_diff;
	int			itemcount;

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ ||
		phdr->pd_special != MAXALIGN(phdr->pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	itemcount = PageGetMaxOffsetNumber(page);
	if ((int) offnum <= 0 || (int) offnum > itemcount)
		elog(ERROR, "invalid index offnum: %u", offnum);

	tupid = PageGetItemId(page, offnum);
	Assert(ItemIdHasStorage(tupid));
	oldsize = ItemIdGetLength(tupid);
	offset = ItemIdGetOffset(tupid);

	/* The target's data must lie entirely within the tuple-storage area. */
	if (offset < phdr->pd_upper || (offset + oldsize) > phdr->pd_special ||
		offset != MAXALIGN(offset))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted line pointer: offset = %u, size = %u",
						offset, (unsigned int) oldsize)));

	/*
	 * Determine actual change in space requirement, check for page overflow.
	 */
	oldsize = MAXALIGN(oldsize);
	alignednewsize = MAXALIGN(newsize);
	if (alignednewsize > oldsize + (phdr->pd_upper - phdr->pd_lower))
		return false;

	/*
	 * Relocate existing data and update line pointers, unless the new tuple
	 * is the same size as the old (after alignment), in which case there's
	 * nothing to do.  Notice that what we have to relocate is data before the
	 * target tuple, not data after, so it's convenient to express size_diff
	 * as the amount by which the tuple's size is decreasing, making it the
	 * delta to add to pd_upper and affected line pointers.
	 */
	size_diff = oldsize - (int) alignednewsize;
	if (size_diff != 0)
	{
		char	   *addr = (char *) page + phdr->pd_upper;
		int			i;

		/* relocate all tuple data before the target tuple */
		memmove(addr + size_diff, addr, offset - phdr->pd_upper);

		/* adjust free space boundary pointer */
		phdr->pd_upper += size_diff;

		/* adjust affected line pointers too */
		for (i = FirstOffsetNumber; i <= itemcount; i++)
		{
			ItemId		ii = PageGetItemId(page, i);

			/* Allow items without storage; currently only BRIN needs that */
			if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
				ii->lp_off += size_diff;
		}
	}

	/* Update the item's tuple length without changing its lp_flags field */
	tupid->lp_off = offset + size_diff;
	tupid->lp_len = newsize;

	/* Copy new tuple data onto page */
	memcpy(PageGetItem(page, tupid), newtup, newsize);

	return true;
}
1482 :
1483 :
1484 : /*
1485 : * Set checksum for a page in shared buffers.
1486 : *
1487 : * If checksums are disabled, or if the page is not initialized, just return
1488 : * the input. Otherwise, we must make a copy of the page before calculating
1489 : * the checksum, to prevent concurrent modifications (e.g. setting hint bits)
1490 : * from making the final checksum invalid. It doesn't matter if we include or
1491 : * exclude hints during the copy, as long as we write a valid page and
1492 : * associated checksum.
1493 : *
1494 : * Returns a pointer to the block-sized data that needs to be written. Uses
1495 : * statically-allocated memory, so the caller must immediately write the
1496 : * returned page and not refer to it again.
1497 : */
1498 : char *
1499 942652 : PageSetChecksumCopy(Page page, BlockNumber blkno)
1500 : {
1501 : static char *pageCopy = NULL;
1502 :
1503 : /* If we don't need a checksum, just return the passed-in data */
1504 942652 : if (PageIsNew(page) || !DataChecksumsEnabled())
1505 21330 : return (char *) page;
1506 :
1507 : /*
1508 : * We allocate the copy space once and use it over on each subsequent
1509 : * call. The point of palloc'ing here, rather than having a static char
1510 : * array, is first to ensure adequate alignment for the checksumming code
1511 : * and second to avoid wasting space in processes that never call this.
1512 : */
1513 921322 : if (pageCopy == NULL)
1514 4930 : pageCopy = MemoryContextAllocAligned(TopMemoryContext,
1515 : BLCKSZ,
1516 : PG_IO_ALIGN_SIZE,
1517 : 0);
1518 :
1519 921322 : memcpy(pageCopy, (char *) page, BLCKSZ);
1520 921322 : ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno);
1521 921322 : return pageCopy;
1522 : }
1523 :
1524 : /*
1525 : * Set checksum for a page in private memory.
1526 : *
1527 : * This must only be used when we know that no other process can be modifying
1528 : * the page buffer.
1529 : */
1530 : void
1531 112990 : PageSetChecksumInplace(Page page, BlockNumber blkno)
1532 : {
1533 : /* If we don't need a checksum, just return */
1534 112990 : if (PageIsNew(page) || !DataChecksumsEnabled())
1535 3656 : return;
1536 :
1537 109334 : ((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno);
1538 : }
|