Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * heapam_xlog.c
4 : * WAL replay logic for heap access method.
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/heap/heapam_xlog.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/bufmask.h"
18 : #include "access/heapam.h"
19 : #include "access/visibilitymap.h"
20 : #include "access/xlog.h"
21 : #include "access/xlogutils.h"
22 : #include "storage/freespace.h"
23 : #include "storage/standby.h"
24 :
25 :
26 : /*
27 : * Replay XLOG_HEAP2_PRUNE_* records.
28 : */
29 : static void
30 27968 : heap_xlog_prune_freeze(XLogReaderState *record)
31 : {
32 27968 : XLogRecPtr lsn = record->EndRecPtr;
33 27968 : char *maindataptr = XLogRecGetData(record);
34 : xl_heap_prune xlrec;
35 : Buffer buffer;
36 : RelFileLocator rlocator;
37 : BlockNumber blkno;
38 27968 : Buffer vmbuffer = InvalidBuffer;
39 27968 : uint8 vmflags = 0;
40 27968 : Size freespace = 0;
41 :
42 27968 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
43 27968 : memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
44 27968 : maindataptr += SizeOfHeapPrune;
45 :
46 : /*
47 : * We will take an ordinary exclusive lock or a cleanup lock depending on
48 : * whether the XLHP_CLEANUP_LOCK flag is set. With an ordinary exclusive
49 : * lock, we better not be doing anything that requires moving existing
50 : * tuple data.
51 : */
52 : Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
53 : (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);
54 :
55 27968 : if (xlrec.flags & XLHP_VM_ALL_VISIBLE)
56 : {
57 5856 : vmflags = VISIBILITYMAP_ALL_VISIBLE;
58 5856 : if (xlrec.flags & XLHP_VM_ALL_FROZEN)
59 5086 : vmflags |= VISIBILITYMAP_ALL_FROZEN;
60 : }
61 :
62 : /*
63 : * After xl_heap_prune is the optional snapshot conflict horizon.
64 : *
65 : * In Hot Standby mode, we must ensure that there are no running queries
66 : * which would conflict with the changes in this record. That means we
67 : * can't replay this record if it removes tuples that are still visible to
68 : * transactions on the standby, freeze tuples with xids that are still
69 : * considered running on the standby, or set a page as all-visible in the
70 : * VM if it isn't all-visible to all transactions on the standby.
71 : */
72 27968 : if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
73 : {
74 : TransactionId snapshot_conflict_horizon;
75 :
76 : /* memcpy() because snapshot_conflict_horizon is stored unaligned */
77 20954 : memcpy(&snapshot_conflict_horizon, maindataptr, sizeof(TransactionId));
78 20954 : maindataptr += sizeof(TransactionId);
79 :
80 20954 : if (InHotStandby)
81 20490 : ResolveRecoveryConflictWithSnapshot(snapshot_conflict_horizon,
82 20490 : (xlrec.flags & XLHP_IS_CATALOG_REL) != 0,
83 : rlocator);
84 : }
85 :
86 : /*
87 : * If we have a full-page image of the heap block, restore it and we're
88 : * done with the heap block.
89 : */
90 27968 : if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
91 27968 : (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
92 : &buffer) == BLK_NEEDS_REDO)
93 : {
94 19620 : Page page = BufferGetPage(buffer);
95 : OffsetNumber *redirected;
96 : OffsetNumber *nowdead;
97 : OffsetNumber *nowunused;
98 : int nredirected;
99 : int ndead;
100 : int nunused;
101 : int nplans;
102 : Size datalen;
103 : xlhp_freeze_plan *plans;
104 : OffsetNumber *frz_offsets;
105 19620 : char *dataptr = XLogRecGetBlockData(record, 0, &datalen);
106 : bool do_prune;
107 :
108 19620 : heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
109 : &nplans, &plans, &frz_offsets,
110 : &nredirected, &redirected,
111 : &ndead, &nowdead,
112 : &nunused, &nowunused);
113 :
114 19620 : do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
115 :
116 : /* Ensure the record does something */
117 : Assert(do_prune || nplans > 0 || vmflags & VISIBILITYMAP_VALID_BITS);
118 :
119 : /*
120 : * Update all line pointers per the record, and repair fragmentation
121 : * if needed.
122 : */
123 19620 : if (do_prune)
124 18470 : heap_page_prune_execute(buffer,
125 18470 : (xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
126 : redirected, nredirected,
127 : nowdead, ndead,
128 : nowunused, nunused);
129 :
130 : /* Freeze tuples */
131 22042 : for (int p = 0; p < nplans; p++)
132 : {
133 : HeapTupleFreeze frz;
134 :
135 : /*
136 : * Convert freeze plan representation from WAL record into
137 : * per-tuple format used by heap_execute_freeze_tuple
138 : */
139 2422 : frz.xmax = plans[p].xmax;
140 2422 : frz.t_infomask2 = plans[p].t_infomask2;
141 2422 : frz.t_infomask = plans[p].t_infomask;
142 2422 : frz.frzflags = plans[p].frzflags;
143 2422 : frz.offset = InvalidOffsetNumber; /* unused, but be tidy */
144 :
145 108130 : for (int i = 0; i < plans[p].ntuples; i++)
146 : {
147 105708 : OffsetNumber offset = *(frz_offsets++);
148 : ItemId lp;
149 : HeapTupleHeader tuple;
150 :
151 105708 : lp = PageGetItemId(page, offset);
152 105708 : tuple = (HeapTupleHeader) PageGetItem(page, lp);
153 105708 : heap_execute_freeze_tuple(tuple, &frz);
154 : }
155 : }
156 :
157 : /* There should be no more data */
158 : Assert((char *) frz_offsets == dataptr + datalen);
159 :
160 19620 : if (vmflags & VISIBILITYMAP_VALID_BITS)
161 3036 : PageSetAllVisible(page);
162 :
163 19620 : MarkBufferDirty(buffer);
164 :
165 : /*
166 : * See log_heap_prune_and_freeze() for commentary on when we set the
167 : * heap page LSN.
168 : */
169 19620 : if (do_prune || nplans > 0 ||
170 0 : ((vmflags & VISIBILITYMAP_VALID_BITS) && XLogHintBitIsNeeded()))
171 19620 : PageSetLSN(page, lsn);
172 :
173 : /*
174 : * Note: we don't worry about updating the page's prunability hints.
175 : * At worst this will cause an extra prune cycle to occur soon.
176 : */
177 : }
178 :
179 : /*
180 : * If we 1) released any space or line pointers or 2) set PD_ALL_VISIBLE
181 : * or the VM, update the freespace map.
182 : *
183 : * Even when no actual space is freed (when only marking the page
184 : * all-visible or frozen), we still update the FSM. Because the FSM is
185 : * unlogged and maintained heuristically, it often becomes stale on
186 : * standbys. If such a standby is later promoted and runs VACUUM, it will
187 : * skip recalculating free space for pages that were marked
188 : * all-visible/all-frozen. FreeSpaceMapVacuum() can then propagate overly
189 : * optimistic free space values upward, causing future insertions to
190 : * select pages that turn out to be unusable. In bulk, this can lead to
191 : * long stalls.
192 : *
193 : * To prevent this, always update the FSM even when only marking a page
194 : * all-visible/all-frozen.
195 : *
196 : * Do this regardless of whether a full-page image is logged, since FSM
197 : * data is not part of the page itself.
198 : */
199 27968 : if (BufferIsValid(buffer))
200 : {
201 27968 : if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
202 : XLHP_HAS_DEAD_ITEMS |
203 3700 : XLHP_HAS_NOW_UNUSED_ITEMS)) ||
204 3700 : (vmflags & VISIBILITYMAP_VALID_BITS))
205 24268 : freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
206 :
207 : /*
208 : * We want to avoid holding an exclusive lock on the heap buffer while
209 : * doing IO (either of the FSM or the VM), so we'll release it now.
210 : */
211 27968 : UnlockReleaseBuffer(buffer);
212 : }
213 :
214 : /*
215 : * Now read and update the VM block.
216 : *
217 : * We must redo changes to the VM even if the heap page was skipped due to
218 : * LSN interlock. See comment in heap_xlog_multi_insert() for more details
219 : * on replaying changes to the VM.
220 : */
221 33824 : if ((vmflags & VISIBILITYMAP_VALID_BITS) &&
222 5856 : XLogReadBufferForRedoExtended(record, 1,
223 : RBM_ZERO_ON_ERROR,
224 : false,
225 : &vmbuffer) == BLK_NEEDS_REDO)
226 : {
227 5696 : Page vmpage = BufferGetPage(vmbuffer);
228 :
229 : /* initialize the page if it was read as zeros */
230 5696 : if (PageIsNew(vmpage))
231 0 : PageInit(vmpage, BLCKSZ, 0);
232 :
233 5696 : visibilitymap_set_vmbits(blkno, vmbuffer, vmflags, rlocator);
234 :
235 : Assert(BufferIsDirty(vmbuffer));
236 5696 : PageSetLSN(vmpage, lsn);
237 : }
238 :
239 27968 : if (BufferIsValid(vmbuffer))
240 5856 : UnlockReleaseBuffer(vmbuffer);
241 :
242 27968 : if (freespace > 0)
243 24088 : XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
244 27968 : }
245 :
246 : /*
247 : * Replay XLOG_HEAP2_VISIBLE records.
248 : *
249 : * The critical integrity requirement here is that we must never end up with
250 : * a situation where the visibility map bit is set, and the page-level
251 : * PD_ALL_VISIBLE bit is clear. If that were to occur, then a subsequent
252 : * page modification would fail to clear the visibility map bit.
253 : */
254 : static void
255 9674 : heap_xlog_visible(XLogReaderState *record)
256 : {
257 9674 : XLogRecPtr lsn = record->EndRecPtr;
258 9674 : xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
259 9674 : Buffer vmbuffer = InvalidBuffer;
260 : Buffer buffer;
261 : Page page;
262 : RelFileLocator rlocator;
263 : BlockNumber blkno;
264 : XLogRedoAction action;
265 :
266 : Assert((xlrec->flags & VISIBILITYMAP_XLOG_VALID_BITS) == xlrec->flags);
267 :
268 9674 : XLogRecGetBlockTag(record, 1, &rlocator, NULL, &blkno);
269 :
270 : /*
271 : * If there are any Hot Standby transactions running that have an xmin
272 : * horizon old enough that this page isn't all-visible for them, they
273 : * might incorrectly decide that an index-only scan can skip a heap fetch.
274 : *
275 : * NB: It might be better to throw some kind of "soft" conflict here that
276 : * forces any index-only scan that is in flight to perform heap fetches,
277 : * rather than killing the transaction outright.
278 : */
279 9674 : if (InHotStandby)
280 9326 : ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
281 9326 : xlrec->flags & VISIBILITYMAP_XLOG_CATALOG_REL,
282 : rlocator);
283 :
284 : /*
285 : * Read the heap page, if it still exists. If the heap file has been dropped or
286 : * truncated later in recovery, we don't need to update the page, but we'd
287 : * better still update the visibility map.
288 : */
289 9674 : action = XLogReadBufferForRedo(record, 1, &buffer);
290 9674 : if (action == BLK_NEEDS_REDO)
291 : {
292 : /*
293 : * We don't bump the LSN of the heap page when setting the visibility
294 : * map bit (unless checksums or wal_log_hints is enabled, in which
295 : * case we must). This exposes us to torn page hazards, but since
296 : * we're not inspecting the existing page contents in any way, we
297 : * don't care.
298 : */
299 6148 : page = BufferGetPage(buffer);
300 :
301 6148 : PageSetAllVisible(page);
302 :
303 6148 : if (XLogHintBitIsNeeded())
304 6148 : PageSetLSN(page, lsn);
305 :
306 6148 : MarkBufferDirty(buffer);
307 : }
308 : else if (action == BLK_RESTORED)
309 : {
310 : /*
311 : * If heap block was backed up, we already restored it and there's
312 : * nothing more to do. (This can only happen with checksums or
313 : * wal_log_hints enabled.)
314 : */
315 : }
316 :
317 9674 : if (BufferIsValid(buffer))
318 : {
319 9674 : Size space = PageGetFreeSpace(BufferGetPage(buffer));
320 :
321 9674 : UnlockReleaseBuffer(buffer);
322 :
323 : /*
324 : * Since FSM is not WAL-logged and only updated heuristically, it
325 : * easily becomes stale in standbys. If the standby is later promoted
326 : * and runs VACUUM, it will skip updating individual free space
327 : * figures for pages that became all-visible (or all-frozen, depending
328 : * on the vacuum mode), which is troublesome when FreeSpaceMapVacuum
329 : * propagates overly optimistic free space values to upper FSM layers;
330 : * later inserters try to use such pages only to find out that they
331 : * are unusable. This can cause long stalls when there are many such
332 : * pages.
333 : *
334 : * Forestall those problems by updating FSM's idea about a page that
335 : * is becoming all-visible or all-frozen.
336 : *
337 : * Do this regardless of a full-page image being applied, since the
338 : * FSM data is not in the page anyway.
339 : */
340 9674 : if (xlrec->flags & VISIBILITYMAP_VALID_BITS)
341 9674 : XLogRecordPageWithFreeSpace(rlocator, blkno, space);
342 : }
343 :
344 : /*
345 : * Even if we skipped the heap page update due to the LSN interlock, it's
346 : * still safe to update the visibility map. Any WAL record that clears
347 : * the visibility map bit does so before checking the page LSN, so any
348 : * bits that need to be cleared will still be cleared.
349 : */
350 9674 : if (XLogReadBufferForRedoExtended(record, 0, RBM_ZERO_ON_ERROR, false,
351 : &vmbuffer) == BLK_NEEDS_REDO)
352 : {
353 9256 : Page vmpage = BufferGetPage(vmbuffer);
354 : Relation reln;
355 : uint8 vmbits;
356 :
357 : /* initialize the page if it was read as zeros */
358 9256 : if (PageIsNew(vmpage))
359 0 : PageInit(vmpage, BLCKSZ, 0);
360 :
361 : /* remove VISIBILITYMAP_XLOG_* */
362 9256 : vmbits = xlrec->flags & VISIBILITYMAP_VALID_BITS;
363 :
364 : /*
365 : * XLogReadBufferForRedoExtended locked the buffer. But
366 : * visibilitymap_set will handle locking itself.
367 : */
368 9256 : LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
369 :
370 9256 : reln = CreateFakeRelcacheEntry(rlocator);
371 :
372 9256 : visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
373 : xlrec->snapshotConflictHorizon, vmbits);
374 :
375 9256 : ReleaseBuffer(vmbuffer);
376 9256 : FreeFakeRelcacheEntry(reln);
377 : }
378 418 : else if (BufferIsValid(vmbuffer))
379 418 : UnlockReleaseBuffer(vmbuffer);
380 9674 : }
381 :
382 : /*
383 : * Given an "infobits" field from an XLog record, set the correct bits in the
384 : * given infomask and infomask2 for the tuple touched by the record.
385 : *
386 : * (This is the reverse of compute_infobits).
387 : */
388 : static void
389 894764 : fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
390 : {
391 894764 : *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
392 : HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
393 894764 : *infomask2 &= ~HEAP_KEYS_UPDATED;
394 :
395 894764 : if (infobits & XLHL_XMAX_IS_MULTI)
396 4 : *infomask |= HEAP_XMAX_IS_MULTI;
397 894764 : if (infobits & XLHL_XMAX_LOCK_ONLY)
398 110774 : *infomask |= HEAP_XMAX_LOCK_ONLY;
399 894764 : if (infobits & XLHL_XMAX_EXCL_LOCK)
400 109970 : *infomask |= HEAP_XMAX_EXCL_LOCK;
401 : /* note HEAP_XMAX_SHR_LOCK isn't considered here */
402 894764 : if (infobits & XLHL_XMAX_KEYSHR_LOCK)
403 828 : *infomask |= HEAP_XMAX_KEYSHR_LOCK;
404 :
405 894764 : if (infobits & XLHL_KEYS_UPDATED)
406 599590 : *infomask2 |= HEAP_KEYS_UPDATED;
407 894764 : }
408 :
409 : /*
410 : * Replay XLOG_HEAP_DELETE records.
411 : */
412 : static void
413 601046 : heap_xlog_delete(XLogReaderState *record)
414 : {
415 601046 : XLogRecPtr lsn = record->EndRecPtr;
416 601046 : xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
417 : Buffer buffer;
418 : Page page;
419 601046 : ItemId lp = NULL;
420 : HeapTupleHeader htup;
421 : BlockNumber blkno;
422 : RelFileLocator target_locator;
423 : ItemPointerData target_tid;
424 :
425 601046 : XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
426 601046 : ItemPointerSetBlockNumber(&target_tid, blkno);
427 601046 : ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
428 :
429 : /*
430 : * The visibility map may need to be fixed even if the heap page is
431 : * already up-to-date.
432 : */
433 601046 : if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
434 : {
435 70 : Relation reln = CreateFakeRelcacheEntry(target_locator);
436 70 : Buffer vmbuffer = InvalidBuffer;
437 :
438 70 : visibilitymap_pin(reln, blkno, &vmbuffer);
439 70 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
440 70 : ReleaseBuffer(vmbuffer);
441 70 : FreeFakeRelcacheEntry(reln);
442 : }
443 :
444 601046 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
445 : {
446 596874 : page = BufferGetPage(buffer);
447 :
448 596874 : if (PageGetMaxOffsetNumber(page) >= xlrec->offnum)
449 596874 : lp = PageGetItemId(page, xlrec->offnum);
450 :
451 596874 : if (PageGetMaxOffsetNumber(page) < xlrec->offnum || !ItemIdIsNormal(lp))
452 0 : elog(PANIC, "invalid lp");
453 :
454 596874 : htup = (HeapTupleHeader) PageGetItem(page, lp);
455 :
456 596874 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
457 596874 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
458 596874 : HeapTupleHeaderClearHotUpdated(htup);
459 596874 : fix_infomask_from_infobits(xlrec->infobits_set,
460 : &htup->t_infomask, &htup->t_infomask2);
461 596874 : if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
462 596874 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
463 : else
464 0 : HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
465 596874 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
466 :
467 : /* Mark the page as a candidate for pruning */
468 596874 : PageSetPrunable(page, XLogRecGetXid(record));
469 :
470 596874 : if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
471 22 : PageClearAllVisible(page);
472 :
473 : /* Make sure t_ctid is set correctly */
474 596874 : if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
475 288 : HeapTupleHeaderSetMovedPartitions(htup);
476 : else
477 596586 : htup->t_ctid = target_tid;
478 596874 : PageSetLSN(page, lsn);
479 596874 : MarkBufferDirty(buffer);
480 : }
481 601046 : if (BufferIsValid(buffer))
482 601046 : UnlockReleaseBuffer(buffer);
483 601046 : }
484 :
485 : /*
486 : * Replay XLOG_HEAP_INSERT records.
487 : */
488 : static void
489 2582212 : heap_xlog_insert(XLogReaderState *record)
490 : {
491 2582212 : XLogRecPtr lsn = record->EndRecPtr;
492 2582212 : xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
493 : Buffer buffer;
494 : Page page;
495 : union
496 : {
497 : HeapTupleHeaderData hdr;
498 : char data[MaxHeapTupleSize];
499 : } tbuf;
500 : HeapTupleHeader htup;
501 : xl_heap_header xlhdr;
502 : uint32 newlen;
503 2582212 : Size freespace = 0;
504 : RelFileLocator target_locator;
505 : BlockNumber blkno;
506 : ItemPointerData target_tid;
507 : XLogRedoAction action;
508 :
509 2582212 : XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
510 2582212 : ItemPointerSetBlockNumber(&target_tid, blkno);
511 2582212 : ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
512 :
513 : /* No freezing in the heap_insert() code path */
514 : Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));
515 :
516 : /*
517 : * The visibility map may need to be fixed even if the heap page is
518 : * already up-to-date.
519 : */
520 2582212 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
521 : {
522 2044 : Relation reln = CreateFakeRelcacheEntry(target_locator);
523 2044 : Buffer vmbuffer = InvalidBuffer;
524 :
525 2044 : visibilitymap_pin(reln, blkno, &vmbuffer);
526 2044 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
527 2044 : ReleaseBuffer(vmbuffer);
528 2044 : FreeFakeRelcacheEntry(reln);
529 : }
530 :
531 : /*
532 : * If we inserted the first and only tuple on the page, re-initialize the
533 : * page from scratch.
534 : */
535 2582212 : if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
536 : {
537 34298 : buffer = XLogInitBufferForRedo(record, 0);
538 34298 : page = BufferGetPage(buffer);
539 34298 : PageInit(page, BufferGetPageSize(buffer), 0);
540 34298 : action = BLK_NEEDS_REDO;
541 : }
542 : else
543 2547914 : action = XLogReadBufferForRedo(record, 0, &buffer);
544 2582212 : if (action == BLK_NEEDS_REDO)
545 : {
546 : Size datalen;
547 : char *data;
548 :
549 2576504 : page = BufferGetPage(buffer);
550 :
551 2576504 : if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
552 0 : elog(PANIC, "invalid max offset number");
553 :
554 2576504 : data = XLogRecGetBlockData(record, 0, &datalen);
555 :
556 2576504 : newlen = datalen - SizeOfHeapHeader;
557 : Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
558 2576504 : memcpy(&xlhdr, data, SizeOfHeapHeader);
559 2576504 : data += SizeOfHeapHeader;
560 :
561 2576504 : htup = &tbuf.hdr;
562 2576504 : MemSet(htup, 0, SizeofHeapTupleHeader);
563 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
564 2576504 : memcpy((char *) htup + SizeofHeapTupleHeader,
565 : data,
566 : newlen);
567 2576504 : newlen += SizeofHeapTupleHeader;
568 2576504 : htup->t_infomask2 = xlhdr.t_infomask2;
569 2576504 : htup->t_infomask = xlhdr.t_infomask;
570 2576504 : htup->t_hoff = xlhdr.t_hoff;
571 2576504 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
572 2576504 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
573 2576504 : htup->t_ctid = target_tid;
574 :
575 2576504 : if (PageAddItem(page, htup, newlen, xlrec->offnum, true, true) == InvalidOffsetNumber)
576 0 : elog(PANIC, "failed to add tuple");
577 :
578 2576504 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
579 :
580 2576504 : PageSetLSN(page, lsn);
581 :
582 2576504 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
583 620 : PageClearAllVisible(page);
584 :
585 2576504 : MarkBufferDirty(buffer);
586 : }
587 2582212 : if (BufferIsValid(buffer))
588 2582212 : UnlockReleaseBuffer(buffer);
589 :
590 : /*
591 : * If the page is running low on free space, update the FSM as well.
592 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
593 : * better than that without knowing the fill-factor for the table.
594 : *
595 : * XXX: Don't do this if the page was restored from full page image. We
596 : * don't bother to update the FSM in that case, it doesn't need to be
597 : * totally accurate anyway.
598 : */
599 2582212 : if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
600 507614 : XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
601 2582212 : }
602 :
603 : /*
604 : * Replay XLOG_HEAP2_MULTI_INSERT records.
605 : */
606 : static void
607 115778 : heap_xlog_multi_insert(XLogReaderState *record)
608 : {
609 115778 : XLogRecPtr lsn = record->EndRecPtr;
610 : xl_heap_multi_insert *xlrec;
611 : RelFileLocator rlocator;
612 : BlockNumber blkno;
613 : Buffer buffer;
614 : Page page;
615 : union
616 : {
617 : HeapTupleHeaderData hdr;
618 : char data[MaxHeapTupleSize];
619 : } tbuf;
620 : HeapTupleHeader htup;
621 : uint32 newlen;
622 115778 : Size freespace = 0;
623 : int i;
624 115778 : bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
625 : XLogRedoAction action;
626 115778 : Buffer vmbuffer = InvalidBuffer;
627 :
628 : /*
629 : * Insertion doesn't overwrite MVCC data, so no conflict processing is
630 : * required.
631 : */
632 115778 : xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
633 :
634 115778 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
635 :
636 : /* check that the mutually exclusive flags are not both set */
637 : Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
638 : (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
639 :
640 : /*
641 : * The visibility map may need to be fixed even if the heap page is
642 : * already up-to-date.
643 : */
644 115778 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
645 : {
646 2052 : Relation reln = CreateFakeRelcacheEntry(rlocator);
647 :
648 2052 : visibilitymap_pin(reln, blkno, &vmbuffer);
649 2052 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
650 2052 : ReleaseBuffer(vmbuffer);
651 2052 : vmbuffer = InvalidBuffer;
652 2052 : FreeFakeRelcacheEntry(reln);
653 : }
654 :
655 115778 : if (isinit)
656 : {
657 3454 : buffer = XLogInitBufferForRedo(record, 0);
658 3454 : page = BufferGetPage(buffer);
659 3454 : PageInit(page, BufferGetPageSize(buffer), 0);
660 3454 : action = BLK_NEEDS_REDO;
661 : }
662 : else
663 112324 : action = XLogReadBufferForRedo(record, 0, &buffer);
664 115778 : if (action == BLK_NEEDS_REDO)
665 : {
666 : char *tupdata;
667 : char *endptr;
668 : Size len;
669 :
670 : /* Tuples are stored as block data */
671 112422 : tupdata = XLogRecGetBlockData(record, 0, &len);
672 112422 : endptr = tupdata + len;
673 :
674 112422 : page = BufferGetPage(buffer);
675 :
676 525286 : for (i = 0; i < xlrec->ntuples; i++)
677 : {
678 : OffsetNumber offnum;
679 : xl_multi_insert_tuple *xlhdr;
680 :
681 : /*
682 : * If we're reinitializing the page, the tuples are stored in
683 : * order from FirstOffsetNumber. Otherwise there's an array of
684 : * offsets in the WAL record, and the tuples come after that.
685 : */
686 412864 : if (isinit)
687 198380 : offnum = FirstOffsetNumber + i;
688 : else
689 214484 : offnum = xlrec->offsets[i];
690 412864 : if (PageGetMaxOffsetNumber(page) + 1 < offnum)
691 0 : elog(PANIC, "invalid max offset number");
692 :
693 412864 : xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
694 412864 : tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
695 :
696 412864 : newlen = xlhdr->datalen;
697 : Assert(newlen <= MaxHeapTupleSize);
698 412864 : htup = &tbuf.hdr;
699 412864 : MemSet(htup, 0, SizeofHeapTupleHeader);
700 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
701 412864 : memcpy((char *) htup + SizeofHeapTupleHeader,
702 : tupdata,
703 : newlen);
704 412864 : tupdata += newlen;
705 :
706 412864 : newlen += SizeofHeapTupleHeader;
707 412864 : htup->t_infomask2 = xlhdr->t_infomask2;
708 412864 : htup->t_infomask = xlhdr->t_infomask;
709 412864 : htup->t_hoff = xlhdr->t_hoff;
710 412864 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
711 412864 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
712 412864 : ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
713 412864 : ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
714 :
715 412864 : offnum = PageAddItem(page, htup, newlen, offnum, true, true);
716 412864 : if (offnum == InvalidOffsetNumber)
717 0 : elog(PANIC, "failed to add tuple");
718 : }
719 112422 : if (tupdata != endptr)
720 0 : elog(PANIC, "total tuple length mismatch");
721 :
722 112422 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
723 :
724 112422 : PageSetLSN(page, lsn);
725 :
726 112422 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
727 184 : PageClearAllVisible(page);
728 :
729 : /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
730 112422 : if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
731 8 : PageSetAllVisible(page);
732 :
733 112422 : MarkBufferDirty(buffer);
734 : }
735 115778 : if (BufferIsValid(buffer))
736 115778 : UnlockReleaseBuffer(buffer);
737 :
738 115778 : buffer = InvalidBuffer;
739 :
740 : /*
741 : * Read and update the visibility map (VM) block.
742 : *
743 : * We must always redo VM changes, even if the corresponding heap page
744 : * update was skipped due to the LSN interlock. Each VM block covers
745 : * multiple heap pages, so later WAL records may update other bits in the
746 : * same block. If this record includes an FPI (full-page image),
747 : * subsequent WAL records may depend on it to guard against torn pages.
748 : *
749 : * Heap page changes are replayed first to preserve the invariant:
750 : * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
751 : *
752 : * Note that we released the heap page lock above. During normal
753 : * operation, this would be unsafe — a concurrent modification could
754 : * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
755 : * invariant.
756 : *
757 : * During recovery, however, no concurrent writers exist. Therefore,
758 : * updating the VM without holding the heap page lock is safe enough. This
759 : * same approach is taken when replaying xl_heap_visible records (see
760 : * heap_xlog_visible()).
761 : */
762 115786 : if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
763 8 : XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
764 : &vmbuffer) == BLK_NEEDS_REDO)
765 : {
766 0 : Page vmpage = BufferGetPage(vmbuffer);
767 :
768 : /* initialize the page if it was read as zeros */
769 0 : if (PageIsNew(vmpage))
770 0 : PageInit(vmpage, BLCKSZ, 0);
771 :
772 0 : visibilitymap_set_vmbits(blkno,
773 : vmbuffer,
774 : VISIBILITYMAP_ALL_VISIBLE |
775 : VISIBILITYMAP_ALL_FROZEN,
776 : rlocator);
777 :
778 : Assert(BufferIsDirty(vmbuffer));
779 0 : PageSetLSN(vmpage, lsn);
780 : }
781 :
782 115778 : if (BufferIsValid(vmbuffer))
783 8 : UnlockReleaseBuffer(vmbuffer);
784 :
785 : /*
786 : * If the page is running low on free space, update the FSM as well.
787 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
788 : * better than that without knowing the fill-factor for the table.
789 : *
790 : * XXX: Don't do this if the page was restored from full page image. We
791 : * don't bother to update the FSM in that case, it doesn't need to be
792 : * totally accurate anyway.
793 : */
794 115778 : if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
795 34034 : XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
796 115778 : }
797 :
798 : /*
799 : * Replay XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE records.
800 : */
801 : static void
802 187732 : heap_xlog_update(XLogReaderState *record, bool hot_update)
803 : {
804 187732 : XLogRecPtr lsn = record->EndRecPtr;
805 187732 : xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
806 : RelFileLocator rlocator;
807 : BlockNumber oldblk;
808 : BlockNumber newblk;
809 : ItemPointerData newtid;
810 : Buffer obuffer,
811 : nbuffer;
812 : Page page;
813 : OffsetNumber offnum;
814 187732 : ItemId lp = NULL;
815 : HeapTupleData oldtup;
816 : HeapTupleHeader htup;
817 187732 : uint16 prefixlen = 0,
818 187732 : suffixlen = 0;
819 : char *newp;
820 : union
821 : {
822 : HeapTupleHeaderData hdr;
823 : char data[MaxHeapTupleSize];
824 : } tbuf;
825 : xl_heap_header xlhdr;
826 : uint32 newlen;
827 187732 : Size freespace = 0;
828 : XLogRedoAction oldaction;
829 : XLogRedoAction newaction;
830 :
831 : /* initialize to keep the compiler quiet */
832 187732 : oldtup.t_data = NULL;
833 187732 : oldtup.t_len = 0;
834 :
835 187732 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
836 187732 : if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
837 : {
838 : /* HOT updates are never done across pages */
839 : Assert(!hot_update);
840 : }
841 : else
842 78304 : oldblk = newblk;
843 :
844 187732 : ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
845 :
846 : /*
847 : * The visibility map may need to be fixed even if the heap page is
848 : * already up-to-date.
849 : */
850 187732 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
851 : {
852 486 : Relation reln = CreateFakeRelcacheEntry(rlocator);
853 486 : Buffer vmbuffer = InvalidBuffer;
854 :
855 486 : visibilitymap_pin(reln, oldblk, &vmbuffer);
856 486 : visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
857 486 : ReleaseBuffer(vmbuffer);
858 486 : FreeFakeRelcacheEntry(reln);
859 : }
860 :
861 : /*
862 : * In normal operation, it is important to lock the two pages in
863 : * page-number order, to avoid possible deadlocks against other update
864 : * operations going the other way. However, during WAL replay there can
865 : * be no other update happening, so we don't need to worry about that. But
866 : * we *do* need to worry that we don't expose an inconsistent state to Hot
867 : * Standby queries --- so the original page can't be unlocked before we've
868 : * added the new tuple to the new page.
869 : */
870 :
871 : /* Deal with old tuple version */
872 187732 : oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
873 : &obuffer);
874 187732 : if (oldaction == BLK_NEEDS_REDO)
875 : {
876 187116 : page = BufferGetPage(obuffer);
877 187116 : offnum = xlrec->old_offnum;
878 187116 : if (PageGetMaxOffsetNumber(page) >= offnum)
879 187116 : lp = PageGetItemId(page, offnum);
880 :
881 187116 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
882 0 : elog(PANIC, "invalid lp");
883 :
884 187116 : htup = (HeapTupleHeader) PageGetItem(page, lp);
885 :
886 187116 : oldtup.t_data = htup;
887 187116 : oldtup.t_len = ItemIdGetLength(lp);
888 :
889 187116 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
890 187116 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
891 187116 : if (hot_update)
892 72152 : HeapTupleHeaderSetHotUpdated(htup);
893 : else
894 114964 : HeapTupleHeaderClearHotUpdated(htup);
895 187116 : fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
896 : &htup->t_infomask2);
897 187116 : HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
898 187116 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
899 : /* Set forward chain link in t_ctid */
900 187116 : htup->t_ctid = newtid;
901 :
902 : /* Mark the page as a candidate for pruning */
903 187116 : PageSetPrunable(page, XLogRecGetXid(record));
904 :
905 187116 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
906 450 : PageClearAllVisible(page);
907 :
908 187116 : PageSetLSN(page, lsn);
909 187116 : MarkBufferDirty(obuffer);
910 : }
911 :
912 : /*
913 : * Read the page the new tuple goes into, if different from old.
914 : */
915 187732 : if (oldblk == newblk)
916 : {
917 78304 : nbuffer = obuffer;
918 78304 : newaction = oldaction;
919 : }
920 109428 : else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
921 : {
922 1160 : nbuffer = XLogInitBufferForRedo(record, 0);
923 1160 : page = BufferGetPage(nbuffer);
924 1160 : PageInit(page, BufferGetPageSize(nbuffer), 0);
925 1160 : newaction = BLK_NEEDS_REDO;
926 : }
927 : else
928 108268 : newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
929 :
930 : /*
931 : * The visibility map may need to be fixed even if the heap page is
932 : * already up-to-date.
933 : */
934 187732 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
935 : {
936 398 : Relation reln = CreateFakeRelcacheEntry(rlocator);
937 398 : Buffer vmbuffer = InvalidBuffer;
938 :
939 398 : visibilitymap_pin(reln, newblk, &vmbuffer);
940 398 : visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
941 398 : ReleaseBuffer(vmbuffer);
942 398 : FreeFakeRelcacheEntry(reln);
943 : }
944 :
945 : /* Deal with new tuple */
946 187732 : if (newaction == BLK_NEEDS_REDO)
947 : {
948 : char *recdata;
949 : char *recdata_end;
950 : Size datalen;
951 : Size tuplen;
952 :
953 186600 : recdata = XLogRecGetBlockData(record, 0, &datalen);
954 186600 : recdata_end = recdata + datalen;
955 :
956 186600 : page = BufferGetPage(nbuffer);
957 :
958 186600 : offnum = xlrec->new_offnum;
959 186600 : if (PageGetMaxOffsetNumber(page) + 1 < offnum)
960 0 : elog(PANIC, "invalid max offset number");
961 :
962 186600 : if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
963 : {
964 : Assert(newblk == oldblk);
965 30536 : memcpy(&prefixlen, recdata, sizeof(uint16));
966 30536 : recdata += sizeof(uint16);
967 : }
968 186600 : if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
969 : {
970 : Assert(newblk == oldblk);
971 67212 : memcpy(&suffixlen, recdata, sizeof(uint16));
972 67212 : recdata += sizeof(uint16);
973 : }
974 :
975 186600 : memcpy(&xlhdr, recdata, SizeOfHeapHeader);
976 186600 : recdata += SizeOfHeapHeader;
977 :
978 186600 : tuplen = recdata_end - recdata;
979 : Assert(tuplen <= MaxHeapTupleSize);
980 :
981 186600 : htup = &tbuf.hdr;
982 186600 : MemSet(htup, 0, SizeofHeapTupleHeader);
983 :
984 : /*
985 : * Reconstruct the new tuple using the prefix and/or suffix from the
986 : * old tuple, and the data stored in the WAL record.
987 : */
988 186600 : newp = (char *) htup + SizeofHeapTupleHeader;
989 186600 : if (prefixlen > 0)
990 : {
991 : int len;
992 :
993 : /* copy bitmap [+ padding] [+ oid] from WAL record */
994 30536 : len = xlhdr.t_hoff - SizeofHeapTupleHeader;
995 30536 : memcpy(newp, recdata, len);
996 30536 : recdata += len;
997 30536 : newp += len;
998 :
999 : /* copy prefix from old tuple */
1000 30536 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
1001 30536 : newp += prefixlen;
1002 :
1003 : /* copy new tuple data from WAL record */
1004 30536 : len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
1005 30536 : memcpy(newp, recdata, len);
1006 30536 : recdata += len;
1007 30536 : newp += len;
1008 : }
1009 : else
1010 : {
1011 : /*
1012 : * copy bitmap [+ padding] [+ oid] + data from record, all in one
1013 : * go
1014 : */
1015 156064 : memcpy(newp, recdata, tuplen);
1016 156064 : recdata += tuplen;
1017 156064 : newp += tuplen;
1018 : }
1019 : Assert(recdata == recdata_end);
1020 :
1021 : /* copy suffix from old tuple */
1022 186600 : if (suffixlen > 0)
1023 67212 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
1024 :
1025 186600 : newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
1026 186600 : htup->t_infomask2 = xlhdr.t_infomask2;
1027 186600 : htup->t_infomask = xlhdr.t_infomask;
1028 186600 : htup->t_hoff = xlhdr.t_hoff;
1029 :
1030 186600 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
1031 186600 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
1032 186600 : HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
1033 : /* Make sure there is no forward chain link in t_ctid */
1034 186600 : htup->t_ctid = newtid;
1035 :
1036 186600 : offnum = PageAddItem(page, htup, newlen, offnum, true, true);
1037 186600 : if (offnum == InvalidOffsetNumber)
1038 0 : elog(PANIC, "failed to add tuple");
1039 :
1040 186600 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
1041 162 : PageClearAllVisible(page);
1042 :
1043 186600 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
1044 :
1045 186600 : PageSetLSN(page, lsn);
1046 186600 : MarkBufferDirty(nbuffer);
1047 : }
1048 :
1049 187732 : if (BufferIsValid(nbuffer) && nbuffer != obuffer)
1050 109428 : UnlockReleaseBuffer(nbuffer);
1051 187732 : if (BufferIsValid(obuffer))
1052 187732 : UnlockReleaseBuffer(obuffer);
1053 :
1054 : /*
1055 : * If the new page is running low on free space, update the FSM as well.
1056 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
1057 : * better than that without knowing the fill-factor for the table.
1058 : *
1059 : * However, don't update the FSM on HOT updates, because after crash
1060 : * recovery, either the old or the new tuple will certainly be dead and
1061 : * prunable. After pruning, the page will have roughly as much free space
1062 : * as it did before the update, assuming the new tuple is about the same
1063 : * size as the old one.
1064 : *
1065 : * XXX: Don't do this if the page was restored from full page image. We
1066 : * don't bother to update the FSM in that case, it doesn't need to be
1067 : * totally accurate anyway.
1068 : */
1069 187732 : if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
1070 23574 : XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
1071 187732 : }
1072 :
1073 : /*
1074 : * Replay XLOG_HEAP_CONFIRM records.
1075 : */
1076 : static void
1077 164 : heap_xlog_confirm(XLogReaderState *record)
1078 : {
1079 164 : XLogRecPtr lsn = record->EndRecPtr;
1080 164 : xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record);
1081 : Buffer buffer;
1082 : Page page;
1083 : OffsetNumber offnum;
1084 164 : ItemId lp = NULL;
1085 : HeapTupleHeader htup;
1086 :
1087 164 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1088 : {
1089 164 : page = BufferGetPage(buffer);
1090 :
1091 164 : offnum = xlrec->offnum;
1092 164 : if (PageGetMaxOffsetNumber(page) >= offnum)
1093 164 : lp = PageGetItemId(page, offnum);
1094 :
1095 164 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
1096 0 : elog(PANIC, "invalid lp");
1097 :
1098 164 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1099 :
1100 : /*
1101 : * Confirm tuple as actually inserted
1102 : */
1103 164 : ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum);
1104 :
1105 164 : PageSetLSN(page, lsn);
1106 164 : MarkBufferDirty(buffer);
1107 : }
1108 164 : if (BufferIsValid(buffer))
1109 164 : UnlockReleaseBuffer(buffer);
1110 164 : }
1111 :
1112 : /*
1113 : * Replay XLOG_HEAP_LOCK records.
1114 : */
1115 : static void
1116 111256 : heap_xlog_lock(XLogReaderState *record)
1117 : {
1118 111256 : XLogRecPtr lsn = record->EndRecPtr;
1119 111256 : xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
1120 : Buffer buffer;
1121 : Page page;
1122 : OffsetNumber offnum;
1123 111256 : ItemId lp = NULL;
1124 : HeapTupleHeader htup;
1125 :
1126 : /*
1127 : * The visibility map may need to be fixed even if the heap page is
1128 : * already up-to-date.
1129 : */
1130 111256 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1131 : {
1132 : RelFileLocator rlocator;
1133 122 : Buffer vmbuffer = InvalidBuffer;
1134 : BlockNumber block;
1135 : Relation reln;
1136 :
1137 122 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1138 122 : reln = CreateFakeRelcacheEntry(rlocator);
1139 :
1140 122 : visibilitymap_pin(reln, block, &vmbuffer);
1141 122 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1142 :
1143 122 : ReleaseBuffer(vmbuffer);
1144 122 : FreeFakeRelcacheEntry(reln);
1145 : }
1146 :
1147 111256 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1148 : {
1149 110774 : page = BufferGetPage(buffer);
1150 :
1151 110774 : offnum = xlrec->offnum;
1152 110774 : if (PageGetMaxOffsetNumber(page) >= offnum)
1153 110774 : lp = PageGetItemId(page, offnum);
1154 :
1155 110774 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
1156 0 : elog(PANIC, "invalid lp");
1157 :
1158 110774 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1159 :
1160 110774 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1161 110774 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1162 110774 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1163 : &htup->t_infomask2);
1164 :
1165 : /*
1166 : * Clear relevant update flags, but only if the modified infomask says
1167 : * there's no update.
1168 : */
1169 110774 : if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
1170 : {
1171 110774 : HeapTupleHeaderClearHotUpdated(htup);
1172 : /* Make sure there is no forward chain link in t_ctid */
1173 110774 : ItemPointerSet(&htup->t_ctid,
1174 : BufferGetBlockNumber(buffer),
1175 : offnum);
1176 : }
1177 110774 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1178 110774 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
1179 110774 : PageSetLSN(page, lsn);
1180 110774 : MarkBufferDirty(buffer);
1181 : }
1182 111256 : if (BufferIsValid(buffer))
1183 111256 : UnlockReleaseBuffer(buffer);
1184 111256 : }
1185 :
1186 : /*
1187 : * Replay XLOG_HEAP2_LOCK_UPDATED records.
1188 : */
1189 : static void
1190 0 : heap_xlog_lock_updated(XLogReaderState *record)
1191 : {
1192 0 : XLogRecPtr lsn = record->EndRecPtr;
1193 : xl_heap_lock_updated *xlrec;
1194 : Buffer buffer;
1195 : Page page;
1196 : OffsetNumber offnum;
1197 0 : ItemId lp = NULL;
1198 : HeapTupleHeader htup;
1199 :
1200 0 : xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
1201 :
1202 : /*
1203 : * The visibility map may need to be fixed even if the heap page is
1204 : * already up-to-date.
1205 : */
1206 0 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1207 : {
1208 : RelFileLocator rlocator;
1209 0 : Buffer vmbuffer = InvalidBuffer;
1210 : BlockNumber block;
1211 : Relation reln;
1212 :
1213 0 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1214 0 : reln = CreateFakeRelcacheEntry(rlocator);
1215 :
1216 0 : visibilitymap_pin(reln, block, &vmbuffer);
1217 0 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1218 :
1219 0 : ReleaseBuffer(vmbuffer);
1220 0 : FreeFakeRelcacheEntry(reln);
1221 : }
1222 :
1223 0 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1224 : {
1225 0 : page = BufferGetPage(buffer);
1226 :
1227 0 : offnum = xlrec->offnum;
1228 0 : if (PageGetMaxOffsetNumber(page) >= offnum)
1229 0 : lp = PageGetItemId(page, offnum);
1230 :
1231 0 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
1232 0 : elog(PANIC, "invalid lp");
1233 :
1234 0 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1235 :
1236 0 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1237 0 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1238 0 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1239 : &htup->t_infomask2);
1240 0 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1241 :
1242 0 : PageSetLSN(page, lsn);
1243 0 : MarkBufferDirty(buffer);
1244 : }
1245 0 : if (BufferIsValid(buffer))
1246 0 : UnlockReleaseBuffer(buffer);
1247 0 : }
1248 :
1249 : /*
1250 : * Replay XLOG_HEAP_INPLACE records.
1251 : */
1252 : static void
1253 15444 : heap_xlog_inplace(XLogReaderState *record)
1254 : {
1255 15444 : XLogRecPtr lsn = record->EndRecPtr;
1256 15444 : xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
1257 : Buffer buffer;
1258 : Page page;
1259 : OffsetNumber offnum;
1260 15444 : ItemId lp = NULL;
1261 : HeapTupleHeader htup;
1262 : uint32 oldlen;
1263 : Size newlen;
1264 :
1265 15444 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1266 : {
1267 14948 : char *newtup = XLogRecGetBlockData(record, 0, &newlen);
1268 :
1269 14948 : page = BufferGetPage(buffer);
1270 :
1271 14948 : offnum = xlrec->offnum;
1272 14948 : if (PageGetMaxOffsetNumber(page) >= offnum)
1273 14948 : lp = PageGetItemId(page, offnum);
1274 :
1275 14948 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
1276 0 : elog(PANIC, "invalid lp");
1277 :
1278 14948 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1279 :
1280 14948 : oldlen = ItemIdGetLength(lp) - htup->t_hoff;
1281 14948 : if (oldlen != newlen)
1282 0 : elog(PANIC, "wrong tuple length");
1283 :
1284 14948 : memcpy((char *) htup + htup->t_hoff, newtup, newlen);
1285 :
1286 14948 : PageSetLSN(page, lsn);
1287 14948 : MarkBufferDirty(buffer);
1288 : }
1289 15444 : if (BufferIsValid(buffer))
1290 15444 : UnlockReleaseBuffer(buffer);
1291 :
1292 15444 : ProcessCommittedInvalidationMessages(xlrec->msgs,
1293 : xlrec->nmsgs,
1294 15444 : xlrec->relcacheInitFileInval,
1295 : xlrec->dbId,
1296 : xlrec->tsId);
1297 15444 : }
1298 :
1299 : void
1300 3497858 : heap_redo(XLogReaderState *record)
1301 : {
1302 3497858 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1303 :
1304 : /*
1305 : * These operations don't overwrite MVCC data so no conflict processing is
1306 : * required. The ones in heap2 rmgr do.
1307 : */
1308 :
1309 3497858 : switch (info & XLOG_HEAP_OPMASK)
1310 : {
1311 2582212 : case XLOG_HEAP_INSERT:
1312 2582212 : heap_xlog_insert(record);
1313 2582212 : break;
1314 601046 : case XLOG_HEAP_DELETE:
1315 601046 : heap_xlog_delete(record);
1316 601046 : break;
1317 115032 : case XLOG_HEAP_UPDATE:
1318 115032 : heap_xlog_update(record, false);
1319 115032 : break;
1320 4 : case XLOG_HEAP_TRUNCATE:
1321 :
1322 : /*
1323 : * TRUNCATE is a no-op because the actions are already logged as
1324 : * SMGR WAL records. The TRUNCATE WAL record only exists for logical
1325 : * decoding.
1326 : */
1327 4 : break;
1328 72700 : case XLOG_HEAP_HOT_UPDATE:
1329 72700 : heap_xlog_update(record, true);
1330 72700 : break;
1331 164 : case XLOG_HEAP_CONFIRM:
1332 164 : heap_xlog_confirm(record);
1333 164 : break;
1334 111256 : case XLOG_HEAP_LOCK:
1335 111256 : heap_xlog_lock(record);
1336 111256 : break;
1337 15444 : case XLOG_HEAP_INPLACE:
1338 15444 : heap_xlog_inplace(record);
1339 15444 : break;
1340 0 : default:
1341 0 : elog(PANIC, "heap_redo: unknown op code %u", info);
1342 : }
1343 3497858 : }
1344 :
1345 : void
1346 155420 : heap2_redo(XLogReaderState *record)
1347 : {
1348 155420 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1349 :
1350 155420 : switch (info & XLOG_HEAP_OPMASK)
1351 : {
1352 27968 : case XLOG_HEAP2_PRUNE_ON_ACCESS:
1353 : case XLOG_HEAP2_PRUNE_VACUUM_SCAN:
1354 : case XLOG_HEAP2_PRUNE_VACUUM_CLEANUP:
1355 27968 : heap_xlog_prune_freeze(record);
1356 27968 : break;
1357 9674 : case XLOG_HEAP2_VISIBLE:
1358 9674 : heap_xlog_visible(record);
1359 9674 : break;
1360 115778 : case XLOG_HEAP2_MULTI_INSERT:
1361 115778 : heap_xlog_multi_insert(record);
1362 115778 : break;
1363 0 : case XLOG_HEAP2_LOCK_UPDATED:
1364 0 : heap_xlog_lock_updated(record);
1365 0 : break;
1366 2000 : case XLOG_HEAP2_NEW_CID:
1367 :
1368 : /*
1369 : * Nothing to do on a real replay, only used during logical
1370 : * decoding.
1371 : */
1372 2000 : break;
1373 0 : case XLOG_HEAP2_REWRITE:
1374 0 : heap_xlog_logical_rewrite(record);
1375 0 : break;
1376 0 : default:
1377 0 : elog(PANIC, "heap2_redo: unknown op code %u", info);
1378 : }
1379 155420 : }
1380 :
1381 : /*
1382 : * Mask a heap page before performing consistency checks on it.
1383 : */
1384 : void
1385 5846152 : heap_mask(char *pagedata, BlockNumber blkno)
1386 : {
1387 5846152 : Page page = (Page) pagedata;
1388 : OffsetNumber off;
1389 :
1390 5846152 : mask_page_lsn_and_checksum(page);
1391 :
1392 5846152 : mask_page_hint_bits(page);
1393 5846152 : mask_unused_space(page);
1394 :
1395 481546412 : for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
1396 : {
1397 475700260 : ItemId iid = PageGetItemId(page, off);
1398 : char *page_item;
1399 :
1400 475700260 : page_item = (char *) (page + ItemIdGetOffset(iid));
1401 :
1402 475700260 : if (ItemIdIsNormal(iid))
1403 : {
1404 448871652 : HeapTupleHeader page_htup = (HeapTupleHeader) page_item;
1405 :
1406 : /*
1407 : * If xmin of a tuple is not yet frozen, we should ignore
1408 : * differences in hint bits, since they can be set without
1409 : * emitting WAL.
1410 : */
1411 448871652 : if (!HeapTupleHeaderXminFrozen(page_htup))
1412 442547020 : page_htup->t_infomask &= ~HEAP_XACT_MASK;
1413 : else
1414 : {
1415 : /* Still we need to mask xmax hint bits. */
1416 6324632 : page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
1417 6324632 : page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
1418 : }
1419 :
1420 : /*
1421 : * During replay, we set Command Id to FirstCommandId. Hence, mask
1422 : * it. See heap_xlog_insert() for details.
1423 : */
1424 448871652 : page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;
1425 :
1426 : /*
1427 : * For a speculative tuple, heap_insert() does not set ctid in the
1428 : * caller-passed heap tuple itself, leaving the ctid field to
1429 : * contain a speculative token value - a per-backend monotonically
1430 : * increasing identifier. Besides, it does not WAL-log ctid under
1431 : * any circumstances.
1432 : *
1433 : * During redo, heap_xlog_insert() sets t_ctid to current block
1434 : * number and self offset number. It doesn't care about any
1435 : * speculative insertions on the primary. Hence, we set t_ctid to
1436 : * current block number and self offset number to ignore any
1437 : * inconsistency.
1438 : */
1439 448871652 : if (HeapTupleHeaderIsSpeculative(page_htup))
1440 166 : ItemPointerSet(&page_htup->t_ctid, blkno, off);
1441 :
1442 : /*
1443 : * NB: Not ignoring ctid changes due to the tuple having moved
1444 : * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
1445 : * important information that needs to be in-sync between primary
1446 : * and standby, and thus is WAL logged.
1447 : */
1448 : }
1449 :
1450 : /*
1451 : * Ignore any padding bytes after the tuple, when the length of the
1452 : * item is not MAXALIGNed.
1453 : */
1454 475700260 : if (ItemIdHasStorage(iid))
1455 : {
1456 448871652 : int len = ItemIdGetLength(iid);
1457 448871652 : int padlen = MAXALIGN(len) - len;
1458 :
1459 448871652 : if (padlen > 0)
1460 239151740 : memset(page_item + len, MASK_MARKER, padlen);
1461 : }
1462 : }
1463 5846152 : }