Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * heapam_xlog.c
4 : * WAL replay logic for heap access method.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/heap/heapam_xlog.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/bufmask.h"
18 : #include "access/heapam.h"
19 : #include "access/visibilitymap.h"
20 : #include "access/xlog.h"
21 : #include "access/xlogutils.h"
22 : #include "storage/freespace.h"
23 : #include "storage/standby.h"
24 :
25 :
26 : /*
27 : * Replay XLOG_HEAP2_PRUNE_* records.
28 : */
29 : static void
30 14786 : heap_xlog_prune_freeze(XLogReaderState *record)
31 : {
32 14786 : XLogRecPtr lsn = record->EndRecPtr;
33 14786 : char *maindataptr = XLogRecGetData(record);
34 : xl_heap_prune xlrec;
35 : Buffer buffer;
36 : RelFileLocator rlocator;
37 : BlockNumber blkno;
38 14786 : Buffer vmbuffer = InvalidBuffer;
39 14786 : uint8 vmflags = 0;
40 14786 : Size freespace = 0;
41 :
42 14786 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
43 14786 : memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
44 14786 : maindataptr += SizeOfHeapPrune;
45 :
46 : /*
47 : * We will take an ordinary exclusive lock or a cleanup lock depending on
48 : * whether the XLHP_CLEANUP_LOCK flag is set. With an ordinary exclusive
49 : * lock, we better not be doing anything that requires moving existing
50 : * tuple data.
51 : */
52 : Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
53 : (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);
54 :
55 14786 : if (xlrec.flags & XLHP_VM_ALL_VISIBLE)
56 : {
57 3470 : vmflags = VISIBILITYMAP_ALL_VISIBLE;
58 3470 : if (xlrec.flags & XLHP_VM_ALL_FROZEN)
59 2939 : vmflags |= VISIBILITYMAP_ALL_FROZEN;
60 : }
61 :
62 : /*
63 : * After xl_heap_prune is the optional snapshot conflict horizon.
64 : *
65 : * In Hot Standby mode, we must ensure that there are no running queries
66 : * which would conflict with the changes in this record. That means we
67 : * can't replay this record if it removes tuples that are still visible to
68 : * transactions on the standby, freeze tuples with xids that are still
69 : * considered running on the standby, or set a page as all-visible in the
70 : * VM if it isn't all-visible to all transactions on the standby.
71 : */
72 14786 : if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
73 : {
74 : TransactionId snapshot_conflict_horizon;
75 :
76 : /* memcpy() because snapshot_conflict_horizon is stored unaligned */
77 11154 : memcpy(&snapshot_conflict_horizon, maindataptr, sizeof(TransactionId));
78 11154 : maindataptr += sizeof(TransactionId);
79 :
80 11154 : if (InHotStandby)
81 10921 : ResolveRecoveryConflictWithSnapshot(snapshot_conflict_horizon,
82 10921 : (xlrec.flags & XLHP_IS_CATALOG_REL) != 0,
83 : rlocator);
84 : }
85 :
86 : /*
87 : * If we have a full-page image of the heap block, restore it and we're
88 : * done with the heap block.
89 : */
90 14786 : if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
91 14786 : (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
92 : &buffer) == BLK_NEEDS_REDO)
93 : {
94 10622 : Page page = BufferGetPage(buffer);
95 : OffsetNumber *redirected;
96 : OffsetNumber *nowdead;
97 : OffsetNumber *nowunused;
98 : int nredirected;
99 : int ndead;
100 : int nunused;
101 : int nplans;
102 : Size datalen;
103 : xlhp_freeze_plan *plans;
104 : OffsetNumber *frz_offsets;
105 10622 : char *dataptr = XLogRecGetBlockData(record, 0, &datalen);
106 : bool do_prune;
107 :
108 10622 : heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
109 : &nplans, &plans, &frz_offsets,
110 : &nredirected, &redirected,
111 : &ndead, &nowdead,
112 : &nunused, &nowunused);
113 :
114 10622 : do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
115 :
116 : /* Ensure the record does something */
117 : Assert(do_prune || nplans > 0 || vmflags & VISIBILITYMAP_VALID_BITS);
118 :
119 : /*
120 : * Update all line pointers per the record, and repair fragmentation
121 : * if needed.
122 : */
123 10622 : if (do_prune)
124 10048 : heap_page_prune_execute(buffer,
125 10048 : (xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
126 : redirected, nredirected,
127 : nowdead, ndead,
128 : nowunused, nunused);
129 :
130 : /* Freeze tuples */
131 11744 : for (int p = 0; p < nplans; p++)
132 : {
133 : HeapTupleFreeze frz;
134 :
135 : /*
136 : * Convert freeze plan representation from WAL record into
137 : * per-tuple format used by heap_execute_freeze_tuple
138 : */
139 1122 : frz.xmax = plans[p].xmax;
140 1122 : frz.t_infomask2 = plans[p].t_infomask2;
141 1122 : frz.t_infomask = plans[p].t_infomask;
142 1122 : frz.frzflags = plans[p].frzflags;
143 1122 : frz.offset = InvalidOffsetNumber; /* unused, but be tidy */
144 :
145 54867 : for (int i = 0; i < plans[p].ntuples; i++)
146 : {
147 53745 : OffsetNumber offset = *(frz_offsets++);
148 : ItemId lp;
149 : HeapTupleHeader tuple;
150 :
151 53745 : lp = PageGetItemId(page, offset);
152 53745 : tuple = (HeapTupleHeader) PageGetItem(page, lp);
153 53745 : heap_execute_freeze_tuple(tuple, &frz);
154 : }
155 : }
156 :
157 : /* There should be no more data */
158 : Assert((char *) frz_offsets == dataptr + datalen);
159 :
160 : /*
161 : * The critical integrity requirement here is that we must never end
162 : * up with the visibility map bit set and the page-level
163 : * PD_ALL_VISIBLE bit unset. If that were to occur, a subsequent page
164 : * modification would fail to clear the visibility map bit.
165 : */
166 10622 : if (vmflags & VISIBILITYMAP_VALID_BITS)
167 : {
168 2291 : PageSetAllVisible(page);
169 2291 : PageClearPrunable(page);
170 : }
171 :
172 10622 : MarkBufferDirty(buffer);
173 :
174 : /*
175 : * See log_heap_prune_and_freeze() for commentary on when we set the
176 : * heap page LSN.
177 : */
178 10622 : if (do_prune || nplans > 0 ||
179 0 : ((vmflags & VISIBILITYMAP_VALID_BITS) && XLogHintBitIsNeeded()))
180 10622 : PageSetLSN(page, lsn);
181 :
182 : /*
183 : * Note: we don't worry about updating the page's prunability hints.
184 : * At worst this will cause an extra prune cycle to occur soon.
185 : */
186 : }
187 :
188 : /*
189 : * If we 1) released any space or line pointers or 2) set PD_ALL_VISIBLE
190 : * or the VM, update the freespace map.
191 : *
192 : * Even when no actual space is freed (when only marking the page
193 : * all-visible or frozen), we still update the FSM. Because the FSM is
194 : * unlogged and maintained heuristically, it often becomes stale on
195 : * standbys. If such a standby is later promoted and runs VACUUM, it will
196 : * skip recalculating free space for pages that were marked
197 : * all-visible/all-frozen. FreeSpaceMapVacuum() can then propagate overly
198 : * optimistic free space values upward, causing future insertions to
199 : * select pages that turn out to be unusable. In bulk, this can lead to
200 : * long stalls.
201 : *
202 : * To prevent this, always update the FSM even when only marking a page
203 : * all-visible/all-frozen.
204 : *
205 : * Do this regardless of whether a full-page image is logged, since FSM
206 : * data is not part of the page itself.
207 : */
208 14786 : if (BufferIsValid(buffer))
209 : {
210 14786 : if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
211 : XLHP_HAS_DEAD_ITEMS |
212 1812 : XLHP_HAS_NOW_UNUSED_ITEMS)) ||
213 1812 : (vmflags & VISIBILITYMAP_VALID_BITS))
214 12974 : freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
215 :
216 : /*
217 : * We want to avoid holding an exclusive lock on the heap buffer while
218 : * doing IO (either of the FSM or the VM), so we'll release it now.
219 : */
220 14786 : UnlockReleaseBuffer(buffer);
221 : }
222 :
223 : /*
224 : * Now read and update the VM block.
225 : *
226 : * We must redo changes to the VM even if the heap page was skipped due to
227 : * LSN interlock. See comment in heap_xlog_multi_insert() for more details
228 : * on replaying changes to the VM.
229 : */
230 18256 : if ((vmflags & VISIBILITYMAP_VALID_BITS) &&
231 3470 : XLogReadBufferForRedoExtended(record, 1,
232 : RBM_ZERO_ON_ERROR,
233 : false,
234 : &vmbuffer) == BLK_NEEDS_REDO)
235 : {
236 3404 : Page vmpage = BufferGetPage(vmbuffer);
237 :
238 : /* initialize the page if it was read as zeros */
239 3404 : if (PageIsNew(vmpage))
240 0 : PageInit(vmpage, BLCKSZ, 0);
241 :
242 3404 : visibilitymap_set_vmbits(blkno, vmbuffer, vmflags, rlocator);
243 :
244 : Assert(BufferIsDirty(vmbuffer));
245 3404 : PageSetLSN(vmpage, lsn);
246 : }
247 :
248 14786 : if (BufferIsValid(vmbuffer))
249 3470 : UnlockReleaseBuffer(vmbuffer);
250 :
251 14786 : if (freespace > 0)
252 12867 : XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
253 14786 : }
254 :
255 : /*
256 : * Replay XLOG_HEAP2_VISIBLE records.
257 : *
258 : * The critical integrity requirement here is that we must never end up with
259 : * a situation where the visibility map bit is set, and the page-level
260 : * PD_ALL_VISIBLE bit is clear. If that were to occur, then a subsequent
261 : * page modification would fail to clear the visibility map bit.
262 : */
263 : static void
264 5182 : heap_xlog_visible(XLogReaderState *record)
265 : {
266 5182 : XLogRecPtr lsn = record->EndRecPtr;
267 5182 : xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
268 5182 : Buffer vmbuffer = InvalidBuffer;
269 : Buffer buffer;
270 : Page page;
271 : RelFileLocator rlocator;
272 : BlockNumber blkno;
273 : XLogRedoAction action;
274 :
275 : Assert((xlrec->flags & VISIBILITYMAP_XLOG_VALID_BITS) == xlrec->flags);
276 :
277 5182 : XLogRecGetBlockTag(record, 1, &rlocator, NULL, &blkno);
278 :
279 : /*
280 : * If there are any Hot Standby transactions running that have an xmin
281 : * horizon old enough that this page isn't all-visible for them, they
282 : * might incorrectly decide that an index-only scan can skip a heap fetch.
283 : *
284 : * NB: It might be better to throw some kind of "soft" conflict here that
285 : * forces any index-only scan that is in flight to perform heap fetches,
286 : * rather than killing the transaction outright.
287 : */
288 5182 : if (InHotStandby)
289 5008 : ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
290 5008 : xlrec->flags & VISIBILITYMAP_XLOG_CATALOG_REL,
291 : rlocator);
292 :
293 : /*
294 : * Read the heap page, if it still exists. If the heap file has dropped or
295 : * truncated later in recovery, we don't need to update the page, but we'd
296 : * better still update the visibility map.
297 : */
298 5182 : action = XLogReadBufferForRedo(record, 1, &buffer);
299 5182 : if (action == BLK_NEEDS_REDO)
300 : {
301 : /*
302 : * We don't bump the LSN of the heap page when setting the visibility
303 : * map bit (unless checksums or wal_hint_bits is enabled, in which
304 : * case we must). This exposes us to torn page hazards, but since
305 : * we're not inspecting the existing page contents in any way, we
306 : * don't care.
307 : */
308 2994 : page = BufferGetPage(buffer);
309 :
310 2994 : PageSetAllVisible(page);
311 2994 : PageClearPrunable(page);
312 :
313 2994 : if (XLogHintBitIsNeeded())
314 2994 : PageSetLSN(page, lsn);
315 :
316 2994 : MarkBufferDirty(buffer);
317 : }
318 : else if (action == BLK_RESTORED)
319 : {
320 : /*
321 : * If heap block was backed up, we already restored it and there's
322 : * nothing more to do. (This can only happen with checksums or
323 : * wal_log_hints enabled.)
324 : */
325 : }
326 :
327 5182 : if (BufferIsValid(buffer))
328 : {
329 5182 : Size space = PageGetFreeSpace(BufferGetPage(buffer));
330 :
331 5182 : UnlockReleaseBuffer(buffer);
332 :
333 : /*
334 : * Since FSM is not WAL-logged and only updated heuristically, it
335 : * easily becomes stale in standbys. If the standby is later promoted
336 : * and runs VACUUM, it will skip updating individual free space
337 : * figures for pages that became all-visible (or all-frozen, depending
338 : * on the vacuum mode,) which is troublesome when FreeSpaceMapVacuum
339 : * propagates too optimistic free space values to upper FSM layers;
340 : * later inserters try to use such pages only to find out that they
341 : * are unusable. This can cause long stalls when there are many such
342 : * pages.
343 : *
344 : * Forestall those problems by updating FSM's idea about a page that
345 : * is becoming all-visible or all-frozen.
346 : *
347 : * Do this regardless of a full-page image being applied, since the
348 : * FSM data is not in the page anyway.
349 : */
350 5182 : if (xlrec->flags & VISIBILITYMAP_VALID_BITS)
351 5182 : XLogRecordPageWithFreeSpace(rlocator, blkno, space);
352 : }
353 :
354 : /*
355 : * Even if we skipped the heap page update due to the LSN interlock, it's
356 : * still safe to update the visibility map. Any WAL record that clears
357 : * the visibility map bit does so before checking the page LSN, so any
358 : * bits that need to be cleared will still be cleared.
359 : */
360 5182 : if (XLogReadBufferForRedoExtended(record, 0, RBM_ZERO_ON_ERROR, false,
361 : &vmbuffer) == BLK_NEEDS_REDO)
362 : {
363 4965 : Page vmpage = BufferGetPage(vmbuffer);
364 : Relation reln;
365 : uint8 vmbits;
366 :
367 : /* initialize the page if it was read as zeros */
368 4965 : if (PageIsNew(vmpage))
369 0 : PageInit(vmpage, BLCKSZ, 0);
370 :
371 : /* remove VISIBILITYMAP_XLOG_* */
372 4965 : vmbits = xlrec->flags & VISIBILITYMAP_VALID_BITS;
373 :
374 : /*
375 : * XLogReadBufferForRedoExtended locked the buffer. But
376 : * visibilitymap_set will handle locking itself.
377 : */
378 4965 : LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
379 :
380 4965 : reln = CreateFakeRelcacheEntry(rlocator);
381 :
382 4965 : visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
383 : xlrec->snapshotConflictHorizon, vmbits);
384 :
385 4965 : ReleaseBuffer(vmbuffer);
386 4965 : FreeFakeRelcacheEntry(reln);
387 : }
388 217 : else if (BufferIsValid(vmbuffer))
389 217 : UnlockReleaseBuffer(vmbuffer);
390 5182 : }
391 :
392 : /*
393 : * Given an "infobits" field from an XLog record, set the correct bits in the
394 : * given infomask and infomask2 for the tuple touched by the record.
395 : *
396 : * (This is the reverse of compute_infobits).
397 : */
398 : static void
399 457416 : fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
400 : {
401 457416 : *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
402 : HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
403 457416 : *infomask2 &= ~HEAP_KEYS_UPDATED;
404 :
405 457416 : if (infobits & XLHL_XMAX_IS_MULTI)
406 3 : *infomask |= HEAP_XMAX_IS_MULTI;
407 457416 : if (infobits & XLHL_XMAX_LOCK_ONLY)
408 55229 : *infomask |= HEAP_XMAX_LOCK_ONLY;
409 457416 : if (infobits & XLHL_XMAX_EXCL_LOCK)
410 54820 : *infomask |= HEAP_XMAX_EXCL_LOCK;
411 : /* note HEAP_XMAX_SHR_LOCK isn't considered here */
412 457416 : if (infobits & XLHL_XMAX_KEYSHR_LOCK)
413 423 : *infomask |= HEAP_XMAX_KEYSHR_LOCK;
414 :
415 457416 : if (infobits & XLHL_KEYS_UPDATED)
416 308631 : *infomask2 |= HEAP_KEYS_UPDATED;
417 457416 : }
418 :
419 : /*
420 : * Replay XLOG_HEAP_DELETE records.
421 : */
422 : static void
423 309394 : heap_xlog_delete(XLogReaderState *record)
424 : {
425 309394 : XLogRecPtr lsn = record->EndRecPtr;
426 309394 : xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
427 : Buffer buffer;
428 : Page page;
429 : ItemId lp;
430 : HeapTupleHeader htup;
431 : BlockNumber blkno;
432 : RelFileLocator target_locator;
433 : ItemPointerData target_tid;
434 :
435 309394 : XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
436 309394 : ItemPointerSetBlockNumber(&target_tid, blkno);
437 309394 : ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
438 :
439 : /*
440 : * The visibility map may need to be fixed even if the heap page is
441 : * already up-to-date.
442 : */
443 309394 : if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
444 : {
445 18 : Relation reln = CreateFakeRelcacheEntry(target_locator);
446 18 : Buffer vmbuffer = InvalidBuffer;
447 :
448 18 : visibilitymap_pin(reln, blkno, &vmbuffer);
449 18 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
450 18 : ReleaseBuffer(vmbuffer);
451 18 : FreeFakeRelcacheEntry(reln);
452 : }
453 :
454 309394 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
455 : {
456 307238 : page = BufferGetPage(buffer);
457 :
458 307238 : if (xlrec->offnum < 1 || xlrec->offnum > PageGetMaxOffsetNumber(page))
459 0 : elog(PANIC, "offnum out of range");
460 307238 : lp = PageGetItemId(page, xlrec->offnum);
461 307238 : if (!ItemIdIsNormal(lp))
462 0 : elog(PANIC, "invalid lp");
463 :
464 307238 : htup = (HeapTupleHeader) PageGetItem(page, lp);
465 :
466 307238 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
467 307238 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
468 307238 : HeapTupleHeaderClearHotUpdated(htup);
469 307238 : fix_infomask_from_infobits(xlrec->infobits_set,
470 : &htup->t_infomask, &htup->t_infomask2);
471 307238 : if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
472 307238 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
473 : else
474 0 : HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
475 307238 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
476 :
477 : /* Mark the page as a candidate for pruning */
478 307238 : PageSetPrunable(page, XLogRecGetXid(record));
479 :
480 307238 : if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
481 13 : PageClearAllVisible(page);
482 :
483 : /* Make sure t_ctid is set correctly */
484 307238 : if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
485 141 : HeapTupleHeaderSetMovedPartitions(htup);
486 : else
487 307097 : htup->t_ctid = target_tid;
488 307238 : PageSetLSN(page, lsn);
489 307238 : MarkBufferDirty(buffer);
490 : }
491 309394 : if (BufferIsValid(buffer))
492 309394 : UnlockReleaseBuffer(buffer);
493 309394 : }
494 :
495 : /*
496 : * Replay XLOG_HEAP_INSERT records.
497 : */
498 : static void
499 1295651 : heap_xlog_insert(XLogReaderState *record)
500 : {
501 1295651 : XLogRecPtr lsn = record->EndRecPtr;
502 1295651 : xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
503 : Buffer buffer;
504 : Page page;
505 : union
506 : {
507 : HeapTupleHeaderData hdr;
508 : char data[MaxHeapTupleSize];
509 : } tbuf;
510 : HeapTupleHeader htup;
511 : xl_heap_header xlhdr;
512 : uint32 newlen;
513 1295651 : Size freespace = 0;
514 : RelFileLocator target_locator;
515 : BlockNumber blkno;
516 : ItemPointerData target_tid;
517 : XLogRedoAction action;
518 :
519 1295651 : XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
520 1295651 : ItemPointerSetBlockNumber(&target_tid, blkno);
521 1295651 : ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
522 :
523 : /* No freezing in the heap_insert() code path */
524 : Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));
525 :
526 : /*
527 : * The visibility map may need to be fixed even if the heap page is
528 : * already up-to-date.
529 : */
530 1295651 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
531 : {
532 894 : Relation reln = CreateFakeRelcacheEntry(target_locator);
533 894 : Buffer vmbuffer = InvalidBuffer;
534 :
535 894 : visibilitymap_pin(reln, blkno, &vmbuffer);
536 894 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
537 894 : ReleaseBuffer(vmbuffer);
538 894 : FreeFakeRelcacheEntry(reln);
539 : }
540 :
541 : /*
542 : * If we inserted the first and only tuple on the page, re-initialize the
543 : * page from scratch.
544 : */
545 1295651 : if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
546 : {
547 17560 : buffer = XLogInitBufferForRedo(record, 0);
548 17560 : page = BufferGetPage(buffer);
549 17560 : PageInit(page, BufferGetPageSize(buffer), 0);
550 17560 : action = BLK_NEEDS_REDO;
551 : }
552 : else
553 1278091 : action = XLogReadBufferForRedo(record, 0, &buffer);
554 1295651 : if (action == BLK_NEEDS_REDO)
555 : {
556 : Size datalen;
557 : char *data;
558 :
559 1292795 : page = BufferGetPage(buffer);
560 :
561 1292795 : if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
562 0 : elog(PANIC, "invalid max offset number");
563 :
564 1292795 : data = XLogRecGetBlockData(record, 0, &datalen);
565 :
566 1292795 : newlen = datalen - SizeOfHeapHeader;
567 : Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
568 1292795 : memcpy(&xlhdr, data, SizeOfHeapHeader);
569 1292795 : data += SizeOfHeapHeader;
570 :
571 1292795 : htup = &tbuf.hdr;
572 1292795 : MemSet(htup, 0, SizeofHeapTupleHeader);
573 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
574 1292795 : memcpy((char *) htup + SizeofHeapTupleHeader,
575 : data,
576 : newlen);
577 1292795 : newlen += SizeofHeapTupleHeader;
578 1292795 : htup->t_infomask2 = xlhdr.t_infomask2;
579 1292795 : htup->t_infomask = xlhdr.t_infomask;
580 1292795 : htup->t_hoff = xlhdr.t_hoff;
581 1292795 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
582 1292795 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
583 1292795 : htup->t_ctid = target_tid;
584 :
585 1292795 : if (PageAddItem(page, htup, newlen, xlrec->offnum, true, true) == InvalidOffsetNumber)
586 0 : elog(PANIC, "failed to add tuple");
587 :
588 1292795 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
589 :
590 1292795 : PageSetLSN(page, lsn);
591 :
592 1292795 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
593 313 : PageClearAllVisible(page);
594 :
595 1292795 : MarkBufferDirty(buffer);
596 : }
597 1295651 : if (BufferIsValid(buffer))
598 1295651 : UnlockReleaseBuffer(buffer);
599 :
600 : /*
601 : * If the page is running low on free space, update the FSM as well.
602 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
603 : * better than that without knowing the fill-factor for the table.
604 : *
605 : * XXX: Don't do this if the page was restored from full page image. We
606 : * don't bother to update the FSM in that case, it doesn't need to be
607 : * totally accurate anyway.
608 : */
609 1295651 : if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
610 254188 : XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
611 1295651 : }
612 :
613 : /*
614 : * Replay XLOG_HEAP2_MULTI_INSERT records.
615 : */
616 : static void
617 61687 : heap_xlog_multi_insert(XLogReaderState *record)
618 : {
619 61687 : XLogRecPtr lsn = record->EndRecPtr;
620 : xl_heap_multi_insert *xlrec;
621 : RelFileLocator rlocator;
622 : BlockNumber blkno;
623 : Buffer buffer;
624 : Page page;
625 : union
626 : {
627 : HeapTupleHeaderData hdr;
628 : char data[MaxHeapTupleSize];
629 : } tbuf;
630 : HeapTupleHeader htup;
631 : uint32 newlen;
632 61687 : Size freespace = 0;
633 : int i;
634 61687 : bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
635 : XLogRedoAction action;
636 61687 : Buffer vmbuffer = InvalidBuffer;
637 :
638 : /*
639 : * Insertion doesn't overwrite MVCC data, so no conflict processing is
640 : * required.
641 : */
642 61687 : xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
643 :
644 61687 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
645 :
646 : /* check that the mutually exclusive flags are not both set */
647 : Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
648 : (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
649 :
650 : /*
651 : * The visibility map may need to be fixed even if the heap page is
652 : * already up-to-date.
653 : */
654 61687 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
655 : {
656 907 : Relation reln = CreateFakeRelcacheEntry(rlocator);
657 :
658 907 : visibilitymap_pin(reln, blkno, &vmbuffer);
659 907 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
660 907 : ReleaseBuffer(vmbuffer);
661 907 : vmbuffer = InvalidBuffer;
662 907 : FreeFakeRelcacheEntry(reln);
663 : }
664 :
665 61687 : if (isinit)
666 : {
667 1834 : buffer = XLogInitBufferForRedo(record, 0);
668 1834 : page = BufferGetPage(buffer);
669 1834 : PageInit(page, BufferGetPageSize(buffer), 0);
670 1834 : action = BLK_NEEDS_REDO;
671 : }
672 : else
673 59853 : action = XLogReadBufferForRedo(record, 0, &buffer);
674 61687 : if (action == BLK_NEEDS_REDO)
675 : {
676 : char *tupdata;
677 : char *endptr;
678 : Size len;
679 :
680 : /* Tuples are stored as block data */
681 60068 : tupdata = XLogRecGetBlockData(record, 0, &len);
682 60068 : endptr = tupdata + len;
683 :
684 60068 : page = BufferGetPage(buffer);
685 :
686 273282 : for (i = 0; i < xlrec->ntuples; i++)
687 : {
688 : OffsetNumber offnum;
689 : xl_multi_insert_tuple *xlhdr;
690 :
691 : /*
692 : * If we're reinitializing the page, the tuples are stored in
693 : * order from FirstOffsetNumber. Otherwise there's an array of
694 : * offsets in the WAL record, and the tuples come after that.
695 : */
696 213214 : if (isinit)
697 99454 : offnum = FirstOffsetNumber + i;
698 : else
699 113760 : offnum = xlrec->offsets[i];
700 213214 : if (PageGetMaxOffsetNumber(page) + 1 < offnum)
701 0 : elog(PANIC, "invalid max offset number");
702 :
703 213214 : xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
704 213214 : tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
705 :
706 213214 : newlen = xlhdr->datalen;
707 : Assert(newlen <= MaxHeapTupleSize);
708 213214 : htup = &tbuf.hdr;
709 213214 : MemSet(htup, 0, SizeofHeapTupleHeader);
710 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
711 213214 : memcpy((char *) htup + SizeofHeapTupleHeader,
712 : tupdata,
713 : newlen);
714 213214 : tupdata += newlen;
715 :
716 213214 : newlen += SizeofHeapTupleHeader;
717 213214 : htup->t_infomask2 = xlhdr->t_infomask2;
718 213214 : htup->t_infomask = xlhdr->t_infomask;
719 213214 : htup->t_hoff = xlhdr->t_hoff;
720 213214 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
721 213214 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
722 213214 : ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
723 213214 : ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
724 :
725 213214 : offnum = PageAddItem(page, htup, newlen, offnum, true, true);
726 213214 : if (offnum == InvalidOffsetNumber)
727 0 : elog(PANIC, "failed to add tuple");
728 : }
729 60068 : if (tupdata != endptr)
730 0 : elog(PANIC, "total tuple length mismatch");
731 :
732 60068 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
733 :
734 60068 : PageSetLSN(page, lsn);
735 :
736 60068 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
737 107 : PageClearAllVisible(page);
738 :
739 : /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
740 60068 : if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
741 : {
742 4 : PageSetAllVisible(page);
743 4 : PageClearPrunable(page);
744 : }
745 :
746 60068 : MarkBufferDirty(buffer);
747 : }
748 61687 : if (BufferIsValid(buffer))
749 61687 : UnlockReleaseBuffer(buffer);
750 :
751 61687 : buffer = InvalidBuffer;
752 :
753 : /*
754 : * Read and update the visibility map (VM) block.
755 : *
756 : * We must always redo VM changes, even if the corresponding heap page
757 : * update was skipped due to the LSN interlock. Each VM block covers
758 : * multiple heap pages, so later WAL records may update other bits in the
759 : * same block. If this record includes an FPI (full-page image),
760 : * subsequent WAL records may depend on it to guard against torn pages.
761 : *
762 : * Heap page changes are replayed first to preserve the invariant:
763 : * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
764 : *
765 : * Note that we released the heap page lock above. During normal
766 : * operation, this would be unsafe — a concurrent modification could
767 : * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
768 : * invariant.
769 : *
770 : * During recovery, however, no concurrent writers exist. Therefore,
771 : * updating the VM without holding the heap page lock is safe enough. This
772 : * same approach is taken when replaying xl_heap_visible records (see
773 : * heap_xlog_visible()).
774 : */
775 61691 : if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
776 4 : XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
777 : &vmbuffer) == BLK_NEEDS_REDO)
778 : {
779 0 : Page vmpage = BufferGetPage(vmbuffer);
780 :
781 : /* initialize the page if it was read as zeros */
782 0 : if (PageIsNew(vmpage))
783 0 : PageInit(vmpage, BLCKSZ, 0);
784 :
785 0 : visibilitymap_set_vmbits(blkno,
786 : vmbuffer,
787 : VISIBILITYMAP_ALL_VISIBLE |
788 : VISIBILITYMAP_ALL_FROZEN,
789 : rlocator);
790 :
791 : Assert(BufferIsDirty(vmbuffer));
792 0 : PageSetLSN(vmpage, lsn);
793 : }
794 :
795 61687 : if (BufferIsValid(vmbuffer))
796 4 : UnlockReleaseBuffer(vmbuffer);
797 :
798 : /*
799 : * If the page is running low on free space, update the FSM as well.
800 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
801 : * better than that without knowing the fill-factor for the table.
802 : *
803 : * XXX: Don't do this if the page was restored from full page image. We
804 : * don't bother to update the FSM in that case, it doesn't need to be
805 : * totally accurate anyway.
806 : */
807 61687 : if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
808 16847 : XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
809 61687 : }
810 :
811 : /*
812 : * Replay XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE records.
813 : */
814 : static void
815 95286 : heap_xlog_update(XLogReaderState *record, bool hot_update)
816 : {
817 95286 : XLogRecPtr lsn = record->EndRecPtr;
818 95286 : xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
819 : RelFileLocator rlocator;
820 : BlockNumber oldblk;
821 : BlockNumber newblk;
822 : ItemPointerData newtid;
823 : Buffer obuffer,
824 : nbuffer;
825 : Page page;
826 : OffsetNumber offnum;
827 : ItemId lp;
828 : HeapTupleData oldtup;
829 : HeapTupleHeader htup;
830 95286 : uint16 prefixlen = 0,
831 95286 : suffixlen = 0;
832 : char *newp;
833 : union
834 : {
835 : HeapTupleHeaderData hdr;
836 : char data[MaxHeapTupleSize];
837 : } tbuf;
838 : xl_heap_header xlhdr;
839 : uint32 newlen;
840 95286 : Size freespace = 0;
841 : XLogRedoAction oldaction;
842 : XLogRedoAction newaction;
843 :
844 : /* initialize to keep the compiler quiet */
845 95286 : oldtup.t_data = NULL;
846 95286 : oldtup.t_len = 0;
847 :
848 95286 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
849 95286 : if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
850 : {
851 : /* HOT updates are never done across pages */
852 : Assert(!hot_update);
853 : }
854 : else
855 40783 : oldblk = newblk;
856 :
857 95286 : ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
858 :
859 : /*
860 : * The visibility map may need to be fixed even if the heap page is
861 : * already up-to-date.
862 : */
863 95286 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
864 : {
865 207 : Relation reln = CreateFakeRelcacheEntry(rlocator);
866 207 : Buffer vmbuffer = InvalidBuffer;
867 :
868 207 : visibilitymap_pin(reln, oldblk, &vmbuffer);
869 207 : visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
870 207 : ReleaseBuffer(vmbuffer);
871 207 : FreeFakeRelcacheEntry(reln);
872 : }
873 :
874 : /*
875 : * In normal operation, it is important to lock the two pages in
876 : * page-number order, to avoid possible deadlocks against other update
877 : * operations going the other way. However, during WAL replay there can
878 : * be no other update happening, so we don't need to worry about that. But
879 : * we *do* need to worry that we don't expose an inconsistent state to Hot
880 : * Standby queries --- so the original page can't be unlocked before we've
881 : * added the new tuple to the new page.
882 : */
883 :
884 : /* Deal with old tuple version */
885 95286 : oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
886 : &obuffer);
887 95286 : if (oldaction == BLK_NEEDS_REDO)
888 : {
889 94949 : page = BufferGetPage(obuffer);
890 94949 : offnum = xlrec->old_offnum;
891 94949 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
892 0 : elog(PANIC, "offnum out of range");
893 94949 : lp = PageGetItemId(page, offnum);
894 94949 : if (!ItemIdIsNormal(lp))
895 0 : elog(PANIC, "invalid lp");
896 :
897 94949 : htup = (HeapTupleHeader) PageGetItem(page, lp);
898 :
899 94949 : oldtup.t_data = htup;
900 94949 : oldtup.t_len = ItemIdGetLength(lp);
901 :
902 94949 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
903 94949 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
904 94949 : if (hot_update)
905 37515 : HeapTupleHeaderSetHotUpdated(htup);
906 : else
907 57434 : HeapTupleHeaderClearHotUpdated(htup);
908 94949 : fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
909 : &htup->t_infomask2);
910 94949 : HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
911 94949 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
912 : /* Set forward chain link in t_ctid */
913 94949 : htup->t_ctid = newtid;
914 :
915 : /* Mark the page as a candidate for pruning */
916 94949 : PageSetPrunable(page, XLogRecGetXid(record));
917 :
918 94949 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
919 200 : PageClearAllVisible(page);
920 :
921 94949 : PageSetLSN(page, lsn);
922 94949 : MarkBufferDirty(obuffer);
923 : }
924 :
925 : /*
926 : * Read the page the new tuple goes into, if different from old.
927 : */
928 95286 : if (oldblk == newblk)
929 : {
930 40783 : nbuffer = obuffer;
931 40783 : newaction = oldaction;
932 : }
933 54503 : else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
934 : {
935 589 : nbuffer = XLogInitBufferForRedo(record, 0);
936 589 : page = BufferGetPage(nbuffer);
937 589 : PageInit(page, BufferGetPageSize(nbuffer), 0);
938 589 : newaction = BLK_NEEDS_REDO;
939 : }
940 : else
941 53914 : newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
942 :
943 : /*
944 : * The visibility map may need to be fixed even if the heap page is
945 : * already up-to-date.
946 : */
947 95286 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
948 : {
949 170 : Relation reln = CreateFakeRelcacheEntry(rlocator);
950 170 : Buffer vmbuffer = InvalidBuffer;
951 :
952 170 : visibilitymap_pin(reln, newblk, &vmbuffer);
953 170 : visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
954 170 : ReleaseBuffer(vmbuffer);
955 170 : FreeFakeRelcacheEntry(reln);
956 : }
957 :
958 : /* Deal with new tuple */
959 95286 : if (newaction == BLK_NEEDS_REDO)
960 : {
961 : char *recdata;
962 : char *recdata_end;
963 : Size datalen;
964 : Size tuplen;
965 :
966 94737 : recdata = XLogRecGetBlockData(record, 0, &datalen);
967 94737 : recdata_end = recdata + datalen;
968 :
969 94737 : page = BufferGetPage(nbuffer);
970 :
971 94737 : offnum = xlrec->new_offnum;
972 94737 : if (PageGetMaxOffsetNumber(page) + 1 < offnum)
973 0 : elog(PANIC, "invalid max offset number");
974 :
975 94737 : if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
976 : {
977 : Assert(newblk == oldblk);
978 16874 : memcpy(&prefixlen, recdata, sizeof(uint16));
979 16874 : recdata += sizeof(uint16);
980 : }
981 94737 : if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
982 : {
983 : Assert(newblk == oldblk);
984 34866 : memcpy(&suffixlen, recdata, sizeof(uint16));
985 34866 : recdata += sizeof(uint16);
986 : }
987 :
988 94737 : memcpy(&xlhdr, recdata, SizeOfHeapHeader);
989 94737 : recdata += SizeOfHeapHeader;
990 :
991 94737 : tuplen = recdata_end - recdata;
992 : Assert(tuplen <= MaxHeapTupleSize);
993 :
994 94737 : htup = &tbuf.hdr;
995 94737 : MemSet(htup, 0, SizeofHeapTupleHeader);
996 :
997 : /*
998 : * Reconstruct the new tuple using the prefix and/or suffix from the
999 : * old tuple, and the data stored in the WAL record.
1000 : */
1001 94737 : newp = (char *) htup + SizeofHeapTupleHeader;
1002 94737 : if (prefixlen > 0)
1003 : {
1004 : int len;
1005 :
1006 : /* copy bitmap [+ padding] [+ oid] from WAL record */
1007 16874 : len = xlhdr.t_hoff - SizeofHeapTupleHeader;
1008 16874 : memcpy(newp, recdata, len);
1009 16874 : recdata += len;
1010 16874 : newp += len;
1011 :
1012 : /* copy prefix from old tuple */
1013 16874 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
1014 16874 : newp += prefixlen;
1015 :
1016 : /* copy new tuple data from WAL record */
1017 16874 : len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
1018 16874 : memcpy(newp, recdata, len);
1019 16874 : recdata += len;
1020 16874 : newp += len;
1021 : }
1022 : else
1023 : {
1024 : /*
1025 : * copy bitmap [+ padding] [+ oid] + data from record, all in one
1026 : * go
1027 : */
1028 77863 : memcpy(newp, recdata, tuplen);
1029 77863 : recdata += tuplen;
1030 77863 : newp += tuplen;
1031 : }
1032 : Assert(recdata == recdata_end);
1033 :
1034 : /* copy suffix from old tuple */
1035 94737 : if (suffixlen > 0)
1036 34866 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
1037 :
1038 94737 : newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
1039 94737 : htup->t_infomask2 = xlhdr.t_infomask2;
1040 94737 : htup->t_infomask = xlhdr.t_infomask;
1041 94737 : htup->t_hoff = xlhdr.t_hoff;
1042 :
1043 94737 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
1044 94737 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
1045 94737 : HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
1046 : /* Make sure there is no forward chain link in t_ctid */
1047 94737 : htup->t_ctid = newtid;
1048 :
1049 94737 : offnum = PageAddItem(page, htup, newlen, offnum, true, true);
1050 94737 : if (offnum == InvalidOffsetNumber)
1051 0 : elog(PANIC, "failed to add tuple");
1052 :
1053 94737 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
1054 86 : PageClearAllVisible(page);
1055 :
1056 94737 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
1057 :
1058 94737 : PageSetLSN(page, lsn);
1059 94737 : MarkBufferDirty(nbuffer);
1060 : }
1061 :
1062 95286 : if (BufferIsValid(nbuffer) && nbuffer != obuffer)
1063 54503 : UnlockReleaseBuffer(nbuffer);
1064 95286 : if (BufferIsValid(obuffer))
1065 95286 : UnlockReleaseBuffer(obuffer);
1066 :
1067 : /*
1068 : * If the new page is running low on free space, update the FSM as well.
1069 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
1070 : * better than that without knowing the fill-factor for the table.
1071 : *
1072 : * However, don't update the FSM on HOT updates, because after crash
1073 : * recovery, either the old or the new tuple will certainly be dead and
1074 : * prunable. After pruning, the page will have roughly as much free space
1075 : * as it did before the update, assuming the new tuple is about the same
1076 : * size as the old one.
1077 : *
1078 : * XXX: Don't do this if the page was restored from full page image. We
1079 : * don't bother to update the FSM in that case, it doesn't need to be
1080 : * totally accurate anyway.
1081 : */
1082 95286 : if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
1083 11683 : XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
1084 95286 : }
1085 :
1086 : /*
1087 : * Replay XLOG_HEAP_CONFIRM records.
1088 : */
1089 : static void
1090 93 : heap_xlog_confirm(XLogReaderState *record)
1091 : {
1092 93 : XLogRecPtr lsn = record->EndRecPtr;
1093 93 : xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record);
1094 : Buffer buffer;
1095 : Page page;
1096 : OffsetNumber offnum;
1097 : ItemId lp;
1098 : HeapTupleHeader htup;
1099 :
1100 93 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1101 : {
1102 93 : page = BufferGetPage(buffer);
1103 :
1104 93 : offnum = xlrec->offnum;
1105 93 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1106 0 : elog(PANIC, "offnum out of range");
1107 93 : lp = PageGetItemId(page, offnum);
1108 93 : if (!ItemIdIsNormal(lp))
1109 0 : elog(PANIC, "invalid lp");
1110 :
1111 93 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1112 :
1113 : /*
1114 : * Confirm tuple as actually inserted
1115 : */
1116 93 : ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum);
1117 :
1118 93 : PageSetLSN(page, lsn);
1119 93 : MarkBufferDirty(buffer);
1120 : }
1121 93 : if (BufferIsValid(buffer))
1122 93 : UnlockReleaseBuffer(buffer);
1123 93 : }
1124 :
1125 : /*
1126 : * Replay XLOG_HEAP_LOCK records.
1127 : */
1128 : static void
1129 55434 : heap_xlog_lock(XLogReaderState *record)
1130 : {
1131 55434 : XLogRecPtr lsn = record->EndRecPtr;
1132 55434 : xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
1133 : Buffer buffer;
1134 : Page page;
1135 : OffsetNumber offnum;
1136 : ItemId lp;
1137 : HeapTupleHeader htup;
1138 :
1139 : /*
1140 : * The visibility map may need to be fixed even if the heap page is
1141 : * already up-to-date.
1142 : */
1143 55434 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1144 : {
1145 : RelFileLocator rlocator;
1146 46 : Buffer vmbuffer = InvalidBuffer;
1147 : BlockNumber block;
1148 : Relation reln;
1149 :
1150 46 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1151 46 : reln = CreateFakeRelcacheEntry(rlocator);
1152 :
1153 46 : visibilitymap_pin(reln, block, &vmbuffer);
1154 46 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1155 :
1156 46 : ReleaseBuffer(vmbuffer);
1157 46 : FreeFakeRelcacheEntry(reln);
1158 : }
1159 :
1160 55434 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1161 : {
1162 55229 : page = BufferGetPage(buffer);
1163 :
1164 55229 : offnum = xlrec->offnum;
1165 55229 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1166 0 : elog(PANIC, "offnum out of range");
1167 55229 : lp = PageGetItemId(page, offnum);
1168 55229 : if (!ItemIdIsNormal(lp))
1169 0 : elog(PANIC, "invalid lp");
1170 :
1171 55229 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1172 :
1173 55229 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1174 55229 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1175 55229 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1176 : &htup->t_infomask2);
1177 :
1178 : /*
1179 : * Clear relevant update flags, but only if the modified infomask says
1180 : * there's no update.
1181 : */
1182 55229 : if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
1183 : {
1184 55229 : HeapTupleHeaderClearHotUpdated(htup);
1185 : /* Make sure there is no forward chain link in t_ctid */
1186 55229 : ItemPointerSet(&htup->t_ctid,
1187 : BufferGetBlockNumber(buffer),
1188 : offnum);
1189 : }
1190 55229 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1191 55229 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
1192 55229 : PageSetLSN(page, lsn);
1193 55229 : MarkBufferDirty(buffer);
1194 : }
1195 55434 : if (BufferIsValid(buffer))
1196 55434 : UnlockReleaseBuffer(buffer);
1197 55434 : }
1198 :
1199 : /*
1200 : * Replay XLOG_HEAP2_LOCK_UPDATED records.
1201 : */
1202 : static void
1203 0 : heap_xlog_lock_updated(XLogReaderState *record)
1204 : {
1205 0 : XLogRecPtr lsn = record->EndRecPtr;
1206 : xl_heap_lock_updated *xlrec;
1207 : Buffer buffer;
1208 : Page page;
1209 : OffsetNumber offnum;
1210 : ItemId lp;
1211 : HeapTupleHeader htup;
1212 :
1213 0 : xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
1214 :
1215 : /*
1216 : * The visibility map may need to be fixed even if the heap page is
1217 : * already up-to-date.
1218 : */
1219 0 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1220 : {
1221 : RelFileLocator rlocator;
1222 0 : Buffer vmbuffer = InvalidBuffer;
1223 : BlockNumber block;
1224 : Relation reln;
1225 :
1226 0 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1227 0 : reln = CreateFakeRelcacheEntry(rlocator);
1228 :
1229 0 : visibilitymap_pin(reln, block, &vmbuffer);
1230 0 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1231 :
1232 0 : ReleaseBuffer(vmbuffer);
1233 0 : FreeFakeRelcacheEntry(reln);
1234 : }
1235 :
1236 0 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1237 : {
1238 0 : page = BufferGetPage(buffer);
1239 :
1240 0 : offnum = xlrec->offnum;
1241 0 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1242 0 : elog(PANIC, "offnum out of range");
1243 0 : lp = PageGetItemId(page, offnum);
1244 0 : if (!ItemIdIsNormal(lp))
1245 0 : elog(PANIC, "invalid lp");
1246 :
1247 0 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1248 :
1249 0 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1250 0 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1251 0 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1252 : &htup->t_infomask2);
1253 0 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1254 :
1255 0 : PageSetLSN(page, lsn);
1256 0 : MarkBufferDirty(buffer);
1257 : }
1258 0 : if (BufferIsValid(buffer))
1259 0 : UnlockReleaseBuffer(buffer);
1260 0 : }
1261 :
1262 : /*
1263 : * Replay XLOG_HEAP_INPLACE records.
1264 : */
1265 : static void
1266 7880 : heap_xlog_inplace(XLogReaderState *record)
1267 : {
1268 7880 : XLogRecPtr lsn = record->EndRecPtr;
1269 7880 : xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
1270 : Buffer buffer;
1271 : Page page;
1272 : OffsetNumber offnum;
1273 : ItemId lp;
1274 : HeapTupleHeader htup;
1275 : uint32 oldlen;
1276 : Size newlen;
1277 :
1278 7880 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1279 : {
1280 7691 : char *newtup = XLogRecGetBlockData(record, 0, &newlen);
1281 :
1282 7691 : page = BufferGetPage(buffer);
1283 :
1284 7691 : offnum = xlrec->offnum;
1285 7691 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1286 0 : elog(PANIC, "offnum out of range");
1287 7691 : lp = PageGetItemId(page, offnum);
1288 7691 : if (!ItemIdIsNormal(lp))
1289 0 : elog(PANIC, "invalid lp");
1290 :
1291 7691 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1292 :
1293 7691 : oldlen = ItemIdGetLength(lp) - htup->t_hoff;
1294 7691 : if (oldlen != newlen)
1295 0 : elog(PANIC, "wrong tuple length");
1296 :
1297 7691 : memcpy((char *) htup + htup->t_hoff, newtup, newlen);
1298 :
1299 7691 : PageSetLSN(page, lsn);
1300 7691 : MarkBufferDirty(buffer);
1301 : }
1302 7880 : if (BufferIsValid(buffer))
1303 7880 : UnlockReleaseBuffer(buffer);
1304 :
1305 7880 : ProcessCommittedInvalidationMessages(xlrec->msgs,
1306 : xlrec->nmsgs,
1307 7880 : xlrec->relcacheInitFileInval,
1308 : xlrec->dbId,
1309 : xlrec->tsId);
1310 7880 : }
1311 :
1312 : void
1313 1763740 : heap_redo(XLogReaderState *record)
1314 : {
1315 1763740 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1316 :
1317 : /*
1318 : * These operations don't overwrite MVCC data so no conflict processing is
1319 : * required. The ones in heap2 rmgr do.
1320 : */
1321 :
1322 1763740 : switch (info & XLOG_HEAP_OPMASK)
1323 : {
1324 1295651 : case XLOG_HEAP_INSERT:
1325 1295651 : heap_xlog_insert(record);
1326 1295651 : break;
1327 309394 : case XLOG_HEAP_DELETE:
1328 309394 : heap_xlog_delete(record);
1329 309394 : break;
1330 57473 : case XLOG_HEAP_UPDATE:
1331 57473 : heap_xlog_update(record, false);
1332 57473 : break;
1333 2 : case XLOG_HEAP_TRUNCATE:
1334 :
1335 : /*
1336 : * TRUNCATE is a no-op because the actions are already logged as
1337 : * SMGR WAL records. TRUNCATE WAL record only exists for logical
1338 : * decoding.
1339 : */
1340 2 : break;
1341 37813 : case XLOG_HEAP_HOT_UPDATE:
1342 37813 : heap_xlog_update(record, true);
1343 37813 : break;
1344 93 : case XLOG_HEAP_CONFIRM:
1345 93 : heap_xlog_confirm(record);
1346 93 : break;
1347 55434 : case XLOG_HEAP_LOCK:
1348 55434 : heap_xlog_lock(record);
1349 55434 : break;
1350 7880 : case XLOG_HEAP_INPLACE:
1351 7880 : heap_xlog_inplace(record);
1352 7880 : break;
1353 0 : default:
1354 0 : elog(PANIC, "heap_redo: unknown op code %u", info);
1355 : }
1356 1763740 : }
1357 :
1358 : void
1359 82703 : heap2_redo(XLogReaderState *record)
1360 : {
1361 82703 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1362 :
1363 82703 : switch (info & XLOG_HEAP_OPMASK)
1364 : {
1365 14786 : case XLOG_HEAP2_PRUNE_ON_ACCESS:
1366 : case XLOG_HEAP2_PRUNE_VACUUM_SCAN:
1367 : case XLOG_HEAP2_PRUNE_VACUUM_CLEANUP:
1368 14786 : heap_xlog_prune_freeze(record);
1369 14786 : break;
1370 5182 : case XLOG_HEAP2_VISIBLE:
1371 5182 : heap_xlog_visible(record);
1372 5182 : break;
1373 61687 : case XLOG_HEAP2_MULTI_INSERT:
1374 61687 : heap_xlog_multi_insert(record);
1375 61687 : break;
1376 0 : case XLOG_HEAP2_LOCK_UPDATED:
1377 0 : heap_xlog_lock_updated(record);
1378 0 : break;
1379 1048 : case XLOG_HEAP2_NEW_CID:
1380 :
1381 : /*
1382 : * Nothing to do on a real replay, only used during logical
1383 : * decoding.
1384 : */
1385 1048 : break;
1386 0 : case XLOG_HEAP2_REWRITE:
1387 0 : heap_xlog_logical_rewrite(record);
1388 0 : break;
1389 0 : default:
1390 0 : elog(PANIC, "heap2_redo: unknown op code %u", info);
1391 : }
1392 82703 : }
1393 :
1394 : /*
1395 : * Mask a heap page before performing consistency checks on it.
1396 : */
1397 : void
1398 2963238 : heap_mask(char *pagedata, BlockNumber blkno)
1399 : {
1400 2963238 : Page page = (Page) pagedata;
1401 : OffsetNumber off;
1402 :
1403 2963238 : mask_page_lsn_and_checksum(page);
1404 :
1405 2963238 : mask_page_hint_bits(page);
1406 2963238 : mask_unused_space(page);
1407 :
1408 244637596 : for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
1409 : {
1410 241674358 : ItemId iid = PageGetItemId(page, off);
1411 : char *page_item;
1412 :
1413 241674358 : page_item = (char *) (page + ItemIdGetOffset(iid));
1414 :
1415 241674358 : if (ItemIdIsNormal(iid))
1416 : {
1417 225649244 : HeapTupleHeader page_htup = (HeapTupleHeader) page_item;
1418 :
1419 : /*
1420 : * If xmin of a tuple is not yet frozen, we should ignore
1421 : * differences in hint bits, since they can be set without
1422 : * emitting WAL.
1423 : */
1424 225649244 : if (!HeapTupleHeaderXminFrozen(page_htup))
1425 223522740 : page_htup->t_infomask &= ~HEAP_XACT_MASK;
1426 : else
1427 : {
1428 : /* Still we need to mask xmax hint bits. */
1429 2126504 : page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
1430 2126504 : page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
1431 : }
1432 :
1433 : /*
1434 : * During replay, we set Command Id to FirstCommandId. Hence, mask
1435 : * it. See heap_xlog_insert() for details.
1436 : */
1437 225649244 : page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;
1438 :
1439 : /*
1440 : * For a speculative tuple, heap_insert() does not set ctid in the
1441 : * caller-passed heap tuple itself, leaving the ctid field to
1442 : * contain a speculative token value - a per-backend monotonically
1443 : * increasing identifier. Besides, it does not WAL-log ctid under
1444 : * any circumstances.
1445 : *
1446 : * During redo, heap_xlog_insert() sets t_ctid to current block
1447 : * number and self offset number. It doesn't care about any
1448 : * speculative insertions on the primary. Hence, we set t_ctid to
1449 : * current block number and self offset number to ignore any
1450 : * inconsistency.
1451 : */
1452 225649244 : if (HeapTupleHeaderIsSpeculative(page_htup))
1453 94 : ItemPointerSet(&page_htup->t_ctid, blkno, off);
1454 :
1455 : /*
1456 : * NB: Not ignoring ctid changes due to the tuple having moved
1457 : * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
1458 : * important information that needs to be in-sync between primary
1459 : * and standby, and thus is WAL logged.
1460 : */
1461 : }
1462 :
1463 : /*
1464 : * Ignore any padding bytes after the tuple, when the length of the
1465 : * item is not MAXALIGNed.
1466 : */
1467 241674358 : if (ItemIdHasStorage(iid))
1468 : {
1469 225649244 : int len = ItemIdGetLength(iid);
1470 225649244 : int padlen = MAXALIGN(len) - len;
1471 :
1472 225649244 : if (padlen > 0)
1473 120736210 : memset(page_item + len, MASK_MARKER, padlen);
1474 : }
1475 : }
1476 2963238 : }
|