Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * heapam_xlog.c
4 : * WAL replay logic for heap access method.
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/heap/heapam_xlog.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/bufmask.h"
18 : #include "access/heapam.h"
19 : #include "access/visibilitymap.h"
20 : #include "access/xlog.h"
21 : #include "access/xlogutils.h"
22 : #include "storage/freespace.h"
23 : #include "storage/standby.h"
24 :
25 :
26 : /*
27 : * Replay XLOG_HEAP2_PRUNE_* records.
28 : */
29 : static void
30 30618 : heap_xlog_prune_freeze(XLogReaderState *record)
31 : {
32 30618 : XLogRecPtr lsn = record->EndRecPtr;
33 30618 : char *maindataptr = XLogRecGetData(record);
34 : xl_heap_prune xlrec;
35 : Buffer buffer;
36 : RelFileLocator rlocator;
37 : BlockNumber blkno;
38 30618 : Buffer vmbuffer = InvalidBuffer;
39 30618 : uint8 vmflags = 0;
40 30618 : Size freespace = 0;
41 :
42 30618 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
43 30618 : memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
44 30618 : maindataptr += SizeOfHeapPrune;
45 :
46 : /*
47 : * We will take an ordinary exclusive lock or a cleanup lock depending on
48 : * whether the XLHP_CLEANUP_LOCK flag is set. With an ordinary exclusive
49 : * lock, we'd better not be doing anything that requires moving existing
50 : * tuple data.
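     : * (A cleanup lock additionally guarantees that no other backend holds a
     : * pin on the buffer; that is required before moving tuple data around,
     : * since standby queries could otherwise still be looking at the old
     : * tuple locations.)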
51 : */
52 : Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
53 : (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);
54 :
55 30618 : if (xlrec.flags & XLHP_VM_ALL_VISIBLE)
56 : {
57 7648 : vmflags = VISIBILITYMAP_ALL_VISIBLE;
58 7648 : if (xlrec.flags & XLHP_VM_ALL_FROZEN)
59 6372 : vmflags |= VISIBILITYMAP_ALL_FROZEN;
60 : }
61 :
62 : /*
63 : * After xl_heap_prune is the optional snapshot conflict horizon.
64 : *
65 : * In Hot Standby mode, we must ensure that there are no running queries
66 : * which would conflict with the changes in this record. That means we
67 : * can't replay this record if it removes tuples that are still visible to
68 : * transactions on the standby, freezes tuples with xids that are still
69 : * considered running on the standby, or sets a page as all-visible in the
70 : * VM if it isn't all-visible to all transactions on the standby.
71 : */
72 30618 : if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
73 : {
74 : TransactionId snapshot_conflict_horizon;
75 :
76 : /* memcpy() because snapshot_conflict_horizon is stored unaligned */
77 22346 : memcpy(&snapshot_conflict_horizon, maindataptr, sizeof(TransactionId));
78 22346 : maindataptr += sizeof(TransactionId);
79 :
80 22346 : if (InHotStandby)
81 21882 : ResolveRecoveryConflictWithSnapshot(snapshot_conflict_horizon,
82 21882 : (xlrec.flags & XLHP_IS_CATALOG_REL) != 0,
83 : rlocator);
84 : }
85 :
86 : /*
87 : * If we have a full-page image of the heap block, restore it and we're
88 : * done with the heap block.
89 : */
90 30618 : if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
91 30618 : (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
92 : &buffer) == BLK_NEEDS_REDO)
93 : {
94 21608 : Page page = BufferGetPage(buffer);
95 : OffsetNumber *redirected;
96 : OffsetNumber *nowdead;
97 : OffsetNumber *nowunused;
98 : int nredirected;
99 : int ndead;
100 : int nunused;
101 : int nplans;
102 : Size datalen;
103 : xlhp_freeze_plan *plans;
104 : OffsetNumber *frz_offsets;
105 21608 : char *dataptr = XLogRecGetBlockData(record, 0, &datalen);
106 : bool do_prune;
107 :
108 21608 : heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
109 : &nplans, &plans, &frz_offsets,
110 : &nredirected, &redirected,
111 : &ndead, &nowdead,
112 : &nunused, &nowunused);
113 :
114 21608 : do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
115 :
116 : /* Ensure the record does something */
117 : Assert(do_prune || nplans > 0 || vmflags & VISIBILITYMAP_VALID_BITS);
118 :
119 : /*
120 : * Update all line pointers per the record, and repair fragmentation
121 : * if needed.
122 : */
123 21608 : if (do_prune)
124 20514 : heap_page_prune_execute(buffer,
125 20514 : (xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
126 : redirected, nredirected,
127 : nowdead, ndead,
128 : nowunused, nunused);
129 :
130 : /* Freeze tuples */
131 23742 : for (int p = 0; p < nplans; p++)
132 : {
133 : HeapTupleFreeze frz;
134 :
135 : /*
136 : * Convert freeze plan representation from WAL record into
137 : * per-tuple format used by heap_execute_freeze_tuple
138 : */
139 2134 : frz.xmax = plans[p].xmax;
140 2134 : frz.t_infomask2 = plans[p].t_infomask2;
141 2134 : frz.t_infomask = plans[p].t_infomask;
142 2134 : frz.frzflags = plans[p].frzflags;
143 2134 : frz.offset = InvalidOffsetNumber; /* unused, but be tidy */
144 :
145 98342 : for (int i = 0; i < plans[p].ntuples; i++)
146 : {
147 96208 : OffsetNumber offset = *(frz_offsets++);
148 : ItemId lp;
149 : HeapTupleHeader tuple;
150 :
151 96208 : lp = PageGetItemId(page, offset);
152 96208 : tuple = (HeapTupleHeader) PageGetItem(page, lp);
153 96208 : heap_execute_freeze_tuple(tuple, &frz);
154 : }
155 : }
156 :
157 : /* There should be no more data */
158 : Assert((char *) frz_offsets == dataptr + datalen);
159 :
160 : /*
161 : * The critical integrity requirement here is that we must never end
162 : * up with the visibility map bit set and the page-level
163 : * PD_ALL_VISIBLE bit unset. If that were to occur, a subsequent page
164 : * modification would fail to clear the visibility map bit.
165 : */
166 21608 : if (vmflags & VISIBILITYMAP_VALID_BITS)
167 5070 : PageSetAllVisible(page);
168 :
169 21608 : MarkBufferDirty(buffer);
170 :
171 : /*
172 : * See log_heap_prune_and_freeze() for commentary on when we set the
173 : * heap page LSN.
174 : */
175 21608 : if (do_prune || nplans > 0 ||
176 0 : ((vmflags & VISIBILITYMAP_VALID_BITS) && XLogHintBitIsNeeded()))
177 21608 : PageSetLSN(page, lsn);
178 :
179 : /*
180 : * Note: we don't worry about updating the page's prunability hints.
181 : * At worst this will cause an extra prune cycle to occur soon.
182 : */
183 : }
184 :
185 : /*
186 : * If we 1) released any space or line pointers or 2) set PD_ALL_VISIBLE
187 : * or the VM, update the freespace map.
188 : *
189 : * Even when no actual space is freed (when only marking the page
190 : * all-visible or frozen), we still update the FSM. Because the FSM is
191 : * unlogged and maintained heuristically, it often becomes stale on
192 : * standbys. If such a standby is later promoted and runs VACUUM, it will
193 : * skip recalculating free space for pages that were marked
194 : * all-visible/all-frozen. FreeSpaceMapVacuum() can then propagate overly
195 : * optimistic free space values upward, causing future insertions to
196 : * select pages that turn out to be unusable. In bulk, this can lead to
197 : * long stalls.
198 : *
199 : * To prevent this, always update the FSM even when only marking a page
200 : * all-visible/all-frozen.
201 : *
202 : * Do this regardless of whether a full-page image is logged, since FSM
203 : * data is not part of the page itself.
204 : */
205 30618 : if (BufferIsValid(buffer))
206 : {
207 30618 : if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
208 : XLHP_HAS_DEAD_ITEMS |
209 3644 : XLHP_HAS_NOW_UNUSED_ITEMS)) ||
210 3644 : (vmflags & VISIBILITYMAP_VALID_BITS))
211 26974 : freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
212 :
213 : /*
214 : * We want to avoid holding an exclusive lock on the heap buffer while
215 : * doing I/O (on either the FSM or the VM), so we'll release it now.
216 : */
217 30618 : UnlockReleaseBuffer(buffer);
218 : }
219 :
220 : /*
221 : * Now read and update the VM block.
222 : *
223 : * We must redo changes to the VM even if the heap page was skipped due to
224 : * the LSN interlock. See comment in heap_xlog_multi_insert() for more details
225 : * on replaying changes to the VM.
226 : */
227 38266 : if ((vmflags & VISIBILITYMAP_VALID_BITS) &&
228 7648 : XLogReadBufferForRedoExtended(record, 1,
229 : RBM_ZERO_ON_ERROR,
230 : false,
231 : &vmbuffer) == BLK_NEEDS_REDO)
232 : {
233 7482 : Page vmpage = BufferGetPage(vmbuffer);
234 :
235 : /* initialize the page if it was read as zeros */
236 7482 : if (PageIsNew(vmpage))
237 0 : PageInit(vmpage, BLCKSZ, 0);
238 :
239 7482 : visibilitymap_set_vmbits(blkno, vmbuffer, vmflags, rlocator);
240 :
241 : Assert(BufferIsDirty(vmbuffer));
242 7482 : PageSetLSN(vmpage, lsn);
243 : }
244 :
245 30618 : if (BufferIsValid(vmbuffer))
246 7648 : UnlockReleaseBuffer(vmbuffer);
247 :
248 30618 : if (freespace > 0)
249 26796 : XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
250 30618 : }
251 :
252 : /*
253 : * Replay XLOG_HEAP2_VISIBLE records.
254 : *
255 : * The critical integrity requirement here is that we must never end up with
256 : * a situation where the visibility map bit is set, and the page-level
257 : * PD_ALL_VISIBLE bit is clear. If that were to occur, then a subsequent
258 : * page modification would fail to clear the visibility map bit.
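     : * (For instance, heap_delete() only calls visibilitymap_clear() when it
     : * sees PD_ALL_VISIBLE set on the heap page, so the stale VM bit would
     : * survive and index-only scans could skip heap fetches they actually
     : * need.)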
259 : */
260 : static void
261 10442 : heap_xlog_visible(XLogReaderState *record)
262 : {
263 10442 : XLogRecPtr lsn = record->EndRecPtr;
264 10442 : xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
265 10442 : Buffer vmbuffer = InvalidBuffer;
266 : Buffer buffer;
267 : Page page;
268 : RelFileLocator rlocator;
269 : BlockNumber blkno;
270 : XLogRedoAction action;
271 :
272 : Assert((xlrec->flags & VISIBILITYMAP_XLOG_VALID_BITS) == xlrec->flags);
273 :
274 10442 : XLogRecGetBlockTag(record, 1, &rlocator, NULL, &blkno);
275 :
276 : /*
277 : * If there are any Hot Standby transactions running that have an xmin
278 : * horizon old enough that this page isn't all-visible for them, they
279 : * might incorrectly decide that an index-only scan can skip a heap fetch.
280 : *
281 : * NB: It might be better to throw some kind of "soft" conflict here that
282 : * forces any index-only scan that is in flight to perform heap fetches,
283 : * rather than killing the transaction outright.
284 : */
285 10442 : if (InHotStandby)
286 10094 : ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
287 10094 : xlrec->flags & VISIBILITYMAP_XLOG_CATALOG_REL,
288 : rlocator);
289 :
290 : /*
291 : * Read the heap page, if it still exists. If the heap file has been dropped
292 : * or truncated later in recovery, we don't need to update the page, but we'd
293 : * better still update the visibility map.
294 : */
295 10442 : action = XLogReadBufferForRedo(record, 1, &buffer);
296 10442 : if (action == BLK_NEEDS_REDO)
297 : {
298 : /*
299 : * We don't bump the LSN of the heap page when setting the visibility
300 : * map bit (unless checksums or wal_log_hints is enabled, in which
301 : * case we must). This exposes us to torn page hazards, but since
302 : * we're not inspecting the existing page contents in any way, we
303 : * don't care.
304 : */
305 6126 : page = BufferGetPage(buffer);
306 :
307 6126 : PageSetAllVisible(page);
308 :
309 6126 : if (XLogHintBitIsNeeded())
310 6126 : PageSetLSN(page, lsn);
311 :
312 6126 : MarkBufferDirty(buffer);
313 : }
314 : else if (action == BLK_RESTORED)
315 : {
316 : /*
317 : * If heap block was backed up, we already restored it and there's
318 : * nothing more to do. (This can only happen with checksums or
319 : * wal_log_hints enabled.)
320 : */
321 : }
322 :
323 10442 : if (BufferIsValid(buffer))
324 : {
325 10442 : Size space = PageGetFreeSpace(BufferGetPage(buffer));
326 :
327 10442 : UnlockReleaseBuffer(buffer);
328 :
329 : /*
330 : * Since the FSM is not WAL-logged and only updated heuristically, it
331 : * easily becomes stale on standbys. If the standby is later promoted
332 : * and runs VACUUM, it will skip updating individual free space
333 : * figures for pages that became all-visible (or all-frozen, depending
334 : * on the vacuum mode), which is troublesome because FreeSpaceMapVacuum
335 : * then propagates overly optimistic free space values to upper FSM layers;
336 : * later inserters try to use such pages only to find out that they
337 : * are unusable. This can cause long stalls when there are many such
338 : * pages.
339 : *
340 : * Forestall those problems by updating the FSM's idea of a page that
341 : * is becoming all-visible or all-frozen.
342 : *
343 : * Do this regardless of a full-page image being applied, since the
344 : * FSM data is not in the page anyway.
345 : */
346 10442 : if (xlrec->flags & VISIBILITYMAP_VALID_BITS)
347 10442 : XLogRecordPageWithFreeSpace(rlocator, blkno, space);
348 : }
349 :
350 : /*
351 : * Even if we skipped the heap page update due to the LSN interlock, it's
352 : * still safe to update the visibility map. Any WAL record that clears
353 : * the visibility map bit does so before checking the page LSN, so any
354 : * bits that need to be cleared will still be cleared.
355 : */
356 10442 : if (XLogReadBufferForRedoExtended(record, 0, RBM_ZERO_ON_ERROR, false,
357 : &vmbuffer) == BLK_NEEDS_REDO)
358 : {
359 10016 : Page vmpage = BufferGetPage(vmbuffer);
360 : Relation reln;
361 : uint8 vmbits;
362 :
363 : /* initialize the page if it was read as zeros */
364 10016 : if (PageIsNew(vmpage))
365 0 : PageInit(vmpage, BLCKSZ, 0);
366 :
367 : /* remove VISIBILITYMAP_XLOG_* */
368 10016 : vmbits = xlrec->flags & VISIBILITYMAP_VALID_BITS;
369 :
370 : /*
371 : * XLogReadBufferForRedoExtended locked the buffer. But
372 : * visibilitymap_set will handle locking itself.
373 : */
374 10016 : LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
375 :
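     : /*
     : * visibilitymap_set() expects a Relation, but no relcache is available
     : * during recovery; a fake relcache entry carrying the relfilelocator is
     : * enough for the visibility map routines used here.
     : */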
376 10016 : reln = CreateFakeRelcacheEntry(rlocator);
377 :
378 10016 : visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
379 : xlrec->snapshotConflictHorizon, vmbits);
380 :
381 10016 : ReleaseBuffer(vmbuffer);
382 10016 : FreeFakeRelcacheEntry(reln);
383 : }
384 426 : else if (BufferIsValid(vmbuffer))
385 426 : UnlockReleaseBuffer(vmbuffer);
386 10442 : }
387 :
388 : /*
389 : * Given an "infobits" field from an XLog record, set the correct bits in the
390 : * given infomask and infomask2 for the tuple touched by the record.
391 : *
392 : * (This is the reverse of compute_infobits).
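     : *
     : * For example, an infobits value of (XLHL_XMAX_EXCL_LOCK | XLHL_KEYS_UPDATED)
     : * sets HEAP_XMAX_EXCL_LOCK in *infomask and HEAP_KEYS_UPDATED in *infomask2,
     : * after the stale lock-related bits have been cleared.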
393 : */
394 : static void
395 914420 : fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
396 : {
397 914420 : *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
398 : HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
399 914420 : *infomask2 &= ~HEAP_KEYS_UPDATED;
400 :
401 914420 : if (infobits & XLHL_XMAX_IS_MULTI)
402 4 : *infomask |= HEAP_XMAX_IS_MULTI;
403 914420 : if (infobits & XLHL_XMAX_LOCK_ONLY)
404 111284 : *infomask |= HEAP_XMAX_LOCK_ONLY;
405 914420 : if (infobits & XLHL_XMAX_EXCL_LOCK)
406 110466 : *infomask |= HEAP_XMAX_EXCL_LOCK;
407 : /* note HEAP_XMAX_SHR_LOCK isn't considered here */
408 914420 : if (infobits & XLHL_XMAX_KEYSHR_LOCK)
409 842 : *infomask |= HEAP_XMAX_KEYSHR_LOCK;
410 :
411 914420 : if (infobits & XLHL_KEYS_UPDATED)
412 615470 : *infomask2 |= HEAP_KEYS_UPDATED;
413 914420 : }
414 :
415 : /*
416 : * Replay XLOG_HEAP_DELETE records.
417 : */
418 : static void
419 617010 : heap_xlog_delete(XLogReaderState *record)
420 : {
421 617010 : XLogRecPtr lsn = record->EndRecPtr;
422 617010 : xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
423 : Buffer buffer;
424 : Page page;
425 : ItemId lp;
426 : HeapTupleHeader htup;
427 : BlockNumber blkno;
428 : RelFileLocator target_locator;
429 : ItemPointerData target_tid;
430 :
431 617010 : XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
432 617010 : ItemPointerSetBlockNumber(&target_tid, blkno);
433 617010 : ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
434 :
435 : /*
436 : * The visibility map may need to be fixed even if the heap page is
437 : * already up-to-date.
438 : */
439 617010 : if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
440 : {
441 36 : Relation reln = CreateFakeRelcacheEntry(target_locator);
442 36 : Buffer vmbuffer = InvalidBuffer;
443 :
444 36 : visibilitymap_pin(reln, blkno, &vmbuffer);
445 36 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
446 36 : ReleaseBuffer(vmbuffer);
447 36 : FreeFakeRelcacheEntry(reln);
448 : }
449 :
450 617010 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
451 : {
452 612698 : page = BufferGetPage(buffer);
453 :
454 612698 : if (xlrec->offnum < 1 || xlrec->offnum > PageGetMaxOffsetNumber(page))
455 0 : elog(PANIC, "offnum out of range");
456 612698 : lp = PageGetItemId(page, xlrec->offnum);
457 612698 : if (!ItemIdIsNormal(lp))
458 0 : elog(PANIC, "invalid lp");
459 :
460 612698 : htup = (HeapTupleHeader) PageGetItem(page, lp);
461 :
462 612698 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
463 612698 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
464 612698 : HeapTupleHeaderClearHotUpdated(htup);
465 612698 : fix_infomask_from_infobits(xlrec->infobits_set,
466 : &htup->t_infomask, &htup->t_infomask2);
467 612698 : if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
468 612698 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
469 : else
470 0 : HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
471 612698 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
472 :
473 : /* Mark the page as a candidate for pruning */
474 612698 : PageSetPrunable(page, XLogRecGetXid(record));
475 :
476 612698 : if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
477 22 : PageClearAllVisible(page);
478 :
479 : /* Make sure t_ctid is set correctly */
480 612698 : if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
481 288 : HeapTupleHeaderSetMovedPartitions(htup);
482 : else
483 612410 : htup->t_ctid = target_tid;
484 612698 : PageSetLSN(page, lsn);
485 612698 : MarkBufferDirty(buffer);
486 : }
487 617010 : if (BufferIsValid(buffer))
488 617010 : UnlockReleaseBuffer(buffer);
489 617010 : }
490 :
491 : /*
492 : * Replay XLOG_HEAP_INSERT records.
493 : */
494 : static void
495 2590110 : heap_xlog_insert(XLogReaderState *record)
496 : {
497 2590110 : XLogRecPtr lsn = record->EndRecPtr;
498 2590110 : xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
499 : Buffer buffer;
500 : Page page;
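     : /* scratch area sized and aligned to hold any reconstructed heap tuple */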
501 : union
502 : {
503 : HeapTupleHeaderData hdr;
504 : char data[MaxHeapTupleSize];
505 : } tbuf;
506 : HeapTupleHeader htup;
507 : xl_heap_header xlhdr;
508 : uint32 newlen;
509 2590110 : Size freespace = 0;
510 : RelFileLocator target_locator;
511 : BlockNumber blkno;
512 : ItemPointerData target_tid;
513 : XLogRedoAction action;
514 :
515 2590110 : XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
516 2590110 : ItemPointerSetBlockNumber(&target_tid, blkno);
517 2590110 : ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
518 :
519 : /* No freezing in the heap_insert() code path */
520 : Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));
521 :
522 : /*
523 : * The visibility map may need to be fixed even if the heap page is
524 : * already up-to-date.
525 : */
526 2590110 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
527 : {
528 1890 : Relation reln = CreateFakeRelcacheEntry(target_locator);
529 1890 : Buffer vmbuffer = InvalidBuffer;
530 :
531 1890 : visibilitymap_pin(reln, blkno, &vmbuffer);
532 1890 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
533 1890 : ReleaseBuffer(vmbuffer);
534 1890 : FreeFakeRelcacheEntry(reln);
535 : }
536 :
537 : /*
538 : * If we inserted the first and only tuple on the page, re-initialize the
539 : * page from scratch.
540 : */
541 2590110 : if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
542 : {
543 34862 : buffer = XLogInitBufferForRedo(record, 0);
544 34862 : page = BufferGetPage(buffer);
545 34862 : PageInit(page, BufferGetPageSize(buffer), 0);
546 34862 : action = BLK_NEEDS_REDO;
547 : }
548 : else
549 2555248 : action = XLogReadBufferForRedo(record, 0, &buffer);
550 2590110 : if (action == BLK_NEEDS_REDO)
551 : {
552 : Size datalen;
553 : char *data;
554 :
555 2584392 : page = BufferGetPage(buffer);
556 :
557 2584392 : if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
558 0 : elog(PANIC, "invalid max offset number");
559 :
560 2584392 : data = XLogRecGetBlockData(record, 0, &datalen);
561 :
562 2584392 : newlen = datalen - SizeOfHeapHeader;
563 : Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
564 2584392 : memcpy(&xlhdr, data, SizeOfHeapHeader);
565 2584392 : data += SizeOfHeapHeader;
566 :
567 2584392 : htup = &tbuf.hdr;
568 2584392 : MemSet(htup, 0, SizeofHeapTupleHeader);
569 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
570 2584392 : memcpy((char *) htup + SizeofHeapTupleHeader,
571 : data,
572 : newlen);
573 2584392 : newlen += SizeofHeapTupleHeader;
574 2584392 : htup->t_infomask2 = xlhdr.t_infomask2;
575 2584392 : htup->t_infomask = xlhdr.t_infomask;
576 2584392 : htup->t_hoff = xlhdr.t_hoff;
577 2584392 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
578 2584392 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
579 2584392 : htup->t_ctid = target_tid;
580 :
581 2584392 : if (PageAddItem(page, htup, newlen, xlrec->offnum, true, true) == InvalidOffsetNumber)
582 0 : elog(PANIC, "failed to add tuple");
583 :
584 2584392 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
585 :
586 2584392 : PageSetLSN(page, lsn);
587 :
588 2584392 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
589 626 : PageClearAllVisible(page);
590 :
591 2584392 : MarkBufferDirty(buffer);
592 : }
593 2590110 : if (BufferIsValid(buffer))
594 2590110 : UnlockReleaseBuffer(buffer);
595 :
596 : /*
597 : * If the page is running low on free space, update the FSM as well.
598 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
599 : * better than that without knowing the fill-factor for the table.
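     : * (With the default 8 kB BLCKSZ, the BLCKSZ / 5 threshold below works out
     : * to 1638 bytes of remaining free space.)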
600 : *
601 : * XXX: Don't do this if the page was restored from a full-page image. We
602 : * don't bother to update the FSM in that case; it doesn't need to be
603 : * totally accurate anyway.
604 : */
605 2590110 : if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
606 508070 : XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
607 2590110 : }
608 :
609 : /*
610 : * Replay XLOG_HEAP2_MULTI_INSERT records.
611 : */
612 : static void
613 122668 : heap_xlog_multi_insert(XLogReaderState *record)
614 : {
615 122668 : XLogRecPtr lsn = record->EndRecPtr;
616 : xl_heap_multi_insert *xlrec;
617 : RelFileLocator rlocator;
618 : BlockNumber blkno;
619 : Buffer buffer;
620 : Page page;
621 : union
622 : {
623 : HeapTupleHeaderData hdr;
624 : char data[MaxHeapTupleSize];
625 : } tbuf;
626 : HeapTupleHeader htup;
627 : uint32 newlen;
628 122668 : Size freespace = 0;
629 : int i;
630 122668 : bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
631 : XLogRedoAction action;
632 122668 : Buffer vmbuffer = InvalidBuffer;
633 :
634 : /*
635 : * Insertion doesn't overwrite MVCC data, so no conflict processing is
636 : * required.
637 : */
638 122668 : xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
639 :
640 122668 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
641 :
642 : /* check that the mutually exclusive flags are not both set */
643 : Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
644 : (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
645 :
646 : /*
647 : * The visibility map may need to be fixed even if the heap page is
648 : * already up-to-date.
649 : */
650 122668 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
651 : {
652 1788 : Relation reln = CreateFakeRelcacheEntry(rlocator);
653 :
654 1788 : visibilitymap_pin(reln, blkno, &vmbuffer);
655 1788 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
656 1788 : ReleaseBuffer(vmbuffer);
657 1788 : vmbuffer = InvalidBuffer;
658 1788 : FreeFakeRelcacheEntry(reln);
659 : }
660 :
661 122668 : if (isinit)
662 : {
663 3770 : buffer = XLogInitBufferForRedo(record, 0);
664 3770 : page = BufferGetPage(buffer);
665 3770 : PageInit(page, BufferGetPageSize(buffer), 0);
666 3770 : action = BLK_NEEDS_REDO;
667 : }
668 : else
669 118898 : action = XLogReadBufferForRedo(record, 0, &buffer);
670 122668 : if (action == BLK_NEEDS_REDO)
671 : {
672 : char *tupdata;
673 : char *endptr;
674 : Size len;
675 :
676 : /* Tuples are stored as block data */
677 119442 : tupdata = XLogRecGetBlockData(record, 0, &len);
678 119442 : endptr = tupdata + len;
679 :
680 119442 : page = BufferGetPage(buffer);
681 :
682 544536 : for (i = 0; i < xlrec->ntuples; i++)
683 : {
684 : OffsetNumber offnum;
685 : xl_multi_insert_tuple *xlhdr;
686 :
687 : /*
688 : * If we're reinitializing the page, the tuples are stored in
689 : * order from FirstOffsetNumber. Otherwise there's an array of
690 : * offsets in the WAL record, and the tuples come after that.
691 : */
692 425094 : if (isinit)
693 198964 : offnum = FirstOffsetNumber + i;
694 : else
695 226130 : offnum = xlrec->offsets[i];
696 425094 : if (PageGetMaxOffsetNumber(page) + 1 < offnum)
697 0 : elog(PANIC, "invalid max offset number");
698 :
699 425094 : xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
700 425094 : tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
701 :
702 425094 : newlen = xlhdr->datalen;
703 : Assert(newlen <= MaxHeapTupleSize);
704 425094 : htup = &tbuf.hdr;
705 425094 : MemSet(htup, 0, SizeofHeapTupleHeader);
706 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
707 425094 : memcpy((char *) htup + SizeofHeapTupleHeader,
708 : tupdata,
709 : newlen);
710 425094 : tupdata += newlen;
711 :
712 425094 : newlen += SizeofHeapTupleHeader;
713 425094 : htup->t_infomask2 = xlhdr->t_infomask2;
714 425094 : htup->t_infomask = xlhdr->t_infomask;
715 425094 : htup->t_hoff = xlhdr->t_hoff;
716 425094 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
717 425094 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
718 425094 : ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
719 425094 : ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
720 :
721 425094 : offnum = PageAddItem(page, htup, newlen, offnum, true, true);
722 425094 : if (offnum == InvalidOffsetNumber)
723 0 : elog(PANIC, "failed to add tuple");
724 : }
725 119442 : if (tupdata != endptr)
726 0 : elog(PANIC, "total tuple length mismatch");
727 :
728 119442 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
729 :
730 119442 : PageSetLSN(page, lsn);
731 :
732 119442 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
733 200 : PageClearAllVisible(page);
734 :
735 : /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
736 119442 : if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
737 8 : PageSetAllVisible(page);
738 :
739 119442 : MarkBufferDirty(buffer);
740 : }
741 122668 : if (BufferIsValid(buffer))
742 122668 : UnlockReleaseBuffer(buffer);
743 :
744 122668 : buffer = InvalidBuffer;
745 :
746 : /*
747 : * Read and update the visibility map (VM) block.
748 : *
749 : * We must always redo VM changes, even if the corresponding heap page
750 : * update was skipped due to the LSN interlock. Each VM block covers
751 : * multiple heap pages, so later WAL records may update other bits in the
752 : * same block. If this record includes an FPI (full-page image),
753 : * subsequent WAL records may depend on it to guard against torn pages.
754 : *
755 : * Heap page changes are replayed first to preserve the invariant:
756 : * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
757 : *
758 : * Note that we released the heap page lock above. During normal
759 : * operation, this would be unsafe: a concurrent modification could
760 : * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
761 : * invariant.
762 : *
763 : * During recovery, however, no concurrent writers exist. Therefore,
764 : * updating the VM without holding the heap page lock is safe enough. This
765 : * same approach is taken when replaying xl_heap_visible records (see
766 : * heap_xlog_visible()).
767 : */
768 122676 : if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
769 8 : XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
770 : &vmbuffer) == BLK_NEEDS_REDO)
771 : {
772 0 : Page vmpage = BufferGetPage(vmbuffer);
773 :
774 : /* initialize the page if it was read as zeros */
775 0 : if (PageIsNew(vmpage))
776 0 : PageInit(vmpage, BLCKSZ, 0);
777 :
778 0 : visibilitymap_set_vmbits(blkno,
779 : vmbuffer,
780 : VISIBILITYMAP_ALL_VISIBLE |
781 : VISIBILITYMAP_ALL_FROZEN,
782 : rlocator);
783 :
784 : Assert(BufferIsDirty(vmbuffer));
785 0 : PageSetLSN(vmpage, lsn);
786 : }
787 :
788 122668 : if (BufferIsValid(vmbuffer))
789 8 : UnlockReleaseBuffer(vmbuffer);
790 :
791 : /*
792 : * If the page is running low on free space, update the FSM as well.
793 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
794 : * better than that without knowing the fill-factor for the table.
795 : *
796 : * XXX: Don't do this if the page was restored from a full-page image. We
797 : * don't bother to update the FSM in that case; it doesn't need to be
798 : * totally accurate anyway.
799 : */
800 122668 : if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
801 33372 : XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
802 122668 : }
803 :
804 : /*
805 : * Replay XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE records.
806 : */
807 : static void
808 191126 : heap_xlog_update(XLogReaderState *record, bool hot_update)
809 : {
810 191126 : XLogRecPtr lsn = record->EndRecPtr;
811 191126 : xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
812 : RelFileLocator rlocator;
813 : BlockNumber oldblk;
814 : BlockNumber newblk;
815 : ItemPointerData newtid;
816 : Buffer obuffer,
817 : nbuffer;
818 : Page page;
819 : OffsetNumber offnum;
820 : ItemId lp;
821 : HeapTupleData oldtup;
822 : HeapTupleHeader htup;
823 191126 : uint16 prefixlen = 0,
824 191126 : suffixlen = 0;
825 : char *newp;
826 : union
827 : {
828 : HeapTupleHeaderData hdr;
829 : char data[MaxHeapTupleSize];
830 : } tbuf;
831 : xl_heap_header xlhdr;
832 : uint32 newlen;
833 191126 : Size freespace = 0;
834 : XLogRedoAction oldaction;
835 : XLogRedoAction newaction;
836 :
837 : /* initialize to keep the compiler quiet */
838 191126 : oldtup.t_data = NULL;
839 191126 : oldtup.t_len = 0;
840 :
841 191126 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
842 191126 : if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
843 : {
844 : /* HOT updates are never done across pages */
845 : Assert(!hot_update);
846 : }
847 : else
848 81238 : oldblk = newblk;
849 :
850 191126 : ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
851 :
852 : /*
853 : * The visibility map may need to be fixed even if the heap page is
854 : * already up-to-date.
855 : */
856 191126 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
857 : {
858 528 : Relation reln = CreateFakeRelcacheEntry(rlocator);
859 528 : Buffer vmbuffer = InvalidBuffer;
860 :
861 528 : visibilitymap_pin(reln, oldblk, &vmbuffer);
862 528 : visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
863 528 : ReleaseBuffer(vmbuffer);
864 528 : FreeFakeRelcacheEntry(reln);
865 : }
866 :
867 : /*
868 : * In normal operation, it is important to lock the two pages in
869 : * page-number order, to avoid possible deadlocks against other update
870 : * operations going the other way. However, during WAL replay there can
871 : * be no other update happening, so we don't need to worry about that. But
872 : * we *do* need to worry that we don't expose an inconsistent state to Hot
873 : * Standby queries --- so the original page can't be unlocked before we've
874 : * added the new tuple to the new page.
875 : */
876 :
877 : /* Deal with old tuple version */
878 191126 : oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
879 : &obuffer);
880 191126 : if (oldaction == BLK_NEEDS_REDO)
881 : {
882 190438 : page = BufferGetPage(obuffer);
883 190438 : offnum = xlrec->old_offnum;
884 190438 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
885 0 : elog(PANIC, "offnum out of range");
886 190438 : lp = PageGetItemId(page, offnum);
887 190438 : if (!ItemIdIsNormal(lp))
888 0 : elog(PANIC, "invalid lp");
889 :
890 190438 : htup = (HeapTupleHeader) PageGetItem(page, lp);
891 :
892 190438 : oldtup.t_data = htup;
893 190438 : oldtup.t_len = ItemIdGetLength(lp);
894 :
895 190438 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
896 190438 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
897 190438 : if (hot_update)
898 74532 : HeapTupleHeaderSetHotUpdated(htup);
899 : else
900 115906 : HeapTupleHeaderClearHotUpdated(htup);
901 190438 : fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
902 : &htup->t_infomask2);
903 190438 : HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
904 190438 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
905 : /* Set forward chain link in t_ctid */
906 190438 : htup->t_ctid = newtid;
907 :
908 : /* Mark the page as a candidate for pruning */
909 190438 : PageSetPrunable(page, XLogRecGetXid(record));
910 :
911 190438 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
912 444 : PageClearAllVisible(page);
913 :
914 190438 : PageSetLSN(page, lsn);
915 190438 : MarkBufferDirty(obuffer);
916 : }
917 :
918 : /*
919 : * Read the page the new tuple goes into, if different from old.
920 : */
921 191126 : if (oldblk == newblk)
922 : {
923 81238 : nbuffer = obuffer;
924 81238 : newaction = oldaction;
925 : }
926 109888 : else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
927 : {
928 1302 : nbuffer = XLogInitBufferForRedo(record, 0);
929 1302 : page = BufferGetPage(nbuffer);
930 1302 : PageInit(page, BufferGetPageSize(nbuffer), 0);
931 1302 : newaction = BLK_NEEDS_REDO;
932 : }
933 : else
934 108586 : newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
935 :
936 : /*
937 : * The visibility map may need to be fixed even if the heap page is
938 : * already up-to-date.
939 : */
940 191126 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
941 : {
942 402 : Relation reln = CreateFakeRelcacheEntry(rlocator);
943 402 : Buffer vmbuffer = InvalidBuffer;
944 :
945 402 : visibilitymap_pin(reln, newblk, &vmbuffer);
946 402 : visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
947 402 : ReleaseBuffer(vmbuffer);
948 402 : FreeFakeRelcacheEntry(reln);
949 : }
950 :
951 : /* Deal with new tuple */
952 191126 : if (newaction == BLK_NEEDS_REDO)
953 : {
954 : char *recdata;
955 : char *recdata_end;
956 : Size datalen;
957 : Size tuplen;
958 :
959 190014 : recdata = XLogRecGetBlockData(record, 0, &datalen);
960 190014 : recdata_end = recdata + datalen;
961 :
962 190014 : page = BufferGetPage(nbuffer);
963 :
964 190014 : offnum = xlrec->new_offnum;
965 190014 : if (PageGetMaxOffsetNumber(page) + 1 < offnum)
966 0 : elog(PANIC, "invalid max offset number");
967 :
968 190014 : if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
969 : {
970 : Assert(newblk == oldblk);
971 33458 : memcpy(&prefixlen, recdata, sizeof(uint16));
972 33458 : recdata += sizeof(uint16);
973 : }
974 190014 : if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
975 : {
976 : Assert(newblk == oldblk);
977 69488 : memcpy(&suffixlen, recdata, sizeof(uint16));
978 69488 : recdata += sizeof(uint16);
979 : }
980 :
981 190014 : memcpy(&xlhdr, recdata, SizeOfHeapHeader);
982 190014 : recdata += SizeOfHeapHeader;
983 :
984 190014 : tuplen = recdata_end - recdata;
985 : Assert(tuplen <= MaxHeapTupleSize);
986 :
987 190014 : htup = &tbuf.hdr;
988 190014 : MemSet(htup, 0, SizeofHeapTupleHeader);
989 :
990 : /*
991 : * Reconstruct the new tuple using the prefix and/or suffix from the
992 : * old tuple, and the data stored in the WAL record.
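     : *
     : * The WAL block data carries the tuple header bitmap (and any padding
     : * bytes up to t_hoff) plus only the portion of the data that lies between
     : * the shared prefix and suffix; the first prefixlen and last suffixlen
     : * data bytes are copied from the old tuple version on the same page.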
993 : */
994 190014 : newp = (char *) htup + SizeofHeapTupleHeader;
995 190014 : if (prefixlen > 0)
996 : {
997 : int len;
998 :
999 : /* copy bitmap [+ padding] [+ oid] from WAL record */
1000 33458 : len = xlhdr.t_hoff - SizeofHeapTupleHeader;
1001 33458 : memcpy(newp, recdata, len);
1002 33458 : recdata += len;
1003 33458 : newp += len;
1004 :
1005 : /* copy prefix from old tuple */
1006 33458 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
1007 33458 : newp += prefixlen;
1008 :
1009 : /* copy new tuple data from WAL record */
1010 33458 : len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
1011 33458 : memcpy(newp, recdata, len);
1012 33458 : recdata += len;
1013 33458 : newp += len;
1014 : }
1015 : else
1016 : {
1017 : /*
1018 : * copy bitmap [+ padding] [+ oid] + data from record, all in one
1019 : * go
1020 : */
1021 156556 : memcpy(newp, recdata, tuplen);
1022 156556 : recdata += tuplen;
1023 156556 : newp += tuplen;
1024 : }
1025 : Assert(recdata == recdata_end);
1026 :
1027 : /* copy suffix from old tuple */
1028 190014 : if (suffixlen > 0)
1029 69488 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
1030 :
1031 190014 : newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
1032 190014 : htup->t_infomask2 = xlhdr.t_infomask2;
1033 190014 : htup->t_infomask = xlhdr.t_infomask;
1034 190014 : htup->t_hoff = xlhdr.t_hoff;
1035 :
1036 190014 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
1037 190014 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
1038 190014 : HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
1039 : /* Make sure there is no forward chain link in t_ctid */
1040 190014 : htup->t_ctid = newtid;
1041 :
1042 190014 : offnum = PageAddItem(page, htup, newlen, offnum, true, true);
1043 190014 : if (offnum == InvalidOffsetNumber)
1044 0 : elog(PANIC, "failed to add tuple");
1045 :
1046 190014 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
1047 234 : PageClearAllVisible(page);
1048 :
1049 190014 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
1050 :
1051 190014 : PageSetLSN(page, lsn);
1052 190014 : MarkBufferDirty(nbuffer);
1053 : }
1054 :
1055 191126 : if (BufferIsValid(nbuffer) && nbuffer != obuffer)
1056 109888 : UnlockReleaseBuffer(nbuffer);
1057 191126 : if (BufferIsValid(obuffer))
1058 191126 : UnlockReleaseBuffer(obuffer);
1059 :
1060 : /*
1061 : * If the new page is running low on free space, update the FSM as well.
1062 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
1063 : * better than that without knowing the fill-factor for the table.
1064 : *
1065 : * However, don't update the FSM on HOT updates, because after crash
1066 : * recovery, either the old or the new tuple will certainly be dead and
1067 : * prunable. After pruning, the page will have roughly as much free space
1068 : * as it did before the update, assuming the new tuple is about the same
1069 : * size as the old one.
1070 : *
1071 : * XXX: Don't do this if the page was restored from a full-page image. We
1072 : * don't bother to update the FSM in that case; it doesn't need to be
1073 : * totally accurate anyway.
1074 : */
1075 191126 : if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
1076 23746 : XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
1077 191126 : }
1078 :
1079 : /*
1080 : * Replay XLOG_HEAP_CONFIRM records.
1081 : */
1082 : static void
1083 166 : heap_xlog_confirm(XLogReaderState *record)
1084 : {
1085 166 : XLogRecPtr lsn = record->EndRecPtr;
1086 166 : xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record);
1087 : Buffer buffer;
1088 : Page page;
1089 : OffsetNumber offnum;
1090 : ItemId lp;
1091 : HeapTupleHeader htup;
1092 :
1093 166 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1094 : {
1095 166 : page = BufferGetPage(buffer);
1096 :
1097 166 : offnum = xlrec->offnum;
1098 166 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1099 0 : elog(PANIC, "offnum out of range");
1100 166 : lp = PageGetItemId(page, offnum);
1101 166 : if (!ItemIdIsNormal(lp))
1102 0 : elog(PANIC, "invalid lp");
1103 :
1104 166 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1105 :
1106 : /*
1107 : * Confirm tuple as actually inserted
1108 : */
1109 166 : ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum);
1110 :
1111 166 : PageSetLSN(page, lsn);
1112 166 : MarkBufferDirty(buffer);
1113 : }
1114 166 : if (BufferIsValid(buffer))
1115 166 : UnlockReleaseBuffer(buffer);
1116 166 : }
1117 :
1118 : /*
1119 : * Replay XLOG_HEAP_LOCK records.
1120 : */
1121 : static void
1122 111724 : heap_xlog_lock(XLogReaderState *record)
1123 : {
1124 111724 : XLogRecPtr lsn = record->EndRecPtr;
1125 111724 : xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
1126 : Buffer buffer;
1127 : Page page;
1128 : OffsetNumber offnum;
1129 : ItemId lp;
1130 : HeapTupleHeader htup;
1131 :
1132 : /*
1133 : * The visibility map may need to be fixed even if the heap page is
1134 : * already up-to-date.
1135 : */
1136 111724 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1137 : {
1138 : RelFileLocator rlocator;
1139 128 : Buffer vmbuffer = InvalidBuffer;
1140 : BlockNumber block;
1141 : Relation reln;
1142 :
1143 128 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1144 128 : reln = CreateFakeRelcacheEntry(rlocator);
1145 :
1146 128 : visibilitymap_pin(reln, block, &vmbuffer);
1147 128 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1148 :
1149 128 : ReleaseBuffer(vmbuffer);
1150 128 : FreeFakeRelcacheEntry(reln);
1151 : }
1152 :
1153 111724 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1154 : {
1155 111284 : page = BufferGetPage(buffer);
1156 :
1157 111284 : offnum = xlrec->offnum;
1158 111284 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1159 0 : elog(PANIC, "offnum out of range");
1160 111284 : lp = PageGetItemId(page, offnum);
1161 111284 : if (!ItemIdIsNormal(lp))
1162 0 : elog(PANIC, "invalid lp");
1163 :
1164 111284 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1165 :
1166 111284 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1167 111284 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1168 111284 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1169 : &htup->t_infomask2);
1170 :
1171 : /*
1172 : * Clear relevant update flags, but only if the modified infomask says
1173 : * there's no update.
1174 : */
1175 111284 : if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
1176 : {
1177 111284 : HeapTupleHeaderClearHotUpdated(htup);
1178 : /* Make sure there is no forward chain link in t_ctid */
1179 111284 : ItemPointerSet(&htup->t_ctid,
1180 : BufferGetBlockNumber(buffer),
1181 : offnum);
1182 : }
1183 111284 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1184 111284 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
1185 111284 : PageSetLSN(page, lsn);
1186 111284 : MarkBufferDirty(buffer);
1187 : }
1188 111724 : if (BufferIsValid(buffer))
1189 111724 : UnlockReleaseBuffer(buffer);
1190 111724 : }
1191 :
1192 : /*
1193 : * Replay XLOG_HEAP2_LOCK_UPDATED records.
1194 : */
1195 : static void
1196 0 : heap_xlog_lock_updated(XLogReaderState *record)
1197 : {
1198 0 : XLogRecPtr lsn = record->EndRecPtr;
1199 : xl_heap_lock_updated *xlrec;
1200 : Buffer buffer;
1201 : Page page;
1202 : OffsetNumber offnum;
1203 : ItemId lp;
1204 : HeapTupleHeader htup;
1205 :
1206 0 : xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
1207 :
1208 : /*
1209 : * The visibility map may need to be fixed even if the heap page is
1210 : * already up-to-date.
1211 : */
1212 0 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1213 : {
1214 : RelFileLocator rlocator;
1215 0 : Buffer vmbuffer = InvalidBuffer;
1216 : BlockNumber block;
1217 : Relation reln;
1218 :
1219 0 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1220 0 : reln = CreateFakeRelcacheEntry(rlocator);
1221 :
1222 0 : visibilitymap_pin(reln, block, &vmbuffer);
1223 0 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1224 :
1225 0 : ReleaseBuffer(vmbuffer);
1226 0 : FreeFakeRelcacheEntry(reln);
1227 : }
1228 :
1229 0 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1230 : {
1231 0 : page = BufferGetPage(buffer);
1232 :
1233 0 : offnum = xlrec->offnum;
1234 0 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1235 0 : elog(PANIC, "offnum out of range");
1236 0 : lp = PageGetItemId(page, offnum);
1237 0 : if (!ItemIdIsNormal(lp))
1238 0 : elog(PANIC, "invalid lp");
1239 :
1240 0 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1241 :
1242 0 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1243 0 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1244 0 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1245 : &htup->t_infomask2);
1246 0 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1247 :
1248 0 : PageSetLSN(page, lsn);
1249 0 : MarkBufferDirty(buffer);
1250 : }
1251 0 : if (BufferIsValid(buffer))
1252 0 : UnlockReleaseBuffer(buffer);
1253 0 : }
1254 :
1255 : /*
1256 : * Replay XLOG_HEAP_INPLACE records.
1257 : */
1258 : static void
1259 15964 : heap_xlog_inplace(XLogReaderState *record)
1260 : {
1261 15964 : XLogRecPtr lsn = record->EndRecPtr;
1262 15964 : xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
1263 : Buffer buffer;
1264 : Page page;
1265 : OffsetNumber offnum;
1266 : ItemId lp;
1267 : HeapTupleHeader htup;
1268 : uint32 oldlen;
1269 : Size newlen;
1270 :
1271 15964 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1272 : {
1273 15544 : char *newtup = XLogRecGetBlockData(record, 0, &newlen);
1274 :
1275 15544 : page = BufferGetPage(buffer);
1276 :
1277 15544 : offnum = xlrec->offnum;
1278 15544 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1279 0 : elog(PANIC, "offnum out of range");
1280 15544 : lp = PageGetItemId(page, offnum);
1281 15544 : if (!ItemIdIsNormal(lp))
1282 0 : elog(PANIC, "invalid lp");
1283 :
1284 15544 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1285 :
1286 15544 : oldlen = ItemIdGetLength(lp) - htup->t_hoff;
1287 15544 : if (oldlen != newlen)
1288 0 : elog(PANIC, "wrong tuple length");
1289 :
1290 15544 : memcpy((char *) htup + htup->t_hoff, newtup, newlen);
1291 :
1292 15544 : PageSetLSN(page, lsn);
1293 15544 : MarkBufferDirty(buffer);
1294 : }
1295 15964 : if (BufferIsValid(buffer))
1296 15964 : UnlockReleaseBuffer(buffer);
1297 :
1298 15964 : ProcessCommittedInvalidationMessages(xlrec->msgs,
1299 : xlrec->nmsgs,
1300 15964 : xlrec->relcacheInitFileInval,
1301 : xlrec->dbId,
1302 : xlrec->tsId);
1303 15964 : }
1304 :
1305 : void
1306 3526104 : heap_redo(XLogReaderState *record)
1307 : {
1308 3526104 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1309 :
1310 : /*
1311 : * These operations don't overwrite MVCC data, so no conflict processing is
1312 : * required. The ones in the heap2 rmgr do.
1313 : */
1314 :
1315 3526104 : switch (info & XLOG_HEAP_OPMASK)
1316 : {
1317 2590110 : case XLOG_HEAP_INSERT:
1318 2590110 : heap_xlog_insert(record);
1319 2590110 : break;
1320 617010 : case XLOG_HEAP_DELETE:
1321 617010 : heap_xlog_delete(record);
1322 617010 : break;
1323 115972 : case XLOG_HEAP_UPDATE:
1324 115972 : heap_xlog_update(record, false);
1325 115972 : break;
1326 4 : case XLOG_HEAP_TRUNCATE:
1327 :
1328 : /*
1329 : * TRUNCATE is a no-op because the actions are already logged as
1330 : * SMGR WAL records. The TRUNCATE WAL record exists only for logical
1331 : * decoding.
1332 : */
1333 4 : break;
1334 75154 : case XLOG_HEAP_HOT_UPDATE:
1335 75154 : heap_xlog_update(record, true);
1336 75154 : break;
1337 166 : case XLOG_HEAP_CONFIRM:
1338 166 : heap_xlog_confirm(record);
1339 166 : break;
1340 111724 : case XLOG_HEAP_LOCK:
1341 111724 : heap_xlog_lock(record);
1342 111724 : break;
1343 15964 : case XLOG_HEAP_INPLACE:
1344 15964 : heap_xlog_inplace(record);
1345 15964 : break;
1346 0 : default:
1347 0 : elog(PANIC, "heap_redo: unknown op code %u", info);
1348 : }
1349 3526104 : }
1350 :
1351 : void
1352 165692 : heap2_redo(XLogReaderState *record)
1353 : {
1354 165692 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1355 :
1356 165692 : switch (info & XLOG_HEAP_OPMASK)
1357 : {
1358 30618 : case XLOG_HEAP2_PRUNE_ON_ACCESS:
1359 : case XLOG_HEAP2_PRUNE_VACUUM_SCAN:
1360 : case XLOG_HEAP2_PRUNE_VACUUM_CLEANUP:
1361 30618 : heap_xlog_prune_freeze(record);
1362 30618 : break;
1363 10442 : case XLOG_HEAP2_VISIBLE:
1364 10442 : heap_xlog_visible(record);
1365 10442 : break;
1366 122668 : case XLOG_HEAP2_MULTI_INSERT:
1367 122668 : heap_xlog_multi_insert(record);
1368 122668 : break;
1369 0 : case XLOG_HEAP2_LOCK_UPDATED:
1370 0 : heap_xlog_lock_updated(record);
1371 0 : break;
1372 1964 : case XLOG_HEAP2_NEW_CID:
1373 :
1374 : /*
1375 : * Nothing to do on a real replay, only used during logical
1376 : * decoding.
1377 : */
1378 1964 : break;
1379 0 : case XLOG_HEAP2_REWRITE:
1380 0 : heap_xlog_logical_rewrite(record);
1381 0 : break;
1382 0 : default:
1383 0 : elog(PANIC, "heap2_redo: unknown op code %u", info);
1384 : }
1385 165692 : }
1386 :
1387 : /*
1388 : * Mask a heap page before performing consistency checks on it.
1389 : */
1390 : void
1391 5926696 : heap_mask(char *pagedata, BlockNumber blkno)
1392 : {
1393 5926696 : Page page = (Page) pagedata;
1394 : OffsetNumber off;
1395 :
1396 5926696 : mask_page_lsn_and_checksum(page);
1397 :
1398 5926696 : mask_page_hint_bits(page);
1399 5926696 : mask_unused_space(page);
1400 :
1401 484809756 : for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
1402 : {
1403 478883060 : ItemId iid = PageGetItemId(page, off);
1404 : char *page_item;
1405 :
1406 478883060 : page_item = (char *) (page + ItemIdGetOffset(iid));
1407 :
1408 478883060 : if (ItemIdIsNormal(iid))
1409 : {
1410 451242252 : HeapTupleHeader page_htup = (HeapTupleHeader) page_item;
1411 :
1412 : /*
1413 : * If xmin of a tuple is not yet frozen, we should ignore
1414 : * differences in hint bits, since they can be set without
1415 : * emitting WAL.
1416 : */
1417 451242252 : if (!HeapTupleHeaderXminFrozen(page_htup))
1418 446577916 : page_htup->t_infomask &= ~HEAP_XACT_MASK;
1419 : else
1420 : {
1421 : /* Still we need to mask xmax hint bits. */
1422 4664336 : page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
1423 4664336 : page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
1424 : }
1425 :
1426 : /*
1427 : * During replay, we set Command Id to FirstCommandId. Hence, mask
1428 : * it. See heap_xlog_insert() for details.
1429 : */
1430 451242252 : page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;
1431 :
1432 : /*
1433 : * For a speculative tuple, heap_insert() does not set ctid in the
1434 : * caller-passed heap tuple itself, leaving the ctid field to
1435 : * contain a speculative token value - a per-backend monotonically
1436 : * increasing identifier. Besides, it does not WAL-log ctid under
1437 : * any circumstances.
1438 : *
1439 : * During redo, heap_xlog_insert() sets t_ctid to the current block
1440 : * number and the tuple's own offset number. It doesn't care about any
1441 : * speculative insertions on the primary. Hence, we set t_ctid to the
1442 : * current block number and the tuple's own offset number here as well,
1443 : * to ignore any inconsistency.
1444 : */
1445 451242252 : if (HeapTupleHeaderIsSpeculative(page_htup))
1446 168 : ItemPointerSet(&page_htup->t_ctid, blkno, off);
1447 :
1448 : /*
1449 : * NB: Not ignoring ctid changes due to the tuple having moved
1450 : * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
1451 : * important information that needs to be in-sync between primary
1452 : * and standby, and thus is WAL logged.
1453 : */
1454 : }
1455 :
1456 : /*
1457 : * Ignore any padding bytes after the tuple, when the length of the
1458 : * item is not MAXALIGNed.
1459 : */
1460 478883060 : if (ItemIdHasStorage(iid))
1461 : {
1462 451242252 : int len = ItemIdGetLength(iid);
1463 451242252 : int padlen = MAXALIGN(len) - len;
1464 :
1465 451242252 : if (padlen > 0)
1466 241354068 : memset(page_item + len, MASK_MARKER, padlen);
1467 : }
1468 : }
1469 5926696 : }