1 : /*-------------------------------------------------------------------------
2 : *
3 : * heapam_xlog.c
4 : * WAL replay logic for heap access method.
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/heap/heapam_xlog.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/bufmask.h"
18 : #include "access/heapam.h"
19 : #include "access/visibilitymap.h"
20 : #include "access/xlog.h"
21 : #include "access/xlogutils.h"
22 : #include "storage/freespace.h"
23 : #include "storage/standby.h"
24 :
25 :
26 : /*
27 : * Replay XLOG_HEAP2_PRUNE_* records.
28 : */
29 : static void
30 28428 : heap_xlog_prune_freeze(XLogReaderState *record)
31 : {
32 28428 : XLogRecPtr lsn = record->EndRecPtr;
33 28428 : char *maindataptr = XLogRecGetData(record);
34 : xl_heap_prune xlrec;
35 : Buffer buffer;
36 : RelFileLocator rlocator;
37 : BlockNumber blkno;
38 28428 : Buffer vmbuffer = InvalidBuffer;
39 28428 : uint8 vmflags = 0;
40 28428 : Size freespace = 0;
41 :
42 28428 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
43 28428 : memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
44 28428 : maindataptr += SizeOfHeapPrune;
45 :
46 : /*
47 : * We will take an ordinary exclusive lock or a cleanup lock depending on
48 : * whether the XLHP_CLEANUP_LOCK flag is set. With an ordinary exclusive
49 : * lock, we better not be doing anything that requires moving existing
50 : * tuple data.
51 : */
52 : Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
53 : (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);
54 :
55 28428 : if (xlrec.flags & XLHP_VM_ALL_VISIBLE)
56 : {
57 6714 : vmflags = VISIBILITYMAP_ALL_VISIBLE;
58 6714 : if (xlrec.flags & XLHP_VM_ALL_FROZEN)
59 5840 : vmflags |= VISIBILITYMAP_ALL_FROZEN;
60 : }
61 :
62 : /*
63 : * After xl_heap_prune is the optional snapshot conflict horizon.
64 : *
65 : * In Hot Standby mode, we must ensure that there are no running queries
66 : * which would conflict with the changes in this record. That means we
67 : * can't replay this record if it removes tuples that are still visible to
68 : * transactions on the standby, freeze tuples with xids that are still
69 : * considered running on the standby, or set a page as all-visible in the
70 : * VM if it isn't all-visible to all transactions on the standby.
71 : */
72 28428 : if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
73 : {
74 : TransactionId snapshot_conflict_horizon;
75 :
76 : /* memcpy() because snapshot_conflict_horizon is stored unaligned */
77 20876 : memcpy(&snapshot_conflict_horizon, maindataptr, sizeof(TransactionId));
78 20876 : maindataptr += sizeof(TransactionId);
79 :
80 20876 : if (InHotStandby)
81 20414 : ResolveRecoveryConflictWithSnapshot(snapshot_conflict_horizon,
82 20414 : (xlrec.flags & XLHP_IS_CATALOG_REL) != 0,
83 : rlocator);
84 : }
85 :
86 : /*
87 : * If we have a full-page image of the heap block, restore it and we're
88 : * done with the heap block.
89 : */
90 28428 : if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
91 28428 : (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
92 : &buffer) == BLK_NEEDS_REDO)
93 : {
94 20914 : Page page = BufferGetPage(buffer);
95 : OffsetNumber *redirected;
96 : OffsetNumber *nowdead;
97 : OffsetNumber *nowunused;
98 : int nredirected;
99 : int ndead;
100 : int nunused;
101 : int nplans;
102 : Size datalen;
103 : xlhp_freeze_plan *plans;
104 : OffsetNumber *frz_offsets;
105 20914 : char *dataptr = XLogRecGetBlockData(record, 0, &datalen);
106 : bool do_prune;
107 :
108 20914 : heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
109 : &nplans, &plans, &frz_offsets,
110 : &nredirected, &redirected,
111 : &ndead, &nowdead,
112 : &nunused, &nowunused);
113 :
114 20914 : do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
115 :
116 : /* Ensure the record does something */
117 : Assert(do_prune || nplans > 0 || vmflags & VISIBILITYMAP_VALID_BITS);
118 :
119 : /*
120 : * Update all line pointers per the record, and repair fragmentation
121 : * if needed.
122 : */
123 20914 : if (do_prune)
124 19848 : heap_page_prune_execute(buffer,
125 19848 : (xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
126 : redirected, nredirected,
127 : nowdead, ndead,
128 : nowunused, nunused);
129 :
130 : /* Freeze tuples */
131 23188 : for (int p = 0; p < nplans; p++)
132 : {
133 : HeapTupleFreeze frz;
134 :
135 : /*
136 : * Convert freeze plan representation from WAL record into
137 : * per-tuple format used by heap_execute_freeze_tuple
138 : */
139 2274 : frz.xmax = plans[p].xmax;
140 2274 : frz.t_infomask2 = plans[p].t_infomask2;
141 2274 : frz.t_infomask = plans[p].t_infomask;
142 2274 : frz.frzflags = plans[p].frzflags;
143 2274 : frz.offset = InvalidOffsetNumber; /* unused, but be tidy */
144 :
145 100536 : for (int i = 0; i < plans[p].ntuples; i++)
146 : {
147 98262 : OffsetNumber offset = *(frz_offsets++);
148 : ItemId lp;
149 : HeapTupleHeader tuple;
150 :
151 98262 : lp = PageGetItemId(page, offset);
152 98262 : tuple = (HeapTupleHeader) PageGetItem(page, lp);
153 98262 : heap_execute_freeze_tuple(tuple, &frz);
154 : }
155 : }
156 :
157 : /* There should be no more data */
158 : Assert((char *) frz_offsets == dataptr + datalen);
159 :
160 20914 : if (vmflags & VISIBILITYMAP_VALID_BITS)
161 4674 : PageSetAllVisible(page);
162 :
163 20914 : MarkBufferDirty(buffer);
164 :
165 : /*
166 : * See log_heap_prune_and_freeze() for commentary on when we set the
167 : * heap page LSN.
168 : */
169 20914 : if (do_prune || nplans > 0 ||
170 0 : ((vmflags & VISIBILITYMAP_VALID_BITS) && XLogHintBitIsNeeded()))
171 20914 : PageSetLSN(page, lsn);
172 :
173 : /*
174 : * Note: we don't worry about updating the page's prunability hints.
175 : * At worst this will cause an extra prune cycle to occur soon.
176 : */
177 : }
178 :
179 : /*
180 : * If we 1) released any space or line pointers or 2) set PD_ALL_VISIBLE
181 : * or the VM, update the freespace map.
182 : *
183 : * Even when no actual space is freed (when only marking the page
184 : * all-visible or frozen), we still update the FSM. Because the FSM is
185 : * unlogged and maintained heuristically, it often becomes stale on
186 : * standbys. If such a standby is later promoted and runs VACUUM, it will
187 : * skip recalculating free space for pages that were marked
188 : * all-visible/all-frozen. FreeSpaceMapVacuum() can then propagate overly
189 : * optimistic free space values upward, causing future insertions to
190 : * select pages that turn out to be unusable. In bulk, this can lead to
191 : * long stalls.
192 : *
193 : * To prevent this, always update the FSM even when only marking a page
194 : * all-visible/all-frozen.
195 : *
196 : * Do this regardless of whether a full-page image is logged, since FSM
197 : * data is not part of the page itself.
198 : */
199 28428 : if (BufferIsValid(buffer))
200 : {
201 28428 : if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
202 : XLHP_HAS_DEAD_ITEMS |
203 3300 : XLHP_HAS_NOW_UNUSED_ITEMS)) ||
204 3300 : (vmflags & VISIBILITYMAP_VALID_BITS))
205 25128 : freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
206 :
207 : /*
208 : * We want to avoid holding an exclusive lock on the heap buffer while
209 : * doing IO (either of the FSM or the VM), so we'll release it now.
210 : */
211 28428 : UnlockReleaseBuffer(buffer);
212 : }
213 :
214 : /*
215 : * Now read and update the VM block.
216 : *
217 : * We must redo changes to the VM even if the heap page was skipped due to
218 : * LSN interlock. See comment in heap_xlog_multi_insert() for more details
219 : * on replaying changes to the VM.
220 : */
221 35142 : if ((vmflags & VISIBILITYMAP_VALID_BITS) &&
222 6714 : XLogReadBufferForRedoExtended(record, 1,
223 : RBM_ZERO_ON_ERROR,
224 : false,
225 : &vmbuffer) == BLK_NEEDS_REDO)
226 : {
227 6556 : Page vmpage = BufferGetPage(vmbuffer);
228 :
229 : /* initialize the page if it was read as zeros */
230 6556 : if (PageIsNew(vmpage))
231 0 : PageInit(vmpage, BLCKSZ, 0);
232 :
233 6556 : visibilitymap_set_vmbits(blkno, vmbuffer, vmflags, rlocator);
234 :
235 : Assert(BufferIsDirty(vmbuffer));
236 6556 : PageSetLSN(vmpage, lsn);
237 : }
238 :
239 28428 : if (BufferIsValid(vmbuffer))
240 6714 : UnlockReleaseBuffer(vmbuffer);
241 :
242 28428 : if (freespace > 0)
243 25042 : XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
244 28428 : }
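
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * the cleanup-lock rule asserted at the top of heap_xlog_prune_freeze(),
 * packaged as a hypothetical predicate. Redirecting or killing line
 * pointers implies repairing fragmentation, i.e. moving existing tuple
 * data, so those operations demand the stronger cleanup lock; merely
 * marking items unused or freezing tuples in place does not.
 */
static inline bool
prune_record_flags_valid(uint8 flags)
{
	/* With a cleanup lock, any combination of prune actions is allowed */
	if (flags & XLHP_CLEANUP_LOCK)
		return true;

	/* With only an ordinary exclusive lock, no line pointers may move */
	return (flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0;
}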
245 :
246 : /*
247 : * Replay XLOG_HEAP2_VISIBLE records.
248 : *
249 : * The critical integrity requirement here is that we must never end up with
250 : * a situation where the visibility map bit is set, and the page-level
251 : * PD_ALL_VISIBLE bit is clear. If that were to occur, then a subsequent
252 : * page modification would fail to clear the visibility map bit.
253 : */
254 : static void
255 9320 : heap_xlog_visible(XLogReaderState *record)
256 : {
257 9320 : XLogRecPtr lsn = record->EndRecPtr;
258 9320 : xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
259 9320 : Buffer vmbuffer = InvalidBuffer;
260 : Buffer buffer;
261 : Page page;
262 : RelFileLocator rlocator;
263 : BlockNumber blkno;
264 : XLogRedoAction action;
265 :
266 : Assert((xlrec->flags & VISIBILITYMAP_XLOG_VALID_BITS) == xlrec->flags);
267 :
268 9320 : XLogRecGetBlockTag(record, 1, &rlocator, NULL, &blkno);
269 :
270 : /*
271 : * If there are any Hot Standby transactions running that have an xmin
272 : * horizon old enough that this page isn't all-visible for them, they
273 : * might incorrectly decide that an index-only scan can skip a heap fetch.
274 : *
275 : * NB: It might be better to throw some kind of "soft" conflict here that
276 : * forces any index-only scan that is in flight to perform heap fetches,
277 : * rather than killing the transaction outright.
278 : */
279 9320 : if (InHotStandby)
280 8972 : ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
281 8972 : xlrec->flags & VISIBILITYMAP_XLOG_CATALOG_REL,
282 : rlocator);
283 :
284 : /*
285 : * Read the heap page, if it still exists. If the heap file has been
286 : * dropped or truncated later in recovery, we don't need to update the
287 : * page, but we'd better still update the visibility map.
288 : */
289 9320 : action = XLogReadBufferForRedo(record, 1, &buffer);
290 9320 : if (action == BLK_NEEDS_REDO)
291 : {
292 : /*
293 : * We don't bump the LSN of the heap page when setting the visibility
294 : * map bit (unless checksums or wal_log_hints is enabled, in which
295 : * case we must). This exposes us to torn page hazards, but since
296 : * we're not inspecting the existing page contents in any way, we
297 : * don't care.
298 : */
299 5516 : page = BufferGetPage(buffer);
300 :
301 5516 : PageSetAllVisible(page);
302 :
303 5516 : if (XLogHintBitIsNeeded())
304 5516 : PageSetLSN(page, lsn);
305 :
306 5516 : MarkBufferDirty(buffer);
307 : }
308 : else if (action == BLK_RESTORED)
309 : {
310 : /*
311 : * If heap block was backed up, we already restored it and there's
312 : * nothing more to do. (This can only happen with checksums or
313 : * wal_log_hints enabled.)
314 : */
315 : }
316 :
317 9320 : if (BufferIsValid(buffer))
318 : {
319 9320 : Size space = PageGetFreeSpace(BufferGetPage(buffer));
320 :
321 9320 : UnlockReleaseBuffer(buffer);
322 :
323 : /*
324 : * Since FSM is not WAL-logged and only updated heuristically, it
325 : * easily becomes stale in standbys. If the standby is later promoted
326 : * and runs VACUUM, it will skip updating individual free space
327 : * figures for pages that became all-visible (or all-frozen, depending
328 : * on the vacuum mode), which is troublesome when FreeSpaceMapVacuum
329 : * propagates too optimistic free space values to upper FSM layers;
330 : * later inserters try to use such pages only to find out that they
331 : * are unusable. This can cause long stalls when there are many such
332 : * pages.
333 : *
334 : * Forestall those problems by updating FSM's idea about a page that
335 : * is becoming all-visible or all-frozen.
336 : *
337 : * Do this regardless of a full-page image being applied, since the
338 : * FSM data is not in the page anyway.
339 : */
340 9320 : if (xlrec->flags & VISIBILITYMAP_VALID_BITS)
341 9320 : XLogRecordPageWithFreeSpace(rlocator, blkno, space);
342 : }
343 :
344 : /*
345 : * Even if we skipped the heap page update due to the LSN interlock, it's
346 : * still safe to update the visibility map. Any WAL record that clears
347 : * the visibility map bit does so before checking the page LSN, so any
348 : * bits that need to be cleared will still be cleared.
349 : */
350 9320 : if (XLogReadBufferForRedoExtended(record, 0, RBM_ZERO_ON_ERROR, false,
351 : &vmbuffer) == BLK_NEEDS_REDO)
352 : {
353 8910 : Page vmpage = BufferGetPage(vmbuffer);
354 : Relation reln;
355 : uint8 vmbits;
356 :
357 : /* initialize the page if it was read as zeros */
358 8910 : if (PageIsNew(vmpage))
359 0 : PageInit(vmpage, BLCKSZ, 0);
360 :
361 : /* remove VISIBILITYMAP_XLOG_* */
362 8910 : vmbits = xlrec->flags & VISIBILITYMAP_VALID_BITS;
363 :
364 : /*
365 : * XLogReadBufferForRedoExtended locked the buffer. But
366 : * visibilitymap_set will handle locking itself.
367 : */
368 8910 : LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
369 :
370 8910 : reln = CreateFakeRelcacheEntry(rlocator);
371 :
372 8910 : visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
373 : xlrec->snapshotConflictHorizon, vmbits);
374 :
375 8910 : ReleaseBuffer(vmbuffer);
376 8910 : FreeFakeRelcacheEntry(reln);
377 : }
378 410 : else if (BufferIsValid(vmbuffer))
379 410 : UnlockReleaseBuffer(vmbuffer);
380 9320 : }
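
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * the invariant the ordering above preserves --- PD_ALL_VISIBLE is set on
 * the heap page before the VM bit, so no intermediate state can show the
 * VM bit set while the page-level bit is clear. A hypothetical checker,
 * assuming the caller has locked the heap page and looked up the VM bit:
 */
static inline void
assert_visibility_invariant(Page heappage, bool vm_bit_is_set)
{
	/* VM bit set implies PD_ALL_VISIBLE set on the heap page */
	Assert(!vm_bit_is_set || PageIsAllVisible(heappage));
}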
381 :
382 : /*
383 : * Given an "infobits" field from an XLog record, set the correct bits in the
384 : * given infomask and infomask2 for the tuple touched by the record.
385 : *
386 : * (This is the reverse of compute_infobits).
387 : */
388 : static void
389 893152 : fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
390 : {
391 893152 : *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
392 : HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
393 893152 : *infomask2 &= ~HEAP_KEYS_UPDATED;
394 :
395 893152 : if (infobits & XLHL_XMAX_IS_MULTI)
396 4 : *infomask |= HEAP_XMAX_IS_MULTI;
397 893152 : if (infobits & XLHL_XMAX_LOCK_ONLY)
398 110422 : *infomask |= HEAP_XMAX_LOCK_ONLY;
399 893152 : if (infobits & XLHL_XMAX_EXCL_LOCK)
400 109626 : *infomask |= HEAP_XMAX_EXCL_LOCK;
401 : /* note HEAP_XMAX_SHR_LOCK isn't considered here */
402 893152 : if (infobits & XLHL_XMAX_KEYSHR_LOCK)
403 818 : *infomask |= HEAP_XMAX_KEYSHR_LOCK;
404 :
405 893152 : if (infobits & XLHL_KEYS_UPDATED)
406 598728 : *infomask2 |= HEAP_KEYS_UPDATED;
407 893152 : }
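
/*
 * For reference, a minimal sketch of the forward direction (what
 * compute_infobits() produces when the WAL record is built); this is an
 * editorial paraphrase for illustration, not a copy of that function.
 * HEAP_XMAX_SHR_LOCK has no XLHL_* representation, matching the note in
 * fix_infomask_from_infobits() above.
 */
static inline uint8
sketch_compute_infobits(uint16 infomask, uint16 infomask2)
{
	return ((infomask & HEAP_XMAX_IS_MULTI) ? XLHL_XMAX_IS_MULTI : 0) |
		((infomask & HEAP_XMAX_LOCK_ONLY) ? XLHL_XMAX_LOCK_ONLY : 0) |
		((infomask & HEAP_XMAX_EXCL_LOCK) ? XLHL_XMAX_EXCL_LOCK : 0) |
		((infomask & HEAP_XMAX_KEYSHR_LOCK) ? XLHL_XMAX_KEYSHR_LOCK : 0) |
		((infomask2 & HEAP_KEYS_UPDATED) ? XLHL_KEYS_UPDATED : 0);
}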
408 :
409 : /*
410 : * Replay XLOG_HEAP_DELETE records.
411 : */
412 : static void
413 600072 : heap_xlog_delete(XLogReaderState *record)
414 : {
415 600072 : XLogRecPtr lsn = record->EndRecPtr;
416 600072 : xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
417 : Buffer buffer;
418 : Page page;
419 600072 : ItemId lp = NULL;
420 : HeapTupleHeader htup;
421 : BlockNumber blkno;
422 : RelFileLocator target_locator;
423 : ItemPointerData target_tid;
424 :
425 600072 : XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
426 600072 : ItemPointerSetBlockNumber(&target_tid, blkno);
427 600072 : ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
428 :
429 : /*
430 : * The visibility map may need to be fixed even if the heap page is
431 : * already up-to-date.
432 : */
433 600072 : if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
434 : {
435 66 : Relation reln = CreateFakeRelcacheEntry(target_locator);
436 66 : Buffer vmbuffer = InvalidBuffer;
437 :
438 66 : visibilitymap_pin(reln, blkno, &vmbuffer);
439 66 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
440 66 : ReleaseBuffer(vmbuffer);
441 66 : FreeFakeRelcacheEntry(reln);
442 : }
443 :
444 600072 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
445 : {
446 596000 : page = BufferGetPage(buffer);
447 :
448 596000 : if (PageGetMaxOffsetNumber(page) >= xlrec->offnum)
449 596000 : lp = PageGetItemId(page, xlrec->offnum);
450 :
451 596000 : if (PageGetMaxOffsetNumber(page) < xlrec->offnum || !ItemIdIsNormal(lp))
452 0 : elog(PANIC, "invalid lp");
453 :
454 596000 : htup = (HeapTupleHeader) PageGetItem(page, lp);
455 :
456 596000 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
457 596000 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
458 596000 : HeapTupleHeaderClearHotUpdated(htup);
459 596000 : fix_infomask_from_infobits(xlrec->infobits_set,
460 : &htup->t_infomask, &htup->t_infomask2);
461 596000 : if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
462 596000 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
463 : else
464 0 : HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
465 596000 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
466 :
467 : /* Mark the page as a candidate for pruning */
468 596000 : PageSetPrunable(page, XLogRecGetXid(record));
469 :
470 596000 : if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
471 12 : PageClearAllVisible(page);
472 :
473 : /* Make sure t_ctid is set correctly */
474 596000 : if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
475 284 : HeapTupleHeaderSetMovedPartitions(htup);
476 : else
477 595716 : htup->t_ctid = target_tid;
478 596000 : PageSetLSN(page, lsn);
479 596000 : MarkBufferDirty(buffer);
480 : }
481 600072 : if (BufferIsValid(buffer))
482 600072 : UnlockReleaseBuffer(buffer);
483 600072 : }
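
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * the line-pointer validation pattern used in heap_xlog_delete() above and
 * repeated in the other replay routines, as a hypothetical helper. Redo
 * uses PANIC rather than ERROR because a bad line pointer means the WAL
 * stream and the data page disagree, and recovery cannot safely continue.
 */
static ItemId
redo_get_normal_item(Page page, OffsetNumber offnum)
{
	ItemId		lp = NULL;

	if (PageGetMaxOffsetNumber(page) >= offnum)
		lp = PageGetItemId(page, offnum);

	if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
		elog(PANIC, "invalid lp");

	return lp;
}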
484 :
485 : /*
486 : * Replay XLOG_HEAP_INSERT records.
487 : */
488 : static void
489 2581762 : heap_xlog_insert(XLogReaderState *record)
490 : {
491 2581762 : XLogRecPtr lsn = record->EndRecPtr;
492 2581762 : xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
493 : Buffer buffer;
494 : Page page;
495 : union
496 : {
497 : HeapTupleHeaderData hdr;
498 : char data[MaxHeapTupleSize];
499 : } tbuf;
500 : HeapTupleHeader htup;
501 : xl_heap_header xlhdr;
502 : uint32 newlen;
503 2581762 : Size freespace = 0;
504 : RelFileLocator target_locator;
505 : BlockNumber blkno;
506 : ItemPointerData target_tid;
507 : XLogRedoAction action;
508 :
509 2581762 : XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
510 2581762 : ItemPointerSetBlockNumber(&target_tid, blkno);
511 2581762 : ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
512 :
513 : /* No freezing in the heap_insert() code path */
514 : Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));
515 :
516 : /*
517 : * The visibility map may need to be fixed even if the heap page is
518 : * already up-to-date.
519 : */
520 2581762 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
521 : {
522 2168 : Relation reln = CreateFakeRelcacheEntry(target_locator);
523 2168 : Buffer vmbuffer = InvalidBuffer;
524 :
525 2168 : visibilitymap_pin(reln, blkno, &vmbuffer);
526 2168 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
527 2168 : ReleaseBuffer(vmbuffer);
528 2168 : FreeFakeRelcacheEntry(reln);
529 : }
530 :
531 : /*
532 : * If we inserted the first and only tuple on the page, re-initialize the
533 : * page from scratch.
534 : */
535 2581762 : if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
536 : {
537 34248 : buffer = XLogInitBufferForRedo(record, 0);
538 34248 : page = BufferGetPage(buffer);
539 34248 : PageInit(page, BufferGetPageSize(buffer), 0);
540 34248 : action = BLK_NEEDS_REDO;
541 : }
542 : else
543 2547514 : action = XLogReadBufferForRedo(record, 0, &buffer);
544 2581762 : if (action == BLK_NEEDS_REDO)
545 : {
546 : Size datalen;
547 : char *data;
548 :
549 2576076 : page = BufferGetPage(buffer);
550 :
551 2576076 : if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
552 0 : elog(PANIC, "invalid max offset number");
553 :
554 2576076 : data = XLogRecGetBlockData(record, 0, &datalen);
555 :
556 2576076 : newlen = datalen - SizeOfHeapHeader;
557 : Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
558 2576076 : memcpy(&xlhdr, data, SizeOfHeapHeader);
559 2576076 : data += SizeOfHeapHeader;
560 :
561 2576076 : htup = &tbuf.hdr;
562 2576076 : MemSet(htup, 0, SizeofHeapTupleHeader);
563 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
564 2576076 : memcpy((char *) htup + SizeofHeapTupleHeader,
565 : data,
566 : newlen);
567 2576076 : newlen += SizeofHeapTupleHeader;
568 2576076 : htup->t_infomask2 = xlhdr.t_infomask2;
569 2576076 : htup->t_infomask = xlhdr.t_infomask;
570 2576076 : htup->t_hoff = xlhdr.t_hoff;
571 2576076 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
572 2576076 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
573 2576076 : htup->t_ctid = target_tid;
574 :
575 2576076 : if (PageAddItem(page, (Item) htup, newlen, xlrec->offnum,
576 : true, true) == InvalidOffsetNumber)
577 0 : elog(PANIC, "failed to add tuple");
578 :
579 2576076 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
580 :
581 2576076 : PageSetLSN(page, lsn);
582 :
583 2576076 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
584 676 : PageClearAllVisible(page);
585 :
586 2576076 : MarkBufferDirty(buffer);
587 : }
588 2581762 : if (BufferIsValid(buffer))
589 2581762 : UnlockReleaseBuffer(buffer);
590 :
591 : /*
592 : * If the page is running low on free space, update the FSM as well.
593 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
594 : * better than that without knowing the fill-factor for the table.
595 : *
596 : * XXX: Don't do this if the page was restored from full page image. We
597 : * don't bother to update the FSM in that case, it doesn't need to be
598 : * totally accurate anyway.
599 : */
600 2581762 : if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
601 507454 : XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
602 2581762 : }
603 :
604 : /*
605 : * Replay XLOG_HEAP2_MULTI_INSERT records.
606 : */
607 : static void
608 115418 : heap_xlog_multi_insert(XLogReaderState *record)
609 : {
610 115418 : XLogRecPtr lsn = record->EndRecPtr;
611 : xl_heap_multi_insert *xlrec;
612 : RelFileLocator rlocator;
613 : BlockNumber blkno;
614 : Buffer buffer;
615 : Page page;
616 : union
617 : {
618 : HeapTupleHeaderData hdr;
619 : char data[MaxHeapTupleSize];
620 : } tbuf;
621 : HeapTupleHeader htup;
622 : uint32 newlen;
623 115418 : Size freespace = 0;
624 : int i;
625 115418 : bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
626 : XLogRedoAction action;
627 115418 : Buffer vmbuffer = InvalidBuffer;
628 :
629 : /*
630 : * Insertion doesn't overwrite MVCC data, so no conflict processing is
631 : * required.
632 : */
633 115418 : xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
634 :
635 115418 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
636 :
637 : /* check that the mutually exclusive flags are not both set */
638 : Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
639 : (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
640 :
641 : /*
642 : * The visibility map may need to be fixed even if the heap page is
643 : * already up-to-date.
644 : */
645 115418 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
646 : {
647 2302 : Relation reln = CreateFakeRelcacheEntry(rlocator);
648 :
649 2302 : visibilitymap_pin(reln, blkno, &vmbuffer);
650 2302 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
651 2302 : ReleaseBuffer(vmbuffer);
652 2302 : vmbuffer = InvalidBuffer;
653 2302 : FreeFakeRelcacheEntry(reln);
654 : }
655 :
656 115418 : if (isinit)
657 : {
658 3290 : buffer = XLogInitBufferForRedo(record, 0);
659 3290 : page = BufferGetPage(buffer);
660 3290 : PageInit(page, BufferGetPageSize(buffer), 0);
661 3290 : action = BLK_NEEDS_REDO;
662 : }
663 : else
664 112128 : action = XLogReadBufferForRedo(record, 0, &buffer);
665 115418 : if (action == BLK_NEEDS_REDO)
666 : {
667 : char *tupdata;
668 : char *endptr;
669 : Size len;
670 :
671 : /* Tuples are stored as block data */
672 111766 : tupdata = XLogRecGetBlockData(record, 0, &len);
673 111766 : endptr = tupdata + len;
674 :
675 111766 : page = BufferGetPage(buffer);
676 :
677 523250 : for (i = 0; i < xlrec->ntuples; i++)
678 : {
679 : OffsetNumber offnum;
680 : xl_multi_insert_tuple *xlhdr;
681 :
682 : /*
683 : * If we're reinitializing the page, the tuples are stored in
684 : * order from FirstOffsetNumber. Otherwise there's an array of
685 : * offsets in the WAL record, and the tuples come after that.
686 : */
687 411484 : if (isinit)
688 197674 : offnum = FirstOffsetNumber + i;
689 : else
690 213810 : offnum = xlrec->offsets[i];
691 411484 : if (PageGetMaxOffsetNumber(page) + 1 < offnum)
692 0 : elog(PANIC, "invalid max offset number");
693 :
694 411484 : xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
695 411484 : tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
696 :
697 411484 : newlen = xlhdr->datalen;
698 : Assert(newlen <= MaxHeapTupleSize);
699 411484 : htup = &tbuf.hdr;
700 411484 : MemSet(htup, 0, SizeofHeapTupleHeader);
701 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
702 411484 : memcpy((char *) htup + SizeofHeapTupleHeader,
703 : tupdata,
704 : newlen);
705 411484 : tupdata += newlen;
706 :
707 411484 : newlen += SizeofHeapTupleHeader;
708 411484 : htup->t_infomask2 = xlhdr->t_infomask2;
709 411484 : htup->t_infomask = xlhdr->t_infomask;
710 411484 : htup->t_hoff = xlhdr->t_hoff;
711 411484 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
712 411484 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
713 411484 : ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
714 411484 : ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
715 :
716 411484 : offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
717 411484 : if (offnum == InvalidOffsetNumber)
718 0 : elog(PANIC, "failed to add tuple");
719 : }
720 111766 : if (tupdata != endptr)
721 0 : elog(PANIC, "total tuple length mismatch");
722 :
723 111766 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
724 :
725 111766 : PageSetLSN(page, lsn);
726 :
727 111766 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
728 180 : PageClearAllVisible(page);
729 :
730 : /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
731 111766 : if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
732 8 : PageSetAllVisible(page);
733 :
734 111766 : MarkBufferDirty(buffer);
735 : }
736 115418 : if (BufferIsValid(buffer))
737 115418 : UnlockReleaseBuffer(buffer);
738 :
739 115418 : buffer = InvalidBuffer;
740 :
741 : /*
742 : * Read and update the visibility map (VM) block.
743 : *
744 : * We must always redo VM changes, even if the corresponding heap page
745 : * update was skipped due to the LSN interlock. Each VM block covers
746 : * multiple heap pages, so later WAL records may update other bits in the
747 : * same block. If this record includes an FPI (full-page image),
748 : * subsequent WAL records may depend on it to guard against torn pages.
749 : *
750 : * Heap page changes are replayed first to preserve the invariant:
751 : * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
752 : *
753 : * Note that we released the heap page lock above. During normal
754 : * operation, this would be unsafe --- a concurrent modification could
755 : * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
756 : * invariant.
757 : *
758 : * During recovery, however, no concurrent writers exist. Therefore,
759 : * updating the VM without holding the heap page lock is safe enough. This
760 : * same approach is taken when replaying xl_heap_visible records (see
761 : * heap_xlog_visible()).
762 : */
763 115426 : if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
764 8 : XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
765 : &vmbuffer) == BLK_NEEDS_REDO)
766 : {
767 0 : Page vmpage = BufferGetPage(vmbuffer);
768 :
769 : /* initialize the page if it was read as zeros */
770 0 : if (PageIsNew(vmpage))
771 0 : PageInit(vmpage, BLCKSZ, 0);
772 :
773 0 : visibilitymap_set_vmbits(blkno,
774 : vmbuffer,
775 : VISIBILITYMAP_ALL_VISIBLE |
776 : VISIBILITYMAP_ALL_FROZEN,
777 : rlocator);
778 :
779 : Assert(BufferIsDirty(vmbuffer));
780 0 : PageSetLSN(vmpage, lsn);
781 : }
782 :
783 115418 : if (BufferIsValid(vmbuffer))
784 8 : UnlockReleaseBuffer(vmbuffer);
785 :
786 : /*
787 : * If the page is running low on free space, update the FSM as well.
788 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
789 : * better than that without knowing the fill-factor for the table.
790 : *
791 : * XXX: Don't do this if the page was restored from full page image. We
792 : * don't bother to update the FSM in that case, it doesn't need to be
793 : * totally accurate anyway.
794 : */
795 115418 : if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
796 33866 : XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
797 115418 : }
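
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * the block-data layout the multi-insert loop above walks. Each tuple is a
 * SHORTALIGN'd xl_multi_insert_tuple header followed immediately by
 * datalen bytes of tuple payload (null bitmap [+ padding] [+ oid] + data),
 * so a cursor advances over one entry like this:
 */
static inline char *
skip_multi_insert_tuple(char *cursor)
{
	xl_multi_insert_tuple *hdr = (xl_multi_insert_tuple *) SHORTALIGN(cursor);

	return ((char *) hdr) + SizeOfMultiInsertTuple + hdr->datalen;
}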
798 :
799 : /*
800 : * Replay XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE records.
801 : */
802 : static void
803 187376 : heap_xlog_update(XLogReaderState *record, bool hot_update)
804 : {
805 187376 : XLogRecPtr lsn = record->EndRecPtr;
806 187376 : xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
807 : RelFileLocator rlocator;
808 : BlockNumber oldblk;
809 : BlockNumber newblk;
810 : ItemPointerData newtid;
811 : Buffer obuffer,
812 : nbuffer;
813 : Page page;
814 : OffsetNumber offnum;
815 187376 : ItemId lp = NULL;
816 : HeapTupleData oldtup;
817 : HeapTupleHeader htup;
818 187376 : uint16 prefixlen = 0,
819 187376 : suffixlen = 0;
820 : char *newp;
821 : union
822 : {
823 : HeapTupleHeaderData hdr;
824 : char data[MaxHeapTupleSize];
825 : } tbuf;
826 : xl_heap_header xlhdr;
827 : uint32 newlen;
828 187376 : Size freespace = 0;
829 : XLogRedoAction oldaction;
830 : XLogRedoAction newaction;
831 :
832 : /* initialize to keep the compiler quiet */
833 187376 : oldtup.t_data = NULL;
834 187376 : oldtup.t_len = 0;
835 :
836 187376 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
837 187376 : if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
838 : {
839 : /* HOT updates are never done across pages */
840 : Assert(!hot_update);
841 : }
842 : else
843 78310 : oldblk = newblk;
844 :
845 187376 : ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
846 :
847 : /*
848 : * The visibility map may need to be fixed even if the heap page is
849 : * already up-to-date.
850 : */
851 187376 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
852 : {
853 462 : Relation reln = CreateFakeRelcacheEntry(rlocator);
854 462 : Buffer vmbuffer = InvalidBuffer;
855 :
856 462 : visibilitymap_pin(reln, oldblk, &vmbuffer);
857 462 : visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
858 462 : ReleaseBuffer(vmbuffer);
859 462 : FreeFakeRelcacheEntry(reln);
860 : }
861 :
862 : /*
863 : * In normal operation, it is important to lock the two pages in
864 : * page-number order, to avoid possible deadlocks against other update
865 : * operations going the other way. However, during WAL replay there can
866 : * be no other update happening, so we don't need to worry about that. But
867 : * we *do* need to worry that we don't expose an inconsistent state to Hot
868 : * Standby queries --- so the original page can't be unlocked before we've
869 : * added the new tuple to the new page.
870 : */
871 :
872 : /* Deal with old tuple version */
873 187376 : oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
874 : &obuffer);
875 187376 : if (oldaction == BLK_NEEDS_REDO)
876 : {
877 186730 : page = BufferGetPage(obuffer);
878 186730 : offnum = xlrec->old_offnum;
879 186730 : if (PageGetMaxOffsetNumber(page) >= offnum)
880 186730 : lp = PageGetItemId(page, offnum);
881 :
882 186730 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
883 0 : elog(PANIC, "invalid lp");
884 :
885 186730 : htup = (HeapTupleHeader) PageGetItem(page, lp);
886 :
887 186730 : oldtup.t_data = htup;
888 186730 : oldtup.t_len = ItemIdGetLength(lp);
889 :
890 186730 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
891 186730 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
892 186730 : if (hot_update)
893 72072 : HeapTupleHeaderSetHotUpdated(htup);
894 : else
895 114658 : HeapTupleHeaderClearHotUpdated(htup);
896 186730 : fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
897 : &htup->t_infomask2);
898 186730 : HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
899 186730 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
900 : /* Set forward chain link in t_ctid */
901 186730 : htup->t_ctid = newtid;
902 :
903 : /* Mark the page as a candidate for pruning */
904 186730 : PageSetPrunable(page, XLogRecGetXid(record));
905 :
906 186730 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
907 434 : PageClearAllVisible(page);
908 :
909 186730 : PageSetLSN(page, lsn);
910 186730 : MarkBufferDirty(obuffer);
911 : }
912 :
913 : /*
914 : * Read the page the new tuple goes into, if different from old.
915 : */
916 187376 : if (oldblk == newblk)
917 : {
918 78310 : nbuffer = obuffer;
919 78310 : newaction = oldaction;
920 : }
921 109066 : else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
922 : {
923 1124 : nbuffer = XLogInitBufferForRedo(record, 0);
924 1124 : page = BufferGetPage(nbuffer);
925 1124 : PageInit(page, BufferGetPageSize(nbuffer), 0);
926 1124 : newaction = BLK_NEEDS_REDO;
927 : }
928 : else
929 107942 : newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
930 :
931 : /*
932 : * The visibility map may need to be fixed even if the heap page is
933 : * already up-to-date.
934 : */
935 187376 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
936 : {
937 486 : Relation reln = CreateFakeRelcacheEntry(rlocator);
938 486 : Buffer vmbuffer = InvalidBuffer;
939 :
940 486 : visibilitymap_pin(reln, newblk, &vmbuffer);
941 486 : visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
942 486 : ReleaseBuffer(vmbuffer);
943 486 : FreeFakeRelcacheEntry(reln);
944 : }
945 :
946 : /* Deal with new tuple */
947 187376 : if (newaction == BLK_NEEDS_REDO)
948 : {
949 : char *recdata;
950 : char *recdata_end;
951 : Size datalen;
952 : Size tuplen;
953 :
954 186234 : recdata = XLogRecGetBlockData(record, 0, &datalen);
955 186234 : recdata_end = recdata + datalen;
956 :
957 186234 : page = BufferGetPage(nbuffer);
958 :
959 186234 : offnum = xlrec->new_offnum;
960 186234 : if (PageGetMaxOffsetNumber(page) + 1 < offnum)
961 0 : elog(PANIC, "invalid max offset number");
962 :
963 186234 : if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
964 : {
965 : Assert(newblk == oldblk);
966 30506 : memcpy(&prefixlen, recdata, sizeof(uint16));
967 30506 : recdata += sizeof(uint16);
968 : }
969 186234 : if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
970 : {
971 : Assert(newblk == oldblk);
972 67252 : memcpy(&suffixlen, recdata, sizeof(uint16));
973 67252 : recdata += sizeof(uint16);
974 : }
975 :
976 186234 : memcpy(&xlhdr, recdata, SizeOfHeapHeader);
977 186234 : recdata += SizeOfHeapHeader;
978 :
979 186234 : tuplen = recdata_end - recdata;
980 : Assert(tuplen <= MaxHeapTupleSize);
981 :
982 186234 : htup = &tbuf.hdr;
983 186234 : MemSet(htup, 0, SizeofHeapTupleHeader);
984 :
985 : /*
986 : * Reconstruct the new tuple using the prefix and/or suffix from the
987 : * old tuple, and the data stored in the WAL record.
988 : */
989 186234 : newp = (char *) htup + SizeofHeapTupleHeader;
990 186234 : if (prefixlen > 0)
991 : {
992 : int len;
993 :
994 : /* copy bitmap [+ padding] [+ oid] from WAL record */
995 30506 : len = xlhdr.t_hoff - SizeofHeapTupleHeader;
996 30506 : memcpy(newp, recdata, len);
997 30506 : recdata += len;
998 30506 : newp += len;
999 :
1000 : /* copy prefix from old tuple */
1001 30506 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
1002 30506 : newp += prefixlen;
1003 :
1004 : /* copy new tuple data from WAL record */
1005 30506 : len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
1006 30506 : memcpy(newp, recdata, len);
1007 30506 : recdata += len;
1008 30506 : newp += len;
1009 : }
1010 : else
1011 : {
1012 : /*
1013 : * copy bitmap [+ padding] [+ oid] + data from record, all in one
1014 : * go
1015 : */
1016 155728 : memcpy(newp, recdata, tuplen);
1017 155728 : recdata += tuplen;
1018 155728 : newp += tuplen;
1019 : }
1020 : Assert(recdata == recdata_end);
1021 :
1022 : /* copy suffix from old tuple */
1023 186234 : if (suffixlen > 0)
1024 67252 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
1025 :
1026 186234 : newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
1027 186234 : htup->t_infomask2 = xlhdr.t_infomask2;
1028 186234 : htup->t_infomask = xlhdr.t_infomask;
1029 186234 : htup->t_hoff = xlhdr.t_hoff;
1030 :
1031 186234 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
1032 186234 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
1033 186234 : HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
1034 : /* Make sure there is no forward chain link in t_ctid */
1035 186234 : htup->t_ctid = newtid;
1036 :
1037 186234 : offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
1038 186234 : if (offnum == InvalidOffsetNumber)
1039 0 : elog(PANIC, "failed to add tuple");
1040 :
1041 186234 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
1042 210 : PageClearAllVisible(page);
1043 :
1044 186234 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
1045 :
1046 186234 : PageSetLSN(page, lsn);
1047 186234 : MarkBufferDirty(nbuffer);
1048 : }
1049 :
1050 187376 : if (BufferIsValid(nbuffer) && nbuffer != obuffer)
1051 109066 : UnlockReleaseBuffer(nbuffer);
1052 187376 : if (BufferIsValid(obuffer))
1053 187376 : UnlockReleaseBuffer(obuffer);
1054 :
1055 : /*
1056 : * If the new page is running low on free space, update the FSM as well.
1057 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
1058 : * better than that without knowing the fill-factor for the table.
1059 : *
1060 : * However, don't update the FSM on HOT updates, because after crash
1061 : * recovery, either the old or the new tuple will certainly be dead and
1062 : * prunable. After pruning, the page will have roughly as much free space
1063 : * as it did before the update, assuming the new tuple is about the same
1064 : * size as the old one.
1065 : *
1066 : * XXX: Don't do this if the page was restored from full page image. We
1067 : * don't bother to update the FSM in that case, it doesn't need to be
1068 : * totally accurate anyway.
1069 : */
1070 187376 : if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
1071 23486 : XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
1072 187376 : }
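
/*
 * Worked example (editorial note) of the prefix/suffix reconstruction
 * above: suppose the old tuple's user data is "AAAABBBBCCCC" and the new
 * tuple's is "AAAAXXXXCCCC". The WAL record then carries prefixlen = 4,
 * suffixlen = 4, and only "XXXX" as tuple data; replay stitches the new
 * tuple together as null bitmap from the record, prefix from the old
 * tuple, record data, then suffix from the old tuple. This is also why
 * XLH_UPDATE_PREFIX_FROM_OLD and XLH_UPDATE_SUFFIX_FROM_OLD are only
 * possible when both versions live on the same page (the newblk == oldblk
 * assertions above).
 */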
1073 :
1074 : /*
1075 : * Replay XLOG_HEAP_CONFIRM records.
1076 : */
1077 : static void
1078 158 : heap_xlog_confirm(XLogReaderState *record)
1079 : {
1080 158 : XLogRecPtr lsn = record->EndRecPtr;
1081 158 : xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record);
1082 : Buffer buffer;
1083 : Page page;
1084 : OffsetNumber offnum;
1085 158 : ItemId lp = NULL;
1086 : HeapTupleHeader htup;
1087 :
1088 158 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1089 : {
1090 156 : page = BufferGetPage(buffer);
1091 :
1092 156 : offnum = xlrec->offnum;
1093 156 : if (PageGetMaxOffsetNumber(page) >= offnum)
1094 156 : lp = PageGetItemId(page, offnum);
1095 :
1096 156 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
1097 0 : elog(PANIC, "invalid lp");
1098 :
1099 156 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1100 :
1101 : /*
1102 : * Confirm tuple as actually inserted
1103 : */
1104 156 : ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum);
1105 :
1106 156 : PageSetLSN(page, lsn);
1107 156 : MarkBufferDirty(buffer);
1108 : }
1109 158 : if (BufferIsValid(buffer))
1110 158 : UnlockReleaseBuffer(buffer);
1111 158 : }
1112 :
1113 : /*
1114 : * Replay XLOG_HEAP_LOCK records.
1115 : */
1116 : static void
1117 110844 : heap_xlog_lock(XLogReaderState *record)
1118 : {
1119 110844 : XLogRecPtr lsn = record->EndRecPtr;
1120 110844 : xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
1121 : Buffer buffer;
1122 : Page page;
1123 : OffsetNumber offnum;
1124 110844 : ItemId lp = NULL;
1125 : HeapTupleHeader htup;
1126 :
1127 : /*
1128 : * The visibility map may need to be fixed even if the heap page is
1129 : * already up-to-date.
1130 : */
1131 110844 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1132 : {
1133 : RelFileLocator rlocator;
1134 132 : Buffer vmbuffer = InvalidBuffer;
1135 : BlockNumber block;
1136 : Relation reln;
1137 :
1138 132 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1139 132 : reln = CreateFakeRelcacheEntry(rlocator);
1140 :
1141 132 : visibilitymap_pin(reln, block, &vmbuffer);
1142 132 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1143 :
1144 132 : ReleaseBuffer(vmbuffer);
1145 132 : FreeFakeRelcacheEntry(reln);
1146 : }
1147 :
1148 110844 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1149 : {
1150 110422 : page = BufferGetPage(buffer);
1151 :
1152 110422 : offnum = xlrec->offnum;
1153 110422 : if (PageGetMaxOffsetNumber(page) >= offnum)
1154 110422 : lp = PageGetItemId(page, offnum);
1155 :
1156 110422 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
1157 0 : elog(PANIC, "invalid lp");
1158 :
1159 110422 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1160 :
1161 110422 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1162 110422 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1163 110422 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1164 : &htup->t_infomask2);
1165 :
1166 : /*
1167 : * Clear relevant update flags, but only if the modified infomask says
1168 : * there's no update.
1169 : */
1170 110422 : if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
1171 : {
1172 110422 : HeapTupleHeaderClearHotUpdated(htup);
1173 : /* Make sure there is no forward chain link in t_ctid */
1174 110422 : ItemPointerSet(&htup->t_ctid,
1175 : BufferGetBlockNumber(buffer),
1176 : offnum);
1177 : }
1178 110422 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1179 110422 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
1180 110422 : PageSetLSN(page, lsn);
1181 110422 : MarkBufferDirty(buffer);
1182 : }
1183 110844 : if (BufferIsValid(buffer))
1184 110844 : UnlockReleaseBuffer(buffer);
1185 110844 : }
1186 :
1187 : /*
1188 : * Replay XLOG_HEAP2_LOCK_UPDATED records.
1189 : */
1190 : static void
1191 0 : heap_xlog_lock_updated(XLogReaderState *record)
1192 : {
1193 0 : XLogRecPtr lsn = record->EndRecPtr;
1194 : xl_heap_lock_updated *xlrec;
1195 : Buffer buffer;
1196 : Page page;
1197 : OffsetNumber offnum;
1198 0 : ItemId lp = NULL;
1199 : HeapTupleHeader htup;
1200 :
1201 0 : xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
1202 :
1203 : /*
1204 : * The visibility map may need to be fixed even if the heap page is
1205 : * already up-to-date.
1206 : */
1207 0 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1208 : {
1209 : RelFileLocator rlocator;
1210 0 : Buffer vmbuffer = InvalidBuffer;
1211 : BlockNumber block;
1212 : Relation reln;
1213 :
1214 0 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1215 0 : reln = CreateFakeRelcacheEntry(rlocator);
1216 :
1217 0 : visibilitymap_pin(reln, block, &vmbuffer);
1218 0 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1219 :
1220 0 : ReleaseBuffer(vmbuffer);
1221 0 : FreeFakeRelcacheEntry(reln);
1222 : }
1223 :
1224 0 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1225 : {
1226 0 : page = BufferGetPage(buffer);
1227 :
1228 0 : offnum = xlrec->offnum;
1229 0 : if (PageGetMaxOffsetNumber(page) >= offnum)
1230 0 : lp = PageGetItemId(page, offnum);
1231 :
1232 0 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
1233 0 : elog(PANIC, "invalid lp");
1234 :
1235 0 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1236 :
1237 0 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1238 0 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1239 0 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1240 : &htup->t_infomask2);
1241 0 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1242 :
1243 0 : PageSetLSN(page, lsn);
1244 0 : MarkBufferDirty(buffer);
1245 : }
1246 0 : if (BufferIsValid(buffer))
1247 0 : UnlockReleaseBuffer(buffer);
1248 0 : }
1249 :
1250 : /*
1251 : * Replay XLOG_HEAP_INPLACE records.
1252 : */
1253 : static void
1254 15366 : heap_xlog_inplace(XLogReaderState *record)
1255 : {
1256 15366 : XLogRecPtr lsn = record->EndRecPtr;
1257 15366 : xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
1258 : Buffer buffer;
1259 : Page page;
1260 : OffsetNumber offnum;
1261 15366 : ItemId lp = NULL;
1262 : HeapTupleHeader htup;
1263 : uint32 oldlen;
1264 : Size newlen;
1265 :
1266 15366 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1267 : {
1268 14974 : char *newtup = XLogRecGetBlockData(record, 0, &newlen);
1269 :
1270 14974 : page = BufferGetPage(buffer);
1271 :
1272 14974 : offnum = xlrec->offnum;
1273 14974 : if (PageGetMaxOffsetNumber(page) >= offnum)
1274 14974 : lp = PageGetItemId(page, offnum);
1275 :
1276 14974 : if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
1277 0 : elog(PANIC, "invalid lp");
1278 :
1279 14974 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1280 :
1281 14974 : oldlen = ItemIdGetLength(lp) - htup->t_hoff;
1282 14974 : if (oldlen != newlen)
1283 0 : elog(PANIC, "wrong tuple length");
1284 :
1285 14974 : memcpy((char *) htup + htup->t_hoff, newtup, newlen);
1286 :
1287 14974 : PageSetLSN(page, lsn);
1288 14974 : MarkBufferDirty(buffer);
1289 : }
1290 15366 : if (BufferIsValid(buffer))
1291 15366 : UnlockReleaseBuffer(buffer);
1292 :
1293 15366 : ProcessCommittedInvalidationMessages(xlrec->msgs,
1294 : xlrec->nmsgs,
1295 15366 : xlrec->relcacheInitFileInval,
1296 : xlrec->dbId,
1297 : xlrec->tsId);
1298 15366 : }
1299 :
1300 : void
1301 3495582 : heap_redo(XLogReaderState *record)
1302 : {
1303 3495582 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1304 :
1305 : /*
1306 : * These operations don't overwrite MVCC data so no conflict processing is
1307 : * required. The ones in heap2 rmgr do.
1308 : */
1309 :
1310 3495582 : switch (info & XLOG_HEAP_OPMASK)
1311 : {
1312 2581762 : case XLOG_HEAP_INSERT:
1313 2581762 : heap_xlog_insert(record);
1314 2581762 : break;
1315 600072 : case XLOG_HEAP_DELETE:
1316 600072 : heap_xlog_delete(record);
1317 600072 : break;
1318 114748 : case XLOG_HEAP_UPDATE:
1319 114748 : heap_xlog_update(record, false);
1320 114748 : break;
1321 4 : case XLOG_HEAP_TRUNCATE:
1322 :
1323 : /*
1324 : * TRUNCATE is a no-op because the actions are already logged as
1325 : * SMGR WAL records. The TRUNCATE WAL record only exists for logical
1326 : * decoding.
1327 : */
1328 4 : break;
1329 72628 : case XLOG_HEAP_HOT_UPDATE:
1330 72628 : heap_xlog_update(record, true);
1331 72628 : break;
1332 158 : case XLOG_HEAP_CONFIRM:
1333 158 : heap_xlog_confirm(record);
1334 158 : break;
1335 110844 : case XLOG_HEAP_LOCK:
1336 110844 : heap_xlog_lock(record);
1337 110844 : break;
1338 15366 : case XLOG_HEAP_INPLACE:
1339 15366 : heap_xlog_inplace(record);
1340 15366 : break;
1341 0 : default:
1342 0 : elog(PANIC, "heap_redo: unknown op code %u", info);
1343 : }
1344 3495582 : }
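
/*
 * Editorial note: the info byte of a heap WAL record carries the opcode in
 * the bits covered by XLOG_HEAP_OPMASK, while XLOG_HEAP_INIT_PAGE rides in
 * a separate bit. That is why the switch above masks with
 * XLOG_HEAP_OPMASK, and heap_xlog_insert(), heap_xlog_multi_insert() and
 * heap_xlog_update() test XLOG_HEAP_INIT_PAGE on their own.
 */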
1345 :
1346 : void
1347 155166 : heap2_redo(XLogReaderState *record)
1348 : {
1349 155166 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1350 :
1351 155166 : switch (info & XLOG_HEAP_OPMASK)
1352 : {
1353 28428 : case XLOG_HEAP2_PRUNE_ON_ACCESS:
1354 : case XLOG_HEAP2_PRUNE_VACUUM_SCAN:
1355 : case XLOG_HEAP2_PRUNE_VACUUM_CLEANUP:
1356 28428 : heap_xlog_prune_freeze(record);
1357 28428 : break;
1358 9320 : case XLOG_HEAP2_VISIBLE:
1359 9320 : heap_xlog_visible(record);
1360 9320 : break;
1361 115418 : case XLOG_HEAP2_MULTI_INSERT:
1362 115418 : heap_xlog_multi_insert(record);
1363 115418 : break;
1364 0 : case XLOG_HEAP2_LOCK_UPDATED:
1365 0 : heap_xlog_lock_updated(record);
1366 0 : break;
1367 2000 : case XLOG_HEAP2_NEW_CID:
1368 :
1369 : /*
1370 : * Nothing to do on a real replay, only used during logical
1371 : * decoding.
1372 : */
1373 2000 : break;
1374 0 : case XLOG_HEAP2_REWRITE:
1375 0 : heap_xlog_logical_rewrite(record);
1376 0 : break;
1377 0 : default:
1378 0 : elog(PANIC, "heap2_redo: unknown op code %u", info);
1379 : }
1380 155166 : }
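
/*
 * For context (editorial sketch, not part of this file): both redo entry
 * points are dispatched through the resource-manager table. The
 * registrations in src/include/access/rmgrlist.h look roughly like this;
 * see that file for the authoritative entries:
 *
 *   PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, heap2_identify,
 *           NULL, NULL, heap_mask, heap2_decode)
 *   PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, heap_identify,
 *           NULL, NULL, heap_mask, heap_decode)
 */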
1381 :
1382 : /*
1383 : * Mask a heap page before performing consistency checks on it.
1384 : */
1385 : void
1386 5843248 : heap_mask(char *pagedata, BlockNumber blkno)
1387 : {
1388 5843248 : Page page = (Page) pagedata;
1389 : OffsetNumber off;
1390 :
1391 5843248 : mask_page_lsn_and_checksum(page);
1392 :
1393 5843248 : mask_page_hint_bits(page);
1394 5843248 : mask_unused_space(page);
1395 :
1396 481168904 : for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
1397 : {
1398 475325656 : ItemId iid = PageGetItemId(page, off);
1399 : char *page_item;
1400 :
1401 475325656 : page_item = (char *) (page + ItemIdGetOffset(iid));
1402 :
1403 475325656 : if (ItemIdIsNormal(iid))
1404 : {
1405 448765308 : HeapTupleHeader page_htup = (HeapTupleHeader) page_item;
1406 :
1407 : /*
1408 : * If xmin of a tuple is not yet frozen, we should ignore
1409 : * differences in hint bits, since they can be set without
1410 : * emitting WAL.
1411 : */
1412 448765308 : if (!HeapTupleHeaderXminFrozen(page_htup))
1413 441508488 : page_htup->t_infomask &= ~HEAP_XACT_MASK;
1414 : else
1415 : {
1416 : /* Still we need to mask xmax hint bits. */
1417 7256820 : page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
1418 7256820 : page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
1419 : }
1420 :
1421 : /*
1422 : * During replay, we set Command Id to FirstCommandId. Hence, mask
1423 : * it. See heap_xlog_insert() for details.
1424 : */
1425 448765308 : page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;
1426 :
1427 : /*
1428 : * For a speculative tuple, heap_insert() does not set ctid in the
1429 : * caller-passed heap tuple itself, leaving the ctid field to
1430 : * contain a speculative token value - a per-backend monotonically
1431 : * increasing identifier. Besides, it does not WAL-log ctid under
1432 : * any circumstances.
1433 : *
1434 : * During redo, heap_xlog_insert() sets t_ctid to current block
1435 : * number and self offset number. It doesn't care about any
1436 : * speculative insertions on the primary. Hence, we set t_ctid to
1437 : * current block number and self offset number to ignore any
1438 : * inconsistency.
1439 : */
1440 448765308 : if (HeapTupleHeaderIsSpeculative(page_htup))
1441 160 : ItemPointerSet(&page_htup->t_ctid, blkno, off);
1442 :
1443 : /*
1444 : * NB: Not ignoring ctid changes due to the tuple having moved
1445 : * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
1446 : * important information that needs to be in-sync between primary
1447 : * and standby, and thus is WAL logged.
1448 : */
1449 : }
1450 :
1451 : /*
1452 : * Ignore any padding bytes after the tuple, when the length of the
1453 : * item is not MAXALIGNed.
1454 : */
1455 475325656 : if (ItemIdHasStorage(iid))
1456 : {
1457 448765308 : int len = ItemIdGetLength(iid);
1458 448765308 : int padlen = MAXALIGN(len) - len;
1459 :
1460 448765308 : if (padlen > 0)
1461 239065244 : memset(page_item + len, MASK_MARKER, padlen);
1462 : }
1463 : }
1464 5843248 : }
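
/*
 * Worked example (editorial note) for the padding mask above: with 8-byte
 * MAXALIGN, a 61-byte tuple occupies MAXALIGN(61) = 64 bytes on the page,
 * so padlen = 64 - 61 = 3. Those three bytes are never WAL-logged, so they
 * are overwritten with MASK_MARKER before the consistency comparison
 * between the replayed page and the full-page image.
 */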