/*-------------------------------------------------------------------------
 *
 * heapam_xlog.c
 *    WAL replay logic for heap access method.
 *
 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/access/heap/heapam_xlog.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/bufmask.h"
#include "access/heapam.h"
#include "access/visibilitymap.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "storage/freespace.h"
#include "storage/standby.h"


/*
 * Replay XLOG_HEAP2_PRUNE_* records.
 */
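/*
 * For reference, the record layout consumed by this function is roughly:
 *
 *   - xl_heap_prune: fixed-size header (SizeOfHeapPrune)
 *   - TransactionId: optional snapshot conflict horizon, stored unaligned;
 *     present only when XLHP_HAS_CONFLICT_HORIZON is set
 *   - block 0 data: freeze plans and redirected/dead/unused offset arrays,
 *     decoded by heap_xlog_deserialize_prune_and_freeze()
 *
 * See log_heap_prune_and_freeze() for the authoritative encoding.
 */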
static void
heap_xlog_prune_freeze(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    char       *maindataptr = XLogRecGetData(record);
    xl_heap_prune xlrec;
    Buffer      buffer;
    RelFileLocator rlocator;
    BlockNumber blkno;
    Buffer      vmbuffer = InvalidBuffer;
    uint8       vmflags = 0;
    Size        freespace = 0;
    bool        do_update_fsm = false;

    XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
    memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
    maindataptr += SizeOfHeapPrune;

    /*
     * We will take an ordinary exclusive lock or a cleanup lock depending on
     * whether the XLHP_CLEANUP_LOCK flag is set. With an ordinary exclusive
     * lock, we better not be doing anything that requires moving existing
     * tuple data.
     */
    Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
           (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);

    if (xlrec.flags & XLHP_VM_ALL_VISIBLE)
    {
        vmflags = VISIBILITYMAP_ALL_VISIBLE;
        if (xlrec.flags & XLHP_VM_ALL_FROZEN)
            vmflags |= VISIBILITYMAP_ALL_FROZEN;
    }

    /*
     * After xl_heap_prune is the optional snapshot conflict horizon.
     *
     * In Hot Standby mode, we must ensure that there are no running queries
     * which would conflict with the changes in this record. That means we
     * can't replay this record if it removes tuples that are still visible to
     * transactions on the standby, freeze tuples with xids that are still
     * considered running on the standby, or set a page as all-visible in the
     * VM if it isn't all-visible to all transactions on the standby.
     */
    if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
    {
        TransactionId snapshot_conflict_horizon;

        /* memcpy() because snapshot_conflict_horizon is stored unaligned */
        memcpy(&snapshot_conflict_horizon, maindataptr, sizeof(TransactionId));
        maindataptr += sizeof(TransactionId);

        if (InHotStandby)
            ResolveRecoveryConflictWithSnapshot(snapshot_conflict_horizon,
                                                (xlrec.flags & XLHP_IS_CATALOG_REL) != 0,
                                                rlocator);
    }

    /*
     * If we have a full-page image of the heap block, restore it and we're
     * done with the heap block.
     */
    if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
                                      (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
                                      &buffer) == BLK_NEEDS_REDO)
    {
        Page        page = BufferGetPage(buffer);
        OffsetNumber *redirected;
        OffsetNumber *nowdead;
        OffsetNumber *nowunused;
        int         nredirected;
        int         ndead;
        int         nunused;
        int         nplans;
        Size        datalen;
        xlhp_freeze_plan *plans;
        OffsetNumber *frz_offsets;
        char       *dataptr = XLogRecGetBlockData(record, 0, &datalen);
        bool        do_prune;

        heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
                                               &nplans, &plans, &frz_offsets,
                                               &nredirected, &redirected,
                                               &ndead, &nowdead,
                                               &nunused, &nowunused);

        do_prune = nredirected > 0 || ndead > 0 || nunused > 0;

        /* Ensure the record does something */
        Assert(do_prune || nplans > 0 || vmflags & VISIBILITYMAP_VALID_BITS);

        /*
         * Update all line pointers per the record, and repair fragmentation
         * if needed.
         */
        if (do_prune)
            heap_page_prune_execute(buffer,
                                    (xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
                                    redirected, nredirected,
                                    nowdead, ndead,
                                    nowunused, nunused);

        /* Freeze tuples */
        for (int p = 0; p < nplans; p++)
        {
            HeapTupleFreeze frz;

            /*
             * Convert freeze plan representation from WAL record into
             * per-tuple format used by heap_execute_freeze_tuple
             */
            frz.xmax = plans[p].xmax;
            frz.t_infomask2 = plans[p].t_infomask2;
            frz.t_infomask = plans[p].t_infomask;
            frz.frzflags = plans[p].frzflags;
            frz.offset = InvalidOffsetNumber;   /* unused, but be tidy */

            for (int i = 0; i < plans[p].ntuples; i++)
            {
                OffsetNumber offset = *(frz_offsets++);
                ItemId      lp;
                HeapTupleHeader tuple;

                lp = PageGetItemId(page, offset);
                tuple = (HeapTupleHeader) PageGetItem(page, lp);
                heap_execute_freeze_tuple(tuple, &frz);
            }
        }
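
        /*
         * Note that the WAL record stores each distinct freeze plan only
         * once: frz_offsets is a single array of tuple offsets laid out
         * plan by plan, which is why it is consumed with a moving pointer
         * here rather than indexed per plan.
         */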

        /* There should be no more data */
        Assert((char *) frz_offsets == dataptr + datalen);

        /*
         * The critical integrity requirement here is that we must never end
         * up with the visibility map bit set and the page-level
         * PD_ALL_VISIBLE bit unset. If that were to occur, a subsequent page
         * modification would fail to clear the visibility map bit.
         */
        if (vmflags & VISIBILITYMAP_VALID_BITS)
        {
            PageSetAllVisible(page);
            PageClearPrunable(page);
        }

        MarkBufferDirty(buffer);

        /*
         * See log_heap_prune_and_freeze() for commentary on when we set the
         * heap page LSN.
         */
        if (do_prune || nplans > 0 ||
            ((vmflags & VISIBILITYMAP_VALID_BITS) && XLogHintBitIsNeeded()))
            PageSetLSN(page, lsn);

        /*
         * Note: we don't worry about updating the page's prunability hints.
         * At worst this will cause an extra prune cycle to occur soon.
         */
    }

    /*
     * If we 1) released any space or line pointers or 2) set PD_ALL_VISIBLE
     * or the VM, update the freespace map.
     *
     * Even when no actual space is freed (when only marking the page
     * all-visible or frozen), we still update the FSM. Because the FSM is
     * unlogged and maintained heuristically, it often becomes stale on
     * standbys. If such a standby is later promoted and runs VACUUM, it will
     * skip recalculating free space for pages that were marked
     * all-visible/all-frozen. FreeSpaceMapVacuum() can then propagate overly
     * optimistic free space values upward, causing future insertions to
     * select pages that turn out to be unusable. In bulk, this can lead to
     * long stalls.
     *
     * To prevent this, always update the FSM even when only marking a page
     * all-visible/all-frozen.
     *
     * Do this regardless of whether a full-page image is logged, since FSM
     * data is not part of the page itself.
     */
    if (BufferIsValid(buffer))
    {
        if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
                            XLHP_HAS_DEAD_ITEMS |
                            XLHP_HAS_NOW_UNUSED_ITEMS)) ||
            (vmflags & VISIBILITYMAP_VALID_BITS))
        {
            freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
            do_update_fsm = true;
        }

        /*
         * We want to avoid holding an exclusive lock on the heap buffer while
         * doing IO (either of the FSM or the VM), so we'll release it now.
         */
        UnlockReleaseBuffer(buffer);
    }

    /*
     * Now read and update the VM block.
     *
     * We must redo changes to the VM even if the heap page was skipped due to
     * LSN interlock. See comment in heap_xlog_multi_insert() for more details
     * on replaying changes to the VM.
     */
    if ((vmflags & VISIBILITYMAP_VALID_BITS) &&
        XLogReadBufferForRedoExtended(record, 1,
                                      RBM_ZERO_ON_ERROR,
                                      false,
                                      &vmbuffer) == BLK_NEEDS_REDO)
    {
        Page        vmpage = BufferGetPage(vmbuffer);

        /* initialize the page if it was read as zeros */
        if (PageIsNew(vmpage))
            PageInit(vmpage, BLCKSZ, 0);

        visibilitymap_set(blkno, vmbuffer, vmflags, rlocator);

        Assert(BufferIsDirty(vmbuffer));
        PageSetLSN(vmpage, lsn);
    }

    if (BufferIsValid(vmbuffer))
        UnlockReleaseBuffer(vmbuffer);

    if (do_update_fsm)
        XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
}

/*
 * Given an "infobits" field from an XLog record, set the correct bits in the
 * given infomask and infomask2 for the tuple touched by the record.
 *
 * (This is the reverse of compute_infobits).
 */
static void
fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
{
    *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
                   HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
    *infomask2 &= ~HEAP_KEYS_UPDATED;

    if (infobits & XLHL_XMAX_IS_MULTI)
        *infomask |= HEAP_XMAX_IS_MULTI;
    if (infobits & XLHL_XMAX_LOCK_ONLY)
        *infomask |= HEAP_XMAX_LOCK_ONLY;
    if (infobits & XLHL_XMAX_EXCL_LOCK)
        *infomask |= HEAP_XMAX_EXCL_LOCK;
    /* note HEAP_XMAX_SHR_LOCK isn't considered here */
    if (infobits & XLHL_XMAX_KEYSHR_LOCK)
        *infomask |= HEAP_XMAX_KEYSHR_LOCK;

    if (infobits & XLHL_KEYS_UPDATED)
        *infomask2 |= HEAP_KEYS_UPDATED;
}
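
/*
 * For reference, the encoding side (compute_infobits() in heapam.c) builds
 * the infobits approximately like this:
 *
 *     return ((infomask & HEAP_XMAX_IS_MULTI) != 0 ? XLHL_XMAX_IS_MULTI : 0) |
 *            ((infomask & HEAP_XMAX_LOCK_ONLY) != 0 ? XLHL_XMAX_LOCK_ONLY : 0) |
 *            ((infomask & HEAP_XMAX_EXCL_LOCK) != 0 ? XLHL_XMAX_EXCL_LOCK : 0) |
 *            ((infomask & HEAP_XMAX_KEYSHR_LOCK) != 0 ? XLHL_XMAX_KEYSHR_LOCK : 0) |
 *            ((infomask2 & HEAP_KEYS_UPDATED) != 0 ? XLHL_KEYS_UPDATED : 0);
 *
 * There is no XLHL bit for HEAP_XMAX_SHR_LOCK because that infomask value is
 * defined as HEAP_XMAX_EXCL_LOCK | HEAP_XMAX_KEYSHR_LOCK, so a shared lock
 * round-trips through those two bits.
 */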

/*
 * Replay XLOG_HEAP_DELETE records.
 */
static void
heap_xlog_delete(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;
    ItemId      lp;
    HeapTupleHeader htup;
    BlockNumber blkno;
    RelFileLocator target_locator;
    ItemPointerData target_tid;

    XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
    ItemPointerSetBlockNumber(&target_tid, blkno);
    ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
    {
        Relation    reln = CreateFakeRelcacheEntry(target_locator);
        Buffer      vmbuffer = InvalidBuffer;

        visibilitymap_pin(reln, blkno, &vmbuffer);
        visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }
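
    /*
     * A fake relcache entry is needed above because visibilitymap_pin() and
     * visibilitymap_clear() take a Relation, but there is no relcache to
     * consult during recovery; the fake entry carries just enough state
     * (essentially the RelFileLocator) for this low-level access.
     */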

    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        page = BufferGetPage(buffer);

        if (xlrec->offnum < 1 || xlrec->offnum > PageGetMaxOffsetNumber(page))
            elog(PANIC, "offnum out of range");
        lp = PageGetItemId(page, xlrec->offnum);
        if (!ItemIdIsNormal(lp))
            elog(PANIC, "invalid lp");

        htup = (HeapTupleHeader) PageGetItem(page, lp);

        htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
        htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
        HeapTupleHeaderClearHotUpdated(htup);
        fix_infomask_from_infobits(xlrec->infobits_set,
                                   &htup->t_infomask, &htup->t_infomask2);
        if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
            HeapTupleHeaderSetXmax(htup, xlrec->xmax);
        else
            HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
        HeapTupleHeaderSetCmax(htup, FirstCommandId, false);

        /* Mark the page as a candidate for pruning */
        PageSetPrunable(page, XLogRecGetXid(record));

        if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
            PageClearAllVisible(page);

        /* Make sure t_ctid is set correctly */
        if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
            HeapTupleHeaderSetMovedPartitions(htup);
        else
            htup->t_ctid = target_tid;
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}

/*
 * Replay XLOG_HEAP_INSERT records.
 */
static void
heap_xlog_insert(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;
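    /*
     * Scratch space for reconstructing the tuple: the union both reserves
     * MaxHeapTupleSize bytes and gives the buffer the alignment required
     * for a HeapTupleHeaderData.
     */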
    union
    {
        HeapTupleHeaderData hdr;
        char        data[MaxHeapTupleSize];
    }           tbuf;
    HeapTupleHeader htup;
    xl_heap_header xlhdr;
    uint32      newlen;
    Size        freespace = 0;
    RelFileLocator target_locator;
    BlockNumber blkno;
    ItemPointerData target_tid;
    XLogRedoAction action;

    XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
    ItemPointerSetBlockNumber(&target_tid, blkno);
    ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);

    /* No freezing in the heap_insert() code path */
    Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
    {
        Relation    reln = CreateFakeRelcacheEntry(target_locator);
        Buffer      vmbuffer = InvalidBuffer;

        visibilitymap_pin(reln, blkno, &vmbuffer);
        visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }

    /*
     * If we inserted the first and only tuple on the page, re-initialize the
     * page from scratch.
     */
    if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
    {
        buffer = XLogInitBufferForRedo(record, 0);
        page = BufferGetPage(buffer);
        PageInit(page, BufferGetPageSize(buffer), 0);
        action = BLK_NEEDS_REDO;
    }
    else
        action = XLogReadBufferForRedo(record, 0, &buffer);
    if (action == BLK_NEEDS_REDO)
    {
        Size        datalen;
        char       *data;

        page = BufferGetPage(buffer);

        if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
            elog(PANIC, "invalid max offset number");

        data = XLogRecGetBlockData(record, 0, &datalen);

        newlen = datalen - SizeOfHeapHeader;
        Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
        memcpy(&xlhdr, data, SizeOfHeapHeader);
        data += SizeOfHeapHeader;

        htup = &tbuf.hdr;
        MemSet(htup, 0, SizeofHeapTupleHeader);
        /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
        memcpy((char *) htup + SizeofHeapTupleHeader,
               data,
               newlen);
        newlen += SizeofHeapTupleHeader;
        htup->t_infomask2 = xlhdr.t_infomask2;
        htup->t_infomask = xlhdr.t_infomask;
        htup->t_hoff = xlhdr.t_hoff;
        HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
        HeapTupleHeaderSetCmin(htup, FirstCommandId);
        htup->t_ctid = target_tid;

        if (PageAddItem(page, htup, newlen, xlrec->offnum, true, true) == InvalidOffsetNumber)
            elog(PANIC, "failed to add tuple");

        freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */

        /*
         * Set the page prunable to trigger on-access pruning later, which may
         * set the page all-visible in the VM. See comments in heap_insert().
         */
        if (TransactionIdIsNormal(XLogRecGetXid(record)) &&
            !HeapTupleHeaderXminFrozen(htup))
            PageSetPrunable(page, XLogRecGetXid(record));

        PageSetLSN(page, lsn);

        if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
            PageClearAllVisible(page);

        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);

    /*
     * If the page is running low on free space, update the FSM as well.
     * Arbitrarily, our definition of "low" is less than 20%. We can't do much
     * better than that without knowing the fill-factor for the table.
     *
     * XXX: Don't do this if the page was restored from full page image. We
     * don't bother to update the FSM in that case, it doesn't need to be
     * totally accurate anyway.
     */
    if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
        XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
}

/*
 * Replay XLOG_HEAP2_MULTI_INSERT records.
 */
static void
heap_xlog_multi_insert(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_multi_insert *xlrec;
    RelFileLocator rlocator;
    BlockNumber blkno;
    Buffer      buffer;
    Page        page;
    union
    {
        HeapTupleHeaderData hdr;
        char        data[MaxHeapTupleSize];
    }           tbuf;
    HeapTupleHeader htup;
    uint32      newlen;
    Size        freespace = 0;
    int         i;
    bool        isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
    XLogRedoAction action;
    Buffer      vmbuffer = InvalidBuffer;

    /*
     * Insertion doesn't overwrite MVCC data, so no conflict processing is
     * required.
     */
    xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);

    XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);

    /* check that the mutually exclusive flags are not both set */
    Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
             (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
    {
        Relation    reln = CreateFakeRelcacheEntry(rlocator);

        visibilitymap_pin(reln, blkno, &vmbuffer);
        visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
        ReleaseBuffer(vmbuffer);
        vmbuffer = InvalidBuffer;
        FreeFakeRelcacheEntry(reln);
    }

    if (isinit)
    {
        buffer = XLogInitBufferForRedo(record, 0);
        page = BufferGetPage(buffer);
        PageInit(page, BufferGetPageSize(buffer), 0);
        action = BLK_NEEDS_REDO;
    }
    else
        action = XLogReadBufferForRedo(record, 0, &buffer);
    if (action == BLK_NEEDS_REDO)
    {
        char       *tupdata;
        char       *endptr;
        Size        len;

        /* Tuples are stored as block data */
        tupdata = XLogRecGetBlockData(record, 0, &len);
        endptr = tupdata + len;

        page = BufferGetPage(buffer);

        for (i = 0; i < xlrec->ntuples; i++)
        {
            OffsetNumber offnum;
            xl_multi_insert_tuple *xlhdr;

            /*
             * If we're reinitializing the page, the tuples are stored in
             * order from FirstOffsetNumber. Otherwise there's an array of
             * offsets in the WAL record, and the tuples come after that.
             */
            if (isinit)
                offnum = FirstOffsetNumber + i;
            else
                offnum = xlrec->offsets[i];
            if (PageGetMaxOffsetNumber(page) + 1 < offnum)
                elog(PANIC, "invalid max offset number");

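            /*
             * Each xl_multi_insert_tuple header is stored 2-byte aligned
             * within the block data, hence the SHORTALIGN() here; the tuple
             * data following each header is packed without alignment
             * padding.
             */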
            xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
            tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;

            newlen = xlhdr->datalen;
            Assert(newlen <= MaxHeapTupleSize);
            htup = &tbuf.hdr;
            MemSet(htup, 0, SizeofHeapTupleHeader);
            /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
            memcpy((char *) htup + SizeofHeapTupleHeader,
                   tupdata,
                   newlen);
            tupdata += newlen;

            newlen += SizeofHeapTupleHeader;
            htup->t_infomask2 = xlhdr->t_infomask2;
            htup->t_infomask = xlhdr->t_infomask;
            htup->t_hoff = xlhdr->t_hoff;
            HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
            HeapTupleHeaderSetCmin(htup, FirstCommandId);
            ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
            ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);

            offnum = PageAddItem(page, htup, newlen, offnum, true, true);
            if (offnum == InvalidOffsetNumber)
                elog(PANIC, "failed to add tuple");
        }
        if (tupdata != endptr)
            elog(PANIC, "total tuple length mismatch");

        freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */

        PageSetLSN(page, lsn);

        if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
            PageClearAllVisible(page);

        /*
         * XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible. If
         * we are not setting the page frozen, then set the page's prunable
         * hint so that we trigger on-access pruning later which may set the
         * page all-visible in the VM.
         */
        if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
        {
            PageSetAllVisible(page);
            PageClearPrunable(page);
        }
        else
            PageSetPrunable(page, XLogRecGetXid(record));

        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);

    buffer = InvalidBuffer;

    /*
     * Read and update the visibility map (VM) block.
     *
     * We must always redo VM changes, even if the corresponding heap page
     * update was skipped due to the LSN interlock. Each VM block covers
     * multiple heap pages, so later WAL records may update other bits in the
     * same block. If this record includes an FPI (full-page image),
     * subsequent WAL records may depend on it to guard against torn pages.
     *
     * Heap page changes are replayed first to preserve the invariant:
     * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
     *
     * Note that we released the heap page lock above. During normal
     * operation, this would be unsafe --- a concurrent modification could
     * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
     * invariant.
     *
     * During recovery, however, no concurrent writers exist. Therefore,
     * updating the VM without holding the heap page lock is safe enough. This
     * same approach is taken when replaying XLOG_HEAP2_PRUNE* records (see
     * heap_xlog_prune_freeze()).
     */
    if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
        XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
                                      &vmbuffer) == BLK_NEEDS_REDO)
    {
        Page        vmpage = BufferGetPage(vmbuffer);

        /* initialize the page if it was read as zeros */
        if (PageIsNew(vmpage))
            PageInit(vmpage, BLCKSZ, 0);

        visibilitymap_set(blkno,
                          vmbuffer,
                          VISIBILITYMAP_ALL_VISIBLE |
                          VISIBILITYMAP_ALL_FROZEN,
                          rlocator);

        Assert(BufferIsDirty(vmbuffer));
        PageSetLSN(vmpage, lsn);
    }

    if (BufferIsValid(vmbuffer))
        UnlockReleaseBuffer(vmbuffer);

    /*
     * If the page is running low on free space, update the FSM as well.
     * Arbitrarily, our definition of "low" is less than 20%. We can't do much
     * better than that without knowing the fill-factor for the table.
     *
     * XXX: Don't do this if the page was restored from full page image. We
     * don't bother to update the FSM in that case, it doesn't need to be
     * totally accurate anyway.
     */
    if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
        XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
}

/*
 * Replay XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE records.
 */
static void
heap_xlog_update(XLogReaderState *record, bool hot_update)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
    RelFileLocator rlocator;
    BlockNumber oldblk;
    BlockNumber newblk;
    ItemPointerData newtid;
    Buffer      obuffer,
                nbuffer;
    Page        opage,
                npage;
    OffsetNumber offnum;
    ItemId      lp;
    HeapTupleData oldtup;
    HeapTupleHeader htup;
    uint16      prefixlen = 0,
                suffixlen = 0;
    char       *newp;
    union
    {
        HeapTupleHeaderData hdr;
        char        data[MaxHeapTupleSize];
    }           tbuf;
    xl_heap_header xlhdr;
    uint32      newlen;
    Size        freespace = 0;
    XLogRedoAction oldaction;
    XLogRedoAction newaction;

    /* initialize to keep the compiler quiet */
    oldtup.t_data = NULL;
    oldtup.t_len = 0;

    XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
    if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
    {
        /* HOT updates are never done across pages */
        Assert(!hot_update);
    }
    else
        oldblk = newblk;

    ItemPointerSet(&newtid, newblk, xlrec->new_offnum);

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
    {
        Relation    reln = CreateFakeRelcacheEntry(rlocator);
        Buffer      vmbuffer = InvalidBuffer;

        visibilitymap_pin(reln, oldblk, &vmbuffer);
        visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }

    /*
     * In normal operation, it is important to lock the two pages in
     * page-number order, to avoid possible deadlocks against other update
     * operations going the other way. However, during WAL replay there can
     * be no other update happening, so we don't need to worry about that. But
     * we *do* need to worry that we don't expose an inconsistent state to Hot
     * Standby queries --- so the original page can't be unlocked before we've
     * added the new tuple to the new page.
     */

    /* Deal with old tuple version */
    oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
                                      &obuffer);
    if (oldaction == BLK_NEEDS_REDO)
    {
        opage = BufferGetPage(obuffer);
        offnum = xlrec->old_offnum;
        if (offnum < 1 || offnum > PageGetMaxOffsetNumber(opage))
            elog(PANIC, "offnum out of range");
        lp = PageGetItemId(opage, offnum);
        if (!ItemIdIsNormal(lp))
            elog(PANIC, "invalid lp");

        htup = (HeapTupleHeader) PageGetItem(opage, lp);

        oldtup.t_data = htup;
        oldtup.t_len = ItemIdGetLength(lp);

        htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
        htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
        if (hot_update)
            HeapTupleHeaderSetHotUpdated(htup);
        else
            HeapTupleHeaderClearHotUpdated(htup);
        fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
                                   &htup->t_infomask2);
        HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
        HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
        /* Set forward chain link in t_ctid */
        htup->t_ctid = newtid;

        /* Mark the page as a candidate for pruning */
        PageSetPrunable(opage, XLogRecGetXid(record));

        if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
            PageClearAllVisible(opage);

        PageSetLSN(opage, lsn);
        MarkBufferDirty(obuffer);
    }

    /*
     * Read the page the new tuple goes into, if different from old.
     */
    if (oldblk == newblk)
    {
        nbuffer = obuffer;
        newaction = oldaction;
    }
    else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
    {
        nbuffer = XLogInitBufferForRedo(record, 0);
        npage = BufferGetPage(nbuffer);
        PageInit(npage, BufferGetPageSize(nbuffer), 0);
        newaction = BLK_NEEDS_REDO;
    }
    else
        newaction = XLogReadBufferForRedo(record, 0, &nbuffer);

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
    {
        Relation    reln = CreateFakeRelcacheEntry(rlocator);
        Buffer      vmbuffer = InvalidBuffer;

        visibilitymap_pin(reln, newblk, &vmbuffer);
        visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }

    /* Deal with new tuple */
    if (newaction == BLK_NEEDS_REDO)
    {
        char       *recdata;
        char       *recdata_end;
        Size        datalen;
        Size        tuplen;

        recdata = XLogRecGetBlockData(record, 0, &datalen);
        recdata_end = recdata + datalen;

        npage = BufferGetPage(nbuffer);

        offnum = xlrec->new_offnum;
        if (PageGetMaxOffsetNumber(npage) + 1 < offnum)
            elog(PANIC, "invalid max offset number");

        if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
        {
            Assert(newblk == oldblk);
            memcpy(&prefixlen, recdata, sizeof(uint16));
            recdata += sizeof(uint16);
        }
        if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
        {
            Assert(newblk == oldblk);
            memcpy(&suffixlen, recdata, sizeof(uint16));
            recdata += sizeof(uint16);
        }

        memcpy(&xlhdr, recdata, SizeOfHeapHeader);
        recdata += SizeOfHeapHeader;

        tuplen = recdata_end - recdata;
        Assert(tuplen <= MaxHeapTupleSize);

        htup = &tbuf.hdr;
        MemSet(htup, 0, SizeofHeapTupleHeader);

        /*
         * Reconstruct the new tuple using the prefix and/or suffix from the
         * old tuple, and the data stored in the WAL record.
         */
        newp = (char *) htup + SizeofHeapTupleHeader;
        if (prefixlen > 0)
        {
            int         len;

            /* copy bitmap [+ padding] [+ oid] from WAL record */
            len = xlhdr.t_hoff - SizeofHeapTupleHeader;
            memcpy(newp, recdata, len);
            recdata += len;
            newp += len;

            /* copy prefix from old tuple */
            memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
            newp += prefixlen;

            /* copy new tuple data from WAL record */
            len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
            memcpy(newp, recdata, len);
            recdata += len;
            newp += len;
        }
        else
        {
            /*
             * copy bitmap [+ padding] [+ oid] + data from record, all in one
             * go
             */
            memcpy(newp, recdata, tuplen);
            recdata += tuplen;
            newp += tuplen;
        }
        Assert(recdata == recdata_end);

        /* copy suffix from old tuple */
        if (suffixlen > 0)
            memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
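
        /*
         * The reconstructed tuple is thus laid out as: header, then bitmap
         * [+ padding] [+ oid] from the WAL record, then the prefix from the
         * old tuple (if any), then the new data from the WAL record, and
         * finally the suffix from the old tuple (if any).
         */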

        newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
        htup->t_infomask2 = xlhdr.t_infomask2;
        htup->t_infomask = xlhdr.t_infomask;
        htup->t_hoff = xlhdr.t_hoff;

        HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
        HeapTupleHeaderSetCmin(htup, FirstCommandId);
        HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
        /* Make sure there is no forward chain link in t_ctid */
        htup->t_ctid = newtid;

        offnum = PageAddItem(npage, htup, newlen, offnum, true, true);
        if (offnum == InvalidOffsetNumber)
            elog(PANIC, "failed to add tuple");

        if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
            PageClearAllVisible(npage);

        /* needed to update FSM below */
        freespace = PageGetHeapFreeSpace(npage);

        PageSetLSN(npage, lsn);
        /* See heap_insert() for why we set pd_prune_xid on insert */
        PageSetPrunable(npage, XLogRecGetXid(record));
        MarkBufferDirty(nbuffer);
    }

    if (BufferIsValid(nbuffer) && nbuffer != obuffer)
        UnlockReleaseBuffer(nbuffer);
    if (BufferIsValid(obuffer))
        UnlockReleaseBuffer(obuffer);

    /*
     * If the new page is running low on free space, update the FSM as well.
     * Arbitrarily, our definition of "low" is less than 20%. We can't do much
     * better than that without knowing the fill-factor for the table.
     *
     * However, don't update the FSM on HOT updates, because after crash
     * recovery, either the old or the new tuple will certainly be dead and
     * prunable. After pruning, the page will have roughly as much free space
     * as it did before the update, assuming the new tuple is about the same
     * size as the old one.
     *
     * XXX: Don't do this if the page was restored from full page image. We
     * don't bother to update the FSM in that case, it doesn't need to be
     * totally accurate anyway.
     */
    if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
        XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
}

/*
 * Replay XLOG_HEAP_CONFIRM records. These finish speculative insertions
 * (e.g. from INSERT ... ON CONFLICT) by replacing the speculative token in
 * t_ctid with the tuple's real location.
 */
static void
heap_xlog_confirm(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;
    OffsetNumber offnum;
    ItemId      lp;
    HeapTupleHeader htup;

    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        page = BufferGetPage(buffer);

        offnum = xlrec->offnum;
        if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
            elog(PANIC, "offnum out of range");
        lp = PageGetItemId(page, offnum);
        if (!ItemIdIsNormal(lp))
            elog(PANIC, "invalid lp");

        htup = (HeapTupleHeader) PageGetItem(page, lp);

        /*
         * Confirm tuple as actually inserted
         */
        ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum);

        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}

/*
 * Replay XLOG_HEAP_LOCK records.
 */
static void
heap_xlog_lock(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;
    OffsetNumber offnum;
    ItemId      lp;
    HeapTupleHeader htup;

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
    {
        RelFileLocator rlocator;
        Buffer      vmbuffer = InvalidBuffer;
        BlockNumber block;
        Relation    reln;

        XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
        reln = CreateFakeRelcacheEntry(rlocator);

        visibilitymap_pin(reln, block, &vmbuffer);
        visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);

        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }

    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        page = BufferGetPage(buffer);

        offnum = xlrec->offnum;
        if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
            elog(PANIC, "offnum out of range");
        lp = PageGetItemId(page, offnum);
        if (!ItemIdIsNormal(lp))
            elog(PANIC, "invalid lp");

        htup = (HeapTupleHeader) PageGetItem(page, lp);

        htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
        htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
        fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
                                   &htup->t_infomask2);

        /*
         * Clear relevant update flags, but only if the modified infomask says
         * there's no update.
         */
        if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
        {
            HeapTupleHeaderClearHotUpdated(htup);
            /* Make sure there is no forward chain link in t_ctid */
            ItemPointerSet(&htup->t_ctid,
                           BufferGetBlockNumber(buffer),
                           offnum);
        }
        HeapTupleHeaderSetXmax(htup, xlrec->xmax);
        HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}

/*
 * Replay XLOG_HEAP2_LOCK_UPDATED records.
 */
static void
heap_xlog_lock_updated(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_lock_updated *xlrec;
    Buffer      buffer;
    Page        page;
    OffsetNumber offnum;
    ItemId      lp;
    HeapTupleHeader htup;

    xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
    {
        RelFileLocator rlocator;
        Buffer      vmbuffer = InvalidBuffer;
        BlockNumber block;
        Relation    reln;

        XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
        reln = CreateFakeRelcacheEntry(rlocator);

        visibilitymap_pin(reln, block, &vmbuffer);
        visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);

        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }

    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        page = BufferGetPage(buffer);

        offnum = xlrec->offnum;
        if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
            elog(PANIC, "offnum out of range");
        lp = PageGetItemId(page, offnum);
        if (!ItemIdIsNormal(lp))
            elog(PANIC, "invalid lp");

        htup = (HeapTupleHeader) PageGetItem(page, lp);

        htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
        htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
        fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
                                   &htup->t_infomask2);
        HeapTupleHeaderSetXmax(htup, xlrec->xmax);

        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}

/*
 * Replay XLOG_HEAP_INPLACE records.
 */
static void
heap_xlog_inplace(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;
    OffsetNumber offnum;
    ItemId      lp;
    HeapTupleHeader htup;
    uint32      oldlen;
    Size        newlen;

    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        char       *newtup = XLogRecGetBlockData(record, 0, &newlen);

        page = BufferGetPage(buffer);

        offnum = xlrec->offnum;
        if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
            elog(PANIC, "offnum out of range");
        lp = PageGetItemId(page, offnum);
        if (!ItemIdIsNormal(lp))
            elog(PANIC, "invalid lp");

        htup = (HeapTupleHeader) PageGetItem(page, lp);

        oldlen = ItemIdGetLength(lp) - htup->t_hoff;
        if (oldlen != newlen)
            elog(PANIC, "wrong tuple length");

        memcpy((char *) htup + htup->t_hoff, newtup, newlen);

        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);

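    /*
     * Inplace updates modify catalog tuples without changing xmin/xmax, so
     * the cache invalidations carried by the record must be replayed here
     * for hot-standby backends to notice the change.
     */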
    ProcessCommittedInvalidationMessages(xlrec->msgs,
                                         xlrec->nmsgs,
                                         xlrec->relcacheInitFileInval,
                                         xlrec->dbId,
                                         xlrec->tsId);
}

void
heap_redo(XLogReaderState *record)
{
    uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

    /*
     * These operations don't overwrite MVCC data so no conflict processing is
     * required. The ones in heap2 rmgr do.
     */

    switch (info & XLOG_HEAP_OPMASK)
    {
        case XLOG_HEAP_INSERT:
            heap_xlog_insert(record);
            break;
        case XLOG_HEAP_DELETE:
            heap_xlog_delete(record);
            break;
        case XLOG_HEAP_UPDATE:
            heap_xlog_update(record, false);
            break;
        case XLOG_HEAP_TRUNCATE:

            /*
             * TRUNCATE is a no-op because the actions are already logged as
             * SMGR WAL records. The TRUNCATE WAL record exists only for
             * logical decoding.
             */
            break;
        case XLOG_HEAP_HOT_UPDATE:
            heap_xlog_update(record, true);
            break;
        case XLOG_HEAP_CONFIRM:
            heap_xlog_confirm(record);
            break;
        case XLOG_HEAP_LOCK:
            heap_xlog_lock(record);
            break;
        case XLOG_HEAP_INPLACE:
            heap_xlog_inplace(record);
            break;
        default:
            elog(PANIC, "heap_redo: unknown op code %u", info);
    }
}

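/*
 * For reference, heap_redo above and heap2_redo below are invoked through
 * the resource manager table; rmgrlist.h wires them up with entries along
 * the lines of
 *
 *     PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, ...)
 *     PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, ...)
 *
 * with heap_mask (below) serving as the rm_mask callback for both.
 */
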
void
heap2_redo(XLogReaderState *record)
{
    uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

    switch (info & XLOG_HEAP_OPMASK)
    {
        case XLOG_HEAP2_PRUNE_ON_ACCESS:
        case XLOG_HEAP2_PRUNE_VACUUM_SCAN:
        case XLOG_HEAP2_PRUNE_VACUUM_CLEANUP:
            heap_xlog_prune_freeze(record);
            break;
        case XLOG_HEAP2_MULTI_INSERT:
            heap_xlog_multi_insert(record);
            break;
        case XLOG_HEAP2_LOCK_UPDATED:
            heap_xlog_lock_updated(record);
            break;
        case XLOG_HEAP2_NEW_CID:

            /*
             * Nothing to do on a real replay, only used during logical
             * decoding.
             */
            break;
        case XLOG_HEAP2_REWRITE:
            heap_xlog_logical_rewrite(record);
            break;
        default:
            elog(PANIC, "heap2_redo: unknown op code %u", info);
    }
}

/*
 * Mask a heap page before performing consistency checks on it, hiding
 * fields that can legitimately differ between primary and standby. This is
 * the rm_mask callback used when wal_consistency_checking is enabled.
 */
void
heap_mask(char *pagedata, BlockNumber blkno)
{
    Page        page = (Page) pagedata;
    OffsetNumber off;

    mask_page_lsn_and_checksum(page);

    mask_page_hint_bits(page);
    mask_unused_space(page);

    for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
    {
        ItemId      iid = PageGetItemId(page, off);
        char       *page_item;

        page_item = (char *) (page + ItemIdGetOffset(iid));

        if (ItemIdIsNormal(iid))
        {
            HeapTupleHeader page_htup = (HeapTupleHeader) page_item;

            /*
             * If xmin of a tuple is not yet frozen, we should ignore
             * differences in hint bits, since they can be set without
             * emitting WAL.
             */
            if (!HeapTupleHeaderXminFrozen(page_htup))
                page_htup->t_infomask &= ~HEAP_XACT_MASK;
            else
            {
                /* Still we need to mask xmax hint bits. */
                page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
                page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
            }

            /*
             * During replay, we set Command Id to FirstCommandId. Hence, mask
             * it. See heap_xlog_insert() for details.
             */
            page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;

            /*
             * For a speculative tuple, heap_insert() does not set ctid in the
             * caller-passed heap tuple itself, leaving the ctid field to
             * contain a speculative token value - a per-backend monotonically
             * increasing identifier. Besides, it does not WAL-log ctid under
             * any circumstances.
             *
             * During redo, heap_xlog_insert() sets t_ctid to current block
             * number and self offset number. It doesn't care about any
             * speculative insertions on the primary. Hence, we set t_ctid to
             * current block number and self offset number to ignore any
             * inconsistency.
             */
            if (HeapTupleHeaderIsSpeculative(page_htup))
                ItemPointerSet(&page_htup->t_ctid, blkno, off);

            /*
             * NB: Not ignoring ctid changes due to the tuple having moved
             * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
             * important information that needs to be in-sync between primary
             * and standby, and thus is WAL logged.
             */
        }

        /*
         * Ignore any padding bytes after the tuple, when the length of the
         * item is not MAXALIGNed.
         */
        if (ItemIdHasStorage(iid))
        {
            int         len = ItemIdGetLength(iid);
            int         padlen = MAXALIGN(len) - len;

            if (padlen > 0)
                memset(page_item + len, MASK_MARKER, padlen);
        }
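
        /*
         * (For example, with the typical 8-byte MAXALIGN, a 61-byte tuple
         * is stored in a 64-byte item, and the 3 trailing padding bytes get
         * masked.)
         */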
    }
}