Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * heapam_xlog.c
4 : * WAL replay logic for heap access method.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/heap/heapam_xlog.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/bufmask.h"
18 : #include "access/heapam.h"
19 : #include "access/visibilitymap.h"
20 : #include "access/xlog.h"
21 : #include "access/xlogutils.h"
22 : #include "storage/freespace.h"
23 : #include "storage/standby.h"
24 :
25 :
26 : /*
27 : * Replay XLOG_HEAP2_PRUNE_* records: redo pruning and freezing on the heap page (block 0), then set visibility map bits on block 1 and update the FSM when the record's flags call for it.
28 : */
29 : static void
30 19977 : heap_xlog_prune_freeze(XLogReaderState *record)
31 : {
32 19977 : XLogRecPtr lsn = record->EndRecPtr; /* LSN stamped on the pages modified below */
33 19977 : char *maindataptr = XLogRecGetData(record); /* read cursor over the record's main data */
34 : xl_heap_prune xlrec; /* local copy of the fixed-size header, filled by memcpy() below */
35 : Buffer buffer;
36 : RelFileLocator rlocator;
37 : BlockNumber blkno;
38 19977 : Buffer vmbuffer = InvalidBuffer;
39 19977 : uint8 vmflags = 0; /* VM bits to set, derived from xlrec.flags */
40 19977 : Size freespace = 0; /* measured while the heap buffer is still held, used after release */
41 19977 : bool do_update_fsm = false;
42 : /* Identify the heap block (block 0) and copy out the fixed-size record header. */
43 19977 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
44 19977 : memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
45 19977 : maindataptr += SizeOfHeapPrune;
46 :
47 : /*
48 : * We will take an ordinary exclusive lock or a cleanup lock depending on
49 : * whether the XLHP_CLEANUP_LOCK flag is set. With an ordinary exclusive
50 : * lock, we better not be doing anything that requires moving existing
51 : * tuple data.
52 : */
53 : Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
54 : (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);
55 : /* ALL_FROZEN is only honored together with ALL_VISIBLE. */
56 19977 : if (xlrec.flags & XLHP_VM_ALL_VISIBLE)
57 : {
58 9918 : vmflags = VISIBILITYMAP_ALL_VISIBLE;
59 9918 : if (xlrec.flags & XLHP_VM_ALL_FROZEN)
60 5792 : vmflags |= VISIBILITYMAP_ALL_FROZEN;
61 : }
62 :
63 : /*
64 : * After xl_heap_prune is the optional snapshot conflict horizon.
65 : *
66 : * In Hot Standby mode, we must ensure that there are no running queries
67 : * which would conflict with the changes in this record. That means we
68 : * can't replay this record if it removes tuples that are still visible to
69 : * transactions on the standby, freeze tuples with xids that are still
70 : * considered running on the standby, or set a page as all-visible in the
71 : * VM if it isn't all-visible to all transactions on the standby.
72 : */
73 19977 : if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
74 : {
75 : TransactionId snapshot_conflict_horizon;
76 :
77 : /* memcpy() because snapshot_conflict_horizon is stored unaligned */
78 16076 : memcpy(&snapshot_conflict_horizon, maindataptr, sizeof(TransactionId));
79 16076 : maindataptr += sizeof(TransactionId);
80 :
81 16076 : if (InHotStandby)
82 15626 : ResolveRecoveryConflictWithSnapshot(snapshot_conflict_horizon,
83 15626 : (xlrec.flags & XLHP_IS_CATALOG_REL) != 0,
84 : rlocator);
85 : }
86 :
87 : /*
88 : * If we have a full-page image of the heap block, restore it and we're
89 : * done with the heap block.
90 : */
91 19977 : if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
92 19977 : (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
93 : &buffer) == BLK_NEEDS_REDO)
94 : {
95 13878 : Page page = BufferGetPage(buffer);
96 : OffsetNumber *redirected;
97 : OffsetNumber *nowdead;
98 : OffsetNumber *nowunused;
99 : int nredirected;
100 : int ndead;
101 : int nunused;
102 : int nplans;
103 : Size datalen;
104 : xlhp_freeze_plan *plans;
105 : OffsetNumber *frz_offsets;
106 13878 : char *dataptr = XLogRecGetBlockData(record, 0, &datalen);
107 : bool do_prune;
108 : /* Unpack block 0's data into the freeze plans and the three offset arrays. */
109 13878 : heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
110 : &nplans, &plans, &frz_offsets,
111 : &nredirected, &redirected,
112 : &ndead, &nowdead,
113 : &nunused, &nowunused);
114 :
115 13878 : do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
116 :
117 : /* Ensure the record does something */
118 : Assert(do_prune || nplans > 0 || vmflags & VISIBILITYMAP_VALID_BITS);
119 :
120 : /*
121 : * Update all line pointers per the record, and repair fragmentation
122 : * if needed.
123 : */
124 13878 : if (do_prune)
125 10724 : heap_page_prune_execute(buffer,
126 10724 : (xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
127 : redirected, nredirected,
128 : nowdead, ndead,
129 : nowunused, nunused);
130 :
131 : /* Freeze tuples */
132 15435 : for (int p = 0; p < nplans; p++)
133 : {
134 : HeapTupleFreeze frz;
135 :
136 : /*
137 : * Convert freeze plan representation from WAL record into
138 : * per-tuple format used by heap_execute_freeze_tuple
139 : */
140 1557 : frz.xmax = plans[p].xmax;
141 1557 : frz.t_infomask2 = plans[p].t_infomask2;
142 1557 : frz.t_infomask = plans[p].t_infomask;
143 1557 : frz.frzflags = plans[p].frzflags;
144 1557 : frz.offset = InvalidOffsetNumber; /* unused, but be tidy */
145 : /* frz_offsets is one array shared by all plans; each plan consumes its next ntuples entries. */
146 82634 : for (int i = 0; i < plans[p].ntuples; i++)
147 : {
148 81077 : OffsetNumber offset = *(frz_offsets++);
149 : ItemId lp;
150 : HeapTupleHeader tuple;
151 :
152 81077 : lp = PageGetItemId(page, offset);
153 81077 : tuple = (HeapTupleHeader) PageGetItem(page, lp);
154 81077 : heap_execute_freeze_tuple(tuple, &frz);
155 : }
156 : }
157 :
158 : /* There should be no more data */
159 : Assert((char *) frz_offsets == dataptr + datalen);
160 :
161 : /*
162 : * The critical integrity requirement here is that we must never end
163 : * up with the visibility map bit set and the page-level
164 : * PD_ALL_VISIBLE bit unset. If that were to occur, a subsequent page
165 : * modification would fail to clear the visibility map bit.
166 : */
167 13878 : if (vmflags & VISIBILITYMAP_VALID_BITS)
168 : {
169 5665 : PageSetAllVisible(page);
170 5665 : PageClearPrunable(page);
171 : }
172 :
173 13878 : MarkBufferDirty(buffer);
174 :
175 : /*
176 : * See log_heap_prune_and_freeze() for commentary on when we set the
177 : * heap page LSN.
178 : */
179 13878 : if (do_prune || nplans > 0 ||
180 2042 : ((vmflags & VISIBILITYMAP_VALID_BITS) && XLogHintBitIsNeeded()))
181 13878 : PageSetLSN(page, lsn);
182 :
183 : /*
184 : * Note: apart from PageClearPrunable() in the all-visible case above, we don't worry about updating the page's prunability hints.
185 : * At worst this will cause an extra prune cycle to occur soon.
186 : */
187 : }
188 :
189 : /*
190 : * If we 1) released any space or line pointers or 2) set PD_ALL_VISIBLE
191 : * or the VM, update the freespace map.
192 : *
193 : * Even when no actual space is freed (when only marking the page
194 : * all-visible or frozen), we still update the FSM. Because the FSM is
195 : * unlogged and maintained heuristically, it often becomes stale on
196 : * standbys. If such a standby is later promoted and runs VACUUM, it will
197 : * skip recalculating free space for pages that were marked
198 : * all-visible/all-frozen. FreeSpaceMapVacuum() can then propagate overly
199 : * optimistic free space values upward, causing future insertions to
200 : * select pages that turn out to be unusable. In bulk, this can lead to
201 : * long stalls.
202 : *
203 : * To prevent this, always update the FSM even when only marking a page
204 : * all-visible/all-frozen.
205 : *
206 : * Do this regardless of whether a full-page image is logged, since FSM
207 : * data is not part of the page itself.
208 : */
209 19977 : if (BufferIsValid(buffer))
210 : {
211 19977 : if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
212 : XLHP_HAS_DEAD_ITEMS |
213 5981 : XLHP_HAS_NOW_UNUSED_ITEMS)) ||
214 5981 : (vmflags & VISIBILITYMAP_VALID_BITS))
215 : {
216 19958 : freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
217 19958 : do_update_fsm = true;
218 : }
219 :
220 : /*
221 : * We want to avoid holding an exclusive lock on the heap buffer while
222 : * doing IO (either of the FSM or the VM), so we'll release it now.
223 : */
224 19977 : UnlockReleaseBuffer(buffer);
225 : }
226 :
227 : /*
228 : * Now read and update the VM block.
229 : *
230 : * We must redo changes to the VM even if the heap page was skipped due to
231 : * LSN interlock. See comment in heap_xlog_multi_insert() for more details
232 : * on replaying changes to the VM.
233 : */
234 29895 : if ((vmflags & VISIBILITYMAP_VALID_BITS) &&
235 9918 : XLogReadBufferForRedoExtended(record, 1,
236 : RBM_ZERO_ON_ERROR,
237 : false,
238 : &vmbuffer) == BLK_NEEDS_REDO)
239 : {
240 9554 : Page vmpage = BufferGetPage(vmbuffer);
241 :
242 : /* initialize the page if it was read as zeros */
243 9554 : if (PageIsNew(vmpage))
244 2 : PageInit(vmpage, BLCKSZ, 0);
245 :
246 9554 : visibilitymap_set(blkno, vmbuffer, vmflags, rlocator);
247 :
248 : Assert(BufferIsDirty(vmbuffer)); /* no MarkBufferDirty(vmbuffer) here; presumably visibilitymap_set() dirties it -- TODO confirm */
249 9554 : PageSetLSN(vmpage, lsn);
250 : }
251 :
252 19977 : if (BufferIsValid(vmbuffer))
253 9918 : UnlockReleaseBuffer(vmbuffer);
254 : /* Apply the FSM update last, after both buffers have been released. */
255 19977 : if (do_update_fsm)
256 19958 : XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
257 19977 : }
258 :
259 : /*
260 : * Given an "infobits" field from an XLog record, set the correct bits in the
261 : * given infomask and infomask2 for the tuple touched by the record.
262 : * The xmax multi/lock bits of *infomask and HEAP_KEYS_UPDATED in *infomask2 are cleared first, so a bit absent from infobits ends up unset; all other bits are left alone.
263 : * (This is the reverse of compute_infobits).
264 : */
265 : static void
266 468565 : fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
267 : {
268 468565 : *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
269 : HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
270 468565 : *infomask2 &= ~HEAP_KEYS_UPDATED;
271 : /* Translate each XLHL_* bit that is set into its HEAP_* counterpart. */
272 468565 : if (infobits & XLHL_XMAX_IS_MULTI)
273 3 : *infomask |= HEAP_XMAX_IS_MULTI;
274 468565 : if (infobits & XLHL_XMAX_LOCK_ONLY)
275 55580 : *infomask |= HEAP_XMAX_LOCK_ONLY;
276 468565 : if (infobits & XLHL_XMAX_EXCL_LOCK)
277 54812 : *infomask |= HEAP_XMAX_EXCL_LOCK;
278 : /* note HEAP_XMAX_SHR_LOCK isn't considered here */
279 468565 : if (infobits & XLHL_XMAX_KEYSHR_LOCK)
280 782 : *infomask |= HEAP_XMAX_KEYSHR_LOCK;
281 : /* The only bit that lives in infomask2. */
282 468565 : if (infobits & XLHL_KEYS_UPDATED)
283 318194 : *infomask2 |= HEAP_KEYS_UPDATED;
284 468565 : }
285 :
286 : /*
287 : * Replay XLOG_HEAP_DELETE records: clear the visibility map bits if the record says so, then rewrite the target tuple's header (infomask bits plus xmax, or xmin when XLH_DELETE_IS_SUPER is set) on heap block 0.
288 : */
289 : static void
290 318969 : heap_xlog_delete(XLogReaderState *record)
291 : {
292 318969 : XLogRecPtr lsn = record->EndRecPtr; /* LSN stamped on the heap page if it is modified */
293 318969 : xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
294 : Buffer buffer;
295 : Page page;
296 : ItemId lp;
297 : HeapTupleHeader htup;
298 : BlockNumber blkno;
299 : RelFileLocator target_locator;
300 : ItemPointerData target_tid;
301 : /* Build the tuple's own TID; it is stored into t_ctid below unless this is a partition move. */
302 318969 : XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
303 318969 : ItemPointerSetBlockNumber(&target_tid, blkno);
304 318969 : ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
305 :
306 : /*
307 : * The visibility map may need to be fixed even if the heap page is
308 : * already up-to-date.
309 : */
310 318969 : if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
311 : {
312 55 : Relation reln = CreateFakeRelcacheEntry(target_locator);
313 55 : Buffer vmbuffer = InvalidBuffer;
314 :
315 55 : visibilitymap_pin(reln, blkno, &vmbuffer);
316 55 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
317 55 : ReleaseBuffer(vmbuffer);
318 55 : FreeFakeRelcacheEntry(reln);
319 : }
320 :
321 318969 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
322 : {
323 316750 : page = BufferGetPage(buffer);
324 : /* Validate the logged offset and line pointer before dereferencing the tuple. */
325 316750 : if (xlrec->offnum < 1 || xlrec->offnum > PageGetMaxOffsetNumber(page))
326 0 : elog(PANIC, "offnum out of range");
327 316750 : lp = PageGetItemId(page, xlrec->offnum);
328 316750 : if (!ItemIdIsNormal(lp))
329 0 : elog(PANIC, "invalid lp");
330 :
331 316750 : htup = (HeapTupleHeader) PageGetItem(page, lp);
332 : /* Reset the xmax-related state, then re-derive it from the record's infobits. */
333 316750 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
334 316750 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
335 316750 : HeapTupleHeaderClearHotUpdated(htup);
336 316750 : fix_infomask_from_infobits(xlrec->infobits_set,
337 : &htup->t_infomask, &htup->t_infomask2);
338 316750 : if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
339 316750 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
340 : else /* XLH_DELETE_IS_SUPER: xmin is invalidated and xmax is not set */
341 0 : HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
342 316750 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
343 :
344 : /* Mark the page as a candidate for pruning */
345 316750 : PageSetPrunable(page, XLogRecGetXid(record));
346 :
347 316750 : if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
348 38 : PageClearAllVisible(page);
349 :
350 : /* Make sure t_ctid is set correctly */
351 316750 : if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
352 152 : HeapTupleHeaderSetMovedPartitions(htup);
353 : else
354 316598 : htup->t_ctid = target_tid;
355 316750 : PageSetLSN(page, lsn);
356 316750 : MarkBufferDirty(buffer);
357 : }
358 318969 : if (BufferIsValid(buffer))
359 318969 : UnlockReleaseBuffer(buffer);
360 318969 : }
361 :
362 : /*
363 : * Replay XLOG_HEAP_INSERT records: rebuild the tuple from block 0's data and add it to the heap page at xlrec->offnum, clearing visibility map bits and updating the FSM where the code below calls for it.
364 : */
365 : static void
366 1324600 : heap_xlog_insert(XLogReaderState *record)
367 : {
368 1324600 : XLogRecPtr lsn = record->EndRecPtr; /* LSN stamped on the heap page if it is modified */
369 1324600 : xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
370 : Buffer buffer;
371 : Page page;
372 : union
373 : {
374 : HeapTupleHeaderData hdr;
375 : char data[MaxHeapTupleSize];
376 : } tbuf; /* scratch space in which the tuple is reassembled */
377 : HeapTupleHeader htup;
378 : xl_heap_header xlhdr;
379 : uint32 newlen;
380 1324600 : Size freespace = 0;
381 : RelFileLocator target_locator;
382 : BlockNumber blkno;
383 : ItemPointerData target_tid;
384 : XLogRedoAction action;
385 : /* Build the new tuple's TID from block 0's tag and the logged offset; it becomes t_ctid below. */
386 1324600 : XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
387 1324600 : ItemPointerSetBlockNumber(&target_tid, blkno);
388 1324600 : ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
389 :
390 : /* No freezing in the heap_insert() code path */
391 : Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));
392 :
393 : /*
394 : * The visibility map may need to be fixed even if the heap page is
395 : * already up-to-date.
396 : */
397 1324600 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
398 : {
399 909 : Relation reln = CreateFakeRelcacheEntry(target_locator);
400 909 : Buffer vmbuffer = InvalidBuffer;
401 :
402 909 : visibilitymap_pin(reln, blkno, &vmbuffer);
403 909 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
404 909 : ReleaseBuffer(vmbuffer);
405 909 : FreeFakeRelcacheEntry(reln);
406 : }
407 :
408 : /*
409 : * If we inserted the first and only tuple on the page, re-initialize the
410 : * page from scratch.
411 : */
412 1324600 : if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
413 : {
414 17729 : buffer = XLogInitBufferForRedo(record, 0);
415 17729 : page = BufferGetPage(buffer);
416 17729 : PageInit(page, BufferGetPageSize(buffer), 0);
417 17729 : action = BLK_NEEDS_REDO;
418 : }
419 : else
420 1306871 : action = XLogReadBufferForRedo(record, 0, &buffer);
421 1324600 : if (action == BLK_NEEDS_REDO)
422 : {
423 : Size datalen;
424 : char *data;
425 :
426 1321515 : page = BufferGetPage(buffer);
427 : /* The target offset may be at most one past the page's current last line pointer. */
428 1321515 : if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
429 0 : elog(PANIC, "invalid max offset number");
430 :
431 1321515 : data = XLogRecGetBlockData(record, 0, &datalen);
432 : /* Block data layout: an xl_heap_header followed by the tuple bytes that come after the fixed tuple header. */
433 1321515 : newlen = datalen - SizeOfHeapHeader;
434 : Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
435 1321515 : memcpy(&xlhdr, data, SizeOfHeapHeader);
436 1321515 : data += SizeOfHeapHeader;
437 :
438 1321515 : htup = &tbuf.hdr;
439 1321515 : MemSet(htup, 0, SizeofHeapTupleHeader); /* zero the fixed header; its fields are filled in below */
440 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
441 1321515 : memcpy((char *) htup + SizeofHeapTupleHeader,
442 : data,
443 : newlen);
444 1321515 : newlen += SizeofHeapTupleHeader; /* from here on, the full tuple length passed to PageAddItem() */
445 1321515 : htup->t_infomask2 = xlhdr.t_infomask2;
446 1321515 : htup->t_infomask = xlhdr.t_infomask;
447 1321515 : htup->t_hoff = xlhdr.t_hoff;
448 1321515 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
449 1321515 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
450 1321515 : htup->t_ctid = target_tid;
451 :
452 1321515 : if (PageAddItem(page, htup, newlen, xlrec->offnum, true, true) == InvalidOffsetNumber)
453 0 : elog(PANIC, "failed to add tuple");
454 :
455 1321515 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
456 :
457 : /*
458 : * Set the page prunable to trigger on-access pruning later, which may
459 : * set the page all-visible in the VM. See comments in heap_insert().
460 : */
461 1321515 : if (TransactionIdIsNormal(XLogRecGetXid(record)) &&
462 1321515 : !HeapTupleHeaderXminFrozen(htup))
463 1320985 : PageSetPrunable(page, XLogRecGetXid(record));
464 :
465 1321515 : PageSetLSN(page, lsn);
466 :
467 1321515 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
468 300 : PageClearAllVisible(page);
469 :
470 1321515 : MarkBufferDirty(buffer);
471 : }
472 1324600 : if (BufferIsValid(buffer))
473 1324600 : UnlockReleaseBuffer(buffer);
474 :
475 : /*
476 : * If the page is running low on free space, update the FSM as well.
477 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
478 : * better than that without knowing the fill-factor for the table.
479 : *
480 : * XXX: Don't do this if the page was restored from full page image. We
481 : * don't bother to update the FSM in that case, it doesn't need to be
482 : * totally accurate anyway.
483 : */
484 1324600 : if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
485 259908 : XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
486 1324600 : }
487 :
488 : /*
489 : * Replay XLOG_HEAP2_MULTI_INSERT records: add xlrec->ntuples tuples from block 0's data to the heap page, then set the visibility map bits on block 1 when XLH_INSERT_ALL_FROZEN_SET is set, and update the FSM if the page is low on space.
490 : */
491 : static void
492 67568 : heap_xlog_multi_insert(XLogReaderState *record)
493 : {
494 67568 : XLogRecPtr lsn = record->EndRecPtr; /* LSN stamped on the pages modified below */
495 : xl_heap_multi_insert *xlrec;
496 : RelFileLocator rlocator;
497 : BlockNumber blkno;
498 : Buffer buffer;
499 : Page page;
500 : union
501 : {
502 : HeapTupleHeaderData hdr;
503 : char data[MaxHeapTupleSize];
504 : } tbuf; /* scratch space reused for every tuple in the loop below */
505 : HeapTupleHeader htup;
506 : uint32 newlen;
507 67568 : Size freespace = 0;
508 : int i;
509 67568 : bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; /* page is re-initialized before the tuples are added */
510 : XLogRedoAction action;
511 67568 : Buffer vmbuffer = InvalidBuffer;
512 :
513 : /*
514 : * Insertion doesn't overwrite MVCC data, so no conflict processing is
515 : * required.
516 : */
517 67568 : xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
518 :
519 67568 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
520 :
521 : /* check that the mutually exclusive flags are not both set */
522 : Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
523 : (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
524 :
525 : /*
526 : * The visibility map may need to be fixed even if the heap page is
527 : * already up-to-date.
528 : */
529 67568 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
530 : {
531 925 : Relation reln = CreateFakeRelcacheEntry(rlocator);
532 :
533 925 : visibilitymap_pin(reln, blkno, &vmbuffer);
534 925 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
535 925 : ReleaseBuffer(vmbuffer);
536 925 : vmbuffer = InvalidBuffer; /* reset so the BufferIsValid(vmbuffer) check further down does not release it again */
537 925 : FreeFakeRelcacheEntry(reln);
538 : }
539 :
540 67568 : if (isinit)
541 : {
542 1918 : buffer = XLogInitBufferForRedo(record, 0);
543 1918 : page = BufferGetPage(buffer);
544 1918 : PageInit(page, BufferGetPageSize(buffer), 0);
545 1918 : action = BLK_NEEDS_REDO;
546 : }
547 : else
548 65650 : action = XLogReadBufferForRedo(record, 0, &buffer);
549 67568 : if (action == BLK_NEEDS_REDO)
550 : {
551 : char *tupdata;
552 : char *endptr;
553 : Size len;
554 :
555 : /* Tuples are stored as block data */
556 65840 : tupdata = XLogRecGetBlockData(record, 0, &len);
557 65840 : endptr = tupdata + len;
558 :
559 65840 : page = BufferGetPage(buffer);
560 :
561 289100 : for (i = 0; i < xlrec->ntuples; i++)
562 : {
563 : OffsetNumber offnum;
564 : xl_multi_insert_tuple *xlhdr;
565 :
566 : /*
567 : * If we're reinitializing the page, the tuples are stored in
568 : * order from FirstOffsetNumber. Otherwise there's an array of
569 : * offsets in the WAL record, and the tuples come after that.
570 : */
571 223260 : if (isinit)
572 99760 : offnum = FirstOffsetNumber + i;
573 : else
574 123500 : offnum = xlrec->offsets[i];
575 223260 : if (PageGetMaxOffsetNumber(page) + 1 < offnum)
576 0 : elog(PANIC, "invalid max offset number");
577 : /* Each per-tuple header is read at the next SHORTALIGN boundary; its data follows immediately. */
578 223260 : xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
579 223260 : tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
580 :
581 223260 : newlen = xlhdr->datalen;
582 : Assert(newlen <= MaxHeapTupleSize);
583 223260 : htup = &tbuf.hdr;
584 223260 : MemSet(htup, 0, SizeofHeapTupleHeader); /* zero the fixed header; its fields are filled in below */
585 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
586 223260 : memcpy((char *) htup + SizeofHeapTupleHeader,
587 : tupdata,
588 : newlen);
589 223260 : tupdata += newlen;
590 :
591 223260 : newlen += SizeofHeapTupleHeader; /* from here on, the full tuple length passed to PageAddItem() */
592 223260 : htup->t_infomask2 = xlhdr->t_infomask2;
593 223260 : htup->t_infomask = xlhdr->t_infomask;
594 223260 : htup->t_hoff = xlhdr->t_hoff;
595 223260 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
596 223260 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
597 223260 : ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
598 223260 : ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
599 :
600 223260 : offnum = PageAddItem(page, htup, newlen, offnum, true, true);
601 223260 : if (offnum == InvalidOffsetNumber)
602 0 : elog(PANIC, "failed to add tuple");
603 : }
604 65840 : if (tupdata != endptr) /* the tuples must consume the block data exactly */
605 0 : elog(PANIC, "total tuple length mismatch");
606 :
607 65840 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
608 :
609 65840 : PageSetLSN(page, lsn);
610 :
611 65840 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
612 79 : PageClearAllVisible(page);
613 :
614 : /*
615 : * XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible. If
616 : * we are not setting the page frozen, then set the page's prunable
617 : * hint so that we trigger on-access pruning later which may set the
618 : * page all-visible in the VM.
619 : */
620 65840 : if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
621 : {
622 4 : PageSetAllVisible(page);
623 4 : PageClearPrunable(page);
624 : }
625 : else
626 65836 : PageSetPrunable(page, XLogRecGetXid(record));
627 :
628 65840 : MarkBufferDirty(buffer);
629 : }
630 67568 : if (BufferIsValid(buffer))
631 67568 : UnlockReleaseBuffer(buffer);
632 :
633 67568 : buffer = InvalidBuffer;
634 :
635 : /*
636 : * Read and update the visibility map (VM) block.
637 : *
638 : * We must always redo VM changes, even if the corresponding heap page
639 : * update was skipped due to the LSN interlock. Each VM block covers
640 : * multiple heap pages, so later WAL records may update other bits in the
641 : * same block. If this record includes an FPI (full-page image),
642 : * subsequent WAL records may depend on it to guard against torn pages.
643 : *
644 : * Heap page changes are replayed first to preserve the invariant:
645 : * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
646 : *
647 : * Note that we released the heap page lock above. During normal
648 : * operation, this would be unsafe — a concurrent modification could
649 : * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
650 : * invariant.
651 : *
652 : * During recovery, however, no concurrent writers exist. Therefore,
653 : * updating the VM without holding the heap page lock is safe enough. This
654 : * same approach is taken when replaying XLOG_HEAP2_PRUNE* records (see
655 : * heap_xlog_prune_freeze()).
656 : */
657 67572 : if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
658 4 : XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
659 : &vmbuffer) == BLK_NEEDS_REDO)
660 : {
661 0 : Page vmpage = BufferGetPage(vmbuffer);
662 :
663 : /* initialize the page if it was read as zeros */
664 0 : if (PageIsNew(vmpage))
665 0 : PageInit(vmpage, BLCKSZ, 0);
666 :
667 0 : visibilitymap_set(blkno,
668 : vmbuffer,
669 : VISIBILITYMAP_ALL_VISIBLE |
670 : VISIBILITYMAP_ALL_FROZEN,
671 : rlocator);
672 :
673 : Assert(BufferIsDirty(vmbuffer)); /* no MarkBufferDirty(vmbuffer) here; presumably visibilitymap_set() dirties it -- TODO confirm */
674 0 : PageSetLSN(vmpage, lsn);
675 : }
676 :
677 67568 : if (BufferIsValid(vmbuffer))
678 4 : UnlockReleaseBuffer(vmbuffer);
679 :
680 : /*
681 : * If the page is running low on free space, update the FSM as well.
682 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
683 : * better than that without knowing the fill-factor for the table.
684 : *
685 : * XXX: Don't do this if the page was restored from full page image. We
686 : * don't bother to update the FSM in that case, it doesn't need to be
687 : * totally accurate anyway.
688 : */
689 67568 : if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
690 17530 : XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
691 67568 : }
692 :
693 : /*
694 : * Replay XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE records.
695 : */
696 : static void
697 96501 : heap_xlog_update(XLogReaderState *record, bool hot_update)
698 : {
699 96501 : XLogRecPtr lsn = record->EndRecPtr;
700 96501 : xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
701 : RelFileLocator rlocator;
702 : BlockNumber oldblk;
703 : BlockNumber newblk;
704 : ItemPointerData newtid;
705 : Buffer obuffer,
706 : nbuffer;
707 : Page opage,
708 : npage;
709 : OffsetNumber offnum;
710 : ItemId lp;
711 : HeapTupleData oldtup;
712 : HeapTupleHeader htup;
713 96501 : uint16 prefixlen = 0,
714 96501 : suffixlen = 0;
715 : char *newp;
716 : union
717 : {
718 : HeapTupleHeaderData hdr;
719 : char data[MaxHeapTupleSize];
720 : } tbuf;
721 : xl_heap_header xlhdr;
722 : uint32 newlen;
723 96501 : Size freespace = 0;
724 : XLogRedoAction oldaction;
725 : XLogRedoAction newaction;
726 :
727 : /* initialize to keep the compiler quiet */
728 96501 : oldtup.t_data = NULL;
729 96501 : oldtup.t_len = 0;
730 :
731 96501 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
732 96501 : if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
733 : {
734 : /* HOT updates are never done across pages */
735 : Assert(!hot_update);
736 : }
737 : else
738 42020 : oldblk = newblk;
739 :
740 96501 : ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
741 :
742 : /*
743 : * The visibility map may need to be fixed even if the heap page is
744 : * already up-to-date.
745 : */
746 96501 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
747 : {
748 378 : Relation reln = CreateFakeRelcacheEntry(rlocator);
749 378 : Buffer vmbuffer = InvalidBuffer;
750 :
751 378 : visibilitymap_pin(reln, oldblk, &vmbuffer);
752 378 : visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
753 378 : ReleaseBuffer(vmbuffer);
754 378 : FreeFakeRelcacheEntry(reln);
755 : }
756 :
757 : /*
758 : * In normal operation, it is important to lock the two pages in
759 : * page-number order, to avoid possible deadlocks against other update
760 : * operations going the other way. However, during WAL replay there can
761 : * be no other update happening, so we don't need to worry about that. But
762 : * we *do* need to worry that we don't expose an inconsistent state to Hot
763 : * Standby queries --- so the original page can't be unlocked before we've
764 : * added the new tuple to the new page.
765 : */
766 :
767 : /* Deal with old tuple version */
768 96501 : oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
769 : &obuffer);
770 96501 : if (oldaction == BLK_NEEDS_REDO)
771 : {
772 96235 : opage = BufferGetPage(obuffer);
773 96235 : offnum = xlrec->old_offnum;
774 96235 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(opage))
775 0 : elog(PANIC, "offnum out of range");
776 96235 : lp = PageGetItemId(opage, offnum);
777 96235 : if (!ItemIdIsNormal(lp))
778 0 : elog(PANIC, "invalid lp");
779 :
780 96235 : htup = (HeapTupleHeader) PageGetItem(opage, lp);
781 :
782 96235 : oldtup.t_data = htup;
783 96235 : oldtup.t_len = ItemIdGetLength(lp);
784 :
785 96235 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
786 96235 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
787 96235 : if (hot_update)
788 38572 : HeapTupleHeaderSetHotUpdated(htup);
789 : else
790 57663 : HeapTupleHeaderClearHotUpdated(htup);
791 96235 : fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
792 : &htup->t_infomask2);
793 96235 : HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
794 96235 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
795 : /* Set forward chain link in t_ctid */
796 96235 : htup->t_ctid = newtid;
797 :
798 : /* Mark the page as a candidate for pruning */
799 96235 : PageSetPrunable(opage, XLogRecGetXid(record));
800 :
801 96235 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
802 374 : PageClearAllVisible(opage);
803 :
804 96235 : PageSetLSN(opage, lsn);
805 96235 : MarkBufferDirty(obuffer);
806 : }
807 :
808 : /*
809 : * Read the page the new tuple goes into, if different from old.
810 : */
811 96501 : if (oldblk == newblk)
812 : {
813 42020 : nbuffer = obuffer;
814 42020 : newaction = oldaction;
815 : }
816 54481 : else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
817 : {
818 631 : nbuffer = XLogInitBufferForRedo(record, 0);
819 631 : npage = BufferGetPage(nbuffer);
820 631 : PageInit(npage, BufferGetPageSize(nbuffer), 0);
821 631 : newaction = BLK_NEEDS_REDO;
822 : }
823 : else
824 53850 : newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
825 :
826 : /*
827 : * The visibility map may need to be fixed even if the heap page is
828 : * already up-to-date.
829 : */
830 96501 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
831 : {
832 160 : Relation reln = CreateFakeRelcacheEntry(rlocator);
833 160 : Buffer vmbuffer = InvalidBuffer;
834 :
835 160 : visibilitymap_pin(reln, newblk, &vmbuffer);
836 160 : visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
837 160 : ReleaseBuffer(vmbuffer);
838 160 : FreeFakeRelcacheEntry(reln);
839 : }
840 :
841 : /* Deal with new tuple */
842 96501 : if (newaction == BLK_NEEDS_REDO)
843 : {
844 : char *recdata;
845 : char *recdata_end;
846 : Size datalen;
847 : Size tuplen;
848 :
849 96002 : recdata = XLogRecGetBlockData(record, 0, &datalen);
850 96002 : recdata_end = recdata + datalen;
851 :
852 96002 : npage = BufferGetPage(nbuffer);
853 :
854 96002 : offnum = xlrec->new_offnum;
855 96002 : if (PageGetMaxOffsetNumber(npage) + 1 < offnum)
856 0 : elog(PANIC, "invalid max offset number");
857 :
858 96002 : if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
859 : {
860 : Assert(newblk == oldblk);
861 18143 : memcpy(&prefixlen, recdata, sizeof(uint16));
862 18143 : recdata += sizeof(uint16);
863 : }
864 96002 : if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
865 : {
866 : Assert(newblk == oldblk);
867 35753 : memcpy(&suffixlen, recdata, sizeof(uint16));
868 35753 : recdata += sizeof(uint16);
869 : }
870 :
871 96002 : memcpy(&xlhdr, recdata, SizeOfHeapHeader);
872 96002 : recdata += SizeOfHeapHeader;
873 :
874 96002 : tuplen = recdata_end - recdata;
875 : Assert(tuplen <= MaxHeapTupleSize);
876 :
877 96002 : htup = &tbuf.hdr;
878 96002 : MemSet(htup, 0, SizeofHeapTupleHeader);
879 :
880 : /*
881 : * Reconstruct the new tuple using the prefix and/or suffix from the
882 : * old tuple, and the data stored in the WAL record.
883 : */
884 96002 : newp = (char *) htup + SizeofHeapTupleHeader;
885 96002 : if (prefixlen > 0)
886 : {
887 : int len;
888 :
889 : /* copy bitmap [+ padding] [+ oid] from WAL record */
890 18143 : len = xlhdr.t_hoff - SizeofHeapTupleHeader;
891 18143 : memcpy(newp, recdata, len);
892 18143 : recdata += len;
893 18143 : newp += len;
894 :
895 : /* copy prefix from old tuple */
896 18143 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
897 18143 : newp += prefixlen;
898 :
899 : /* copy new tuple data from WAL record */
900 18143 : len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
901 18143 : memcpy(newp, recdata, len);
902 18143 : recdata += len;
903 18143 : newp += len;
904 : }
905 : else
906 : {
907 : /*
908 : * copy bitmap [+ padding] [+ oid] + data from record, all in one
909 : * go
910 : */
911 77859 : memcpy(newp, recdata, tuplen);
912 77859 : recdata += tuplen;
913 77859 : newp += tuplen;
914 : }
915 : Assert(recdata == recdata_end);
916 :
917 : /* copy suffix from old tuple */
918 96002 : if (suffixlen > 0)
919 35753 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
920 :
921 96002 : newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
922 96002 : htup->t_infomask2 = xlhdr.t_infomask2;
923 96002 : htup->t_infomask = xlhdr.t_infomask;
924 96002 : htup->t_hoff = xlhdr.t_hoff;
925 :
926 96002 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
927 96002 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
928 96002 : HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
929 : /* Make sure there is no forward chain link in t_ctid */
930 96002 : htup->t_ctid = newtid;
931 :
932 96002 : offnum = PageAddItem(npage, htup, newlen, offnum, true, true);
933 96002 : if (offnum == InvalidOffsetNumber)
934 0 : elog(PANIC, "failed to add tuple");
935 :
936 96002 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
937 80 : PageClearAllVisible(npage);
938 :
939 : /* needed to update FSM below */
940 96002 : freespace = PageGetHeapFreeSpace(npage);
941 :
942 96002 : PageSetLSN(npage, lsn);
943 : /* See heap_insert() for why we set pd_prune_xid on insert */
944 96002 : PageSetPrunable(npage, XLogRecGetXid(record));
945 96002 : MarkBufferDirty(nbuffer);
946 : }
947 :
948 96501 : if (BufferIsValid(nbuffer) && nbuffer != obuffer)
949 54481 : UnlockReleaseBuffer(nbuffer);
950 96501 : if (BufferIsValid(obuffer))
951 96501 : UnlockReleaseBuffer(obuffer);
952 :
953 : /*
954 : * If the new page is running low on free space, update the FSM as well.
955 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
956 : * better than that without knowing the fill-factor for the table.
957 : *
958 : * However, don't update the FSM on HOT updates, because after crash
959 : * recovery, either the old or the new tuple will certainly be dead and
960 : * prunable. After pruning, the page will have roughly as much free space
961 : * as it did before the update, assuming the new tuple is about the same
962 : * size as the old one.
963 : *
964 : * XXX: Don't do this if the page was restored from full page image. We
965 : * don't bother to update the FSM in that case, it doesn't need to be
966 : * totally accurate anyway.
967 : */
968 96501 : if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
969 11725 : XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
970 96501 : }
971 :
972 : /*
973 : * Replay XLOG_HEAP_CONFIRM records.
974 : */
975 : static void
976 93 : heap_xlog_confirm(XLogReaderState *record)
977 : {
978 93 : XLogRecPtr lsn = record->EndRecPtr;
979 93 : xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record);
980 : Buffer buffer;
981 : Page page;
982 : OffsetNumber offnum;
983 : ItemId lp;
984 : HeapTupleHeader htup;
985 :
986 93 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
987 : {
988 93 : page = BufferGetPage(buffer);
989 :
990 93 : offnum = xlrec->offnum;
991 93 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
992 0 : elog(PANIC, "offnum out of range");
993 93 : lp = PageGetItemId(page, offnum);
994 93 : if (!ItemIdIsNormal(lp))
995 0 : elog(PANIC, "invalid lp");
996 :
997 93 : htup = (HeapTupleHeader) PageGetItem(page, lp);
998 :
999 : /*
1000 : * Confirm tuple as actually inserted
1001 : */
1002 93 : ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum);
1003 :
1004 93 : PageSetLSN(page, lsn);
1005 93 : MarkBufferDirty(buffer);
1006 : }
1007 93 : if (BufferIsValid(buffer))
1008 93 : UnlockReleaseBuffer(buffer);
1009 93 : }
1010 :
1011 : /*
1012 : * Replay XLOG_HEAP_LOCK records.
1013 : */
1014 : static void
1015 55786 : heap_xlog_lock(XLogReaderState *record)
1016 : {
1017 55786 : XLogRecPtr lsn = record->EndRecPtr;
1018 55786 : xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
1019 : Buffer buffer;
1020 : Page page;
1021 : OffsetNumber offnum;
1022 : ItemId lp;
1023 : HeapTupleHeader htup;
1024 :
1025 : /*
1026 : * The visibility map may need to be fixed even if the heap page is
1027 : * already up-to-date.
1028 : */
1029 55786 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1030 : {
1031 : RelFileLocator rlocator;
1032 51 : Buffer vmbuffer = InvalidBuffer;
1033 : BlockNumber block;
1034 : Relation reln;
1035 :
1036 51 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1037 51 : reln = CreateFakeRelcacheEntry(rlocator);
1038 :
1039 51 : visibilitymap_pin(reln, block, &vmbuffer);
1040 51 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1041 :
1042 51 : ReleaseBuffer(vmbuffer);
1043 51 : FreeFakeRelcacheEntry(reln);
1044 : }
1045 :
1046 55786 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1047 : {
1048 55580 : page = BufferGetPage(buffer);
1049 :
1050 55580 : offnum = xlrec->offnum;
1051 55580 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1052 0 : elog(PANIC, "offnum out of range");
1053 55580 : lp = PageGetItemId(page, offnum);
1054 55580 : if (!ItemIdIsNormal(lp))
1055 0 : elog(PANIC, "invalid lp");
1056 :
1057 55580 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1058 :
1059 55580 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1060 55580 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1061 55580 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1062 : &htup->t_infomask2);
1063 :
1064 : /*
1065 : * Clear relevant update flags, but only if the modified infomask says
1066 : * there's no update.
1067 : */
1068 55580 : if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
1069 : {
1070 55580 : HeapTupleHeaderClearHotUpdated(htup);
1071 : /* Make sure there is no forward chain link in t_ctid */
1072 55580 : ItemPointerSet(&htup->t_ctid,
1073 : BufferGetBlockNumber(buffer),
1074 : offnum);
1075 : }
1076 55580 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1077 55580 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
1078 55580 : PageSetLSN(page, lsn);
1079 55580 : MarkBufferDirty(buffer);
1080 : }
1081 55786 : if (BufferIsValid(buffer))
1082 55786 : UnlockReleaseBuffer(buffer);
1083 55786 : }
1084 :
1085 : /*
1086 : * Replay XLOG_HEAP2_LOCK_UPDATED records.
1087 : */
1088 : static void
1089 0 : heap_xlog_lock_updated(XLogReaderState *record)
1090 : {
1091 0 : XLogRecPtr lsn = record->EndRecPtr;
1092 : xl_heap_lock_updated *xlrec;
1093 : Buffer buffer;
1094 : Page page;
1095 : OffsetNumber offnum;
1096 : ItemId lp;
1097 : HeapTupleHeader htup;
1098 :
1099 0 : xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
1100 :
1101 : /*
1102 : * The visibility map may need to be fixed even if the heap page is
1103 : * already up-to-date.
1104 : */
1105 0 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1106 : {
1107 : RelFileLocator rlocator;
1108 0 : Buffer vmbuffer = InvalidBuffer;
1109 : BlockNumber block;
1110 : Relation reln;
1111 :
1112 0 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1113 0 : reln = CreateFakeRelcacheEntry(rlocator);
1114 :
1115 0 : visibilitymap_pin(reln, block, &vmbuffer);
1116 0 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1117 :
1118 0 : ReleaseBuffer(vmbuffer);
1119 0 : FreeFakeRelcacheEntry(reln);
1120 : }
1121 :
1122 0 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1123 : {
1124 0 : page = BufferGetPage(buffer);
1125 :
1126 0 : offnum = xlrec->offnum;
1127 0 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1128 0 : elog(PANIC, "offnum out of range");
1129 0 : lp = PageGetItemId(page, offnum);
1130 0 : if (!ItemIdIsNormal(lp))
1131 0 : elog(PANIC, "invalid lp");
1132 :
1133 0 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1134 :
1135 0 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1136 0 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1137 0 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1138 : &htup->t_infomask2);
1139 0 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1140 :
1141 0 : PageSetLSN(page, lsn);
1142 0 : MarkBufferDirty(buffer);
1143 : }
1144 0 : if (BufferIsValid(buffer))
1145 0 : UnlockReleaseBuffer(buffer);
1146 0 : }
1147 :
1148 : /*
1149 : * Replay XLOG_HEAP_INPLACE records.
1150 : */
1151 : static void
1152 8333 : heap_xlog_inplace(XLogReaderState *record)
1153 : {
1154 8333 : XLogRecPtr lsn = record->EndRecPtr;
1155 8333 : xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
1156 : Buffer buffer;
1157 : Page page;
1158 : OffsetNumber offnum;
1159 : ItemId lp;
1160 : HeapTupleHeader htup;
1161 : uint32 oldlen;
1162 : Size newlen;
1163 :
1164 8333 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1165 : {
1166 8137 : char *newtup = XLogRecGetBlockData(record, 0, &newlen);
1167 :
1168 8137 : page = BufferGetPage(buffer);
1169 :
1170 8137 : offnum = xlrec->offnum;
1171 8137 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1172 0 : elog(PANIC, "offnum out of range");
1173 8137 : lp = PageGetItemId(page, offnum);
1174 8137 : if (!ItemIdIsNormal(lp))
1175 0 : elog(PANIC, "invalid lp");
1176 :
1177 8137 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1178 :
1179 8137 : oldlen = ItemIdGetLength(lp) - htup->t_hoff;
1180 8137 : if (oldlen != newlen)
1181 0 : elog(PANIC, "wrong tuple length");
1182 :
1183 8137 : memcpy((char *) htup + htup->t_hoff, newtup, newlen);
1184 :
1185 8137 : PageSetLSN(page, lsn);
1186 8137 : MarkBufferDirty(buffer);
1187 : }
1188 8333 : if (BufferIsValid(buffer))
1189 8333 : UnlockReleaseBuffer(buffer);
1190 :
1191 8333 : ProcessCommittedInvalidationMessages(xlrec->msgs,
1192 : xlrec->nmsgs,
1193 8333 : xlrec->relcacheInitFileInval,
1194 : xlrec->dbId,
1195 : xlrec->tsId);
1196 8333 : }
1197 :
1198 : void
1199 1804284 : heap_redo(XLogReaderState *record)
1200 : {
1201 1804284 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1202 :
1203 : /*
1204 : * These operations don't overwrite MVCC data so no conflict processing is
1205 : * required. The ones in heap2 rmgr do.
1206 : */
1207 :
1208 1804284 : switch (info & XLOG_HEAP_OPMASK)
1209 : {
1210 1324600 : case XLOG_HEAP_INSERT:
1211 1324600 : heap_xlog_insert(record);
1212 1324600 : break;
1213 318969 : case XLOG_HEAP_DELETE:
1214 318969 : heap_xlog_delete(record);
1215 318969 : break;
1216 57702 : case XLOG_HEAP_UPDATE:
1217 57702 : heap_xlog_update(record, false);
1218 57702 : break;
1219 2 : case XLOG_HEAP_TRUNCATE:
1220 :
1221 : /*
1222 : * TRUNCATE is a no-op because the actions are already logged as
1223 : * SMGR WAL records. TRUNCATE WAL record only exists for logical
1224 : * decoding.
1225 : */
1226 2 : break;
1227 38799 : case XLOG_HEAP_HOT_UPDATE:
1228 38799 : heap_xlog_update(record, true);
1229 38799 : break;
1230 93 : case XLOG_HEAP_CONFIRM:
1231 93 : heap_xlog_confirm(record);
1232 93 : break;
1233 55786 : case XLOG_HEAP_LOCK:
1234 55786 : heap_xlog_lock(record);
1235 55786 : break;
1236 8333 : case XLOG_HEAP_INPLACE:
1237 8333 : heap_xlog_inplace(record);
1238 8333 : break;
1239 0 : default:
1240 0 : elog(PANIC, "heap_redo: unknown op code %u", info);
1241 : }
1242 1804284 : }
1243 :
1244 : void
1245 88593 : heap2_redo(XLogReaderState *record)
1246 : {
1247 88593 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1248 :
1249 88593 : switch (info & XLOG_HEAP_OPMASK)
1250 : {
1251 19977 : case XLOG_HEAP2_PRUNE_ON_ACCESS:
1252 : case XLOG_HEAP2_PRUNE_VACUUM_SCAN:
1253 : case XLOG_HEAP2_PRUNE_VACUUM_CLEANUP:
1254 19977 : heap_xlog_prune_freeze(record);
1255 19977 : break;
1256 67568 : case XLOG_HEAP2_MULTI_INSERT:
1257 67568 : heap_xlog_multi_insert(record);
1258 67568 : break;
1259 0 : case XLOG_HEAP2_LOCK_UPDATED:
1260 0 : heap_xlog_lock_updated(record);
1261 0 : break;
1262 1048 : case XLOG_HEAP2_NEW_CID:
1263 :
1264 : /*
1265 : * Nothing to do on a real replay, only used during logical
1266 : * decoding.
1267 : */
1268 1048 : break;
1269 0 : case XLOG_HEAP2_REWRITE:
1270 0 : heap_xlog_logical_rewrite(record);
1271 0 : break;
1272 0 : default:
1273 0 : elog(PANIC, "heap2_redo: unknown op code %u", info);
1274 : }
1275 88593 : }
1276 :
1277 : /*
1278 : * Mask a heap page before performing consistency checks on it.
1279 : */
1280 : void
1281 3012656 : heap_mask(char *pagedata, BlockNumber blkno)
1282 : {
1283 3012656 : Page page = (Page) pagedata;
1284 : OffsetNumber off;
1285 :
1286 3012656 : mask_page_lsn_and_checksum(page);
1287 :
1288 3012656 : mask_page_hint_bits(page);
1289 3012656 : mask_unused_space(page);
1290 :
1291 250298530 : for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
1292 : {
1293 247285874 : ItemId iid = PageGetItemId(page, off);
1294 : char *page_item;
1295 :
1296 247285874 : page_item = (char *) (page + ItemIdGetOffset(iid));
1297 :
1298 247285874 : if (ItemIdIsNormal(iid))
1299 : {
1300 228422020 : HeapTupleHeader page_htup = (HeapTupleHeader) page_item;
1301 :
1302 : /*
1303 : * If xmin of a tuple is not yet frozen, we should ignore
1304 : * differences in hint bits, since they can be set without
1305 : * emitting WAL.
1306 : */
1307 228422020 : if (!HeapTupleHeaderXminFrozen(page_htup))
1308 225666168 : page_htup->t_infomask &= ~HEAP_XACT_MASK;
1309 : else
1310 : {
1311 : /* Still we need to mask xmax hint bits. */
1312 2755852 : page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
1313 2755852 : page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
1314 : }
1315 :
1316 : /*
1317 : * During replay, we set Command Id to FirstCommandId. Hence, mask
1318 : * it. See heap_xlog_insert() for details.
1319 : */
1320 228422020 : page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;
1321 :
1322 : /*
1323 : * For a speculative tuple, heap_insert() does not set ctid in the
1324 : * caller-passed heap tuple itself, leaving the ctid field to
1325 : * contain a speculative token value - a per-backend monotonically
1326 : * increasing identifier. Besides, it does not WAL-log ctid under
1327 : * any circumstances.
1328 : *
1329 : * During redo, heap_xlog_insert() sets t_ctid to current block
1330 : * number and self offset number. It doesn't care about any
1331 : * speculative insertions on the primary. Hence, we set t_ctid to
1332 : * current block number and self offset number to ignore any
1333 : * inconsistency.
1334 : */
1335 228422020 : if (HeapTupleHeaderIsSpeculative(page_htup))
1336 94 : ItemPointerSet(&page_htup->t_ctid, blkno, off);
1337 :
1338 : /*
1339 : * NB: Not ignoring ctid changes due to the tuple having moved
1340 : * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
1341 : * important information that needs to be in-sync between primary
1342 : * and standby, and thus is WAL logged.
1343 : */
1344 : }
1345 :
1346 : /*
1347 : * Ignore any padding bytes after the tuple, when the length of the
1348 : * item is not MAXALIGNed.
1349 : */
1350 247285874 : if (ItemIdHasStorage(iid))
1351 : {
1352 228422020 : int len = ItemIdGetLength(iid);
1353 228422020 : int padlen = MAXALIGN(len) - len;
1354 :
1355 228422020 : if (padlen > 0)
1356 123172256 : memset(page_item + len, MASK_MARKER, padlen);
1357 : }
1358 : }
1359 3012656 : }
|