Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * heapam_xlog.c
4 : * WAL replay logic for heap access method.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/heap/heapam_xlog.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/bufmask.h"
18 : #include "access/heapam.h"
19 : #include "access/visibilitymap.h"
20 : #include "access/xlog.h"
21 : #include "access/xlogutils.h"
22 : #include "storage/freespace.h"
23 : #include "storage/standby.h"
24 :
25 :
26 : /*
27 : * Replay XLOG_HEAP2_PRUNE_* records.
   : *
   : * 'record' is the WAL record being replayed. Block reference 0 is the heap
   : * page. Block reference 1 is the visibility map (VM) page; it is read only
   : * when the record's flags include XLHP_VM_ALL_VISIBLE.
   : *
   : * Order of operations:
   : * 1. Copy the fixed-size xl_heap_prune header out of the main data and
   : * derive the VM bits to set from XLHP_VM_ALL_VISIBLE/XLHP_VM_ALL_FROZEN.
   : * 2. If the record carries a snapshot conflict horizon and we are in Hot
   : * Standby, resolve recovery conflicts against it.
   : * 3. If the heap page needs redo: prune line pointers, execute the freeze
   : * plans, and set PD_ALL_VISIBLE, as dictated by the record.
   : * 4. Sample the heap page's free space (when relevant) and release the
   : * heap buffer.
   : * 5. Set the VM bits, release the VM buffer, then record the sampled free
   : * space in the FSM.
28 : */
29 : static void
30 20184 : heap_xlog_prune_freeze(XLogReaderState *record)
31 : {
32 20184 : XLogRecPtr lsn = record->EndRecPtr;
33 20184 : char *maindataptr = XLogRecGetData(record); /* cursor into the record's main data */
34 : xl_heap_prune xlrec;
35 : Buffer buffer;
36 : RelFileLocator rlocator;
37 : BlockNumber blkno;
38 20184 : Buffer vmbuffer = InvalidBuffer;
39 20184 : uint8 vmflags = 0; /* VM bits this record sets; stays 0 if none */
40 20184 : Size freespace = 0;
41 20184 : bool do_update_fsm = false; /* set once freespace has been sampled */
42 :
43 20184 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
/* copy the header into a local struct, then step the cursor past it */
44 20184 : memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
45 20184 : maindataptr += SizeOfHeapPrune;
46 :
47 : /*
48 : * We will take an ordinary exclusive lock or a cleanup lock depending on
49 : * whether the XLHP_CLEANUP_LOCK flag is set. With an ordinary exclusive
50 : * lock, we better not be doing anything that requires moving existing
51 : * tuple data.
52 : */
53 : Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
54 : (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);
55 :
/* XLHP_VM_ALL_FROZEN is only honored together with XLHP_VM_ALL_VISIBLE */
56 20184 : if (xlrec.flags & XLHP_VM_ALL_VISIBLE)
57 : {
58 10129 : vmflags = VISIBILITYMAP_ALL_VISIBLE;
59 10129 : if (xlrec.flags & XLHP_VM_ALL_FROZEN)
60 6086 : vmflags |= VISIBILITYMAP_ALL_FROZEN;
61 : }
62 :
63 : /*
64 : * After xl_heap_prune is the optional snapshot conflict horizon.
65 : *
66 : * In Hot Standby mode, we must ensure that there are no running queries
67 : * which would conflict with the changes in this record. That means we
68 : * can't replay this record if it removes tuples that are still visible to
69 : * transactions on the standby, freeze tuples with xids that are still
70 : * considered running on the standby, or set a page as all-visible in the
71 : * VM if it isn't all-visible to all transactions on the standby.
72 : */
73 20184 : if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
74 : {
75 : TransactionId snapshot_conflict_horizon;
76 :
77 : /* memcpy() because snapshot_conflict_horizon is stored unaligned */
78 16145 : memcpy(&snapshot_conflict_horizon, maindataptr, sizeof(TransactionId));
79 16145 : maindataptr += sizeof(TransactionId);
80 :
81 16145 : if (InHotStandby)
82 15695 : ResolveRecoveryConflictWithSnapshot(snapshot_conflict_horizon,
83 15695 : (xlrec.flags & XLHP_IS_CATALOG_REL) != 0,
84 : rlocator);
85 : }
86 :
87 : /*
88 : * If we have a full-page image of the heap block, restore it and we're
89 : * done with the heap block.
   : *
   : * The fourth argument requests a cleanup lock exactly when the record has
   : * XLHP_CLEANUP_LOCK set.
90 : */
91 20184 : if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
92 20184 : (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
93 : &buffer) == BLK_NEEDS_REDO)
94 : {
95 14404 : Page page = BufferGetPage(buffer);
96 : OffsetNumber *redirected;
97 : OffsetNumber *nowdead;
98 : OffsetNumber *nowunused;
99 : int nredirected;
100 : int ndead;
101 : int nunused;
102 : int nplans;
103 : Size datalen;
104 : xlhp_freeze_plan *plans;
105 : OffsetNumber *frz_offsets;
106 14404 : char *dataptr = XLogRecGetBlockData(record, 0, &datalen);
107 : bool do_prune;
108 :
/* unpack the block-0 payload into the freeze plans and offset arrays */
109 14404 : heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
110 : &nplans, &plans, &frz_offsets,
111 : &nredirected, &redirected,
112 : &ndead, &nowdead,
113 : &nunused, &nowunused);
114 :
115 14404 : do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
116 :
117 : /* Ensure the record does something */
118 : Assert(do_prune || nplans > 0 || vmflags & VISIBILITYMAP_VALID_BITS);
119 :
120 : /*
121 : * Update all line pointers per the record, and repair fragmentation
122 : * if needed.
   : *
   : * The second argument is true when the record does NOT carry
   : * XLHP_CLEANUP_LOCK.
123 : */
124 14404 : if (do_prune)
125 11023 : heap_page_prune_execute(buffer,
126 11023 : (xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
127 : redirected, nredirected,
128 : nowdead, ndead,
129 : nowunused, nunused);
130 :
131 : /* Freeze tuples */
132 16273 : for (int p = 0; p < nplans; p++)
133 : {
134 : HeapTupleFreeze frz;
135 :
136 : /*
137 : * Convert freeze plan representation from WAL record into
138 : * per-tuple format used by heap_execute_freeze_tuple
139 : */
140 1869 : frz.xmax = plans[p].xmax;
141 1869 : frz.t_infomask2 = plans[p].t_infomask2;
142 1869 : frz.t_infomask = plans[p].t_infomask;
143 1869 : frz.frzflags = plans[p].frzflags;
144 1869 : frz.offset = InvalidOffsetNumber; /* unused, but be tidy */
145 :
/*
 * frz_offsets is a single array shared by all plans; each plan consumes
 * its next ntuples entries, so the pointer keeps advancing across plans.
 */
146 109155 : for (int i = 0; i < plans[p].ntuples; i++)
147 : {
148 107286 : OffsetNumber offset = *(frz_offsets++);
149 : ItemId lp;
150 : HeapTupleHeader tuple;
151 :
152 107286 : lp = PageGetItemId(page, offset);
153 107286 : tuple = (HeapTupleHeader) PageGetItem(page, lp);
154 107286 : heap_execute_freeze_tuple(tuple, &frz);
155 : }
156 : }
157 :
158 : /* There should be no more data */
159 : Assert((char *) frz_offsets == dataptr + datalen);
160 :
161 : /*
162 : * The critical integrity requirement here is that we must never end
163 : * up with the visibility map bit set and the page-level
164 : * PD_ALL_VISIBLE bit unset. If that were to occur, a subsequent page
165 : * modification would fail to clear the visibility map bit.
166 : */
167 14404 : if (vmflags & VISIBILITYMAP_VALID_BITS)
168 : {
169 6275 : PageSetAllVisible(page);
170 6275 : PageClearPrunable(page);
171 : }
172 :
173 14404 : MarkBufferDirty(buffer);
174 :
175 : /*
176 : * See log_heap_prune_and_freeze() for commentary on when we set the
177 : * heap page LSN.
   : *
   : * As coded here: always when pruning or freezing happened; for a
   : * VM-only change, only when XLogHintBitIsNeeded().
178 : */
179 14404 : if (do_prune || nplans > 0 ||
180 1982 : ((vmflags & VISIBILITYMAP_VALID_BITS) && XLogHintBitIsNeeded()))
181 14404 : PageSetLSN(page, lsn);
182 :
183 : /*
184 : * Note: apart from clearing the prunable hint above when the page is
   : * marked all-visible, we don't worry about updating the page's
   : * prunability hints.
185 : * At worst this will cause an extra prune cycle to occur soon.
186 : */
187 : }
188 :
189 : /*
190 : * If we 1) released any space or line pointers or 2) set PD_ALL_VISIBLE
191 : * or the VM, update the freespace map.
192 : *
193 : * Even when no actual space is freed (when only marking the page
194 : * all-visible or frozen), we still update the FSM. Because the FSM is
195 : * unlogged and maintained heuristically, it often becomes stale on
196 : * standbys. If such a standby is later promoted and runs VACUUM, it will
197 : * skip recalculating free space for pages that were marked
198 : * all-visible/all-frozen. FreeSpaceMapVacuum() can then propagate overly
199 : * optimistic free space values upward, causing future insertions to
200 : * select pages that turn out to be unusable. In bulk, this can lead to
201 : * long stalls.
202 : *
203 : * To prevent this, always update the FSM even when only marking a page
204 : * all-visible/all-frozen.
205 : *
206 : * Do this regardless of whether a full-page image is logged, since FSM
207 : * data is not part of the page itself.
   : *
   : * The free space is only sampled here, while the heap buffer is still
   : * held; the FSM itself is updated at the end of the function.
208 : */
209 20184 : if (BufferIsValid(buffer))
210 : {
211 20184 : if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
212 : XLHP_HAS_DEAD_ITEMS |
213 6112 : XLHP_HAS_NOW_UNUSED_ITEMS)) ||
214 6112 : (vmflags & VISIBILITYMAP_VALID_BITS))
215 : {
216 20167 : freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
217 20167 : do_update_fsm = true;
218 : }
219 :
220 : /*
221 : * We want to avoid holding an exclusive lock on the heap buffer while
222 : * doing IO (either of the FSM or the VM), so we'll release it now.
223 : */
224 20184 : UnlockReleaseBuffer(buffer);
225 : }
226 :
227 : /*
228 : * Now read and update the VM block.
229 : *
230 : * We must redo changes to the VM even if the heap page was skipped due to
231 : * LSN interlock. See comment in heap_xlog_multi_insert() for more details
232 : * on replaying changes to the VM.
   : *
   : * The && short-circuits: block 1 is not read at all when the record sets
   : * no VM bits.
233 : */
234 30313 : if ((vmflags & VISIBILITYMAP_VALID_BITS) &&
235 10129 : XLogReadBufferForRedoExtended(record, 1,
236 : RBM_ZERO_ON_ERROR,
237 : false,
238 : &vmbuffer) == BLK_NEEDS_REDO)
239 : {
240 9758 : Page vmpage = BufferGetPage(vmbuffer);
241 :
242 : /* initialize the page if it was read as zeros */
243 9758 : if (PageIsNew(vmpage))
244 2 : PageInit(vmpage, BLCKSZ, 0);
245 :
246 9758 : visibilitymap_set(blkno, vmbuffer, vmflags, rlocator);
247 :
248 : Assert(BufferIsDirty(vmbuffer));
249 9758 : PageSetLSN(vmpage, lsn);
250 : }
251 :
/* vmbuffer is still InvalidBuffer if block 1 was never read above */
252 20184 : if (BufferIsValid(vmbuffer))
253 10129 : UnlockReleaseBuffer(vmbuffer);
254 :
/* keyed on the flag, not on freespace > 0, so a zero value is recorded too */
255 20184 : if (do_update_fsm)
256 20167 : XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
257 20184 : }
258 :
259 : /*
260 : * Given an "infobits" field from an XLog record, set the correct bits in the
261 : * given infomask and infomask2 for the tuple touched by the record.
262 : *
263 : * (This is the reverse of compute_infobits).
264 : */
265 : static void
266 468011 : fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
267 : {
268 468011 : *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
269 : HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
270 468011 : *infomask2 &= ~HEAP_KEYS_UPDATED;
271 :
272 468011 : if (infobits & XLHL_XMAX_IS_MULTI)
273 3 : *infomask |= HEAP_XMAX_IS_MULTI;
274 468011 : if (infobits & XLHL_XMAX_LOCK_ONLY)
275 55698 : *infomask |= HEAP_XMAX_LOCK_ONLY;
276 468011 : if (infobits & XLHL_XMAX_EXCL_LOCK)
277 54931 : *infomask |= HEAP_XMAX_EXCL_LOCK;
278 : /* note HEAP_XMAX_SHR_LOCK isn't considered here */
279 468011 : if (infobits & XLHL_XMAX_KEYSHR_LOCK)
280 781 : *infomask |= HEAP_XMAX_KEYSHR_LOCK;
281 :
282 468011 : if (infobits & XLHL_KEYS_UPDATED)
283 317607 : *infomask2 |= HEAP_KEYS_UPDATED;
284 468011 : }
285 :
286 : /*
287 : * Replay XLOG_HEAP_DELETE records.
288 : */
289 : static void
290 318331 : heap_xlog_delete(XLogReaderState *record)
291 : {
292 318331 : XLogRecPtr lsn = record->EndRecPtr;
293 318331 : xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
294 : Buffer buffer;
295 : Page page;
296 : ItemId lp;
297 : HeapTupleHeader htup;
298 : BlockNumber blkno;
299 : RelFileLocator target_locator;
300 : ItemPointerData target_tid;
301 :
302 318331 : XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
303 318331 : ItemPointerSetBlockNumber(&target_tid, blkno);
304 318331 : ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
305 :
306 : /*
307 : * The visibility map may need to be fixed even if the heap page is
308 : * already up-to-date.
309 : */
310 318331 : if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
311 : {
312 214 : Relation reln = CreateFakeRelcacheEntry(target_locator);
313 214 : Buffer vmbuffer = InvalidBuffer;
314 :
315 214 : visibilitymap_pin(reln, blkno, &vmbuffer);
316 214 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
317 214 : ReleaseBuffer(vmbuffer);
318 214 : FreeFakeRelcacheEntry(reln);
319 : }
320 :
321 318331 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
322 : {
323 316150 : page = BufferGetPage(buffer);
324 :
325 316150 : if (xlrec->offnum < 1 || xlrec->offnum > PageGetMaxOffsetNumber(page))
326 0 : elog(PANIC, "offnum out of range");
327 316150 : lp = PageGetItemId(page, xlrec->offnum);
328 316150 : if (!ItemIdIsNormal(lp))
329 0 : elog(PANIC, "invalid lp");
330 :
331 316150 : htup = (HeapTupleHeader) PageGetItem(page, lp);
332 :
333 316150 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
334 316150 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
335 316150 : HeapTupleHeaderClearHotUpdated(htup);
336 316150 : fix_infomask_from_infobits(xlrec->infobits_set,
337 : &htup->t_infomask, &htup->t_infomask2);
338 316150 : if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
339 316150 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
340 : else
341 0 : HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
342 316150 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
343 :
344 : /* Mark the page as a candidate for pruning */
345 316150 : PageSetPrunable(page, XLogRecGetXid(record));
346 :
347 316150 : if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
348 34 : PageClearAllVisible(page);
349 :
350 : /* Make sure t_ctid is set correctly */
351 316150 : if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
352 151 : HeapTupleHeaderSetMovedPartitions(htup);
353 : else
354 315999 : htup->t_ctid = target_tid;
355 316150 : PageSetLSN(page, lsn);
356 316150 : MarkBufferDirty(buffer);
357 : }
358 318331 : if (BufferIsValid(buffer))
359 318331 : UnlockReleaseBuffer(buffer);
360 318331 : }
361 :
362 : /*
363 : * Replay XLOG_HEAP_INSERT records.
364 : */
365 : static void
366 1324429 : heap_xlog_insert(XLogReaderState *record)
367 : {
368 1324429 : XLogRecPtr lsn = record->EndRecPtr;
369 1324429 : xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
370 : Buffer buffer;
371 : Page page;
372 : union
373 : {
374 : HeapTupleHeaderData hdr;
375 : char data[MaxHeapTupleSize];
376 : } tbuf;
377 : HeapTupleHeader htup;
378 : xl_heap_header xlhdr;
379 : uint32 newlen;
380 1324429 : Size freespace = 0;
381 : RelFileLocator target_locator;
382 : BlockNumber blkno;
383 : ItemPointerData target_tid;
384 : XLogRedoAction action;
385 :
386 1324429 : XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
387 1324429 : ItemPointerSetBlockNumber(&target_tid, blkno);
388 1324429 : ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
389 :
390 : /* No freezing in the heap_insert() code path */
391 : Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));
392 :
393 : /*
394 : * The visibility map may need to be fixed even if the heap page is
395 : * already up-to-date.
396 : */
397 1324429 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
398 : {
399 882 : Relation reln = CreateFakeRelcacheEntry(target_locator);
400 882 : Buffer vmbuffer = InvalidBuffer;
401 :
402 882 : visibilitymap_pin(reln, blkno, &vmbuffer);
403 882 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
404 882 : ReleaseBuffer(vmbuffer);
405 882 : FreeFakeRelcacheEntry(reln);
406 : }
407 :
408 : /*
409 : * If we inserted the first and only tuple on the page, re-initialize the
410 : * page from scratch.
411 : */
412 1324429 : if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
413 : {
414 17739 : buffer = XLogInitBufferForRedo(record, 0);
415 17739 : page = BufferGetPage(buffer);
416 17739 : PageInit(page, BufferGetPageSize(buffer), 0);
417 17739 : action = BLK_NEEDS_REDO;
418 : }
419 : else
420 1306690 : action = XLogReadBufferForRedo(record, 0, &buffer);
421 1324429 : if (action == BLK_NEEDS_REDO)
422 : {
423 : Size datalen;
424 : char *data;
425 :
426 1321403 : page = BufferGetPage(buffer);
427 :
428 1321403 : if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
429 0 : elog(PANIC, "invalid max offset number");
430 :
431 1321403 : data = XLogRecGetBlockData(record, 0, &datalen);
432 :
433 1321403 : newlen = datalen - SizeOfHeapHeader;
434 : Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
435 1321403 : memcpy(&xlhdr, data, SizeOfHeapHeader);
436 1321403 : data += SizeOfHeapHeader;
437 :
438 1321403 : htup = &tbuf.hdr;
439 1321403 : MemSet(htup, 0, SizeofHeapTupleHeader);
440 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
441 1321403 : memcpy((char *) htup + SizeofHeapTupleHeader,
442 : data,
443 : newlen);
444 1321403 : newlen += SizeofHeapTupleHeader;
445 1321403 : htup->t_infomask2 = xlhdr.t_infomask2;
446 1321403 : htup->t_infomask = xlhdr.t_infomask;
447 1321403 : htup->t_hoff = xlhdr.t_hoff;
448 1321403 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
449 1321403 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
450 1321403 : htup->t_ctid = target_tid;
451 :
452 1321403 : if (PageAddItem(page, htup, newlen, xlrec->offnum, true, true) == InvalidOffsetNumber)
453 0 : elog(PANIC, "failed to add tuple");
454 :
455 1321403 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
456 :
457 : /*
458 : * Set the page prunable to trigger on-access pruning later, which may
459 : * set the page all-visible in the VM. See comments in heap_insert().
460 : */
461 1321403 : if (TransactionIdIsNormal(XLogRecGetXid(record)) &&
462 1321403 : !HeapTupleHeaderXminFrozen(htup))
463 1320873 : PageSetPrunable(page, XLogRecGetXid(record));
464 :
465 1321403 : PageSetLSN(page, lsn);
466 :
467 1321403 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
468 289 : PageClearAllVisible(page);
469 :
470 1321403 : MarkBufferDirty(buffer);
471 : }
472 1324429 : if (BufferIsValid(buffer))
473 1324429 : UnlockReleaseBuffer(buffer);
474 :
475 : /*
476 : * If the page is running low on free space, update the FSM as well.
477 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
478 : * better than that without knowing the fill-factor for the table.
479 : *
480 : * XXX: Don't do this if the page was restored from full page image. We
481 : * don't bother to update the FSM in that case, it doesn't need to be
482 : * totally accurate anyway.
483 : */
484 1324429 : if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
485 259862 : XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
486 1324429 : }
487 :
488 : /*
489 : * Replay XLOG_HEAP2_MULTI_INSERT records.
    : *
    : * 'record' is the WAL record being replayed. Block reference 0 is the heap
    : * page. Block reference 1 is the visibility map (VM) page; it is read only
    : * when the record's flags include XLH_INSERT_ALL_FROZEN_SET.
    : *
    : * Order of operations:
    : * 1. If XLH_INSERT_ALL_VISIBLE_CLEARED is set, clear the VM bits for the
    : * block.
    : * 2. Initialize the heap page (XLOG_HEAP_INIT_PAGE) or read it for redo.
    : * 3. If redo is needed, rebuild each tuple from the block data and add it
    : * to the page, then update the page-level flags, prunable hint and LSN.
    : * 4. Release the heap buffer.
    : * 5. If XLH_INSERT_ALL_FROZEN_SET is set, set the all-visible and
    : * all-frozen bits in the VM.
    : * 6. If the page was redone and has less than BLCKSZ / 5 bytes free,
    : * record its free space in the FSM.
490 : */
491 : static void
492 67379 : heap_xlog_multi_insert(XLogReaderState *record)
493 : {
494 67379 : XLogRecPtr lsn = record->EndRecPtr;
495 : xl_heap_multi_insert *xlrec;
496 : RelFileLocator rlocator;
497 : BlockNumber blkno;
498 : Buffer buffer;
499 : Page page;
/* scratch space in which each tuple is rebuilt before PageAddItem() */
500 : union
501 : {
502 : HeapTupleHeaderData hdr;
503 : char data[MaxHeapTupleSize];
504 : } tbuf;
505 : HeapTupleHeader htup;
506 : uint32 newlen;
507 67379 : Size freespace = 0;
508 : int i;
509 67379 : bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
510 : XLogRedoAction action;
511 67379 : Buffer vmbuffer = InvalidBuffer;
512 :
513 : /*
514 : * Insertion doesn't overwrite MVCC data, so no conflict processing is
515 : * required.
516 : */
517 67379 : xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
518 :
519 67379 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
520 :
521 : /* check that the mutually exclusive flags are not both set */
522 : Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
523 : (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
524 :
525 : /*
526 : * The visibility map may need to be fixed even if the heap page is
527 : * already up-to-date.
528 : */
529 67379 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
530 : {
531 901 : Relation reln = CreateFakeRelcacheEntry(rlocator);
532 :
533 901 : visibilitymap_pin(reln, blkno, &vmbuffer);
534 901 : visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
535 901 : ReleaseBuffer(vmbuffer);
/* reset, so the BufferIsValid(vmbuffer) test near the end does not see this released buffer */
536 901 : vmbuffer = InvalidBuffer;
537 901 : FreeFakeRelcacheEntry(reln);
538 : }
539 :
/* an init-page record always needs redo, starting from an empty page */
540 67379 : if (isinit)
541 : {
542 1918 : buffer = XLogInitBufferForRedo(record, 0);
543 1918 : page = BufferGetPage(buffer);
544 1918 : PageInit(page, BufferGetPageSize(buffer), 0);
545 1918 : action = BLK_NEEDS_REDO;
546 : }
547 : else
548 65461 : action = XLogReadBufferForRedo(record, 0, &buffer);
549 67379 : if (action == BLK_NEEDS_REDO)
550 : {
551 : char *tupdata;
552 : char *endptr;
553 : Size len;
554 :
555 : /* Tuples are stored as block data */
556 65694 : tupdata = XLogRecGetBlockData(record, 0, &len);
557 65694 : endptr = tupdata + len;
558 :
559 65694 : page = BufferGetPage(buffer);
560 :
561 288528 : for (i = 0; i < xlrec->ntuples; i++)
562 : {
563 : OffsetNumber offnum;
564 : xl_multi_insert_tuple *xlhdr;
565 :
566 : /*
567 : * If we're reinitializing the page, the tuples are stored in
568 : * order from FirstOffsetNumber. Otherwise there's an array of
569 : * offsets in the WAL record, and the tuples come after that.
570 : */
571 222834 : if (isinit)
572 99729 : offnum = FirstOffsetNumber + i;
573 : else
574 123105 : offnum = xlrec->offsets[i];
575 222834 : if (PageGetMaxOffsetNumber(page) + 1 < offnum)
576 0 : elog(PANIC, "invalid max offset number");
577 :
/*
 * Each per-tuple header sits at the next SHORTALIGN boundary of the
 * block data; its datalen bytes of tuple body follow immediately.
 */
578 222834 : xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
579 222834 : tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
580 :
581 222834 : newlen = xlhdr->datalen;
582 : Assert(newlen <= MaxHeapTupleSize);
583 222834 : htup = &tbuf.hdr;
584 222834 : MemSet(htup, 0, SizeofHeapTupleHeader);
585 : /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
586 222834 : memcpy((char *) htup + SizeofHeapTupleHeader,
587 : tupdata,
588 : newlen);
589 222834 : tupdata += newlen;
590 :
/*
 * The fixed header was zeroed above; fill it from the per-tuple WAL
 * header, the record's xid, and this tuple's own (blkno, offnum).
 */
591 222834 : newlen += SizeofHeapTupleHeader;
592 222834 : htup->t_infomask2 = xlhdr->t_infomask2;
593 222834 : htup->t_infomask = xlhdr->t_infomask;
594 222834 : htup->t_hoff = xlhdr->t_hoff;
595 222834 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
596 222834 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
597 222834 : ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
598 222834 : ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
599 :
600 222834 : offnum = PageAddItem(page, htup, newlen, offnum, true, true);
601 222834 : if (offnum == InvalidOffsetNumber)
602 0 : elog(PANIC, "failed to add tuple");
603 : }
/* after the last tuple the cursor must land exactly on the end of the block data */
604 65694 : if (tupdata != endptr)
605 0 : elog(PANIC, "total tuple length mismatch");
606 :
607 65694 : freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
608 :
609 65694 : PageSetLSN(page, lsn);
610 :
611 65694 : if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
612 81 : PageClearAllVisible(page);
613 :
614 : /*
615 : * XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible. If
616 : * we are not setting the page frozen, then set the page's prunable
617 : * hint so that we trigger on-access pruning later which may set the
618 : * page all-visible in the VM.
619 : */
620 65694 : if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
621 : {
622 4 : PageSetAllVisible(page);
623 4 : PageClearPrunable(page);
624 : }
625 : else
626 65690 : PageSetPrunable(page, XLogRecGetXid(record));
627 :
628 65694 : MarkBufferDirty(buffer);
629 : }
630 67379 : if (BufferIsValid(buffer))
631 67379 : UnlockReleaseBuffer(buffer);
632 :
/* the heap buffer is not referenced again below */
633 67379 : buffer = InvalidBuffer;
634 :
635 : /*
636 : * Read and update the visibility map (VM) block.
637 : *
638 : * We must always redo VM changes, even if the corresponding heap page
639 : * update was skipped due to the LSN interlock. Each VM block covers
640 : * multiple heap pages, so later WAL records may update other bits in the
641 : * same block. If this record includes an FPI (full-page image),
642 : * subsequent WAL records may depend on it to guard against torn pages.
643 : *
644 : * Heap page changes are replayed first to preserve the invariant:
645 : * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
646 : *
647 : * Note that we released the heap page lock above. During normal
648 : * operation, this would be unsafe — a concurrent modification could
649 : * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
650 : * invariant.
651 : *
652 : * During recovery, however, no concurrent writers exist. Therefore,
653 : * updating the VM without holding the heap page lock is safe enough. This
654 : * same approach is taken when replaying XLOG_HEAP2_PRUNE* records (see
655 : * heap_xlog_prune_freeze()).
    : *
    : * The && short-circuits: block 1 is not read at all unless
    : * XLH_INSERT_ALL_FROZEN_SET is set.
656 : */
657 67383 : if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
658 4 : XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
659 : &vmbuffer) == BLK_NEEDS_REDO)
660 : {
661 0 : Page vmpage = BufferGetPage(vmbuffer);
662 :
663 : /* initialize the page if it was read as zeros */
664 0 : if (PageIsNew(vmpage))
665 0 : PageInit(vmpage, BLCKSZ, 0);
666 :
667 0 : visibilitymap_set(blkno,
668 : vmbuffer,
669 : VISIBILITYMAP_ALL_VISIBLE |
670 : VISIBILITYMAP_ALL_FROZEN,
671 : rlocator);
672 :
673 : Assert(BufferIsDirty(vmbuffer));
674 0 : PageSetLSN(vmpage, lsn);
675 : }
676 :
/* vmbuffer is still InvalidBuffer if block 1 was never read above */
677 67379 : if (BufferIsValid(vmbuffer))
678 4 : UnlockReleaseBuffer(vmbuffer);
679 :
680 : /*
681 : * If the page is running low on free space, update the FSM as well.
682 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
683 : * better than that without knowing the fill-factor for the table.
684 : *
685 : * XXX: Don't do this if the page was restored from full page image. We
686 : * don't bother to update the FSM in that case, it doesn't need to be
687 : * totally accurate anyway.
688 : */
689 67379 : if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
690 17484 : XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
691 67379 : }
692 :
693 : /*
694 : * Replay XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE records.
695 : */
696 : static void
697 96421 : heap_xlog_update(XLogReaderState *record, bool hot_update)
698 : {
699 96421 : XLogRecPtr lsn = record->EndRecPtr;
700 96421 : xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
701 : RelFileLocator rlocator;
702 : BlockNumber oldblk;
703 : BlockNumber newblk;
704 : ItemPointerData newtid;
705 : Buffer obuffer,
706 : nbuffer;
707 : Page opage,
708 : npage;
709 : OffsetNumber offnum;
710 : ItemId lp;
711 : HeapTupleData oldtup;
712 : HeapTupleHeader htup;
713 96421 : uint16 prefixlen = 0,
714 96421 : suffixlen = 0;
715 : char *newp;
716 : union
717 : {
718 : HeapTupleHeaderData hdr;
719 : char data[MaxHeapTupleSize];
720 : } tbuf;
721 : xl_heap_header xlhdr;
722 : uint32 newlen;
723 96421 : Size freespace = 0;
724 : XLogRedoAction oldaction;
725 : XLogRedoAction newaction;
726 :
727 : /* initialize to keep the compiler quiet */
728 96421 : oldtup.t_data = NULL;
729 96421 : oldtup.t_len = 0;
730 :
731 96421 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
732 96421 : if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
733 : {
734 : /* HOT updates are never done across pages */
735 : Assert(!hot_update);
736 : }
737 : else
738 41822 : oldblk = newblk;
739 :
740 96421 : ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
741 :
742 : /*
743 : * The visibility map may need to be fixed even if the heap page is
744 : * already up-to-date.
745 : */
746 96421 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
747 : {
748 387 : Relation reln = CreateFakeRelcacheEntry(rlocator);
749 387 : Buffer vmbuffer = InvalidBuffer;
750 :
751 387 : visibilitymap_pin(reln, oldblk, &vmbuffer);
752 387 : visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
753 387 : ReleaseBuffer(vmbuffer);
754 387 : FreeFakeRelcacheEntry(reln);
755 : }
756 :
757 : /*
758 : * In normal operation, it is important to lock the two pages in
759 : * page-number order, to avoid possible deadlocks against other update
760 : * operations going the other way. However, during WAL replay there can
761 : * be no other update happening, so we don't need to worry about that. But
762 : * we *do* need to worry that we don't expose an inconsistent state to Hot
763 : * Standby queries --- so the original page can't be unlocked before we've
764 : * added the new tuple to the new page.
765 : */
766 :
767 : /* Deal with old tuple version */
768 96421 : oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
769 : &obuffer);
770 96421 : if (oldaction == BLK_NEEDS_REDO)
771 : {
772 96163 : opage = BufferGetPage(obuffer);
773 96163 : offnum = xlrec->old_offnum;
774 96163 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(opage))
775 0 : elog(PANIC, "offnum out of range");
776 96163 : lp = PageGetItemId(opage, offnum);
777 96163 : if (!ItemIdIsNormal(lp))
778 0 : elog(PANIC, "invalid lp");
779 :
780 96163 : htup = (HeapTupleHeader) PageGetItem(opage, lp);
781 :
782 96163 : oldtup.t_data = htup;
783 96163 : oldtup.t_len = ItemIdGetLength(lp);
784 :
785 96163 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
786 96163 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
787 96163 : if (hot_update)
788 38418 : HeapTupleHeaderSetHotUpdated(htup);
789 : else
790 57745 : HeapTupleHeaderClearHotUpdated(htup);
791 96163 : fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
792 : &htup->t_infomask2);
793 96163 : HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
794 96163 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
795 : /* Set forward chain link in t_ctid */
796 96163 : htup->t_ctid = newtid;
797 :
798 : /* Mark the page as a candidate for pruning */
799 96163 : PageSetPrunable(opage, XLogRecGetXid(record));
800 :
801 96163 : if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
802 378 : PageClearAllVisible(opage);
803 :
804 96163 : PageSetLSN(opage, lsn);
805 96163 : MarkBufferDirty(obuffer);
806 : }
807 :
808 : /*
809 : * Read the page the new tuple goes into, if different from old.
810 : */
811 96421 : if (oldblk == newblk)
812 : {
813 41822 : nbuffer = obuffer;
814 41822 : newaction = oldaction;
815 : }
816 54599 : else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
817 : {
818 656 : nbuffer = XLogInitBufferForRedo(record, 0);
819 656 : npage = BufferGetPage(nbuffer);
820 656 : PageInit(npage, BufferGetPageSize(nbuffer), 0);
821 656 : newaction = BLK_NEEDS_REDO;
822 : }
823 : else
824 53943 : newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
825 :
826 : /*
827 : * The visibility map may need to be fixed even if the heap page is
828 : * already up-to-date.
829 : */
830 96421 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
831 : {
832 162 : Relation reln = CreateFakeRelcacheEntry(rlocator);
833 162 : Buffer vmbuffer = InvalidBuffer;
834 :
835 162 : visibilitymap_pin(reln, newblk, &vmbuffer);
836 162 : visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
837 162 : ReleaseBuffer(vmbuffer);
838 162 : FreeFakeRelcacheEntry(reln);
839 : }
840 :
841 : /* Deal with new tuple */
842 96421 : if (newaction == BLK_NEEDS_REDO)
843 : {
844 : char *recdata;
845 : char *recdata_end;
846 : Size datalen;
847 : Size tuplen;
848 :
849 95925 : recdata = XLogRecGetBlockData(record, 0, &datalen);
850 95925 : recdata_end = recdata + datalen;
851 :
852 95925 : npage = BufferGetPage(nbuffer);
853 :
854 95925 : offnum = xlrec->new_offnum;
855 95925 : if (PageGetMaxOffsetNumber(npage) + 1 < offnum)
856 0 : elog(PANIC, "invalid max offset number");
857 :
858 95925 : if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
859 : {
860 : Assert(newblk == oldblk);
861 17980 : memcpy(&prefixlen, recdata, sizeof(uint16));
862 17980 : recdata += sizeof(uint16);
863 : }
864 95925 : if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
865 : {
866 : Assert(newblk == oldblk);
867 35610 : memcpy(&suffixlen, recdata, sizeof(uint16));
868 35610 : recdata += sizeof(uint16);
869 : }
870 :
871 95925 : memcpy(&xlhdr, recdata, SizeOfHeapHeader);
872 95925 : recdata += SizeOfHeapHeader;
873 :
874 95925 : tuplen = recdata_end - recdata;
875 : Assert(tuplen <= MaxHeapTupleSize);
876 :
877 95925 : htup = &tbuf.hdr;
878 95925 : MemSet(htup, 0, SizeofHeapTupleHeader);
879 :
880 : /*
881 : * Reconstruct the new tuple using the prefix and/or suffix from the
882 : * old tuple, and the data stored in the WAL record.
883 : */
884 95925 : newp = (char *) htup + SizeofHeapTupleHeader;
885 95925 : if (prefixlen > 0)
886 : {
887 : int len;
888 :
889 : /* copy bitmap [+ padding] [+ oid] from WAL record */
890 17980 : len = xlhdr.t_hoff - SizeofHeapTupleHeader;
891 17980 : memcpy(newp, recdata, len);
892 17980 : recdata += len;
893 17980 : newp += len;
894 :
895 : /* copy prefix from old tuple */
896 17980 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
897 17980 : newp += prefixlen;
898 :
899 : /* copy new tuple data from WAL record */
900 17980 : len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
901 17980 : memcpy(newp, recdata, len);
902 17980 : recdata += len;
903 17980 : newp += len;
904 : }
905 : else
906 : {
907 : /*
908 : * copy bitmap [+ padding] [+ oid] + data from record, all in one
909 : * go
910 : */
911 77945 : memcpy(newp, recdata, tuplen);
912 77945 : recdata += tuplen;
913 77945 : newp += tuplen;
914 : }
915 : Assert(recdata == recdata_end);
916 :
917 : /* copy suffix from old tuple */
918 95925 : if (suffixlen > 0)
919 35610 : memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
920 :
921 95925 : newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
922 95925 : htup->t_infomask2 = xlhdr.t_infomask2;
923 95925 : htup->t_infomask = xlhdr.t_infomask;
924 95925 : htup->t_hoff = xlhdr.t_hoff;
925 :
926 95925 : HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
927 95925 : HeapTupleHeaderSetCmin(htup, FirstCommandId);
928 95925 : HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
929 : /* Make sure there is no forward chain link in t_ctid */
930 95925 : htup->t_ctid = newtid;
931 :
932 95925 : offnum = PageAddItem(npage, htup, newlen, offnum, true, true);
933 95925 : if (offnum == InvalidOffsetNumber)
934 0 : elog(PANIC, "failed to add tuple");
935 :
936 95925 : if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
937 84 : PageClearAllVisible(npage);
938 :
939 : /* needed to update FSM below */
940 95925 : freespace = PageGetHeapFreeSpace(npage);
941 :
942 95925 : PageSetLSN(npage, lsn);
943 : /* See heap_insert() for why we set pd_prune_xid on insert */
944 95925 : PageSetPrunable(npage, XLogRecGetXid(record));
945 95925 : MarkBufferDirty(nbuffer);
946 : }
947 :
948 96421 : if (BufferIsValid(nbuffer) && nbuffer != obuffer)
949 54599 : UnlockReleaseBuffer(nbuffer);
950 96421 : if (BufferIsValid(obuffer))
951 96421 : UnlockReleaseBuffer(obuffer);
952 :
953 : /*
954 : * If the new page is running low on free space, update the FSM as well.
955 : * Arbitrarily, our definition of "low" is less than 20%. We can't do much
956 : * better than that without knowing the fill-factor for the table.
957 : *
958 : * However, don't update the FSM on HOT updates, because after crash
959 : * recovery, either the old or the new tuple will certainly be dead and
960 : * prunable. After pruning, the page will have roughly as much free space
961 : * as it did before the update, assuming the new tuple is about the same
962 : * size as the old one.
963 : *
964 : * XXX: Don't do this if the page was restored from full page image. We
965 : * don't bother to update the FSM in that case, it doesn't need to be
966 : * totally accurate anyway.
967 : */
968 96421 : if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
969 11779 : XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
970 96421 : }
971 :
972 : /*
973 : * Replay XLOG_HEAP_CONFIRM records.
974 : */
975 : static void
976 93 : heap_xlog_confirm(XLogReaderState *record)
977 : {
978 93 : XLogRecPtr lsn = record->EndRecPtr;
979 93 : xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record);
980 : Buffer buffer;
981 : Page page;
982 : OffsetNumber offnum;
983 : ItemId lp;
984 : HeapTupleHeader htup;
985 :
986 93 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
987 : {
988 92 : page = BufferGetPage(buffer);
989 :
990 92 : offnum = xlrec->offnum;
991 92 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
992 0 : elog(PANIC, "offnum out of range");
993 92 : lp = PageGetItemId(page, offnum);
994 92 : if (!ItemIdIsNormal(lp))
995 0 : elog(PANIC, "invalid lp");
996 :
997 92 : htup = (HeapTupleHeader) PageGetItem(page, lp);
998 :
999 : /*
1000 : * Confirm tuple as actually inserted
1001 : */
1002 92 : ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum);
1003 :
1004 92 : PageSetLSN(page, lsn);
1005 92 : MarkBufferDirty(buffer);
1006 : }
1007 93 : if (BufferIsValid(buffer))
1008 93 : UnlockReleaseBuffer(buffer);
1009 93 : }
1010 :
1011 : /*
1012 : * Replay XLOG_HEAP_LOCK records.
1013 : */
1014 : static void
1015 55903 : heap_xlog_lock(XLogReaderState *record)
1016 : {
1017 55903 : XLogRecPtr lsn = record->EndRecPtr;
1018 55903 : xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
1019 : Buffer buffer;
1020 : Page page;
1021 : OffsetNumber offnum;
1022 : ItemId lp;
1023 : HeapTupleHeader htup;
1024 :
1025 : /*
1026 : * The visibility map may need to be fixed even if the heap page is
1027 : * already up-to-date.
1028 : */
1029 55903 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1030 : {
1031 : RelFileLocator rlocator;
1032 53 : Buffer vmbuffer = InvalidBuffer;
1033 : BlockNumber block;
1034 : Relation reln;
1035 :
1036 53 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1037 53 : reln = CreateFakeRelcacheEntry(rlocator);
1038 :
1039 53 : visibilitymap_pin(reln, block, &vmbuffer);
1040 53 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1041 :
1042 53 : ReleaseBuffer(vmbuffer);
1043 53 : FreeFakeRelcacheEntry(reln);
1044 : }
1045 :
1046 55903 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1047 : {
1048 55698 : page = BufferGetPage(buffer);
1049 :
1050 55698 : offnum = xlrec->offnum;
1051 55698 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1052 0 : elog(PANIC, "offnum out of range");
1053 55698 : lp = PageGetItemId(page, offnum);
1054 55698 : if (!ItemIdIsNormal(lp))
1055 0 : elog(PANIC, "invalid lp");
1056 :
1057 55698 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1058 :
1059 55698 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1060 55698 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1061 55698 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1062 : &htup->t_infomask2);
1063 :
1064 : /*
1065 : * Clear relevant update flags, but only if the modified infomask says
1066 : * there's no update.
1067 : */
1068 55698 : if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
1069 : {
1070 55698 : HeapTupleHeaderClearHotUpdated(htup);
1071 : /* Make sure there is no forward chain link in t_ctid */
1072 55698 : ItemPointerSet(&htup->t_ctid,
1073 : BufferGetBlockNumber(buffer),
1074 : offnum);
1075 : }
1076 55698 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1077 55698 : HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
1078 55698 : PageSetLSN(page, lsn);
1079 55698 : MarkBufferDirty(buffer);
1080 : }
1081 55903 : if (BufferIsValid(buffer))
1082 55903 : UnlockReleaseBuffer(buffer);
1083 55903 : }
1084 :
1085 : /*
1086 : * Replay XLOG_HEAP2_LOCK_UPDATED records.
1087 : */
1088 : static void
1089 0 : heap_xlog_lock_updated(XLogReaderState *record)
1090 : {
1091 0 : XLogRecPtr lsn = record->EndRecPtr;
1092 : xl_heap_lock_updated *xlrec;
1093 : Buffer buffer;
1094 : Page page;
1095 : OffsetNumber offnum;
1096 : ItemId lp;
1097 : HeapTupleHeader htup;
1098 :
1099 0 : xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
1100 :
1101 : /*
1102 : * The visibility map may need to be fixed even if the heap page is
1103 : * already up-to-date.
1104 : */
1105 0 : if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
1106 : {
1107 : RelFileLocator rlocator;
1108 0 : Buffer vmbuffer = InvalidBuffer;
1109 : BlockNumber block;
1110 : Relation reln;
1111 :
1112 0 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
1113 0 : reln = CreateFakeRelcacheEntry(rlocator);
1114 :
1115 0 : visibilitymap_pin(reln, block, &vmbuffer);
1116 0 : visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
1117 :
1118 0 : ReleaseBuffer(vmbuffer);
1119 0 : FreeFakeRelcacheEntry(reln);
1120 : }
1121 :
1122 0 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1123 : {
1124 0 : page = BufferGetPage(buffer);
1125 :
1126 0 : offnum = xlrec->offnum;
1127 0 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1128 0 : elog(PANIC, "offnum out of range");
1129 0 : lp = PageGetItemId(page, offnum);
1130 0 : if (!ItemIdIsNormal(lp))
1131 0 : elog(PANIC, "invalid lp");
1132 :
1133 0 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1134 :
1135 0 : htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
1136 0 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
1137 0 : fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
1138 : &htup->t_infomask2);
1139 0 : HeapTupleHeaderSetXmax(htup, xlrec->xmax);
1140 :
1141 0 : PageSetLSN(page, lsn);
1142 0 : MarkBufferDirty(buffer);
1143 : }
1144 0 : if (BufferIsValid(buffer))
1145 0 : UnlockReleaseBuffer(buffer);
1146 0 : }
1147 :
1148 : /*
1149 : * Replay XLOG_HEAP_INPLACE records.
1150 : */
1151 : static void
1152 8317 : heap_xlog_inplace(XLogReaderState *record)
1153 : {
1154 8317 : XLogRecPtr lsn = record->EndRecPtr;
1155 8317 : xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
1156 : Buffer buffer;
1157 : Page page;
1158 : OffsetNumber offnum;
1159 : ItemId lp;
1160 : HeapTupleHeader htup;
1161 : uint32 oldlen;
1162 : Size newlen;
1163 :
1164 8317 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
1165 : {
1166 8121 : char *newtup = XLogRecGetBlockData(record, 0, &newlen);
1167 :
1168 8121 : page = BufferGetPage(buffer);
1169 :
1170 8121 : offnum = xlrec->offnum;
1171 8121 : if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
1172 0 : elog(PANIC, "offnum out of range");
1173 8121 : lp = PageGetItemId(page, offnum);
1174 8121 : if (!ItemIdIsNormal(lp))
1175 0 : elog(PANIC, "invalid lp");
1176 :
1177 8121 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1178 :
1179 8121 : oldlen = ItemIdGetLength(lp) - htup->t_hoff;
1180 8121 : if (oldlen != newlen)
1181 0 : elog(PANIC, "wrong tuple length");
1182 :
1183 8121 : memcpy((char *) htup + htup->t_hoff, newtup, newlen);
1184 :
1185 8121 : PageSetLSN(page, lsn);
1186 8121 : MarkBufferDirty(buffer);
1187 : }
1188 8317 : if (BufferIsValid(buffer))
1189 8317 : UnlockReleaseBuffer(buffer);
1190 :
1191 8317 : ProcessCommittedInvalidationMessages(xlrec->msgs,
1192 : xlrec->nmsgs,
1193 8317 : xlrec->relcacheInitFileInval,
1194 : xlrec->dbId,
1195 : xlrec->tsId);
1196 8317 : }
1197 :
1198 : void
1199 1803496 : heap_redo(XLogReaderState *record)
1200 : {
1201 1803496 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1202 :
1203 : /*
1204 : * These operations don't overwrite MVCC data so no conflict processing is
1205 : * required. The ones in heap2 rmgr do.
1206 : */
1207 :
1208 1803496 : switch (info & XLOG_HEAP_OPMASK)
1209 : {
1210 1324429 : case XLOG_HEAP_INSERT:
1211 1324429 : heap_xlog_insert(record);
1212 1324429 : break;
1213 318331 : case XLOG_HEAP_DELETE:
1214 318331 : heap_xlog_delete(record);
1215 318331 : break;
1216 57791 : case XLOG_HEAP_UPDATE:
1217 57791 : heap_xlog_update(record, false);
1218 57791 : break;
1219 2 : case XLOG_HEAP_TRUNCATE:
1220 :
1221 : /*
1222 : * TRUNCATE is a no-op because the actions are already logged as
1223 : * SMGR WAL records. TRUNCATE WAL record only exists for logical
1224 : * decoding.
1225 : */
1226 2 : break;
1227 38630 : case XLOG_HEAP_HOT_UPDATE:
1228 38630 : heap_xlog_update(record, true);
1229 38630 : break;
1230 93 : case XLOG_HEAP_CONFIRM:
1231 93 : heap_xlog_confirm(record);
1232 93 : break;
1233 55903 : case XLOG_HEAP_LOCK:
1234 55903 : heap_xlog_lock(record);
1235 55903 : break;
1236 8317 : case XLOG_HEAP_INPLACE:
1237 8317 : heap_xlog_inplace(record);
1238 8317 : break;
1239 0 : default:
1240 0 : elog(PANIC, "heap_redo: unknown op code %u", info);
1241 : }
1242 1803496 : }
1243 :
1244 : void
1245 88611 : heap2_redo(XLogReaderState *record)
1246 : {
1247 88611 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1248 :
1249 88611 : switch (info & XLOG_HEAP_OPMASK)
1250 : {
1251 20184 : case XLOG_HEAP2_PRUNE_ON_ACCESS:
1252 : case XLOG_HEAP2_PRUNE_VACUUM_SCAN:
1253 : case XLOG_HEAP2_PRUNE_VACUUM_CLEANUP:
1254 20184 : heap_xlog_prune_freeze(record);
1255 20184 : break;
1256 67379 : case XLOG_HEAP2_MULTI_INSERT:
1257 67379 : heap_xlog_multi_insert(record);
1258 67379 : break;
1259 0 : case XLOG_HEAP2_LOCK_UPDATED:
1260 0 : heap_xlog_lock_updated(record);
1261 0 : break;
1262 1048 : case XLOG_HEAP2_NEW_CID:
1263 :
1264 : /*
1265 : * Nothing to do on a real replay, only used during logical
1266 : * decoding.
1267 : */
1268 1048 : break;
1269 0 : case XLOG_HEAP2_REWRITE:
1270 0 : heap_xlog_logical_rewrite(record);
1271 0 : break;
1272 0 : default:
1273 0 : elog(PANIC, "heap2_redo: unknown op code %u", info);
1274 : }
1275 88611 : }
1276 :
1277 : /*
1278 : * Mask a heap page before performing consistency checks on it.
1279 : */
1280 : void
1281 3012686 : heap_mask(char *pagedata, BlockNumber blkno)
1282 : {
1283 3012686 : Page page = (Page) pagedata;
1284 : OffsetNumber off;
1285 :
1286 3012686 : mask_page_lsn_and_checksum(page);
1287 :
1288 3012686 : mask_page_hint_bits(page);
1289 3012686 : mask_unused_space(page);
1290 :
1291 248976954 : for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
1292 : {
1293 245964268 : ItemId iid = PageGetItemId(page, off);
1294 : char *page_item;
1295 :
1296 245964268 : page_item = (char *) (page + ItemIdGetOffset(iid));
1297 :
1298 245964268 : if (ItemIdIsNormal(iid))
1299 : {
1300 228298862 : HeapTupleHeader page_htup = (HeapTupleHeader) page_item;
1301 :
1302 : /*
1303 : * If xmin of a tuple is not yet frozen, we should ignore
1304 : * differences in hint bits, since they can be set without
1305 : * emitting WAL.
1306 : */
1307 228298862 : if (!HeapTupleHeaderXminFrozen(page_htup))
1308 220584748 : page_htup->t_infomask &= ~HEAP_XACT_MASK;
1309 : else
1310 : {
1311 : /* Still we need to mask xmax hint bits. */
1312 7714114 : page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
1313 7714114 : page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
1314 : }
1315 :
1316 : /*
1317 : * During replay, we set Command Id to FirstCommandId. Hence, mask
1318 : * it. See heap_xlog_insert() for details.
1319 : */
1320 228298862 : page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;
1321 :
1322 : /*
1323 : * For a speculative tuple, heap_insert() does not set ctid in the
1324 : * caller-passed heap tuple itself, leaving the ctid field to
1325 : * contain a speculative token value - a per-backend monotonically
1326 : * increasing identifier. Besides, it does not WAL-log ctid under
1327 : * any circumstances.
1328 : *
1329 : * During redo, heap_xlog_insert() sets t_ctid to current block
1330 : * number and self offset number. It doesn't care about any
1331 : * speculative insertions on the primary. Hence, we set t_ctid to
1332 : * current block number and self offset number to ignore any
1333 : * inconsistency.
1334 : */
1335 228298862 : if (HeapTupleHeaderIsSpeculative(page_htup))
1336 92 : ItemPointerSet(&page_htup->t_ctid, blkno, off);
1337 :
1338 : /*
1339 : * NB: Not ignoring ctid changes due to the tuple having moved
1340 : * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
1341 : * important information that needs to be in-sync between primary
1342 : * and standby, and thus is WAL logged.
1343 : */
1344 : }
1345 :
1346 : /*
1347 : * Ignore any padding bytes after the tuple, when the length of the
1348 : * item is not MAXALIGNed.
1349 : */
1350 245964268 : if (ItemIdHasStorage(iid))
1351 : {
1352 228298862 : int len = ItemIdGetLength(iid);
1353 228298862 : int padlen = MAXALIGN(len) - len;
1354 :
1355 228298862 : if (padlen > 0)
1356 123118842 : memset(page_item + len, MASK_MARKER, padlen);
1357 : }
1358 : }
1359 3012686 : }
|