Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * hash_xlog.c
4 : * WAL replay logic for hash index.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/hash/hash_xlog.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/bufmask.h"
18 : #include "access/hash.h"
19 : #include "access/hash_xlog.h"
20 : #include "access/xlogutils.h"
21 : #include "storage/standby.h"
22 :
23 : /*
24 : * replay a hash index meta page
25 : */
26 : static void
27 54 : hash_xlog_init_meta_page(XLogReaderState *record)
28 : {
29 54 : XLogRecPtr lsn = record->EndRecPtr;
30 : Page page;
31 : Buffer metabuf;
32 : ForkNumber forknum;
33 :
34 54 : xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);
35 :
 36 : /* create the index's metapage */
37 54 : metabuf = XLogInitBufferForRedo(record, 0);
38 : Assert(BufferIsValid(metabuf));
39 54 : _hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid,
40 54 : xlrec->ffactor, true);
41 54 : page = BufferGetPage(metabuf);
42 54 : PageSetLSN(page, lsn);
43 54 : MarkBufferDirty(metabuf);
44 :
45 : /*
46 : * Force the on-disk state of init forks to always be in sync with the
47 : * state in shared buffers. See XLogReadBufferForRedoExtended. We need
48 : * special handling for init forks as create index operations don't log a
49 : * full page image of the metapage.
50 : */
51 54 : XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
52 54 : if (forknum == INIT_FORKNUM)
53 2 : FlushOneBuffer(metabuf);
54 :
55 : /* all done */
56 54 : UnlockReleaseBuffer(metabuf);
57 54 : }
58 :
59 : /*
60 : * replay a hash index bitmap page
61 : */
62 : static void
63 54 : hash_xlog_init_bitmap_page(XLogReaderState *record)
64 : {
65 54 : XLogRecPtr lsn = record->EndRecPtr;
66 : Buffer bitmapbuf;
67 : Buffer metabuf;
68 : Page page;
69 : HashMetaPage metap;
70 : uint32 num_buckets;
71 : ForkNumber forknum;
72 :
73 54 : xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);
74 :
75 : /*
76 : * Initialize bitmap page
77 : */
78 54 : bitmapbuf = XLogInitBufferForRedo(record, 0);
79 54 : _hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
80 54 : PageSetLSN(BufferGetPage(bitmapbuf), lsn);
81 54 : MarkBufferDirty(bitmapbuf);
82 :
83 : /*
84 : * Force the on-disk state of init forks to always be in sync with the
85 : * state in shared buffers. See XLogReadBufferForRedoExtended. We need
86 : * special handling for init forks as create index operations don't log a
87 : * full page image of the metapage.
88 : */
89 54 : XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
90 54 : if (forknum == INIT_FORKNUM)
91 2 : FlushOneBuffer(bitmapbuf);
92 54 : UnlockReleaseBuffer(bitmapbuf);
93 :
94 : /* add the new bitmap page to the metapage's list of bitmaps */
95 54 : if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
96 : {
97 : /*
98 : * Note: in normal operation, we'd update the metapage while still
99 : * holding lock on the bitmap page. But during replay it's not
100 : * necessary to hold that lock, since nobody can see it yet; the
101 : * creating transaction hasn't yet committed.
102 : */
103 54 : page = BufferGetPage(metabuf);
104 54 : metap = HashPageGetMeta(page);
105 :
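    :         /*
    :          * The initial bitmap page is allocated right after the metapage
    :          * (block 0) and the num_buckets bucket pages, so its block number
    :          * is num_buckets + 1.
    :          */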
106 54 : num_buckets = metap->hashm_maxbucket + 1;
107 54 : metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
108 54 : metap->hashm_nmaps++;
109 :
110 54 : PageSetLSN(page, lsn);
111 54 : MarkBufferDirty(metabuf);
112 :
113 54 : XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
114 54 : if (forknum == INIT_FORKNUM)
115 2 : FlushOneBuffer(metabuf);
116 : }
117 54 : if (BufferIsValid(metabuf))
118 54 : UnlockReleaseBuffer(metabuf);
119 54 : }
120 :
121 : /*
122 : * replay a hash index insert without split
123 : */
124 : static void
125 228210 : hash_xlog_insert(XLogReaderState *record)
126 : {
127 : HashMetaPage metap;
128 228210 : XLogRecPtr lsn = record->EndRecPtr;
129 228210 : xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record);
130 : Buffer buffer;
131 : Page page;
132 :
133 228210 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
134 : {
135 : Size datalen;
136 225126 : char *datapos = XLogRecGetBlockData(record, 0, &datalen);
137 :
138 225126 : page = BufferGetPage(buffer);
139 :
140 225126 : if (PageAddItem(page, datapos, datalen, xlrec->offnum, false, false) == InvalidOffsetNumber)
141 0 : elog(PANIC, "hash_xlog_insert: failed to add item");
142 :
143 225126 : PageSetLSN(page, lsn);
144 225126 : MarkBufferDirty(buffer);
145 : }
146 228210 : if (BufferIsValid(buffer))
147 228210 : UnlockReleaseBuffer(buffer);
148 :
149 228210 : if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
150 : {
151 : /*
152 : * Note: in normal operation, we'd update the metapage while still
153 : * holding lock on the page we inserted into. But during replay it's
154 : * not necessary to hold that lock, since no other index updates can
155 : * be happening concurrently.
156 : */
157 228148 : page = BufferGetPage(buffer);
158 228148 : metap = HashPageGetMeta(page);
159 228148 : metap->hashm_ntuples += 1;
160 :
161 228148 : PageSetLSN(page, lsn);
162 228148 : MarkBufferDirty(buffer);
163 : }
164 228210 : if (BufferIsValid(buffer))
165 228210 : UnlockReleaseBuffer(buffer);
166 228210 : }
167 :
168 : /*
169 : * replay addition of overflow page for hash index
170 : */
171 : static void
172 108 : hash_xlog_add_ovfl_page(XLogReaderState *record)
173 : {
174 108 : XLogRecPtr lsn = record->EndRecPtr;
175 108 : xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record);
176 : Buffer leftbuf;
177 : Buffer ovflbuf;
178 : Buffer metabuf;
179 : BlockNumber leftblk;
180 : BlockNumber rightblk;
181 108 : BlockNumber newmapblk = InvalidBlockNumber;
182 : Page ovflpage;
183 : HashPageOpaque ovflopaque;
184 : uint32 *num_bucket;
185 : char *data;
186 : Size datalen PG_USED_FOR_ASSERTS_ONLY;
187 108 : bool new_bmpage = false;
188 :
189 108 : XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk);
190 108 : XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk);
191 :
192 108 : ovflbuf = XLogInitBufferForRedo(record, 0);
193 : Assert(BufferIsValid(ovflbuf));
194 :
195 108 : data = XLogRecGetBlockData(record, 0, &datalen);
196 108 : num_bucket = (uint32 *) data;
197 : Assert(datalen == sizeof(uint32));
198 108 : _hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE,
199 : true);
200 : /* update backlink */
201 108 : ovflpage = BufferGetPage(ovflbuf);
202 108 : ovflopaque = HashPageGetOpaque(ovflpage);
203 108 : ovflopaque->hasho_prevblkno = leftblk;
204 :
205 108 : PageSetLSN(ovflpage, lsn);
206 108 : MarkBufferDirty(ovflbuf);
207 :
208 108 : if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
209 : {
210 : Page leftpage;
211 : HashPageOpaque leftopaque;
212 :
213 106 : leftpage = BufferGetPage(leftbuf);
214 106 : leftopaque = HashPageGetOpaque(leftpage);
215 106 : leftopaque->hasho_nextblkno = rightblk;
216 :
217 106 : PageSetLSN(leftpage, lsn);
218 106 : MarkBufferDirty(leftbuf);
219 : }
220 :
221 108 : if (BufferIsValid(leftbuf))
222 108 : UnlockReleaseBuffer(leftbuf);
223 108 : UnlockReleaseBuffer(ovflbuf);
224 :
225 : /*
226 : * Note: in normal operation, we'd update the bitmap and meta page while
227 : * still holding lock on the overflow pages. But during replay it's not
228 : * necessary to hold those locks, since no other index updates can be
229 : * happening concurrently.
230 : */
231 108 : if (XLogRecHasBlockRef(record, 2))
232 : {
233 : Buffer mapbuffer;
234 :
235 24 : if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO)
236 : {
237 16 : Page mappage = BufferGetPage(mapbuffer);
238 16 : uint32 *freep = NULL;
239 : uint32 *bitmap_page_bit;
240 :
241 16 : freep = HashPageGetBitmap(mappage);
242 :
243 16 : data = XLogRecGetBlockData(record, 2, &datalen);
244 16 : bitmap_page_bit = (uint32 *) data;
245 :
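    :             /*
    :              * Set the bit for the recycled overflow page; in the hash
    :              * bitmap a set bit means the corresponding overflow page is
    :              * in use.
    :              */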
246 16 : SETBIT(freep, *bitmap_page_bit);
247 :
248 16 : PageSetLSN(mappage, lsn);
249 16 : MarkBufferDirty(mapbuffer);
250 : }
251 24 : if (BufferIsValid(mapbuffer))
252 24 : UnlockReleaseBuffer(mapbuffer);
253 : }
254 :
255 108 : if (XLogRecHasBlockRef(record, 3))
256 : {
257 : Buffer newmapbuf;
258 :
259 0 : newmapbuf = XLogInitBufferForRedo(record, 3);
260 :
261 0 : _hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true);
262 :
263 0 : new_bmpage = true;
264 0 : newmapblk = BufferGetBlockNumber(newmapbuf);
265 :
266 0 : MarkBufferDirty(newmapbuf);
267 0 : PageSetLSN(BufferGetPage(newmapbuf), lsn);
268 :
269 0 : UnlockReleaseBuffer(newmapbuf);
270 : }
271 :
272 108 : if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO)
273 : {
274 : HashMetaPage metap;
275 : Page page;
276 : uint32 *firstfree_ovflpage;
277 :
278 108 : data = XLogRecGetBlockData(record, 4, &datalen);
279 108 : firstfree_ovflpage = (uint32 *) data;
280 :
281 108 : page = BufferGetPage(metabuf);
282 108 : metap = HashPageGetMeta(page);
283 108 : metap->hashm_firstfree = *firstfree_ovflpage;
284 :
285 108 : if (!xlrec->bmpage_found)
286 : {
287 84 : metap->hashm_spares[metap->hashm_ovflpoint]++;
288 :
289 84 : if (new_bmpage)
290 : {
291 : Assert(BlockNumberIsValid(newmapblk));
292 :
293 0 : metap->hashm_mapp[metap->hashm_nmaps] = newmapblk;
294 0 : metap->hashm_nmaps++;
295 0 : metap->hashm_spares[metap->hashm_ovflpoint]++;
296 : }
297 : }
298 :
299 108 : PageSetLSN(page, lsn);
300 108 : MarkBufferDirty(metabuf);
301 : }
302 108 : if (BufferIsValid(metabuf))
303 108 : UnlockReleaseBuffer(metabuf);
304 108 : }
305 :
306 : /*
307 : * replay allocation of page for split operation
308 : */
309 : static void
310 196 : hash_xlog_split_allocate_page(XLogReaderState *record)
311 : {
312 196 : XLogRecPtr lsn = record->EndRecPtr;
313 196 : xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record);
314 : Buffer oldbuf;
315 : Buffer newbuf;
316 : Buffer metabuf;
317 : XLogRedoAction action;
318 :
319 : /*
320 : * To be consistent with normal operation, here we take cleanup locks on
321 : * both the old and new buckets even though there can't be any concurrent
322 : * inserts.
323 : */
324 :
325 : /* replay the record for old bucket */
326 196 : action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf);
327 :
328 : /*
329 : * Note that we still update the page even if it was restored from a full
330 : * page image, because the special space is not included in the image.
331 : */
332 196 : if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
333 : {
334 : Page oldpage;
335 : HashPageOpaque oldopaque;
336 :
337 196 : oldpage = BufferGetPage(oldbuf);
338 196 : oldopaque = HashPageGetOpaque(oldpage);
339 :
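    :         /*
    :          * For a primary bucket page, hasho_prevblkno is not a real block
    :          * number; it holds the hashm_maxbucket value as of the most recent
    :          * split, which is why the new bucket number is stored here.
    :          */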
340 196 : oldopaque->hasho_flag = xlrec->old_bucket_flag;
341 196 : oldopaque->hasho_prevblkno = xlrec->new_bucket;
342 :
343 196 : PageSetLSN(oldpage, lsn);
344 196 : MarkBufferDirty(oldbuf);
345 : }
346 :
347 : /* replay the record for new bucket */
348 196 : XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_AND_CLEANUP_LOCK, true,
349 : &newbuf);
350 196 : _hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket,
351 196 : xlrec->new_bucket_flag, true);
352 196 : MarkBufferDirty(newbuf);
353 196 : PageSetLSN(BufferGetPage(newbuf), lsn);
354 :
355 : /*
 356 : * We could release the lock on the old bucket earlier as well, but we do
 357 : * it here to be consistent with normal operation.
358 : */
359 196 : if (BufferIsValid(oldbuf))
360 196 : UnlockReleaseBuffer(oldbuf);
361 196 : if (BufferIsValid(newbuf))
362 196 : UnlockReleaseBuffer(newbuf);
363 :
364 : /*
365 : * Note: in normal operation, we'd update the meta page while still
366 : * holding lock on the old and new bucket pages. But during replay it's
367 : * not necessary to hold those locks, since no other bucket splits can be
368 : * happening concurrently.
369 : */
370 :
371 : /* replay the record for metapage changes */
372 196 : if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO)
373 : {
374 : Page page;
375 : HashMetaPage metap;
376 : Size datalen;
377 : char *data;
378 : uint32 *uidata;
379 : int uidatacount;
380 :
381 196 : page = BufferGetPage(metabuf);
382 196 : metap = HashPageGetMeta(page);
383 196 : metap->hashm_maxbucket = xlrec->new_bucket;
384 :
385 196 : data = XLogRecGetBlockData(record, 2, &datalen);
386 :
387 : /*
388 : * This cast is ok because XLogRecGetBlockData() returns a MAXALIGNed
389 : * buffer.
390 : */
391 196 : uidata = (uint32 *) data;
392 196 : uidatacount = 0;
393 :
394 196 : if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS)
395 : {
396 6 : uint32 lowmask = uidata[uidatacount++];
397 6 : uint32 highmask = uidata[uidatacount++];
398 :
399 : /* update metapage */
400 6 : metap->hashm_lowmask = lowmask;
401 6 : metap->hashm_highmask = highmask;
402 : }
403 :
404 196 : if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT)
405 : {
406 16 : uint32 ovflpoint = uidata[uidatacount++];
407 16 : uint32 ovflpages = uidata[uidatacount++];
408 :
409 : /* update metapage */
410 16 : metap->hashm_ovflpoint = ovflpoint;
411 16 : metap->hashm_spares[ovflpoint] = ovflpages;
412 : }
413 :
414 196 : MarkBufferDirty(metabuf);
415 196 : PageSetLSN(BufferGetPage(metabuf), lsn);
416 : }
417 :
418 196 : if (BufferIsValid(metabuf))
419 196 : UnlockReleaseBuffer(metabuf);
420 196 : }
421 :
422 : /*
423 : * replay of split operation
424 : */
425 : static void
426 222 : hash_xlog_split_page(XLogReaderState *record)
427 : {
428 : Buffer buf;
429 :
430 222 : if (XLogReadBufferForRedo(record, 0, &buf) != BLK_RESTORED)
431 0 : elog(ERROR, "Hash split record did not contain a full-page image");
432 :
433 222 : UnlockReleaseBuffer(buf);
434 222 : }
435 :
436 : /*
437 : * replay completion of split operation
438 : */
439 : static void
440 196 : hash_xlog_split_complete(XLogReaderState *record)
441 : {
442 196 : XLogRecPtr lsn = record->EndRecPtr;
443 196 : xl_hash_split_complete *xlrec = (xl_hash_split_complete *) XLogRecGetData(record);
444 : Buffer oldbuf;
445 : Buffer newbuf;
446 : XLogRedoAction action;
447 :
448 : /* replay the record for old bucket */
449 196 : action = XLogReadBufferForRedo(record, 0, &oldbuf);
450 :
451 : /*
452 : * Note that we still update the page even if it was restored from a full
453 : * page image, because the bucket flag is not included in the image.
454 : */
455 196 : if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
456 : {
457 : Page oldpage;
458 : HashPageOpaque oldopaque;
459 :
460 196 : oldpage = BufferGetPage(oldbuf);
461 196 : oldopaque = HashPageGetOpaque(oldpage);
462 :
463 196 : oldopaque->hasho_flag = xlrec->old_bucket_flag;
464 :
465 196 : PageSetLSN(oldpage, lsn);
466 196 : MarkBufferDirty(oldbuf);
467 : }
468 196 : if (BufferIsValid(oldbuf))
469 196 : UnlockReleaseBuffer(oldbuf);
470 :
471 : /* replay the record for new bucket */
472 196 : action = XLogReadBufferForRedo(record, 1, &newbuf);
473 :
474 : /*
475 : * Note that we still update the page even if it was restored from a full
476 : * page image, because the bucket flag is not included in the image.
477 : */
478 196 : if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
479 : {
480 : Page newpage;
481 : HashPageOpaque nopaque;
482 :
483 196 : newpage = BufferGetPage(newbuf);
484 196 : nopaque = HashPageGetOpaque(newpage);
485 :
486 196 : nopaque->hasho_flag = xlrec->new_bucket_flag;
487 :
488 196 : PageSetLSN(newpage, lsn);
489 196 : MarkBufferDirty(newbuf);
490 : }
491 196 : if (BufferIsValid(newbuf))
492 196 : UnlockReleaseBuffer(newbuf);
493 196 : }
494 :
495 : /*
496 : * replay move of page contents for squeeze operation of hash index
497 : */
498 : static void
499 2 : hash_xlog_move_page_contents(XLogReaderState *record)
500 : {
501 2 : XLogRecPtr lsn = record->EndRecPtr;
502 2 : xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record);
503 2 : Buffer bucketbuf = InvalidBuffer;
504 2 : Buffer writebuf = InvalidBuffer;
505 2 : Buffer deletebuf = InvalidBuffer;
506 : XLogRedoAction action;
507 :
508 : /*
 509 : * Ensure we have a cleanup lock on the primary bucket page before we
 510 : * start the actual replay operation.  This guarantees that no scan can
 511 : * start, and that no scan is already in progress, while we replay this
 512 : * operation.  If scans were allowed here, they could miss some records
 513 : * or see the same record multiple times.
514 : */
515 2 : if (xldata->is_prim_bucket_same_wrt)
516 2 : action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
517 : else
518 : {
519 : /*
 520 : * We don't care about the return value, as the purpose of reading
 521 : * bucketbuf is just to ensure a cleanup lock on the primary bucket page.
522 : */
523 0 : (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
524 :
525 0 : action = XLogReadBufferForRedo(record, 1, &writebuf);
526 : }
527 :
528 : /* replay the record for adding entries in overflow buffer */
529 2 : if (action == BLK_NEEDS_REDO)
530 : {
531 : Page writepage;
532 : char *begin;
533 : char *data;
534 : Size datalen;
535 2 : uint16 ninserted = 0;
536 :
537 2 : data = begin = XLogRecGetBlockData(record, 1, &datalen);
538 :
539 2 : writepage = BufferGetPage(writebuf);
540 :
541 2 : if (xldata->ntups > 0)
542 : {
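    :             /*
    :              * The block data is an array of ntups target offset numbers,
    :              * followed by the index tuples to be inserted at those offsets.
    :              */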
543 2 : OffsetNumber *towrite = (OffsetNumber *) data;
544 :
545 2 : data += sizeof(OffsetNumber) * xldata->ntups;
546 :
547 686 : while (data - begin < datalen)
548 : {
549 684 : IndexTuple itup = (IndexTuple) data;
550 : Size itemsz;
551 : OffsetNumber l;
552 :
553 684 : itemsz = IndexTupleSize(itup);
554 684 : itemsz = MAXALIGN(itemsz);
555 :
556 684 : data += itemsz;
557 :
558 684 : l = PageAddItem(writepage, itup, itemsz, towrite[ninserted], false, false);
559 684 : if (l == InvalidOffsetNumber)
560 0 : elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %zu bytes", itemsz);
561 :
562 684 : ninserted++;
563 : }
564 : }
565 :
566 : /*
 567 : * The number of tuples inserted must match the count requested in the REDO record.
568 : */
569 : Assert(ninserted == xldata->ntups);
570 :
571 2 : PageSetLSN(writepage, lsn);
572 2 : MarkBufferDirty(writebuf);
573 : }
574 :
575 : /* replay the record for deleting entries from overflow buffer */
576 2 : if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO)
577 : {
578 : Page page;
579 : char *ptr;
580 : Size len;
581 :
582 2 : ptr = XLogRecGetBlockData(record, 2, &len);
583 :
584 2 : page = BufferGetPage(deletebuf);
585 :
586 2 : if (len > 0)
587 : {
588 : OffsetNumber *unused;
589 : OffsetNumber *unend;
590 :
591 2 : unused = (OffsetNumber *) ptr;
592 2 : unend = (OffsetNumber *) (ptr + len);
593 :
594 2 : if ((unend - unused) > 0)
595 2 : PageIndexMultiDelete(page, unused, unend - unused);
596 : }
597 :
598 2 : PageSetLSN(page, lsn);
599 2 : MarkBufferDirty(deletebuf);
600 : }
601 :
602 : /*
 603 : * Replay is complete, so now we can release the buffers.  We release the
 604 : * locks at the end of the replay operation to ensure that we hold the
 605 : * lock on the primary bucket page until the end of the operation.  We
 606 : * could release the lock on the write buffer as soon as its changes are
 607 : * applied, when it is not the primary bucket page itself, but that
 608 : * doesn't seem to be worth complicating the code.
609 : */
610 2 : if (BufferIsValid(deletebuf))
611 2 : UnlockReleaseBuffer(deletebuf);
612 :
613 2 : if (BufferIsValid(writebuf))
614 2 : UnlockReleaseBuffer(writebuf);
615 :
616 2 : if (BufferIsValid(bucketbuf))
617 0 : UnlockReleaseBuffer(bucketbuf);
618 2 : }
619 :
620 : /*
621 : * replay squeeze page operation of hash index
622 : */
623 : static void
624 88 : hash_xlog_squeeze_page(XLogReaderState *record)
625 : {
626 88 : XLogRecPtr lsn = record->EndRecPtr;
627 88 : xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record);
628 88 : Buffer bucketbuf = InvalidBuffer;
629 88 : Buffer writebuf = InvalidBuffer;
630 : Buffer ovflbuf;
631 88 : Buffer prevbuf = InvalidBuffer;
632 : Buffer mapbuf;
633 : XLogRedoAction action;
634 :
635 : /*
 636 : * Ensure we have a cleanup lock on the primary bucket page before we
 637 : * start the actual replay operation.  This guarantees that no scan can
 638 : * start, and that no scan is already in progress, while we replay this
 639 : * operation.  If scans were allowed here, they could miss some records
 640 : * or see the same record multiple times.
641 : */
642 88 : if (xldata->is_prim_bucket_same_wrt)
643 72 : action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
644 : else
645 : {
646 : /*
 647 : * We don't care about the return value, as the purpose of reading
 648 : * bucketbuf is just to ensure a cleanup lock on the primary bucket page.
649 : */
650 16 : (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
651 :
652 16 : if (xldata->ntups > 0 || xldata->is_prev_bucket_same_wrt)
653 14 : action = XLogReadBufferForRedo(record, 1, &writebuf);
654 : else
655 2 : action = BLK_NOTFOUND;
656 : }
657 :
658 : /* replay the record for adding entries in overflow buffer */
659 88 : if (action == BLK_NEEDS_REDO)
660 : {
661 : Page writepage;
662 : char *begin;
663 : char *data;
664 : Size datalen;
665 82 : uint16 ninserted = 0;
666 82 : bool mod_wbuf = false;
667 :
668 82 : data = begin = XLogRecGetBlockData(record, 1, &datalen);
669 :
670 82 : writepage = BufferGetPage(writebuf);
671 :
672 82 : if (xldata->ntups > 0)
673 : {
674 30 : OffsetNumber *towrite = (OffsetNumber *) data;
675 :
676 30 : data += sizeof(OffsetNumber) * xldata->ntups;
677 :
678 1426 : while (data - begin < datalen)
679 : {
680 1396 : IndexTuple itup = (IndexTuple) data;
681 : Size itemsz;
682 : OffsetNumber l;
683 :
684 1396 : itemsz = IndexTupleSize(itup);
685 1396 : itemsz = MAXALIGN(itemsz);
686 :
687 1396 : data += itemsz;
688 :
689 1396 : l = PageAddItem(writepage, itup, itemsz, towrite[ninserted], false, false);
690 1396 : if (l == InvalidOffsetNumber)
691 0 : elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %zu bytes", itemsz);
692 :
693 1396 : ninserted++;
694 : }
695 :
696 30 : mod_wbuf = true;
697 : }
698 : else
699 : {
700 : /*
701 : * Ensure that the required flags are set when there are no
702 : * tuples. See _hash_freeovflpage().
703 : */
704 : Assert(xldata->is_prim_bucket_same_wrt ||
705 : xldata->is_prev_bucket_same_wrt);
706 : }
707 :
708 : /*
 709 : * The number of tuples inserted must match the count requested in the REDO record.
710 : */
711 : Assert(ninserted == xldata->ntups);
712 :
713 : /*
 714 : * If the page to which we are adding tuples is the page just before the
 715 : * freed overflow page, update its nextblkno.
716 : */
717 82 : if (xldata->is_prev_bucket_same_wrt)
718 : {
719 22 : HashPageOpaque writeopaque = HashPageGetOpaque(writepage);
720 :
721 22 : writeopaque->hasho_nextblkno = xldata->nextblkno;
722 22 : mod_wbuf = true;
723 : }
724 :
725 : /* Set LSN and mark writebuf dirty iff it is modified */
726 82 : if (mod_wbuf)
727 : {
728 38 : PageSetLSN(writepage, lsn);
729 38 : MarkBufferDirty(writebuf);
730 : }
731 : }
732 :
733 : /* replay the record for initializing overflow buffer */
734 88 : if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO)
735 : {
736 : Page ovflpage;
737 : HashPageOpaque ovflopaque;
738 :
739 0 : ovflpage = BufferGetPage(ovflbuf);
740 :
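    :         /* Reinitialize the freed overflow page as an empty, unused page. */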
741 0 : _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
742 :
743 0 : ovflopaque = HashPageGetOpaque(ovflpage);
744 :
745 0 : ovflopaque->hasho_prevblkno = InvalidBlockNumber;
746 0 : ovflopaque->hasho_nextblkno = InvalidBlockNumber;
747 0 : ovflopaque->hasho_bucket = InvalidBucket;
748 0 : ovflopaque->hasho_flag = LH_UNUSED_PAGE;
749 0 : ovflopaque->hasho_page_id = HASHO_PAGE_ID;
750 :
751 0 : PageSetLSN(ovflpage, lsn);
752 0 : MarkBufferDirty(ovflbuf);
753 : }
754 88 : if (BufferIsValid(ovflbuf))
755 88 : UnlockReleaseBuffer(ovflbuf);
756 :
757 : /* replay the record for page previous to the freed overflow page */
758 154 : if (!xldata->is_prev_bucket_same_wrt &&
759 66 : XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO)
760 : {
761 64 : Page prevpage = BufferGetPage(prevbuf);
762 64 : HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
763 :
764 64 : prevopaque->hasho_nextblkno = xldata->nextblkno;
765 :
766 64 : PageSetLSN(prevpage, lsn);
767 64 : MarkBufferDirty(prevbuf);
768 : }
769 88 : if (BufferIsValid(prevbuf))
770 66 : UnlockReleaseBuffer(prevbuf);
771 :
772 : /* replay the record for page next to the freed overflow page */
773 88 : if (XLogRecHasBlockRef(record, 4))
774 : {
775 : Buffer nextbuf;
776 :
777 0 : if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO)
778 : {
779 0 : Page nextpage = BufferGetPage(nextbuf);
780 0 : HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
781 :
782 0 : nextopaque->hasho_prevblkno = xldata->prevblkno;
783 :
784 0 : PageSetLSN(nextpage, lsn);
785 0 : MarkBufferDirty(nextbuf);
786 : }
787 0 : if (BufferIsValid(nextbuf))
788 0 : UnlockReleaseBuffer(nextbuf);
789 : }
790 :
791 88 : if (BufferIsValid(writebuf))
792 86 : UnlockReleaseBuffer(writebuf);
793 :
794 88 : if (BufferIsValid(bucketbuf))
795 16 : UnlockReleaseBuffer(bucketbuf);
796 :
797 : /*
798 : * Note: in normal operation, we'd update the bitmap and meta page while
799 : * still holding lock on the primary bucket page and overflow pages. But
800 : * during replay it's not necessary to hold those locks, since no other
801 : * index updates can be happening concurrently.
802 : */
803 : /* replay the record for bitmap page */
804 88 : if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO)
805 : {
806 74 : Page mappage = BufferGetPage(mapbuf);
807 74 : uint32 *freep = NULL;
808 : char *data;
809 : uint32 *bitmap_page_bit;
810 : Size datalen;
811 :
812 74 : freep = HashPageGetBitmap(mappage);
813 :
814 74 : data = XLogRecGetBlockData(record, 5, &datalen);
815 74 : bitmap_page_bit = (uint32 *) data;
816 :
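    :         /*
    :          * Clear the freed overflow page's bit in the bitmap so that the
    :          * page can be reused later.
    :          */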
817 74 : CLRBIT(freep, *bitmap_page_bit);
818 :
819 74 : PageSetLSN(mappage, lsn);
820 74 : MarkBufferDirty(mapbuf);
821 : }
822 88 : if (BufferIsValid(mapbuf))
823 88 : UnlockReleaseBuffer(mapbuf);
824 :
825 : /* replay the record for meta page */
826 88 : if (XLogRecHasBlockRef(record, 6))
827 : {
828 : Buffer metabuf;
829 :
830 60 : if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO)
831 : {
832 : HashMetaPage metap;
833 : Page page;
834 : char *data;
835 : uint32 *firstfree_ovflpage;
836 : Size datalen;
837 :
838 54 : data = XLogRecGetBlockData(record, 6, &datalen);
839 54 : firstfree_ovflpage = (uint32 *) data;
840 :
841 54 : page = BufferGetPage(metabuf);
842 54 : metap = HashPageGetMeta(page);
843 54 : metap->hashm_firstfree = *firstfree_ovflpage;
844 :
845 54 : PageSetLSN(page, lsn);
846 54 : MarkBufferDirty(metabuf);
847 : }
848 60 : if (BufferIsValid(metabuf))
849 60 : UnlockReleaseBuffer(metabuf);
850 : }
851 88 : }
852 :
853 : /*
854 : * replay delete operation of hash index
855 : */
856 : static void
857 278 : hash_xlog_delete(XLogReaderState *record)
858 : {
859 278 : XLogRecPtr lsn = record->EndRecPtr;
860 278 : xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record);
861 278 : Buffer bucketbuf = InvalidBuffer;
862 : Buffer deletebuf;
863 : Page page;
864 : XLogRedoAction action;
865 :
866 : /*
 867 : * Ensure we have a cleanup lock on the primary bucket page before we
 868 : * start the actual replay operation.  This guarantees that no scan can
 869 : * start, and that no scan is already in progress, while we replay this
 870 : * operation.  If scans were allowed here, they could miss some records
 871 : * or see the same record multiple times.
872 : */
873 278 : if (xldata->is_primary_bucket_page)
874 188 : action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf);
875 : else
876 : {
877 : /*
 878 : * We don't care about the return value, as the purpose of reading
 879 : * bucketbuf is just to ensure a cleanup lock on the primary bucket page.
880 : */
881 90 : (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
882 :
883 90 : action = XLogReadBufferForRedo(record, 1, &deletebuf);
884 : }
885 :
886 : /* replay the record for deleting entries in bucket page */
887 278 : if (action == BLK_NEEDS_REDO)
888 : {
889 : char *ptr;
890 : Size len;
891 :
892 196 : ptr = XLogRecGetBlockData(record, 1, &len);
893 :
894 196 : page = BufferGetPage(deletebuf);
895 :
896 196 : if (len > 0)
897 : {
898 : OffsetNumber *unused;
899 : OffsetNumber *unend;
900 :
901 196 : unused = (OffsetNumber *) ptr;
902 196 : unend = (OffsetNumber *) (ptr + len);
903 :
904 196 : if ((unend - unused) > 0)
905 196 : PageIndexMultiDelete(page, unused, unend - unused);
906 : }
907 :
908 : /*
909 : * Mark the page as not containing any LP_DEAD items only if
910 : * clear_dead_marking flag is set to true. See comments in
911 : * hashbucketcleanup() for details.
912 : */
913 196 : if (xldata->clear_dead_marking)
914 : {
915 : HashPageOpaque pageopaque;
916 :
917 0 : pageopaque = HashPageGetOpaque(page);
918 0 : pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
919 : }
920 :
921 196 : PageSetLSN(page, lsn);
922 196 : MarkBufferDirty(deletebuf);
923 : }
924 278 : if (BufferIsValid(deletebuf))
925 278 : UnlockReleaseBuffer(deletebuf);
926 :
927 278 : if (BufferIsValid(bucketbuf))
928 90 : UnlockReleaseBuffer(bucketbuf);
929 278 : }
930 :
931 : /*
932 : * replay split cleanup flag operation for primary bucket page.
933 : */
934 : static void
935 196 : hash_xlog_split_cleanup(XLogReaderState *record)
936 : {
937 196 : XLogRecPtr lsn = record->EndRecPtr;
938 : Buffer buffer;
939 : Page page;
940 :
941 196 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
942 : {
943 : HashPageOpaque bucket_opaque;
944 :
945 196 : page = BufferGetPage(buffer);
946 :
947 196 : bucket_opaque = HashPageGetOpaque(page);
948 196 : bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
949 196 : PageSetLSN(page, lsn);
950 196 : MarkBufferDirty(buffer);
951 : }
952 196 : if (BufferIsValid(buffer))
953 196 : UnlockReleaseBuffer(buffer);
954 196 : }
955 :
956 : /*
957 : * replay for update meta page
958 : */
959 : static void
960 10 : hash_xlog_update_meta_page(XLogReaderState *record)
961 : {
962 : HashMetaPage metap;
963 10 : XLogRecPtr lsn = record->EndRecPtr;
964 10 : xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record);
965 : Buffer metabuf;
966 : Page page;
967 :
968 10 : if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO)
969 : {
970 10 : page = BufferGetPage(metabuf);
971 10 : metap = HashPageGetMeta(page);
972 :
973 10 : metap->hashm_ntuples = xldata->ntuples;
974 :
975 10 : PageSetLSN(page, lsn);
976 10 : MarkBufferDirty(metabuf);
977 : }
978 10 : if (BufferIsValid(metabuf))
979 10 : UnlockReleaseBuffer(metabuf);
980 10 : }
981 :
982 : /*
983 : * replay delete operation in hash index to remove
984 : * tuples marked as DEAD during index tuple insertion.
985 : */
986 : static void
987 0 : hash_xlog_vacuum_one_page(XLogReaderState *record)
988 : {
989 0 : XLogRecPtr lsn = record->EndRecPtr;
990 : xl_hash_vacuum_one_page *xldata;
991 : Buffer buffer;
992 : Buffer metabuf;
993 : Page page;
994 : XLogRedoAction action;
995 : HashPageOpaque pageopaque;
996 : OffsetNumber *toDelete;
997 :
998 0 : xldata = (xl_hash_vacuum_one_page *) XLogRecGetData(record);
999 0 : toDelete = xldata->offsets;
1000 :
1001 : /*
1002 : * If we have any conflict processing to do, it must happen before we
1003 : * update the page.
1004 : *
1005 : * Hash index records that are marked as LP_DEAD and being removed during
1006 : * hash index tuple insertion can conflict with standby queries. You might
1007 : * think that vacuum records would conflict as well, but we've handled
1008 : * that already. XLOG_HEAP2_PRUNE_VACUUM_SCAN records provide the highest
1009 : * xid cleaned by the vacuum of the heap and so we can resolve any
1010 : * conflicts just once when that arrives. After that we know that no
1011 : * conflicts exist from individual hash index vacuum records on that
1012 : * index.
1013 : */
1014 0 : if (InHotStandby)
1015 : {
1016 : RelFileLocator rlocator;
1017 :
1018 0 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
1019 0 : ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
1020 0 : xldata->isCatalogRel,
1021 : rlocator);
1022 : }
1023 :
1024 0 : action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer);
1025 :
1026 0 : if (action == BLK_NEEDS_REDO)
1027 : {
1028 0 : page = BufferGetPage(buffer);
1029 :
1030 0 : PageIndexMultiDelete(page, toDelete, xldata->ntuples);
1031 :
1032 : /*
1033 : * Mark the page as not containing any LP_DEAD items. See comments in
1034 : * _hash_vacuum_one_page() for details.
1035 : */
1036 0 : pageopaque = HashPageGetOpaque(page);
1037 0 : pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1038 :
1039 0 : PageSetLSN(page, lsn);
1040 0 : MarkBufferDirty(buffer);
1041 : }
1042 0 : if (BufferIsValid(buffer))
1043 0 : UnlockReleaseBuffer(buffer);
1044 :
1045 0 : if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
1046 : {
1047 : Page metapage;
1048 : HashMetaPage metap;
1049 :
1050 0 : metapage = BufferGetPage(metabuf);
1051 0 : metap = HashPageGetMeta(metapage);
1052 :
1053 0 : metap->hashm_ntuples -= xldata->ntuples;
1054 :
1055 0 : PageSetLSN(metapage, lsn);
1056 0 : MarkBufferDirty(metabuf);
1057 : }
1058 0 : if (BufferIsValid(metabuf))
1059 0 : UnlockReleaseBuffer(metabuf);
1060 0 : }
1061 :
1062 : void
1063 229614 : hash_redo(XLogReaderState *record)
1064 : {
1065 229614 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1066 :
1067 229614 : switch (info)
1068 : {
1069 54 : case XLOG_HASH_INIT_META_PAGE:
1070 54 : hash_xlog_init_meta_page(record);
1071 54 : break;
1072 54 : case XLOG_HASH_INIT_BITMAP_PAGE:
1073 54 : hash_xlog_init_bitmap_page(record);
1074 54 : break;
1075 228210 : case XLOG_HASH_INSERT:
1076 228210 : hash_xlog_insert(record);
1077 228210 : break;
1078 108 : case XLOG_HASH_ADD_OVFL_PAGE:
1079 108 : hash_xlog_add_ovfl_page(record);
1080 108 : break;
1081 196 : case XLOG_HASH_SPLIT_ALLOCATE_PAGE:
1082 196 : hash_xlog_split_allocate_page(record);
1083 196 : break;
1084 222 : case XLOG_HASH_SPLIT_PAGE:
1085 222 : hash_xlog_split_page(record);
1086 222 : break;
1087 196 : case XLOG_HASH_SPLIT_COMPLETE:
1088 196 : hash_xlog_split_complete(record);
1089 196 : break;
1090 2 : case XLOG_HASH_MOVE_PAGE_CONTENTS:
1091 2 : hash_xlog_move_page_contents(record);
1092 2 : break;
1093 88 : case XLOG_HASH_SQUEEZE_PAGE:
1094 88 : hash_xlog_squeeze_page(record);
1095 88 : break;
1096 278 : case XLOG_HASH_DELETE:
1097 278 : hash_xlog_delete(record);
1098 278 : break;
1099 196 : case XLOG_HASH_SPLIT_CLEANUP:
1100 196 : hash_xlog_split_cleanup(record);
1101 196 : break;
1102 10 : case XLOG_HASH_UPDATE_META_PAGE:
1103 10 : hash_xlog_update_meta_page(record);
1104 10 : break;
1105 0 : case XLOG_HASH_VACUUM_ONE_PAGE:
1106 0 : hash_xlog_vacuum_one_page(record);
1107 0 : break;
1108 0 : default:
1109 0 : elog(PANIC, "hash_redo: unknown op code %u", info);
1110 : }
1111 229614 : }
1112 :
1113 : /*
1114 : * Mask a hash page before performing consistency checks on it.
1115 : */
1116 : void
1117 911020 : hash_mask(char *pagedata, BlockNumber blkno)
1118 : {
1119 911020 : Page page = (Page) pagedata;
1120 : HashPageOpaque opaque;
1121 : int pagetype;
1122 :
1123 911020 : mask_page_lsn_and_checksum(page);
1124 :
1125 911020 : mask_page_hint_bits(page);
1126 911020 : mask_unused_space(page);
1127 :
1128 911020 : opaque = HashPageGetOpaque(page);
1129 :
1130 911020 : pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
1131 911020 : if (pagetype == LH_UNUSED_PAGE)
1132 : {
1133 : /*
1134 : * Mask everything on a UNUSED page.
1135 : */
1136 0 : mask_page_content(page);
1137 : }
1138 911020 : else if (pagetype == LH_BUCKET_PAGE ||
1139 : pagetype == LH_OVERFLOW_PAGE)
1140 : {
1141 : /*
1142 : * In hash bucket and overflow pages, it is possible to modify the
1143 : * LP_FLAGS without emitting any WAL record. Hence, mask the line
1144 : * pointer flags. See hashgettuple(), _hash_kill_items() for details.
1145 : */
1146 453484 : mask_lp_flags(page);
1147 : }
1148 :
1149 : /*
1150 : * It is possible that the hint bit LH_PAGE_HAS_DEAD_TUPLES may remain
1151 : * unlogged. So, mask it. See _hash_kill_items() for details.
1152 : */
1153 911020 : opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1154 911020 : }