Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * hash_xlog.c
4 : * WAL replay logic for hash index.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/hash/hash_xlog.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/bufmask.h"
18 : #include "access/hash.h"
19 : #include "access/hash_xlog.h"
20 : #include "access/xlogutils.h"
21 : #include "storage/standby.h"
22 :
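   : /*
   :  * The routines in this file are reached through the hash index's
   :  * resource manager entry (see src/include/access/rmgrlist.h): WAL
   :  * records carrying rmgr id RM_HASH_ID are dispatched to hash_redo(),
   :  * and hash_mask() is used when WAL consistency checking is enabled.
   :  */
   :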
23 : /*
24 : * replay a hash index meta page
25 : */
26 : static void
27 54 : hash_xlog_init_meta_page(XLogReaderState *record)
28 : {
29 54 : XLogRecPtr lsn = record->EndRecPtr;
30 : Page page;
31 : Buffer metabuf;
32 : ForkNumber forknum;
33 :
34 54 : xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);
35 :
36 : /* create the index's metapage */
37 54 : metabuf = XLogInitBufferForRedo(record, 0);
38 : Assert(BufferIsValid(metabuf));
39 54 : _hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid,
40 54 : xlrec->ffactor, true);
41 54 : page = BufferGetPage(metabuf);
42 54 : PageSetLSN(page, lsn);
43 54 : MarkBufferDirty(metabuf);
44 :
45 : /*
46 : * Force the on-disk state of init forks to always be in sync with the
47 : * state in shared buffers. See XLogReadBufferForRedoExtended. We need
48 : * special handling for init forks as create index operations don't log a
49 : * full page image of the metapage.
50 : */
51 54 : XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
52 54 : if (forknum == INIT_FORKNUM)
53 2 : FlushOneBuffer(metabuf);
54 :
55 : /* all done */
56 54 : UnlockReleaseBuffer(metabuf);
57 54 : }
58 :
59 : /*
60 : * replay a hash index bitmap page
61 : */
62 : static void
63 54 : hash_xlog_init_bitmap_page(XLogReaderState *record)
64 : {
65 54 : XLogRecPtr lsn = record->EndRecPtr;
66 : Buffer bitmapbuf;
67 : Buffer metabuf;
68 : Page page;
69 : HashMetaPage metap;
70 : uint32 num_buckets;
71 : ForkNumber forknum;
72 :
73 54 : xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);
74 :
75 : /*
76 : * Initialize bitmap page
77 : */
78 54 : bitmapbuf = XLogInitBufferForRedo(record, 0);
79 54 : _hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
80 54 : PageSetLSN(BufferGetPage(bitmapbuf), lsn);
81 54 : MarkBufferDirty(bitmapbuf);
82 :
83 : /*
84 : * Force the on-disk state of init forks to always be in sync with the
85 : * state in shared buffers. See XLogReadBufferForRedoExtended. We need
86 : * special handling for init forks as create index operations don't log a
87 : * full page image of the metapage.
88 : */
89 54 : XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
90 54 : if (forknum == INIT_FORKNUM)
91 2 : FlushOneBuffer(bitmapbuf);
92 54 : UnlockReleaseBuffer(bitmapbuf);
93 :
94 : /* add the new bitmap page to the metapage's list of bitmaps */
95 54 : if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
96 : {
97 : /*
98 : * Note: in normal operation, we'd update the metapage while still
99 : * holding lock on the bitmap page. But during replay it's not
100 : * necessary to hold that lock, since nobody can see it yet; the
101 : * creating transaction hasn't yet committed.
102 : */
103 54 : page = BufferGetPage(metabuf);
104 54 : metap = HashPageGetMeta(page);
105 :
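   : 		/*
   : 		 * As in _hash_init(), block 0 is the metapage and blocks 1 through
   : 		 * num_buckets are the primary bucket pages, so the first bitmap page
   : 		 * was allocated at block num_buckets + 1; replay recomputes that
   : 		 * block number here instead of logging it.
   : 		 */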
106 54 : num_buckets = metap->hashm_maxbucket + 1;
107 54 : metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
108 54 : metap->hashm_nmaps++;
109 :
110 54 : PageSetLSN(page, lsn);
111 54 : MarkBufferDirty(metabuf);
112 :
113 54 : XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
114 54 : if (forknum == INIT_FORKNUM)
115 2 : FlushOneBuffer(metabuf);
116 : }
117 54 : if (BufferIsValid(metabuf))
118 54 : UnlockReleaseBuffer(metabuf);
119 54 : }
120 :
121 : /*
122 : * replay a hash index insert without split
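   :  *
   :  * A rough sketch of how the matching XLOG_HASH_INSERT record is put
   :  * together on the primary (simplified; see _hash_doinsert() for the
   :  * authoritative version, whose registration order and flags may differ):
   :  *
   :  *		XLogBeginInsert();
   :  *		XLogRegisterData((char *) &xlrec, SizeOfHashInsert);
   :  *		XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
   :  *		XLogRegisterBufData(0, (char *) itup, IndexTupleSize(itup));
   :  *		XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);
   :  *		recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT);
   :  *
   :  * Block 0 therefore carries the new index tuple as block data, and
   :  * block 1 is the metapage whose tuple count is bumped below.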
123 : */
124 : static void
125 239290 : hash_xlog_insert(XLogReaderState *record)
126 : {
127 : HashMetaPage metap;
128 239290 : XLogRecPtr lsn = record->EndRecPtr;
129 239290 : xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record);
130 : Buffer buffer;
131 : Page page;
132 :
133 239290 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
134 : {
135 : Size datalen;
136 236608 : char *datapos = XLogRecGetBlockData(record, 0, &datalen);
137 :
138 236608 : page = BufferGetPage(buffer);
139 :
140 236608 : if (PageAddItem(page, datapos, datalen, xlrec->offnum, false, false) == InvalidOffsetNumber)
141 0 : elog(PANIC, "hash_xlog_insert: failed to add item");
142 :
143 236608 : PageSetLSN(page, lsn);
144 236608 : MarkBufferDirty(buffer);
145 : }
146 239290 : if (BufferIsValid(buffer))
147 239290 : UnlockReleaseBuffer(buffer);
148 :
149 239290 : if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
150 : {
151 : /*
152 : * Note: in normal operation, we'd update the metapage while still
153 : * holding lock on the page we inserted into. But during replay it's
154 : * not necessary to hold that lock, since no other index updates can
155 : * be happening concurrently.
156 : */
157 239236 : page = BufferGetPage(buffer);
158 239236 : metap = HashPageGetMeta(page);
159 239236 : metap->hashm_ntuples += 1;
160 :
161 239236 : PageSetLSN(page, lsn);
162 239236 : MarkBufferDirty(buffer);
163 : }
164 239290 : if (BufferIsValid(buffer))
165 239290 : UnlockReleaseBuffer(buffer);
166 239290 : }
167 :
168 : /*
169 : * replay addition of overflow page for hash index
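   :  *
   :  * Block references, as consumed below: 0 is the newly allocated overflow
   :  * page, 1 is the page whose hasho_nextblkno now points at it, 2 is an
   :  * existing bitmap page in which the bit for a recycled overflow page is
   :  * set (optional), 3 is a newly initialized bitmap page (optional), and
   :  * 4 is the metapage.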
170 : */
171 : static void
172 132 : hash_xlog_add_ovfl_page(XLogReaderState *record)
173 : {
174 132 : XLogRecPtr lsn = record->EndRecPtr;
175 132 : xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record);
176 : Buffer leftbuf;
177 : Buffer ovflbuf;
178 : Buffer metabuf;
179 : BlockNumber leftblk;
180 : BlockNumber rightblk;
181 132 : BlockNumber newmapblk = InvalidBlockNumber;
182 : Page ovflpage;
183 : HashPageOpaque ovflopaque;
184 : uint32 *num_bucket;
185 : char *data;
186 : Size datalen PG_USED_FOR_ASSERTS_ONLY;
187 132 : bool new_bmpage = false;
188 :
189 132 : XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk);
190 132 : XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk);
191 :
192 132 : ovflbuf = XLogInitBufferForRedo(record, 0);
193 : Assert(BufferIsValid(ovflbuf));
194 :
195 132 : data = XLogRecGetBlockData(record, 0, &datalen);
196 132 : num_bucket = (uint32 *) data;
197 : Assert(datalen == sizeof(uint32));
198 132 : _hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE,
199 : true);
200 : /* update backlink */
201 132 : ovflpage = BufferGetPage(ovflbuf);
202 132 : ovflopaque = HashPageGetOpaque(ovflpage);
203 132 : ovflopaque->hasho_prevblkno = leftblk;
204 :
205 132 : PageSetLSN(ovflpage, lsn);
206 132 : MarkBufferDirty(ovflbuf);
207 :
208 132 : if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
209 : {
210 : Page leftpage;
211 : HashPageOpaque leftopaque;
212 :
213 132 : leftpage = BufferGetPage(leftbuf);
214 132 : leftopaque = HashPageGetOpaque(leftpage);
215 132 : leftopaque->hasho_nextblkno = rightblk;
216 :
217 132 : PageSetLSN(leftpage, lsn);
218 132 : MarkBufferDirty(leftbuf);
219 : }
220 :
221 132 : if (BufferIsValid(leftbuf))
222 132 : UnlockReleaseBuffer(leftbuf);
223 132 : UnlockReleaseBuffer(ovflbuf);
224 :
225 : /*
226 : * Note: in normal operation, we'd update the bitmap and meta page while
227 : * still holding lock on the overflow pages. But during replay it's not
228 : * necessary to hold those locks, since no other index updates can be
229 : * happening concurrently.
230 : */
231 132 : if (XLogRecHasBlockRef(record, 2))
232 : {
233 : Buffer mapbuffer;
234 :
235 22 : if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO)
236 : {
237 14 : Page mappage = BufferGetPage(mapbuffer);
238 14 : uint32 *freep = NULL;
239 : uint32 *bitmap_page_bit;
240 :
241 14 : freep = HashPageGetBitmap(mappage);
242 :
243 14 : data = XLogRecGetBlockData(record, 2, &datalen);
244 14 : bitmap_page_bit = (uint32 *) data;
245 :
246 14 : SETBIT(freep, *bitmap_page_bit);
247 :
248 14 : PageSetLSN(mappage, lsn);
249 14 : MarkBufferDirty(mapbuffer);
250 : }
251 22 : if (BufferIsValid(mapbuffer))
252 22 : UnlockReleaseBuffer(mapbuffer);
253 : }
254 :
255 132 : if (XLogRecHasBlockRef(record, 3))
256 : {
257 : Buffer newmapbuf;
258 :
259 0 : newmapbuf = XLogInitBufferForRedo(record, 3);
260 :
261 0 : _hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true);
262 :
263 0 : new_bmpage = true;
264 0 : newmapblk = BufferGetBlockNumber(newmapbuf);
265 :
266 0 : MarkBufferDirty(newmapbuf);
267 0 : PageSetLSN(BufferGetPage(newmapbuf), lsn);
268 :
269 0 : UnlockReleaseBuffer(newmapbuf);
270 : }
271 :
272 132 : if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO)
273 : {
274 : HashMetaPage metap;
275 : Page page;
276 : uint32 *firstfree_ovflpage;
277 :
278 132 : data = XLogRecGetBlockData(record, 4, &datalen);
279 132 : firstfree_ovflpage = (uint32 *) data;
280 :
281 132 : page = BufferGetPage(metabuf);
282 132 : metap = HashPageGetMeta(page);
283 132 : metap->hashm_firstfree = *firstfree_ovflpage;
284 :
285 132 : if (!xlrec->bmpage_found)
286 : {
287 110 : metap->hashm_spares[metap->hashm_ovflpoint]++;
288 :
289 110 : if (new_bmpage)
290 : {
291 : Assert(BlockNumberIsValid(newmapblk));
292 :
293 0 : metap->hashm_mapp[metap->hashm_nmaps] = newmapblk;
294 0 : metap->hashm_nmaps++;
295 0 : metap->hashm_spares[metap->hashm_ovflpoint]++;
296 : }
297 : }
298 :
299 132 : PageSetLSN(page, lsn);
300 132 : MarkBufferDirty(metabuf);
301 : }
302 132 : if (BufferIsValid(metabuf))
303 132 : UnlockReleaseBuffer(metabuf);
304 132 : }
305 :
306 : /*
307 : * replay allocation of page for split operation
308 : */
309 : static void
310 448 : hash_xlog_split_allocate_page(XLogReaderState *record)
311 : {
312 448 : XLogRecPtr lsn = record->EndRecPtr;
313 448 : xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record);
314 : Buffer oldbuf;
315 : Buffer newbuf;
316 : Buffer metabuf;
317 : XLogRedoAction action;
318 :
319 : /*
320 : * To be consistent with normal operation, here we take cleanup locks on
321 : * both the old and new buckets even though there can't be any concurrent
322 : * inserts.
323 : */
324 :
325 : /* replay the record for old bucket */
326 448 : action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf);
327 :
328 : /*
329 : * Note that we still update the page even if it was restored from a full
330 : * page image, because the special space is not included in the image.
331 : */
332 448 : if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
333 : {
334 : Page oldpage;
335 : HashPageOpaque oldopaque;
336 :
337 448 : oldpage = BufferGetPage(oldbuf);
338 448 : oldopaque = HashPageGetOpaque(oldpage);
339 :
340 448 : oldopaque->hasho_flag = xlrec->old_bucket_flag;
341 448 : oldopaque->hasho_prevblkno = xlrec->new_bucket;
342 :
343 448 : PageSetLSN(oldpage, lsn);
344 448 : MarkBufferDirty(oldbuf);
345 : }
346 :
347 : /* replay the record for new bucket */
348 448 : XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_AND_CLEANUP_LOCK, true,
349 : &newbuf);
350 448 : _hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket,
351 448 : xlrec->new_bucket_flag, true);
352 448 : MarkBufferDirty(newbuf);
353 448 : PageSetLSN(BufferGetPage(newbuf), lsn);
354 :
355 : /*
356 : * We could release the lock on the old bucket earlier as well, but we do
357 : * it here to be consistent with normal operation.
358 : */
359 448 : if (BufferIsValid(oldbuf))
360 448 : UnlockReleaseBuffer(oldbuf);
361 448 : if (BufferIsValid(newbuf))
362 448 : UnlockReleaseBuffer(newbuf);
363 :
364 : /*
365 : * Note: in normal operation, we'd update the meta page while still
366 : * holding lock on the old and new bucket pages. But during replay it's
367 : * not necessary to hold those locks, since no other bucket splits can be
368 : * happening concurrently.
369 : */
370 :
371 : /* replay the record for metapage changes */
372 448 : if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO)
373 : {
374 : Page page;
375 : HashMetaPage metap;
376 : Size datalen;
377 : char *data;
378 : uint32 *uidata;
379 : int uidatacount;
380 :
381 448 : page = BufferGetPage(metabuf);
382 448 : metap = HashPageGetMeta(page);
383 448 : metap->hashm_maxbucket = xlrec->new_bucket;
384 :
385 448 : data = XLogRecGetBlockData(record, 2, &datalen);
386 :
387 : /*
388 : * This cast is ok because XLogRecGetBlockData() returns a MAXALIGNed
389 : * buffer.
390 : */
391 448 : uidata = (uint32 *) data;
392 448 : uidatacount = 0;
393 :
394 448 : if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS)
395 : {
396 8 : uint32 lowmask = uidata[uidatacount++];
397 8 : uint32 highmask = uidata[uidatacount++];
398 :
399 : /* update metapage */
400 8 : metap->hashm_lowmask = lowmask;
401 8 : metap->hashm_highmask = highmask;
402 : }
403 :
404 448 : if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT)
405 : {
406 20 : uint32 ovflpoint = uidata[uidatacount++];
407 20 : uint32 ovflpages = uidata[uidatacount++];
408 :
409 : /* update metapage */
410 20 : metap->hashm_ovflpoint = ovflpoint;
411 20 : metap->hashm_spares[ovflpoint] = ovflpages;
412 : }
413 :
414 448 : MarkBufferDirty(metabuf);
415 448 : PageSetLSN(BufferGetPage(metabuf), lsn);
416 : }
417 :
418 448 : if (BufferIsValid(metabuf))
419 448 : UnlockReleaseBuffer(metabuf);
420 448 : }
421 :
422 : /*
423 : * replay of split operation
424 : */
425 : static void
426 474 : hash_xlog_split_page(XLogReaderState *record)
427 : {
428 : Buffer buf;
429 :
430 474 : if (XLogReadBufferForRedo(record, 0, &buf) != BLK_RESTORED)
431 0 : elog(ERROR, "Hash split record did not contain a full-page image");
432 :
433 474 : UnlockReleaseBuffer(buf);
434 474 : }
435 :
436 : /*
437 : * replay completion of split operation
438 : */
439 : static void
440 448 : hash_xlog_split_complete(XLogReaderState *record)
441 : {
442 448 : XLogRecPtr lsn = record->EndRecPtr;
443 448 : xl_hash_split_complete *xlrec = (xl_hash_split_complete *) XLogRecGetData(record);
444 : Buffer oldbuf;
445 : Buffer newbuf;
446 : XLogRedoAction action;
447 :
448 : /* replay the record for old bucket */
449 448 : action = XLogReadBufferForRedo(record, 0, &oldbuf);
450 :
451 : /*
452 : * Note that we still update the page even if it was restored from a full
453 : * page image, because the bucket flag is not included in the image.
454 : */
455 448 : if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
456 : {
457 : Page oldpage;
458 : HashPageOpaque oldopaque;
459 :
460 448 : oldpage = BufferGetPage(oldbuf);
461 448 : oldopaque = HashPageGetOpaque(oldpage);
462 :
463 448 : oldopaque->hasho_flag = xlrec->old_bucket_flag;
464 :
465 448 : PageSetLSN(oldpage, lsn);
466 448 : MarkBufferDirty(oldbuf);
467 : }
468 448 : if (BufferIsValid(oldbuf))
469 448 : UnlockReleaseBuffer(oldbuf);
470 :
471 : /* replay the record for new bucket */
472 448 : action = XLogReadBufferForRedo(record, 1, &newbuf);
473 :
474 : /*
475 : * Note that we still update the page even if it was restored from a full
476 : * page image, because the bucket flag is not included in the image.
477 : */
478 448 : if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
479 : {
480 : Page newpage;
481 : HashPageOpaque nopaque;
482 :
483 448 : newpage = BufferGetPage(newbuf);
484 448 : nopaque = HashPageGetOpaque(newpage);
485 :
486 448 : nopaque->hasho_flag = xlrec->new_bucket_flag;
487 :
488 448 : PageSetLSN(newpage, lsn);
489 448 : MarkBufferDirty(newbuf);
490 : }
491 448 : if (BufferIsValid(newbuf))
492 448 : UnlockReleaseBuffer(newbuf);
493 448 : }
494 :
495 : /*
496 : * replay move of page contents for squeeze operation of hash index
497 : */
498 : static void
499 2 : hash_xlog_move_page_contents(XLogReaderState *record)
500 : {
501 2 : XLogRecPtr lsn = record->EndRecPtr;
502 2 : xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record);
503 2 : Buffer bucketbuf = InvalidBuffer;
504 2 : Buffer writebuf = InvalidBuffer;
505 2 : Buffer deletebuf = InvalidBuffer;
506 : XLogRedoAction action;
507 :
508 : /*
509 : * Ensure we have a cleanup lock on the primary bucket page before we
510 : * start the actual replay. This guarantees that no scan can start, and
511 : * that no scan is already in progress, while this operation is being
512 : * replayed. If scans were allowed during the replay, they could miss
513 : * some records or see the same record multiple times.
514 : */
515 2 : if (xldata->is_prim_bucket_same_wrt)
516 2 : action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
517 : else
518 : {
519 : /*
520 : * We don't care about the return value; the only purpose of reading
521 : * bucketbuf is to acquire a cleanup lock on the primary bucket page.
522 : */
523 0 : (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
524 :
525 0 : action = XLogReadBufferForRedo(record, 1, &writebuf);
526 : }
527 :
528 : /* replay the record for adding entries in overflow buffer */
529 2 : if (action == BLK_NEEDS_REDO)
530 : {
531 : Page writepage;
532 : char *begin;
533 : char *data;
534 : Size datalen;
535 2 : uint16 ninserted = 0;
536 :
537 2 : data = begin = XLogRecGetBlockData(record, 1, &datalen);
538 :
539 2 : writepage = BufferGetPage(writebuf);
540 :
541 2 : if (xldata->ntups > 0)
542 : {
543 2 : OffsetNumber *towrite = (OffsetNumber *) data;
544 :
545 2 : data += sizeof(OffsetNumber) * xldata->ntups;
546 :
547 686 : while (data - begin < datalen)
548 : {
549 684 : IndexTuple itup = (IndexTuple) data;
550 : Size itemsz;
551 : OffsetNumber l;
552 :
553 684 : itemsz = IndexTupleSize(itup);
554 684 : itemsz = MAXALIGN(itemsz);
555 :
556 684 : data += itemsz;
557 :
558 684 : l = PageAddItem(writepage, itup, itemsz, towrite[ninserted], false, false);
559 684 : if (l == InvalidOffsetNumber)
560 0 : elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %d bytes",
561 : (int) itemsz);
562 :
563 684 : ninserted++;
564 : }
565 : }
566 :
567 : /*
568 : * The number of tuples inserted must match the count in the REDO record.
569 : */
570 : Assert(ninserted == xldata->ntups);
571 :
572 2 : PageSetLSN(writepage, lsn);
573 2 : MarkBufferDirty(writebuf);
574 : }
575 :
576 : /* replay the record for deleting entries from overflow buffer */
577 2 : if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO)
578 : {
579 : Page page;
580 : char *ptr;
581 : Size len;
582 :
583 2 : ptr = XLogRecGetBlockData(record, 2, &len);
584 :
585 2 : page = BufferGetPage(deletebuf);
586 :
587 2 : if (len > 0)
588 : {
589 : OffsetNumber *unused;
590 : OffsetNumber *unend;
591 :
592 2 : unused = (OffsetNumber *) ptr;
593 2 : unend = (OffsetNumber *) (ptr + len);
594 :
595 2 : if ((unend - unused) > 0)
596 2 : PageIndexMultiDelete(page, unused, unend - unused);
597 : }
598 :
599 2 : PageSetLSN(page, lsn);
600 2 : MarkBufferDirty(deletebuf);
601 : }
602 :
603 : /*
604 : * Replay is complete, so now we can release the buffers. We release the
605 : * locks at the end of the replay so that the lock on the primary bucket
606 : * page is held until the whole operation is done. We could release the
607 : * lock on the write buffer as soon as we are finished with it (when it is
608 : * not the primary bucket page), but that doesn't seem to be worth
609 : * complicating the code.
610 : */
611 2 : if (BufferIsValid(deletebuf))
612 2 : UnlockReleaseBuffer(deletebuf);
613 :
614 2 : if (BufferIsValid(writebuf))
615 2 : UnlockReleaseBuffer(writebuf);
616 :
617 2 : if (BufferIsValid(bucketbuf))
618 0 : UnlockReleaseBuffer(bucketbuf);
619 2 : }
620 :
621 : /*
622 : * replay squeeze page operation of hash index
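   :  *
   :  * Block references, as consumed below: 0 is the primary bucket page
   :  * (only when it is not also the write page), 1 is the page receiving the
   :  * moved tuples, 2 is the freed overflow page, 3 is the page before the
   :  * freed page (when it is not the write page), 4 is the page after the
   :  * freed page (optional), 5 is the bitmap page, and 6 is the metapage
   :  * (optional).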
623 : */
624 : static void
625 62 : hash_xlog_squeeze_page(XLogReaderState *record)
626 : {
627 62 : XLogRecPtr lsn = record->EndRecPtr;
628 62 : xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record);
629 62 : Buffer bucketbuf = InvalidBuffer;
630 62 : Buffer writebuf = InvalidBuffer;
631 : Buffer ovflbuf;
632 62 : Buffer prevbuf = InvalidBuffer;
633 : Buffer mapbuf;
634 : XLogRedoAction action;
635 :
636 : /*
637 : * Ensure we have a cleanup lock on the primary bucket page before we
638 : * start the actual replay. This guarantees that no scan can start, and
639 : * that no scan is already in progress, while this operation is being
640 : * replayed. If scans were allowed during the replay, they could miss
641 : * some records or see the same record multiple times.
642 : */
643 62 : if (xldata->is_prim_bucket_same_wrt)
644 46 : action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
645 : else
646 : {
647 : /*
648 : * We don't care about the return value; the only purpose of reading
649 : * bucketbuf is to acquire a cleanup lock on the primary bucket page.
650 : */
651 16 : (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
652 :
653 16 : if (xldata->ntups > 0 || xldata->is_prev_bucket_same_wrt)
654 14 : action = XLogReadBufferForRedo(record, 1, &writebuf);
655 : else
656 2 : action = BLK_NOTFOUND;
657 : }
658 :
659 : /* replay the record for adding entries in overflow buffer */
660 62 : if (action == BLK_NEEDS_REDO)
661 : {
662 : Page writepage;
663 : char *begin;
664 : char *data;
665 : Size datalen;
666 56 : uint16 ninserted = 0;
667 56 : bool mod_wbuf = false;
668 :
669 56 : data = begin = XLogRecGetBlockData(record, 1, &datalen);
670 :
671 56 : writepage = BufferGetPage(writebuf);
672 :
673 56 : if (xldata->ntups > 0)
674 : {
675 28 : OffsetNumber *towrite = (OffsetNumber *) data;
676 :
677 28 : data += sizeof(OffsetNumber) * xldata->ntups;
678 :
679 980 : while (data - begin < datalen)
680 : {
681 952 : IndexTuple itup = (IndexTuple) data;
682 : Size itemsz;
683 : OffsetNumber l;
684 :
685 952 : itemsz = IndexTupleSize(itup);
686 952 : itemsz = MAXALIGN(itemsz);
687 :
688 952 : data += itemsz;
689 :
690 952 : l = PageAddItem(writepage, itup, itemsz, towrite[ninserted], false, false);
691 952 : if (l == InvalidOffsetNumber)
692 0 : elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %d bytes",
693 : (int) itemsz);
694 :
695 952 : ninserted++;
696 : }
697 :
698 28 : mod_wbuf = true;
699 : }
700 : else
701 : {
702 : /*
703 : * Ensure that the required flags are set when there are no
704 : * tuples. See _hash_freeovflpage().
705 : */
706 : Assert(xldata->is_prim_bucket_same_wrt ||
707 : xldata->is_prev_bucket_same_wrt);
708 : }
709 :
710 : /*
711 : * The number of tuples inserted must match the count in the REDO record.
712 : */
713 : Assert(ninserted == xldata->ntups);
714 :
715 : /*
716 : * If the page to which we are adding tuples is the page just before the
717 : * freed overflow page, update its nextblkno.
718 : */
719 56 : if (xldata->is_prev_bucket_same_wrt)
720 : {
721 20 : HashPageOpaque writeopaque = HashPageGetOpaque(writepage);
722 :
723 20 : writeopaque->hasho_nextblkno = xldata->nextblkno;
724 20 : mod_wbuf = true;
725 : }
726 :
727 : /* Set LSN and mark writebuf dirty iff it is modified */
728 56 : if (mod_wbuf)
729 : {
730 34 : PageSetLSN(writepage, lsn);
731 34 : MarkBufferDirty(writebuf);
732 : }
733 : }
734 :
735 : /* replay the record for initializing overflow buffer */
736 62 : if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO)
737 : {
738 : Page ovflpage;
739 : HashPageOpaque ovflopaque;
740 :
741 0 : ovflpage = BufferGetPage(ovflbuf);
742 :
743 0 : _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
744 :
745 0 : ovflopaque = HashPageGetOpaque(ovflpage);
746 :
747 0 : ovflopaque->hasho_prevblkno = InvalidBlockNumber;
748 0 : ovflopaque->hasho_nextblkno = InvalidBlockNumber;
749 0 : ovflopaque->hasho_bucket = InvalidBucket;
750 0 : ovflopaque->hasho_flag = LH_UNUSED_PAGE;
751 0 : ovflopaque->hasho_page_id = HASHO_PAGE_ID;
752 :
753 0 : PageSetLSN(ovflpage, lsn);
754 0 : MarkBufferDirty(ovflbuf);
755 : }
756 62 : if (BufferIsValid(ovflbuf))
757 62 : UnlockReleaseBuffer(ovflbuf);
758 :
759 : /* replay the record for page previous to the freed overflow page */
760 104 : if (!xldata->is_prev_bucket_same_wrt &&
761 42 : XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO)
762 : {
763 40 : Page prevpage = BufferGetPage(prevbuf);
764 40 : HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
765 :
766 40 : prevopaque->hasho_nextblkno = xldata->nextblkno;
767 :
768 40 : PageSetLSN(prevpage, lsn);
769 40 : MarkBufferDirty(prevbuf);
770 : }
771 62 : if (BufferIsValid(prevbuf))
772 42 : UnlockReleaseBuffer(prevbuf);
773 :
774 : /* replay the record for page next to the freed overflow page */
775 62 : if (XLogRecHasBlockRef(record, 4))
776 : {
777 : Buffer nextbuf;
778 :
779 0 : if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO)
780 : {
781 0 : Page nextpage = BufferGetPage(nextbuf);
782 0 : HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
783 :
784 0 : nextopaque->hasho_prevblkno = xldata->prevblkno;
785 :
786 0 : PageSetLSN(nextpage, lsn);
787 0 : MarkBufferDirty(nextbuf);
788 : }
789 0 : if (BufferIsValid(nextbuf))
790 0 : UnlockReleaseBuffer(nextbuf);
791 : }
792 :
793 62 : if (BufferIsValid(writebuf))
794 60 : UnlockReleaseBuffer(writebuf);
795 :
796 62 : if (BufferIsValid(bucketbuf))
797 16 : UnlockReleaseBuffer(bucketbuf);
798 :
799 : /*
800 : * Note: in normal operation, we'd update the bitmap and meta page while
801 : * still holding lock on the primary bucket page and overflow pages. But
802 : * during replay it's not necessary to hold those locks, since no other
803 : * index updates can be happening concurrently.
804 : */
805 : /* replay the record for bitmap page */
806 62 : if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO)
807 : {
808 48 : Page mappage = BufferGetPage(mapbuf);
809 48 : uint32 *freep = NULL;
810 : char *data;
811 : uint32 *bitmap_page_bit;
812 : Size datalen;
813 :
814 48 : freep = HashPageGetBitmap(mappage);
815 :
816 48 : data = XLogRecGetBlockData(record, 5, &datalen);
817 48 : bitmap_page_bit = (uint32 *) data;
818 :
819 48 : CLRBIT(freep, *bitmap_page_bit);
820 :
821 48 : PageSetLSN(mappage, lsn);
822 48 : MarkBufferDirty(mapbuf);
823 : }
824 62 : if (BufferIsValid(mapbuf))
825 62 : UnlockReleaseBuffer(mapbuf);
826 :
827 : /* replay the record for meta page */
828 62 : if (XLogRecHasBlockRef(record, 6))
829 : {
830 : Buffer metabuf;
831 :
832 60 : if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO)
833 : {
834 : HashMetaPage metap;
835 : Page page;
836 : char *data;
837 : uint32 *firstfree_ovflpage;
838 : Size datalen;
839 :
840 54 : data = XLogRecGetBlockData(record, 6, &datalen);
841 54 : firstfree_ovflpage = (uint32 *) data;
842 :
843 54 : page = BufferGetPage(metabuf);
844 54 : metap = HashPageGetMeta(page);
845 54 : metap->hashm_firstfree = *firstfree_ovflpage;
846 :
847 54 : PageSetLSN(page, lsn);
848 54 : MarkBufferDirty(metabuf);
849 : }
850 60 : if (BufferIsValid(metabuf))
851 60 : UnlockReleaseBuffer(metabuf);
852 : }
853 62 : }
854 :
855 : /*
856 : * replay delete operation of hash index
857 : */
858 : static void
859 522 : hash_xlog_delete(XLogReaderState *record)
860 : {
861 522 : XLogRecPtr lsn = record->EndRecPtr;
862 522 : xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record);
863 522 : Buffer bucketbuf = InvalidBuffer;
864 : Buffer deletebuf;
865 : Page page;
866 : XLogRedoAction action;
867 :
868 : /*
869 : * Ensure we have a cleanup lock on the primary bucket page before we
870 : * start the actual replay. This guarantees that no scan can start, and
871 : * that no scan is already in progress, while this operation is being
872 : * replayed. If scans were allowed during the replay, they could miss
873 : * some records or see the same record multiple times.
874 : */
875 522 : if (xldata->is_primary_bucket_page)
876 456 : action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf);
877 : else
878 : {
879 : /*
880 : * We don't care about the return value; the only purpose of reading
881 : * bucketbuf is to acquire a cleanup lock on the primary bucket page.
882 : */
883 66 : (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
884 :
885 66 : action = XLogReadBufferForRedo(record, 1, &deletebuf);
886 : }
887 :
888 : /* replay the record for deleting entries in bucket page */
889 522 : if (action == BLK_NEEDS_REDO)
890 : {
891 : char *ptr;
892 : Size len;
893 :
894 462 : ptr = XLogRecGetBlockData(record, 1, &len);
895 :
896 462 : page = BufferGetPage(deletebuf);
897 :
898 462 : if (len > 0)
899 : {
900 : OffsetNumber *unused;
901 : OffsetNumber *unend;
902 :
903 462 : unused = (OffsetNumber *) ptr;
904 462 : unend = (OffsetNumber *) (ptr + len);
905 :
906 462 : if ((unend - unused) > 0)
907 462 : PageIndexMultiDelete(page, unused, unend - unused);
908 : }
909 :
910 : /*
911 : * Mark the page as not containing any LP_DEAD items only if
912 : * clear_dead_marking flag is set to true. See comments in
913 : * hashbucketcleanup() for details.
914 : */
915 462 : if (xldata->clear_dead_marking)
916 : {
917 : HashPageOpaque pageopaque;
918 :
919 0 : pageopaque = HashPageGetOpaque(page);
920 0 : pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
921 : }
922 :
923 462 : PageSetLSN(page, lsn);
924 462 : MarkBufferDirty(deletebuf);
925 : }
926 522 : if (BufferIsValid(deletebuf))
927 522 : UnlockReleaseBuffer(deletebuf);
928 :
929 522 : if (BufferIsValid(bucketbuf))
930 66 : UnlockReleaseBuffer(bucketbuf);
931 522 : }
932 :
933 : /*
934 : * replay split cleanup flag operation for primary bucket page.
935 : */
936 : static void
937 448 : hash_xlog_split_cleanup(XLogReaderState *record)
938 : {
939 448 : XLogRecPtr lsn = record->EndRecPtr;
940 : Buffer buffer;
941 : Page page;
942 :
943 448 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
944 : {
945 : HashPageOpaque bucket_opaque;
946 :
947 448 : page = BufferGetPage(buffer);
948 :
949 448 : bucket_opaque = HashPageGetOpaque(page);
950 448 : bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
951 448 : PageSetLSN(page, lsn);
952 448 : MarkBufferDirty(buffer);
953 : }
954 448 : if (BufferIsValid(buffer))
955 448 : UnlockReleaseBuffer(buffer);
956 448 : }
957 :
958 : /*
959 : * replay for update meta page
960 : */
961 : static void
962 16 : hash_xlog_update_meta_page(XLogReaderState *record)
963 : {
964 : HashMetaPage metap;
965 16 : XLogRecPtr lsn = record->EndRecPtr;
966 16 : xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record);
967 : Buffer metabuf;
968 : Page page;
969 :
970 16 : if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO)
971 : {
972 8 : page = BufferGetPage(metabuf);
973 8 : metap = HashPageGetMeta(page);
974 :
975 8 : metap->hashm_ntuples = xldata->ntuples;
976 :
977 8 : PageSetLSN(page, lsn);
978 8 : MarkBufferDirty(metabuf);
979 : }
980 16 : if (BufferIsValid(metabuf))
981 16 : UnlockReleaseBuffer(metabuf);
982 16 : }
983 :
984 : /*
985 : * replay delete operation in hash index to remove
986 : * tuples marked as DEAD during index tuple insertion.
987 : */
988 : static void
989 0 : hash_xlog_vacuum_one_page(XLogReaderState *record)
990 : {
991 0 : XLogRecPtr lsn = record->EndRecPtr;
992 : xl_hash_vacuum_one_page *xldata;
993 : Buffer buffer;
994 : Buffer metabuf;
995 : Page page;
996 : XLogRedoAction action;
997 : HashPageOpaque pageopaque;
998 : OffsetNumber *toDelete;
999 :
1000 0 : xldata = (xl_hash_vacuum_one_page *) XLogRecGetData(record);
1001 0 : toDelete = xldata->offsets;
1002 :
1003 : /*
1004 : * If we have any conflict processing to do, it must happen before we
1005 : * update the page.
1006 : *
1007 : * Hash index records that are marked as LP_DEAD and being removed during
1008 : * hash index tuple insertion can conflict with standby queries. You might
1009 : * think that vacuum records would conflict as well, but we've handled
1010 : * that already. XLOG_HEAP2_PRUNE_VACUUM_SCAN records provide the highest
1011 : * xid cleaned by the vacuum of the heap and so we can resolve any
1012 : * conflicts just once when that arrives. After that we know that no
1013 : * conflicts exist from individual hash index vacuum records on that
1014 : * index.
1015 : */
1016 0 : if (InHotStandby)
1017 : {
1018 : RelFileLocator rlocator;
1019 :
1020 0 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
1021 0 : ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
1022 0 : xldata->isCatalogRel,
1023 : rlocator);
1024 : }
1025 :
1026 0 : action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer);
1027 :
1028 0 : if (action == BLK_NEEDS_REDO)
1029 : {
1030 0 : page = BufferGetPage(buffer);
1031 :
1032 0 : PageIndexMultiDelete(page, toDelete, xldata->ntuples);
1033 :
1034 : /*
1035 : * Mark the page as not containing any LP_DEAD items. See comments in
1036 : * _hash_vacuum_one_page() for details.
1037 : */
1038 0 : pageopaque = HashPageGetOpaque(page);
1039 0 : pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1040 :
1041 0 : PageSetLSN(page, lsn);
1042 0 : MarkBufferDirty(buffer);
1043 : }
1044 0 : if (BufferIsValid(buffer))
1045 0 : UnlockReleaseBuffer(buffer);
1046 :
1047 0 : if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
1048 : {
1049 : Page metapage;
1050 : HashMetaPage metap;
1051 :
1052 0 : metapage = BufferGetPage(metabuf);
1053 0 : metap = HashPageGetMeta(metapage);
1054 :
1055 0 : metap->hashm_ntuples -= xldata->ntuples;
1056 :
1057 0 : PageSetLSN(metapage, lsn);
1058 0 : MarkBufferDirty(metabuf);
1059 : }
1060 0 : if (BufferIsValid(metabuf))
1061 0 : UnlockReleaseBuffer(metabuf);
1062 0 : }
1063 :
1064 : void
1065 241950 : hash_redo(XLogReaderState *record)
1066 : {
1067 241950 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1068 :
1069 241950 : switch (info)
1070 : {
1071 54 : case XLOG_HASH_INIT_META_PAGE:
1072 54 : hash_xlog_init_meta_page(record);
1073 54 : break;
1074 54 : case XLOG_HASH_INIT_BITMAP_PAGE:
1075 54 : hash_xlog_init_bitmap_page(record);
1076 54 : break;
1077 239290 : case XLOG_HASH_INSERT:
1078 239290 : hash_xlog_insert(record);
1079 239290 : break;
1080 132 : case XLOG_HASH_ADD_OVFL_PAGE:
1081 132 : hash_xlog_add_ovfl_page(record);
1082 132 : break;
1083 448 : case XLOG_HASH_SPLIT_ALLOCATE_PAGE:
1084 448 : hash_xlog_split_allocate_page(record);
1085 448 : break;
1086 474 : case XLOG_HASH_SPLIT_PAGE:
1087 474 : hash_xlog_split_page(record);
1088 474 : break;
1089 448 : case XLOG_HASH_SPLIT_COMPLETE:
1090 448 : hash_xlog_split_complete(record);
1091 448 : break;
1092 2 : case XLOG_HASH_MOVE_PAGE_CONTENTS:
1093 2 : hash_xlog_move_page_contents(record);
1094 2 : break;
1095 62 : case XLOG_HASH_SQUEEZE_PAGE:
1096 62 : hash_xlog_squeeze_page(record);
1097 62 : break;
1098 522 : case XLOG_HASH_DELETE:
1099 522 : hash_xlog_delete(record);
1100 522 : break;
1101 448 : case XLOG_HASH_SPLIT_CLEANUP:
1102 448 : hash_xlog_split_cleanup(record);
1103 448 : break;
1104 16 : case XLOG_HASH_UPDATE_META_PAGE:
1105 16 : hash_xlog_update_meta_page(record);
1106 16 : break;
1107 0 : case XLOG_HASH_VACUUM_ONE_PAGE:
1108 0 : hash_xlog_vacuum_one_page(record);
1109 0 : break;
1110 0 : default:
1111 0 : elog(PANIC, "hash_redo: unknown op code %u", info);
1112 : }
1113 241950 : }
1114 :
1115 : /*
1116 : * Mask a hash page before performing consistency checks on it.
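   :  *
   :  * This is used during recovery when wal_consistency_checking covers the
   :  * Hash resource manager: the page produced by redo and the full-page
   :  * image carried by the WAL record are both masked with this function and
   :  * then compared byte by byte.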
1117 : */
1118 : void
1119 959164 : hash_mask(char *pagedata, BlockNumber blkno)
1120 : {
1121 959164 : Page page = (Page) pagedata;
1122 : HashPageOpaque opaque;
1123 : int pagetype;
1124 :
1125 959164 : mask_page_lsn_and_checksum(page);
1126 :
1127 959164 : mask_page_hint_bits(page);
1128 959164 : mask_unused_space(page);
1129 :
1130 959164 : opaque = HashPageGetOpaque(page);
1131 :
1132 959164 : pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
1133 959164 : if (pagetype == LH_UNUSED_PAGE)
1134 : {
1135 : /*
1136 : * Mask everything on a UNUSED page.
1137 : */
1138 0 : mask_page_content(page);
1139 : }
1140 959164 : else if (pagetype == LH_BUCKET_PAGE ||
1141 : pagetype == LH_OVERFLOW_PAGE)
1142 : {
1143 : /*
1144 : * In hash bucket and overflow pages, it is possible to modify the
1145 : * LP_FLAGS without emitting any WAL record. Hence, mask the line
1146 : * pointer flags. See hashgettuple(), _hash_kill_items() for details.
1147 : */
1148 478960 : mask_lp_flags(page);
1149 : }
1150 :
1151 : /*
1152 : * It is possible that the hint bit LH_PAGE_HAS_DEAD_TUPLES may remain
1153 : * unlogged. So, mask it. See _hash_kill_items() for details.
1154 : */
1155 959164 : opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1156 959164 : }