1 : /*-------------------------------------------------------------------------
2 : *
3 : * hash_xlog.c
4 : * WAL replay logic for hash index.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/hash/hash_xlog.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/bufmask.h"
18 : #include "access/hash.h"
19 : #include "access/hash_xlog.h"
20 : #include "access/xlogutils.h"
21 : #include "storage/standby.h"
22 :
23 : /*
24 : * replay initialization of a hash index meta page
25 : */
26 : static void
27 52 : hash_xlog_init_meta_page(XLogReaderState *record)
28 : {
29 52 : XLogRecPtr lsn = record->EndRecPtr;
30 : Page page;
31 : Buffer metabuf;
32 : ForkNumber forknum;
33 :
34 52 : xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);
35 :
36 : /* create the index's metapage */
37 52 : metabuf = XLogInitBufferForRedo(record, 0);
38 : Assert(BufferIsValid(metabuf));
39 52 : _hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid,
40 52 : xlrec->ffactor, true);
41 52 : page = BufferGetPage(metabuf);
42 52 : PageSetLSN(page, lsn);
43 52 : MarkBufferDirty(metabuf);
44 :
45 : /*
46 : * Force the on-disk state of init forks to always be in sync with the
47 : * state in shared buffers. See XLogReadBufferForRedoExtended. We need
48 : * special handling for init forks as create index operations don't log a
49 : * full page image of the metapage.
50 : */
51 52 : XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
52 52 : if (forknum == INIT_FORKNUM)
53 2 : FlushOneBuffer(metabuf);
54 :
55 : /* all done */
56 52 : UnlockReleaseBuffer(metabuf);
57 52 : }
58 :
59 : /*
60 : * replay initialization of a hash index bitmap page
61 : */
62 : static void
63 52 : hash_xlog_init_bitmap_page(XLogReaderState *record)
64 : {
65 52 : XLogRecPtr lsn = record->EndRecPtr;
66 : Buffer bitmapbuf;
67 : Buffer metabuf;
68 : Page page;
69 : HashMetaPage metap;
70 : uint32 num_buckets;
71 : ForkNumber forknum;
72 :
73 52 : xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);
74 :
75 : /*
76 : * Initialize bitmap page
77 : */
78 52 : bitmapbuf = XLogInitBufferForRedo(record, 0);
79 52 : _hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
80 52 : PageSetLSN(BufferGetPage(bitmapbuf), lsn);
81 52 : MarkBufferDirty(bitmapbuf);
82 :
83 : /*
84 : * Force the on-disk state of init forks to always be in sync with the
85 : * state in shared buffers. See XLogReadBufferForRedoExtended. We need
86 : * special handling for init forks as create index operations don't log a
87 : * full page image of the bitmap page.
88 : */
89 52 : XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
90 52 : if (forknum == INIT_FORKNUM)
91 2 : FlushOneBuffer(bitmapbuf);
92 52 : UnlockReleaseBuffer(bitmapbuf);
93 :
94 : /* add the new bitmap page to the metapage's list of bitmaps */
95 52 : if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
96 : {
97 : /*
98 : * Note: in normal operation, we'd update the metapage while still
99 : * holding lock on the bitmap page. But during replay it's not
100 : * necessary to hold that lock, since nobody can see it yet; the
101 : * creating transaction hasn't yet committed.
102 : */
103 52 : page = BufferGetPage(metabuf);
104 52 : metap = HashPageGetMeta(page);
105 :
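: /*
: * At initial build time, block 0 is the metapage and blocks 1..num_buckets
: * hold the bucket pages, so the just-initialized bitmap page lands at
: * block num_buckets + 1.
: */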
106 52 : num_buckets = metap->hashm_maxbucket + 1;
107 52 : metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
108 52 : metap->hashm_nmaps++;
109 :
110 52 : PageSetLSN(page, lsn);
111 52 : MarkBufferDirty(metabuf);
112 :
113 52 : XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
114 52 : if (forknum == INIT_FORKNUM)
115 2 : FlushOneBuffer(metabuf);
116 : }
117 52 : if (BufferIsValid(metabuf))
118 52 : UnlockReleaseBuffer(metabuf);
119 52 : }
120 :
121 : /*
122 : * replay a hash index insert without split
123 : */
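: /*
: * For reference, the fixed part of the WAL record is tiny; paraphrased
: * from hash_xlog.h it is roughly:
: *
: *     typedef struct xl_hash_insert
: *     {
: *         OffsetNumber offnum;
: *     } xl_hash_insert;
: *
: * The index tuple itself travels as the data payload of block 0.
: */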
124 : static void
125 228208 : hash_xlog_insert(XLogReaderState *record)
126 : {
127 : HashMetaPage metap;
128 228208 : XLogRecPtr lsn = record->EndRecPtr;
129 228208 : xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record);
130 : Buffer buffer;
131 : Page page;
132 :
133 228208 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
134 : {
135 : Size datalen;
136 225204 : char *datapos = XLogRecGetBlockData(record, 0, &datalen);
137 :
138 225204 : page = BufferGetPage(buffer);
139 :
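: /* re-add the tuple at the same offset it occupied at do-time */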
140 225204 : if (PageAddItem(page, datapos, datalen, xlrec->offnum, false, false) == InvalidOffsetNumber)
141 0 : elog(PANIC, "hash_xlog_insert: failed to add item");
142 :
143 225204 : PageSetLSN(page, lsn);
144 225204 : MarkBufferDirty(buffer);
145 : }
146 228208 : if (BufferIsValid(buffer))
147 228208 : UnlockReleaseBuffer(buffer);
148 :
149 228208 : if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
150 : {
151 : /*
152 : * Note: in normal operation, we'd update the metapage while still
153 : * holding lock on the page we inserted into. But during replay it's
154 : * not necessary to hold that lock, since no other index updates can
155 : * be happening concurrently.
156 : */
157 228148 : page = BufferGetPage(buffer);
158 228148 : metap = HashPageGetMeta(page);
159 228148 : metap->hashm_ntuples += 1;
160 :
161 228148 : PageSetLSN(page, lsn);
162 228148 : MarkBufferDirty(buffer);
163 : }
164 228208 : if (BufferIsValid(buffer))
165 228208 : UnlockReleaseBuffer(buffer);
166 228208 : }
167 :
168 : /*
169 : * replay addition of overflow page for hash index
170 : */
171 : static void
172 108 : hash_xlog_add_ovfl_page(XLogReaderState *record)
173 : {
174 108 : XLogRecPtr lsn = record->EndRecPtr;
175 108 : xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record);
176 : Buffer leftbuf;
177 : Buffer ovflbuf;
178 : Buffer metabuf;
179 : BlockNumber leftblk;
180 : BlockNumber rightblk;
181 108 : BlockNumber newmapblk = InvalidBlockNumber;
182 : Page ovflpage;
183 : HashPageOpaque ovflopaque;
184 : uint32 *num_bucket;
185 : char *data;
186 : Size datalen PG_USED_FOR_ASSERTS_ONLY;
187 108 : bool new_bmpage = false;
188 :
189 108 : XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk);
190 108 : XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk);
191 :
192 108 : ovflbuf = XLogInitBufferForRedo(record, 0);
193 : Assert(BufferIsValid(ovflbuf));
194 :
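: /* block 0's data payload is the number of the bucket owning the new page */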
195 108 : data = XLogRecGetBlockData(record, 0, &datalen);
196 108 : num_bucket = (uint32 *) data;
197 : Assert(datalen == sizeof(uint32));
198 108 : _hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE,
199 : true);
200 : /* update backlink */
201 108 : ovflpage = BufferGetPage(ovflbuf);
202 108 : ovflopaque = HashPageGetOpaque(ovflpage);
203 108 : ovflopaque->hasho_prevblkno = leftblk;
204 :
205 108 : PageSetLSN(ovflpage, lsn);
206 108 : MarkBufferDirty(ovflbuf);
207 :
208 108 : if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
209 : {
210 : Page leftpage;
211 : HashPageOpaque leftopaque;
212 :
213 108 : leftpage = BufferGetPage(leftbuf);
214 108 : leftopaque = HashPageGetOpaque(leftpage);
215 108 : leftopaque->hasho_nextblkno = rightblk;
216 :
217 108 : PageSetLSN(leftpage, lsn);
218 108 : MarkBufferDirty(leftbuf);
219 : }
220 :
221 108 : if (BufferIsValid(leftbuf))
222 108 : UnlockReleaseBuffer(leftbuf);
223 108 : UnlockReleaseBuffer(ovflbuf);
224 :
225 : /*
226 : * Note: in normal operation, we'd update the bitmap and meta page while
227 : * still holding lock on the overflow pages. But during replay it's not
228 : * necessary to hold those locks, since no other index updates can be
229 : * happening concurrently.
230 : */
231 108 : if (XLogRecHasBlockRef(record, 2))
232 : {
233 : Buffer mapbuffer;
234 :
235 24 : if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO)
236 : {
237 16 : Page mappage = BufferGetPage(mapbuffer);
238 16 : uint32 *freep = NULL;
239 : uint32 *bitmap_page_bit;
240 :
241 16 : freep = HashPageGetBitmap(mappage);
242 :
243 16 : data = XLogRecGetBlockData(record, 2, &datalen);
244 16 : bitmap_page_bit = (uint32 *) data;
245 :
246 16 : SETBIT(freep, *bitmap_page_bit);
247 :
248 16 : PageSetLSN(mappage, lsn);
249 16 : MarkBufferDirty(mapbuffer);
250 : }
251 24 : if (BufferIsValid(mapbuffer))
252 24 : UnlockReleaseBuffer(mapbuffer);
253 : }
254 :
255 108 : if (XLogRecHasBlockRef(record, 3))
256 : {
257 : Buffer newmapbuf;
258 :
259 0 : newmapbuf = XLogInitBufferForRedo(record, 3);
260 :
261 0 : _hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true);
262 :
263 0 : new_bmpage = true;
264 0 : newmapblk = BufferGetBlockNumber(newmapbuf);
265 :
266 0 : MarkBufferDirty(newmapbuf);
267 0 : PageSetLSN(BufferGetPage(newmapbuf), lsn);
268 :
269 0 : UnlockReleaseBuffer(newmapbuf);
270 : }
271 :
272 108 : if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO)
273 : {
274 : HashMetaPage metap;
275 : Page page;
276 : uint32 *firstfree_ovflpage;
277 :
278 108 : data = XLogRecGetBlockData(record, 4, &datalen);
279 108 : firstfree_ovflpage = (uint32 *) data;
280 :
281 108 : page = BufferGetPage(metabuf);
282 108 : metap = HashPageGetMeta(page);
283 108 : metap->hashm_firstfree = *firstfree_ovflpage;
284 :
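: /*
: * bmpage_found is false when no existing bitmap page had a free bit,
: * i.e. the overflow page was physically allocated rather than recycled
: * from the free list; in that case the spares count for the current
: * splitpoint grows, and grows again if a new bitmap page was allocated
: * alongside it.
: */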
285 108 : if (!xlrec->bmpage_found)
286 : {
287 84 : metap->hashm_spares[metap->hashm_ovflpoint]++;
288 :
289 84 : if (new_bmpage)
290 : {
291 : Assert(BlockNumberIsValid(newmapblk));
292 :
293 0 : metap->hashm_mapp[metap->hashm_nmaps] = newmapblk;
294 0 : metap->hashm_nmaps++;
295 0 : metap->hashm_spares[metap->hashm_ovflpoint]++;
296 : }
297 : }
298 :
299 108 : PageSetLSN(page, lsn);
300 108 : MarkBufferDirty(metabuf);
301 : }
302 108 : if (BufferIsValid(metabuf))
303 108 : UnlockReleaseBuffer(metabuf);
304 108 : }
305 :
306 : /*
307 : * replay allocation of page for split operation
308 : */
309 : static void
310 196 : hash_xlog_split_allocate_page(XLogReaderState *record)
311 : {
312 196 : XLogRecPtr lsn = record->EndRecPtr;
313 196 : xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record);
314 : Buffer oldbuf;
315 : Buffer newbuf;
316 : Buffer metabuf;
317 : Size datalen PG_USED_FOR_ASSERTS_ONLY;
318 : char *data;
319 : XLogRedoAction action;
320 :
321 : /*
322 : * To be consistent with normal operation, here we take cleanup locks on
323 : * both the old and new buckets even though there can't be any concurrent
324 : * inserts.
325 : */
326 :
327 : /* replay the record for old bucket */
328 196 : action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf);
329 :
330 : /*
331 : * Note that we still update the page even if it was restored from a full
332 : * page image, because the special space is not included in the image.
333 : */
334 196 : if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
335 : {
336 : Page oldpage;
337 : HashPageOpaque oldopaque;
338 :
339 196 : oldpage = BufferGetPage(oldbuf);
340 196 : oldopaque = HashPageGetOpaque(oldpage);
341 :
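: /*
: * For a primary bucket page, hasho_prevblkno is not a block number: it
: * caches the hashm_maxbucket value as of the last split, which scans
: * use to judge whether their cached copy of the metapage is stale.
: */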
342 196 : oldopaque->hasho_flag = xlrec->old_bucket_flag;
343 196 : oldopaque->hasho_prevblkno = xlrec->new_bucket;
344 :
345 196 : PageSetLSN(oldpage, lsn);
346 196 : MarkBufferDirty(oldbuf);
347 : }
348 :
349 : /* replay the record for new bucket */
350 196 : XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_AND_CLEANUP_LOCK, true,
351 : &newbuf);
352 196 : _hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket,
353 196 : xlrec->new_bucket_flag, true);
354 196 : MarkBufferDirty(newbuf);
355 196 : PageSetLSN(BufferGetPage(newbuf), lsn);
356 :
357 : /*
358 : * We could release the lock on the old bucket earlier, but we do it here
359 : * to be consistent with normal operation.
360 : */
361 196 : if (BufferIsValid(oldbuf))
362 196 : UnlockReleaseBuffer(oldbuf);
363 196 : if (BufferIsValid(newbuf))
364 196 : UnlockReleaseBuffer(newbuf);
365 :
366 : /*
367 : * Note: in normal operation, we'd update the meta page while still
368 : * holding lock on the old and new bucket pages. But during replay it's
369 : * not necessary to hold those locks, since no other bucket splits can be
370 : * happening concurrently.
371 : */
372 :
373 : /* replay the record for metapage changes */
374 196 : if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO)
375 : {
376 : Page page;
377 : HashMetaPage metap;
378 :
379 196 : page = BufferGetPage(metabuf);
380 196 : metap = HashPageGetMeta(page);
381 196 : metap->hashm_maxbucket = xlrec->new_bucket;
382 :
383 196 : data = XLogRecGetBlockData(record, 2, &datalen);
384 :
385 196 : if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS)
386 : {
387 : uint32 lowmask;
388 : uint32 *highmask;
389 :
390 : /* extract low and high masks. */
391 6 : memcpy(&lowmask, data, sizeof(uint32));
392 6 : highmask = (uint32 *) ((char *) data + sizeof(uint32));
393 :
394 : /* update metapage */
395 6 : metap->hashm_lowmask = lowmask;
396 6 : metap->hashm_highmask = *highmask;
397 :
398 6 : data += sizeof(uint32) * 2;
399 : }
400 :
401 196 : if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT)
402 : {
403 : uint32 ovflpoint;
404 : uint32 *ovflpages;
405 :
406 : /* extract information of overflow pages. */
407 16 : memcpy(&ovflpoint, data, sizeof(uint32));
408 16 : ovflpages = (uint32 *) ((char *) data + sizeof(uint32));
409 :
410 : /* update metapage */
411 16 : metap->hashm_spares[ovflpoint] = *ovflpages;
412 16 : metap->hashm_ovflpoint = ovflpoint;
413 : }
414 :
415 196 : MarkBufferDirty(metabuf);
416 196 : PageSetLSN(BufferGetPage(metabuf), lsn);
417 : }
418 :
419 196 : if (BufferIsValid(metabuf))
420 196 : UnlockReleaseBuffer(metabuf);
421 196 : }
422 :
423 : /*
424 : * replay of split operation
425 : */
426 : static void
427 222 : hash_xlog_split_page(XLogReaderState *record)
428 : {
429 : Buffer buf;
430 :
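: /*
: * The do-time code forces a full-page image of this block (see
: * log_split_page()), so anything other than BLK_RESTORED means the
: * record is corrupt.
: */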
431 222 : if (XLogReadBufferForRedo(record, 0, &buf) != BLK_RESTORED)
432 0 : elog(ERROR, "Hash split record did not contain a full-page image");
433 :
434 222 : UnlockReleaseBuffer(buf);
435 222 : }
436 :
437 : /*
438 : * replay completion of split operation
439 : */
440 : static void
441 196 : hash_xlog_split_complete(XLogReaderState *record)
442 : {
443 196 : XLogRecPtr lsn = record->EndRecPtr;
444 196 : xl_hash_split_complete *xlrec = (xl_hash_split_complete *) XLogRecGetData(record);
445 : Buffer oldbuf;
446 : Buffer newbuf;
447 : XLogRedoAction action;
448 :
449 : /* replay the record for old bucket */
450 196 : action = XLogReadBufferForRedo(record, 0, &oldbuf);
451 :
452 : /*
453 : * Note that we still update the page even if it was restored from a full
454 : * page image, because the bucket flag is not included in the image.
455 : */
456 196 : if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
457 : {
458 : Page oldpage;
459 : HashPageOpaque oldopaque;
460 :
461 196 : oldpage = BufferGetPage(oldbuf);
462 196 : oldopaque = HashPageGetOpaque(oldpage);
463 :
464 196 : oldopaque->hasho_flag = xlrec->old_bucket_flag;
465 :
466 196 : PageSetLSN(oldpage, lsn);
467 196 : MarkBufferDirty(oldbuf);
468 : }
469 196 : if (BufferIsValid(oldbuf))
470 196 : UnlockReleaseBuffer(oldbuf);
471 :
472 : /* replay the record for new bucket */
473 196 : action = XLogReadBufferForRedo(record, 1, &newbuf);
474 :
475 : /*
476 : * Note that we still update the page even if it was restored from a full
477 : * page image, because the bucket flag is not included in the image.
478 : */
479 196 : if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
480 : {
481 : Page newpage;
482 : HashPageOpaque nopaque;
483 :
484 196 : newpage = BufferGetPage(newbuf);
485 196 : nopaque = HashPageGetOpaque(newpage);
486 :
487 196 : nopaque->hasho_flag = xlrec->new_bucket_flag;
488 :
489 196 : PageSetLSN(newpage, lsn);
490 196 : MarkBufferDirty(newbuf);
491 : }
492 196 : if (BufferIsValid(newbuf))
493 196 : UnlockReleaseBuffer(newbuf);
494 196 : }
495 :
496 : /*
497 : * replay move of page contents for squeeze operation of hash index
498 : */
499 : static void
500 2 : hash_xlog_move_page_contents(XLogReaderState *record)
501 : {
502 2 : XLogRecPtr lsn = record->EndRecPtr;
503 2 : xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record);
504 2 : Buffer bucketbuf = InvalidBuffer;
505 2 : Buffer writebuf = InvalidBuffer;
506 2 : Buffer deletebuf = InvalidBuffer;
507 : XLogRedoAction action;
508 :
509 : /*
510 : * Ensure we have a cleanup lock on primary bucket page before we start
511 : * with the actual replay operation. This ensures that no scan can start,
512 : * and that none is already in progress, while the replay runs. If scans
513 : * were allowed meanwhile, they could miss some records or see the same
514 : * record multiple times.
515 : */
516 2 : if (xldata->is_prim_bucket_same_wrt)
517 2 : action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
518 : else
519 : {
520 : /*
521 : * We don't care about the return value; reading bucketbuf serves only
522 : * to take a cleanup lock on the primary bucket page.
523 : */
524 0 : (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
525 :
526 0 : action = XLogReadBufferForRedo(record, 1, &writebuf);
527 : }
528 :
529 : /* replay the record for adding entries in overflow buffer */
530 2 : if (action == BLK_NEEDS_REDO)
531 : {
532 : Page writepage;
533 : char *begin;
534 : char *data;
535 : Size datalen;
536 2 : uint16 ninserted = 0;
537 :
538 2 : data = begin = XLogRecGetBlockData(record, 1, &datalen);
539 :
540 2 : writepage = BufferGetPage(writebuf);
541 :
542 2 : if (xldata->ntups > 0)
543 : {
544 2 : OffsetNumber *towrite = (OffsetNumber *) data;
545 :
546 2 : data += sizeof(OffsetNumber) * xldata->ntups;
547 :
548 686 : while (data - begin < datalen)
549 : {
550 684 : IndexTuple itup = (IndexTuple) data;
551 : Size itemsz;
552 : OffsetNumber l;
553 :
554 684 : itemsz = IndexTupleSize(itup);
555 684 : itemsz = MAXALIGN(itemsz);
556 :
557 684 : data += itemsz;
558 :
559 684 : l = PageAddItem(writepage, itup, itemsz, towrite[ninserted], false, false);
560 684 : if (l == InvalidOffsetNumber)
561 0 : elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %d bytes",
562 : (int) itemsz);
563 :
564 684 : ninserted++;
565 : }
566 : }
567 :
568 : /*
569 : * The number of tuples inserted must match the count in the WAL record.
570 : */
571 : Assert(ninserted == xldata->ntups);
572 :
573 2 : PageSetLSN(writepage, lsn);
574 2 : MarkBufferDirty(writebuf);
575 : }
576 :
577 : /* replay the record for deleting entries from overflow buffer */
578 2 : if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO)
579 : {
580 : Page page;
581 : char *ptr;
582 : Size len;
583 :
584 2 : ptr = XLogRecGetBlockData(record, 2, &len);
585 :
586 2 : page = BufferGetPage(deletebuf);
587 :
588 2 : if (len > 0)
589 : {
590 : OffsetNumber *unused;
591 : OffsetNumber *unend;
592 :
593 2 : unused = (OffsetNumber *) ptr;
594 2 : unend = (OffsetNumber *) ((char *) ptr + len);
595 :
596 2 : if ((unend - unused) > 0)
597 2 : PageIndexMultiDelete(page, unused, unend - unused);
598 : }
599 :
600 2 : PageSetLSN(page, lsn);
601 2 : MarkBufferDirty(deletebuf);
602 : }
603 :
604 : /*
605 : * Replay is complete, so we can release the buffers. We hold the locks
606 : * until the end of the replay operation so that the lock on the primary
607 : * bucket page is retained throughout. We could release the lock on the
608 : * write buffer as soon as we are done with it (when it is not the
609 : * primary bucket page itself), but that doesn't seem worth complicating
610 : * the code.
611 : */
612 2 : if (BufferIsValid(deletebuf))
613 2 : UnlockReleaseBuffer(deletebuf);
614 :
615 2 : if (BufferIsValid(writebuf))
616 2 : UnlockReleaseBuffer(writebuf);
617 :
618 2 : if (BufferIsValid(bucketbuf))
619 0 : UnlockReleaseBuffer(bucketbuf);
620 2 : }
621 :
622 : /*
623 : * replay squeeze page operation of hash index
624 : */
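: /*
: * For reference, the fixed part of the WAL record, paraphrased from
: * hash_xlog.h, is roughly:
: *
: *     typedef struct xl_hash_squeeze_page
: *     {
: *         BlockNumber prevblkno;    block before the freed overflow page
: *         BlockNumber nextblkno;    block after the freed overflow page
: *         uint16      ntups;        number of tuples moved
: *         bool        is_prim_bucket_same_wrt;
: *         bool        is_prev_bucket_same_wrt;
: *     } xl_hash_squeeze_page;
: */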
625 : static void
626 88 : hash_xlog_squeeze_page(XLogReaderState *record)
627 : {
628 88 : XLogRecPtr lsn = record->EndRecPtr;
629 88 : xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record);
630 88 : Buffer bucketbuf = InvalidBuffer;
631 88 : Buffer writebuf = InvalidBuffer;
632 : Buffer ovflbuf;
633 88 : Buffer prevbuf = InvalidBuffer;
634 : Buffer mapbuf;
635 : XLogRedoAction action;
636 :
637 : /*
638 : * Ensure we have a cleanup lock on primary bucket page before we start
639 : * with the actual replay operation. This ensures that no scan can start,
640 : * and that none is already in progress, while the replay runs. If scans
641 : * were allowed meanwhile, they could miss some records or see the same
642 : * record multiple times.
643 : */
644 88 : if (xldata->is_prim_bucket_same_wrt)
645 72 : action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
646 : else
647 : {
648 : /*
649 : * We don't care about the return value; reading bucketbuf serves only
650 : * to take a cleanup lock on the primary bucket page.
651 : */
652 16 : (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
653 :
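: /*
: * If no tuples are being moved and the write page's nextblkno does not
: * need updating, there is nothing to redo on it; BLK_NOTFOUND makes
: * the redo block below a no-op.
: */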
654 16 : if (xldata->ntups > 0 || xldata->is_prev_bucket_same_wrt)
655 14 : action = XLogReadBufferForRedo(record, 1, &writebuf);
656 : else
657 2 : action = BLK_NOTFOUND;
658 : }
659 :
660 : /* replay the record for adding entries in overflow buffer */
661 88 : if (action == BLK_NEEDS_REDO)
662 : {
663 : Page writepage;
664 : char *begin;
665 : char *data;
666 : Size datalen;
667 82 : uint16 ninserted = 0;
668 82 : bool mod_wbuf = false;
669 :
670 82 : data = begin = XLogRecGetBlockData(record, 1, &datalen);
671 :
672 82 : writepage = BufferGetPage(writebuf);
673 :
674 82 : if (xldata->ntups > 0)
675 : {
676 30 : OffsetNumber *towrite = (OffsetNumber *) data;
677 :
678 30 : data += sizeof(OffsetNumber) * xldata->ntups;
679 :
680 1426 : while (data - begin < datalen)
681 : {
682 1396 : IndexTuple itup = (IndexTuple) data;
683 : Size itemsz;
684 : OffsetNumber l;
685 :
686 1396 : itemsz = IndexTupleSize(itup);
687 1396 : itemsz = MAXALIGN(itemsz);
688 :
689 1396 : data += itemsz;
690 :
691 1396 : l = PageAddItem(writepage, itup, itemsz, towrite[ninserted], false, false);
692 1396 : if (l == InvalidOffsetNumber)
693 0 : elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %d bytes",
694 : (int) itemsz);
695 :
696 1396 : ninserted++;
697 : }
698 :
699 30 : mod_wbuf = true;
700 : }
701 : else
702 : {
703 : /*
704 : * Ensure that the required flags are set when there are no
705 : * tuples. See _hash_freeovflpage().
706 : */
707 : Assert(xldata->is_prim_bucket_same_wrt ||
708 : xldata->is_prev_bucket_same_wrt);
709 : }
710 :
711 : /*
712 : * The number of tuples inserted must match the count in the WAL record.
713 : */
714 : Assert(ninserted == xldata->ntups);
715 :
716 : /*
717 : * If the page we are adding tuples to is the page preceding the freed
718 : * overflow page, also update its nextblkno.
719 : */
720 82 : if (xldata->is_prev_bucket_same_wrt)
721 : {
722 22 : HashPageOpaque writeopaque = HashPageGetOpaque(writepage);
723 :
724 22 : writeopaque->hasho_nextblkno = xldata->nextblkno;
725 22 : mod_wbuf = true;
726 : }
727 :
728 : /* Set LSN and mark writebuf dirty iff it is modified */
729 82 : if (mod_wbuf)
730 : {
731 38 : PageSetLSN(writepage, lsn);
732 38 : MarkBufferDirty(writebuf);
733 : }
734 : }
735 :
736 : /* replay the record for initializing overflow buffer */
737 88 : if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO)
738 : {
739 : Page ovflpage;
740 : HashPageOpaque ovflopaque;
741 :
742 0 : ovflpage = BufferGetPage(ovflbuf);
743 :
744 0 : _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
745 :
746 0 : ovflopaque = HashPageGetOpaque(ovflpage);
747 :
748 0 : ovflopaque->hasho_prevblkno = InvalidBlockNumber;
749 0 : ovflopaque->hasho_nextblkno = InvalidBlockNumber;
750 0 : ovflopaque->hasho_bucket = InvalidBucket;
751 0 : ovflopaque->hasho_flag = LH_UNUSED_PAGE;
752 0 : ovflopaque->hasho_page_id = HASHO_PAGE_ID;
753 :
754 0 : PageSetLSN(ovflpage, lsn);
755 0 : MarkBufferDirty(ovflbuf);
756 : }
757 88 : if (BufferIsValid(ovflbuf))
758 88 : UnlockReleaseBuffer(ovflbuf);
759 :
760 : /* replay the record for page previous to the freed overflow page */
761 154 : if (!xldata->is_prev_bucket_same_wrt &&
762 66 : XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO)
763 : {
764 64 : Page prevpage = BufferGetPage(prevbuf);
765 64 : HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
766 :
767 64 : prevopaque->hasho_nextblkno = xldata->nextblkno;
768 :
769 64 : PageSetLSN(prevpage, lsn);
770 64 : MarkBufferDirty(prevbuf);
771 : }
772 88 : if (BufferIsValid(prevbuf))
773 66 : UnlockReleaseBuffer(prevbuf);
774 :
775 : /* replay the record for page next to the freed overflow page */
776 88 : if (XLogRecHasBlockRef(record, 4))
777 : {
778 : Buffer nextbuf;
779 :
780 0 : if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO)
781 : {
782 0 : Page nextpage = BufferGetPage(nextbuf);
783 0 : HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
784 :
785 0 : nextopaque->hasho_prevblkno = xldata->prevblkno;
786 :
787 0 : PageSetLSN(nextpage, lsn);
788 0 : MarkBufferDirty(nextbuf);
789 : }
790 0 : if (BufferIsValid(nextbuf))
791 0 : UnlockReleaseBuffer(nextbuf);
792 : }
793 :
794 88 : if (BufferIsValid(writebuf))
795 86 : UnlockReleaseBuffer(writebuf);
796 :
797 88 : if (BufferIsValid(bucketbuf))
798 16 : UnlockReleaseBuffer(bucketbuf);
799 :
800 : /*
801 : * Note: in normal operation, we'd update the bitmap and meta page while
802 : * still holding lock on the primary bucket page and overflow pages. But
803 : * during replay it's not necessary to hold those locks, since no other
804 : * index updates can be happening concurrently.
805 : */
806 : /* replay the record for bitmap page */
807 88 : if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO)
808 : {
809 76 : Page mappage = BufferGetPage(mapbuf);
810 76 : uint32 *freep = NULL;
811 : char *data;
812 : uint32 *bitmap_page_bit;
813 : Size datalen;
814 :
815 76 : freep = HashPageGetBitmap(mappage);
816 :
817 76 : data = XLogRecGetBlockData(record, 5, &datalen);
818 76 : bitmap_page_bit = (uint32 *) data;
819 :
820 76 : CLRBIT(freep, *bitmap_page_bit);
821 :
822 76 : PageSetLSN(mappage, lsn);
823 76 : MarkBufferDirty(mapbuf);
824 : }
825 88 : if (BufferIsValid(mapbuf))
826 88 : UnlockReleaseBuffer(mapbuf);
827 :
828 : /* replay the record for meta page */
829 88 : if (XLogRecHasBlockRef(record, 6))
830 : {
831 : Buffer metabuf;
832 :
833 60 : if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO)
834 : {
835 : HashMetaPage metap;
836 : Page page;
837 : char *data;
838 : uint32 *firstfree_ovflpage;
839 : Size datalen;
840 :
841 54 : data = XLogRecGetBlockData(record, 6, &datalen);
842 54 : firstfree_ovflpage = (uint32 *) data;
843 :
844 54 : page = BufferGetPage(metabuf);
845 54 : metap = HashPageGetMeta(page);
846 54 : metap->hashm_firstfree = *firstfree_ovflpage;
847 :
848 54 : PageSetLSN(page, lsn);
849 54 : MarkBufferDirty(metabuf);
850 : }
851 60 : if (BufferIsValid(metabuf))
852 60 : UnlockReleaseBuffer(metabuf);
853 : }
854 88 : }
855 :
856 : /*
857 : * replay delete operation of hash index
858 : */
859 : static void
860 290 : hash_xlog_delete(XLogReaderState *record)
861 : {
862 290 : XLogRecPtr lsn = record->EndRecPtr;
863 290 : xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record);
864 290 : Buffer bucketbuf = InvalidBuffer;
865 : Buffer deletebuf;
866 : Page page;
867 : XLogRedoAction action;
868 :
869 : /*
870 : * Ensure we have a cleanup lock on primary bucket page before we start
871 : * with the actual replay operation. This ensures that no scan can start,
872 : * and that none is already in progress, while the replay runs. If scans
873 : * were allowed meanwhile, they could miss some records or see the same
874 : * record multiple times.
875 : */
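: /*
: * When the page being cleaned is the primary bucket page itself, the
: * do-time code registers only block 1, so take the cleanup lock
: * directly on it.
: */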
876 290 : if (xldata->is_primary_bucket_page)
877 200 : action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf);
878 : else
879 : {
880 : /*
881 : * We don't care about the return value; reading bucketbuf serves only
882 : * to take a cleanup lock on the primary bucket page.
883 : */
884 90 : (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);
885 :
886 90 : action = XLogReadBufferForRedo(record, 1, &deletebuf);
887 : }
888 :
889 : /* replay the record for deleting entries in bucket page */
890 290 : if (action == BLK_NEEDS_REDO)
891 : {
892 : char *ptr;
893 : Size len;
894 :
895 204 : ptr = XLogRecGetBlockData(record, 1, &len);
896 :
897 204 : page = BufferGetPage(deletebuf);
898 :
899 204 : if (len > 0)
900 : {
901 : OffsetNumber *unused;
902 : OffsetNumber *unend;
903 :
904 204 : unused = (OffsetNumber *) ptr;
905 204 : unend = (OffsetNumber *) ((char *) ptr + len);
906 :
907 204 : if ((unend - unused) > 0)
908 204 : PageIndexMultiDelete(page, unused, unend - unused);
909 : }
910 :
911 : /*
912 : * Mark the page as not containing any LP_DEAD items only if
913 : * clear_dead_marking flag is set to true. See comments in
914 : * hashbucketcleanup() for details.
915 : */
916 204 : if (xldata->clear_dead_marking)
917 : {
918 : HashPageOpaque pageopaque;
919 :
920 0 : pageopaque = HashPageGetOpaque(page);
921 0 : pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
922 : }
923 :
924 204 : PageSetLSN(page, lsn);
925 204 : MarkBufferDirty(deletebuf);
926 : }
927 290 : if (BufferIsValid(deletebuf))
928 290 : UnlockReleaseBuffer(deletebuf);
929 :
930 290 : if (BufferIsValid(bucketbuf))
931 90 : UnlockReleaseBuffer(bucketbuf);
932 290 : }
933 :
934 : /*
935 : * replay split cleanup flag operation for primary bucket page.
936 : */
937 : static void
938 196 : hash_xlog_split_cleanup(XLogReaderState *record)
939 : {
940 196 : XLogRecPtr lsn = record->EndRecPtr;
941 : Buffer buffer;
942 : Page page;
943 :
944 196 : if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
945 : {
946 : HashPageOpaque bucket_opaque;
947 :
948 196 : page = BufferGetPage(buffer);
949 :
950 196 : bucket_opaque = HashPageGetOpaque(page);
951 196 : bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
952 196 : PageSetLSN(page, lsn);
953 196 : MarkBufferDirty(buffer);
954 : }
955 196 : if (BufferIsValid(buffer))
956 196 : UnlockReleaseBuffer(buffer);
957 196 : }
958 :
959 : /*
960 : * replay an update of the meta page
961 : */
962 : static void
963 16 : hash_xlog_update_meta_page(XLogReaderState *record)
964 : {
965 : HashMetaPage metap;
966 16 : XLogRecPtr lsn = record->EndRecPtr;
967 16 : xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record);
968 : Buffer metabuf;
969 : Page page;
970 :
971 16 : if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO)
972 : {
973 10 : page = BufferGetPage(metabuf);
974 10 : metap = HashPageGetMeta(page);
975 :
976 10 : metap->hashm_ntuples = xldata->ntuples;
977 :
978 10 : PageSetLSN(page, lsn);
979 10 : MarkBufferDirty(metabuf);
980 : }
981 16 : if (BufferIsValid(metabuf))
982 16 : UnlockReleaseBuffer(metabuf);
983 16 : }
984 :
985 : /*
986 : * replay delete operation in hash index to remove
987 : * tuples marked as DEAD during index tuple insertion.
988 : */
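: /*
: * For reference, the record layout, paraphrased from hash_xlog.h, is
: * roughly:
: *
: *     typedef struct xl_hash_vacuum_one_page
: *     {
: *         TransactionId snapshotConflictHorizon;
: *         uint16        ntuples;
: *         bool          isCatalogRel;
: *         OffsetNumber  offsets[FLEXIBLE_ARRAY_MEMBER];
: *     } xl_hash_vacuum_one_page;
: */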
989 : static void
990 0 : hash_xlog_vacuum_one_page(XLogReaderState *record)
991 : {
992 0 : XLogRecPtr lsn = record->EndRecPtr;
993 : xl_hash_vacuum_one_page *xldata;
994 : Buffer buffer;
995 : Buffer metabuf;
996 : Page page;
997 : XLogRedoAction action;
998 : HashPageOpaque pageopaque;
999 : OffsetNumber *toDelete;
1000 :
1001 0 : xldata = (xl_hash_vacuum_one_page *) XLogRecGetData(record);
1002 0 : toDelete = xldata->offsets;
1003 :
1004 : /*
1005 : * If we have any conflict processing to do, it must happen before we
1006 : * update the page.
1007 : *
1008 : * Hash index tuples that are marked LP_DEAD and removed during hash
1009 : * index tuple insertion can conflict with standby queries. You might
1010 : * think that vacuum records would conflict as well, but we've handled
1011 : * that already. XLOG_HEAP2_PRUNE_VACUUM_SCAN records provide the highest
1012 : * xid cleaned by the vacuum of the heap and so we can resolve any
1013 : * conflicts just once when that arrives. After that we know that no
1014 : * conflicts exist from individual hash index vacuum records on that
1015 : * index.
1016 : */
1017 0 : if (InHotStandby)
1018 : {
1019 : RelFileLocator rlocator;
1020 :
1021 0 : XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
1022 0 : ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
1023 0 : xldata->isCatalogRel,
1024 : rlocator);
1025 : }
1026 :
1027 0 : action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer);
1028 :
1029 0 : if (action == BLK_NEEDS_REDO)
1030 : {
1031 0 : page = BufferGetPage(buffer);
1032 :
1033 0 : PageIndexMultiDelete(page, toDelete, xldata->ntuples);
1034 :
1035 : /*
1036 : * Mark the page as not containing any LP_DEAD items. See comments in
1037 : * _hash_vacuum_one_page() for details.
1038 : */
1039 0 : pageopaque = HashPageGetOpaque(page);
1040 0 : pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1041 :
1042 0 : PageSetLSN(page, lsn);
1043 0 : MarkBufferDirty(buffer);
1044 : }
1045 0 : if (BufferIsValid(buffer))
1046 0 : UnlockReleaseBuffer(buffer);
1047 :
1048 0 : if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
1049 : {
1050 : Page metapage;
1051 : HashMetaPage metap;
1052 :
1053 0 : metapage = BufferGetPage(metabuf);
1054 0 : metap = HashPageGetMeta(metapage);
1055 :
1056 0 : metap->hashm_ntuples -= xldata->ntuples;
1057 :
1058 0 : PageSetLSN(metapage, lsn);
1059 0 : MarkBufferDirty(metabuf);
1060 : }
1061 0 : if (BufferIsValid(metabuf))
1062 0 : UnlockReleaseBuffer(metabuf);
1063 0 : }
1064 :
1065 : void
1066 229626 : hash_redo(XLogReaderState *record)
1067 : {
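: /* strip the generic xlog flag bits, leaving the hash-specific opcode */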
1068 229626 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1069 :
1070 229626 : switch (info)
1071 : {
1072 52 : case XLOG_HASH_INIT_META_PAGE:
1073 52 : hash_xlog_init_meta_page(record);
1074 52 : break;
1075 52 : case XLOG_HASH_INIT_BITMAP_PAGE:
1076 52 : hash_xlog_init_bitmap_page(record);
1077 52 : break;
1078 228208 : case XLOG_HASH_INSERT:
1079 228208 : hash_xlog_insert(record);
1080 228208 : break;
1081 108 : case XLOG_HASH_ADD_OVFL_PAGE:
1082 108 : hash_xlog_add_ovfl_page(record);
1083 108 : break;
1084 196 : case XLOG_HASH_SPLIT_ALLOCATE_PAGE:
1085 196 : hash_xlog_split_allocate_page(record);
1086 196 : break;
1087 222 : case XLOG_HASH_SPLIT_PAGE:
1088 222 : hash_xlog_split_page(record);
1089 222 : break;
1090 196 : case XLOG_HASH_SPLIT_COMPLETE:
1091 196 : hash_xlog_split_complete(record);
1092 196 : break;
1093 2 : case XLOG_HASH_MOVE_PAGE_CONTENTS:
1094 2 : hash_xlog_move_page_contents(record);
1095 2 : break;
1096 88 : case XLOG_HASH_SQUEEZE_PAGE:
1097 88 : hash_xlog_squeeze_page(record);
1098 88 : break;
1099 290 : case XLOG_HASH_DELETE:
1100 290 : hash_xlog_delete(record);
1101 290 : break;
1102 196 : case XLOG_HASH_SPLIT_CLEANUP:
1103 196 : hash_xlog_split_cleanup(record);
1104 196 : break;
1105 16 : case XLOG_HASH_UPDATE_META_PAGE:
1106 16 : hash_xlog_update_meta_page(record);
1107 16 : break;
1108 0 : case XLOG_HASH_VACUUM_ONE_PAGE:
1109 0 : hash_xlog_vacuum_one_page(record);
1110 0 : break;
1111 0 : default:
1112 0 : elog(PANIC, "hash_redo: unknown op code %u", info);
1113 : }
1114 229626 : }
1115 :
1116 : /*
1117 : * Mask a hash page before performing consistency checks on it.
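: * This runs when wal_consistency_checking compares a replayed page with
: * the logged full-page image, to hide fields that may legitimately differ.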
1118 : */
1119 : void
1120 911176 : hash_mask(char *pagedata, BlockNumber blkno)
1121 : {
1122 911176 : Page page = (Page) pagedata;
1123 : HashPageOpaque opaque;
1124 : int pagetype;
1125 :
1126 911176 : mask_page_lsn_and_checksum(page);
1127 :
1128 911176 : mask_page_hint_bits(page);
1129 911176 : mask_unused_space(page);
1130 :
1131 911176 : opaque = HashPageGetOpaque(page);
1132 :
1133 911176 : pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
1134 911176 : if (pagetype == LH_UNUSED_PAGE)
1135 : {
1136 : /*
1137 : * Mask everything on an UNUSED page.
1138 : */
1139 0 : mask_page_content(page);
1140 : }
1141 911176 : else if (pagetype == LH_BUCKET_PAGE ||
1142 : pagetype == LH_OVERFLOW_PAGE)
1143 : {
1144 : /*
1145 : * In hash bucket and overflow pages, it is possible to modify the
1146 : * LP_FLAGS without emitting any WAL record. Hence, mask the line
1147 : * pointer flags. See hashgettuple(), _hash_kill_items() for details.
1148 : */
1149 453648 : mask_lp_flags(page);
1150 : }
1151 :
1152 : /*
1153 : * It is possible that the hint bit LH_PAGE_HAS_DEAD_TUPLES may remain
1154 : * unlogged. So, mask it. See _hash_kill_items() for details.
1155 : */
1156 911176 : opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
1157 911176 : }