LCOV - code coverage report
Current view: top level - src/backend/access/hash - hashovfl.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 91.2 % 387 353
Test Date: 2026-04-06 02:16:11 Functions: 100.0 % 7 7
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * hashovfl.c
       4              :  *    Overflow page management code for the Postgres hash access method
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/access/hash/hashovfl.c
      12              :  *
      13              :  * NOTES
      14              :  *    Overflow pages look like ordinary relation pages.
      15              :  *
      16              :  *-------------------------------------------------------------------------
      17              :  */
      18              : #include "postgres.h"
      19              : 
      20              : #include "access/hash.h"
      21              : #include "access/hash_xlog.h"
      22              : #include "access/xloginsert.h"
      23              : #include "miscadmin.h"
      24              : #include "utils/rel.h"
      25              : 
      26              : 
      27              : static uint32 _hash_firstfreebit(uint32 map);
      28              : 
      29              : 
      30              : /*
      31              :  * Convert overflow page bit number (its index in the free-page bitmaps)
      32              :  * to block number within the index.
      33              :  */
      34              : static BlockNumber
      35          278 : bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum)
      36              : {
      37          278 :     uint32      splitnum = metap->hashm_ovflpoint;
      38              :     uint32      i;
      39              : 
      40              :     /* Convert zero-based bitnumber to 1-based page number */
      41          278 :     ovflbitnum += 1;
      42              : 
      43              :     /* Determine the split number for this page (must be >= 1) */
      44          278 :     for (i = 1;
      45         1489 :          i < splitnum && ovflbitnum > metap->hashm_spares[i];
      46         1211 :          i++)
      47              :          /* loop */ ;
      48              : 
      49              :     /*
      50              :      * Convert to absolute page number by adding the number of bucket pages
      51              :      * that exist before this split point.
      52              :      */
      53          278 :     return (BlockNumber) (_hash_get_totalbuckets(i) + ovflbitnum);
      54              : }
      55              : 
      56              : /*
      57              :  * _hash_ovflblkno_to_bitno
      58              :  *
      59              :  * Convert overflow page block number to bit number for free-page bitmap.
      60              :  */
      61              : uint32
      62          129 : _hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
      63              : {
      64          129 :     uint32      splitnum = metap->hashm_ovflpoint;
      65              :     uint32      i;
      66              :     uint32      bitnum;
      67              : 
      68              :     /* Determine the split number containing this page */
      69          473 :     for (i = 1; i <= splitnum; i++)
      70              :     {
      71          473 :         if (ovflblkno <= (BlockNumber) _hash_get_totalbuckets(i))
      72            4 :             break;              /* oops */
      73          469 :         bitnum = ovflblkno - _hash_get_totalbuckets(i);
      74              : 
      75              :         /*
      76              :          * bitnum has to be greater than number of overflow page added in
      77              :          * previous split point. The overflow page at this splitnum (i) if any
      78              :          * should start from (_hash_get_totalbuckets(i) +
      79              :          * metap->hashm_spares[i - 1] + 1).
      80              :          */
      81          469 :         if (bitnum > metap->hashm_spares[i - 1] &&
      82          469 :             bitnum <= metap->hashm_spares[i])
      83          125 :             return bitnum - 1;  /* -1 to convert 1-based to 0-based */
      84              :     }
      85              : 
      86            4 :     ereport(ERROR,
      87              :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
      88              :              errmsg("invalid overflow block number %u", ovflblkno)));
      89              :     return 0;                   /* keep compiler quiet */
      90              : }
      91              : 
      92              : /*
      93              :  *  _hash_addovflpage
      94              :  *
      95              :  *  Add an overflow page to the bucket whose last page is pointed to by 'buf'.
      96              :  *
      97              :  *  On entry, the caller must hold a pin but no lock on 'buf'.  The pin is
      98              :  *  dropped before exiting (we assume the caller is not interested in 'buf'
      99              :  *  anymore) if not asked to retain.  The pin will be retained only for the
     100              :  *  primary bucket.  The returned overflow page will be pinned and
     101              :  *  write-locked; it is guaranteed to be empty.
     102              :  *
     103              :  *  The caller must hold a pin, but no lock, on the metapage buffer.
     104              :  *  That buffer is returned in the same state.
     105              :  *
     106              :  * NB: since this could be executed concurrently by multiple processes,
     107              :  * one should not assume that the returned overflow page will be the
     108              :  * immediate successor of the originally passed 'buf'.  Additional overflow
     109              :  * pages might have been added to the bucket chain in between.
     110              :  */
Buffer
_hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
{
	Buffer		ovflbuf;
	Page		page;
	Page		ovflpage;
	HashPageOpaque pageopaque;
	HashPageOpaque ovflopaque;
	HashMetaPage metap;
	Buffer		mapbuf = InvalidBuffer;	/* bitmap page we found a free bit in */
	Buffer		newmapbuf = InvalidBuffer;	/* new bitmap page, if we add one */
	BlockNumber blkno;
	uint32		orig_firstfree;
	uint32		splitnum;
	uint32	   *freep = NULL;
	uint32		max_ovflpg;
	uint32		bit;
	uint32		bitmap_page_bit;
	uint32		first_page;
	uint32		last_bit;
	uint32		last_page;
	uint32		i,
				j;
	bool		page_found = false;	/* true if we recycled a freed page */
	XLogRecPtr	recptr;

	/*
	 * Write-lock the tail page.  Here, we need to maintain locking order such
	 * that, first acquire the lock on tail page of bucket, then on meta page
	 * to find and lock the bitmap page and if it is found, then lock on meta
	 * page is released, then finally acquire the lock on new overflow buffer.
	 * We need this locking order to avoid deadlock with backends that are
	 * doing inserts.
	 *
	 * Note: We could have avoided locking many buffers here if we made two
	 * WAL records for acquiring an overflow page (one to allocate an overflow
	 * page and another to add it to overflow bucket chain).  However, doing
	 * so can leak an overflow page, if the system crashes after allocation.
	 * Needless to say, it is better to have a single record from a
	 * performance point of view as well.
	 */
	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

	/* probably redundant... */
	_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);

	/* loop to find current tail page, in case someone else inserted too */
	for (;;)
	{
		BlockNumber nextblkno;

		page = BufferGetPage(buf);
		pageopaque = HashPageGetOpaque(page);
		nextblkno = pageopaque->hasho_nextblkno;

		/* an invalid next pointer marks the chain's tail */
		if (!BlockNumberIsValid(nextblkno))
			break;

		/* we assume we do not need to write the unmodified page */
		if (retain_pin)
		{
			/* pin will be retained only for the primary bucket page */
			Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_BUCKET_PAGE);
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
		}
		else
			_hash_relbuf(rel, buf);

		/* only the first (primary bucket) page can keep its pin */
		retain_pin = false;

		buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
	}

	/* Get exclusive lock on the meta page */
	LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);

	_hash_checkpage(rel, metabuf, LH_META_PAGE);
	metap = HashPageGetMeta(BufferGetPage(metabuf));

	/* start search at hashm_firstfree */
	orig_firstfree = metap->hashm_firstfree;
	first_page = orig_firstfree >> BMPG_SHIFT(metap);
	bit = orig_firstfree & BMPG_MASK(metap);
	i = first_page;
	j = bit / BITS_PER_MAP;
	/* round bit down to the start of its uint32 word within the page */
	bit &= ~(BITS_PER_MAP - 1);

	/* outer loop iterates once per bitmap page */
	for (;;)
	{
		BlockNumber mapblkno;
		Page		mappage;
		uint32		last_inpage;

		/* want to end search with the last existing overflow page */
		splitnum = metap->hashm_ovflpoint;
		max_ovflpg = metap->hashm_spares[splitnum] - 1;
		last_page = max_ovflpg >> BMPG_SHIFT(metap);
		last_bit = max_ovflpg & BMPG_MASK(metap);

		if (i > last_page)
			break;

		Assert(i < metap->hashm_nmaps);
		mapblkno = metap->hashm_mapp[i];

		if (i == last_page)
			last_inpage = last_bit;
		else
			last_inpage = BMPGSZ_BIT(metap) - 1;

		/* Release exclusive lock on metapage while reading bitmap page */
		LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);

		mapbuf = _hash_getbuf(rel, mapblkno, HASH_WRITE, LH_BITMAP_PAGE);
		mappage = BufferGetPage(mapbuf);
		freep = HashPageGetBitmap(mappage);

		/* scan one uint32 word at a time; ALL_SET means no free bit there */
		for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
		{
			if (freep[j] != ALL_SET)
			{
				page_found = true;

				/* Reacquire exclusive lock on the meta page */
				LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);

				/* convert bit to bit number within page */
				bit += _hash_firstfreebit(freep[j]);
				bitmap_page_bit = bit;

				/* convert bit to absolute bit number */
				bit += (i << BMPG_SHIFT(metap));
				/* Calculate address of the recycled overflow page */
				blkno = bitno_to_blkno(metap, bit);

				/* Fetch and init the recycled page */
				ovflbuf = _hash_getinitbuf(rel, blkno);

				goto found;
			}
		}

		/* No free space here, try to advance to next map page */
		_hash_relbuf(rel, mapbuf);
		mapbuf = InvalidBuffer;
		i++;
		j = 0;					/* scan from start of next map page */
		bit = 0;

		/* Reacquire exclusive lock on the meta page */
		LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
	}

	/*
	 * No free pages --- have to extend the relation to add an overflow page.
	 * First, check to see if we have to add a new bitmap page too.
	 */
	if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
	{
		/*
		 * We create the new bitmap page with all pages marked "in use".
		 * Actually two pages in the new bitmap's range will exist
		 * immediately: the bitmap page itself, and the following page which
		 * is the one we return to the caller.  Both of these are correctly
		 * marked "in use".  Subsequent pages do not exist yet, but it is
		 * convenient to pre-mark them as "in use" too.
		 */
		bit = metap->hashm_spares[splitnum];

		/* metapage already has a write lock */
		if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
			ereport(ERROR,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("out of overflow pages in hash index \"%s\"",
							RelationGetRelationName(rel))));

		newmapbuf = _hash_getnewbuf(rel, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
	}
	else
	{
		/*
		 * Nothing to do here; since the page will be past the last used page,
		 * we know its bitmap bit was preinitialized to "in use".
		 */
	}

	/* Calculate address of the new overflow page */
	bit = BufferIsValid(newmapbuf) ?
		metap->hashm_spares[splitnum] + 1 : metap->hashm_spares[splitnum];
	blkno = bitno_to_blkno(metap, bit);

	/*
	 * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
	 * relation length stays in sync with ours.  XXX It's annoying to do this
	 * with metapage write lock held; would be better to use a lock that
	 * doesn't block incoming searches.
	 *
	 * It is okay to hold two buffer locks here (one on tail page of bucket
	 * and other on new overflow page) since there cannot be anyone else
	 * contending for access to ovflbuf.
	 */
	ovflbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);

found:

	/*
	 * Control reaches here with: tail page 'buf' and metapage exclusively
	 * locked, 'ovflbuf' pinned and locked, 'bit' holding the page's absolute
	 * bit number, and (in the recycled case) 'mapbuf' locked with
	 * 'bitmap_page_bit' set.
	 *
	 * Do the update.  No ereport(ERROR) until changes are logged. We want to
	 * log the changes for bitmap page and overflow page together to avoid
	 * loss of pages in case the new page is added.
	 */
	START_CRIT_SECTION();

	if (page_found)
	{
		Assert(BufferIsValid(mapbuf));

		/* mark page "in use" in the bitmap */
		SETBIT(freep, bitmap_page_bit);
		MarkBufferDirty(mapbuf);
	}
	else
	{
		/* update the count to indicate new overflow page is added */
		metap->hashm_spares[splitnum]++;

		if (BufferIsValid(newmapbuf))
		{
			_hash_initbitmapbuffer(newmapbuf, metap->hashm_bmsize, false);
			MarkBufferDirty(newmapbuf);

			/* add the new bitmap page to the metapage's list of bitmaps */
			metap->hashm_mapp[metap->hashm_nmaps] = BufferGetBlockNumber(newmapbuf);
			metap->hashm_nmaps++;
			/* the bitmap page itself also consumes a spare slot */
			metap->hashm_spares[splitnum]++;
		}

		MarkBufferDirty(metabuf);

		/*
		 * for new overflow page, we don't need to explicitly set the bit in
		 * bitmap page, as by default that will be set to "in use".
		 */
	}

	/*
	 * Adjust hashm_firstfree to avoid redundant searches.  But don't risk
	 * changing it if someone moved it while we were searching bitmap pages.
	 */
	if (metap->hashm_firstfree == orig_firstfree)
	{
		metap->hashm_firstfree = bit + 1;
		MarkBufferDirty(metabuf);
	}

	/* initialize new overflow page */
	ovflpage = BufferGetPage(ovflbuf);
	ovflopaque = HashPageGetOpaque(ovflpage);
	ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
	ovflopaque->hasho_nextblkno = InvalidBlockNumber;
	ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
	ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
	ovflopaque->hasho_page_id = HASHO_PAGE_ID;

	MarkBufferDirty(ovflbuf);

	/* logically chain overflow page to previous page */
	pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);

	MarkBufferDirty(buf);

	/* XLOG stuff */
	if (RelationNeedsWAL(rel))
	{
		xl_hash_add_ovfl_page xlrec;

		xlrec.bmpage_found = page_found;
		xlrec.bmsize = metap->hashm_bmsize;

		XLogBeginInsert();
		XLogRegisterData(&xlrec, SizeOfHashAddOvflPage);

		/* block 0: the new overflow page (rebuilt from scratch on replay) */
		XLogRegisterBuffer(0, ovflbuf, REGBUF_WILL_INIT);
		XLogRegisterBufData(0, &pageopaque->hasho_bucket, sizeof(Bucket));

		/* block 1: the (former) tail page whose next pointer we updated */
		XLogRegisterBuffer(1, buf, REGBUF_STANDARD);

		/* block 2: bitmap page, only when we recycled a freed page */
		if (BufferIsValid(mapbuf))
		{
			XLogRegisterBuffer(2, mapbuf, REGBUF_STANDARD);
			XLogRegisterBufData(2, &bitmap_page_bit, sizeof(uint32));
		}

		/* block 3: new bitmap page, only when we had to add one */
		if (BufferIsValid(newmapbuf))
			XLogRegisterBuffer(3, newmapbuf, REGBUF_WILL_INIT);

		/* block 4: the metapage (spares/firstfree/bitmap-list updates) */
		XLogRegisterBuffer(4, metabuf, REGBUF_STANDARD);
		XLogRegisterBufData(4, &metap->hashm_firstfree, sizeof(uint32));

		recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_ADD_OVFL_PAGE);
	}
	else
		recptr = XLogGetFakeLSN(rel);

	PageSetLSN(BufferGetPage(ovflbuf), recptr);
	PageSetLSN(BufferGetPage(buf), recptr);

	if (BufferIsValid(mapbuf))
		PageSetLSN(BufferGetPage(mapbuf), recptr);

	if (BufferIsValid(newmapbuf))
		PageSetLSN(BufferGetPage(newmapbuf), recptr);

	PageSetLSN(BufferGetPage(metabuf), recptr);

	END_CRIT_SECTION();

	/* release tail page; keep the pin only for the primary bucket page */
	if (retain_pin)
		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	else
		_hash_relbuf(rel, buf);

	if (BufferIsValid(mapbuf))
		_hash_relbuf(rel, mapbuf);

	/* metapage lock released; caller's pin on it is retained */
	LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);

	if (BufferIsValid(newmapbuf))
		_hash_relbuf(rel, newmapbuf);

	/* returned page is pinned, write-locked, and empty */
	return ovflbuf;
}
     443              : 
     444              : /*
     445              :  *  _hash_firstfreebit()
     446              :  *
     447              :  *  Return the number of the first bit that is not set in the word 'map'.
     448              :  */
     449              : static uint32
     450           45 : _hash_firstfreebit(uint32 map)
     451              : {
     452              :     uint32      i,
     453              :                 mask;
     454              : 
     455           45 :     mask = 0x1;
     456          382 :     for (i = 0; i < BITS_PER_MAP; i++)
     457              :     {
     458          382 :         if (!(mask & map))
     459           45 :             return i;
     460          337 :         mask <<= 1;
     461              :     }
     462              : 
     463            0 :     elog(ERROR, "firstfreebit found no free bit");
     464              : 
     465              :     return 0;                   /* keep compiler quiet */
     466              : }
     467              : 
     468              : /*
     469              :  *  _hash_freeovflpage() -
     470              :  *
     471              :  *  Remove this overflow page from its bucket's chain, and mark the page as
     472              :  *  free.  On entry, ovflbuf is write-locked; it is released before exiting.
     473              :  *
     474              :  *  Add the tuples (itups) to wbuf in this function.  We could do that in the
     475              :  *  caller as well, but the advantage of doing it here is we can easily write
     476              :  *  the WAL for XLOG_HASH_SQUEEZE_PAGE operation.  Addition of tuples and
     477              :  *  removal of overflow page has to done as an atomic operation, otherwise
     478              :  *  during replay on standby users might find duplicate records.
     479              :  *
     480              :  *  Since this function is invoked in VACUUM, we provide an access strategy
     481              :  *  parameter that controls fetches of the bucket pages.
     482              :  *
     483              :  *  Returns the block number of the page that followed the given page
     484              :  *  in the bucket, or InvalidBlockNumber if no following page.
     485              :  *
     486              :  *  NB: caller must not hold lock on metapage, nor on page, that's next to
     487              :  *  ovflbuf in the bucket chain.  We don't acquire the lock on page that's
     488              :  *  prior to ovflbuf in chain if it is same as wbuf because the caller already
     489              :  *  has a lock on same.
     490              :  */
     491              : BlockNumber
     492          125 : _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
     493              :                    Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets,
     494              :                    Size *tups_size, uint16 nitups,
     495              :                    BufferAccessStrategy bstrategy)
     496              : {
     497              :     HashMetaPage metap;
     498              :     Buffer      metabuf;
     499              :     Buffer      mapbuf;
     500              :     BlockNumber ovflblkno;
     501              :     BlockNumber prevblkno;
     502              :     BlockNumber blkno;
     503              :     BlockNumber nextblkno;
     504              :     BlockNumber writeblkno;
     505              :     HashPageOpaque ovflopaque;
     506              :     Page        ovflpage;
     507              :     Page        mappage;
     508              :     uint32     *freep;
     509              :     uint32      ovflbitno;
     510              :     int32       bitmappage,
     511              :                 bitmapbit;
     512              :     Bucket      bucket PG_USED_FOR_ASSERTS_ONLY;
     513          125 :     Buffer      prevbuf = InvalidBuffer;
     514          125 :     Buffer      nextbuf = InvalidBuffer;
     515          125 :     bool        update_metap = false,
     516              :                 mod_wbuf,
     517              :                 is_prim_bucket_same_wrt,
     518              :                 is_prev_bucket_same_wrt;
     519              :     XLogRecPtr  recptr;
     520              : 
     521              :     /* Get information from the doomed page */
     522          125 :     _hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE);
     523          125 :     ovflblkno = BufferGetBlockNumber(ovflbuf);
     524          125 :     ovflpage = BufferGetPage(ovflbuf);
     525          125 :     ovflopaque = HashPageGetOpaque(ovflpage);
     526          125 :     nextblkno = ovflopaque->hasho_nextblkno;
     527          125 :     prevblkno = ovflopaque->hasho_prevblkno;
     528          125 :     writeblkno = BufferGetBlockNumber(wbuf);
     529          125 :     bucket = ovflopaque->hasho_bucket;
     530              : 
     531              :     /*
     532              :      * Fix up the bucket chain.  this is a doubly-linked list, so we must fix
     533              :      * up the bucket chain members behind and ahead of the overflow page being
     534              :      * deleted.  Concurrency issues are avoided by using lock chaining as
     535              :      * described atop hashbucketcleanup.
     536              :      */
     537          125 :     if (BlockNumberIsValid(prevblkno))
     538              :     {
     539          125 :         if (prevblkno == writeblkno)
     540           41 :             prevbuf = wbuf;
     541              :         else
     542           84 :             prevbuf = _hash_getbuf_with_strategy(rel,
     543              :                                                  prevblkno,
     544              :                                                  HASH_WRITE,
     545              :                                                  LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
     546              :                                                  bstrategy);
     547              :     }
     548          125 :     if (BlockNumberIsValid(nextblkno))
     549            0 :         nextbuf = _hash_getbuf_with_strategy(rel,
     550              :                                              nextblkno,
     551              :                                              HASH_WRITE,
     552              :                                              LH_OVERFLOW_PAGE,
     553              :                                              bstrategy);
     554              : 
     555              :     /* Note: bstrategy is intentionally not used for metapage and bitmap */
     556              : 
     557              :     /* Read the metapage so we can determine which bitmap page to use */
     558          125 :     metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
     559          125 :     metap = HashPageGetMeta(BufferGetPage(metabuf));
     560              : 
     561              :     /* Identify which bit to set */
     562          125 :     ovflbitno = _hash_ovflblkno_to_bitno(metap, ovflblkno);
     563              : 
     564          125 :     bitmappage = ovflbitno >> BMPG_SHIFT(metap);
     565          125 :     bitmapbit = ovflbitno & BMPG_MASK(metap);
     566              : 
     567          125 :     if (bitmappage >= metap->hashm_nmaps)
     568            0 :         elog(ERROR, "invalid overflow bit number %u", ovflbitno);
     569          125 :     blkno = metap->hashm_mapp[bitmappage];
     570              : 
     571              :     /* Release metapage lock while we access the bitmap page */
     572          125 :     LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     573              : 
     574              :     /* read the bitmap page to clear the bitmap bit */
     575          125 :     mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BITMAP_PAGE);
     576          125 :     mappage = BufferGetPage(mapbuf);
     577          125 :     freep = HashPageGetBitmap(mappage);
     578              :     Assert(ISSET(freep, bitmapbit));
     579              : 
     580              :     /* Get write-lock on metapage to update firstfree */
     581          125 :     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     582              : 
     583              :     /* This operation needs to log multiple tuples, prepare WAL for that */
     584          125 :     if (RelationNeedsWAL(rel))
     585          125 :         XLogEnsureRecordSpace(HASH_XLOG_FREE_OVFL_BUFS, 4 + nitups);
     586              : 
     587          125 :     START_CRIT_SECTION();
     588              : 
     589              :     /*
     590              :      * we have to insert tuples on the "write" page, being careful to preserve
     591              :      * hashkey ordering.  (If we insert many tuples into the same "write" page
     592              :      * it would be worth qsort'ing them).
     593              :      */
     594          125 :     if (nitups > 0)
     595              :     {
     596           57 :         _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
     597           57 :         MarkBufferDirty(wbuf);
     598              :     }
     599              : 
     600              :     /*
     601              :      * Reinitialize the freed overflow page.  Just zeroing the page won't
     602              :      * work, because WAL replay routines expect pages to be initialized. See
     603              :      * explanation of RBM_NORMAL mode atop XLogReadBufferExtended.  We are
     604              :      * careful to make the special space valid here so that tools like
     605              :      * pageinspect won't get confused.
     606              :      */
     607          125 :     _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
     608              : 
     609          125 :     ovflopaque = HashPageGetOpaque(ovflpage);
     610              : 
     611          125 :     ovflopaque->hasho_prevblkno = InvalidBlockNumber;
     612          125 :     ovflopaque->hasho_nextblkno = InvalidBlockNumber;
     613          125 :     ovflopaque->hasho_bucket = InvalidBucket;
     614          125 :     ovflopaque->hasho_flag = LH_UNUSED_PAGE;
     615          125 :     ovflopaque->hasho_page_id = HASHO_PAGE_ID;
     616              : 
     617          125 :     MarkBufferDirty(ovflbuf);
     618              : 
     619          125 :     if (BufferIsValid(prevbuf))
     620              :     {
     621          125 :         Page        prevpage = BufferGetPage(prevbuf);
     622          125 :         HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
     623              : 
     624              :         Assert(prevopaque->hasho_bucket == bucket);
     625          125 :         prevopaque->hasho_nextblkno = nextblkno;
     626          125 :         MarkBufferDirty(prevbuf);
     627              :     }
     628          125 :     if (BufferIsValid(nextbuf))
     629              :     {
     630            0 :         Page        nextpage = BufferGetPage(nextbuf);
     631            0 :         HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
     632              : 
     633              :         Assert(nextopaque->hasho_bucket == bucket);
     634            0 :         nextopaque->hasho_prevblkno = prevblkno;
     635            0 :         MarkBufferDirty(nextbuf);
     636              :     }
     637              : 
     638              :     /* Clear the bitmap bit to indicate that this overflow page is free */
     639          125 :     CLRBIT(freep, bitmapbit);
     640          125 :     MarkBufferDirty(mapbuf);
     641              : 
     642              :     /* if this is now the first free page, update hashm_firstfree */
     643          125 :     if (ovflbitno < metap->hashm_firstfree)
     644              :     {
     645          121 :         metap->hashm_firstfree = ovflbitno;
     646          121 :         update_metap = true;
     647          121 :         MarkBufferDirty(metabuf);
     648              :     }
     649              : 
     650              :     /* Determine which pages are modified */
     651          125 :     is_prim_bucket_same_wrt = (wbuf == bucketbuf);
     652          125 :     is_prev_bucket_same_wrt = (wbuf == prevbuf);
     653          125 :     mod_wbuf = (nitups > 0 || is_prev_bucket_same_wrt);
     654              : 
     655              :     /* XLOG stuff */
     656          125 :     if (RelationNeedsWAL(rel))
     657          125 :     {
     658              :         xl_hash_squeeze_page xlrec;
     659              : 
     660          125 :         xlrec.prevblkno = prevblkno;
     661          125 :         xlrec.nextblkno = nextblkno;
     662          125 :         xlrec.ntups = nitups;
     663          125 :         xlrec.is_prim_bucket_same_wrt = is_prim_bucket_same_wrt;
     664          125 :         xlrec.is_prev_bucket_same_wrt = is_prev_bucket_same_wrt;
     665              : 
     666          125 :         XLogBeginInsert();
     667          125 :         XLogRegisterData(&xlrec, SizeOfHashSqueezePage);
     668              : 
     669              :         /*
     670              :          * bucket buffer was not changed, but still needs to be registered to
     671              :          * ensure that we can acquire a cleanup lock on it during replay.
     672              :          */
     673          125 :         if (!is_prim_bucket_same_wrt)
     674              :         {
     675           32 :             uint8       flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
     676              : 
     677           32 :             XLogRegisterBuffer(0, bucketbuf, flags);
     678              :         }
     679              : 
     680          125 :         if (nitups > 0)
     681              :         {
     682           57 :             XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
     683           57 :             XLogRegisterBufData(1, itup_offsets,
     684              :                                 nitups * sizeof(OffsetNumber));
     685         2097 :             for (int i = 0; i < nitups; i++)
     686         2040 :                 XLogRegisterBufData(1, itups[i], tups_size[i]);
     687              :         }
     688           68 :         else if (is_prim_bucket_same_wrt || is_prev_bucket_same_wrt)
     689              :         {
     690              :             uint8       wbuf_flags;
     691              : 
     692              :             /*
     693              :              * A write buffer needs to be registered even if no tuples are
     694              :              * added to it to ensure that we can acquire a cleanup lock on it
     695              :              * if it is the same as primary bucket buffer or update the
     696              :              * nextblkno if it is same as the previous bucket buffer.
     697              :              */
     698              :             Assert(nitups == 0);
     699              : 
     700           64 :             wbuf_flags = REGBUF_STANDARD;
     701           64 :             if (!is_prev_bucket_same_wrt)
     702           52 :                 wbuf_flags |= REGBUF_NO_CHANGE;
     703              :             else
     704              :                 Assert(mod_wbuf);
     705           64 :             XLogRegisterBuffer(1, wbuf, wbuf_flags);
     706              :         }
     707              : 
     708          125 :         XLogRegisterBuffer(2, ovflbuf, REGBUF_STANDARD);
     709              : 
     710              :         /*
     711              :          * If prevpage and the writepage (block in which we are moving tuples
     712              :          * from overflow) are same, then no need to separately register
     713              :          * prevpage.  During replay, we can directly update the nextblock in
     714              :          * writepage.
     715              :          */
     716          125 :         if (BufferIsValid(prevbuf) && !is_prev_bucket_same_wrt)
     717           84 :             XLogRegisterBuffer(3, prevbuf, REGBUF_STANDARD);
     718              : 
     719          125 :         if (BufferIsValid(nextbuf))
     720            0 :             XLogRegisterBuffer(4, nextbuf, REGBUF_STANDARD);
     721              : 
     722          125 :         XLogRegisterBuffer(5, mapbuf, REGBUF_STANDARD);
     723          125 :         XLogRegisterBufData(5, &bitmapbit, sizeof(uint32));
     724              : 
     725          125 :         if (update_metap)
     726              :         {
     727          121 :             XLogRegisterBuffer(6, metabuf, REGBUF_STANDARD);
     728          121 :             XLogRegisterBufData(6, &metap->hashm_firstfree, sizeof(uint32));
     729              :         }
     730              : 
     731          125 :         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SQUEEZE_PAGE);
     732              :     }
     733              :     else                        /* !RelationNeedsWAL(rel) */
     734            0 :         recptr = XLogGetFakeLSN(rel);
     735              : 
     736              :     /* Set LSN iff wbuf is modified. */
     737          125 :     if (mod_wbuf)
     738           69 :         PageSetLSN(BufferGetPage(wbuf), recptr);
     739              : 
     740          125 :     PageSetLSN(BufferGetPage(ovflbuf), recptr);
     741              : 
     742          125 :     if (BufferIsValid(prevbuf) && !is_prev_bucket_same_wrt)
     743           84 :         PageSetLSN(BufferGetPage(prevbuf), recptr);
     744          125 :     if (BufferIsValid(nextbuf))
     745            0 :         PageSetLSN(BufferGetPage(nextbuf), recptr);
     746              : 
     747          125 :     PageSetLSN(BufferGetPage(mapbuf), recptr);
     748              : 
     749          125 :     if (update_metap)
     750          121 :         PageSetLSN(BufferGetPage(metabuf), recptr);
     751              : 
     752          125 :     END_CRIT_SECTION();
     753              : 
     754              :     /* release previous bucket if it is not same as write bucket */
     755          125 :     if (BufferIsValid(prevbuf) && prevblkno != writeblkno)
     756           84 :         _hash_relbuf(rel, prevbuf);
     757              : 
     758          125 :     if (BufferIsValid(ovflbuf))
     759          125 :         _hash_relbuf(rel, ovflbuf);
     760              : 
     761          125 :     if (BufferIsValid(nextbuf))
     762            0 :         _hash_relbuf(rel, nextbuf);
     763              : 
     764          125 :     _hash_relbuf(rel, mapbuf);
     765          125 :     _hash_relbuf(rel, metabuf);
     766              : 
     767          125 :     return nextblkno;
     768              : }
     769              : 
     770              : 
     771              : /*
     772              :  *  _hash_initbitmapbuffer()
     773              :  *
     774              :  *   Initialize a new bitmap page.  All bits in the new bitmap page are set to
     775              :  *   "1", indicating "in use".
     776              :  */
     777              : void
     778          240 : _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage)
     779              : {
     780              :     Page        pg;
     781              :     HashPageOpaque op;
     782              :     uint32     *freep;
     783              : 
     784          240 :     pg = BufferGetPage(buf);
     785              : 
     786              :     /* initialize the page */
     787          240 :     if (initpage)
     788           29 :         _hash_pageinit(pg, BufferGetPageSize(buf));
     789              : 
     790              :     /* initialize the page's special space */
     791          240 :     op = HashPageGetOpaque(pg);
     792          240 :     op->hasho_prevblkno = InvalidBlockNumber;
     793          240 :     op->hasho_nextblkno = InvalidBlockNumber;
     794          240 :     op->hasho_bucket = InvalidBucket;
     795          240 :     op->hasho_flag = LH_BITMAP_PAGE;
     796          240 :     op->hasho_page_id = HASHO_PAGE_ID;
     797              : 
     798              :     /* set all of the bits to 1 */
     799          240 :     freep = HashPageGetBitmap(pg);
     800          240 :     memset(freep, 0xFF, bmsize);
     801              : 
     802              :     /*
     803              :      * Set pd_lower just past the end of the bitmap page data.  We could even
     804              :      * set pd_lower equal to pd_upper, but this is more precise and makes the
     805              :      * page look compressible to xlog.c.
     806              :      */
     807          240 :     ((PageHeader) pg)->pd_lower = ((char *) freep + bmsize) - (char *) pg;
     808          240 : }
     809              : 
     810              : 
     811              : /*
     812              :  *  _hash_squeezebucket(rel, bucket)
     813              :  *
     814              :  *  Try to squeeze the tuples onto pages occurring earlier in the
     815              :  *  bucket chain in an attempt to free overflow pages. When we start
     816              :  *  the "squeezing", the page from which we start taking tuples (the
     817              :  *  "read" page) is the last bucket in the bucket chain and the page
     818              :  *  onto which we start squeezing tuples (the "write" page) is the
     819              :  *  first page in the bucket chain.  The read page works backward and
     820              :  *  the write page works forward; the procedure terminates when the
     821              :  *  read page and write page are the same page.
     822              :  *
     823              :  *  At completion of this procedure, it is guaranteed that all pages in
     824              :  *  the bucket are nonempty, unless the bucket is totally empty (in
     825              :  *  which case all overflow pages will be freed).  The original implementation
     826              :  *  required that to be true on entry as well, but it's a lot easier for
     827              :  *  callers to leave empty overflow pages and let this guy clean it up.
     828              :  *
     829              :  *  Caller must acquire cleanup lock on the primary page of the target
     830              :  *  bucket to exclude any scans that are in progress, which could easily
     831              :  *  be confused into returning the same tuple more than once or some tuples
 *  not at all by the rearrangement we are performing here.  To prevent
 *  any concurrent scan from crossing the squeeze scan we use lock chaining
 *  similar to hashbucketcleanup.  Refer to the comments atop hashbucketcleanup.
     835              :  *
     836              :  *  We need to retain a pin on the primary bucket to ensure that no concurrent
     837              :  *  split can start.
     838              :  *
     839              :  *  Since this function is invoked in VACUUM, we provide an access strategy
     840              :  *  parameter that controls fetches of the bucket pages.
     841              :  */
     842              : void
     843          913 : _hash_squeezebucket(Relation rel,
     844              :                     Bucket bucket,
     845              :                     BlockNumber bucket_blkno,
     846              :                     Buffer bucket_buf,
     847              :                     BufferAccessStrategy bstrategy)
     848              : {
     849              :     BlockNumber wblkno;
     850              :     BlockNumber rblkno;
     851              :     Buffer      wbuf;
     852              :     Buffer      rbuf;
     853              :     Page        wpage;
     854              :     Page        rpage;
     855              :     HashPageOpaque wopaque;
     856              :     HashPageOpaque ropaque;
     857              : 
     858              :     /*
     859              :      * start squeezing into the primary bucket page.
     860              :      */
     861          913 :     wblkno = bucket_blkno;
     862          913 :     wbuf = bucket_buf;
     863          913 :     wpage = BufferGetPage(wbuf);
     864          913 :     wopaque = HashPageGetOpaque(wpage);
     865              : 
     866              :     /*
     867              :      * if there aren't any overflow pages, there's nothing to squeeze. caller
     868              :      * is responsible for releasing the pin on primary bucket page.
     869              :      */
     870          913 :     if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
     871              :     {
     872          864 :         LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
     873          864 :         return;
     874              :     }
     875              : 
     876              :     /*
     877              :      * Find the last page in the bucket chain by starting at the base bucket
     878              :      * page and working forward.  Note: we assume that a hash bucket chain is
     879              :      * usually smaller than the buffer ring being used by VACUUM, else using
     880              :      * the access strategy here would be counterproductive.
     881              :      */
     882           49 :     rbuf = InvalidBuffer;
     883           49 :     ropaque = wopaque;
     884              :     do
     885              :     {
     886          241 :         rblkno = ropaque->hasho_nextblkno;
     887          241 :         if (rbuf != InvalidBuffer)
     888          192 :             _hash_relbuf(rel, rbuf);
     889          241 :         rbuf = _hash_getbuf_with_strategy(rel,
     890              :                                           rblkno,
     891              :                                           HASH_WRITE,
     892              :                                           LH_OVERFLOW_PAGE,
     893              :                                           bstrategy);
     894          241 :         rpage = BufferGetPage(rbuf);
     895          241 :         ropaque = HashPageGetOpaque(rpage);
     896              :         Assert(ropaque->hasho_bucket == bucket);
     897          241 :     } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
     898              : 
     899              :     /*
     900              :      * squeeze the tuples.
     901              :      */
     902              :     for (;;)
     903           84 :     {
     904              :         OffsetNumber roffnum;
     905              :         OffsetNumber maxroffnum;
     906              :         OffsetNumber deletable[MaxOffsetNumber];
     907              :         IndexTuple  itups[MaxIndexTuplesPerPage];
     908              :         Size        tups_size[MaxIndexTuplesPerPage];
     909              :         OffsetNumber itup_offsets[MaxIndexTuplesPerPage];
     910          133 :         uint16      ndeletable = 0;
     911          133 :         uint16      nitups = 0;
     912          133 :         Size        all_tups_size = 0;
     913              :         int         i;
     914          133 :         bool        retain_pin = false;
     915              : 
     916          137 : readpage:
     917              :         /* Scan each tuple in "read" page */
     918          137 :         maxroffnum = PageGetMaxOffsetNumber(rpage);
     919          137 :         for (roffnum = FirstOffsetNumber;
     920         3545 :              roffnum <= maxroffnum;
     921         3408 :              roffnum = OffsetNumberNext(roffnum))
     922              :         {
     923              :             IndexTuple  itup;
     924              :             Size        itemsz;
     925              : 
     926              :             /* skip dead tuples */
     927         3420 :             if (ItemIdIsDead(PageGetItemId(rpage, roffnum)))
     928            0 :                 continue;
     929              : 
     930         3420 :             itup = (IndexTuple) PageGetItem(rpage,
     931         3420 :                                             PageGetItemId(rpage, roffnum));
     932         3420 :             itemsz = IndexTupleSize(itup);
     933         3420 :             itemsz = MAXALIGN(itemsz);
     934              : 
     935              :             /*
     936              :              * Walk up the bucket chain, looking for a page big enough for
     937              :              * this item and all other accumulated items.  Exit if we reach
     938              :              * the read page.
     939              :              */
     940         3524 :             while (PageGetFreeSpaceForMultipleTuples(wpage, nitups + 1) < (all_tups_size + itemsz))
     941              :             {
     942          116 :                 Buffer      next_wbuf = InvalidBuffer;
     943          116 :                 bool        tups_moved = false;
     944              : 
     945              :                 Assert(!PageIsEmpty(wpage));
     946              : 
     947          116 :                 if (wblkno == bucket_blkno)
     948           20 :                     retain_pin = true;
     949              : 
     950          116 :                 wblkno = wopaque->hasho_nextblkno;
     951              :                 Assert(BlockNumberIsValid(wblkno));
     952              : 
     953              :                 /* don't need to move to next page if we reached the read page */
     954          116 :                 if (wblkno != rblkno)
     955          108 :                     next_wbuf = _hash_getbuf_with_strategy(rel,
     956              :                                                            wblkno,
     957              :                                                            HASH_WRITE,
     958              :                                                            LH_OVERFLOW_PAGE,
     959              :                                                            bstrategy);
     960              : 
     961          116 :                 if (nitups > 0)
     962              :                 {
     963              :                     XLogRecPtr  recptr;
     964              : 
     965              :                     Assert(nitups == ndeletable);
     966              : 
     967              :                     /*
     968              :                      * This operation needs to log multiple tuples, prepare
     969              :                      * WAL for that.
     970              :                      */
     971            4 :                     if (RelationNeedsWAL(rel))
     972            4 :                         XLogEnsureRecordSpace(0, 3 + nitups);
     973              : 
     974            4 :                     START_CRIT_SECTION();
     975              : 
     976              :                     /*
     977              :                      * we have to insert tuples on the "write" page, being
     978              :                      * careful to preserve hashkey ordering.  (If we insert
     979              :                      * many tuples into the same "write" page it would be
     980              :                      * worth qsort'ing them).
     981              :                      */
     982            4 :                     _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
     983            4 :                     MarkBufferDirty(wbuf);
     984              : 
     985              :                     /* Delete tuples we already moved off read page */
     986            4 :                     PageIndexMultiDelete(rpage, deletable, ndeletable);
     987            4 :                     MarkBufferDirty(rbuf);
     988              : 
     989              :                     /* XLOG stuff */
     990            4 :                     if (RelationNeedsWAL(rel))
     991            4 :                     {
     992              :                         xl_hash_move_page_contents xlrec;
     993              : 
     994            4 :                         xlrec.ntups = nitups;
     995            4 :                         xlrec.is_prim_bucket_same_wrt = (wbuf == bucket_buf);
     996              : 
     997            4 :                         XLogBeginInsert();
     998            4 :                         XLogRegisterData(&xlrec, SizeOfHashMovePageContents);
     999              : 
    1000              :                         /*
    1001              :                          * bucket buffer was not changed, but still needs to
    1002              :                          * be registered to ensure that we can acquire a
    1003              :                          * cleanup lock on it during replay.
    1004              :                          */
    1005            4 :                         if (!xlrec.is_prim_bucket_same_wrt)
    1006              :                         {
    1007            0 :                             int         flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
    1008              : 
    1009            0 :                             XLogRegisterBuffer(0, bucket_buf, flags);
    1010              :                         }
    1011              : 
    1012            4 :                         XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
    1013            4 :                         XLogRegisterBufData(1, itup_offsets,
    1014              :                                             nitups * sizeof(OffsetNumber));
    1015         1372 :                         for (i = 0; i < nitups; i++)
    1016         1368 :                             XLogRegisterBufData(1, itups[i], tups_size[i]);
    1017              : 
    1018            4 :                         XLogRegisterBuffer(2, rbuf, REGBUF_STANDARD);
    1019            4 :                         XLogRegisterBufData(2, deletable,
    1020              :                                             ndeletable * sizeof(OffsetNumber));
    1021              : 
    1022            4 :                         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_MOVE_PAGE_CONTENTS);
    1023              :                     }
    1024              :                     else
    1025            0 :                         recptr = XLogGetFakeLSN(rel);
    1026              : 
    1027            4 :                     PageSetLSN(BufferGetPage(wbuf), recptr);
    1028            4 :                     PageSetLSN(BufferGetPage(rbuf), recptr);
    1029              : 
    1030            4 :                     END_CRIT_SECTION();
    1031              : 
    1032            4 :                     tups_moved = true;
    1033              :                 }
    1034              : 
    1035              :                 /*
    1036              :                  * release the lock on previous page after acquiring the lock
    1037              :                  * on next page
    1038              :                  */
    1039          116 :                 if (retain_pin)
    1040           20 :                     LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
    1041              :                 else
    1042           96 :                     _hash_relbuf(rel, wbuf);
    1043              : 
    1044              :                 /* nothing more to do if we reached the read page */
    1045          116 :                 if (rblkno == wblkno)
    1046              :                 {
    1047            8 :                     _hash_relbuf(rel, rbuf);
    1048           49 :                     return;
    1049              :                 }
    1050              : 
    1051          108 :                 wbuf = next_wbuf;
    1052          108 :                 wpage = BufferGetPage(wbuf);
    1053          108 :                 wopaque = HashPageGetOpaque(wpage);
    1054              :                 Assert(wopaque->hasho_bucket == bucket);
    1055          108 :                 retain_pin = false;
    1056              : 
    1057              :                 /* be tidy */
    1058         1476 :                 for (i = 0; i < nitups; i++)
    1059         1368 :                     pfree(itups[i]);
    1060          108 :                 nitups = 0;
    1061          108 :                 all_tups_size = 0;
    1062          108 :                 ndeletable = 0;
    1063              : 
    1064              :                 /*
    1065              :                  * after moving the tuples, rpage would have been compacted,
    1066              :                  * so we need to rescan it.
    1067              :                  */
    1068          108 :                 if (tups_moved)
    1069            4 :                     goto readpage;
    1070              :             }
    1071              : 
    1072              :             /* remember tuple for deletion from "read" page */
    1073         3408 :             deletable[ndeletable++] = roffnum;
    1074              : 
    1075              :             /*
    1076              :              * we need a copy of index tuples as they can be freed as part of
    1077              :              * overflow page, however we need them to write a WAL record in
    1078              :              * _hash_freeovflpage.
    1079              :              */
    1080         3408 :             itups[nitups] = CopyIndexTuple(itup);
    1081         3408 :             tups_size[nitups++] = itemsz;
    1082         3408 :             all_tups_size += itemsz;
    1083              :         }
    1084              : 
    1085              :         /*
    1086              :          * If we reach here, there are no live tuples on the "read" page ---
    1087              :          * it was empty when we got to it, or we moved them all.  So we can
    1088              :          * just free the page without bothering with deleting tuples
    1089              :          * individually.  Then advance to the previous "read" page.
    1090              :          *
    1091              :          * Tricky point here: if our read and write pages are adjacent in the
    1092              :          * bucket chain, our write lock on wbuf will conflict with
    1093              :          * _hash_freeovflpage's attempt to update the sibling links of the
    1094              :          * removed page.  In that case, we don't need to lock it again.
    1095              :          */
    1096          125 :         rblkno = ropaque->hasho_prevblkno;
    1097              :         Assert(BlockNumberIsValid(rblkno));
    1098              : 
    1099              :         /* free this overflow page (releases rbuf) */
    1100          125 :         _hash_freeovflpage(rel, bucket_buf, rbuf, wbuf, itups, itup_offsets,
    1101              :                            tups_size, nitups, bstrategy);
    1102              : 
    1103              :         /* be tidy */
    1104         2165 :         for (i = 0; i < nitups; i++)
    1105         2040 :             pfree(itups[i]);
    1106              : 
    1107              :         /* are we freeing the page adjacent to wbuf? */
    1108          125 :         if (rblkno == wblkno)
    1109              :         {
    1110              :             /* retain the pin on primary bucket page till end of bucket scan */
    1111           41 :             if (wblkno == bucket_blkno)
    1112           29 :                 LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
    1113              :             else
    1114           12 :                 _hash_relbuf(rel, wbuf);
    1115           41 :             return;
    1116              :         }
    1117              : 
    1118           84 :         rbuf = _hash_getbuf_with_strategy(rel,
    1119              :                                           rblkno,
    1120              :                                           HASH_WRITE,
    1121              :                                           LH_OVERFLOW_PAGE,
    1122              :                                           bstrategy);
    1123           84 :         rpage = BufferGetPage(rbuf);
    1124           84 :         ropaque = HashPageGetOpaque(rpage);
    1125              :         Assert(ropaque->hasho_bucket == bucket);
    1126              :     }
    1127              : 
    1128              :     /* NOTREACHED */
    1129              : }
        

Generated by: LCOV version 2.0-1