LCOV - code coverage report
Current view: top level - src/backend/access/hash - hashovfl.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 347 379 91.6 %
Date: 2025-01-18 04:15:08 Functions: 7 7 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * hashovfl.c
       4             :  *    Overflow page management code for the Postgres hash access method
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/hash/hashovfl.c
      12             :  *
      13             :  * NOTES
      14             :  *    Overflow pages look like ordinary relation pages.
      15             :  *
      16             :  *-------------------------------------------------------------------------
      17             :  */
      18             : #include "postgres.h"
      19             : 
      20             : #include "access/hash.h"
      21             : #include "access/hash_xlog.h"
      22             : #include "access/xloginsert.h"
      23             : #include "miscadmin.h"
      24             : #include "utils/rel.h"
      25             : 
      26             : 
      27             : static uint32 _hash_firstfreebit(uint32 map);
      28             : 
      29             : 
      30             : /*
      31             :  * Convert overflow page bit number (its index in the free-page bitmaps)
      32             :  * to block number within the index.
      33             :  */
      34             : static BlockNumber
      35         370 : bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum)
      36             : {
      37         370 :     uint32      splitnum = metap->hashm_ovflpoint;
      38             :     uint32      i;
      39             : 
      40             :     /* Convert zero-based bitnumber to 1-based page number */
      41         370 :     ovflbitnum += 1;
      42             : 
      43             :     /* Determine the split number for this page (must be >= 1) */
      44        1912 :     for (i = 1;
      45        1596 :          i < splitnum && ovflbitnum > metap->hashm_spares[i];
      46        1542 :          i++)
      47             :          /* loop */ ;
      48             : 
      49             :     /*
      50             :      * Convert to absolute page number by adding the number of bucket pages
      51             :      * that exist before this split point.
      52             :      */
      53         370 :     return (BlockNumber) (_hash_get_totalbuckets(i) + ovflbitnum);
      54             : }
      55             : 
      56             : /*
      57             :  * _hash_ovflblkno_to_bitno
      58             :  *
      59             :  * Convert overflow page block number to bit number for free-page bitmap.
      60             :  */
      61             : uint32
      62         220 : _hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
      63             : {
      64         220 :     uint32      splitnum = metap->hashm_ovflpoint;
      65             :     uint32      i;
      66             :     uint32      bitnum;
      67             : 
      68             :     /* Determine the split number containing this page */
      69         886 :     for (i = 1; i <= splitnum; i++)
      70             :     {
      71         886 :         if (ovflblkno <= (BlockNumber) _hash_get_totalbuckets(i))
      72           8 :             break;              /* oops */
      73         878 :         bitnum = ovflblkno - _hash_get_totalbuckets(i);
      74             : 
      75             :         /*
      76             :          * bitnum has to be greater than number of overflow page added in
      77             :          * previous split point. The overflow page at this splitnum (i) if any
      78             :          * should start from (_hash_get_totalbuckets(i) +
      79             :          * metap->hashm_spares[i - 1] + 1).
      80             :          */
      81         878 :         if (bitnum > metap->hashm_spares[i - 1] &&
      82         878 :             bitnum <= metap->hashm_spares[i])
      83         212 :             return bitnum - 1;  /* -1 to convert 1-based to 0-based */
      84             :     }
      85             : 
      86           8 :     ereport(ERROR,
      87             :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
      88             :              errmsg("invalid overflow block number %u", ovflblkno)));
      89             :     return 0;                   /* keep compiler quiet */
      90             : }
      91             : 
      92             : /*
      93             :  *  _hash_addovflpage
      94             :  *
      95             :  *  Add an overflow page to the bucket whose last page is pointed to by 'buf'.
      96             :  *
      97             :  *  On entry, the caller must hold a pin but no lock on 'buf'.  The pin is
      98             :  *  dropped before exiting (we assume the caller is not interested in 'buf'
      99             :  *  anymore) if not asked to retain.  The pin will be retained only for the
     100             :  *  primary bucket.  The returned overflow page will be pinned and
     101             :  *  write-locked; it is guaranteed to be empty.
     102             :  *
     103             :  *  The caller must hold a pin, but no lock, on the metapage buffer.
     104             :  *  That buffer is returned in the same state.
     105             :  *
     106             :  * NB: since this could be executed concurrently by multiple processes,
     107             :  * one should not assume that the returned overflow page will be the
     108             :  * immediate successor of the originally passed 'buf'.  Additional overflow
     109             :  * pages might have been added to the bucket chain in between.
     110             :  */
     111             : Buffer
     112         370 : _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
     113             : {
     114             :     Buffer      ovflbuf;
     115             :     Page        page;
     116             :     Page        ovflpage;
     117             :     HashPageOpaque pageopaque;
     118             :     HashPageOpaque ovflopaque;
     119             :     HashMetaPage metap;
     120         370 :     Buffer      mapbuf = InvalidBuffer;
     121         370 :     Buffer      newmapbuf = InvalidBuffer;
     122             :     BlockNumber blkno;
     123             :     uint32      orig_firstfree;
     124             :     uint32      splitnum;
     125         370 :     uint32     *freep = NULL;
     126             :     uint32      max_ovflpg;
     127             :     uint32      bit;
     128             :     uint32      bitmap_page_bit;
     129             :     uint32      first_page;
     130             :     uint32      last_bit;
     131             :     uint32      last_page;
     132             :     uint32      i,
     133             :                 j;
     134         370 :     bool        page_found = false;
     135             : 
     136             :     /*
     137             :      * Write-lock the tail page.  Here, we need to maintain locking order such
     138             :      * that, first acquire the lock on tail page of bucket, then on meta page
     139             :      * to find and lock the bitmap page and if it is found, then lock on meta
     140             :      * page is released, then finally acquire the lock on new overflow buffer.
     141             :      * We need this locking order to avoid deadlock with backends that are
     142             :      * doing inserts.
     143             :      *
     144             :      * Note: We could have avoided locking many buffers here if we made two
     145             :      * WAL records for acquiring an overflow page (one to allocate an overflow
     146             :      * page and another to add it to overflow bucket chain).  However, doing
     147             :      * so can leak an overflow page, if the system crashes after allocation.
     148             :      * Needless to say, it is better to have a single record from a
     149             :      * performance point of view as well.
     150             :      */
     151         370 :     LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     152             : 
     153             :     /* probably redundant... */
     154         370 :     _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
     155             : 
     156             :     /* loop to find current tail page, in case someone else inserted too */
     157             :     for (;;)
     158           0 :     {
     159             :         BlockNumber nextblkno;
     160             : 
     161         370 :         page = BufferGetPage(buf);
     162         370 :         pageopaque = HashPageGetOpaque(page);
     163         370 :         nextblkno = pageopaque->hasho_nextblkno;
     164             : 
     165         370 :         if (!BlockNumberIsValid(nextblkno))
     166         370 :             break;
     167             : 
     168             :         /* we assume we do not need to write the unmodified page */
     169           0 :         if (retain_pin)
     170             :         {
     171             :             /* pin will be retained only for the primary bucket page */
     172             :             Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_BUCKET_PAGE);
     173           0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     174             :         }
     175             :         else
     176           0 :             _hash_relbuf(rel, buf);
     177             : 
     178           0 :         retain_pin = false;
     179             : 
     180           0 :         buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
     181             :     }
     182             : 
     183             :     /* Get exclusive lock on the meta page */
     184         370 :     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     185             : 
     186         370 :     _hash_checkpage(rel, metabuf, LH_META_PAGE);
     187         370 :     metap = HashPageGetMeta(BufferGetPage(metabuf));
     188             : 
     189             :     /* start search at hashm_firstfree */
     190         370 :     orig_firstfree = metap->hashm_firstfree;
     191         370 :     first_page = orig_firstfree >> BMPG_SHIFT(metap);
     192         370 :     bit = orig_firstfree & BMPG_MASK(metap);
     193         370 :     i = first_page;
     194         370 :     j = bit / BITS_PER_MAP;
     195         370 :     bit &= ~(BITS_PER_MAP - 1);
     196             : 
     197             :     /* outer loop iterates once per bitmap page */
     198             :     for (;;)
     199         304 :     {
     200             :         BlockNumber mapblkno;
     201             :         Page        mappage;
     202             :         uint32      last_inpage;
     203             : 
     204             :         /* want to end search with the last existing overflow page */
     205         674 :         splitnum = metap->hashm_ovflpoint;
     206         674 :         max_ovflpg = metap->hashm_spares[splitnum] - 1;
     207         674 :         last_page = max_ovflpg >> BMPG_SHIFT(metap);
     208         674 :         last_bit = max_ovflpg & BMPG_MASK(metap);
     209             : 
     210         674 :         if (i > last_page)
     211         304 :             break;
     212             : 
     213             :         Assert(i < metap->hashm_nmaps);
     214         370 :         mapblkno = metap->hashm_mapp[i];
     215             : 
     216         370 :         if (i == last_page)
     217         370 :             last_inpage = last_bit;
     218             :         else
     219           0 :             last_inpage = BMPGSZ_BIT(metap) - 1;
     220             : 
     221             :         /* Release exclusive lock on metapage while reading bitmap page */
     222         370 :         LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     223             : 
     224         370 :         mapbuf = _hash_getbuf(rel, mapblkno, HASH_WRITE, LH_BITMAP_PAGE);
     225         370 :         mappage = BufferGetPage(mapbuf);
     226         370 :         freep = HashPageGetBitmap(mappage);
     227             : 
     228         674 :         for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
     229             :         {
     230         370 :             if (freep[j] != ALL_SET)
     231             :             {
     232          66 :                 page_found = true;
     233             : 
     234             :                 /* Reacquire exclusive lock on the meta page */
     235          66 :                 LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     236             : 
     237             :                 /* convert bit to bit number within page */
     238          66 :                 bit += _hash_firstfreebit(freep[j]);
     239          66 :                 bitmap_page_bit = bit;
     240             : 
     241             :                 /* convert bit to absolute bit number */
     242          66 :                 bit += (i << BMPG_SHIFT(metap));
     243             :                 /* Calculate address of the recycled overflow page */
     244          66 :                 blkno = bitno_to_blkno(metap, bit);
     245             : 
     246             :                 /* Fetch and init the recycled page */
     247          66 :                 ovflbuf = _hash_getinitbuf(rel, blkno);
     248             : 
     249          66 :                 goto found;
     250             :             }
     251             :         }
     252             : 
     253             :         /* No free space here, try to advance to next map page */
     254         304 :         _hash_relbuf(rel, mapbuf);
     255         304 :         mapbuf = InvalidBuffer;
     256         304 :         i++;
     257         304 :         j = 0;                  /* scan from start of next map page */
     258         304 :         bit = 0;
     259             : 
     260             :         /* Reacquire exclusive lock on the meta page */
     261         304 :         LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     262             :     }
     263             : 
     264             :     /*
     265             :      * No free pages --- have to extend the relation to add an overflow page.
     266             :      * First, check to see if we have to add a new bitmap page too.
     267             :      */
     268         304 :     if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
     269             :     {
     270             :         /*
     271             :          * We create the new bitmap page with all pages marked "in use".
     272             :          * Actually two pages in the new bitmap's range will exist
     273             :          * immediately: the bitmap page itself, and the following page which
     274             :          * is the one we return to the caller.  Both of these are correctly
     275             :          * marked "in use".  Subsequent pages do not exist yet, but it is
     276             :          * convenient to pre-mark them as "in use" too.
     277             :          */
     278           0 :         bit = metap->hashm_spares[splitnum];
     279             : 
     280             :         /* metapage already has a write lock */
     281           0 :         if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
     282           0 :             ereport(ERROR,
     283             :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     284             :                      errmsg("out of overflow pages in hash index \"%s\"",
     285             :                             RelationGetRelationName(rel))));
     286             : 
     287           0 :         newmapbuf = _hash_getnewbuf(rel, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
     288             :     }
     289             :     else
     290             :     {
     291             :         /*
     292             :          * Nothing to do here; since the page will be past the last used page,
     293             :          * we know its bitmap bit was preinitialized to "in use".
     294             :          */
     295             :     }
     296             : 
     297             :     /* Calculate address of the new overflow page */
     298         304 :     bit = BufferIsValid(newmapbuf) ?
     299         304 :         metap->hashm_spares[splitnum] + 1 : metap->hashm_spares[splitnum];
     300         304 :     blkno = bitno_to_blkno(metap, bit);
     301             : 
     302             :     /*
     303             :      * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
     304             :      * relation length stays in sync with ours.  XXX It's annoying to do this
     305             :      * with metapage write lock held; would be better to use a lock that
     306             :      * doesn't block incoming searches.
     307             :      *
     308             :      * It is okay to hold two buffer locks here (one on tail page of bucket
     309             :      * and other on new overflow page) since there cannot be anyone else
     310             :      * contending for access to ovflbuf.
     311             :      */
     312         304 :     ovflbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
     313             : 
     314         370 : found:
     315             : 
     316             :     /*
     317             :      * Do the update.  No ereport(ERROR) until changes are logged. We want to
     318             :      * log the changes for bitmap page and overflow page together to avoid
     319             :      * loss of pages in case the new page is added.
     320             :      */
     321         370 :     START_CRIT_SECTION();
     322             : 
     323         370 :     if (page_found)
     324             :     {
     325             :         Assert(BufferIsValid(mapbuf));
     326             : 
     327             :         /* mark page "in use" in the bitmap */
     328          66 :         SETBIT(freep, bitmap_page_bit);
     329          66 :         MarkBufferDirty(mapbuf);
     330             :     }
     331             :     else
     332             :     {
     333             :         /* update the count to indicate new overflow page is added */
     334         304 :         metap->hashm_spares[splitnum]++;
     335             : 
     336         304 :         if (BufferIsValid(newmapbuf))
     337             :         {
     338           0 :             _hash_initbitmapbuffer(newmapbuf, metap->hashm_bmsize, false);
     339           0 :             MarkBufferDirty(newmapbuf);
     340             : 
     341             :             /* add the new bitmap page to the metapage's list of bitmaps */
     342           0 :             metap->hashm_mapp[metap->hashm_nmaps] = BufferGetBlockNumber(newmapbuf);
     343           0 :             metap->hashm_nmaps++;
     344           0 :             metap->hashm_spares[splitnum]++;
     345             :         }
     346             : 
     347         304 :         MarkBufferDirty(metabuf);
     348             : 
     349             :         /*
     350             :          * for new overflow page, we don't need to explicitly set the bit in
     351             :          * bitmap page, as by default that will be set to "in use".
     352             :          */
     353             :     }
     354             : 
     355             :     /*
     356             :      * Adjust hashm_firstfree to avoid redundant searches.  But don't risk
     357             :      * changing it if someone moved it while we were searching bitmap pages.
     358             :      */
     359         370 :     if (metap->hashm_firstfree == orig_firstfree)
     360             :     {
     361         370 :         metap->hashm_firstfree = bit + 1;
     362         370 :         MarkBufferDirty(metabuf);
     363             :     }
     364             : 
     365             :     /* initialize new overflow page */
     366         370 :     ovflpage = BufferGetPage(ovflbuf);
     367         370 :     ovflopaque = HashPageGetOpaque(ovflpage);
     368         370 :     ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
     369         370 :     ovflopaque->hasho_nextblkno = InvalidBlockNumber;
     370         370 :     ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
     371         370 :     ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
     372         370 :     ovflopaque->hasho_page_id = HASHO_PAGE_ID;
     373             : 
     374         370 :     MarkBufferDirty(ovflbuf);
     375             : 
     376             :     /* logically chain overflow page to previous page */
     377         370 :     pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
     378             : 
     379         370 :     MarkBufferDirty(buf);
     380             : 
     381             :     /* XLOG stuff */
     382         370 :     if (RelationNeedsWAL(rel))
     383             :     {
     384             :         XLogRecPtr  recptr;
     385             :         xl_hash_add_ovfl_page xlrec;
     386             : 
     387         342 :         xlrec.bmpage_found = page_found;
     388         342 :         xlrec.bmsize = metap->hashm_bmsize;
     389             : 
     390         342 :         XLogBeginInsert();
     391         342 :         XLogRegisterData((char *) &xlrec, SizeOfHashAddOvflPage);
     392             : 
     393         342 :         XLogRegisterBuffer(0, ovflbuf, REGBUF_WILL_INIT);
     394         342 :         XLogRegisterBufData(0, (char *) &pageopaque->hasho_bucket, sizeof(Bucket));
     395             : 
     396         342 :         XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
     397             : 
     398         342 :         if (BufferIsValid(mapbuf))
     399             :         {
     400          66 :             XLogRegisterBuffer(2, mapbuf, REGBUF_STANDARD);
     401          66 :             XLogRegisterBufData(2, (char *) &bitmap_page_bit, sizeof(uint32));
     402             :         }
     403             : 
     404         342 :         if (BufferIsValid(newmapbuf))
     405           0 :             XLogRegisterBuffer(3, newmapbuf, REGBUF_WILL_INIT);
     406             : 
     407         342 :         XLogRegisterBuffer(4, metabuf, REGBUF_STANDARD);
     408         342 :         XLogRegisterBufData(4, (char *) &metap->hashm_firstfree, sizeof(uint32));
     409             : 
     410         342 :         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_ADD_OVFL_PAGE);
     411             : 
     412         342 :         PageSetLSN(BufferGetPage(ovflbuf), recptr);
     413         342 :         PageSetLSN(BufferGetPage(buf), recptr);
     414             : 
     415         342 :         if (BufferIsValid(mapbuf))
     416          66 :             PageSetLSN(BufferGetPage(mapbuf), recptr);
     417             : 
     418         342 :         if (BufferIsValid(newmapbuf))
     419           0 :             PageSetLSN(BufferGetPage(newmapbuf), recptr);
     420             : 
     421         342 :         PageSetLSN(BufferGetPage(metabuf), recptr);
     422             :     }
     423             : 
     424         370 :     END_CRIT_SECTION();
     425             : 
     426         370 :     if (retain_pin)
     427         106 :         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     428             :     else
     429         264 :         _hash_relbuf(rel, buf);
     430             : 
     431         370 :     if (BufferIsValid(mapbuf))
     432          66 :         _hash_relbuf(rel, mapbuf);
     433             : 
     434         370 :     LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     435             : 
     436         370 :     if (BufferIsValid(newmapbuf))
     437           0 :         _hash_relbuf(rel, newmapbuf);
     438             : 
     439         370 :     return ovflbuf;
     440             : }
     441             : 
     442             : /*
     443             :  *  _hash_firstfreebit()
     444             :  *
     445             :  *  Return the number of the first bit that is not set in the word 'map'.
     446             :  */
     447             : static uint32
     448          66 : _hash_firstfreebit(uint32 map)
     449             : {
     450             :     uint32      i,
     451             :                 mask;
     452             : 
     453          66 :     mask = 0x1;
     454         570 :     for (i = 0; i < BITS_PER_MAP; i++)
     455             :     {
     456         570 :         if (!(mask & map))
     457          66 :             return i;
     458         504 :         mask <<= 1;
     459             :     }
     460             : 
     461           0 :     elog(ERROR, "firstfreebit found no free bit");
     462             : 
     463             :     return 0;                   /* keep compiler quiet */
     464             : }
     465             : 
     466             : /*
     467             :  *  _hash_freeovflpage() -
     468             :  *
     469             :  *  Remove this overflow page from its bucket's chain, and mark the page as
     470             :  *  free.  On entry, ovflbuf is write-locked; it is released before exiting.
     471             :  *
     472             :  *  Add the tuples (itups) to wbuf in this function.  We could do that in the
     473             :  *  caller as well, but the advantage of doing it here is we can easily write
     474             :  *  the WAL for XLOG_HASH_SQUEEZE_PAGE operation.  Addition of tuples and
     475             :  *  removal of overflow page has to done as an atomic operation, otherwise
     476             :  *  during replay on standby users might find duplicate records.
     477             :  *
     478             :  *  Since this function is invoked in VACUUM, we provide an access strategy
     479             :  *  parameter that controls fetches of the bucket pages.
     480             :  *
     481             :  *  Returns the block number of the page that followed the given page
     482             :  *  in the bucket, or InvalidBlockNumber if no following page.
     483             :  *
     484             :  *  NB: caller must not hold lock on metapage, nor on page, that's next to
     485             :  *  ovflbuf in the bucket chain.  We don't acquire the lock on page that's
     486             :  *  prior to ovflbuf in chain if it is same as wbuf because the caller already
     487             :  *  has a lock on same.
     488             :  */
     489             : BlockNumber
     490         212 : _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
     491             :                    Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets,
     492             :                    Size *tups_size, uint16 nitups,
     493             :                    BufferAccessStrategy bstrategy)
     494             : {
     495             :     HashMetaPage metap;
     496             :     Buffer      metabuf;
     497             :     Buffer      mapbuf;
     498             :     BlockNumber ovflblkno;
     499             :     BlockNumber prevblkno;
     500             :     BlockNumber blkno;
     501             :     BlockNumber nextblkno;
     502             :     BlockNumber writeblkno;
     503             :     HashPageOpaque ovflopaque;
     504             :     Page        ovflpage;
     505             :     Page        mappage;
     506             :     uint32     *freep;
     507             :     uint32      ovflbitno;
     508             :     int32       bitmappage,
     509             :                 bitmapbit;
     510             :     Bucket      bucket PG_USED_FOR_ASSERTS_ONLY;
     511         212 :     Buffer      prevbuf = InvalidBuffer;
     512         212 :     Buffer      nextbuf = InvalidBuffer;
     513         212 :     bool        update_metap = false;
     514             : 
     515             :     /* Get information from the doomed page */
     516         212 :     _hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE);
     517         212 :     ovflblkno = BufferGetBlockNumber(ovflbuf);
     518         212 :     ovflpage = BufferGetPage(ovflbuf);
     519         212 :     ovflopaque = HashPageGetOpaque(ovflpage);
     520         212 :     nextblkno = ovflopaque->hasho_nextblkno;
     521         212 :     prevblkno = ovflopaque->hasho_prevblkno;
     522         212 :     writeblkno = BufferGetBlockNumber(wbuf);
     523         212 :     bucket = ovflopaque->hasho_bucket;
     524             : 
     525             :     /*
     526             :      * Fix up the bucket chain.  this is a doubly-linked list, so we must fix
     527             :      * up the bucket chain members behind and ahead of the overflow page being
     528             :      * deleted.  Concurrency issues are avoided by using lock chaining as
     529             :      * described atop hashbucketcleanup.
     530             :      */
     531         212 :     if (BlockNumberIsValid(prevblkno))
     532             :     {
     533         212 :         if (prevblkno == writeblkno)
     534          62 :             prevbuf = wbuf;
     535             :         else
     536         150 :             prevbuf = _hash_getbuf_with_strategy(rel,
     537             :                                                  prevblkno,
     538             :                                                  HASH_WRITE,
     539             :                                                  LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
     540             :                                                  bstrategy);
     541             :     }
     542         212 :     if (BlockNumberIsValid(nextblkno))
     543           0 :         nextbuf = _hash_getbuf_with_strategy(rel,
     544             :                                              nextblkno,
     545             :                                              HASH_WRITE,
     546             :                                              LH_OVERFLOW_PAGE,
     547             :                                              bstrategy);
     548             : 
     549             :     /* Note: bstrategy is intentionally not used for metapage and bitmap */
     550             : 
     551             :     /* Read the metapage so we can determine which bitmap page to use */
     552         212 :     metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
     553         212 :     metap = HashPageGetMeta(BufferGetPage(metabuf));
     554             : 
     555             :     /* Identify which bit to set */
     556         212 :     ovflbitno = _hash_ovflblkno_to_bitno(metap, ovflblkno);
     557             : 
     558         212 :     bitmappage = ovflbitno >> BMPG_SHIFT(metap);
     559         212 :     bitmapbit = ovflbitno & BMPG_MASK(metap);
     560             : 
     561         212 :     if (bitmappage >= metap->hashm_nmaps)
     562           0 :         elog(ERROR, "invalid overflow bit number %u", ovflbitno);
     563         212 :     blkno = metap->hashm_mapp[bitmappage];
     564             : 
     565             :     /* Release metapage lock while we access the bitmap page */
     566         212 :     LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     567             : 
     568             :     /* read the bitmap page to clear the bitmap bit */
     569         212 :     mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BITMAP_PAGE);
     570         212 :     mappage = BufferGetPage(mapbuf);
     571         212 :     freep = HashPageGetBitmap(mappage);
     572             :     Assert(ISSET(freep, bitmapbit));
     573             : 
     574             :     /* Get write-lock on metapage to update firstfree */
     575         212 :     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     576             : 
     577             :     /* This operation needs to log multiple tuples, prepare WAL for that */
     578         212 :     if (RelationNeedsWAL(rel))
     579         212 :         XLogEnsureRecordSpace(HASH_XLOG_FREE_OVFL_BUFS, 4 + nitups);
     580             : 
     581         212 :     START_CRIT_SECTION();
     582             : 
     583             :     /*
     584             :      * we have to insert tuples on the "write" page, being careful to preserve
     585             :      * hashkey ordering.  (If we insert many tuples into the same "write" page
     586             :      * it would be worth qsort'ing them).
     587             :      */
     588         212 :     if (nitups > 0)
     589             :     {
     590          86 :         _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
     591          86 :         MarkBufferDirty(wbuf);
     592             :     }
     593             : 
     594             :     /*
     595             :      * Reinitialize the freed overflow page.  Just zeroing the page won't
     596             :      * work, because WAL replay routines expect pages to be initialized. See
     597             :      * explanation of RBM_NORMAL mode atop XLogReadBufferExtended.  We are
     598             :      * careful to make the special space valid here so that tools like
     599             :      * pageinspect won't get confused.
     600             :      */
     601         212 :     _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
     602             : 
     603         212 :     ovflopaque = HashPageGetOpaque(ovflpage);
     604             : 
     605         212 :     ovflopaque->hasho_prevblkno = InvalidBlockNumber;
     606         212 :     ovflopaque->hasho_nextblkno = InvalidBlockNumber;
     607         212 :     ovflopaque->hasho_bucket = InvalidBucket;
     608         212 :     ovflopaque->hasho_flag = LH_UNUSED_PAGE;
     609         212 :     ovflopaque->hasho_page_id = HASHO_PAGE_ID;
     610             : 
     611         212 :     MarkBufferDirty(ovflbuf);
     612             : 
     613         212 :     if (BufferIsValid(prevbuf))
     614             :     {
     615         212 :         Page        prevpage = BufferGetPage(prevbuf);
     616         212 :         HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
     617             : 
     618             :         Assert(prevopaque->hasho_bucket == bucket);
     619         212 :         prevopaque->hasho_nextblkno = nextblkno;
     620         212 :         MarkBufferDirty(prevbuf);
     621             :     }
     622         212 :     if (BufferIsValid(nextbuf))
     623             :     {
     624           0 :         Page        nextpage = BufferGetPage(nextbuf);
     625           0 :         HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
     626             : 
     627             :         Assert(nextopaque->hasho_bucket == bucket);
     628           0 :         nextopaque->hasho_prevblkno = prevblkno;
     629           0 :         MarkBufferDirty(nextbuf);
     630             :     }
     631             : 
     632             :     /* Clear the bitmap bit to indicate that this overflow page is free */
     633         212 :     CLRBIT(freep, bitmapbit);
     634         212 :     MarkBufferDirty(mapbuf);
     635             : 
     636             :     /* if this is now the first free page, update hashm_firstfree */
     637         212 :     if (ovflbitno < metap->hashm_firstfree)
     638             :     {
     639         180 :         metap->hashm_firstfree = ovflbitno;
     640         180 :         update_metap = true;
     641         180 :         MarkBufferDirty(metabuf);
     642             :     }
     643             : 
     644             :     /* XLOG stuff */
     645         212 :     if (RelationNeedsWAL(rel))
     646             :     {
     647             :         xl_hash_squeeze_page xlrec;
     648             :         XLogRecPtr  recptr;
     649             :         int         i;
     650         212 :         bool        mod_wbuf = false;
     651             : 
     652         212 :         xlrec.prevblkno = prevblkno;
     653         212 :         xlrec.nextblkno = nextblkno;
     654         212 :         xlrec.ntups = nitups;
     655         212 :         xlrec.is_prim_bucket_same_wrt = (wbuf == bucketbuf);
     656         212 :         xlrec.is_prev_bucket_same_wrt = (wbuf == prevbuf);
     657             : 
     658         212 :         XLogBeginInsert();
     659         212 :         XLogRegisterData((char *) &xlrec, SizeOfHashSqueezePage);
     660             : 
     661             :         /*
     662             :          * bucket buffer was not changed, but still needs to be registered to
     663             :          * ensure that we can acquire a cleanup lock on it during replay.
     664             :          */
     665         212 :         if (!xlrec.is_prim_bucket_same_wrt)
     666             :         {
     667          48 :             uint8       flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
     668             : 
     669          48 :             XLogRegisterBuffer(0, bucketbuf, flags);
     670             :         }
     671             : 
     672         212 :         if (xlrec.ntups > 0)
     673             :         {
     674          86 :             XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
     675             : 
     676             :             /* Remember that wbuf is modified. */
     677          86 :             mod_wbuf = true;
     678             : 
     679          86 :             XLogRegisterBufData(1, (char *) itup_offsets,
     680             :                                 nitups * sizeof(OffsetNumber));
     681        3322 :             for (i = 0; i < nitups; i++)
     682        3236 :                 XLogRegisterBufData(1, (char *) itups[i], tups_size[i]);
     683             :         }
     684         126 :         else if (xlrec.is_prim_bucket_same_wrt || xlrec.is_prev_bucket_same_wrt)
     685             :         {
     686             :             uint8       wbuf_flags;
     687             : 
     688             :             /*
     689             :              * A write buffer needs to be registered even if no tuples are
     690             :              * added to it to ensure that we can acquire a cleanup lock on it
     691             :              * if it is the same as primary bucket buffer or update the
     692             :              * nextblkno if it is same as the previous bucket buffer.
     693             :              */
     694             :             Assert(xlrec.ntups == 0);
     695             : 
     696         120 :             wbuf_flags = REGBUF_STANDARD;
     697         120 :             if (!xlrec.is_prev_bucket_same_wrt)
     698             :             {
     699         100 :                 wbuf_flags |= REGBUF_NO_CHANGE;
     700             :             }
     701             :             else
     702             :             {
     703             :                 /* Remember that wbuf is modified. */
     704          20 :                 mod_wbuf = true;
     705             :             }
     706         120 :             XLogRegisterBuffer(1, wbuf, wbuf_flags);
     707             :         }
     708             : 
     709         212 :         XLogRegisterBuffer(2, ovflbuf, REGBUF_STANDARD);
     710             : 
     711             :         /*
     712             :          * If prevpage and the writepage (block in which we are moving tuples
     713             :          * from overflow) are same, then no need to separately register
     714             :          * prevpage.  During replay, we can directly update the nextblock in
     715             :          * writepage.
     716             :          */
     717         212 :         if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
     718         150 :             XLogRegisterBuffer(3, prevbuf, REGBUF_STANDARD);
     719             : 
     720         212 :         if (BufferIsValid(nextbuf))
     721           0 :             XLogRegisterBuffer(4, nextbuf, REGBUF_STANDARD);
     722             : 
     723         212 :         XLogRegisterBuffer(5, mapbuf, REGBUF_STANDARD);
     724         212 :         XLogRegisterBufData(5, (char *) &bitmapbit, sizeof(uint32));
     725             : 
     726         212 :         if (update_metap)
     727             :         {
     728         180 :             XLogRegisterBuffer(6, metabuf, REGBUF_STANDARD);
     729         180 :             XLogRegisterBufData(6, (char *) &metap->hashm_firstfree, sizeof(uint32));
     730             :         }
     731             : 
     732         212 :         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SQUEEZE_PAGE);
     733             : 
     734             :         /* Set LSN iff wbuf is modified. */
     735         212 :         if (mod_wbuf)
     736         106 :             PageSetLSN(BufferGetPage(wbuf), recptr);
     737             : 
     738         212 :         PageSetLSN(BufferGetPage(ovflbuf), recptr);
     739             : 
     740         212 :         if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
     741         150 :             PageSetLSN(BufferGetPage(prevbuf), recptr);
     742         212 :         if (BufferIsValid(nextbuf))
     743           0 :             PageSetLSN(BufferGetPage(nextbuf), recptr);
     744             : 
     745         212 :         PageSetLSN(BufferGetPage(mapbuf), recptr);
     746             : 
     747         212 :         if (update_metap)
     748         180 :             PageSetLSN(BufferGetPage(metabuf), recptr);
     749             :     }
     750             : 
     751         212 :     END_CRIT_SECTION();
     752             : 
     753             :     /* release previous bucket if it is not same as write bucket */
     754         212 :     if (BufferIsValid(prevbuf) && prevblkno != writeblkno)
     755         150 :         _hash_relbuf(rel, prevbuf);
     756             : 
     757         212 :     if (BufferIsValid(ovflbuf))
     758         212 :         _hash_relbuf(rel, ovflbuf);
     759             : 
     760         212 :     if (BufferIsValid(nextbuf))
     761           0 :         _hash_relbuf(rel, nextbuf);
     762             : 
     763         212 :     _hash_relbuf(rel, mapbuf);
     764         212 :     _hash_relbuf(rel, metabuf);
     765             : 
     766         212 :     return nextblkno;
     767             : }
     768             : 
     769             : 
     770             : /*
     771             :  *  _hash_initbitmapbuffer()
     772             :  *
     773             :  *   Initialize a new bitmap page.  All bits in the new bitmap page are set to
     774             :  *   "1", indicating "in use".
     775             :  */
     776             : void
     777         388 : _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage)
     778             : {
     779             :     Page        pg;
     780             :     HashPageOpaque op;
     781             :     uint32     *freep;
     782             : 
     783         388 :     pg = BufferGetPage(buf);
     784             : 
     785             :     /* initialize the page */
     786         388 :     if (initpage)
     787          52 :         _hash_pageinit(pg, BufferGetPageSize(buf));
     788             : 
     789             :     /* initialize the page's special space */
     790         388 :     op = HashPageGetOpaque(pg);
     791         388 :     op->hasho_prevblkno = InvalidBlockNumber;
     792         388 :     op->hasho_nextblkno = InvalidBlockNumber;
     793         388 :     op->hasho_bucket = InvalidBucket;
     794         388 :     op->hasho_flag = LH_BITMAP_PAGE;
     795         388 :     op->hasho_page_id = HASHO_PAGE_ID;
     796             : 
     797             :     /* set all of the bits to 1 */
     798         388 :     freep = HashPageGetBitmap(pg);
     799         388 :     memset(freep, 0xFF, bmsize);
     800             : 
     801             :     /*
     802             :      * Set pd_lower just past the end of the bitmap page data.  We could even
     803             :      * set pd_lower equal to pd_upper, but this is more precise and makes the
     804             :      * page look compressible to xlog.c.
     805             :      */
     806         388 :     ((PageHeader) pg)->pd_lower = ((char *) freep + bmsize) - (char *) pg;
     807         388 : }
     808             : 
     809             : 
     810             : /*
     811             :  *  _hash_squeezebucket(rel, bucket)
     812             :  *
     813             :  *  Try to squeeze the tuples onto pages occurring earlier in the
     814             :  *  bucket chain in an attempt to free overflow pages. When we start
     815             :  *  the "squeezing", the page from which we start taking tuples (the
     816             :  *  "read" page) is the last bucket in the bucket chain and the page
     817             :  *  onto which we start squeezing tuples (the "write" page) is the
     818             :  *  first page in the bucket chain.  The read page works backward and
     819             :  *  the write page works forward; the procedure terminates when the
     820             :  *  read page and write page are the same page.
     821             :  *
     822             :  *  At completion of this procedure, it is guaranteed that all pages in
     823             :  *  the bucket are nonempty, unless the bucket is totally empty (in
     824             :  *  which case all overflow pages will be freed).  The original implementation
     825             :  *  required that to be true on entry as well, but it's a lot easier for
     826             :  *  callers to leave empty overflow pages and let this guy clean it up.
     827             :  *
     828             :  *  Caller must acquire cleanup lock on the primary page of the target
     829             :  *  bucket to exclude any scans that are in progress, which could easily
     830             :  *  be confused into returning the same tuple more than once or some tuples
     831             :  *  not at all by the rearrangement we are performing here.  To prevent
     832             :  *  any concurrent scan to cross the squeeze scan we use lock chaining
     833             :  *  similar to hashbucketcleanup.  Refer comments atop hashbucketcleanup.
     834             :  *
     835             :  *  We need to retain a pin on the primary bucket to ensure that no concurrent
     836             :  *  split can start.
     837             :  *
     838             :  *  Since this function is invoked in VACUUM, we provide an access strategy
     839             :  *  parameter that controls fetches of the bucket pages.
     840             :  */
     841             : void
     842        1128 : _hash_squeezebucket(Relation rel,
     843             :                     Bucket bucket,
     844             :                     BlockNumber bucket_blkno,
     845             :                     Buffer bucket_buf,
     846             :                     BufferAccessStrategy bstrategy)
     847             : {
     848             :     BlockNumber wblkno;
     849             :     BlockNumber rblkno;
     850             :     Buffer      wbuf;
     851             :     Buffer      rbuf;
     852             :     Page        wpage;
     853             :     Page        rpage;
     854             :     HashPageOpaque wopaque;
     855             :     HashPageOpaque ropaque;
     856             : 
     857             :     /*
     858             :      * start squeezing into the primary bucket page.
     859             :      */
     860        1128 :     wblkno = bucket_blkno;
     861        1128 :     wbuf = bucket_buf;
     862        1128 :     wpage = BufferGetPage(wbuf);
     863        1128 :     wopaque = HashPageGetOpaque(wpage);
     864             : 
     865             :     /*
     866             :      * if there aren't any overflow pages, there's nothing to squeeze. caller
     867             :      * is responsible for releasing the pin on primary bucket page.
     868             :      */
     869        1128 :     if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
     870             :     {
     871        1054 :         LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
     872        1054 :         return;
     873             :     }
     874             : 
     875             :     /*
     876             :      * Find the last page in the bucket chain by starting at the base bucket
     877             :      * page and working forward.  Note: we assume that a hash bucket chain is
     878             :      * usually smaller than the buffer ring being used by VACUUM, else using
     879             :      * the access strategy here would be counterproductive.
     880             :      */
     881          74 :     rbuf = InvalidBuffer;
     882          74 :     ropaque = wopaque;
     883             :     do
     884             :     {
     885         386 :         rblkno = ropaque->hasho_nextblkno;
     886         386 :         if (rbuf != InvalidBuffer)
     887         312 :             _hash_relbuf(rel, rbuf);
     888         386 :         rbuf = _hash_getbuf_with_strategy(rel,
     889             :                                           rblkno,
     890             :                                           HASH_WRITE,
     891             :                                           LH_OVERFLOW_PAGE,
     892             :                                           bstrategy);
     893         386 :         rpage = BufferGetPage(rbuf);
     894         386 :         ropaque = HashPageGetOpaque(rpage);
     895             :         Assert(ropaque->hasho_bucket == bucket);
     896         386 :     } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
     897             : 
     898             :     /*
     899             :      * squeeze the tuples.
     900             :      */
     901             :     for (;;)
     902         150 :     {
     903             :         OffsetNumber roffnum;
     904             :         OffsetNumber maxroffnum;
     905             :         OffsetNumber deletable[MaxOffsetNumber];
     906             :         IndexTuple  itups[MaxIndexTuplesPerPage];
     907             :         Size        tups_size[MaxIndexTuplesPerPage];
     908             :         OffsetNumber itup_offsets[MaxIndexTuplesPerPage];
     909         224 :         uint16      ndeletable = 0;
     910         224 :         uint16      nitups = 0;
     911         224 :         Size        all_tups_size = 0;
     912             :         int         i;
     913         224 :         bool        retain_pin = false;
     914             : 
     915         230 : readpage:
     916             :         /* Scan each tuple in "read" page */
     917         230 :         maxroffnum = PageGetMaxOffsetNumber(rpage);
     918        5518 :         for (roffnum = FirstOffsetNumber;
     919             :              roffnum <= maxroffnum;
     920        5288 :              roffnum = OffsetNumberNext(roffnum))
     921             :         {
     922             :             IndexTuple  itup;
     923             :             Size        itemsz;
     924             : 
     925             :             /* skip dead tuples */
     926        5306 :             if (ItemIdIsDead(PageGetItemId(rpage, roffnum)))
     927           0 :                 continue;
     928             : 
     929        5306 :             itup = (IndexTuple) PageGetItem(rpage,
     930             :                                             PageGetItemId(rpage, roffnum));
     931        5306 :             itemsz = IndexTupleSize(itup);
     932        5306 :             itemsz = MAXALIGN(itemsz);
     933             : 
     934             :             /*
     935             :              * Walk up the bucket chain, looking for a page big enough for
     936             :              * this item and all other accumulated items.  Exit if we reach
     937             :              * the read page.
     938             :              */
     939        5462 :             while (PageGetFreeSpaceForMultipleTuples(wpage, nitups + 1) < (all_tups_size + itemsz))
     940             :             {
     941         174 :                 Buffer      next_wbuf = InvalidBuffer;
     942         174 :                 bool        tups_moved = false;
     943             : 
     944             :                 Assert(!PageIsEmpty(wpage));
     945             : 
     946         174 :                 if (wblkno == bucket_blkno)
     947          30 :                     retain_pin = true;
     948             : 
     949         174 :                 wblkno = wopaque->hasho_nextblkno;
     950             :                 Assert(BlockNumberIsValid(wblkno));
     951             : 
     952             :                 /* don't need to move to next page if we reached the read page */
     953         174 :                 if (wblkno != rblkno)
     954         162 :                     next_wbuf = _hash_getbuf_with_strategy(rel,
     955             :                                                            wblkno,
     956             :                                                            HASH_WRITE,
     957             :                                                            LH_OVERFLOW_PAGE,
     958             :                                                            bstrategy);
     959             : 
     960         174 :                 if (nitups > 0)
     961             :                 {
     962             :                     Assert(nitups == ndeletable);
     963             : 
     964             :                     /*
     965             :                      * This operation needs to log multiple tuples, prepare
     966             :                      * WAL for that.
     967             :                      */
     968           6 :                     if (RelationNeedsWAL(rel))
     969           6 :                         XLogEnsureRecordSpace(0, 3 + nitups);
     970             : 
     971           6 :                     START_CRIT_SECTION();
     972             : 
     973             :                     /*
     974             :                      * we have to insert tuples on the "write" page, being
     975             :                      * careful to preserve hashkey ordering.  (If we insert
     976             :                      * many tuples into the same "write" page it would be
     977             :                      * worth qsort'ing them).
     978             :                      */
     979           6 :                     _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
     980           6 :                     MarkBufferDirty(wbuf);
     981             : 
     982             :                     /* Delete tuples we already moved off read page */
     983           6 :                     PageIndexMultiDelete(rpage, deletable, ndeletable);
     984           6 :                     MarkBufferDirty(rbuf);
     985             : 
     986             :                     /* XLOG stuff */
     987           6 :                     if (RelationNeedsWAL(rel))
     988             :                     {
     989             :                         XLogRecPtr  recptr;
     990             :                         xl_hash_move_page_contents xlrec;
     991             : 
     992           6 :                         xlrec.ntups = nitups;
     993           6 :                         xlrec.is_prim_bucket_same_wrt = (wbuf == bucket_buf);
     994             : 
     995           6 :                         XLogBeginInsert();
     996           6 :                         XLogRegisterData((char *) &xlrec, SizeOfHashMovePageContents);
     997             : 
     998             :                         /*
     999             :                          * bucket buffer was not changed, but still needs to
    1000             :                          * be registered to ensure that we can acquire a
    1001             :                          * cleanup lock on it during replay.
    1002             :                          */
    1003           6 :                         if (!xlrec.is_prim_bucket_same_wrt)
    1004             :                         {
    1005           0 :                             int         flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
    1006             : 
    1007           0 :                             XLogRegisterBuffer(0, bucket_buf, flags);
    1008             :                         }
    1009             : 
    1010           6 :                         XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
    1011           6 :                         XLogRegisterBufData(1, (char *) itup_offsets,
    1012             :                                             nitups * sizeof(OffsetNumber));
    1013        2058 :                         for (i = 0; i < nitups; i++)
    1014        2052 :                             XLogRegisterBufData(1, (char *) itups[i], tups_size[i]);
    1015             : 
    1016           6 :                         XLogRegisterBuffer(2, rbuf, REGBUF_STANDARD);
    1017           6 :                         XLogRegisterBufData(2, (char *) deletable,
    1018             :                                             ndeletable * sizeof(OffsetNumber));
    1019             : 
    1020           6 :                         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_MOVE_PAGE_CONTENTS);
    1021             : 
    1022           6 :                         PageSetLSN(BufferGetPage(wbuf), recptr);
    1023           6 :                         PageSetLSN(BufferGetPage(rbuf), recptr);
    1024             :                     }
    1025             : 
    1026           6 :                     END_CRIT_SECTION();
    1027             : 
    1028           6 :                     tups_moved = true;
    1029             :                 }
    1030             : 
    1031             :                 /*
    1032             :                  * release the lock on previous page after acquiring the lock
    1033             :                  * on next page
    1034             :                  */
    1035         174 :                 if (retain_pin)
    1036          30 :                     LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
    1037             :                 else
    1038         144 :                     _hash_relbuf(rel, wbuf);
    1039             : 
    1040             :                 /* nothing more to do if we reached the read page */
    1041         174 :                 if (rblkno == wblkno)
    1042             :                 {
    1043          12 :                     _hash_relbuf(rel, rbuf);
    1044          74 :                     return;
    1045             :                 }
    1046             : 
    1047         162 :                 wbuf = next_wbuf;
    1048         162 :                 wpage = BufferGetPage(wbuf);
    1049         162 :                 wopaque = HashPageGetOpaque(wpage);
    1050             :                 Assert(wopaque->hasho_bucket == bucket);
    1051         162 :                 retain_pin = false;
    1052             : 
    1053             :                 /* be tidy */
    1054        2214 :                 for (i = 0; i < nitups; i++)
    1055        2052 :                     pfree(itups[i]);
    1056         162 :                 nitups = 0;
    1057         162 :                 all_tups_size = 0;
    1058         162 :                 ndeletable = 0;
    1059             : 
    1060             :                 /*
    1061             :                  * after moving the tuples, rpage would have been compacted,
    1062             :                  * so we need to rescan it.
    1063             :                  */
    1064         162 :                 if (tups_moved)
    1065           6 :                     goto readpage;
    1066             :             }
    1067             : 
    1068             :             /* remember tuple for deletion from "read" page */
    1069        5288 :             deletable[ndeletable++] = roffnum;
    1070             : 
    1071             :             /*
    1072             :              * we need a copy of index tuples as they can be freed as part of
    1073             :              * overflow page, however we need them to write a WAL record in
    1074             :              * _hash_freeovflpage.
    1075             :              */
    1076        5288 :             itups[nitups] = CopyIndexTuple(itup);
    1077        5288 :             tups_size[nitups++] = itemsz;
    1078        5288 :             all_tups_size += itemsz;
    1079             :         }
    1080             : 
    1081             :         /*
    1082             :          * If we reach here, there are no live tuples on the "read" page ---
    1083             :          * it was empty when we got to it, or we moved them all.  So we can
    1084             :          * just free the page without bothering with deleting tuples
    1085             :          * individually.  Then advance to the previous "read" page.
    1086             :          *
    1087             :          * Tricky point here: if our read and write pages are adjacent in the
    1088             :          * bucket chain, our write lock on wbuf will conflict with
    1089             :          * _hash_freeovflpage's attempt to update the sibling links of the
    1090             :          * removed page.  In that case, we don't need to lock it again.
    1091             :          */
    1092         212 :         rblkno = ropaque->hasho_prevblkno;
    1093             :         Assert(BlockNumberIsValid(rblkno));
    1094             : 
    1095             :         /* free this overflow page (releases rbuf) */
    1096         212 :         _hash_freeovflpage(rel, bucket_buf, rbuf, wbuf, itups, itup_offsets,
    1097             :                            tups_size, nitups, bstrategy);
    1098             : 
    1099             :         /* be tidy */
    1100        3448 :         for (i = 0; i < nitups; i++)
    1101        3236 :             pfree(itups[i]);
    1102             : 
    1103             :         /* are we freeing the page adjacent to wbuf? */
    1104         212 :         if (rblkno == wblkno)
    1105             :         {
    1106             :             /* retain the pin on primary bucket page till end of bucket scan */
    1107          62 :             if (wblkno == bucket_blkno)
    1108          44 :                 LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
    1109             :             else
    1110          18 :                 _hash_relbuf(rel, wbuf);
    1111          62 :             return;
    1112             :         }
    1113             : 
    1114         150 :         rbuf = _hash_getbuf_with_strategy(rel,
    1115             :                                           rblkno,
    1116             :                                           HASH_WRITE,
    1117             :                                           LH_OVERFLOW_PAGE,
    1118             :                                           bstrategy);
    1119         150 :         rpage = BufferGetPage(rbuf);
    1120         150 :         ropaque = HashPageGetOpaque(rpage);
    1121             :         Assert(ropaque->hasho_bucket == bucket);
    1122             :     }
    1123             : 
    1124             :     /* NOTREACHED */
    1125             : }

Generated by: LCOV version 1.14