LCOV - code coverage report
Current view: top level - src/backend/access/hash - hashovfl.c (source / functions)
Test: PostgreSQL 19devel          Test Date: 2026-02-17 17:20:33
Coverage: Lines: 91.6 % (349 of 381 hit)          Functions: 100.0 % (7 of 7 hit)

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * hashovfl.c
       4              :  *    Overflow page management code for the Postgres hash access method
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/access/hash/hashovfl.c
      12              :  *
      13              :  * NOTES
      14              :  *    Overflow pages look like ordinary relation pages.
      15              :  *
      16              :  *-------------------------------------------------------------------------
      17              :  */
      18              : #include "postgres.h"
      19              : 
      20              : #include "access/hash.h"
      21              : #include "access/hash_xlog.h"
      22              : #include "access/xloginsert.h"
      23              : #include "miscadmin.h"
      24              : #include "utils/rel.h"
      25              : 
      26              : 
      27              : static uint32 _hash_firstfreebit(uint32 map);
      28              : 
      29              : 
      30              : /*
      31              :  * Convert overflow page bit number (its index in the free-page bitmaps)
      32              :  * to block number within the index.
      33              :  */
      34              : static BlockNumber
      35          199 : bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum)
      36              : {
      37          199 :     uint32      splitnum = metap->hashm_ovflpoint;
      38              :     uint32      i;
      39              : 
      40              :     /* Convert zero-based bitnumber to 1-based page number */
      41          199 :     ovflbitnum += 1;
      42              : 
      43              :     /* Determine the split number for this page (must be >= 1) */
      44          199 :     for (i = 1;
      45         1089 :          i < splitnum && ovflbitnum > metap->hashm_spares[i];
      46          890 :          i++)
      47              :          /* loop */ ;
      48              : 
      49              :     /*
      50              :      * Convert to absolute page number by adding the number of bucket pages
      51              :      * that exist before this split point.
      52              :      */
      53          199 :     return (BlockNumber) (_hash_get_totalbuckets(i) + ovflbitnum);
      54              : }
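/*
 * Editorial worked example (not part of the original file; assumes an index
 * built with the minimum of two initial buckets): block 0 is the metapage,
 * blocks 1 and 2 are buckets 0 and 1, and the initial bitmap page occupies
 * overflow bit number 0.  bitno_to_blkno(metap, 0) then sees ovflbitnum = 1,
 * stops at split point i = 1, and returns
 * _hash_get_totalbuckets(1) + 1 = 2 + 1 = block 3, the page immediately
 * after the last bucket page of that split point.
 */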
      55              : 
      56              : /*
      57              :  * _hash_ovflblkno_to_bitno
      58              :  *
      59              :  * Convert overflow page block number to bit number for free-page bitmap.
      60              :  */
      61              : uint32
      62           98 : _hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
      63              : {
      64           98 :     uint32      splitnum = metap->hashm_ovflpoint;
      65              :     uint32      i;
      66              :     uint32      bitnum;
      67              : 
      68              :     /* Determine the split number containing this page */
      69          357 :     for (i = 1; i <= splitnum; i++)
      70              :     {
      71          357 :         if (ovflblkno <= (BlockNumber) _hash_get_totalbuckets(i))
      72            4 :             break;              /* oops */
      73          353 :         bitnum = ovflblkno - _hash_get_totalbuckets(i);
      74              : 
      75              :         /*
       76              :          * bitnum has to be greater than the number of overflow pages added
       77              :          * at previous split points.  The overflow pages for this splitnum
       78              :          * (i), if any, start at block (_hash_get_totalbuckets(i) +
       79              :          * metap->hashm_spares[i - 1] + 1).
      80              :          */
      81          353 :         if (bitnum > metap->hashm_spares[i - 1] &&
      82          353 :             bitnum <= metap->hashm_spares[i])
      83           94 :             return bitnum - 1;  /* -1 to convert 1-based to 0-based */
      84              :     }
      85              : 
      86            4 :     ereport(ERROR,
      87              :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
      88              :              errmsg("invalid overflow block number %u", ovflblkno)));
      89              :     return 0;                   /* keep compiler quiet */
      90              : }
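/*
 * Editorial sketch (not part of the original file): for the bit number of any
 * overflow or bitmap page that has actually been allocated, this function is
 * the inverse of bitno_to_blkno(), so a round-trip check could look like:
 */
#ifdef NOT_USED
static void
_hash_check_ovfl_roundtrip(HashMetaPage metap, uint32 bitnum)
{
	BlockNumber blkno = bitno_to_blkno(metap, bitnum);

	Assert(_hash_ovflblkno_to_bitno(metap, blkno) == bitnum);
}
#endif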
      91              : 
      92              : /*
      93              :  *  _hash_addovflpage
      94              :  *
      95              :  *  Add an overflow page to the bucket whose last page is pointed to by 'buf'.
      96              :  *
       97              :  *  On entry, the caller must hold a pin but no lock on 'buf'.  Unless asked
       98              :  *  to retain it, the pin is dropped before exiting (we assume the caller is
       99              :  *  not interested in 'buf' anymore); a pin can be retained only on the
      100              :  *  primary bucket page.  The returned overflow page will be pinned and
      101              :  *  write-locked; it is guaranteed to be empty.
     102              :  *
     103              :  *  The caller must hold a pin, but no lock, on the metapage buffer.
     104              :  *  That buffer is returned in the same state.
     105              :  *
     106              :  * NB: since this could be executed concurrently by multiple processes,
     107              :  * one should not assume that the returned overflow page will be the
     108              :  * immediate successor of the originally passed 'buf'.  Additional overflow
     109              :  * pages might have been added to the bucket chain in between.
     110              :  */
     111              : Buffer
     112          199 : _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
     113              : {
     114              :     Buffer      ovflbuf;
     115              :     Page        page;
     116              :     Page        ovflpage;
     117              :     HashPageOpaque pageopaque;
     118              :     HashPageOpaque ovflopaque;
     119              :     HashMetaPage metap;
     120          199 :     Buffer      mapbuf = InvalidBuffer;
     121          199 :     Buffer      newmapbuf = InvalidBuffer;
     122              :     BlockNumber blkno;
     123              :     uint32      orig_firstfree;
     124              :     uint32      splitnum;
     125          199 :     uint32     *freep = NULL;
     126              :     uint32      max_ovflpg;
     127              :     uint32      bit;
     128              :     uint32      bitmap_page_bit;
     129              :     uint32      first_page;
     130              :     uint32      last_bit;
     131              :     uint32      last_page;
     132              :     uint32      i,
     133              :                 j;
     134          199 :     bool        page_found = false;
     135              : 
     136              :     /*
      137              :      * Write-lock the tail page.  We must maintain the locking order here:
      138              :      * first acquire the lock on the tail page of the bucket, then on the
      139              :      * meta page in order to find and lock a bitmap page (the meta page lock
      140              :      * is released once a bitmap page has been found), and finally acquire
      141              :      * the lock on the new overflow buffer.  This locking order is needed to
      142              :      * avoid deadlock with backends that are doing inserts.
     143              :      *
     144              :      * Note: We could have avoided locking many buffers here if we made two
     145              :      * WAL records for acquiring an overflow page (one to allocate an overflow
      146              :      * page and another to add it to the bucket's overflow chain).  However,
      147              :      * doing so can leak an overflow page if the system crashes after allocation.
     148              :      * Needless to say, it is better to have a single record from a
     149              :      * performance point of view as well.
     150              :      */
     151          199 :     LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     152              : 
     153              :     /* probably redundant... */
     154          199 :     _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
     155              : 
     156              :     /* loop to find current tail page, in case someone else inserted too */
     157              :     for (;;)
     158            0 :     {
     159              :         BlockNumber nextblkno;
     160              : 
     161          199 :         page = BufferGetPage(buf);
     162          199 :         pageopaque = HashPageGetOpaque(page);
     163          199 :         nextblkno = pageopaque->hasho_nextblkno;
     164              : 
     165          199 :         if (!BlockNumberIsValid(nextblkno))
     166          199 :             break;
     167              : 
     168              :         /* we assume we do not need to write the unmodified page */
     169            0 :         if (retain_pin)
     170              :         {
     171              :             /* pin will be retained only for the primary bucket page */
     172              :             Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_BUCKET_PAGE);
     173            0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     174              :         }
     175              :         else
     176            0 :             _hash_relbuf(rel, buf);
     177              : 
     178            0 :         retain_pin = false;
     179              : 
     180            0 :         buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
     181              :     }
     182              : 
     183              :     /* Get exclusive lock on the meta page */
     184          199 :     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     185              : 
     186          199 :     _hash_checkpage(rel, metabuf, LH_META_PAGE);
     187          199 :     metap = HashPageGetMeta(BufferGetPage(metabuf));
     188              : 
     189              :     /* start search at hashm_firstfree */
     190          199 :     orig_firstfree = metap->hashm_firstfree;
     191          199 :     first_page = orig_firstfree >> BMPG_SHIFT(metap);
     192          199 :     bit = orig_firstfree & BMPG_MASK(metap);
     193          199 :     i = first_page;
     194          199 :     j = bit / BITS_PER_MAP;
     195          199 :     bit &= ~(BITS_PER_MAP - 1);
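    /*
     * Editorial example of the arithmetic above (illustrative values, not
     * from the original file): with BMPG_SHIFT(metap) = 15, i.e. 32768 bits
     * per bitmap page, and BITS_PER_MAP = 32 bits per uint32 word, an
     * orig_firstfree of 33000 gives first_page = 1, an in-page bit of 232,
     * word index j = 7, and 'bit' rounded down to 224 (the first bit of
     * that word), so the scan below starts at word 7 of bitmap page 1.
     */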
     196              : 
     197              :     /* outer loop iterates once per bitmap page */
     198              :     for (;;)
     199          165 :     {
     200              :         BlockNumber mapblkno;
     201              :         Page        mappage;
     202              :         uint32      last_inpage;
     203              : 
     204              :         /* want to end search with the last existing overflow page */
     205          364 :         splitnum = metap->hashm_ovflpoint;
     206          364 :         max_ovflpg = metap->hashm_spares[splitnum] - 1;
     207          364 :         last_page = max_ovflpg >> BMPG_SHIFT(metap);
     208          364 :         last_bit = max_ovflpg & BMPG_MASK(metap);
     209              : 
     210          364 :         if (i > last_page)
     211          165 :             break;
     212              : 
     213              :         Assert(i < metap->hashm_nmaps);
     214          199 :         mapblkno = metap->hashm_mapp[i];
     215              : 
     216          199 :         if (i == last_page)
     217          199 :             last_inpage = last_bit;
     218              :         else
     219            0 :             last_inpage = BMPGSZ_BIT(metap) - 1;
     220              : 
     221              :         /* Release exclusive lock on metapage while reading bitmap page */
     222          199 :         LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     223              : 
     224          199 :         mapbuf = _hash_getbuf(rel, mapblkno, HASH_WRITE, LH_BITMAP_PAGE);
     225          199 :         mappage = BufferGetPage(mapbuf);
     226          199 :         freep = HashPageGetBitmap(mappage);
     227              : 
     228          364 :         for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
     229              :         {
     230          199 :             if (freep[j] != ALL_SET)
     231              :             {
     232           34 :                 page_found = true;
     233              : 
     234              :                 /* Reacquire exclusive lock on the meta page */
     235           34 :                 LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     236              : 
     237              :                 /* convert bit to bit number within page */
     238           34 :                 bit += _hash_firstfreebit(freep[j]);
     239           34 :                 bitmap_page_bit = bit;
     240              : 
     241              :                 /* convert bit to absolute bit number */
     242           34 :                 bit += (i << BMPG_SHIFT(metap));
     243              :                 /* Calculate address of the recycled overflow page */
     244           34 :                 blkno = bitno_to_blkno(metap, bit);
     245              : 
     246              :                 /* Fetch and init the recycled page */
     247           34 :                 ovflbuf = _hash_getinitbuf(rel, blkno);
     248              : 
     249           34 :                 goto found;
     250              :             }
     251              :         }
     252              : 
     253              :         /* No free space here, try to advance to next map page */
     254          165 :         _hash_relbuf(rel, mapbuf);
     255          165 :         mapbuf = InvalidBuffer;
     256          165 :         i++;
     257          165 :         j = 0;                  /* scan from start of next map page */
     258          165 :         bit = 0;
     259              : 
     260              :         /* Reacquire exclusive lock on the meta page */
     261          165 :         LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     262              :     }
     263              : 
     264              :     /*
     265              :      * No free pages --- have to extend the relation to add an overflow page.
     266              :      * First, check to see if we have to add a new bitmap page too.
     267              :      */
     268          165 :     if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
     269              :     {
     270              :         /*
     271              :          * We create the new bitmap page with all pages marked "in use".
     272              :          * Actually two pages in the new bitmap's range will exist
     273              :          * immediately: the bitmap page itself, and the following page which
     274              :          * is the one we return to the caller.  Both of these are correctly
     275              :          * marked "in use".  Subsequent pages do not exist yet, but it is
     276              :          * convenient to pre-mark them as "in use" too.
     277              :          */
     278            0 :         bit = metap->hashm_spares[splitnum];
     279              : 
     280              :         /* metapage already has a write lock */
     281            0 :         if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
     282            0 :             ereport(ERROR,
     283              :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     284              :                      errmsg("out of overflow pages in hash index \"%s\"",
     285              :                             RelationGetRelationName(rel))));
     286              : 
     287            0 :         newmapbuf = _hash_getnewbuf(rel, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
     288              :     }
     289              :     else
     290              :     {
     291              :         /*
     292              :          * Nothing to do here; since the page will be past the last used page,
     293              :          * we know its bitmap bit was preinitialized to "in use".
     294              :          */
     295              :     }
     296              : 
     297              :     /* Calculate address of the new overflow page */
     298          165 :     bit = BufferIsValid(newmapbuf) ?
     299          165 :         metap->hashm_spares[splitnum] + 1 : metap->hashm_spares[splitnum];
     300          165 :     blkno = bitno_to_blkno(metap, bit);
     301              : 
     302              :     /*
     303              :      * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
     304              :      * relation length stays in sync with ours.  XXX It's annoying to do this
     305              :      * with metapage write lock held; would be better to use a lock that
     306              :      * doesn't block incoming searches.
     307              :      *
      308              :      * It is okay to hold two buffer locks here (one on the tail page of the
      309              :      * bucket, the other on the new overflow page) since there cannot be anyone else
     310              :      * contending for access to ovflbuf.
     311              :      */
     312          165 :     ovflbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
     313              : 
     314          199 : found:
     315              : 
     316              :     /*
      317              :      * Do the update.  No ereport(ERROR) until changes are logged.  We log
      318              :      * the bitmap page and overflow page changes together so that a crash
      319              :      * after adding the new page cannot leak it.
     320              :      */
     321          199 :     START_CRIT_SECTION();
     322              : 
     323          199 :     if (page_found)
     324              :     {
     325              :         Assert(BufferIsValid(mapbuf));
     326              : 
     327              :         /* mark page "in use" in the bitmap */
     328           34 :         SETBIT(freep, bitmap_page_bit);
     329           34 :         MarkBufferDirty(mapbuf);
     330              :     }
     331              :     else
     332              :     {
     333              :         /* update the count to indicate new overflow page is added */
     334          165 :         metap->hashm_spares[splitnum]++;
     335              : 
     336          165 :         if (BufferIsValid(newmapbuf))
     337              :         {
     338            0 :             _hash_initbitmapbuffer(newmapbuf, metap->hashm_bmsize, false);
     339            0 :             MarkBufferDirty(newmapbuf);
     340              : 
     341              :             /* add the new bitmap page to the metapage's list of bitmaps */
     342            0 :             metap->hashm_mapp[metap->hashm_nmaps] = BufferGetBlockNumber(newmapbuf);
     343            0 :             metap->hashm_nmaps++;
     344            0 :             metap->hashm_spares[splitnum]++;
     345              :         }
     346              : 
     347          165 :         MarkBufferDirty(metabuf);
     348              : 
     349              :         /*
      350              :          * For a new overflow page, we don't need to explicitly set the bit
      351              :          * in the bitmap page; by default it is already set to "in use".
     352              :          */
     353              :     }
     354              : 
     355              :     /*
     356              :      * Adjust hashm_firstfree to avoid redundant searches.  But don't risk
     357              :      * changing it if someone moved it while we were searching bitmap pages.
     358              :      */
     359          199 :     if (metap->hashm_firstfree == orig_firstfree)
     360              :     {
     361          199 :         metap->hashm_firstfree = bit + 1;
     362          199 :         MarkBufferDirty(metabuf);
     363              :     }
     364              : 
     365              :     /* initialize new overflow page */
     366          199 :     ovflpage = BufferGetPage(ovflbuf);
     367          199 :     ovflopaque = HashPageGetOpaque(ovflpage);
     368          199 :     ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
     369          199 :     ovflopaque->hasho_nextblkno = InvalidBlockNumber;
     370          199 :     ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
     371          199 :     ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
     372          199 :     ovflopaque->hasho_page_id = HASHO_PAGE_ID;
     373              : 
     374          199 :     MarkBufferDirty(ovflbuf);
     375              : 
     376              :     /* logically chain overflow page to previous page */
     377          199 :     pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
     378              : 
     379          199 :     MarkBufferDirty(buf);
     380              : 
     381              :     /* XLOG stuff */
     382          199 :     if (RelationNeedsWAL(rel))
     383              :     {
     384              :         XLogRecPtr  recptr;
     385              :         xl_hash_add_ovfl_page xlrec;
     386              : 
     387          199 :         xlrec.bmpage_found = page_found;
     388          199 :         xlrec.bmsize = metap->hashm_bmsize;
     389              : 
     390          199 :         XLogBeginInsert();
     391          199 :         XLogRegisterData(&xlrec, SizeOfHashAddOvflPage);
     392              : 
     393          199 :         XLogRegisterBuffer(0, ovflbuf, REGBUF_WILL_INIT);
     394          199 :         XLogRegisterBufData(0, &pageopaque->hasho_bucket, sizeof(Bucket));
     395              : 
     396          199 :         XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
     397              : 
     398          199 :         if (BufferIsValid(mapbuf))
     399              :         {
     400           34 :             XLogRegisterBuffer(2, mapbuf, REGBUF_STANDARD);
     401           34 :             XLogRegisterBufData(2, &bitmap_page_bit, sizeof(uint32));
     402              :         }
     403              : 
     404          199 :         if (BufferIsValid(newmapbuf))
     405            0 :             XLogRegisterBuffer(3, newmapbuf, REGBUF_WILL_INIT);
     406              : 
     407          199 :         XLogRegisterBuffer(4, metabuf, REGBUF_STANDARD);
     408          199 :         XLogRegisterBufData(4, &metap->hashm_firstfree, sizeof(uint32));
     409              : 
     410          199 :         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_ADD_OVFL_PAGE);
     411              : 
     412          199 :         PageSetLSN(BufferGetPage(ovflbuf), recptr);
     413          199 :         PageSetLSN(BufferGetPage(buf), recptr);
     414              : 
     415          199 :         if (BufferIsValid(mapbuf))
     416           34 :             PageSetLSN(BufferGetPage(mapbuf), recptr);
     417              : 
     418          199 :         if (BufferIsValid(newmapbuf))
     419            0 :             PageSetLSN(BufferGetPage(newmapbuf), recptr);
     420              : 
     421          199 :         PageSetLSN(BufferGetPage(metabuf), recptr);
     422              :     }
     423              : 
     424          199 :     END_CRIT_SECTION();
     425              : 
     426          199 :     if (retain_pin)
     427           55 :         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     428              :     else
     429          144 :         _hash_relbuf(rel, buf);
     430              : 
     431          199 :     if (BufferIsValid(mapbuf))
     432           34 :         _hash_relbuf(rel, mapbuf);
     433              : 
     434          199 :     LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     435              : 
     436          199 :     if (BufferIsValid(newmapbuf))
     437            0 :         _hash_relbuf(rel, newmapbuf);
     438              : 
     439          199 :     return ovflbuf;
     440              : }
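/*
 * Editorial sketch of a typical call (how an insert path might obtain more
 * space; everything here other than _hash_addovflpage and _hash_relbuf is
 * illustrative):
 *
 *		// caller holds a pin, but no lock, on both 'buf' and 'metabuf'
 *		ovflbuf = _hash_addovflpage(rel, metabuf, buf, retain_pin);
 *		// ovflbuf is pinned, write-locked, and guaranteed empty;
 *		// add the new index tuple to it, then release it
 *		_hash_relbuf(rel, ovflbuf);
 */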
     441              : 
     442              : /*
     443              :  *  _hash_firstfreebit()
     444              :  *
     445              :  *  Return the number of the first bit that is not set in the word 'map'.
     446              :  */
     447              : static uint32
     448           34 : _hash_firstfreebit(uint32 map)
     449              : {
     450              :     uint32      i,
     451              :                 mask;
     452              : 
     453           34 :     mask = 0x1;
     454          287 :     for (i = 0; i < BITS_PER_MAP; i++)
     455              :     {
     456          287 :         if (!(mask & map))
     457           34 :             return i;
     458          253 :         mask <<= 1;
     459              :     }
     460              : 
     461            0 :     elog(ERROR, "firstfreebit found no free bit");
     462              : 
     463              :     return 0;                   /* keep compiler quiet */
     464              : }
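/*
 * Editorial note: an equivalent way to compute this (a sketch, not what this
 * file does) is to take the position of the rightmost one bit of the
 * complement, e.g. using port/pg_bitutils.h and assuming map != ALL_SET:
 *
 *		return pg_rightmost_one_pos32(~map);
 */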
     465              : 
     466              : /*
     467              :  *  _hash_freeovflpage() -
     468              :  *
     469              :  *  Remove this overflow page from its bucket's chain, and mark the page as
     470              :  *  free.  On entry, ovflbuf is write-locked; it is released before exiting.
     471              :  *
      472              :  *  Add the tuples (itups) to wbuf in this function.  We could do that in the
      473              :  *  caller as well, but the advantage of doing it here is that we can easily
      474              :  *  write the WAL for the XLOG_HASH_SQUEEZE_PAGE operation.  Adding the tuples
      475              :  *  and removing the overflow page have to be done atomically; otherwise,
      476              :  *  during replay on a standby, users might find duplicate records.
     477              :  *
     478              :  *  Since this function is invoked in VACUUM, we provide an access strategy
     479              :  *  parameter that controls fetches of the bucket pages.
     480              :  *
     481              :  *  Returns the block number of the page that followed the given page
     482              :  *  in the bucket, or InvalidBlockNumber if no following page.
     483              :  *
      484              :  *  NB: the caller must not hold a lock on the metapage, nor on the page that
      485              :  *  follows ovflbuf in the bucket chain.  We don't acquire a lock on the page
      486              :  *  preceding ovflbuf in the chain if it is the same as wbuf, because the
      487              :  *  caller already holds a lock on it.
     488              :  */
     489              : BlockNumber
     490           94 : _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
     491              :                    Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets,
     492              :                    Size *tups_size, uint16 nitups,
     493              :                    BufferAccessStrategy bstrategy)
     494              : {
     495              :     HashMetaPage metap;
     496              :     Buffer      metabuf;
     497              :     Buffer      mapbuf;
     498              :     BlockNumber ovflblkno;
     499              :     BlockNumber prevblkno;
     500              :     BlockNumber blkno;
     501              :     BlockNumber nextblkno;
     502              :     BlockNumber writeblkno;
     503              :     HashPageOpaque ovflopaque;
     504              :     Page        ovflpage;
     505              :     Page        mappage;
     506              :     uint32     *freep;
     507              :     uint32      ovflbitno;
     508              :     int32       bitmappage,
     509              :                 bitmapbit;
     510              :     Bucket      bucket PG_USED_FOR_ASSERTS_ONLY;
     511           94 :     Buffer      prevbuf = InvalidBuffer;
     512           94 :     Buffer      nextbuf = InvalidBuffer;
     513           94 :     bool        update_metap = false;
     514              : 
     515              :     /* Get information from the doomed page */
     516           94 :     _hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE);
     517           94 :     ovflblkno = BufferGetBlockNumber(ovflbuf);
     518           94 :     ovflpage = BufferGetPage(ovflbuf);
     519           94 :     ovflopaque = HashPageGetOpaque(ovflpage);
     520           94 :     nextblkno = ovflopaque->hasho_nextblkno;
     521           94 :     prevblkno = ovflopaque->hasho_prevblkno;
     522           94 :     writeblkno = BufferGetBlockNumber(wbuf);
     523           94 :     bucket = ovflopaque->hasho_bucket;
     524              : 
     525              :     /*
      526              :      * Fix up the bucket chain.  This is a doubly-linked list, so we must fix
     527              :      * up the bucket chain members behind and ahead of the overflow page being
     528              :      * deleted.  Concurrency issues are avoided by using lock chaining as
     529              :      * described atop hashbucketcleanup.
     530              :      */
     531           94 :     if (BlockNumberIsValid(prevblkno))
     532              :     {
     533           94 :         if (prevblkno == writeblkno)
     534           31 :             prevbuf = wbuf;
     535              :         else
     536           63 :             prevbuf = _hash_getbuf_with_strategy(rel,
     537              :                                                  prevblkno,
     538              :                                                  HASH_WRITE,
     539              :                                                  LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
     540              :                                                  bstrategy);
     541              :     }
     542           94 :     if (BlockNumberIsValid(nextblkno))
     543            0 :         nextbuf = _hash_getbuf_with_strategy(rel,
     544              :                                              nextblkno,
     545              :                                              HASH_WRITE,
     546              :                                              LH_OVERFLOW_PAGE,
     547              :                                              bstrategy);
     548              : 
     549              :     /* Note: bstrategy is intentionally not used for metapage and bitmap */
     550              : 
     551              :     /* Read the metapage so we can determine which bitmap page to use */
     552           94 :     metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
     553           94 :     metap = HashPageGetMeta(BufferGetPage(metabuf));
     554              : 
      555              :     /* Identify which bit to clear */
     556           94 :     ovflbitno = _hash_ovflblkno_to_bitno(metap, ovflblkno);
     557              : 
     558           94 :     bitmappage = ovflbitno >> BMPG_SHIFT(metap);
     559           94 :     bitmapbit = ovflbitno & BMPG_MASK(metap);
     560              : 
     561           94 :     if (bitmappage >= metap->hashm_nmaps)
     562            0 :         elog(ERROR, "invalid overflow bit number %u", ovflbitno);
     563           94 :     blkno = metap->hashm_mapp[bitmappage];
     564              : 
     565              :     /* Release metapage lock while we access the bitmap page */
     566           94 :     LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     567              : 
     568              :     /* read the bitmap page to clear the bitmap bit */
     569           94 :     mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BITMAP_PAGE);
     570           94 :     mappage = BufferGetPage(mapbuf);
     571           94 :     freep = HashPageGetBitmap(mappage);
     572              :     Assert(ISSET(freep, bitmapbit));
     573              : 
     574              :     /* Get write-lock on metapage to update firstfree */
     575           94 :     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     576              : 
     577              :     /* This operation needs to log multiple tuples, prepare WAL for that */
     578           94 :     if (RelationNeedsWAL(rel))
     579           94 :         XLogEnsureRecordSpace(HASH_XLOG_FREE_OVFL_BUFS, 4 + nitups);
     580              : 
     581           94 :     START_CRIT_SECTION();
     582              : 
     583              :     /*
     584              :      * we have to insert tuples on the "write" page, being careful to preserve
     585              :      * hashkey ordering.  (If we insert many tuples into the same "write" page
     586              :      * it would be worth qsort'ing them).
     587              :      */
     588           94 :     if (nitups > 0)
     589              :     {
     590           43 :         _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
     591           43 :         MarkBufferDirty(wbuf);
     592              :     }
     593              : 
     594              :     /*
     595              :      * Reinitialize the freed overflow page.  Just zeroing the page won't
     596              :      * work, because WAL replay routines expect pages to be initialized. See
     597              :      * explanation of RBM_NORMAL mode atop XLogReadBufferExtended.  We are
     598              :      * careful to make the special space valid here so that tools like
     599              :      * pageinspect won't get confused.
     600              :      */
     601           94 :     _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
     602              : 
     603           94 :     ovflopaque = HashPageGetOpaque(ovflpage);
     604              : 
     605           94 :     ovflopaque->hasho_prevblkno = InvalidBlockNumber;
     606           94 :     ovflopaque->hasho_nextblkno = InvalidBlockNumber;
     607           94 :     ovflopaque->hasho_bucket = InvalidBucket;
     608           94 :     ovflopaque->hasho_flag = LH_UNUSED_PAGE;
     609           94 :     ovflopaque->hasho_page_id = HASHO_PAGE_ID;
     610              : 
     611           94 :     MarkBufferDirty(ovflbuf);
     612              : 
     613           94 :     if (BufferIsValid(prevbuf))
     614              :     {
     615           94 :         Page        prevpage = BufferGetPage(prevbuf);
     616           94 :         HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
     617              : 
     618              :         Assert(prevopaque->hasho_bucket == bucket);
     619           94 :         prevopaque->hasho_nextblkno = nextblkno;
     620           94 :         MarkBufferDirty(prevbuf);
     621              :     }
     622           94 :     if (BufferIsValid(nextbuf))
     623              :     {
     624            0 :         Page        nextpage = BufferGetPage(nextbuf);
     625            0 :         HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
     626              : 
     627              :         Assert(nextopaque->hasho_bucket == bucket);
     628            0 :         nextopaque->hasho_prevblkno = prevblkno;
     629            0 :         MarkBufferDirty(nextbuf);
     630              :     }
     631              : 
     632              :     /* Clear the bitmap bit to indicate that this overflow page is free */
     633           94 :     CLRBIT(freep, bitmapbit);
     634           94 :     MarkBufferDirty(mapbuf);
     635              : 
     636              :     /* if this is now the first free page, update hashm_firstfree */
     637           94 :     if (ovflbitno < metap->hashm_firstfree)
     638              :     {
     639           91 :         metap->hashm_firstfree = ovflbitno;
     640           91 :         update_metap = true;
     641           91 :         MarkBufferDirty(metabuf);
     642              :     }
     643              : 
     644              :     /* XLOG stuff */
     645           94 :     if (RelationNeedsWAL(rel))
     646              :     {
     647              :         xl_hash_squeeze_page xlrec;
     648              :         XLogRecPtr  recptr;
     649              :         int         i;
     650           94 :         bool        mod_wbuf = false;
     651              : 
     652           94 :         xlrec.prevblkno = prevblkno;
     653           94 :         xlrec.nextblkno = nextblkno;
     654           94 :         xlrec.ntups = nitups;
     655           94 :         xlrec.is_prim_bucket_same_wrt = (wbuf == bucketbuf);
     656           94 :         xlrec.is_prev_bucket_same_wrt = (wbuf == prevbuf);
     657              : 
     658           94 :         XLogBeginInsert();
     659           94 :         XLogRegisterData(&xlrec, SizeOfHashSqueezePage);
     660              : 
     661              :         /*
     662              :          * bucket buffer was not changed, but still needs to be registered to
     663              :          * ensure that we can acquire a cleanup lock on it during replay.
     664              :          */
     665           94 :         if (!xlrec.is_prim_bucket_same_wrt)
     666              :         {
     667           24 :             uint8       flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
     668              : 
     669           24 :             XLogRegisterBuffer(0, bucketbuf, flags);
     670              :         }
     671              : 
     672           94 :         if (xlrec.ntups > 0)
     673              :         {
     674           43 :             XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
     675              : 
     676              :             /* Remember that wbuf is modified. */
     677           43 :             mod_wbuf = true;
     678              : 
     679           43 :             XLogRegisterBufData(1, itup_offsets,
     680              :                                 nitups * sizeof(OffsetNumber));
     681         1607 :             for (i = 0; i < nitups; i++)
     682         1564 :                 XLogRegisterBufData(1, itups[i], tups_size[i]);
     683              :         }
     684           51 :         else if (xlrec.is_prim_bucket_same_wrt || xlrec.is_prev_bucket_same_wrt)
     685              :         {
     686              :             uint8       wbuf_flags;
     687              : 
     688              :             /*
      689              :              * The write buffer needs to be registered even if no tuples are
      690              :              * added to it, so that during replay we can acquire a cleanup
      691              :              * lock on it if it is the same as the primary bucket buffer, or
      692              :              * update its nextblkno if it is the same as the previous page's buffer.
     693              :              */
     694              :             Assert(xlrec.ntups == 0);
     695              : 
     696           48 :             wbuf_flags = REGBUF_STANDARD;
     697           48 :             if (!xlrec.is_prev_bucket_same_wrt)
     698              :             {
     699           39 :                 wbuf_flags |= REGBUF_NO_CHANGE;
     700              :             }
     701              :             else
     702              :             {
     703              :                 /* Remember that wbuf is modified. */
     704            9 :                 mod_wbuf = true;
     705              :             }
     706           48 :             XLogRegisterBuffer(1, wbuf, wbuf_flags);
     707              :         }
     708              : 
     709           94 :         XLogRegisterBuffer(2, ovflbuf, REGBUF_STANDARD);
     710              : 
     711              :         /*
      712              :          * If prevpage and the write page (the block into which we are moving
      713              :          * tuples from the overflow page) are the same, there is no need to
      714              :          * register prevpage separately.  During replay, we can update the
      715              :          * next block number directly in the write page.
     716              :          */
     717           94 :         if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
     718           63 :             XLogRegisterBuffer(3, prevbuf, REGBUF_STANDARD);
     719              : 
     720           94 :         if (BufferIsValid(nextbuf))
     721            0 :             XLogRegisterBuffer(4, nextbuf, REGBUF_STANDARD);
     722              : 
     723           94 :         XLogRegisterBuffer(5, mapbuf, REGBUF_STANDARD);
     724           94 :         XLogRegisterBufData(5, &bitmapbit, sizeof(uint32));
     725              : 
     726           94 :         if (update_metap)
     727              :         {
     728           91 :             XLogRegisterBuffer(6, metabuf, REGBUF_STANDARD);
     729           91 :             XLogRegisterBufData(6, &metap->hashm_firstfree, sizeof(uint32));
     730              :         }
     731              : 
     732           94 :         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SQUEEZE_PAGE);
     733              : 
     734              :         /* Set LSN iff wbuf is modified. */
     735           94 :         if (mod_wbuf)
     736           52 :             PageSetLSN(BufferGetPage(wbuf), recptr);
     737              : 
     738           94 :         PageSetLSN(BufferGetPage(ovflbuf), recptr);
     739              : 
     740           94 :         if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
     741           63 :             PageSetLSN(BufferGetPage(prevbuf), recptr);
     742           94 :         if (BufferIsValid(nextbuf))
     743            0 :             PageSetLSN(BufferGetPage(nextbuf), recptr);
     744              : 
     745           94 :         PageSetLSN(BufferGetPage(mapbuf), recptr);
     746              : 
     747           94 :         if (update_metap)
     748           91 :             PageSetLSN(BufferGetPage(metabuf), recptr);
     749              :     }
     750              : 
     751           94 :     END_CRIT_SECTION();
     752              : 
     753              :     /* release previous bucket if it is not same as write bucket */
     754           94 :     if (BufferIsValid(prevbuf) && prevblkno != writeblkno)
     755           63 :         _hash_relbuf(rel, prevbuf);
     756              : 
     757           94 :     if (BufferIsValid(ovflbuf))
     758           94 :         _hash_relbuf(rel, ovflbuf);
     759              : 
     760           94 :     if (BufferIsValid(nextbuf))
     761            0 :         _hash_relbuf(rel, nextbuf);
     762              : 
     763           94 :     _hash_relbuf(rel, mapbuf);
     764           94 :     _hash_relbuf(rel, metabuf);
     765              : 
     766           94 :     return nextblkno;
     767              : }
     768              : 
     769              : 
     770              : /*
     771              :  *  _hash_initbitmapbuffer()
     772              :  *
     773              :  *   Initialize a new bitmap page.  All bits in the new bitmap page are set to
     774              :  *   "1", indicating "in use".
     775              :  */
     776              : void
     777          204 : _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage)
     778              : {
     779              :     Page        pg;
     780              :     HashPageOpaque op;
     781              :     uint32     *freep;
     782              : 
     783          204 :     pg = BufferGetPage(buf);
     784              : 
     785              :     /* initialize the page */
     786          204 :     if (initpage)
     787           28 :         _hash_pageinit(pg, BufferGetPageSize(buf));
     788              : 
     789              :     /* initialize the page's special space */
     790          204 :     op = HashPageGetOpaque(pg);
     791          204 :     op->hasho_prevblkno = InvalidBlockNumber;
     792          204 :     op->hasho_nextblkno = InvalidBlockNumber;
     793          204 :     op->hasho_bucket = InvalidBucket;
     794          204 :     op->hasho_flag = LH_BITMAP_PAGE;
     795          204 :     op->hasho_page_id = HASHO_PAGE_ID;
     796              : 
     797              :     /* set all of the bits to 1 */
     798          204 :     freep = HashPageGetBitmap(pg);
     799          204 :     memset(freep, 0xFF, bmsize);
     800              : 
     801              :     /*
     802              :      * Set pd_lower just past the end of the bitmap page data.  We could even
     803              :      * set pd_lower equal to pd_upper, but this is more precise and makes the
     804              :      * page look compressible to xlog.c.
     805              :      */
     806          204 :     ((PageHeader) pg)->pd_lower = ((char *) freep + bmsize) - (char *) pg;
     807          204 : }
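/*
 * Editorial example (illustrative numbers, not from the original file): with
 * the default 8 kB block size, hashm_bmsize would be 4096 bytes (the largest
 * power of two that fits on the page), so pd_lower ends up just past the
 * bitmap array while pd_upper still points at the special space, letting
 * xlog.c omit the untouched middle of the page from full-page images.
 */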
     808              : 
     809              : 
     810              : /*
     811              :  *  _hash_squeezebucket(rel, bucket)
     812              :  *
     813              :  *  Try to squeeze the tuples onto pages occurring earlier in the
     814              :  *  bucket chain in an attempt to free overflow pages. When we start
     815              :  *  the "squeezing", the page from which we start taking tuples (the
      816              :  *  "read" page) is the last page in the bucket chain and the page
     817              :  *  onto which we start squeezing tuples (the "write" page) is the
     818              :  *  first page in the bucket chain.  The read page works backward and
     819              :  *  the write page works forward; the procedure terminates when the
     820              :  *  read page and write page are the same page.
     821              :  *
     822              :  *  At completion of this procedure, it is guaranteed that all pages in
     823              :  *  the bucket are nonempty, unless the bucket is totally empty (in
     824              :  *  which case all overflow pages will be freed).  The original implementation
     825              :  *  required that to be true on entry as well, but it's a lot easier for
      826              :  *  callers to leave empty overflow pages and let this routine clean them up.
     827              :  *
     828              :  *  Caller must acquire cleanup lock on the primary page of the target
     829              :  *  bucket to exclude any scans that are in progress, which could easily
     830              :  *  be confused into returning the same tuple more than once or some tuples
      831              :  *  not at all by the rearrangement we are performing here.  To prevent any
      832              :  *  concurrent scan from crossing the squeeze scan we use lock chaining
      833              :  *  similar to hashbucketcleanup; see the comments atop hashbucketcleanup.
     834              :  *
     835              :  *  We need to retain a pin on the primary bucket to ensure that no concurrent
     836              :  *  split can start.
     837              :  *
     838              :  *  Since this function is invoked in VACUUM, we provide an access strategy
     839              :  *  parameter that controls fetches of the bucket pages.
     840              :  */
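/*
 * Editorial illustration (not part of the original file) of the squeeze:
 *
 *		primary -> ovfl A -> ovfl B -> ovfl C
 *		^ write page (moves forward)   ^ read page (moves backward)
 *
 * Tuples are copied from the read page into the write page; each read page
 * that is emptied is handed to _hash_freeovflpage(), and the procedure stops
 * when the two pointers meet.
 */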
     841              : void
     842          693 : _hash_squeezebucket(Relation rel,
     843              :                     Bucket bucket,
     844              :                     BlockNumber bucket_blkno,
     845              :                     Buffer bucket_buf,
     846              :                     BufferAccessStrategy bstrategy)
     847              : {
     848              :     BlockNumber wblkno;
     849              :     BlockNumber rblkno;
     850              :     Buffer      wbuf;
     851              :     Buffer      rbuf;
     852              :     Page        wpage;
     853              :     Page        rpage;
     854              :     HashPageOpaque wopaque;
     855              :     HashPageOpaque ropaque;
     856              : 
     857              :     /*
     858              :      * start squeezing into the primary bucket page.
     859              :      */
     860          693 :     wblkno = bucket_blkno;
     861          693 :     wbuf = bucket_buf;
     862          693 :     wpage = BufferGetPage(wbuf);
     863          693 :     wopaque = HashPageGetOpaque(wpage);
     864              : 
     865              :     /*
     866              :      * if there aren't any overflow pages, there's nothing to squeeze. caller
     867              :      * is responsible for releasing the pin on primary bucket page.
     868              :      */
     869          693 :     if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
     870              :     {
     871          656 :         LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
     872          656 :         return;
     873              :     }
     874              : 
     875              :     /*
     876              :      * Find the last page in the bucket chain by starting at the base bucket
     877              :      * page and working forward.  Note: we assume that a hash bucket chain is
     878              :      * usually smaller than the buffer ring being used by VACUUM, else using
     879              :      * the access strategy here would be counterproductive.
     880              :      */
     881           37 :     rbuf = InvalidBuffer;
     882           37 :     ropaque = wopaque;
     883              :     do
     884              :     {
     885          181 :         rblkno = ropaque->hasho_nextblkno;
     886          181 :         if (rbuf != InvalidBuffer)
     887          144 :             _hash_relbuf(rel, rbuf);
     888          181 :         rbuf = _hash_getbuf_with_strategy(rel,
     889              :                                           rblkno,
     890              :                                           HASH_WRITE,
     891              :                                           LH_OVERFLOW_PAGE,
     892              :                                           bstrategy);
     893          181 :         rpage = BufferGetPage(rbuf);
     894          181 :         ropaque = HashPageGetOpaque(rpage);
     895              :         Assert(ropaque->hasho_bucket == bucket);
     896          181 :     } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
     897              : 
     898              :     /*
     899              :      * squeeze the tuples.
     900              :      */
     901              :     for (;;)
     902           63 :     {
     903              :         OffsetNumber roffnum;
     904              :         OffsetNumber maxroffnum;
     905              :         OffsetNumber deletable[MaxOffsetNumber];
     906              :         IndexTuple  itups[MaxIndexTuplesPerPage];
     907              :         Size        tups_size[MaxIndexTuplesPerPage];
     908              :         OffsetNumber itup_offsets[MaxIndexTuplesPerPage];
     909          100 :         uint16      ndeletable = 0;
     910          100 :         uint16      nitups = 0;
     911          100 :         Size        all_tups_size = 0;
     912              :         int         i;
     913          100 :         bool        retain_pin = false;
     914              : 
     915          103 : readpage:
     916              :         /* Scan each tuple in "read" page */
     917          103 :         maxroffnum = PageGetMaxOffsetNumber(rpage);
     918          103 :         for (roffnum = FirstOffsetNumber;
     919         2693 :              roffnum <= maxroffnum;
     920         2590 :              roffnum = OffsetNumberNext(roffnum))
     921              :         {
     922              :             IndexTuple  itup;
     923              :             Size        itemsz;
     924              : 
     925              :             /* skip dead tuples */
     926         2599 :             if (ItemIdIsDead(PageGetItemId(rpage, roffnum)))
     927            0 :                 continue;
     928              : 
     929         2599 :             itup = (IndexTuple) PageGetItem(rpage,
     930         2599 :                                             PageGetItemId(rpage, roffnum));
     931         2599 :             itemsz = IndexTupleSize(itup);
     932         2599 :             itemsz = MAXALIGN(itemsz);
     933              : 
     934              :             /*
     935              :              * Walk up the bucket chain, looking for a page big enough for
     936              :              * this item and all other accumulated items.  Exit if we reach
     937              :              * the read page.
     938              :              */
     939         2677 :             while (PageGetFreeSpaceForMultipleTuples(wpage, nitups + 1) < (all_tups_size + itemsz))
     940              :             {
     941           87 :                 Buffer      next_wbuf = InvalidBuffer;
     942           87 :                 bool        tups_moved = false;
     943              : 
     944              :                 Assert(!PageIsEmpty(wpage));
     945              : 
     946           87 :                 if (wblkno == bucket_blkno)
     947           15 :                     retain_pin = true;
     948              : 
     949           87 :                 wblkno = wopaque->hasho_nextblkno;
     950              :                 Assert(BlockNumberIsValid(wblkno));
     951              : 
     952              :                 /* don't need to move to next page if we reached the read page */
     953           87 :                 if (wblkno != rblkno)
     954           81 :                     next_wbuf = _hash_getbuf_with_strategy(rel,
     955              :                                                            wblkno,
     956              :                                                            HASH_WRITE,
     957              :                                                            LH_OVERFLOW_PAGE,
     958              :                                                            bstrategy);
     959              : 
     960           87 :                 if (nitups > 0)
     961              :                 {
     962              :                     Assert(nitups == ndeletable);
     963              : 
     964              :                     /*
     965              :                      * This operation needs to log multiple tuples, prepare
     966              :                      * WAL for that.
     967              :                      */
     968            3 :                     if (RelationNeedsWAL(rel))
     969            3 :                         XLogEnsureRecordSpace(0, 3 + nitups);
     970              : 
     971            3 :                     START_CRIT_SECTION();
     972              : 
     973              :                     /*
     974              :                      * we have to insert tuples on the "write" page, being
     975              :                      * careful to preserve hashkey ordering.  (If we insert
     976              :                      * many tuples into the same "write" page it would be
     977              :                      * worth qsort'ing them).
     978              :                      */
     979            3 :                     _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
     980            3 :                     MarkBufferDirty(wbuf);
     981              : 
     982              :                     /* Delete tuples we already moved off read page */
     983            3 :                     PageIndexMultiDelete(rpage, deletable, ndeletable);
     984            3 :                     MarkBufferDirty(rbuf);
     985              : 
     986              :                     /* XLOG stuff */
     987            3 :                     if (RelationNeedsWAL(rel))
     988              :                     {
     989              :                         XLogRecPtr  recptr;
     990              :                         xl_hash_move_page_contents xlrec;
     991              : 
     992            3 :                         xlrec.ntups = nitups;
     993            3 :                         xlrec.is_prim_bucket_same_wrt = (wbuf == bucket_buf);
     994              : 
     995            3 :                         XLogBeginInsert();
     996            3 :                         XLogRegisterData(&xlrec, SizeOfHashMovePageContents);
     997              : 
     998              :                         /*
     999              :                          * bucket buffer was not changed, but still needs to
    1000              :                          * be registered to ensure that we can acquire a
    1001              :                          * cleanup lock on it during replay.
    1002              :                          */
    1003            3 :                         if (!xlrec.is_prim_bucket_same_wrt)
    1004              :                         {
    1005            0 :                             int         flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
    1006              : 
    1007            0 :                             XLogRegisterBuffer(0, bucket_buf, flags);
    1008              :                         }
    1009              : 
    1010            3 :                         XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
    1011            3 :                         XLogRegisterBufData(1, itup_offsets,
    1012              :                                             nitups * sizeof(OffsetNumber));
    1013         1029 :                         for (i = 0; i < nitups; i++)
    1014         1026 :                             XLogRegisterBufData(1, itups[i], tups_size[i]);
    1015              : 
    1016            3 :                         XLogRegisterBuffer(2, rbuf, REGBUF_STANDARD);
    1017            3 :                         XLogRegisterBufData(2, deletable,
    1018              :                                             ndeletable * sizeof(OffsetNumber));
    1019              : 
    1020            3 :                         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_MOVE_PAGE_CONTENTS);
    1021              : 
    1022            3 :                         PageSetLSN(BufferGetPage(wbuf), recptr);
    1023            3 :                         PageSetLSN(BufferGetPage(rbuf), recptr);
    1024              :                     }
    1025              : 
    1026            3 :                     END_CRIT_SECTION();
    1027              : 
    1028            3 :                     tups_moved = true;
    1029              :                 }
    1030              : 
    1031              :                 /*
    1032              :                  * release the lock on previous page after acquiring the lock
    1033              :                  * on next page
    1034              :                  */
    1035           87 :                 if (retain_pin)
    1036           15 :                     LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
    1037              :                 else
    1038           72 :                     _hash_relbuf(rel, wbuf);
    1039              : 
    1040              :                 /* nothing more to do if we reached the read page */
    1041           87 :                 if (rblkno == wblkno)
    1042              :                 {
    1043            6 :                     _hash_relbuf(rel, rbuf);
    1044           37 :                     return;
    1045              :                 }
    1046              : 
    1047           81 :                 wbuf = next_wbuf;
    1048           81 :                 wpage = BufferGetPage(wbuf);
    1049           81 :                 wopaque = HashPageGetOpaque(wpage);
    1050              :                 Assert(wopaque->hasho_bucket == bucket);
    1051           81 :                 retain_pin = false;
    1052              : 
    1053              :                 /* be tidy */
    1054         1107 :                 for (i = 0; i < nitups; i++)
    1055         1026 :                     pfree(itups[i]);
    1056           81 :                 nitups = 0;
    1057           81 :                 all_tups_size = 0;
    1058           81 :                 ndeletable = 0;
    1059              : 
    1060              :                 /*
    1061              :                  * after moving the tuples, rpage would have been compacted,
    1062              :                  * so we need to rescan it.
    1063              :                  */
    1064           81 :                 if (tups_moved)
    1065            3 :                     goto readpage;
    1066              :             }
    1067              : 
    1068              :             /* remember tuple for deletion from "read" page */
    1069         2590 :             deletable[ndeletable++] = roffnum;
    1070              : 
    1071              :             /*
     1072              :              * we need a copy of the index tuples, since the originals can
     1073              :              * be freed when the overflow page itself is freed; however, we
     1074              :              * still need them to write a WAL record in _hash_freeovflpage.
    1075              :              */
    1076         2590 :             itups[nitups] = CopyIndexTuple(itup);
    1077         2590 :             tups_size[nitups++] = itemsz;
    1078         2590 :             all_tups_size += itemsz;
    1079              :         }
    1080              : 
    1081              :         /*
    1082              :          * If we reach here, there are no live tuples on the "read" page ---
    1083              :          * it was empty when we got to it, or we moved them all.  So we can
    1084              :          * just free the page without bothering with deleting tuples
    1085              :          * individually.  Then advance to the previous "read" page.
    1086              :          *
    1087              :          * Tricky point here: if our read and write pages are adjacent in the
    1088              :          * bucket chain, our write lock on wbuf will conflict with
    1089              :          * _hash_freeovflpage's attempt to update the sibling links of the
    1090              :          * removed page.  In that case, we don't need to lock it again.
    1091              :          */
    1092           94 :         rblkno = ropaque->hasho_prevblkno;
    1093              :         Assert(BlockNumberIsValid(rblkno));
    1094              : 
    1095              :         /* free this overflow page (releases rbuf) */
    1096           94 :         _hash_freeovflpage(rel, bucket_buf, rbuf, wbuf, itups, itup_offsets,
    1097              :                            tups_size, nitups, bstrategy);
    1098              : 
    1099              :         /* be tidy */
    1100         1658 :         for (i = 0; i < nitups; i++)
    1101         1564 :             pfree(itups[i]);
    1102              : 
    1103              :         /* are we freeing the page adjacent to wbuf? */
    1104           94 :         if (rblkno == wblkno)
    1105              :         {
    1106              :             /* retain the pin on primary bucket page till end of bucket scan */
    1107           31 :             if (wblkno == bucket_blkno)
    1108           22 :                 LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
    1109              :             else
    1110            9 :                 _hash_relbuf(rel, wbuf);
    1111           31 :             return;
    1112              :         }
    1113              : 
    1114           63 :         rbuf = _hash_getbuf_with_strategy(rel,
    1115              :                                           rblkno,
    1116              :                                           HASH_WRITE,
    1117              :                                           LH_OVERFLOW_PAGE,
    1118              :                                           bstrategy);
    1119           63 :         rpage = BufferGetPage(rbuf);
    1120           63 :         ropaque = HashPageGetOpaque(rpage);
    1121              :         Assert(ropaque->hasho_bucket == bucket);
    1122              :     }
    1123              : 
    1124              :     /* NOTREACHED */
    1125              : }
        

Generated by: LCOV version 2.0-1
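
The per-tuple sizing in the listing above (itemsz = MAXALIGN(itemsz)) rounds each tuple's size up to the maximum alignment boundary before it is counted against the write page's free space. A minimal stand-alone sketch of that rounding follows, assuming a typical 8-byte boundary; TOY_MAXALIGN is an invented name for this illustration, not the PostgreSQL macro.

    #include <stdio.h>

    /* round a size up to the next multiple of 8 bytes (toy stand-in for MAXALIGN) */
    #define TOY_MAXALIGN(sz) (((sz) + 7) & ~((size_t) 7))

    int
    main(void)
    {
        for (size_t sz = 1; sz <= 24; sz += 7)
            printf("%zu bytes -> %zu bytes after alignment\n",
                   sz, TOY_MAXALIGN(sz));
        return 0;
    }

For example, a 15-byte tuple is charged as 16 bytes, which is why the free-space test compares against the sum of the aligned sizes rather than the raw tuple lengths.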
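The loop in the listing is the core of the bucket "squeeze" pass: a "write" cursor starts near the front of the bucket chain and a "read" cursor at the tail; tuples are drained from the read page into the first earlier page with enough room, emptied tail pages are freed, and the pass stops when the two cursors meet. The toy model below sketches only that control flow under stated assumptions: the ToyPage type and toy_* functions are invented for the illustration, and it deliberately ignores locking, WAL logging, dead tuples, and hash-key ordering, all of which the real code must handle.

    #include <stdio.h>
    #include <stdlib.h>

    #define PAGE_CAPACITY 4         /* items per toy page */

    typedef struct ToyPage
    {
        int         items[PAGE_CAPACITY];
        int         nitems;
        struct ToyPage *next;       /* towards the tail of the chain */
        struct ToyPage *prev;       /* towards the primary page */
    } ToyPage;

    /* append a new page at the tail of the chain and return it */
    static ToyPage *
    toy_add_page(ToyPage *tail)
    {
        ToyPage    *p = calloc(1, sizeof(ToyPage));

        p->prev = tail;
        if (tail)
            tail->next = p;
        return p;
    }

    /* drain tail pages into earlier pages, freeing emptied tails */
    static void
    toy_squeeze(ToyPage *head, ToyPage *tail)
    {
        ToyPage    *wpage = head;   /* "write" cursor */
        ToyPage    *rpage = tail;   /* "read" cursor  */

        while (rpage != wpage)
        {
            ToyPage    *prev;

            /* move items off the read page one at a time */
            while (rpage->nitems > 0)
            {
                /* advance the write cursor until a page has room */
                while (wpage->nitems == PAGE_CAPACITY)
                {
                    wpage = wpage->next;
                    if (wpage == rpage)
                        return;     /* cursors met: nothing left to squeeze */
                }
                wpage->items[wpage->nitems++] = rpage->items[--rpage->nitems];
            }

            /* read page is now empty: unlink and free it, then step back */
            prev = rpage->prev;
            prev->next = NULL;
            free(rpage);
            rpage = prev;
        }
    }

    int
    main(void)
    {
        ToyPage    *head = toy_add_page(NULL);
        ToyPage    *tail = head;

        /* build a three-page chain with a sparsely filled head page */
        head->nitems = 1;
        head->items[0] = 100;
        tail = toy_add_page(tail);
        tail->nitems = 2;
        tail->items[0] = 200;
        tail->items[1] = 201;
        tail = toy_add_page(tail);
        tail->nitems = 3;
        tail->items[0] = 300;
        tail->items[1] = 301;
        tail->items[2] = 302;

        toy_squeeze(head, tail);

        for (ToyPage *p = head; p != NULL; p = p->next)
            printf("page with %d item(s)\n", p->nitems);

        while (head)
        {
            ToyPage    *next = head->next;

            free(head);
            head = next;
        }
        return 0;
    }

Run on this three-page chain, the sketch packs the head page full, frees the emptied tail page, and leaves the remaining items on the page where the cursors meet, mirroring the early-return path the real loop takes when wblkno reaches rblkno.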