/*-------------------------------------------------------------------------
 *
 * hashinsert.c
 *    Item insertion in hash tables for Postgres.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/access/hash/hashinsert.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/hash.h"
#include "access/hash_xlog.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/predicate.h"
#include "utils/rel.h"

static void _hash_vacuum_one_page(Relation rel, Relation hrel,
                                  Buffer metabuf, Buffer buf);

/*
 *  _hash_doinsert() -- Handle insertion of a single index tuple.
 *
 *      This routine is called by the public interface routines, hashbuild
 *      and hashinsert.  By here, itup is completely filled in.
 *
 * 'sorted' must only be passed as 'true' when inserts are done in hashkey
 * order.
 */
void
_hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel, bool sorted)
{
    Buffer      buf = InvalidBuffer;
    Buffer      bucket_buf;
    Buffer      metabuf;
    HashMetaPage metap;
    HashMetaPage usedmetap = NULL;
    Page        metapage;
    Page        page;
    HashPageOpaque pageopaque;
    Size        itemsz;
    bool        do_expand;
    uint32      hashkey;
    Bucket      bucket;
    OffsetNumber itup_off;

    /*
     * Get the hash key for the item (it's stored in the index tuple itself).
     */
    hashkey = _hash_get_indextuple_hashkey(itup);

    /* compute item size too */
    itemsz = IndexTupleSize(itup);
    itemsz = MAXALIGN(itemsz);  /* be safe, PageAddItem will do this but we
                                 * need to be consistent */
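    /*
     * For illustration, assuming the common MAXIMUM_ALIGNOF of 8: a 20-byte
     * tuple is counted as 24 bytes here, so the free-space tests below agree
     * with what PageAddItem will actually consume from the page.
     */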

restart_insert:

    /*
     * Read the metapage.  We don't lock it yet; HashMaxItemSize() will
     * examine pd_pagesize_version, but that can't change so we can examine it
     * without a lock.
     */
    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
    metapage = BufferGetPage(metabuf);

    /*
     * Check whether the item can fit on a hash page at all. (Eventually, we
     * ought to try to apply TOAST methods if not.)  Note that at this point,
     * itemsz doesn't include the ItemId.
     *
     * XXX this is useless code if we are only storing hash keys.
     */
    if (itemsz > HashMaxItemSize(metapage))
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("index row size %zu exceeds hash maximum %zu",
                        itemsz, HashMaxItemSize(metapage)),
                 errhint("Values larger than a buffer page cannot be indexed.")));

    /* Lock the primary bucket page for the target bucket. */
    buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_WRITE,
                                          &usedmetap);
    Assert(usedmetap != NULL);

    CheckForSerializableConflictIn(rel, NULL, BufferGetBlockNumber(buf));

    /* Remember the primary bucket buffer so we can release its pin at the end. */
    bucket_buf = buf;

    page = BufferGetPage(buf);
    pageopaque = HashPageGetOpaque(page);
    bucket = pageopaque->hasho_bucket;

    /*
     * If this bucket is in the process of being split, try to finish the
     * split before inserting, because that might create room for the
     * insertion to proceed without allocating an additional overflow page.
     * It's only interesting to finish the split if we're trying to insert
     * into the bucket from which we're removing tuples (the "old" bucket),
     * not if we're trying to insert into the bucket into which tuples are
     * being moved (the "new" bucket).
     */
    if (H_BUCKET_BEING_SPLIT(pageopaque) && IsBufferCleanupOK(buf))
    {
        /* release the lock on bucket buffer, before completing the split. */
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);

        _hash_finish_split(rel, metabuf, buf, bucket,
                           usedmetap->hashm_maxbucket,
                           usedmetap->hashm_highmask,
                           usedmetap->hashm_lowmask);

        /* release the pin on old and meta buffer.  retry for insert. */
        _hash_dropbuf(rel, buf);
        _hash_dropbuf(rel, metabuf);
        goto restart_insert;
    }

    /* Do the insertion */
    while (PageGetFreeSpace(page) < itemsz)
    {
        BlockNumber nextblkno;

        /*
         * Check if current page has any DEAD tuples. If yes, delete these
         * tuples and see if we can get a space for the new item to be
         * inserted before moving to the next page in the bucket chain.
         */
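        /*
         * (LP_DEAD hints are normally set by earlier index scans via the
         * kill_prior_tuple mechanism; see _hash_kill_items().)
         */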
        if (H_HAS_DEAD_TUPLES(pageopaque))
        {
            if (IsBufferCleanupOK(buf))
            {
                _hash_vacuum_one_page(rel, heapRel, metabuf, buf);

                if (PageGetFreeSpace(page) >= itemsz)
                    break;      /* OK, now we have enough space */
            }
        }

        /*
         * no space on this page; check for an overflow page
         */
        nextblkno = pageopaque->hasho_nextblkno;

        if (BlockNumberIsValid(nextblkno))
        {
            /*
             * ovfl page exists; go get it.  if it doesn't have room, we'll
             * find out next pass through the loop test above.  we always
             * release both the lock and pin if this is an overflow page, but
             * only the lock if this is the primary bucket page, since the pin
             * on the primary bucket must be retained throughout the scan.
             */
            if (buf != bucket_buf)
                _hash_relbuf(rel, buf);
            else
                LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
            page = BufferGetPage(buf);
        }
        else
        {
            /*
             * we're at the end of the bucket chain and we haven't found a
             * page with enough room.  allocate a new overflow page.
             */

            /* release our write lock without modifying buffer */
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);

            /* chain to a new overflow page */
            buf = _hash_addovflpage(rel, metabuf, buf, (buf == bucket_buf));
            page = BufferGetPage(buf);

            /* should fit now, given test above */
            Assert(PageGetFreeSpace(page) >= itemsz);
        }
        pageopaque = HashPageGetOpaque(page);
        Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_OVERFLOW_PAGE);
        Assert(pageopaque->hasho_bucket == bucket);
    }

    /*
     * Write-lock the metapage so we can increment the tuple count. After
     * incrementing it, check to see if it's time for a split.
     */
    LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);

    /* Do the update.  No ereport(ERROR) until changes are logged */
    START_CRIT_SECTION();

    /* found page with enough space, so add the item here */
    itup_off = _hash_pgaddtup(rel, buf, itemsz, itup, sorted);
    MarkBufferDirty(buf);

    /* metapage operations */
    metap = HashPageGetMeta(metapage);
    metap->hashm_ntuples += 1;

    /* Make sure this stays in sync with _hash_expandtable() */
    do_expand = metap->hashm_ntuples >
        (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
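    /*
     * For example, with an illustrative hashm_ffactor of 300 and four
     * buckets (hashm_maxbucket == 3), do_expand becomes true once the index
     * holds more than 300 * 4 = 1200 tuples.
     */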

    MarkBufferDirty(metabuf);

    /* XLOG stuff */
    if (RelationNeedsWAL(rel))
    {
        xl_hash_insert xlrec;
        XLogRecPtr  recptr;

        xlrec.offnum = itup_off;

        XLogBeginInsert();
        XLogRegisterData(&xlrec, SizeOfHashInsert);

        XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);

        XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
        XLogRegisterBufData(0, itup, IndexTupleSize(itup));

        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT);

        PageSetLSN(BufferGetPage(buf), recptr);
        PageSetLSN(BufferGetPage(metabuf), recptr);
    }

    END_CRIT_SECTION();

    /* drop lock on metapage, but keep pin */
    LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);

    /*
     * Release the modified page, and be sure to also release the pin on the
     * primary bucket page.
     */
    _hash_relbuf(rel, buf);
    if (buf != bucket_buf)
        _hash_dropbuf(rel, bucket_buf);

    /* Attempt to split if a split is needed */
    if (do_expand)
        _hash_expandtable(rel, metabuf);

    /* Finally drop our pin on the metapage */
    _hash_dropbuf(rel, metabuf);
}

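/*
 * A minimal usage sketch, illustration only: this hypothetical helper shows
 * how a public entry point such as hashinsert() might drive
 * _hash_doinsert().  The name example_hash_insert and the NOT_USED guard are
 * assumptions, not part of PostgreSQL; the index tuple is assumed to already
 * carry its hash key, as _hash_doinsert() requires.
 */
#ifdef NOT_USED
static void
example_hash_insert(Relation rel, Relation heapRel, IndexTuple itup)
{
    /* ordinary inserts arrive in no particular hashkey order */
    _hash_doinsert(rel, itup, heapRel, false);
}
#endif
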
/*
 *  _hash_pgaddtup() -- add a tuple to a particular page in the index.
 *
 * This routine adds the tuple to the page as requested; it does not write out
 * the page.  It is an error to call this function without pin and write lock
 * on the target buffer.
 *
 * Returns the offset number at which the tuple was inserted.  This function
 * is responsible for preserving the condition that tuples in a hash index
 * page are sorted by hashkey value.  However, if the caller is certain that
 * the hashkey for the tuple being added is >= the hashkeys of all existing
 * tuples on the page, then the 'appendtup' flag may be passed as true.  This
 * saves us from having to binary search for the correct location to insert
 * the tuple.
 */
OffsetNumber
_hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup,
               bool appendtup)
{
    OffsetNumber itup_off;
    Page        page;

    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
    page = BufferGetPage(buf);

    /*
     * Find where to insert the tuple (preserving page's hashkey ordering). If
     * 'appendtup' is true then we just insert it at the end.
     */
    if (appendtup)
    {
        itup_off = PageGetMaxOffsetNumber(page) + 1;

#ifdef USE_ASSERT_CHECKING
        /* ensure this tuple's hashkey is >= the final existing tuple */
        if (PageGetMaxOffsetNumber(page) > 0)
        {
            IndexTuple  lasttup;
            ItemId      itemid;

            itemid = PageGetItemId(page, PageGetMaxOffsetNumber(page));
            lasttup = (IndexTuple) PageGetItem(page, itemid);

            Assert(_hash_get_indextuple_hashkey(lasttup) <=
                   _hash_get_indextuple_hashkey(itup));
        }
#endif
    }
    else
    {
        uint32      hashkey = _hash_get_indextuple_hashkey(itup);

        itup_off = _hash_binsearch(page, hashkey);
    }

    if (PageAddItem(page, itup, itemsize, itup_off, false, false) == InvalidOffsetNumber)
        elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(rel));

    return itup_off;
}

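/*
 * Hypothetical sketch, illustration only: the two insertion modes of
 * _hash_pgaddtup().  Ordinary inserts binary-search for the slot to keep the
 * page ordered by hashkey; a caller that knows the new tuple's hashkey is >=
 * every existing one (e.g. a hashkey-sorted build) may append instead.  The
 * helper name and NOT_USED guard are assumptions.
 */
#ifdef NOT_USED
static OffsetNumber
example_pgaddtup(Relation rel, Buffer buf, IndexTuple itup, bool sorted_build)
{
    Size        itemsz = MAXALIGN(IndexTupleSize(itup));

    /* append only when the caller guarantees hashkey ordering */
    return _hash_pgaddtup(rel, buf, itemsz, itup, sorted_build);
}
#endif
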
/*
 *  _hash_pgaddmultitup() -- add a tuple vector to a particular page in the
 *                           index.
 *
 * This routine has the same requirements for locking and tuple ordering as
 * _hash_pgaddtup().
 *
 * The offset numbers at which the tuples were inserted are returned via the
 * 'itup_offsets' array.
 */
void
_hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups,
                    OffsetNumber *itup_offsets, uint16 nitups)
{
    OffsetNumber itup_off;
    Page        page;
    uint32      hashkey;
    int         i;

    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
    page = BufferGetPage(buf);

    for (i = 0; i < nitups; i++)
    {
        Size        itemsize;

        itemsize = IndexTupleSize(itups[i]);
        itemsize = MAXALIGN(itemsize);

        /* Find where to insert the tuple (preserving page's hashkey ordering) */
        hashkey = _hash_get_indextuple_hashkey(itups[i]);
        itup_off = _hash_binsearch(page, hashkey);

        itup_offsets[i] = itup_off;

        if (PageAddItem(page, itups[i], itemsize, itup_off, false, false) == InvalidOffsetNumber)
            elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(rel));
    }
}

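/*
 * Hypothetical sketch, illustration only: batching tuples into a page with
 * _hash_pgaddmultitup().  The caller supplies a parallel offsets array;
 * MaxIndexTuplesPerPage bounds how many tuples can fit on one page.  The
 * helper name and NOT_USED guard are assumptions.
 */
#ifdef NOT_USED
static void
example_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups, uint16 nitups)
{
    OffsetNumber offsets[MaxIndexTuplesPerPage];

    Assert(nitups <= MaxIndexTuplesPerPage);
    _hash_pgaddmultitup(rel, buf, itups, offsets, nitups);
}
#endif
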
/*
 * _hash_vacuum_one_page - vacuum just one index page.
 *
 * Try to remove LP_DEAD items from the given page.  The caller must hold a
 * cleanup lock on the page being modified before calling this function.
 */
static void
_hash_vacuum_one_page(Relation rel, Relation hrel, Buffer metabuf, Buffer buf)
{
    OffsetNumber deletable[MaxOffsetNumber];
    int         ndeletable = 0;
    OffsetNumber offnum,
                maxoff;
    Page        page = BufferGetPage(buf);
    HashPageOpaque pageopaque;
    HashMetaPage metap;

    /* Scan each tuple in page to see if it is marked as LP_DEAD */
    maxoff = PageGetMaxOffsetNumber(page);
    for (offnum = FirstOffsetNumber;
         offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      itemId = PageGetItemId(page, offnum);

        if (ItemIdIsDead(itemId))
            deletable[ndeletable++] = offnum;
    }

    if (ndeletable > 0)
    {
        TransactionId snapshotConflictHorizon;

        snapshotConflictHorizon =
            index_compute_xid_horizon_for_tuples(rel, hrel, buf,
                                                 deletable, ndeletable);

        /*
         * Write-lock the meta page so that we can decrement the tuple count.
         */
        LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);

        /* No ereport(ERROR) until changes are logged */
        START_CRIT_SECTION();

        PageIndexMultiDelete(page, deletable, ndeletable);

        /*
         * Mark the page as not containing any LP_DEAD items.  This is not
         * necessarily true (there might be some that have recently been
         * marked, but weren't included in our target-item list), but it will
         * almost always be true, and it doesn't seem worth an additional page
         * scan to check.  Remember that LH_PAGE_HAS_DEAD_TUPLES is only a
         * hint anyway.
         */
        pageopaque = HashPageGetOpaque(page);
        pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;

        metap = HashPageGetMeta(BufferGetPage(metabuf));
        metap->hashm_ntuples -= ndeletable;

        MarkBufferDirty(buf);
        MarkBufferDirty(metabuf);

        /* XLOG stuff */
        if (RelationNeedsWAL(rel))
        {
            xl_hash_vacuum_one_page xlrec;
            XLogRecPtr  recptr;

            xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(hrel);
            xlrec.snapshotConflictHorizon = snapshotConflictHorizon;
            xlrec.ntuples = ndeletable;

            XLogBeginInsert();
            XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
            XLogRegisterData(&xlrec, SizeOfHashVacuumOnePage);

            /*
             * We need the target-offsets array whether or not we store the
             * whole buffer, to allow us to find the snapshotConflictHorizon
             * on a standby server.
             */
            XLogRegisterData(deletable,
                             ndeletable * sizeof(OffsetNumber));

            XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);

            recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_VACUUM_ONE_PAGE);

            PageSetLSN(BufferGetPage(buf), recptr);
            PageSetLSN(BufferGetPage(metabuf), recptr);
        }

        END_CRIT_SECTION();

        /*
         * Release the write lock on the meta page now that we have updated
         * the tuple count.
         */
        LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
    }
}