LCOV - code coverage report
Current view: top level - src/backend/access/hash - hashinsert.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 91.0 % 133 121
Test Date: 2026-03-25 21:16:15 Functions: 100.0 % 4 4
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * hashinsert.c
       4              :  *    Item insertion in hash tables for Postgres.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/access/hash/hashinsert.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : 
      16              : #include "postgres.h"
      17              : 
      18              : #include "access/hash.h"
      19              : #include "access/hash_xlog.h"
      20              : #include "access/xloginsert.h"
      21              : #include "miscadmin.h"
      22              : #include "storage/predicate.h"
      23              : #include "utils/rel.h"
      24              : 
      25              : static void _hash_vacuum_one_page(Relation rel, Relation hrel,
      26              :                                   Buffer metabuf, Buffer buf);
      27              : 
       28              : /*
       29              :  *  _hash_doinsert() -- Handle insertion of a single index tuple.
       30              :  *
       31              :  *      This routine is called by the public interface routines, hashbuild
       32              :  *      and hashinsert.  By here, itup is completely filled in.
       33              :  *
       34              :  * 'sorted' must only be passed as 'true' when inserts are done in hashkey
       35              :  * order.
                      :  *
                      :  * 'heapRel' is the index's parent heap relation; it is passed through to
                      :  * _hash_vacuum_one_page() when LP_DEAD items must be removed to make room.
       36              :  */
       37              : void
       38       490813 : _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel, bool sorted)
       39              : {
       40       490813 :     Buffer      buf = InvalidBuffer;
       41              :     Buffer      bucket_buf;
       42              :     Buffer      metabuf;
       43              :     HashMetaPage metap;
       44       490813 :     HashMetaPage usedmetap = NULL;
       45              :     Page        metapage;
       46              :     Page        page;
       47              :     HashPageOpaque pageopaque;
       48              :     Size        itemsz;
       49              :     bool        do_expand;
       50              :     uint32      hashkey;
       51              :     Bucket      bucket;
       52              :     OffsetNumber itup_off;
       53              :     XLogRecPtr  recptr;
       54              : 
       55              :     /*
       56              :      * Get the hash key for the item (it's stored in the index tuple itself).
       57              :      */
       58       490813 :     hashkey = _hash_get_indextuple_hashkey(itup);
       59              : 
       60              :     /* compute item size too */
       61       490813 :     itemsz = IndexTupleSize(itup);
       62       490813 :     itemsz = MAXALIGN(itemsz);  /* be safe, PageAddItem will do this but we
       63              :                                  * need to be consistent */
       64              : 
       65       490813 : restart_insert:
       66              : 
       67              :     /*
       68              :      * Read the metapage.  We don't lock it yet; HashMaxItemSize() will
       69              :      * examine pd_pagesize_version, but that can't change so we can examine it
       70              :      * without a lock.
       71              :      */
       72       490813 :     metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
       73       490813 :     metapage = BufferGetPage(metabuf);
       74              : 
       75              :     /*
       76              :      * Check whether the item can fit on a hash page at all. (Eventually, we
       77              :      * ought to try to apply TOAST methods if not.)  Note that at this point,
       78              :      * itemsz doesn't include the ItemId.
       79              :      *
       80              :      * XXX this is useless code if we are only storing hash keys.
       81              :      */
       82       490813 :     if (itemsz > HashMaxItemSize(metapage))
       83            0 :         ereport(ERROR,
       84              :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
       85              :                  errmsg("index row size %zu exceeds hash maximum %zu",
       86              :                         itemsz, HashMaxItemSize(metapage)),
       87              :                  errhint("Values larger than a buffer page cannot be indexed.")));
       88              : 
       89              :     /* Lock the primary bucket page for the target bucket. */
       90       490813 :     buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_WRITE,
       91              :                                           &usedmetap);
       92              :     Assert(usedmetap != NULL);
       93              : 
       94       490813 :     CheckForSerializableConflictIn(rel, NULL, BufferGetBlockNumber(buf));
       95              : 
       96              :     /* remember the primary bucket buffer to release the pin on it at end. */
       97       490807 :     bucket_buf = buf;
       98              : 
       99       490807 :     page = BufferGetPage(buf);
      100       490807 :     pageopaque = HashPageGetOpaque(page);
      101       490807 :     bucket = pageopaque->hasho_bucket;
      102              : 
      103              :     /*
      104              :      * If this bucket is in the process of being split, try to finish the
      105              :      * split before inserting, because that might create room for the
      106              :      * insertion to proceed without allocating an additional overflow page.
      107              :      * It's only interesting to finish the split if we're trying to insert
      108              :      * into the bucket from which we're removing tuples (the "old" bucket),
      109              :      * not if we're trying to insert into the bucket into which tuples are
      110              :      * being moved (the "new" bucket).
      111              :      */
      112       490807 :     if (H_BUCKET_BEING_SPLIT(pageopaque) && IsBufferCleanupOK(buf))
      113              :     {
      114              :         /* release the lock on bucket buffer, before completing the split. */
      115            0 :         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
      116              : 
      117            0 :         _hash_finish_split(rel, metabuf, buf, bucket,
      118            0 :                            usedmetap->hashm_maxbucket,
      119            0 :                            usedmetap->hashm_highmask,
      120            0 :                            usedmetap->hashm_lowmask);
      121              : 
      122              :         /* release the pin on old and meta buffer.  retry for insert. */
      123            0 :         _hash_dropbuf(rel, buf);
      124            0 :         _hash_dropbuf(rel, metabuf);
      125            0 :         goto restart_insert;
      126              :     }
      127              : 
      128              :     /* Do the insertion */
      129       846949 :     while (PageGetFreeSpace(page) < itemsz)
      130              :     {
      131              :         BlockNumber nextblkno;
      132              : 
      133              :         /*
      134              :          * Check if current page has any DEAD tuples. If yes, delete these
      135              :          * tuples and see if we can get a space for the new item to be
      136              :          * inserted before moving to the next page in the bucket chain.
      137              :          */
      138       356146 :         if (H_HAS_DEAD_TUPLES(pageopaque))
      139              :         {
      140              : 
      141            4 :             if (IsBufferCleanupOK(buf))
      142              :             {
      143            4 :                 _hash_vacuum_one_page(rel, heapRel, metabuf, buf);
      144              : 
      145            4 :                 if (PageGetFreeSpace(page) >= itemsz)
      146            4 :                     break;      /* OK, now we have enough space */
      147              :             }
      148              :         }
      149              : 
      150              :         /*
      151              :          * no space on this page; check for an overflow page
      152              :          */
      153       356142 :         nextblkno = pageopaque->hasho_nextblkno;
      154              : 
      155       356142 :         if (BlockNumberIsValid(nextblkno))
      156              :         {
      157              :             /*
      158              :              * ovfl page exists; go get it.  if it doesn't have room, we'll
      159              :              * find out next pass through the loop test above.  we always
      160              :              * release both the lock and pin if this is an overflow page, but
      161              :              * only the lock if this is the primary bucket page, since the pin
      162              :              * on the primary bucket must be retained throughout the scan.
      163              :              */
      164       355916 :             if (buf != bucket_buf)
      165       289024 :                 _hash_relbuf(rel, buf);
      166              :             else
      167        66892 :                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
      168       355916 :             buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
      169       355916 :             page = BufferGetPage(buf);
      170              :         }
      171              :         else
      172              :         {
      173              :             /*
      174              :              * we're at the end of the bucket chain and we haven't found a
      175              :              * page with enough room.  allocate a new overflow page.
      176              :              */
      177              : 
      178              :             /* release our write lock without modifying buffer */
      179          226 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
      180              : 
      181              :             /* chain to a new overflow page */
      182          226 :             buf = _hash_addovflpage(rel, metabuf, buf, (buf == bucket_buf));
      183          226 :             page = BufferGetPage(buf);
      184              : 
      185              :             /* should fit now, given test above */
      186              :             Assert(PageGetFreeSpace(page) >= itemsz);
      187              :         }
      188       356142 :         pageopaque = HashPageGetOpaque(page);
      189              :         Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_OVERFLOW_PAGE);
      190              :         Assert(pageopaque->hasho_bucket == bucket);
      191              :     }
      192              : 
      193              :     /*
      194              :      * Write-lock the metapage so we can increment the tuple count. After
      195              :      * incrementing it, check to see if it's time for a split.
      196              :      */
      197       490807 :     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
      198              : 
      199              :     /* Do the update.  No ereport(ERROR) until changes are logged */
      200       490807 :     START_CRIT_SECTION();
      201              : 
      202              :     /* found page with enough space, so add the item here */
      203       490807 :     itup_off = _hash_pgaddtup(rel, buf, itemsz, itup, sorted);
      204       490807 :     MarkBufferDirty(buf);
      205              : 
      206              :     /* metapage operations */
      207       490807 :     metap = HashPageGetMeta(metapage);
      208       490807 :     metap->hashm_ntuples += 1;
      209              : 
      210              :     /* Make sure this stays in sync with _hash_expandtable() */
      211       490807 :     do_expand = metap->hashm_ntuples >
      212       490807 :         (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
      213              : 
      214       490807 :     MarkBufferDirty(metabuf);
      215              : 
      216              :     /* XLOG stuff */
      217       490807 :     if (RelationNeedsWAL(rel))
      218       407717 :     {
      219              :         xl_hash_insert xlrec;
      220              : 
      221       407717 :         xlrec.offnum = itup_off;
      222              : 
      223       407717 :         XLogBeginInsert();
      224       407717 :         XLogRegisterData(&xlrec, SizeOfHashInsert);
      225              : 
      226       407717 :         XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);
      227              : 
      228       407717 :         XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
      229       407717 :         XLogRegisterBufData(0, itup, IndexTupleSize(itup));
      230              : 
      231       407717 :         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT);
      232              :     }
      233              :     else
                      :         /* relation is not WAL-logged; stamp the pages with a fake LSN instead */
      234        83090 :         recptr = XLogGetFakeLSN(rel);
      235              : 
      236       490807 :     PageSetLSN(BufferGetPage(buf), recptr);
      237       490807 :     PageSetLSN(BufferGetPage(metabuf), recptr);
      238              : 
      239       490807 :     END_CRIT_SECTION();
      240              : 
      241              :     /* drop lock on metapage, but keep pin */
      242       490807 :     LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
      243              : 
      244              :     /*
      245              :      * Release the modified page and ensure to release the pin on primary
      246              :      * page.
      247              :      */
      248       490807 :     _hash_relbuf(rel, buf);
      249       490807 :     if (buf != bucket_buf)
      250        66966 :         _hash_dropbuf(rel, bucket_buf);
      251              : 
      252              :     /* Attempt to split if a split is needed */
      253       490807 :     if (do_expand)
      254          893 :         _hash_expandtable(rel, metabuf);
      255              : 
      256              :     /* Finally drop our pin on the metapage */
      257       490807 :     _hash_dropbuf(rel, metabuf);
      258       490807 : }
     259              : 
      260              : /*
      261              :  *  _hash_pgaddtup() -- add a tuple to a particular page in the index.
      262              :  *
      263              :  * This routine adds the tuple to the page as requested; it does not write out
      264              :  * the page.  It is an error to call this function without pin and write lock
      265              :  * on the target buffer.
      266              :  *
      267              :  * Returns the offset number at which the tuple was inserted.  This function
      268              :  * is responsible for preserving the condition that tuples in a hash index
      269              :  * page are sorted by hashkey value, however, if the caller is certain that
      270              :  * the hashkey for the tuple being added is >= the hashkeys of all existing
      271              :  * tuples on the page, then the 'appendtup' flag may be passed as true.  This
      272              :  * saves from having to binary search for the correct location to insert the
      273              :  * tuple.
      274              :  */
      275              : OffsetNumber
      276       490807 : _hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup,
      277              :                bool appendtup)
      278              : {
      279              :     OffsetNumber itup_off;
      280              :     Page        page;
      281              : 
      282       490807 :     _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
      283       490807 :     page = BufferGetPage(buf);
      284              : 
      285              :     /*
      286              :      * Find where to insert the tuple (preserving page's hashkey ordering). If
      287              :      * 'appendtup' is true then we just insert it at the end.
      288              :      */
      289       490807 :     if (appendtup)
      290              :     {
      291        70500 :         itup_off = PageGetMaxOffsetNumber(page) + 1;
      292              : 
      293              : #ifdef USE_ASSERT_CHECKING
      294              :         /* ensure this tuple's hashkey is >= the final existing tuple */
      295              :         if (PageGetMaxOffsetNumber(page) > 0)
      296              :         {
      297              :             IndexTuple  lasttup;
      298              :             ItemId      itemid;
      299              : 
      300              :             itemid = PageGetItemId(page, PageGetMaxOffsetNumber(page));
      301              :             lasttup = (IndexTuple) PageGetItem(page, itemid);
      302              : 
      303              :             Assert(_hash_get_indextuple_hashkey(lasttup) <=
      304              :                    _hash_get_indextuple_hashkey(itup));
      305              :         }
      306              : #endif
      307              :     }
      308              :     else
      309              :     {
      310       420307 :         uint32      hashkey = _hash_get_indextuple_hashkey(itup);
      311              : 
      312       420307 :         itup_off = _hash_binsearch(page, hashkey);
      313              :     }
      314              : 
                      :     /* failure should be impossible: the caller verified sufficient free space */
      315       490807 :     if (PageAddItem(page, itup, itemsize, itup_off, false, false) == InvalidOffsetNumber)
      316            0 :         elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(rel));
      317              : 
      318       490807 :     return itup_off;
      319              : }
     320              : 
      321              : /*
      322              :  *  _hash_pgaddmultitup() -- add a tuple vector to a particular page in the
      323              :  *                           index.
      324              :  *
      325              :  * This routine has same requirements for locking and tuple ordering as
      326              :  * _hash_pgaddtup().
      327              :  *
      328              :  * The offset numbers at which the tuples were inserted are returned in the
                      :  * caller-supplied 'itup_offsets' array (the function itself returns void).
      329              :  */
      330              : void
      331         1005 : _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups,
      332              :                     OffsetNumber *itup_offsets, uint16 nitups)
      333              : {
      334              :     OffsetNumber itup_off;
      335              :     Page        page;
      336              :     uint32      hashkey;
      337              :     int         i;
      338              : 
      339         1005 :     _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
      340         1005 :     page = BufferGetPage(buf);
      341              : 
      342        88496 :     for (i = 0; i < nitups; i++)
      343              :     {
      344              :         Size        itemsize;
      345              : 
      346        87491 :         itemsize = IndexTupleSize(itups[i]);
      347        87491 :         itemsize = MAXALIGN(itemsize);
      348              : 
      349              :         /* Find where to insert the tuple (preserving page's hashkey ordering) */
      350        87491 :         hashkey = _hash_get_indextuple_hashkey(itups[i]);
      351        87491 :         itup_off = _hash_binsearch(page, hashkey);
      352              : 
      353        87491 :         itup_offsets[i] = itup_off;
      354              : 
      355        87491 :         if (PageAddItem(page, itups[i], itemsize, itup_off, false, false) == InvalidOffsetNumber)
      356            0 :             elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(rel));
      357              :     }
      358         1005 : }
     359              : 
      360              : /*
      361              :  * _hash_vacuum_one_page - vacuum just one index page.
      362              :  *
      363              :  * Try to remove LP_DEAD items from the given page. We must acquire cleanup
      364              :  * lock on the page being modified before calling this function.
                      :  *
                      :  * 'hrel' is the index's parent heap relation, used to compute the snapshot
                      :  * conflict horizon recorded in the WAL record.  'metabuf' is the metapage
                      :  * buffer (pinned but unlocked on entry); it is write-locked here only while
                      :  * the meta tuple count is decremented, and unlocked again before return.
      365              :  */
      366              : 
      367              : static void
      368            4 : _hash_vacuum_one_page(Relation rel, Relation hrel, Buffer metabuf, Buffer buf)
      369              : {
      370              :     OffsetNumber deletable[MaxOffsetNumber];
      371            4 :     int         ndeletable = 0;
      372              :     OffsetNumber offnum,
      373              :                 maxoff;
      374            4 :     Page        page = BufferGetPage(buf);
      375              :     HashPageOpaque pageopaque;
      376              :     HashMetaPage metap;
      377              :     XLogRecPtr  recptr;
      378              : 
      379              :     /* Scan each tuple in page to see if it is marked as LP_DEAD */
      380            4 :     maxoff = PageGetMaxOffsetNumber(page);
      381            4 :     for (offnum = FirstOffsetNumber;
      382         1632 :          offnum <= maxoff;
      383         1628 :          offnum = OffsetNumberNext(offnum))
      384              :     {
      385         1628 :         ItemId      itemId = PageGetItemId(page, offnum);
      386              : 
      387         1628 :         if (ItemIdIsDead(itemId))
      388          884 :             deletable[ndeletable++] = offnum;
      389              :     }
      390              : 
      391            4 :     if (ndeletable > 0)
      392              :     {
      393              :         TransactionId snapshotConflictHorizon;
      394              : 
      395              :         snapshotConflictHorizon =
      396            4 :             index_compute_xid_horizon_for_tuples(rel, hrel, buf,
      397              :                                                  deletable, ndeletable);
      398              : 
      399              :         /*
      400              :          * Write-lock the meta page so that we can decrement tuple count.
      401              :          */
      402            4 :         LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
      403              : 
      404              :         /* No ereport(ERROR) until changes are logged */
      405            4 :         START_CRIT_SECTION();
      406              : 
      407            4 :         PageIndexMultiDelete(page, deletable, ndeletable);
      408              : 
      409              :         /*
      410              :          * Mark the page as not containing any LP_DEAD items. This is not
      411              :          * certainly true (there might be some that have recently been marked,
      412              :          * but weren't included in our target-item list), but it will almost
      413              :          * always be true and it doesn't seem worth an additional page scan to
      414              :          * check it. Remember that LH_PAGE_HAS_DEAD_TUPLES is only a hint
      415              :          * anyway.
      416              :          */
      417            4 :         pageopaque = HashPageGetOpaque(page);
      418            4 :         pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
      419              : 
      420            4 :         metap = HashPageGetMeta(BufferGetPage(metabuf));
      421            4 :         metap->hashm_ntuples -= ndeletable;
      422              : 
      423            4 :         MarkBufferDirty(buf);
      424            4 :         MarkBufferDirty(metabuf);
      425              : 
      426              :         /* XLOG stuff */
      427            4 :         if (RelationNeedsWAL(rel))
      428            4 :         {
      429              :             xl_hash_vacuum_one_page xlrec;
      430              : 
      431            4 :             xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(hrel);
      432            4 :             xlrec.snapshotConflictHorizon = snapshotConflictHorizon;
      433            4 :             xlrec.ntuples = ndeletable;
      434              : 
      435            4 :             XLogBeginInsert();
      436            4 :             XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
      437            4 :             XLogRegisterData(&xlrec, SizeOfHashVacuumOnePage);
      438              : 
      439              :             /*
      440              :              * We need the target-offsets array whether or not we store the
      441              :              * whole buffer, to allow us to find the snapshotConflictHorizon
      442              :              * on a standby server.
      443              :              */
      444            4 :             XLogRegisterData(deletable,
      445              :                              ndeletable * sizeof(OffsetNumber));
      446              : 
      447            4 :             XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);
      448              : 
      449            4 :             recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_VACUUM_ONE_PAGE);
      450              :         }
      451              :         else
      452            0 :             recptr = XLogGetFakeLSN(rel);
      453              : 
      454            4 :         PageSetLSN(BufferGetPage(buf), recptr);
      455            4 :         PageSetLSN(BufferGetPage(metabuf), recptr);
      456              : 
      457            4 :         END_CRIT_SECTION();
      458              : 
      459              :         /*
      460              :          * Releasing write lock on meta page as we have updated the tuple
      461              :          * count.
      462              :          */
      463            4 :         LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
      464              :     }
      465            4 : }
        

Generated by: LCOV version 2.0-1