LCOV - code coverage report
Current view: top level - src/backend/access/hash - hashinsert.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 60.5 % 129 78
Test Date: 2026-03-03 22:15:26 Functions: 75.0 % 4 3
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * hashinsert.c
       4              :  *    Item insertion in hash tables for Postgres.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/access/hash/hashinsert.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : 
      16              : #include "postgres.h"
      17              : 
      18              : #include "access/hash.h"
      19              : #include "access/hash_xlog.h"
      20              : #include "access/xloginsert.h"
      21              : #include "miscadmin.h"
      22              : #include "storage/predicate.h"
      23              : #include "utils/rel.h"
      24              : 
      25              : static void _hash_vacuum_one_page(Relation rel, Relation hrel,
      26              :                                   Buffer metabuf, Buffer buf);
      27              : 
/*
 *  _hash_doinsert() -- Handle insertion of a single index tuple.
 *
 *      This routine is called by the public interface routines, hashbuild
 *      and hashinsert.  By here, itup is completely filled in.
 *
 * 'sorted' must only be passed as 'true' when inserts are done in hashkey
 * order.
 */
void
_hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel, bool sorted)
{
    Buffer      buf = InvalidBuffer;
    Buffer      bucket_buf;
    Buffer      metabuf;
    HashMetaPage metap;
    HashMetaPage usedmetap = NULL;
    Page        metapage;
    Page        page;
    HashPageOpaque pageopaque;
    Size        itemsz;
    bool        do_expand;
    uint32      hashkey;
    Bucket      bucket;
    OffsetNumber itup_off;

    /*
     * Get the hash key for the item (it's stored in the index tuple itself).
     */
    hashkey = _hash_get_indextuple_hashkey(itup);

    /* compute item size too */
    itemsz = IndexTupleSize(itup);
    itemsz = MAXALIGN(itemsz);  /* be safe, PageAddItem will do this but we
                                 * need to be consistent */

restart_insert:

    /*
     * Read the metapage.  We don't lock it yet; HashMaxItemSize() will
     * examine pd_pagesize_version, but that can't change so we can examine it
     * without a lock.
     */
    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
    metapage = BufferGetPage(metabuf);

    /*
     * Check whether the item can fit on a hash page at all. (Eventually, we
     * ought to try to apply TOAST methods if not.)  Note that at this point,
     * itemsz doesn't include the ItemId.
     *
     * XXX this is useless code if we are only storing hash keys.
     */
    if (itemsz > HashMaxItemSize(metapage))
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("index row size %zu exceeds hash maximum %zu",
                        itemsz, HashMaxItemSize(metapage)),
                 errhint("Values larger than a buffer page cannot be indexed.")));

    /* Lock the primary bucket page for the target bucket. */
    buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_WRITE,
                                          &usedmetap);
    Assert(usedmetap != NULL);

    /* detect rw-conflicts with serializable readers of this bucket page */
    CheckForSerializableConflictIn(rel, NULL, BufferGetBlockNumber(buf));

    /* remember the primary bucket buffer to release the pin on it at end. */
    bucket_buf = buf;

    page = BufferGetPage(buf);
    pageopaque = HashPageGetOpaque(page);
    bucket = pageopaque->hasho_bucket;

    /*
     * If this bucket is in the process of being split, try to finish the
     * split before inserting, because that might create room for the
     * insertion to proceed without allocating an additional overflow page.
     * It's only interesting to finish the split if we're trying to insert
     * into the bucket from which we're removing tuples (the "old" bucket),
     * not if we're trying to insert into the bucket into which tuples are
     * being moved (the "new" bucket).
     */
    if (H_BUCKET_BEING_SPLIT(pageopaque) && IsBufferCleanupOK(buf))
    {
        /* release the lock on bucket buffer, before completing the split. */
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);

        _hash_finish_split(rel, metabuf, buf, bucket,
                           usedmetap->hashm_maxbucket,
                           usedmetap->hashm_highmask,
                           usedmetap->hashm_lowmask);

        /* release the pin on old and meta buffer.  retry for insert. */
        _hash_dropbuf(rel, buf);
        _hash_dropbuf(rel, metabuf);
        goto restart_insert;
    }

    /* Do the insertion */
    while (PageGetFreeSpace(page) < itemsz)
    {
        BlockNumber nextblkno;

        /*
         * Check if current page has any DEAD tuples. If yes, delete these
         * tuples and see if we can get a space for the new item to be
         * inserted before moving to the next page in the bucket chain.
         */
        if (H_HAS_DEAD_TUPLES(pageopaque))
        {

            /* vacuuming requires a cleanup lock; skip if we can't get one */
            if (IsBufferCleanupOK(buf))
            {
                _hash_vacuum_one_page(rel, heapRel, metabuf, buf);

                if (PageGetFreeSpace(page) >= itemsz)
                    break;      /* OK, now we have enough space */
            }
        }

        /*
         * no space on this page; check for an overflow page
         */
        nextblkno = pageopaque->hasho_nextblkno;

        if (BlockNumberIsValid(nextblkno))
        {
            /*
             * ovfl page exists; go get it.  if it doesn't have room, we'll
             * find out next pass through the loop test above.  we always
             * release both the lock and pin if this is an overflow page, but
             * only the lock if this is the primary bucket page, since the pin
             * on the primary bucket must be retained throughout the scan.
             */
            if (buf != bucket_buf)
                _hash_relbuf(rel, buf);
            else
                LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
            page = BufferGetPage(buf);
        }
        else
        {
            /*
             * we're at the end of the bucket chain and we haven't found a
             * page with enough room.  allocate a new overflow page.
             */

            /* release our write lock without modifying buffer */
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);

            /* chain to a new overflow page */
            buf = _hash_addovflpage(rel, metabuf, buf, (buf == bucket_buf));
            page = BufferGetPage(buf);

            /* should fit now, given test above */
            Assert(PageGetFreeSpace(page) >= itemsz);
        }
        pageopaque = HashPageGetOpaque(page);
        Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_OVERFLOW_PAGE);
        Assert(pageopaque->hasho_bucket == bucket);
    }

    /*
     * Write-lock the metapage so we can increment the tuple count. After
     * incrementing it, check to see if it's time for a split.
     */
    LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);

    /* Do the update.  No ereport(ERROR) until changes are logged */
    START_CRIT_SECTION();

    /* found page with enough space, so add the item here */
    itup_off = _hash_pgaddtup(rel, buf, itemsz, itup, sorted);
    MarkBufferDirty(buf);

    /* metapage operations */
    metap = HashPageGetMeta(metapage);
    metap->hashm_ntuples += 1;

    /* Make sure this stays in sync with _hash_expandtable() */
    do_expand = metap->hashm_ntuples >
        (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);

    MarkBufferDirty(metabuf);

    /* XLOG stuff */
    if (RelationNeedsWAL(rel))
    {
        xl_hash_insert xlrec;
        XLogRecPtr  recptr;

        xlrec.offnum = itup_off;

        XLogBeginInsert();
        XLogRegisterData(&xlrec, SizeOfHashInsert);

        XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);

        /* the new tuple rides along as block 0's buffer data */
        XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
        XLogRegisterBufData(0, itup, IndexTupleSize(itup));

        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT);

        PageSetLSN(BufferGetPage(buf), recptr);
        PageSetLSN(BufferGetPage(metabuf), recptr);
    }

    END_CRIT_SECTION();

    /* drop lock on metapage, but keep pin */
    LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);

    /*
     * Release the modified page and ensure to release the pin on primary
     * page.
     */
    _hash_relbuf(rel, buf);
    if (buf != bucket_buf)
        _hash_dropbuf(rel, bucket_buf);

    /* Attempt to split if a split is needed */
    if (do_expand)
        _hash_expandtable(rel, metabuf);

    /* Finally drop our pin on the metapage */
    _hash_dropbuf(rel, metabuf);
}
     257              : 
     258              : /*
     259              :  *  _hash_pgaddtup() -- add a tuple to a particular page in the index.
     260              :  *
     261              :  * This routine adds the tuple to the page as requested; it does not write out
     262              :  * the page.  It is an error to call this function without pin and write lock
     263              :  * on the target buffer.
     264              :  *
     265              :  * Returns the offset number at which the tuple was inserted.  This function
     266              :  * is responsible for preserving the condition that tuples in a hash index
     267              :  * page are sorted by hashkey value, however, if the caller is certain that
     268              :  * the hashkey for the tuple being added is >= the hashkeys of all existing
     269              :  * tuples on the page, then the 'appendtup' flag may be passed as true.  This
     270              :  * saves from having to binary search for the correct location to insert the
     271              :  * tuple.
     272              :  */
     273              : OffsetNumber
     274       357413 : _hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup,
     275              :                bool appendtup)
     276              : {
     277              :     OffsetNumber itup_off;
     278              :     Page        page;
     279              : 
     280       357413 :     _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
     281       357413 :     page = BufferGetPage(buf);
     282              : 
     283              :     /*
     284              :      * Find where to insert the tuple (preserving page's hashkey ordering). If
     285              :      * 'appendtup' is true then we just insert it at the end.
     286              :      */
     287       357413 :     if (appendtup)
     288              :     {
     289        60500 :         itup_off = PageGetMaxOffsetNumber(page) + 1;
     290              : 
     291              : #ifdef USE_ASSERT_CHECKING
     292              :         /* ensure this tuple's hashkey is >= the final existing tuple */
     293              :         if (PageGetMaxOffsetNumber(page) > 0)
     294              :         {
     295              :             IndexTuple  lasttup;
     296              :             ItemId      itemid;
     297              : 
     298              :             itemid = PageGetItemId(page, PageGetMaxOffsetNumber(page));
     299              :             lasttup = (IndexTuple) PageGetItem(page, itemid);
     300              : 
     301              :             Assert(_hash_get_indextuple_hashkey(lasttup) <=
     302              :                    _hash_get_indextuple_hashkey(itup));
     303              :         }
     304              : #endif
     305              :     }
     306              :     else
     307              :     {
     308       296913 :         uint32      hashkey = _hash_get_indextuple_hashkey(itup);
     309              : 
     310       296913 :         itup_off = _hash_binsearch(page, hashkey);
     311              :     }
     312              : 
     313       357413 :     if (PageAddItem(page, itup, itemsize, itup_off, false, false) == InvalidOffsetNumber)
     314            0 :         elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(rel));
     315              : 
     316       357413 :     return itup_off;
     317              : }
     318              : 
     319              : /*
     320              :  *  _hash_pgaddmultitup() -- add a tuple vector to a particular page in the
     321              :  *                           index.
     322              :  *
     323              :  * This routine has same requirements for locking and tuple ordering as
     324              :  * _hash_pgaddtup().
     325              :  *
     326              :  * Returns the offset number array at which the tuples were inserted.
     327              :  */
     328              : void
     329          631 : _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups,
     330              :                     OffsetNumber *itup_offsets, uint16 nitups)
     331              : {
     332              :     OffsetNumber itup_off;
     333              :     Page        page;
     334              :     uint32      hashkey;
     335              :     int         i;
     336              : 
     337          631 :     _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
     338          631 :     page = BufferGetPage(buf);
     339              : 
     340        63608 :     for (i = 0; i < nitups; i++)
     341              :     {
     342              :         Size        itemsize;
     343              : 
     344        62977 :         itemsize = IndexTupleSize(itups[i]);
     345        62977 :         itemsize = MAXALIGN(itemsize);
     346              : 
     347              :         /* Find where to insert the tuple (preserving page's hashkey ordering) */
     348        62977 :         hashkey = _hash_get_indextuple_hashkey(itups[i]);
     349        62977 :         itup_off = _hash_binsearch(page, hashkey);
     350              : 
     351        62977 :         itup_offsets[i] = itup_off;
     352              : 
     353        62977 :         if (PageAddItem(page, itups[i], itemsize, itup_off, false, false) == InvalidOffsetNumber)
     354            0 :             elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(rel));
     355              :     }
     356          631 : }
     357              : 
/*
 * _hash_vacuum_one_page - vacuum just one index page.
 *
 * Try to remove LP_DEAD items from the given page. We must acquire cleanup
 * lock on the page being modified before calling this function.
 */

static void
_hash_vacuum_one_page(Relation rel, Relation hrel, Buffer metabuf, Buffer buf)
{
    OffsetNumber deletable[MaxOffsetNumber];
    int         ndeletable = 0;
    OffsetNumber offnum,
                maxoff;
    Page        page = BufferGetPage(buf);
    HashPageOpaque pageopaque;
    HashMetaPage metap;

    /* Scan each tuple in page to see if it is marked as LP_DEAD */
    maxoff = PageGetMaxOffsetNumber(page);
    for (offnum = FirstOffsetNumber;
         offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      itemId = PageGetItemId(page, offnum);

        if (ItemIdIsDead(itemId))
            deletable[ndeletable++] = offnum;
    }

    /* nothing to do if no LP_DEAD items were found */
    if (ndeletable > 0)
    {
        TransactionId snapshotConflictHorizon;

        /*
         * Compute the latest xid among the dead tuples' heap entries; recovery
         * conflicts on a standby are resolved against this horizon.
         */
        snapshotConflictHorizon =
            index_compute_xid_horizon_for_tuples(rel, hrel, buf,
                                                 deletable, ndeletable);

        /*
         * Write-lock the meta page so that we can decrement tuple count.
         */
        LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);

        /* No ereport(ERROR) until changes are logged */
        START_CRIT_SECTION();

        PageIndexMultiDelete(page, deletable, ndeletable);

        /*
         * Mark the page as not containing any LP_DEAD items. This is not
         * certainly true (there might be some that have recently been marked,
         * but weren't included in our target-item list), but it will almost
         * always be true and it doesn't seem worth an additional page scan to
         * check it. Remember that LH_PAGE_HAS_DEAD_TUPLES is only a hint
         * anyway.
         */
        pageopaque = HashPageGetOpaque(page);
        pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;

        metap = HashPageGetMeta(BufferGetPage(metabuf));
        metap->hashm_ntuples -= ndeletable;

        MarkBufferDirty(buf);
        MarkBufferDirty(metabuf);

        /* XLOG stuff */
        if (RelationNeedsWAL(rel))
        {
            xl_hash_vacuum_one_page xlrec;
            XLogRecPtr  recptr;

            xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(hrel);
            xlrec.snapshotConflictHorizon = snapshotConflictHorizon;
            xlrec.ntuples = ndeletable;

            XLogBeginInsert();
            XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
            XLogRegisterData(&xlrec, SizeOfHashVacuumOnePage);

            /*
             * We need the target-offsets array whether or not we store the
             * whole buffer, to allow us to find the snapshotConflictHorizon
             * on a standby server.
             */
            XLogRegisterData(deletable,
                             ndeletable * sizeof(OffsetNumber));

            XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);

            recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_VACUUM_ONE_PAGE);

            PageSetLSN(BufferGetPage(buf), recptr);
            PageSetLSN(BufferGetPage(metabuf), recptr);
        }

        END_CRIT_SECTION();

        /*
         * Releasing write lock on meta page as we have updated the tuple
         * count.
         */
        LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
    }
}
        

Generated by: LCOV version 2.0-1