LCOV - code coverage report
Current view: top level - src/backend/access/hash - hashinsert.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 75 125 60.0 %
Date: 2019-11-13 21:06:57 Functions: 3 4 75.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * hashinsert.c
       4             :  *    Item insertion in hash tables for Postgres.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/hash/hashinsert.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : 
      16             : #include "postgres.h"
      17             : 
      18             : #include "access/hash.h"
      19             : #include "access/hash_xlog.h"
      20             : #include "miscadmin.h"
      21             : #include "storage/buf_internals.h"
      22             : #include "storage/lwlock.h"
      23             : #include "storage/predicate.h"
      24             : #include "utils/rel.h"
      25             : 
      26             : static void _hash_vacuum_one_page(Relation rel, Relation hrel,
      27             :                                   Buffer metabuf, Buffer buf);
      28             : 
      29             : /*
      30             :  *  _hash_doinsert() -- Handle insertion of a single index tuple.
      31             :  *
      32             :  *      This routine is called by the public interface routines, hashbuild
      33             :  *      and hashinsert.  By here, itup is completely filled in.
      34             :  */
      35             : void
      36      439932 : _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel)
      37             : {
      38      439932 :     Buffer      buf = InvalidBuffer;
      39             :     Buffer      bucket_buf;
      40             :     Buffer      metabuf;
      41             :     HashMetaPage metap;
      42      439932 :     HashMetaPage usedmetap = NULL;
      43             :     Page        metapage;
      44             :     Page        page;
      45             :     HashPageOpaque pageopaque;
      46             :     Size        itemsz;
      47             :     bool        do_expand;
      48             :     uint32      hashkey;
      49             :     Bucket      bucket;
      50             :     OffsetNumber itup_off;
      51             : 
      52             :     /*
      53             :      * Get the hash key for the item (it's stored in the index tuple itself).
      54             :      */
      55      439932 :     hashkey = _hash_get_indextuple_hashkey(itup);
      56             : 
      57             :     /* compute item size too */
      58      439932 :     itemsz = IndexTupleSize(itup);
      59      439932 :     itemsz = MAXALIGN(itemsz);  /* be safe, PageAddItem will do this but we
      60             :                                  * need to be consistent */
      61             : 
      62             : restart_insert:
      63             : 
      64             :     /*
      65             :      * Read the metapage.  We don't lock it yet; HashMaxItemSize() will
      66             :      * examine pd_pagesize_version, but that can't change so we can examine it
      67             :      * without a lock.
      68             :      */
      69      439932 :     metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
      70      439932 :     metapage = BufferGetPage(metabuf);
      71             : 
      72             :     /*
      73             :      * Check whether the item can fit on a hash page at all. (Eventually, we
      74             :      * ought to try to apply TOAST methods if not.)  Note that at this point,
      75             :      * itemsz doesn't include the ItemId.
      76             :      *
      77             :      * XXX this is useless code if we are only storing hash keys.
      78             :      */
      79      439932 :     if (itemsz > HashMaxItemSize(metapage))
      80           0 :         ereport(ERROR,
      81             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
      82             :                  errmsg("index row size %zu exceeds hash maximum %zu",
      83             :                         itemsz, HashMaxItemSize(metapage)),
      84             :                  errhint("Values larger than a buffer page cannot be indexed.")));
      85             : 
      86             :     /* Lock the primary bucket page for the target bucket. */
      87      439932 :     buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_WRITE,
      88             :                                           &usedmetap);
      89             :     Assert(usedmetap != NULL);
      90             : 
      91      439932 :     CheckForSerializableConflictIn(rel, NULL, buf);
      92             : 
      93             :     /* remember the primary bucket buffer to release the pin on it at end. */
      94      439920 :     bucket_buf = buf;
      95             : 
      96      439920 :     page = BufferGetPage(buf);
      97      439920 :     pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
      98      439920 :     bucket = pageopaque->hasho_bucket;
      99             : 
     100             :     /*
     101             :      * If this bucket is in the process of being split, try to finish the
     102             :      * split before inserting, because that might create room for the
     103             :      * insertion to proceed without allocating an additional overflow page.
     104             :      * It's only interesting to finish the split if we're trying to insert
     105             :      * into the bucket from which we're removing tuples (the "old" bucket),
     106             :      * not if we're trying to insert into the bucket into which tuples are
     107             :      * being moved (the "new" bucket).
     108             :      */
     109      439920 :     if (H_BUCKET_BEING_SPLIT(pageopaque) && IsBufferCleanupOK(buf))
     110             :     {
     111             :         /* release the lock on bucket buffer, before completing the split. */
     112           0 :         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     113             : 
     114           0 :         _hash_finish_split(rel, metabuf, buf, bucket,
     115           0 :                            usedmetap->hashm_maxbucket,
     116           0 :                            usedmetap->hashm_highmask,
     117           0 :                            usedmetap->hashm_lowmask);
     118             : 
     119             :         /* release the pin on old and meta buffer.  retry for insert. */
     120           0 :         _hash_dropbuf(rel, buf);
     121           0 :         _hash_dropbuf(rel, metabuf);
     122           0 :         goto restart_insert;
     123             :     }
     124             : 
     125             :     /* Do the insertion */
     126     1113790 :     while (PageGetFreeSpace(page) < itemsz)
     127             :     {
     128             :         BlockNumber nextblkno;
     129             : 
     130             :         /*
     131             :          * Check if current page has any DEAD tuples. If yes, delete these
     132             :          * tuples and see if we can get a space for the new item to be
     133             :          * inserted before moving to the next page in the bucket chain.
     134             :          */
     135      233950 :         if (H_HAS_DEAD_TUPLES(pageopaque))
     136             :         {
     137             : 
     138           0 :             if (IsBufferCleanupOK(buf))
     139             :             {
     140           0 :                 _hash_vacuum_one_page(rel, heapRel, metabuf, buf);
     141             : 
     142           0 :                 if (PageGetFreeSpace(page) >= itemsz)
     143           0 :                     break;      /* OK, now we have enough space */
     144             :             }
     145             :         }
     146             : 
     147             :         /*
     148             :          * no space on this page; check for an overflow page
     149             :          */
     150      233950 :         nextblkno = pageopaque->hasho_nextblkno;
     151             : 
     152      233950 :         if (BlockNumberIsValid(nextblkno))
     153             :         {
     154             :             /*
     155             :              * ovfl page exists; go get it.  if it doesn't have room, we'll
     156             :              * find out next pass through the loop test above.  we always
     157             :              * release both the lock and pin if this is an overflow page, but
     158             :              * only the lock if this is the primary bucket page, since the pin
     159             :              * on the primary bucket must be retained throughout the scan.
     160             :              */
     161      233824 :             if (buf != bucket_buf)
     162      200764 :                 _hash_relbuf(rel, buf);
     163             :             else
     164       33060 :                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     165      233824 :             buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
     166      233824 :             page = BufferGetPage(buf);
     167             :         }
     168             :         else
     169             :         {
     170             :             /*
     171             :              * we're at the end of the bucket chain and we haven't found a
     172             :              * page with enough room.  allocate a new overflow page.
     173             :              */
     174             : 
     175             :             /* release our write lock without modifying buffer */
     176         126 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     177             : 
     178             :             /* chain to a new overflow page */
     179         126 :             buf = _hash_addovflpage(rel, metabuf, buf, (buf == bucket_buf) ? true : false);
     180         126 :             page = BufferGetPage(buf);
     181             : 
     182             :             /* should fit now, given test above */
     183             :             Assert(PageGetFreeSpace(page) >= itemsz);
     184             :         }
     185      233950 :         pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
     186             :         Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_OVERFLOW_PAGE);
     187             :         Assert(pageopaque->hasho_bucket == bucket);
     188             :     }
     189             : 
     190             :     /*
     191             :      * Write-lock the metapage so we can increment the tuple count. After
     192             :      * incrementing it, check to see if it's time for a split.
     193             :      */
     194      439920 :     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     195             : 
     196             :     /* Do the update.  No ereport(ERROR) until changes are logged */
     197      439920 :     START_CRIT_SECTION();
     198             : 
     199             :     /* found page with enough space, so add the item here */
     200      439920 :     itup_off = _hash_pgaddtup(rel, buf, itemsz, itup);
     201      439920 :     MarkBufferDirty(buf);
     202             : 
     203             :     /* metapage operations */
     204      439920 :     metap = HashPageGetMeta(metapage);
     205      439920 :     metap->hashm_ntuples += 1;
     206             : 
     207             :     /* Make sure this stays in sync with _hash_expandtable() */
     208      879840 :     do_expand = metap->hashm_ntuples >
     209      439920 :         (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
     210             : 
     211      439920 :     MarkBufferDirty(metabuf);
     212             : 
     213             :     /* XLOG stuff */
     214      439920 :     if (RelationNeedsWAL(rel))
     215             :     {
     216             :         xl_hash_insert xlrec;
     217             :         XLogRecPtr  recptr;
     218             : 
     219      439916 :         xlrec.offnum = itup_off;
     220             : 
     221      439916 :         XLogBeginInsert();
     222      439916 :         XLogRegisterData((char *) &xlrec, SizeOfHashInsert);
     223             : 
     224      439916 :         XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);
     225             : 
     226      439916 :         XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
     227      439916 :         XLogRegisterBufData(0, (char *) itup, IndexTupleSize(itup));
     228             : 
     229      439916 :         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT);
     230             : 
     231      439916 :         PageSetLSN(BufferGetPage(buf), recptr);
     232      439916 :         PageSetLSN(BufferGetPage(metabuf), recptr);
     233             :     }
     234             : 
     235      439920 :     END_CRIT_SECTION();
     236             : 
     237             :     /* drop lock on metapage, but keep pin */
     238      439920 :     LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     239             : 
     240             :     /*
     241             :      * Release the modified page and ensure to release the pin on primary
     242             :      * page.
     243             :      */
     244      439920 :     _hash_relbuf(rel, buf);
     245      439920 :     if (buf != bucket_buf)
     246       33106 :         _hash_dropbuf(rel, bucket_buf);
     247             : 
     248             :     /* Attempt to split if a split is needed */
     249      439920 :     if (do_expand)
     250         626 :         _hash_expandtable(rel, metabuf);
     251             : 
     252             :     /* Finally drop our pin on the metapage */
     253      439920 :     _hash_dropbuf(rel, metabuf);
     254      439920 : }
     255             : 
     256             : /*
     257             :  *  _hash_pgaddtup() -- add a tuple to a particular page in the index.
     258             :  *
     259             :  * This routine adds the tuple to the page as requested; it does not write out
     260             :  * the page.  It is an error to call this function without pin and write lock
     261             :  * on the target buffer.
     262             :  *
     263             :  * Returns the offset number at which the tuple was inserted.  This function
     264             :  * is responsible for preserving the condition that tuples in a hash index
     265             :  * page are sorted by hashkey value.
     266             :  */
     267             : OffsetNumber
     268      439920 : _hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup)
     269             : {
     270             :     OffsetNumber itup_off;
     271             :     Page        page;
     272             :     uint32      hashkey;
     273             : 
     274      439920 :     _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
     275      439920 :     page = BufferGetPage(buf);
     276             : 
     277             :     /* Find where to insert the tuple (preserving page's hashkey ordering) */
     278      439920 :     hashkey = _hash_get_indextuple_hashkey(itup);
     279      439920 :     itup_off = _hash_binsearch(page, hashkey);
     280             : 
     281      439920 :     if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
     282             :         == InvalidOffsetNumber)
     283           0 :         elog(ERROR, "failed to add index item to \"%s\"",
     284             :              RelationGetRelationName(rel));
     285             : 
     286      439920 :     return itup_off;
     287             : }
     288             : 
     289             : /*
     290             :  *  _hash_pgaddmultitup() -- add a tuple vector to a particular page in the
     291             :  *                           index.
     292             :  *
     293             :  * This routine has same requirements for locking and tuple ordering as
     294             :  * _hash_pgaddtup().
     295             :  *
     296             :  * Returns the offset number array at which the tuples were inserted.
     297             :  */
     298             : void
     299         708 : _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups,
     300             :                     OffsetNumber *itup_offsets, uint16 nitups)
     301             : {
     302             :     OffsetNumber itup_off;
     303             :     Page        page;
     304             :     uint32      hashkey;
     305             :     int         i;
     306             : 
     307         708 :     _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
     308         708 :     page = BufferGetPage(buf);
     309             : 
     310       75928 :     for (i = 0; i < nitups; i++)
     311             :     {
     312             :         Size        itemsize;
     313             : 
     314       75220 :         itemsize = IndexTupleSize(itups[i]);
     315       75220 :         itemsize = MAXALIGN(itemsize);
     316             : 
     317             :         /* Find where to insert the tuple (preserving page's hashkey ordering) */
     318       75220 :         hashkey = _hash_get_indextuple_hashkey(itups[i]);
     319       75220 :         itup_off = _hash_binsearch(page, hashkey);
     320             : 
     321       75220 :         itup_offsets[i] = itup_off;
     322             : 
     323       75220 :         if (PageAddItem(page, (Item) itups[i], itemsize, itup_off, false, false)
     324             :             == InvalidOffsetNumber)
     325           0 :             elog(ERROR, "failed to add index item to \"%s\"",
     326             :                  RelationGetRelationName(rel));
     327             :     }
     328         708 : }
     329             : 
     330             : /*
     331             :  * _hash_vacuum_one_page - vacuum just one index page.
     332             :  *
     333             :  * Try to remove LP_DEAD items from the given page. We must acquire cleanup
     334             :  * lock on the page being modified before calling this function.
     335             :  */
     336             : 
     337             : static void
     338           0 : _hash_vacuum_one_page(Relation rel, Relation hrel, Buffer metabuf, Buffer buf)
     339             : {
     340             :     OffsetNumber deletable[MaxOffsetNumber];
     341           0 :     int         ndeletable = 0;
     342             :     OffsetNumber offnum,
     343             :                 maxoff;
     344           0 :     Page        page = BufferGetPage(buf);
     345             :     HashPageOpaque pageopaque;
     346             :     HashMetaPage metap;
     347             : 
     348             :     /* Scan each tuple in page to see if it is marked as LP_DEAD */
     349           0 :     maxoff = PageGetMaxOffsetNumber(page);
     350           0 :     for (offnum = FirstOffsetNumber;
     351             :          offnum <= maxoff;
     352           0 :          offnum = OffsetNumberNext(offnum))
     353             :     {
     354           0 :         ItemId      itemId = PageGetItemId(page, offnum);
     355             : 
     356           0 :         if (ItemIdIsDead(itemId))
     357           0 :             deletable[ndeletable++] = offnum;
     358             :     }
     359             : 
     360           0 :     if (ndeletable > 0)
     361             :     {
     362             :         TransactionId latestRemovedXid;
     363             : 
     364           0 :         latestRemovedXid =
     365             :             index_compute_xid_horizon_for_tuples(rel, hrel, buf,
     366             :                                                  deletable, ndeletable);
     367             : 
     368             :         /*
     369             :          * Write-lock the meta page so that we can decrement tuple count.
     370             :          */
     371           0 :         LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     372             : 
     373             :         /* No ereport(ERROR) until changes are logged */
     374           0 :         START_CRIT_SECTION();
     375             : 
     376           0 :         PageIndexMultiDelete(page, deletable, ndeletable);
     377             : 
     378             :         /*
     379             :          * Mark the page as not containing any LP_DEAD items. This is not
     380             :          * certainly true (there might be some that have recently been marked,
     381             :          * but weren't included in our target-item list), but it will almost
     382             :          * always be true and it doesn't seem worth an additional page scan to
     383             :          * check it. Remember that LH_PAGE_HAS_DEAD_TUPLES is only a hint
     384             :          * anyway.
     385             :          */
     386           0 :         pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
     387           0 :         pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
     388             : 
     389           0 :         metap = HashPageGetMeta(BufferGetPage(metabuf));
     390           0 :         metap->hashm_ntuples -= ndeletable;
     391             : 
     392           0 :         MarkBufferDirty(buf);
     393           0 :         MarkBufferDirty(metabuf);
     394             : 
     395             :         /* XLOG stuff */
     396           0 :         if (RelationNeedsWAL(rel))
     397             :         {
     398             :             xl_hash_vacuum_one_page xlrec;
     399             :             XLogRecPtr  recptr;
     400             : 
     401           0 :             xlrec.latestRemovedXid = latestRemovedXid;
     402           0 :             xlrec.ntuples = ndeletable;
     403             : 
     404           0 :             XLogBeginInsert();
     405           0 :             XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
     406           0 :             XLogRegisterData((char *) &xlrec, SizeOfHashVacuumOnePage);
     407             : 
     408             :             /*
     409             :              * We need the target-offsets array whether or not we store the
     410             :              * whole buffer, to allow us to find the latestRemovedXid on a
     411             :              * standby server.
     412             :              */
     413           0 :             XLogRegisterData((char *) deletable,
     414           0 :                              ndeletable * sizeof(OffsetNumber));
     415             : 
     416           0 :             XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);
     417             : 
     418           0 :             recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_VACUUM_ONE_PAGE);
     419             : 
     420           0 :             PageSetLSN(BufferGetPage(buf), recptr);
     421           0 :             PageSetLSN(BufferGetPage(metabuf), recptr);
     422             :         }
     423             : 
     424           0 :         END_CRIT_SECTION();
     425             : 
     426             :         /*
     427             :          * Releasing write lock on meta page as we have updated the tuple
     428             :          * count.
     429             :          */
     430           0 :         LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     431             :     }
     432           0 : }

Generated by: LCOV version 1.13