LCOV - code coverage report
Current view: top level - src/backend/access/hash - hash.c (source / functions)
Test:      PostgreSQL 19devel                    Coverage    Total    Hit
Test Date: 2026-02-17 17:20:33        Lines:       93.0 %      270    251
                                      Functions:   93.3 %       15     14

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * hash.c
       4              :  *    Implementation of Margo Seltzer's Hashing package for postgres.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/access/hash/hash.c
      12              :  *
      13              :  * NOTES
      14              :  *    This file contains only the public interface routines.
      15              :  *
      16              :  *-------------------------------------------------------------------------
      17              :  */
      18              : 
      19              : #include "postgres.h"
      20              : 
      21              : #include "access/hash.h"
      22              : #include "access/hash_xlog.h"
      23              : #include "access/relscan.h"
      24              : #include "access/stratnum.h"
      25              : #include "access/tableam.h"
      26              : #include "access/xloginsert.h"
      27              : #include "commands/progress.h"
      28              : #include "commands/vacuum.h"
      29              : #include "miscadmin.h"
      30              : #include "nodes/execnodes.h"
      31              : #include "optimizer/plancat.h"
      32              : #include "pgstat.h"
      33              : #include "utils/fmgrprotos.h"
      34              : #include "utils/index_selfuncs.h"
      35              : #include "utils/rel.h"
      36              : 
      37              : /* Working state for hashbuild and its callback */
      38              : typedef struct
      39              : {
      40              :     HSpool     *spool;          /* NULL if not using spooling */
      41              :     double      indtuples;      /* # tuples accepted into index */
      42              :     Relation    heapRel;        /* heap relation descriptor */
      43              : } HashBuildState;
      44              : 
      45              : static void hashbuildCallback(Relation index,
      46              :                               ItemPointer tid,
      47              :                               Datum *values,
      48              :                               bool *isnull,
      49              :                               bool tupleIsAlive,
      50              :                               void *state);
      51              : 
      52              : 
      53              : /*
      54              :  * Hash handler function: return IndexAmRoutine with access method parameters
      55              :  * and callbacks.
      56              :  */
      57              : Datum
      58         1774 : hashhandler(PG_FUNCTION_ARGS)
      59              : {
      60              :     static const IndexAmRoutine amroutine = {
      61              :         .type = T_IndexAmRoutine,
      62              :         .amstrategies = HTMaxStrategyNumber,
      63              :         .amsupport = HASHNProcs,
      64              :         .amoptsprocnum = HASHOPTIONS_PROC,
      65              :         .amcanorder = false,
      66              :         .amcanorderbyop = false,
      67              :         .amcanhash = true,
      68              :         .amconsistentequality = true,
      69              :         .amconsistentordering = false,
      70              :         .amcanbackward = true,
      71              :         .amcanunique = false,
      72              :         .amcanmulticol = false,
      73              :         .amoptionalkey = false,
      74              :         .amsearcharray = false,
      75              :         .amsearchnulls = false,
      76              :         .amstorage = false,
      77              :         .amclusterable = false,
      78              :         .ampredlocks = true,
      79              :         .amcanparallel = false,
      80              :         .amcanbuildparallel = false,
      81              :         .amcaninclude = false,
      82              :         .amusemaintenanceworkmem = false,
      83              :         .amsummarizing = false,
      84              :         .amparallelvacuumoptions =
      85              :         VACUUM_OPTION_PARALLEL_BULKDEL,
      86              :         .amkeytype = INT4OID,
      87              : 
      88              :         .ambuild = hashbuild,
      89              :         .ambuildempty = hashbuildempty,
      90              :         .aminsert = hashinsert,
      91              :         .aminsertcleanup = NULL,
      92              :         .ambulkdelete = hashbulkdelete,
      93              :         .amvacuumcleanup = hashvacuumcleanup,
      94              :         .amcanreturn = NULL,
      95              :         .amcostestimate = hashcostestimate,
      96              :         .amgettreeheight = NULL,
      97              :         .amoptions = hashoptions,
      98              :         .amproperty = NULL,
      99              :         .ambuildphasename = NULL,
     100              :         .amvalidate = hashvalidate,
     101              :         .amadjustmembers = hashadjustmembers,
     102              :         .ambeginscan = hashbeginscan,
     103              :         .amrescan = hashrescan,
     104              :         .amgettuple = hashgettuple,
     105              :         .amgetbitmap = hashgetbitmap,
     106              :         .amendscan = hashendscan,
     107              :         .ammarkpos = NULL,
     108              :         .amrestrpos = NULL,
     109              :         .amestimateparallelscan = NULL,
     110              :         .aminitparallelscan = NULL,
     111              :         .amparallelrescan = NULL,
     112              :         .amtranslatestrategy = hashtranslatestrategy,
     113              :         .amtranslatecmptype = hashtranslatecmptype,
     114              :     };
     115              : 
     116         1774 :     PG_RETURN_POINTER(&amroutine);
     117              : }
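                        /*
                         * Roughly how this handler is consumed (an illustrative sketch, not
                         * code from this file): core code looks up the handler recorded in
                         * pg_am, calls it, casts the returned Datum back to the routine
                         * struct, and drives the AM through its callbacks, e.g.
                         *
                         *     IndexAmRoutine *amroutine =
                         *         (IndexAmRoutine *) DatumGetPointer(hashhandler(fcinfo));
                         *     result = amroutine->ambuild(heap, index, indexInfo);
                         */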
     118              : 
     119              : /*
     120              :  *  hashbuild() -- build a new hash index.
     121              :  */
     122              : IndexBuildResult *
     123          173 : hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     124              : {
     125              :     IndexBuildResult *result;
     126              :     BlockNumber relpages;
     127              :     double      reltuples;
     128              :     double      allvisfrac;
     129              :     uint32      num_buckets;
     130              :     Size        sort_threshold;
     131              :     HashBuildState buildstate;
     132              : 
     133              :     /*
     134              :      * We expect to be called exactly once for any index relation. If that's
     135              :      * not the case, big trouble's what we have.
     136              :      */
     137          173 :     if (RelationGetNumberOfBlocks(index) != 0)
     138            0 :         elog(ERROR, "index \"%s\" already contains data",
     139              :              RelationGetRelationName(index));
     140              : 
     141              :     /* Estimate the number of rows currently present in the table */
     142          173 :     estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);
     143              : 
     144              :     /* Initialize the hash index metadata page and initial buckets */
     145          173 :     num_buckets = _hash_init(index, reltuples, MAIN_FORKNUM);
     146              : 
     147              :     /*
     148              :      * If we just insert the tuples into the index in scan order, then
     149              :      * (assuming their hash codes are pretty random) there will be no locality
     150              :      * of access to the index, and if the index is bigger than available RAM
     151              :      * then we'll thrash horribly.  To prevent that scenario, we can sort the
     152              :      * tuples by (expected) bucket number.  However, such a sort is useless
     153              :      * overhead when the index does fit in RAM.  We choose to sort if the
     154              :      * initial index size exceeds maintenance_work_mem, or the number of
     155              :      * buffers usable for the index, whichever is less.  (Limiting by the
     156              :      * number of buffers should reduce thrashing between PG buffers and kernel
     157              :      * buffers, which seems useful even if no physical I/O results.  Limiting
     158              :      * by maintenance_work_mem is useful to allow easy testing of the sort
     159              :      * code path, and may be useful to DBAs as an additional control knob.)
     160              :      *
     161              :      * NOTE: this test will need adjustment if a bucket is ever different from
     162              :      * one page.  Also, "initial index size" accounting does not include the
     163              :      * metapage, nor the first bitmap page.
     164              :      */
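                            /*
                             * Illustrative arithmetic for the threshold computed below
                             * (hypothetical settings, not taken from this build): with
                             * maintenance_work_mem = 65536 kB and BLCKSZ = 8192, the
                             * memory-based limit is 65536 * 1024 / 8192 = 8192 bucket pages;
                             * for a non-temporary index it is then clamped to NBuffers, and
                             * the spooling/sort path is used only when _hash_init created at
                             * least that many buckets.
                             */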
     165          173 :     sort_threshold = (maintenance_work_mem * (Size) 1024) / BLCKSZ;
     166          173 :     if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
     167          169 :         sort_threshold = Min(sort_threshold, NBuffers);
     168              :     else
     169            4 :         sort_threshold = Min(sort_threshold, NLocBuffer);
     170              : 
     171          173 :     if (num_buckets >= sort_threshold)
     172            4 :         buildstate.spool = _h_spoolinit(heap, index, num_buckets);
     173              :     else
     174          169 :         buildstate.spool = NULL;
     175              : 
     176              :     /* prepare to build the index */
     177          173 :     buildstate.indtuples = 0;
     178          173 :     buildstate.heapRel = heap;
     179              : 
     180              :     /* do the heap scan */
     181          173 :     reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
     182              :                                        hashbuildCallback,
     183              :                                        &buildstate, NULL);
     184          173 :     pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_TOTAL,
     185          173 :                                  buildstate.indtuples);
     186              : 
     187          173 :     if (buildstate.spool)
     188              :     {
     189              :         /* sort the tuples and insert them into the index */
     190            4 :         _h_indexbuild(buildstate.spool, buildstate.heapRel);
     191            4 :         _h_spooldestroy(buildstate.spool);
     192              :     }
     193              : 
     194              :     /*
     195              :      * Return statistics
     196              :      */
     197          173 :     result = palloc_object(IndexBuildResult);
     198              : 
     199          173 :     result->heap_tuples = reltuples;
     200          173 :     result->index_tuples = buildstate.indtuples;
     201              : 
     202          173 :     return result;
     203              : }
     204              : 
     205              : /*
     206              :  *  hashbuildempty() -- build an empty hash index in the initialization fork
     207              :  */
     208              : void
     209            3 : hashbuildempty(Relation index)
     210              : {
     211            3 :     _hash_init(index, 0, INIT_FORKNUM);
     212            3 : }
     213              : 
     214              : /*
     215              :  * Per-tuple callback for table_index_build_scan
     216              :  */
     217              : static void
     218       247805 : hashbuildCallback(Relation index,
     219              :                   ItemPointer tid,
     220              :                   Datum *values,
     221              :                   bool *isnull,
     222              :                   bool tupleIsAlive,
     223              :                   void *state)
     224              : {
     225       247805 :     HashBuildState *buildstate = (HashBuildState *) state;
     226              :     Datum       index_values[1];
     227              :     bool        index_isnull[1];
     228              :     IndexTuple  itup;
     229              : 
     230              :     /* convert data to a hash key; on failure, do not insert anything */
     231       247805 :     if (!_hash_convert_tuple(index,
     232              :                              values, isnull,
     233              :                              index_values, index_isnull))
     234            0 :         return;
     235              : 
     236              :     /* Either spool the tuple for sorting, or just put it into the index */
     237       247805 :     if (buildstate->spool)
     238        60500 :         _h_spool(buildstate->spool, tid, index_values, index_isnull);
     239              :     else
     240              :     {
     241              :         /* form an index tuple and point it at the heap tuple */
     242       187305 :         itup = index_form_tuple(RelationGetDescr(index),
     243              :                                 index_values, index_isnull);
     244       187305 :         itup->t_tid = *tid;
     245       187305 :         _hash_doinsert(index, itup, buildstate->heapRel, false);
     246       187305 :         pfree(itup);
     247              :     }
     248              : 
     249       247805 :     buildstate->indtuples += 1;
     250              : }
     251              : 
     252              : /*
     253              :  *  hashinsert() -- insert an index tuple into a hash table.
     254              :  *
     255              :  *  Hash on the heap tuple's key, form an index tuple with hash code.
     256              :  *  Find the appropriate location for the new tuple, and put it there.
     257              :  */
     258              : bool
     259       115114 : hashinsert(Relation rel, Datum *values, bool *isnull,
     260              :            ItemPointer ht_ctid, Relation heapRel,
     261              :            IndexUniqueCheck checkUnique,
     262              :            bool indexUnchanged,
     263              :            IndexInfo *indexInfo)
     264              : {
     265              :     Datum       index_values[1];
     266              :     bool        index_isnull[1];
     267              :     IndexTuple  itup;
     268              : 
     269              :     /* convert data to a hash key; on failure, do not insert anything */
     270       115114 :     if (!_hash_convert_tuple(rel,
     271              :                              values, isnull,
     272              :                              index_values, index_isnull))
     273            0 :         return false;
     274              : 
     275              :     /* form an index tuple and point it at the heap tuple */
     276       115114 :     itup = index_form_tuple(RelationGetDescr(rel), index_values, index_isnull);
     277       115114 :     itup->t_tid = *ht_ctid;
     278              : 
     279       115114 :     _hash_doinsert(rel, itup, heapRel, false);
     280              : 
     281       115108 :     pfree(itup);
     282              : 
     283       115108 :     return false;
     284              : }
     285              : 
     286              : 
     287              : /*
     288              :  *  hashgettuple() -- Get the next tuple in the scan.
     289              :  */
     290              : bool
     291        50737 : hashgettuple(IndexScanDesc scan, ScanDirection dir)
     292              : {
     293        50737 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     294              :     bool        res;
     295              : 
     296              :     /* Hash indexes are always lossy since we store only the hash code */
     297        50737 :     scan->xs_recheck = true;
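                            /*
                             * Because xs_recheck is set, the executor re-evaluates the
                             * original scan conditions against each heap tuple returned by
                             * this scan, filtering out hash-code collisions.
                             */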
     298              : 
     299              :     /*
     300              :      * If we've already initialized this scan, we can just advance it in the
     301              :      * appropriate direction.  If we haven't done so yet, we call a routine to
     302              :      * get the first item in the scan.
     303              :      */
     304        50737 :     if (!HashScanPosIsValid(so->currPos))
     305          239 :         res = _hash_first(scan, dir);
     306              :     else
     307              :     {
     308              :         /*
     309              :          * Check to see if we should kill the previously-fetched tuple.
     310              :          */
     311        50498 :         if (scan->kill_prior_tuple)
     312              :         {
     313              :             /*
     314              :              * Yes, so remember it for later. (We'll deal with all such tuples
     315              :              * at once right after leaving the index page or at end of scan.)
      316              :              * If the caller reverses the indexscan direction, it is quite
      317              :              * possible that the same item might be entered multiple times.
     318              :              * But, we don't detect that; instead, we just forget any excess
     319              :              * entries.
     320              :              */
     321            1 :             if (so->killedItems == NULL)
     322            1 :                 so->killedItems = palloc_array(int, MaxIndexTuplesPerPage);
     323              : 
     324            1 :             if (so->numKilled < MaxIndexTuplesPerPage)
     325            1 :                 so->killedItems[so->numKilled++] = so->currPos.itemIndex;
     326              :         }
     327              : 
     328              :         /*
     329              :          * Now continue the scan.
     330              :          */
     331        50498 :         res = _hash_next(scan, dir);
     332              :     }
     333              : 
     334        50737 :     return res;
     335              : }
     336              : 
     337              : 
     338              : /*
     339              :  *  hashgetbitmap() -- get all tuples at once
     340              :  */
     341              : int64
     342           34 : hashgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
     343              : {
     344           34 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     345              :     bool        res;
     346           34 :     int64       ntids = 0;
     347              :     HashScanPosItem *currItem;
     348              : 
     349           34 :     res = _hash_first(scan, ForwardScanDirection);
     350              : 
     351          102 :     while (res)
     352              :     {
     353           68 :         currItem = &so->currPos.items[so->currPos.itemIndex];
     354              : 
     355              :         /*
      356              :          * _hash_first and _hash_next eliminate dead index entries
     357              :          * whenever scan->ignore_killed_tuples is true.  Therefore, there's
     358              :          * nothing to do here except add the results to the TIDBitmap.
     359              :          */
     360           68 :         tbm_add_tuples(tbm, &(currItem->heapTid), 1, true);
     361           68 :         ntids++;
     362              : 
     363           68 :         res = _hash_next(scan, ForwardScanDirection);
     364              :     }
     365              : 
     366           34 :     return ntids;
     367              : }
     368              : 
     369              : 
     370              : /*
     371              :  *  hashbeginscan() -- start a scan on a hash index
     372              :  */
     373              : IndexScanDesc
     374          194 : hashbeginscan(Relation rel, int nkeys, int norderbys)
     375              : {
     376              :     IndexScanDesc scan;
     377              :     HashScanOpaque so;
     378              : 
     379              :     /* no order by operators allowed */
     380              :     Assert(norderbys == 0);
     381              : 
     382          194 :     scan = RelationGetIndexScan(rel, nkeys, norderbys);
     383              : 
     384          194 :     so = (HashScanOpaque) palloc_object(HashScanOpaqueData);
     385          194 :     HashScanPosInvalidate(so->currPos);
     386          194 :     so->hashso_bucket_buf = InvalidBuffer;
     387          194 :     so->hashso_split_bucket_buf = InvalidBuffer;
     388              : 
     389          194 :     so->hashso_buc_populated = false;
     390          194 :     so->hashso_buc_split = false;
     391              : 
     392          194 :     so->killedItems = NULL;
     393          194 :     so->numKilled = 0;
     394              : 
     395          194 :     scan->opaque = so;
     396              : 
     397          194 :     return scan;
     398              : }
     399              : 
     400              : /*
     401              :  *  hashrescan() -- rescan an index relation
     402              :  */
     403              : void
     404          270 : hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
     405              :            ScanKey orderbys, int norderbys)
     406              : {
     407          270 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     408          270 :     Relation    rel = scan->indexRelation;
     409              : 
     410          270 :     if (HashScanPosIsValid(so->currPos))
     411              :     {
     412              :         /* Before leaving current page, deal with any killed items */
     413           30 :         if (so->numKilled > 0)
     414            0 :             _hash_kill_items(scan);
     415              :     }
     416              : 
     417          270 :     _hash_dropscanbuf(rel, so);
     418              : 
     419              :     /* set position invalid (this will cause _hash_first call) */
     420          270 :     HashScanPosInvalidate(so->currPos);
     421              : 
     422              :     /* Update scan key, if a new one is given */
     423          270 :     if (scankey && scan->numberOfKeys > 0)
     424          270 :         memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
     425              : 
     426          270 :     so->hashso_buc_populated = false;
     427          270 :     so->hashso_buc_split = false;
     428          270 : }
     429              : 
     430              : /*
     431              :  *  hashendscan() -- close down a scan
     432              :  */
     433              : void
     434          194 : hashendscan(IndexScanDesc scan)
     435              : {
     436          194 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     437          194 :     Relation    rel = scan->indexRelation;
     438              : 
     439          194 :     if (HashScanPosIsValid(so->currPos))
     440              :     {
     441              :         /* Before leaving current page, deal with any killed items */
     442           32 :         if (so->numKilled > 0)
     443            0 :             _hash_kill_items(scan);
     444              :     }
     445              : 
     446          194 :     _hash_dropscanbuf(rel, so);
     447              : 
     448          194 :     if (so->killedItems != NULL)
     449            1 :         pfree(so->killedItems);
     450          194 :     pfree(so);
     451          194 :     scan->opaque = NULL;
     452          194 : }
     453              : 
     454              : /*
     455              :  * Bulk deletion of all index entries pointing to a set of heap tuples.
     456              :  * The set of target tuples is specified via a callback routine that tells
     457              :  * whether any given heap tuple (identified by ItemPointer) is being deleted.
     458              :  *
      459              :  * This function also deletes the tuples that have been moved to another
      460              :  * bucket by a split.
     461              :  *
     462              :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     463              :  */
     464              : IndexBulkDeleteResult *
     465           23 : hashbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     466              :                IndexBulkDeleteCallback callback, void *callback_state)
     467              : {
     468           23 :     Relation    rel = info->index;
     469              :     double      tuples_removed;
     470              :     double      num_index_tuples;
     471              :     double      orig_ntuples;
     472              :     Bucket      orig_maxbucket;
     473              :     Bucket      cur_maxbucket;
     474              :     Bucket      cur_bucket;
     475           23 :     Buffer      metabuf = InvalidBuffer;
     476              :     HashMetaPage metap;
     477              :     HashMetaPage cachedmetap;
     478              : 
     479           23 :     tuples_removed = 0;
     480           23 :     num_index_tuples = 0;
     481              : 
     482              :     /*
     483              :      * We need a copy of the metapage so that we can use its hashm_spares[]
     484              :      * values to compute bucket page addresses, but a cached copy should be
     485              :      * good enough.  (If not, we'll detect that further down and refresh the
     486              :      * cache as necessary.)
     487              :      */
     488           23 :     cachedmetap = _hash_getcachedmetap(rel, &metabuf, false);
     489              :     Assert(cachedmetap != NULL);
     490              : 
     491           23 :     orig_maxbucket = cachedmetap->hashm_maxbucket;
     492           23 :     orig_ntuples = cachedmetap->hashm_ntuples;
     493              : 
     494              :     /* Scan the buckets that we know exist */
     495           23 :     cur_bucket = 0;
     496           23 :     cur_maxbucket = orig_maxbucket;
     497              : 
     498           23 : loop_top:
     499          465 :     while (cur_bucket <= cur_maxbucket)
     500              :     {
     501              :         BlockNumber bucket_blkno;
     502              :         BlockNumber blkno;
     503              :         Buffer      bucket_buf;
     504              :         Buffer      buf;
     505              :         HashPageOpaque bucket_opaque;
     506              :         Page        page;
     507          442 :         bool        split_cleanup = false;
     508              : 
     509              :         /* Get address of bucket's start page */
     510          442 :         bucket_blkno = BUCKET_TO_BLKNO(cachedmetap, cur_bucket);
     511              : 
     512          442 :         blkno = bucket_blkno;
     513              : 
     514              :         /*
      515              :          * We need to acquire a cleanup lock on the primary bucket page to
      516              :          * wait out concurrent scans before deleting the dead tuples.
     517              :          */
     518          442 :         buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy);
     519          442 :         LockBufferForCleanup(buf);
     520          442 :         _hash_checkpage(rel, buf, LH_BUCKET_PAGE);
     521              : 
     522          442 :         page = BufferGetPage(buf);
     523          442 :         bucket_opaque = HashPageGetOpaque(page);
     524              : 
     525              :         /*
      526              :          * If the bucket contains tuples that were moved by a split, then we
      527              :          * need to delete such tuples.  We can't delete them while the split
      528              :          * operation on the bucket is unfinished, as scans still need them.
     529              :          */
     530          442 :         if (!H_BUCKET_BEING_SPLIT(bucket_opaque) &&
     531          442 :             H_NEEDS_SPLIT_CLEANUP(bucket_opaque))
     532              :         {
     533            0 :             split_cleanup = true;
     534              : 
     535              :             /*
     536              :              * This bucket might have been split since we last held a lock on
     537              :              * the metapage.  If so, hashm_maxbucket, hashm_highmask and
     538              :              * hashm_lowmask might be old enough to cause us to fail to remove
     539              :              * tuples left behind by the most recent split.  To prevent that,
     540              :              * now that the primary page of the target bucket has been locked
     541              :              * (and thus can't be further split), check whether we need to
     542              :              * update our cached metapage data.
     543              :              */
     544              :             Assert(bucket_opaque->hasho_prevblkno != InvalidBlockNumber);
     545            0 :             if (bucket_opaque->hasho_prevblkno > cachedmetap->hashm_maxbucket)
     546              :             {
     547            0 :                 cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
     548              :                 Assert(cachedmetap != NULL);
     549              :             }
     550              :         }
     551              : 
     552          442 :         bucket_buf = buf;
     553              : 
     554          442 :         hashbucketcleanup(rel, cur_bucket, bucket_buf, blkno, info->strategy,
     555              :                           cachedmetap->hashm_maxbucket,
     556              :                           cachedmetap->hashm_highmask,
     557              :                           cachedmetap->hashm_lowmask, &tuples_removed,
     558              :                           &num_index_tuples, split_cleanup,
     559              :                           callback, callback_state);
     560              : 
     561          442 :         _hash_dropbuf(rel, bucket_buf);
     562              : 
     563              :         /* Advance to next bucket */
     564          442 :         cur_bucket++;
     565              :     }
     566              : 
     567           23 :     if (BufferIsInvalid(metabuf))
     568           15 :         metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
     569              : 
     570              :     /* Write-lock metapage and check for split since we started */
     571           23 :     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     572           23 :     metap = HashPageGetMeta(BufferGetPage(metabuf));
     573              : 
     574           23 :     if (cur_maxbucket != metap->hashm_maxbucket)
     575              :     {
     576              :         /* There's been a split, so process the additional bucket(s) */
     577            0 :         LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     578            0 :         cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
     579              :         Assert(cachedmetap != NULL);
     580            0 :         cur_maxbucket = cachedmetap->hashm_maxbucket;
     581            0 :         goto loop_top;
     582              :     }
     583              : 
     584              :     /* Okay, we're really done.  Update tuple count in metapage. */
     585           23 :     START_CRIT_SECTION();
     586              : 
     587           23 :     if (orig_maxbucket == metap->hashm_maxbucket &&
     588           23 :         orig_ntuples == metap->hashm_ntuples)
     589              :     {
     590              :         /*
     591              :          * No one has split or inserted anything since start of scan, so
     592              :          * believe our count as gospel.
     593              :          */
     594            8 :         metap->hashm_ntuples = num_index_tuples;
     595              :     }
     596              :     else
     597              :     {
     598              :         /*
     599              :          * Otherwise, our count is untrustworthy since we may have
     600              :          * double-scanned tuples in split buckets.  Proceed by dead-reckoning.
     601              :          * (Note: we still return estimated_count = false, because using this
     602              :          * count is better than not updating reltuples at all.)
     603              :          */
     604           15 :         if (metap->hashm_ntuples > tuples_removed)
     605           13 :             metap->hashm_ntuples -= tuples_removed;
     606              :         else
     607            2 :             metap->hashm_ntuples = 0;
     608           15 :         num_index_tuples = metap->hashm_ntuples;
     609              :     }
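                            /*
                             * Worked example of the dead-reckoning above (hypothetical
                             * numbers): if the metapage had recorded hashm_ntuples = 1000 and
                             * this pass removed 250 tuples, the new estimate becomes
                             * 1000 - 250 = 750; if more tuples than that had been removed,
                             * the estimate is clamped to 0 rather than going negative.
                             */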
     610              : 
     611           23 :     MarkBufferDirty(metabuf);
     612              : 
     613              :     /* XLOG stuff */
     614           23 :     if (RelationNeedsWAL(rel))
     615              :     {
     616              :         xl_hash_update_meta_page xlrec;
     617              :         XLogRecPtr  recptr;
     618              : 
     619           23 :         xlrec.ntuples = metap->hashm_ntuples;
     620              : 
     621           23 :         XLogBeginInsert();
     622           23 :         XLogRegisterData(&xlrec, SizeOfHashUpdateMetaPage);
     623              : 
     624           23 :         XLogRegisterBuffer(0, metabuf, REGBUF_STANDARD);
     625              : 
     626           23 :         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_UPDATE_META_PAGE);
     627           23 :         PageSetLSN(BufferGetPage(metabuf), recptr);
     628              :     }
     629              : 
     630           23 :     END_CRIT_SECTION();
     631              : 
     632           23 :     _hash_relbuf(rel, metabuf);
     633              : 
     634              :     /* return statistics */
     635           23 :     if (stats == NULL)
     636           23 :         stats = palloc0_object(IndexBulkDeleteResult);
     637           23 :     stats->estimated_count = false;
     638           23 :     stats->num_index_tuples = num_index_tuples;
     639           23 :     stats->tuples_removed += tuples_removed;
     640              :     /* hashvacuumcleanup will fill in num_pages */
     641              : 
     642           23 :     return stats;
     643              : }
     644              : 
     645              : /*
     646              :  * Post-VACUUM cleanup.
     647              :  *
     648              :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     649              :  */
     650              : IndexBulkDeleteResult *
     651           34 : hashvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
     652              : {
     653           34 :     Relation    rel = info->index;
     654              :     BlockNumber num_pages;
     655              : 
     656              :     /* If hashbulkdelete wasn't called, return NULL signifying no change */
     657              :     /* Note: this covers the analyze_only case too */
     658           34 :     if (stats == NULL)
     659           11 :         return NULL;
     660              : 
     661              :     /* update statistics */
     662           23 :     num_pages = RelationGetNumberOfBlocks(rel);
     663           23 :     stats->num_pages = num_pages;
     664              : 
     665           23 :     return stats;
     666              : }
     667              : 
     668              : /*
     669              :  * Helper function to perform deletion of index entries from a bucket.
     670              :  *
     671              :  * This function expects that the caller has acquired a cleanup lock on the
     672              :  * primary bucket page, and will return with a write lock again held on the
     673              :  * primary bucket page.  The lock won't necessarily be held continuously,
     674              :  * though, because we'll release it when visiting overflow pages.
     675              :  *
     676              :  * There can't be any concurrent scans in progress when we first enter this
     677              :  * function because of the cleanup lock we hold on the primary bucket page,
     678              :  * but as soon as we release that lock, there might be.  If those scans got
     679              :  * ahead of our cleanup scan, they might see a tuple before we kill it and
     680              :  * wake up only after VACUUM has completed and the TID has been recycled for
     681              :  * an unrelated tuple.  To avoid that calamity, we prevent scans from passing
     682              :  * our cleanup scan by locking the next page in the bucket chain before
     683              :  * releasing the lock on the previous page.  (This type of lock chaining is not
     684              :  * ideal, so we might want to look for a better solution at some point.)
     685              :  *
     686              :  * We need to retain a pin on the primary bucket to ensure that no concurrent
     687              :  * split can start.
     688              :  */
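                        /*
                         * In outline, the lock chaining described above works as follows (a
                         * sketch of the loop below, not additional code):
                         *
                         *     next_buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE, ...);
                         *     if (buf == bucket_buf)
                         *         LockBuffer(buf, BUFFER_LOCK_UNLOCK);   -- keep the pin on the primary page
                         *     else
                         *         _hash_relbuf(rel, buf);                -- release both lock and pin
                         *     buf = next_buf;
                         */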
     689              : void
     690         1111 : hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
     691              :                   BlockNumber bucket_blkno, BufferAccessStrategy bstrategy,
     692              :                   uint32 maxbucket, uint32 highmask, uint32 lowmask,
     693              :                   double *tuples_removed, double *num_index_tuples,
     694              :                   bool split_cleanup,
     695              :                   IndexBulkDeleteCallback callback, void *callback_state)
     696              : {
     697              :     BlockNumber blkno;
     698              :     Buffer      buf;
     699         1111 :     Bucket      new_bucket PG_USED_FOR_ASSERTS_ONLY = InvalidBucket;
     700         1111 :     bool        bucket_dirty = false;
     701              : 
     702         1111 :     blkno = bucket_blkno;
     703         1111 :     buf = bucket_buf;
     704              : 
     705         1111 :     if (split_cleanup)
     706          669 :         new_bucket = _hash_get_newbucket_from_oldbucket(rel, cur_bucket,
     707              :                                                         lowmask, maxbucket);
     708              : 
     709              :     /* Scan each page in bucket */
     710              :     for (;;)
     711          211 :     {
     712              :         HashPageOpaque opaque;
     713              :         OffsetNumber offno;
     714              :         OffsetNumber maxoffno;
     715              :         Buffer      next_buf;
     716              :         Page        page;
     717              :         OffsetNumber deletable[MaxOffsetNumber];
     718         1322 :         int         ndeletable = 0;
     719         1322 :         bool        retain_pin = false;
     720         1322 :         bool        clear_dead_marking = false;
     721              : 
     722         1322 :         vacuum_delay_point(false);
     723              : 
     724         1322 :         page = BufferGetPage(buf);
     725         1322 :         opaque = HashPageGetOpaque(page);
     726              : 
     727              :         /* Scan each tuple in page */
     728         1322 :         maxoffno = PageGetMaxOffsetNumber(page);
     729         1322 :         for (offno = FirstOffsetNumber;
     730       241815 :              offno <= maxoffno;
     731       240493 :              offno = OffsetNumberNext(offno))
     732              :         {
     733              :             ItemPointer htup;
     734              :             IndexTuple  itup;
     735              :             Bucket      bucket;
     736       240493 :             bool        kill_tuple = false;
     737              : 
     738       240493 :             itup = (IndexTuple) PageGetItem(page,
     739       240493 :                                             PageGetItemId(page, offno));
     740       240493 :             htup = &(itup->t_tid);
     741              : 
     742              :             /*
      743              :              * To remove the dead tuples, we strictly rely on the results of
      744              :              * the callback function.  See btvacuumpage for the detailed reason.
     745              :              */
     746       240493 :             if (callback && callback(htup, callback_state))
     747              :             {
     748        15943 :                 kill_tuple = true;
     749        15943 :                 if (tuples_removed)
     750        15943 :                     *tuples_removed += 1;
     751              :             }
     752       224550 :             else if (split_cleanup)
     753              :             {
      754              :                 /* delete the tuples that were moved by a split. */
     755       153804 :                 bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
     756              :                                               maxbucket,
     757              :                                               highmask,
     758              :                                               lowmask);
     759              :                 /* mark the item for deletion */
     760       153804 :                 if (bucket != cur_bucket)
     761              :                 {
     762              :                     /*
      763              :                      * We expect tuples to belong either to the current bucket or to
      764              :                      * new_bucket.  This is ensured because we don't allow further
      765              :                      * splits from a bucket that contains garbage.  See
     766              :                      * comments in _hash_expandtable.
     767              :                      */
     768              :                     Assert(bucket == new_bucket);
     769        63079 :                     kill_tuple = true;
     770              :                 }
     771              :             }
     772              : 
     773       240493 :             if (kill_tuple)
     774              :             {
     775              :                 /* mark the item for deletion */
     776        79022 :                 deletable[ndeletable++] = offno;
     777              :             }
     778              :             else
     779              :             {
     780              :                 /* we're keeping it, so count it */
     781       161471 :                 if (num_index_tuples)
     782        70746 :                     *num_index_tuples += 1;
     783              :             }
     784              :         }
     785              : 
     786              :         /* retain the pin on primary bucket page till end of bucket scan */
     787         1322 :         if (blkno == bucket_blkno)
     788         1111 :             retain_pin = true;
     789              :         else
     790          211 :             retain_pin = false;
     791              : 
     792         1322 :         blkno = opaque->hasho_nextblkno;
     793              : 
     794              :         /*
     795              :          * Apply deletions, advance to next page and write page if needed.
     796              :          */
     797         1322 :         if (ndeletable > 0)
     798              :         {
     799              :             /* No ereport(ERROR) until changes are logged */
     800          779 :             START_CRIT_SECTION();
     801              : 
     802          779 :             PageIndexMultiDelete(page, deletable, ndeletable);
     803          779 :             bucket_dirty = true;
     804              : 
     805              :             /*
      806              :              * Mark the page as clean if vacuum removes DEAD tuples from the
      807              :              * index page.  We do this by clearing the LH_PAGE_HAS_DEAD_TUPLES
      808              :              * flag.
     809              :              */
     810          779 :             if (tuples_removed && *tuples_removed > 0 &&
     811           81 :                 H_HAS_DEAD_TUPLES(opaque))
     812              :             {
     813            0 :                 opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
     814            0 :                 clear_dead_marking = true;
     815              :             }
     816              : 
     817          779 :             MarkBufferDirty(buf);
     818              : 
     819              :             /* XLOG stuff */
     820          779 :             if (RelationNeedsWAL(rel))
     821              :             {
     822              :                 xl_hash_delete xlrec;
     823              :                 XLogRecPtr  recptr;
     824              : 
     825          779 :                 xlrec.clear_dead_marking = clear_dead_marking;
     826          779 :                 xlrec.is_primary_bucket_page = (buf == bucket_buf);
     827              : 
     828          779 :                 XLogBeginInsert();
     829          779 :                 XLogRegisterData(&xlrec, SizeOfHashDelete);
     830              : 
     831              :                 /*
      832              :                  * The bucket buffer was not changed, but it still needs to be
     833              :                  * registered to ensure that we can acquire a cleanup lock on
     834              :                  * it during replay.
     835              :                  */
     836          779 :                 if (!xlrec.is_primary_bucket_page)
     837              :                 {
     838           98 :                     uint8       flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
     839              : 
     840           98 :                     XLogRegisterBuffer(0, bucket_buf, flags);
     841              :                 }
     842              : 
     843          779 :                 XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
     844          779 :                 XLogRegisterBufData(1, deletable,
     845              :                                     ndeletable * sizeof(OffsetNumber));
     846              : 
     847          779 :                 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_DELETE);
     848          779 :                 PageSetLSN(BufferGetPage(buf), recptr);
     849              :             }
     850              : 
     851          779 :             END_CRIT_SECTION();
     852              :         }
     853              : 
     854              :         /* bail out if there are no more pages to scan. */
     855         1322 :         if (!BlockNumberIsValid(blkno))
     856         1111 :             break;
     857              : 
     858          211 :         next_buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
     859              :                                               LH_OVERFLOW_PAGE,
     860              :                                               bstrategy);
     861              : 
     862              :         /*
      863              :          * Release the lock on the previous page after acquiring the lock on
      864              :          * the next page.
     865              :          */
     866          211 :         if (retain_pin)
     867           40 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     868              :         else
     869          171 :             _hash_relbuf(rel, buf);
     870              : 
     871          211 :         buf = next_buf;
     872              :     }
     873              : 
     874              :     /*
      875              :      * Lock the bucket page to clear the garbage flag and squeeze the bucket.
      876              :      * If the current buffer is the same as the bucket buffer, then we
      877              :      * already have a lock on the bucket page.
     878              :      */
     879         1111 :     if (buf != bucket_buf)
     880              :     {
     881           40 :         _hash_relbuf(rel, buf);
     882           40 :         LockBuffer(bucket_buf, BUFFER_LOCK_EXCLUSIVE);
     883              :     }
     884              : 
     885              :     /*
      886              :      * Clear the garbage flag from the bucket after deleting the tuples that
      887              :      * were moved by a split.  We purposefully clear the flag before
      888              :      * squeezing the bucket, so that after a restart, vacuum won't again try
      889              :      * to delete the moved-by-split tuples.
     890              :      */
     891         1111 :     if (split_cleanup)
     892              :     {
     893              :         HashPageOpaque bucket_opaque;
     894              :         Page        page;
     895              : 
     896          669 :         page = BufferGetPage(bucket_buf);
     897          669 :         bucket_opaque = HashPageGetOpaque(page);
     898              : 
     899              :         /* No ereport(ERROR) until changes are logged */
     900          669 :         START_CRIT_SECTION();
     901              : 
     902          669 :         bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
     903          669 :         MarkBufferDirty(bucket_buf);
     904              : 
     905              :         /* XLOG stuff */
     906          669 :         if (RelationNeedsWAL(rel))
     907              :         {
     908              :             XLogRecPtr  recptr;
     909              : 
     910          669 :             XLogBeginInsert();
     911          669 :             XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD);
     912              : 
     913          669 :             recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_CLEANUP);
     914          669 :             PageSetLSN(page, recptr);
     915              :         }
     916              : 
     917          669 :         END_CRIT_SECTION();
     918              :     }
     919              : 
     920              :     /*
     921              :      * If we have deleted anything, try to compact free space.  For squeezing
     922              :      * the bucket, we must have a cleanup lock, else it can impact the
     923              :      * ordering of tuples for a scan that has started before it.
     924              :      */
     925         1111 :     if (bucket_dirty && IsBufferCleanupOK(bucket_buf))
     926          693 :         _hash_squeezebucket(rel, cur_bucket, bucket_blkno, bucket_buf,
     927              :                             bstrategy);
     928              :     else
     929          418 :         LockBuffer(bucket_buf, BUFFER_LOCK_UNLOCK);
     930         1111 : }
     931              : 
     932              : CompareType
     933            0 : hashtranslatestrategy(StrategyNumber strategy, Oid opfamily)
     934              : {
     935            0 :     if (strategy == HTEqualStrategyNumber)
     936            0 :         return COMPARE_EQ;
     937            0 :     return COMPARE_INVALID;
     938              : }
     939              : 
     940              : StrategyNumber
     941            6 : hashtranslatecmptype(CompareType cmptype, Oid opfamily)
     942              : {
     943            6 :     if (cmptype == COMPARE_EQ)
     944            6 :         return HTEqualStrategyNumber;
     945            0 :     return InvalidStrategy;
     946              : }
        

Generated by: LCOV version 2.0-1