LCOV - code coverage report
Current view: top level - src/backend/access/hash - hash.c (source / functions)
Test: PostgreSQL 19devel    Date: 2025-12-22 17:18:44
Coverage:  Lines: 306 of 321 (95.3 %)    Functions: 14 of 15 (93.3 %)

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * hash.c
       4             :  *    Implementation of Margo Seltzer's Hashing package for postgres.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/hash/hash.c
      12             :  *
      13             :  * NOTES
      14             :  *    This file contains only the public interface routines.
      15             :  *
      16             :  *-------------------------------------------------------------------------
      17             :  */
      18             : 
      19             : #include "postgres.h"
      20             : 
      21             : #include "access/hash.h"
      22             : #include "access/hash_xlog.h"
      23             : #include "access/relscan.h"
      24             : #include "access/stratnum.h"
      25             : #include "access/tableam.h"
      26             : #include "access/xloginsert.h"
      27             : #include "commands/progress.h"
      28             : #include "commands/vacuum.h"
      29             : #include "miscadmin.h"
      30             : #include "nodes/execnodes.h"
      31             : #include "optimizer/plancat.h"
      32             : #include "pgstat.h"
      33             : #include "utils/fmgrprotos.h"
      34             : #include "utils/index_selfuncs.h"
      35             : #include "utils/rel.h"
      36             : 
      37             : /* Working state for hashbuild and its callback */
      38             : typedef struct
      39             : {
      40             :     HSpool     *spool;          /* NULL if not using spooling */
      41             :     double      indtuples;      /* # tuples accepted into index */
      42             :     Relation    heapRel;        /* heap relation descriptor */
      43             : } HashBuildState;
      44             : 
      45             : static void hashbuildCallback(Relation index,
      46             :                               ItemPointer tid,
      47             :                               Datum *values,
      48             :                               bool *isnull,
      49             :                               bool tupleIsAlive,
      50             :                               void *state);
      51             : 
      52             : 
      53             : /*
      54             :  * Hash handler function: return IndexAmRoutine with access method parameters
      55             :  * and callbacks.
      56             :  */
      57             : Datum
      58        3508 : hashhandler(PG_FUNCTION_ARGS)
      59             : {
      60        3508 :     IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
      61             : 
      62        3508 :     amroutine->amstrategies = HTMaxStrategyNumber;
      63        3508 :     amroutine->amsupport = HASHNProcs;
      64        3508 :     amroutine->amoptsprocnum = HASHOPTIONS_PROC;
      65        3508 :     amroutine->amcanorder = false;
      66        3508 :     amroutine->amcanorderbyop = false;
      67        3508 :     amroutine->amcanhash = true;
      68        3508 :     amroutine->amconsistentequality = true;
      69        3508 :     amroutine->amconsistentordering = false;
      70        3508 :     amroutine->amcanbackward = true;
      71        3508 :     amroutine->amcanunique = false;
      72        3508 :     amroutine->amcanmulticol = false;
      73        3508 :     amroutine->amoptionalkey = false;
      74        3508 :     amroutine->amsearcharray = false;
      75        3508 :     amroutine->amsearchnulls = false;
      76        3508 :     amroutine->amstorage = false;
      77        3508 :     amroutine->amclusterable = false;
      78        3508 :     amroutine->ampredlocks = true;
      79        3508 :     amroutine->amcanparallel = false;
      80        3508 :     amroutine->amcanbuildparallel = false;
      81        3508 :     amroutine->amcaninclude = false;
      82        3508 :     amroutine->amusemaintenanceworkmem = false;
      83        3508 :     amroutine->amsummarizing = false;
      84        3508 :     amroutine->amparallelvacuumoptions =
      85             :         VACUUM_OPTION_PARALLEL_BULKDEL;
      86        3508 :     amroutine->amkeytype = INT4OID;
      87             : 
      88        3508 :     amroutine->ambuild = hashbuild;
      89        3508 :     amroutine->ambuildempty = hashbuildempty;
      90        3508 :     amroutine->aminsert = hashinsert;
      91        3508 :     amroutine->aminsertcleanup = NULL;
      92        3508 :     amroutine->ambulkdelete = hashbulkdelete;
      93        3508 :     amroutine->amvacuumcleanup = hashvacuumcleanup;
      94        3508 :     amroutine->amcanreturn = NULL;
      95        3508 :     amroutine->amcostestimate = hashcostestimate;
      96        3508 :     amroutine->amgettreeheight = NULL;
      97        3508 :     amroutine->amoptions = hashoptions;
      98        3508 :     amroutine->amproperty = NULL;
      99        3508 :     amroutine->ambuildphasename = NULL;
     100        3508 :     amroutine->amvalidate = hashvalidate;
     101        3508 :     amroutine->amadjustmembers = hashadjustmembers;
     102        3508 :     amroutine->ambeginscan = hashbeginscan;
     103        3508 :     amroutine->amrescan = hashrescan;
     104        3508 :     amroutine->amgettuple = hashgettuple;
     105        3508 :     amroutine->amgetbitmap = hashgetbitmap;
     106        3508 :     amroutine->amendscan = hashendscan;
     107        3508 :     amroutine->ammarkpos = NULL;
     108        3508 :     amroutine->amrestrpos = NULL;
     109        3508 :     amroutine->amestimateparallelscan = NULL;
     110        3508 :     amroutine->aminitparallelscan = NULL;
     111        3508 :     amroutine->amparallelrescan = NULL;
     112        3508 :     amroutine->amtranslatestrategy = hashtranslatestrategy;
     113        3508 :     amroutine->amtranslatecmptype = hashtranslatecmptype;
     114             : 
     115        3508 :     PG_RETURN_POINTER(amroutine);
     116             : }
     117             : 
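                      : /*
                      :  * Illustration: the routine above is the amhandler that pg_am's "hash"
                      :  * entry points at, so an index created with, e.g.,
                      :  *
                      :  *     CREATE INDEX tab_col_hash_idx ON tab USING hash (col);
                      :  *
                      :  * (table, column and index names here are hypothetical) is built and
                      :  * scanned through the callbacks returned by hashhandler().
                      :  */
                      : 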
     118             : /*
     119             :  *  hashbuild() -- build a new hash index.
     120             :  */
     121             : IndexBuildResult *
     122         340 : hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     123             : {
     124             :     IndexBuildResult *result;
     125             :     BlockNumber relpages;
     126             :     double      reltuples;
     127             :     double      allvisfrac;
     128             :     uint32      num_buckets;
     129             :     Size        sort_threshold;
     130             :     HashBuildState buildstate;
     131             : 
     132             :     /*
     133             :      * We expect to be called exactly once for any index relation. If that's
     134             :      * not the case, big trouble's what we have.
     135             :      */
     136         340 :     if (RelationGetNumberOfBlocks(index) != 0)
     137           0 :         elog(ERROR, "index \"%s\" already contains data",
     138             :              RelationGetRelationName(index));
     139             : 
     140             :     /* Estimate the number of rows currently present in the table */
     141         340 :     estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);
     142             : 
     143             :     /* Initialize the hash index metadata page and initial buckets */
     144         340 :     num_buckets = _hash_init(index, reltuples, MAIN_FORKNUM);
     145             : 
     146             :     /*
     147             :      * If we just insert the tuples into the index in scan order, then
     148             :      * (assuming their hash codes are pretty random) there will be no locality
     149             :      * of access to the index, and if the index is bigger than available RAM
     150             :      * then we'll thrash horribly.  To prevent that scenario, we can sort the
     151             :      * tuples by (expected) bucket number.  However, such a sort is useless
     152             :      * overhead when the index does fit in RAM.  We choose to sort if the
     153             :      * initial index size exceeds maintenance_work_mem, or the number of
     154             :      * buffers usable for the index, whichever is less.  (Limiting by the
     155             :      * number of buffers should reduce thrashing between PG buffers and kernel
     156             :      * buffers, which seems useful even if no physical I/O results.  Limiting
     157             :      * by maintenance_work_mem is useful to allow easy testing of the sort
     158             :      * code path, and may be useful to DBAs as an additional control knob.)
     159             :      *
     160             :      * NOTE: this test will need adjustment if a bucket is ever different from
     161             :      * one page.  Also, "initial index size" accounting does not include the
     162             :      * metapage, nor the first bitmap page.
     163             :      */
     164         340 :     sort_threshold = (maintenance_work_mem * (Size) 1024) / BLCKSZ;
     165         340 :     if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
     166         332 :         sort_threshold = Min(sort_threshold, NBuffers);
     167             :     else
     168           8 :         sort_threshold = Min(sort_threshold, NLocBuffer);
     169             : 
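                      : /*
                      :  * Worked example of the threshold above: with the default
                      :  * maintenance_work_mem of 64MB and the default 8kB BLCKSZ, the
                      :  * starting value is 64 * 1024 * 1024 / 8192 = 8192 bucket pages,
                      :  * which is then capped by NBuffers (or NLocBuffer for a temporary
                      :  * index).
                      :  */
                      : 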
     170         340 :     if (num_buckets >= sort_threshold)
     171           8 :         buildstate.spool = _h_spoolinit(heap, index, num_buckets);
     172             :     else
     173         332 :         buildstate.spool = NULL;
     174             : 
     175             :     /* prepare to build the index */
     176         340 :     buildstate.indtuples = 0;
     177         340 :     buildstate.heapRel = heap;
     178             : 
     179             :     /* do the heap scan */
     180         340 :     reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
     181             :                                        hashbuildCallback,
     182             :                                        &buildstate, NULL);
     183         340 :     pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_TOTAL,
     184         340 :                                  buildstate.indtuples);
     185             : 
     186         340 :     if (buildstate.spool)
     187             :     {
     188             :         /* sort the tuples and insert them into the index */
     189           8 :         _h_indexbuild(buildstate.spool, buildstate.heapRel);
     190           8 :         _h_spooldestroy(buildstate.spool);
     191             :     }
     192             : 
     193             :     /*
     194             :      * Return statistics
     195             :      */
     196         340 :     result = palloc_object(IndexBuildResult);
     197             : 
     198         340 :     result->heap_tuples = reltuples;
     199         340 :     result->index_tuples = buildstate.indtuples;
     200             : 
     201         340 :     return result;
     202             : }
     203             : 
     204             : /*
     205             :  *  hashbuildempty() -- build an empty hash index in the initialization fork
     206             :  */
     207             : void
     208           6 : hashbuildempty(Relation index)
     209             : {
     210           6 :     _hash_init(index, 0, INIT_FORKNUM);
     211           6 : }
     212             : 
     213             : /*
     214             :  * Per-tuple callback for table_index_build_scan
     215             :  */
     216             : static void
     217      484550 : hashbuildCallback(Relation index,
     218             :                   ItemPointer tid,
     219             :                   Datum *values,
     220             :                   bool *isnull,
     221             :                   bool tupleIsAlive,
     222             :                   void *state)
     223             : {
     224      484550 :     HashBuildState *buildstate = (HashBuildState *) state;
     225             :     Datum       index_values[1];
     226             :     bool        index_isnull[1];
     227             :     IndexTuple  itup;
     228             : 
     229             :     /* convert data to a hash key; on failure, do not insert anything */
     230      484550 :     if (!_hash_convert_tuple(index,
     231             :                              values, isnull,
     232             :                              index_values, index_isnull))
     233           0 :         return;
     234             : 
     235             :     /* Either spool the tuple for sorting, or just put it into the index */
     236      484550 :     if (buildstate->spool)
     237      110000 :         _h_spool(buildstate->spool, tid, index_values, index_isnull);
     238             :     else
     239             :     {
     240             :         /* form an index tuple and point it at the heap tuple */
     241      374550 :         itup = index_form_tuple(RelationGetDescr(index),
     242             :                                 index_values, index_isnull);
     243      374550 :         itup->t_tid = *tid;
     244      374550 :         _hash_doinsert(index, itup, buildstate->heapRel, false);
     245      374550 :         pfree(itup);
     246             :     }
     247             : 
     248      484550 :     buildstate->indtuples += 1;
     249             : }
     250             : 
     251             : /*
     252             :  *  hashinsert() -- insert an index tuple into a hash table.
     253             :  *
     254             :  *  Hash on the heap tuple's key, form an index tuple with hash code.
     255             :  *  Find the appropriate location for the new tuple, and put it there.
     256             :  */
     257             : bool
     258      230228 : hashinsert(Relation rel, Datum *values, bool *isnull,
     259             :            ItemPointer ht_ctid, Relation heapRel,
     260             :            IndexUniqueCheck checkUnique,
     261             :            bool indexUnchanged,
     262             :            IndexInfo *indexInfo)
     263             : {
     264             :     Datum       index_values[1];
     265             :     bool        index_isnull[1];
     266             :     IndexTuple  itup;
     267             : 
     268             :     /* convert data to a hash key; on failure, do not insert anything */
     269      230228 :     if (!_hash_convert_tuple(rel,
     270             :                              values, isnull,
     271             :                              index_values, index_isnull))
     272           0 :         return false;
     273             : 
     274             :     /* form an index tuple and point it at the heap tuple */
     275      230228 :     itup = index_form_tuple(RelationGetDescr(rel), index_values, index_isnull);
     276      230228 :     itup->t_tid = *ht_ctid;
     277             : 
     278      230228 :     _hash_doinsert(rel, itup, heapRel, false);
     279             : 
     280      230216 :     pfree(itup);
     281             : 
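                      :     /*
                      :      * Hash indexes never perform uniqueness checks (amcanunique is
                      :      * false), so there is no "unique check satisfied" result to report;
                      :      * the return value is always false.
                      :      */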
     282      230216 :     return false;
     283             : }
     284             : 
     285             : 
     286             : /*
     287             :  *  hashgettuple() -- Get the next tuple in the scan.
     288             :  */
     289             : bool
     290      101474 : hashgettuple(IndexScanDesc scan, ScanDirection dir)
     291             : {
     292      101474 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     293             :     bool        res;
     294             : 
     295             :     /* Hash indexes are always lossy since we store only the hash code */
     296      101474 :     scan->xs_recheck = true;
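                      :     /* (Distinct keys can collide on the same hash code, so the executor
                      :      * must recheck each fetched heap tuple against the scan keys.) */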
     297             : 
     298             :     /*
     299             :      * If we've already initialized this scan, we can just advance it in the
     300             :      * appropriate direction.  If we haven't done so yet, we call a routine to
     301             :      * get the first item in the scan.
     302             :      */
     303      101474 :     if (!HashScanPosIsValid(so->currPos))
     304         478 :         res = _hash_first(scan, dir);
     305             :     else
     306             :     {
     307             :         /*
     308             :          * Check to see if we should kill the previously-fetched tuple.
     309             :          */
     310      100996 :         if (scan->kill_prior_tuple)
     311             :         {
     312             :             /*
     313             :              * Yes, so remember it for later. (We'll deal with all such tuples
     314             :              * at once right after leaving the index page or at end of scan.)
      315             :              * If the caller reverses the indexscan direction, it is quite
     316             :              * possible that the same item might get entered multiple times.
     317             :              * But, we don't detect that; instead, we just forget any excess
     318             :              * entries.
     319             :              */
     320           2 :             if (so->killedItems == NULL)
     321           2 :                 so->killedItems = palloc_array(int, MaxIndexTuplesPerPage);
     322             : 
     323           2 :             if (so->numKilled < MaxIndexTuplesPerPage)
     324           2 :                 so->killedItems[so->numKilled++] = so->currPos.itemIndex;
     325             :         }
     326             : 
     327             :         /*
     328             :          * Now continue the scan.
     329             :          */
     330      100996 :         res = _hash_next(scan, dir);
     331             :     }
     332             : 
     333      101474 :     return res;
     334             : }
     335             : 
     336             : 
     337             : /*
     338             :  *  hashgetbitmap() -- get all tuples at once
     339             :  */
     340             : int64
     341          68 : hashgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
     342             : {
     343          68 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     344             :     bool        res;
     345          68 :     int64       ntids = 0;
     346             :     HashScanPosItem *currItem;
     347             : 
     348          68 :     res = _hash_first(scan, ForwardScanDirection);
     349             : 
     350         204 :     while (res)
     351             :     {
     352         136 :         currItem = &so->currPos.items[so->currPos.itemIndex];
     353             : 
     354             :         /*
      355             :          * _hash_first and _hash_next handle eliminating dead index entries
     356             :          * whenever scan->ignore_killed_tuples is true.  Therefore, there's
     357             :          * nothing to do here except add the results to the TIDBitmap.
     358             :          */
     359         136 :         tbm_add_tuples(tbm, &(currItem->heapTid), 1, true);
     360         136 :         ntids++;
     361             : 
     362         136 :         res = _hash_next(scan, ForwardScanDirection);
     363             :     }
     364             : 
     365          68 :     return ntids;
     366             : }
     367             : 
     368             : 
     369             : /*
     370             :  *  hashbeginscan() -- start a scan on a hash index
     371             :  */
     372             : IndexScanDesc
     373         388 : hashbeginscan(Relation rel, int nkeys, int norderbys)
     374             : {
     375             :     IndexScanDesc scan;
     376             :     HashScanOpaque so;
     377             : 
     378             :     /* no order by operators allowed */
     379             :     Assert(norderbys == 0);
     380             : 
     381         388 :     scan = RelationGetIndexScan(rel, nkeys, norderbys);
     382             : 
     383         388 :     so = (HashScanOpaque) palloc_object(HashScanOpaqueData);
     384         388 :     HashScanPosInvalidate(so->currPos);
     385         388 :     so->hashso_bucket_buf = InvalidBuffer;
     386         388 :     so->hashso_split_bucket_buf = InvalidBuffer;
     387             : 
     388         388 :     so->hashso_buc_populated = false;
     389         388 :     so->hashso_buc_split = false;
     390             : 
     391         388 :     so->killedItems = NULL;
     392         388 :     so->numKilled = 0;
     393             : 
     394         388 :     scan->opaque = so;
     395             : 
     396         388 :     return scan;
     397             : }
     398             : 
     399             : /*
     400             :  *  hashrescan() -- rescan an index relation
     401             :  */
     402             : void
     403         540 : hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
     404             :            ScanKey orderbys, int norderbys)
     405             : {
     406         540 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     407         540 :     Relation    rel = scan->indexRelation;
     408             : 
     409         540 :     if (HashScanPosIsValid(so->currPos))
     410             :     {
     411             :         /* Before leaving current page, deal with any killed items */
     412          60 :         if (so->numKilled > 0)
     413           0 :             _hash_kill_items(scan);
     414             :     }
     415             : 
     416         540 :     _hash_dropscanbuf(rel, so);
     417             : 
     418             :     /* set position invalid (this will cause _hash_first call) */
     419         540 :     HashScanPosInvalidate(so->currPos);
     420             : 
     421             :     /* Update scan key, if a new one is given */
     422         540 :     if (scankey && scan->numberOfKeys > 0)
     423         540 :         memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
     424             : 
     425         540 :     so->hashso_buc_populated = false;
     426         540 :     so->hashso_buc_split = false;
     427         540 : }
     428             : 
     429             : /*
     430             :  *  hashendscan() -- close down a scan
     431             :  */
     432             : void
     433         388 : hashendscan(IndexScanDesc scan)
     434             : {
     435         388 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     436         388 :     Relation    rel = scan->indexRelation;
     437             : 
     438         388 :     if (HashScanPosIsValid(so->currPos))
     439             :     {
     440             :         /* Before leaving current page, deal with any killed items */
     441          64 :         if (so->numKilled > 0)
     442           0 :             _hash_kill_items(scan);
     443             :     }
     444             : 
     445         388 :     _hash_dropscanbuf(rel, so);
     446             : 
     447         388 :     if (so->killedItems != NULL)
     448           2 :         pfree(so->killedItems);
     449         388 :     pfree(so);
     450         388 :     scan->opaque = NULL;
     451         388 : }
     452             : 
     453             : /*
     454             :  * Bulk deletion of all index entries pointing to a set of heap tuples.
     455             :  * The set of target tuples is specified via a callback routine that tells
     456             :  * whether any given heap tuple (identified by ItemPointer) is being deleted.
     457             :  *
      458             :  * This function also deletes the tuples that were moved by a split to
      459             :  * another bucket.
     460             :  *
     461             :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     462             :  */
     463             : IndexBulkDeleteResult *
     464          40 : hashbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     465             :                IndexBulkDeleteCallback callback, void *callback_state)
     466             : {
     467          40 :     Relation    rel = info->index;
     468             :     double      tuples_removed;
     469             :     double      num_index_tuples;
     470             :     double      orig_ntuples;
     471             :     Bucket      orig_maxbucket;
     472             :     Bucket      cur_maxbucket;
     473             :     Bucket      cur_bucket;
     474          40 :     Buffer      metabuf = InvalidBuffer;
     475             :     HashMetaPage metap;
     476             :     HashMetaPage cachedmetap;
     477             : 
     478          40 :     tuples_removed = 0;
     479          40 :     num_index_tuples = 0;
     480             : 
     481             :     /*
     482             :      * We need a copy of the metapage so that we can use its hashm_spares[]
     483             :      * values to compute bucket page addresses, but a cached copy should be
     484             :      * good enough.  (If not, we'll detect that further down and refresh the
     485             :      * cache as necessary.)
     486             :      */
     487          40 :     cachedmetap = _hash_getcachedmetap(rel, &metabuf, false);
     488             :     Assert(cachedmetap != NULL);
     489             : 
     490          40 :     orig_maxbucket = cachedmetap->hashm_maxbucket;
     491          40 :     orig_ntuples = cachedmetap->hashm_ntuples;
     492             : 
     493             :     /* Scan the buckets that we know exist */
     494          40 :     cur_bucket = 0;
     495          40 :     cur_maxbucket = orig_maxbucket;
     496             : 
     497          42 : loop_top:
     498         802 :     while (cur_bucket <= cur_maxbucket)
     499             :     {
     500             :         BlockNumber bucket_blkno;
     501             :         BlockNumber blkno;
     502             :         Buffer      bucket_buf;
     503             :         Buffer      buf;
     504             :         HashPageOpaque bucket_opaque;
     505             :         Page        page;
     506         760 :         bool        split_cleanup = false;
     507             : 
     508             :         /* Get address of bucket's start page */
     509         760 :         bucket_blkno = BUCKET_TO_BLKNO(cachedmetap, cur_bucket);
     510             : 
     511         760 :         blkno = bucket_blkno;
     512             : 
     513             :         /*
      514             :          * We need to acquire a cleanup lock on the primary bucket page to
      515             :          * wait out concurrent scans before deleting the dead tuples.
     516             :          */
     517         760 :         buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy);
     518         760 :         LockBufferForCleanup(buf);
     519         760 :         _hash_checkpage(rel, buf, LH_BUCKET_PAGE);
     520             : 
     521         760 :         page = BufferGetPage(buf);
     522         760 :         bucket_opaque = HashPageGetOpaque(page);
     523             : 
     524             :         /*
      525             :          * If the bucket contains tuples that were moved by a split, then we
      526             :          * need to delete such tuples.  We can't delete them while the split
      527             :          * operation on the bucket is unfinished, as scans still need them.
     528             :          */
     529         760 :         if (!H_BUCKET_BEING_SPLIT(bucket_opaque) &&
     530         760 :             H_NEEDS_SPLIT_CLEANUP(bucket_opaque))
     531             :         {
     532           0 :             split_cleanup = true;
     533             : 
     534             :             /*
     535             :              * This bucket might have been split since we last held a lock on
     536             :              * the metapage.  If so, hashm_maxbucket, hashm_highmask and
     537             :              * hashm_lowmask might be old enough to cause us to fail to remove
     538             :              * tuples left behind by the most recent split.  To prevent that,
     539             :              * now that the primary page of the target bucket has been locked
     540             :              * (and thus can't be further split), check whether we need to
     541             :              * update our cached metapage data.
     542             :              */
     543             :             Assert(bucket_opaque->hasho_prevblkno != InvalidBlockNumber);
     544           0 :             if (bucket_opaque->hasho_prevblkno > cachedmetap->hashm_maxbucket)
     545             :             {
     546           0 :                 cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
     547             :                 Assert(cachedmetap != NULL);
     548             :             }
     549             :         }
     550             : 
     551         760 :         bucket_buf = buf;
     552             : 
     553         760 :         hashbucketcleanup(rel, cur_bucket, bucket_buf, blkno, info->strategy,
     554             :                           cachedmetap->hashm_maxbucket,
     555             :                           cachedmetap->hashm_highmask,
     556             :                           cachedmetap->hashm_lowmask, &tuples_removed,
     557             :                           &num_index_tuples, split_cleanup,
     558             :                           callback, callback_state);
     559             : 
     560         760 :         _hash_dropbuf(rel, bucket_buf);
     561             : 
     562             :         /* Advance to next bucket */
     563         760 :         cur_bucket++;
     564             :     }
     565             : 
     566          42 :     if (BufferIsInvalid(metabuf))
     567          32 :         metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
     568             : 
     569             :     /* Write-lock metapage and check for split since we started */
     570          42 :     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     571          42 :     metap = HashPageGetMeta(BufferGetPage(metabuf));
     572             : 
     573          42 :     if (cur_maxbucket != metap->hashm_maxbucket)
     574             :     {
     575             :         /* There's been a split, so process the additional bucket(s) */
     576           2 :         LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     577           2 :         cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
     578             :         Assert(cachedmetap != NULL);
     579           2 :         cur_maxbucket = cachedmetap->hashm_maxbucket;
     580           2 :         goto loop_top;
     581             :     }
     582             : 
     583             :     /* Okay, we're really done.  Update tuple count in metapage. */
     584          40 :     START_CRIT_SECTION();
     585             : 
     586          40 :     if (orig_maxbucket == metap->hashm_maxbucket &&
     587          38 :         orig_ntuples == metap->hashm_ntuples)
     588             :     {
     589             :         /*
     590             :          * No one has split or inserted anything since start of scan, so
     591             :          * believe our count as gospel.
     592             :          */
     593           8 :         metap->hashm_ntuples = num_index_tuples;
     594             :     }
     595             :     else
     596             :     {
     597             :         /*
     598             :          * Otherwise, our count is untrustworthy since we may have
     599             :          * double-scanned tuples in split buckets.  Proceed by dead-reckoning.
     600             :          * (Note: we still return estimated_count = false, because using this
     601             :          * count is better than not updating reltuples at all.)
     602             :          */
     603          32 :         if (metap->hashm_ntuples > tuples_removed)
     604          28 :             metap->hashm_ntuples -= tuples_removed;
     605             :         else
     606           4 :             metap->hashm_ntuples = 0;
     607          32 :         num_index_tuples = metap->hashm_ntuples;
     608             :     }
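                      : 
                      :     /*
                      :      * For instance, if the metapage previously claimed 1000 tuples and
                      :      * this pass removed 200, we record 800 here rather than trusting our
                      :      * own per-bucket count, which may have double-counted tuples in
                      :      * split buckets.
                      :      */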
     609             : 
     610          40 :     MarkBufferDirty(metabuf);
     611             : 
     612             :     /* XLOG stuff */
     613          40 :     if (RelationNeedsWAL(rel))
     614             :     {
     615             :         xl_hash_update_meta_page xlrec;
     616             :         XLogRecPtr  recptr;
     617             : 
     618          40 :         xlrec.ntuples = metap->hashm_ntuples;
     619             : 
     620          40 :         XLogBeginInsert();
     621          40 :         XLogRegisterData(&xlrec, SizeOfHashUpdateMetaPage);
     622             : 
     623          40 :         XLogRegisterBuffer(0, metabuf, REGBUF_STANDARD);
     624             : 
     625          40 :         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_UPDATE_META_PAGE);
     626          40 :         PageSetLSN(BufferGetPage(metabuf), recptr);
     627             :     }
     628             : 
     629          40 :     END_CRIT_SECTION();
     630             : 
     631          40 :     _hash_relbuf(rel, metabuf);
     632             : 
     633             :     /* return statistics */
     634          40 :     if (stats == NULL)
     635          40 :         stats = palloc0_object(IndexBulkDeleteResult);
     636          40 :     stats->estimated_count = false;
     637          40 :     stats->num_index_tuples = num_index_tuples;
     638          40 :     stats->tuples_removed += tuples_removed;
     639             :     /* hashvacuumcleanup will fill in num_pages */
     640             : 
     641          40 :     return stats;
     642             : }
     643             : 
     644             : /*
     645             :  * Post-VACUUM cleanup.
     646             :  *
     647             :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     648             :  */
     649             : IndexBulkDeleteResult *
     650          58 : hashvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
     651             : {
     652          58 :     Relation    rel = info->index;
     653             :     BlockNumber num_pages;
     654             : 
     655             :     /* If hashbulkdelete wasn't called, return NULL signifying no change */
     656             :     /* Note: this covers the analyze_only case too */
     657          58 :     if (stats == NULL)
     658          18 :         return NULL;
     659             : 
     660             :     /* update statistics */
     661          40 :     num_pages = RelationGetNumberOfBlocks(rel);
     662          40 :     stats->num_pages = num_pages;
     663             : 
     664          40 :     return stats;
     665             : }
     666             : 
     667             : /*
     668             :  * Helper function to perform deletion of index entries from a bucket.
     669             :  *
     670             :  * This function expects that the caller has acquired a cleanup lock on the
     671             :  * primary bucket page, and will return with a write lock again held on the
     672             :  * primary bucket page.  The lock won't necessarily be held continuously,
     673             :  * though, because we'll release it when visiting overflow pages.
     674             :  *
     675             :  * There can't be any concurrent scans in progress when we first enter this
     676             :  * function because of the cleanup lock we hold on the primary bucket page,
     677             :  * but as soon as we release that lock, there might be.  If those scans got
     678             :  * ahead of our cleanup scan, they might see a tuple before we kill it and
     679             :  * wake up only after VACUUM has completed and the TID has been recycled for
     680             :  * an unrelated tuple.  To avoid that calamity, we prevent scans from passing
     681             :  * our cleanup scan by locking the next page in the bucket chain before
     682             :  * releasing the lock on the previous page.  (This type of lock chaining is not
     683             :  * ideal, so we might want to look for a better solution at some point.)
     684             :  *
     685             :  * We need to retain a pin on the primary bucket to ensure that no concurrent
     686             :  * split can start.
     687             :  */
     688             : void
     689        1852 : hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
     690             :                   BlockNumber bucket_blkno, BufferAccessStrategy bstrategy,
     691             :                   uint32 maxbucket, uint32 highmask, uint32 lowmask,
     692             :                   double *tuples_removed, double *num_index_tuples,
     693             :                   bool split_cleanup,
     694             :                   IndexBulkDeleteCallback callback, void *callback_state)
     695             : {
     696             :     BlockNumber blkno;
     697             :     Buffer      buf;
     698        1852 :     Bucket      new_bucket PG_USED_FOR_ASSERTS_ONLY = InvalidBucket;
     699        1852 :     bool        bucket_dirty = false;
     700             : 
     701        1852 :     blkno = bucket_blkno;
     702        1852 :     buf = bucket_buf;
     703             : 
     704        1852 :     if (split_cleanup)
     705        1092 :         new_bucket = _hash_get_newbucket_from_oldbucket(rel, cur_bucket,
     706             :                                                         lowmask, maxbucket);
     707             : 
     708             :     /* Scan each page in bucket */
     709             :     for (;;)
     710         460 :     {
     711             :         HashPageOpaque opaque;
     712             :         OffsetNumber offno;
     713             :         OffsetNumber maxoffno;
     714             :         Buffer      next_buf;
     715             :         Page        page;
     716             :         OffsetNumber deletable[MaxOffsetNumber];
     717        2312 :         int         ndeletable = 0;
     718        2312 :         bool        retain_pin = false;
     719        2312 :         bool        clear_dead_marking = false;
     720             : 
     721        2312 :         vacuum_delay_point(false);
     722             : 
     723        2312 :         page = BufferGetPage(buf);
     724        2312 :         opaque = HashPageGetOpaque(page);
     725             : 
     726             :         /* Scan each tuple in page */
     727        2312 :         maxoffno = PageGetMaxOffsetNumber(page);
     728        2312 :         for (offno = FirstOffsetNumber;
     729      472048 :              offno <= maxoffno;
     730      469736 :              offno = OffsetNumberNext(offno))
     731             :         {
     732             :             ItemPointer htup;
     733             :             IndexTuple  itup;
     734             :             Bucket      bucket;
     735      469736 :             bool        kill_tuple = false;
     736             : 
     737      469736 :             itup = (IndexTuple) PageGetItem(page,
     738      469736 :                                             PageGetItemId(page, offno));
     739      469736 :             htup = &(itup->t_tid);
     740             : 
     741             :             /*
      742             :              * To remove the dead tuples, we strictly rely on the results of
      743             :              * the callback function; see btvacuumpage for the detailed reason.
     744             :              */
     745      469736 :             if (callback && callback(htup, callback_state))
     746             :             {
     747       44818 :                 kill_tuple = true;
     748       44818 :                 if (tuples_removed)
     749       44818 :                     *tuples_removed += 1;
     750             :             }
     751      424918 :             else if (split_cleanup)
     752             :             {
     753             :                 /* delete the tuples that are moved by split. */
     754      295372 :                 bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
     755             :                                               maxbucket,
     756             :                                               highmask,
     757             :                                               lowmask);
     758             :                 /* mark the item for deletion */
     759      295372 :                 if (bucket != cur_bucket)
     760             :                 {
     761             :                     /*
      762             :                      * We expect tuples to belong either to the current bucket
      763             :                      * or to new_bucket.  This is ensured because we don't allow
      764             :                      * further splits from a bucket that contains garbage. See
     765             :                      * comments in _hash_expandtable.
     766             :                      */
     767             :                     Assert(bucket == new_bucket);
     768      119900 :                     kill_tuple = true;
     769             :                 }
     770             :             }
     771             : 
     772      469736 :             if (kill_tuple)
     773             :             {
     774             :                 /* mark the item for deletion */
     775      164718 :                 deletable[ndeletable++] = offno;
     776             :             }
     777             :             else
     778             :             {
     779             :                 /* we're keeping it, so count it */
     780      305018 :                 if (num_index_tuples)
     781      129546 :                     *num_index_tuples += 1;
     782             :             }
     783             :         }
     784             : 
     785             :         /* retain the pin on primary bucket page till end of bucket scan */
     786        2312 :         if (blkno == bucket_blkno)
     787        1852 :             retain_pin = true;
     788             :         else
     789         460 :             retain_pin = false;
     790             : 
     791        2312 :         blkno = opaque->hasho_nextblkno;
     792             : 
     793             :         /*
     794             :          * Apply deletions, advance to next page and write page if needed.
     795             :          */
     796        2312 :         if (ndeletable > 0)
     797             :         {
     798             :             /* No ereport(ERROR) until changes are logged */
     799        1328 :             START_CRIT_SECTION();
     800             : 
     801        1328 :             PageIndexMultiDelete(page, deletable, ndeletable);
     802        1328 :             bucket_dirty = true;
     803             : 
     804             :             /*
      805             :              * Mark the page as clean if vacuum has removed the DEAD tuples
      806             :              * from this index page.  We do this by clearing the
      807             :              * LH_PAGE_HAS_DEAD_TUPLES flag.
     808             :              */
     809        1328 :             if (tuples_removed && *tuples_removed > 0 &&
     810         178 :                 H_HAS_DEAD_TUPLES(opaque))
     811             :             {
     812           0 :                 opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
     813           0 :                 clear_dead_marking = true;
     814             :             }
     815             : 
     816        1328 :             MarkBufferDirty(buf);
     817             : 
     818             :             /* XLOG stuff */
     819        1328 :             if (RelationNeedsWAL(rel))
     820             :             {
     821             :                 xl_hash_delete xlrec;
     822             :                 XLogRecPtr  recptr;
     823             : 
     824        1076 :                 xlrec.clear_dead_marking = clear_dead_marking;
     825        1076 :                 xlrec.is_primary_bucket_page = (buf == bucket_buf);
     826             : 
     827        1076 :                 XLogBeginInsert();
     828        1076 :                 XLogRegisterData(&xlrec, SizeOfHashDelete);
     829             : 
     830             :                 /*
      831             :                  * The bucket buffer was not changed, but it still needs to be
     832             :                  * registered to ensure that we can acquire a cleanup lock on
     833             :                  * it during replay.
     834             :                  */
     835        1076 :                 if (!xlrec.is_primary_bucket_page)
     836             :                 {
     837         222 :                     uint8       flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
     838             : 
     839         222 :                     XLogRegisterBuffer(0, bucket_buf, flags);
     840             :                 }
     841             : 
     842        1076 :                 XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
     843        1076 :                 XLogRegisterBufData(1, deletable,
     844             :                                     ndeletable * sizeof(OffsetNumber));
     845             : 
     846        1076 :                 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_DELETE);
     847        1076 :                 PageSetLSN(BufferGetPage(buf), recptr);
     848             :             }
     849             : 
     850        1328 :             END_CRIT_SECTION();
     851             :         }
     852             : 
     853             :         /* bail out if there are no more pages to scan. */
     854        2312 :         if (!BlockNumberIsValid(blkno))
     855        1852 :             break;
     856             : 
     857         460 :         next_buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
     858             :                                               LH_OVERFLOW_PAGE,
     859             :                                               bstrategy);
     860             : 
     861             :         /*
      862             :          * Release the lock on the previous page only after acquiring the
      863             :          * lock on the next page.
     864             :          */
     865         460 :         if (retain_pin)
     866          94 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     867             :         else
     868         366 :             _hash_relbuf(rel, buf);
     869             : 
     870         460 :         buf = next_buf;
     871             :     }
     872             : 
     873             :     /*
      874             :      * Lock the bucket page to clear the garbage flag and squeeze the bucket.
      875             :      * If the current buffer is the same as the bucket buffer, then we already
      876             :      * hold a lock on the bucket page.
     877             :      */
     878        1852 :     if (buf != bucket_buf)
     879             :     {
     880          94 :         _hash_relbuf(rel, buf);
     881          94 :         LockBuffer(bucket_buf, BUFFER_LOCK_EXCLUSIVE);
     882             :     }
     883             : 
     884             :     /*
      885             :      * Clear the garbage flag from the bucket after deleting the tuples that
      886             :      * were moved by a split.  We purposefully clear the flag before squeezing
      887             :      * the bucket, so that after a restart vacuum won't again try to delete
      888             :      * the moved-by-split tuples.
     889             :      */
     890        1852 :     if (split_cleanup)
     891             :     {
     892             :         HashPageOpaque bucket_opaque;
     893             :         Page        page;
     894             : 
     895        1092 :         page = BufferGetPage(bucket_buf);
     896        1092 :         bucket_opaque = HashPageGetOpaque(page);
     897             : 
     898             :         /* No ereport(ERROR) until changes are logged */
     899        1092 :         START_CRIT_SECTION();
     900             : 
     901        1092 :         bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
     902        1092 :         MarkBufferDirty(bucket_buf);
     903             : 
     904             :         /* XLOG stuff */
     905        1092 :         if (RelationNeedsWAL(rel))
     906             :         {
     907             :             XLogRecPtr  recptr;
     908             : 
     909         840 :             XLogBeginInsert();
     910         840 :             XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD);
     911             : 
     912         840 :             recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_CLEANUP);
     913         840 :             PageSetLSN(page, recptr);
     914             :         }
     915             : 
     916        1092 :         END_CRIT_SECTION();
     917             :     }
     918             : 
     919             :     /*
     920             :      * If we have deleted anything, try to compact free space.  For squeezing
      921             :      * the bucket, we must have a cleanup lock; otherwise squeezing could
      922             :      * disturb the ordering of tuples for a scan that started before it.
     923             :      */
     924        1852 :     if (bucket_dirty && IsBufferCleanupOK(bucket_buf))
     925        1130 :         _hash_squeezebucket(rel, cur_bucket, bucket_blkno, bucket_buf,
     926             :                             bstrategy);
     927             :     else
     928         722 :         LockBuffer(bucket_buf, BUFFER_LOCK_UNLOCK);
     929        1852 : }
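                      : 
                      : /*
                      :  * Illustration of the lock chaining performed above: for a bucket chain
                      :  * P (primary) -> O1 -> O2, we clean P, lock O1, then release the lock
                      :  * (but not the pin) on P; clean O1, lock O2, release O1; clean O2,
                      :  * release O2; finally re-lock P, clear the split-cleanup flag if this
                      :  * was a split cleanup, and squeeze the bucket if anything was deleted.
                      :  */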
     930             : 
     931             : CompareType
     932           0 : hashtranslatestrategy(StrategyNumber strategy, Oid opfamily)
     933             : {
     934           0 :     if (strategy == HTEqualStrategyNumber)
     935           0 :         return COMPARE_EQ;
     936           0 :     return COMPARE_INVALID;
     937             : }
     938             : 
     939             : StrategyNumber
     940          12 : hashtranslatecmptype(CompareType cmptype, Oid opfamily)
     941             : {
     942          12 :     if (cmptype == COMPARE_EQ)
     943          12 :         return HTEqualStrategyNumber;
     944           0 :     return InvalidStrategy;
     945             : }

Generated by: LCOV version 1.16