LCOV - code coverage report
Current view: top level - src/backend/access/hash - hash.c (source / functions)
Test: PostgreSQL 18devel
Date: 2024-11-21 08:14:44
Coverage:     Lines: 287 of 307 hit (93.5 %)    Functions: 13 of 13 hit (100.0 %)

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * hash.c
       4             :  *    Implementation of Margo Seltzer's Hashing package for postgres.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/hash/hash.c
      12             :  *
      13             :  * NOTES
      14             :  *    This file contains only the public interface routines.
      15             :  *
      16             :  *-------------------------------------------------------------------------
      17             :  */
      18             : 
      19             : #include "postgres.h"
      20             : 
      21             : #include "access/hash.h"
      22             : #include "access/hash_xlog.h"
      23             : #include "access/relscan.h"
      24             : #include "access/tableam.h"
      25             : #include "access/xloginsert.h"
      26             : #include "commands/progress.h"
      27             : #include "commands/vacuum.h"
      28             : #include "miscadmin.h"
      29             : #include "nodes/execnodes.h"
      30             : #include "optimizer/plancat.h"
      31             : #include "pgstat.h"
      32             : #include "utils/fmgrprotos.h"
      33             : #include "utils/index_selfuncs.h"
      34             : #include "utils/rel.h"
      35             : 
      36             : /* Working state for hashbuild and its callback */
      37             : typedef struct
      38             : {
      39             :     HSpool     *spool;          /* NULL if not using spooling */
      40             :     double      indtuples;      /* # tuples accepted into index */
      41             :     Relation    heapRel;        /* heap relation descriptor */
      42             : } HashBuildState;
      43             : 
      44             : static void hashbuildCallback(Relation index,
      45             :                               ItemPointer tid,
      46             :                               Datum *values,
      47             :                               bool *isnull,
      48             :                               bool tupleIsAlive,
      49             :                               void *state);
      50             : 
      51             : 
      52             : /*
      53             :  * Hash handler function: return IndexAmRoutine with access method parameters
      54             :  * and callbacks.
      55             :  */
      56             : Datum
      57        2098 : hashhandler(PG_FUNCTION_ARGS)
      58             : {
      59        2098 :     IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
      60             : 
      61        2098 :     amroutine->amstrategies = HTMaxStrategyNumber;
      62        2098 :     amroutine->amsupport = HASHNProcs;
      63        2098 :     amroutine->amoptsprocnum = HASHOPTIONS_PROC;
      64        2098 :     amroutine->amcanorder = false;
      65        2098 :     amroutine->amcanorderbyop = false;
      66        2098 :     amroutine->amcanbackward = true;
      67        2098 :     amroutine->amcanunique = false;
      68        2098 :     amroutine->amcanmulticol = false;
      69        2098 :     amroutine->amoptionalkey = false;
      70        2098 :     amroutine->amsearcharray = false;
      71        2098 :     amroutine->amsearchnulls = false;
      72        2098 :     amroutine->amstorage = false;
      73        2098 :     amroutine->amclusterable = false;
      74        2098 :     amroutine->ampredlocks = true;
      75        2098 :     amroutine->amcanparallel = false;
      76        2098 :     amroutine->amcanbuildparallel = false;
      77        2098 :     amroutine->amcaninclude = false;
      78        2098 :     amroutine->amusemaintenanceworkmem = false;
      79        2098 :     amroutine->amsummarizing = false;
      80        2098 :     amroutine->amparallelvacuumoptions =
      81             :         VACUUM_OPTION_PARALLEL_BULKDEL;
      82        2098 :     amroutine->amkeytype = INT4OID;
      83             : 
      84        2098 :     amroutine->ambuild = hashbuild;
      85        2098 :     amroutine->ambuildempty = hashbuildempty;
      86        2098 :     amroutine->aminsert = hashinsert;
      87        2098 :     amroutine->aminsertcleanup = NULL;
      88        2098 :     amroutine->ambulkdelete = hashbulkdelete;
      89        2098 :     amroutine->amvacuumcleanup = hashvacuumcleanup;
      90        2098 :     amroutine->amcanreturn = NULL;
      91        2098 :     amroutine->amcostestimate = hashcostestimate;
      92        2098 :     amroutine->amgettreeheight = NULL;
      93        2098 :     amroutine->amoptions = hashoptions;
      94        2098 :     amroutine->amproperty = NULL;
      95        2098 :     amroutine->ambuildphasename = NULL;
      96        2098 :     amroutine->amvalidate = hashvalidate;
      97        2098 :     amroutine->amadjustmembers = hashadjustmembers;
      98        2098 :     amroutine->ambeginscan = hashbeginscan;
      99        2098 :     amroutine->amrescan = hashrescan;
     100        2098 :     amroutine->amgettuple = hashgettuple;
     101        2098 :     amroutine->amgetbitmap = hashgetbitmap;
     102        2098 :     amroutine->amendscan = hashendscan;
     103        2098 :     amroutine->ammarkpos = NULL;
     104        2098 :     amroutine->amrestrpos = NULL;
     105        2098 :     amroutine->amestimateparallelscan = NULL;
     106        2098 :     amroutine->aminitparallelscan = NULL;
     107        2098 :     amroutine->amparallelrescan = NULL;
     108             : 
     109        2098 :     PG_RETURN_POINTER(amroutine);
     110             : }
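
A minimal sketch (not part of hash.c) of how the server consumes this handler:
GetIndexAmRoutine() in amapi.c invokes the handler function through the fmgr
and hands back the filled-in IndexAmRoutine.  The amhandler_oid argument is
assumed to hold pg_am.amhandler for the "hash" row.

    #include "postgres.h"
    #include "access/amapi.h"

    /* Sketch: obtain the hash AM's callback table through its handler.
     * amhandler_oid is assumed to be pg_am.amhandler for the "hash" AM. */
    static IndexAmRoutine *
    get_hash_am_routine(Oid amhandler_oid)
    {
        IndexAmRoutine *routine = GetIndexAmRoutine(amhandler_oid);

        /* the fields assigned by hashhandler() above are now usable */
        Assert(routine->amgettuple != NULL);
        return routine;
    }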
     111             : 
     112             : /*
     113             :  *  hashbuild() -- build a new hash index.
     114             :  */
     115             : IndexBuildResult *
     116         318 : hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     117             : {
     118             :     IndexBuildResult *result;
     119             :     BlockNumber relpages;
     120             :     double      reltuples;
     121             :     double      allvisfrac;
     122             :     uint32      num_buckets;
     123             :     long        sort_threshold;
     124             :     HashBuildState buildstate;
     125             : 
     126             :     /*
     127             :      * We expect to be called exactly once for any index relation. If that's
     128             :      * not the case, big trouble's what we have.
     129             :      */
     130         318 :     if (RelationGetNumberOfBlocks(index) != 0)
     131           0 :         elog(ERROR, "index \"%s\" already contains data",
     132             :              RelationGetRelationName(index));
     133             : 
     134             :     /* Estimate the number of rows currently present in the table */
     135         318 :     estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);
     136             : 
     137             :     /* Initialize the hash index metadata page and initial buckets */
     138         318 :     num_buckets = _hash_init(index, reltuples, MAIN_FORKNUM);
     139             : 
     140             :     /*
     141             :      * If we just insert the tuples into the index in scan order, then
     142             :      * (assuming their hash codes are pretty random) there will be no locality
     143             :      * of access to the index, and if the index is bigger than available RAM
     144             :      * then we'll thrash horribly.  To prevent that scenario, we can sort the
     145             :      * tuples by (expected) bucket number.  However, such a sort is useless
     146             :      * overhead when the index does fit in RAM.  We choose to sort if the
     147             :      * initial index size exceeds maintenance_work_mem, or the number of
     148             :      * buffers usable for the index, whichever is less.  (Limiting by the
     149             :      * number of buffers should reduce thrashing between PG buffers and kernel
     150             :      * buffers, which seems useful even if no physical I/O results.  Limiting
     151             :      * by maintenance_work_mem is useful to allow easy testing of the sort
     152             :      * code path, and may be useful to DBAs as an additional control knob.)
     153             :      *
     154             :      * NOTE: this test will need adjustment if a bucket is ever different from
     155             :      * one page.  Also, "initial index size" accounting does not include the
     156             :      * metapage, nor the first bitmap page.
     157             :      */
     158         318 :     sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ;
     159         318 :     if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
     160         312 :         sort_threshold = Min(sort_threshold, NBuffers);
     161             :     else
     162           6 :         sort_threshold = Min(sort_threshold, NLocBuffer);
     163             : 
     164         318 :     if (num_buckets >= (uint32) sort_threshold)
     165           8 :         buildstate.spool = _h_spoolinit(heap, index, num_buckets);
     166             :     else
     167         310 :         buildstate.spool = NULL;
     168             : 
     169             :     /* prepare to build the index */
     170         318 :     buildstate.indtuples = 0;
     171         318 :     buildstate.heapRel = heap;
     172             : 
     173             :     /* do the heap scan */
     174         318 :     reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
     175             :                                        hashbuildCallback,
     176             :                                        (void *) &buildstate, NULL);
     177         318 :     pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_TOTAL,
     178         318 :                                  buildstate.indtuples);
     179             : 
     180         318 :     if (buildstate.spool)
     181             :     {
     182             :         /* sort the tuples and insert them into the index */
     183           8 :         _h_indexbuild(buildstate.spool, buildstate.heapRel);
     184           8 :         _h_spooldestroy(buildstate.spool);
     185             :     }
     186             : 
     187             :     /*
     188             :      * Return statistics
     189             :      */
     190         318 :     result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
     191             : 
     192         318 :     result->heap_tuples = reltuples;
     193         318 :     result->index_tuples = buildstate.indtuples;
     194             : 
     195         318 :     return result;
     196             : }
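
To make the sort-threshold arithmetic concrete, here is a hedged worked
example with illustrative settings that do not come from the report above:
64MB of maintenance_work_mem, 8kB blocks, and 128MB of shared buffers.

    /* Sketch of hashbuild()'s threshold computation under assumed GUCs:
     * maintenance_work_mem = 65536 kB, BLCKSZ = 8192, NBuffers = 16384. */
    long    sort_threshold = (65536L * 1024L) / 8192L;  /* = 8192 pages */

    sort_threshold = Min(sort_threshold, 16384L);       /* still 8192 */
    /* a build creating >= 8192 initial buckets would spool and sort */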
     197             : 
     198             : /*
     199             :  *  hashbuildempty() -- build an empty hash index in the initialization fork
     200             :  */
     201             : void
     202           6 : hashbuildempty(Relation index)
     203             : {
     204           6 :     _hash_init(index, 0, INIT_FORKNUM);
     205           6 : }
     206             : 
     207             : /*
     208             :  * Per-tuple callback for table_index_build_scan
     209             :  */
     210             : static void
     211      494544 : hashbuildCallback(Relation index,
     212             :                   ItemPointer tid,
     213             :                   Datum *values,
     214             :                   bool *isnull,
     215             :                   bool tupleIsAlive,
     216             :                   void *state)
     217             : {
     218      494544 :     HashBuildState *buildstate = (HashBuildState *) state;
     219             :     Datum       index_values[1];
     220             :     bool        index_isnull[1];
     221             :     IndexTuple  itup;
     222             : 
     223             :     /* convert data to a hash key; on failure, do not insert anything */
     224      494544 :     if (!_hash_convert_tuple(index,
     225             :                              values, isnull,
     226             :                              index_values, index_isnull))
     227           0 :         return;
     228             : 
     229             :     /* Either spool the tuple for sorting, or just put it into the index */
     230      494544 :     if (buildstate->spool)
     231      121000 :         _h_spool(buildstate->spool, tid, index_values, index_isnull);
     232             :     else
     233             :     {
     234             :         /* form an index tuple and point it at the heap tuple */
     235      373544 :         itup = index_form_tuple(RelationGetDescr(index),
     236             :                                 index_values, index_isnull);
     237      373544 :         itup->t_tid = *tid;
     238      373544 :         _hash_doinsert(index, itup, buildstate->heapRel, false);
     239      373544 :         pfree(itup);
     240             :     }
     241             : 
     242      494544 :     buildstate->indtuples += 1;
     243             : }
     244             : 
     245             : /*
     246             :  *  hashinsert() -- insert an index tuple into a hash table.
     247             :  *
     248             :  *  Hash on the heap tuple's key, form an index tuple with hash code.
     249             :  *  Find the appropriate location for the new tuple, and put it there.
     250             :  */
     251             : bool
     252      230118 : hashinsert(Relation rel, Datum *values, bool *isnull,
     253             :            ItemPointer ht_ctid, Relation heapRel,
     254             :            IndexUniqueCheck checkUnique,
     255             :            bool indexUnchanged,
     256             :            IndexInfo *indexInfo)
     257             : {
     258             :     Datum       index_values[1];
     259             :     bool        index_isnull[1];
     260             :     IndexTuple  itup;
     261             : 
     262             :     /* convert data to a hash key; on failure, do not insert anything */
     263      230118 :     if (!_hash_convert_tuple(rel,
     264             :                              values, isnull,
     265             :                              index_values, index_isnull))
     266           0 :         return false;
     267             : 
     268             :     /* form an index tuple and point it at the heap tuple */
     269      230118 :     itup = index_form_tuple(RelationGetDescr(rel), index_values, index_isnull);
     270      230118 :     itup->t_tid = *ht_ctid;
     271             : 
     272      230118 :     _hash_doinsert(rel, itup, heapRel, false);
     273             : 
     274      230106 :     pfree(itup);
     275             : 
     276      230106 :     return false;
     277             : }
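
Hash index tuples store only the 32-bit hash code of the key, which is why
hashhandler() sets amkeytype = INT4OID and why every scan rechecks.  A hedged
sketch of what _hash_convert_tuple conceptually produces, using the generic
hash_any() from common/hashfn.h as a stand-in for the opclass support
function:

    /* Sketch: the single index column becomes the key's hash code.
     * hash_any() stands in for the opclass hash support function here. */
    uint32  hashkey = DatumGetUInt32(hash_any((const unsigned char *) "key", 3));
    Datum   index_values[1] = {UInt32GetDatum(hashkey)};
    bool    index_isnull[1] = {false};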
     278             : 
     279             : 
     280             : /*
     281             :  *  hashgettuple() -- Get the next tuple in the scan.
     282             :  */
     283             : bool
     284      101448 : hashgettuple(IndexScanDesc scan, ScanDirection dir)
     285             : {
     286      101448 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     287             :     bool        res;
     288             : 
     289             :     /* Hash indexes are always lossy since we store only the hash code */
     290      101448 :     scan->xs_recheck = true;
     291             : 
     292             :     /*
     293             :      * If we've already initialized this scan, we can just advance it in the
     294             :      * appropriate direction.  If we haven't done so yet, we call a routine to
     295             :      * get the first item in the scan.
     296             :      */
     297      101448 :     if (!HashScanPosIsValid(so->currPos))
     298         464 :         res = _hash_first(scan, dir);
     299             :     else
     300             :     {
     301             :         /*
     302             :          * Check to see if we should kill the previously-fetched tuple.
     303             :          */
     304      100984 :         if (scan->kill_prior_tuple)
     305             :         {
     306             :             /*
     307             :              * Yes, so remember it for later. (We'll deal with all such tuples
     308             :              * at once right after leaving the index page or at end of scan.)
      309             :              * If the caller reverses the indexscan direction, it is quite
      310             :              * possible that the same item might get entered multiple times.
      311             :              * We don't detect that; instead, we just forget any excess
      312             :              * entries.
     313             :              */
     314           0 :             if (so->killedItems == NULL)
     315           0 :                 so->killedItems = (int *)
     316           0 :                     palloc(MaxIndexTuplesPerPage * sizeof(int));
     317             : 
     318           0 :             if (so->numKilled < MaxIndexTuplesPerPage)
     319           0 :                 so->killedItems[so->numKilled++] = so->currPos.itemIndex;
     320             :         }
     321             : 
     322             :         /*
     323             :          * Now continue the scan.
     324             :          */
     325      100984 :         res = _hash_next(scan, dir);
     326             :     }
     327             : 
     328      101448 :     return res;
     329             : }
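
From the caller's side, indexam.c's index_getnext_tid() funnels into this
routine.  Because xs_recheck is always set, the caller must re-verify the
scan keys against the fetched heap tuple.  A hedged sketch of that contract:

    /* Sketch: driving hashgettuple() through the indexam wrapper.
     * 'scan' is assumed to be an open IndexScanDesc on a hash index. */
    ItemPointer tid;

    while ((tid = index_getnext_tid(scan, ForwardScanDirection)) != NULL)
    {
        /* fetch the heap tuple at *tid; scan->xs_recheck is true for
         * hash indexes, so re-evaluate the quals against that tuple */
    }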
     330             : 
     331             : 
     332             : /*
     333             :  *  hashgetbitmap() -- get all tuples at once
     334             :  */
     335             : int64
     336          68 : hashgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
     337             : {
     338          68 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     339             :     bool        res;
     340          68 :     int64       ntids = 0;
     341             :     HashScanPosItem *currItem;
     342             : 
     343          68 :     res = _hash_first(scan, ForwardScanDirection);
     344             : 
     345         204 :     while (res)
     346             :     {
     347         136 :         currItem = &so->currPos.items[so->currPos.itemIndex];
     348             : 
     349             :         /*
      350             :          * _hash_first and _hash_next handle eliminating dead index entries
     351             :          * whenever scan->ignore_killed_tuples is true.  Therefore, there's
     352             :          * nothing to do here except add the results to the TIDBitmap.
     353             :          */
     354         136 :         tbm_add_tuples(tbm, &(currItem->heapTid), 1, true);
     355         136 :         ntids++;
     356             : 
     357         136 :         res = _hash_next(scan, ForwardScanDirection);
     358             :     }
     359             : 
     360          68 :     return ntids;
     361             : }
     362             : 
     363             : 
     364             : /*
     365             :  *  hashbeginscan() -- start a scan on a hash index
     366             :  */
     367             : IndexScanDesc
     368         374 : hashbeginscan(Relation rel, int nkeys, int norderbys)
     369             : {
     370             :     IndexScanDesc scan;
     371             :     HashScanOpaque so;
     372             : 
     373             :     /* no order by operators allowed */
     374             :     Assert(norderbys == 0);
     375             : 
     376         374 :     scan = RelationGetIndexScan(rel, nkeys, norderbys);
     377             : 
     378         374 :     so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
     379         374 :     HashScanPosInvalidate(so->currPos);
     380         374 :     so->hashso_bucket_buf = InvalidBuffer;
     381         374 :     so->hashso_split_bucket_buf = InvalidBuffer;
     382             : 
     383         374 :     so->hashso_buc_populated = false;
     384         374 :     so->hashso_buc_split = false;
     385             : 
     386         374 :     so->killedItems = NULL;
     387         374 :     so->numKilled = 0;
     388             : 
     389         374 :     scan->opaque = so;
     390             : 
     391         374 :     return scan;
     392             : }
     393             : 
     394             : /*
     395             :  *  hashrescan() -- rescan an index relation
     396             :  */
     397             : void
     398         526 : hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
     399             :            ScanKey orderbys, int norderbys)
     400             : {
     401         526 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     402         526 :     Relation    rel = scan->indexRelation;
     403             : 
     404         526 :     if (HashScanPosIsValid(so->currPos))
     405             :     {
     406             :         /* Before leaving current page, deal with any killed items */
     407          60 :         if (so->numKilled > 0)
     408           0 :             _hash_kill_items(scan);
     409             :     }
     410             : 
     411         526 :     _hash_dropscanbuf(rel, so);
     412             : 
     413             :     /* set position invalid (this will cause _hash_first call) */
     414         526 :     HashScanPosInvalidate(so->currPos);
     415             : 
     416             :     /* Update scan key, if a new one is given */
     417         526 :     if (scankey && scan->numberOfKeys > 0)
     418         526 :         memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
     419             : 
     420         526 :     so->hashso_buc_populated = false;
     421         526 :     so->hashso_buc_split = false;
     422         526 : }
     423             : 
     424             : /*
     425             :  *  hashendscan() -- close down a scan
     426             :  */
     427             : void
     428         374 : hashendscan(IndexScanDesc scan)
     429             : {
     430         374 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     431         374 :     Relation    rel = scan->indexRelation;
     432             : 
     433         374 :     if (HashScanPosIsValid(so->currPos))
     434             :     {
     435             :         /* Before leaving current page, deal with any killed items */
     436          62 :         if (so->numKilled > 0)
     437           0 :             _hash_kill_items(scan);
     438             :     }
     439             : 
     440         374 :     _hash_dropscanbuf(rel, so);
     441             : 
     442         374 :     if (so->killedItems != NULL)
     443           0 :         pfree(so->killedItems);
     444         374 :     pfree(so);
     445         374 :     scan->opaque = NULL;
     446         374 : }
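
The three routines above are the AM side of the generic scan lifecycle.  A
hedged sketch of the indexam.c calls that reach them (exact signatures vary
across versions; the equality-only scan key is the hash-specific part, and
F_INT4EQ is only an illustrative comparison proc):

    /* Sketch: a scan lifecycle hitting hashbeginscan/hashrescan/hashendscan.
     * heap, index, and snapshot are assumed to be set up already. */
    ScanKeyData skey;
    IndexScanDesc scan;

    ScanKeyInit(&skey,
                1,                     /* first (and only) index column */
                HTEqualStrategyNumber, /* hash supports only equality */
                F_INT4EQ,              /* illustrative comparison proc */
                Int32GetDatum(42));

    scan = index_beginscan(heap, index, snapshot, 1, 0); /* -> hashbeginscan */
    index_rescan(scan, &skey, 1, NULL, 0);               /* -> hashrescan */
    /* ... fetch tuples with index_getnext_tid() ... */
    index_endscan(scan);                                 /* -> hashendscan */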
     447             : 
     448             : /*
     449             :  * Bulk deletion of all index entries pointing to a set of heap tuples.
     450             :  * The set of target tuples is specified via a callback routine that tells
     451             :  * whether any given heap tuple (identified by ItemPointer) is being deleted.
     452             :  *
      453             :  * This function also deletes the tuples that were moved by a split to
      454             :  * another bucket.
     455             :  *
     456             :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     457             :  */
     458             : IndexBulkDeleteResult *
     459          30 : hashbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     460             :                IndexBulkDeleteCallback callback, void *callback_state)
     461             : {
     462          30 :     Relation    rel = info->index;
     463             :     double      tuples_removed;
     464             :     double      num_index_tuples;
     465             :     double      orig_ntuples;
     466             :     Bucket      orig_maxbucket;
     467             :     Bucket      cur_maxbucket;
     468             :     Bucket      cur_bucket;
     469          30 :     Buffer      metabuf = InvalidBuffer;
     470             :     HashMetaPage metap;
     471             :     HashMetaPage cachedmetap;
     472             : 
     473          30 :     tuples_removed = 0;
     474          30 :     num_index_tuples = 0;
     475             : 
     476             :     /*
     477             :      * We need a copy of the metapage so that we can use its hashm_spares[]
     478             :      * values to compute bucket page addresses, but a cached copy should be
     479             :      * good enough.  (If not, we'll detect that further down and refresh the
     480             :      * cache as necessary.)
     481             :      */
     482          30 :     cachedmetap = _hash_getcachedmetap(rel, &metabuf, false);
     483             :     Assert(cachedmetap != NULL);
     484             : 
     485          30 :     orig_maxbucket = cachedmetap->hashm_maxbucket;
     486          30 :     orig_ntuples = cachedmetap->hashm_ntuples;
     487             : 
     488             :     /* Scan the buckets that we know exist */
     489          30 :     cur_bucket = 0;
     490          30 :     cur_maxbucket = orig_maxbucket;
     491             : 
     492          30 : loop_top:
     493         260 :     while (cur_bucket <= cur_maxbucket)
     494             :     {
     495             :         BlockNumber bucket_blkno;
     496             :         BlockNumber blkno;
     497             :         Buffer      bucket_buf;
     498             :         Buffer      buf;
     499             :         HashPageOpaque bucket_opaque;
     500             :         Page        page;
     501         230 :         bool        split_cleanup = false;
     502             : 
     503             :         /* Get address of bucket's start page */
     504         230 :         bucket_blkno = BUCKET_TO_BLKNO(cachedmetap, cur_bucket);
     505             : 
     506         230 :         blkno = bucket_blkno;
     507             : 
     508             :         /*
      509             :          * We need to acquire a cleanup lock on the primary bucket page to
      510             :          * wait out concurrent scans before deleting the dead tuples.
     511             :          */
     512         230 :         buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy);
     513         230 :         LockBufferForCleanup(buf);
     514         230 :         _hash_checkpage(rel, buf, LH_BUCKET_PAGE);
     515             : 
     516         230 :         page = BufferGetPage(buf);
     517         230 :         bucket_opaque = HashPageGetOpaque(page);
     518             : 
     519             :         /*
      520             :          * If the bucket contains tuples that were moved by a split, we need
      521             :          * to delete them.  We can't do so while the split operation on the
      522             :          * bucket is unfinished, as scans still need those tuples.
     523             :          */
     524         230 :         if (!H_BUCKET_BEING_SPLIT(bucket_opaque) &&
     525         230 :             H_NEEDS_SPLIT_CLEANUP(bucket_opaque))
     526             :         {
     527           0 :             split_cleanup = true;
     528             : 
     529             :             /*
     530             :              * This bucket might have been split since we last held a lock on
     531             :              * the metapage.  If so, hashm_maxbucket, hashm_highmask and
     532             :              * hashm_lowmask might be old enough to cause us to fail to remove
     533             :              * tuples left behind by the most recent split.  To prevent that,
     534             :              * now that the primary page of the target bucket has been locked
     535             :              * (and thus can't be further split), check whether we need to
     536             :              * update our cached metapage data.
     537             :              */
     538             :             Assert(bucket_opaque->hasho_prevblkno != InvalidBlockNumber);
     539           0 :             if (bucket_opaque->hasho_prevblkno > cachedmetap->hashm_maxbucket)
     540             :             {
     541           0 :                 cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
     542             :                 Assert(cachedmetap != NULL);
     543             :             }
     544             :         }
     545             : 
     546         230 :         bucket_buf = buf;
     547             : 
     548         230 :         hashbucketcleanup(rel, cur_bucket, bucket_buf, blkno, info->strategy,
     549             :                           cachedmetap->hashm_maxbucket,
     550             :                           cachedmetap->hashm_highmask,
     551             :                           cachedmetap->hashm_lowmask, &tuples_removed,
     552             :                           &num_index_tuples, split_cleanup,
     553             :                           callback, callback_state);
     554             : 
     555         230 :         _hash_dropbuf(rel, bucket_buf);
     556             : 
     557             :         /* Advance to next bucket */
     558         230 :         cur_bucket++;
     559             :     }
     560             : 
     561          30 :     if (BufferIsInvalid(metabuf))
     562          28 :         metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
     563             : 
     564             :     /* Write-lock metapage and check for split since we started */
     565          30 :     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     566          30 :     metap = HashPageGetMeta(BufferGetPage(metabuf));
     567             : 
     568          30 :     if (cur_maxbucket != metap->hashm_maxbucket)
     569             :     {
     570             :         /* There's been a split, so process the additional bucket(s) */
     571           0 :         LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     572           0 :         cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
     573             :         Assert(cachedmetap != NULL);
     574           0 :         cur_maxbucket = cachedmetap->hashm_maxbucket;
     575           0 :         goto loop_top;
     576             :     }
     577             : 
     578             :     /* Okay, we're really done.  Update tuple count in metapage. */
     579          30 :     START_CRIT_SECTION();
     580             : 
     581          30 :     if (orig_maxbucket == metap->hashm_maxbucket &&
     582          30 :         orig_ntuples == metap->hashm_ntuples)
     583             :     {
     584             :         /*
     585             :          * No one has split or inserted anything since start of scan, so
     586             :          * believe our count as gospel.
     587             :          */
     588           2 :         metap->hashm_ntuples = num_index_tuples;
     589             :     }
     590             :     else
     591             :     {
     592             :         /*
     593             :          * Otherwise, our count is untrustworthy since we may have
     594             :          * double-scanned tuples in split buckets.  Proceed by dead-reckoning.
     595             :          * (Note: we still return estimated_count = false, because using this
     596             :          * count is better than not updating reltuples at all.)
     597             :          */
     598          28 :         if (metap->hashm_ntuples > tuples_removed)
     599          24 :             metap->hashm_ntuples -= tuples_removed;
     600             :         else
     601           4 :             metap->hashm_ntuples = 0;
     602          28 :         num_index_tuples = metap->hashm_ntuples;
     603             :     }
     604             : 
     605          30 :     MarkBufferDirty(metabuf);
     606             : 
     607             :     /* XLOG stuff */
     608          30 :     if (RelationNeedsWAL(rel))
     609             :     {
     610             :         xl_hash_update_meta_page xlrec;
     611             :         XLogRecPtr  recptr;
     612             : 
     613          30 :         xlrec.ntuples = metap->hashm_ntuples;
     614             : 
     615          30 :         XLogBeginInsert();
     616          30 :         XLogRegisterData((char *) &xlrec, SizeOfHashUpdateMetaPage);
     617             : 
     618          30 :         XLogRegisterBuffer(0, metabuf, REGBUF_STANDARD);
     619             : 
     620          30 :         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_UPDATE_META_PAGE);
     621          30 :         PageSetLSN(BufferGetPage(metabuf), recptr);
     622             :     }
     623             : 
     624          30 :     END_CRIT_SECTION();
     625             : 
     626          30 :     _hash_relbuf(rel, metabuf);
     627             : 
     628             :     /* return statistics */
     629          30 :     if (stats == NULL)
     630          30 :         stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
     631          30 :     stats->estimated_count = false;
     632          30 :     stats->num_index_tuples = num_index_tuples;
     633          30 :     stats->tuples_removed += tuples_removed;
     634             :     /* hashvacuumcleanup will fill in num_pages */
     635             : 
     636          30 :     return stats;
     637             : }
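
A hedged worked example of the dead-reckoning branch above, with illustrative
numbers: if the metapage claimed 1000 tuples and this pass removed 120 while
concurrent inserts or a split occurred, the count is decremented, not
replaced.

    /* Sketch of the dead-reckoning update (values are illustrative). */
    double  hashm_ntuples  = 1000.0;   /* metapage tuple count */
    double  tuples_removed = 120.0;    /* removed by this pass */

    if (hashm_ntuples > tuples_removed)
        hashm_ntuples -= tuples_removed;   /* -> 880.0 */
    else
        hashm_ntuples = 0;                 /* clamp: never negative */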
     638             : 
     639             : /*
     640             :  * Post-VACUUM cleanup.
     641             :  *
     642             :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     643             :  */
     644             : IndexBulkDeleteResult *
     645          48 : hashvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
     646             : {
     647          48 :     Relation    rel = info->index;
     648             :     BlockNumber num_pages;
     649             : 
     650             :     /* If hashbulkdelete wasn't called, return NULL signifying no change */
     651             :     /* Note: this covers the analyze_only case too */
     652          48 :     if (stats == NULL)
     653          18 :         return NULL;
     654             : 
     655             :     /* update statistics */
     656          30 :     num_pages = RelationGetNumberOfBlocks(rel);
     657          30 :     stats->num_pages = num_pages;
     658             : 
     659          30 :     return stats;
     660             : }
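
For context, a hedged sketch of the caller's side: vacuum runs zero or more
bulk-delete passes through the indexam wrappers and then one cleanup pass,
which is why a NULL stats pointer here means hashbulkdelete never ran.

    /* Sketch: how vacuum reaches the two routines above.  info, callback,
     * and callback_state are assumed to be prepared by the vacuum code. */
    IndexBulkDeleteResult *stats = NULL;

    stats = index_bulk_delete(info, stats, callback, callback_state);
    stats = index_vacuum_cleanup(info, stats);  /* may return NULL */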
     661             : 
     662             : /*
     663             :  * Helper function to perform deletion of index entries from a bucket.
     664             :  *
     665             :  * This function expects that the caller has acquired a cleanup lock on the
     666             :  * primary bucket page, and will return with a write lock again held on the
     667             :  * primary bucket page.  The lock won't necessarily be held continuously,
     668             :  * though, because we'll release it when visiting overflow pages.
     669             :  *
     670             :  * There can't be any concurrent scans in progress when we first enter this
     671             :  * function because of the cleanup lock we hold on the primary bucket page,
     672             :  * but as soon as we release that lock, there might be.  If those scans got
     673             :  * ahead of our cleanup scan, they might see a tuple before we kill it and
     674             :  * wake up only after VACUUM has completed and the TID has been recycled for
     675             :  * an unrelated tuple.  To avoid that calamity, we prevent scans from passing
     676             :  * our cleanup scan by locking the next page in the bucket chain before
     677             :  * releasing the lock on the previous page.  (This type of lock chaining is not
     678             :  * ideal, so we might want to look for a better solution at some point.)
     679             :  *
     680             :  * We need to retain a pin on the primary bucket to ensure that no concurrent
     681             :  * split can start.
     682             :  */
     683             : void
     684        1562 : hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
     685             :                   BlockNumber bucket_blkno, BufferAccessStrategy bstrategy,
     686             :                   uint32 maxbucket, uint32 highmask, uint32 lowmask,
     687             :                   double *tuples_removed, double *num_index_tuples,
     688             :                   bool split_cleanup,
     689             :                   IndexBulkDeleteCallback callback, void *callback_state)
     690             : {
     691             :     BlockNumber blkno;
     692             :     Buffer      buf;
     693        1562 :     Bucket      new_bucket PG_USED_FOR_ASSERTS_ONLY = InvalidBucket;
     694        1562 :     bool        bucket_dirty = false;
     695             : 
     696        1562 :     blkno = bucket_blkno;
     697        1562 :     buf = bucket_buf;
     698             : 
     699        1562 :     if (split_cleanup)
     700        1332 :         new_bucket = _hash_get_newbucket_from_oldbucket(rel, cur_bucket,
     701             :                                                         lowmask, maxbucket);
     702             : 
     703             :     /* Scan each page in bucket */
     704             :     for (;;)
     705         422 :     {
     706             :         HashPageOpaque opaque;
     707             :         OffsetNumber offno;
     708             :         OffsetNumber maxoffno;
     709             :         Buffer      next_buf;
     710             :         Page        page;
     711             :         OffsetNumber deletable[MaxOffsetNumber];
     712        1984 :         int         ndeletable = 0;
     713        1984 :         bool        retain_pin = false;
     714        1984 :         bool        clear_dead_marking = false;
     715             : 
     716        1984 :         vacuum_delay_point();
     717             : 
     718        1984 :         page = BufferGetPage(buf);
     719        1984 :         opaque = HashPageGetOpaque(page);
     720             : 
     721             :         /* Scan each tuple in page */
     722        1984 :         maxoffno = PageGetMaxOffsetNumber(page);
     723      358612 :         for (offno = FirstOffsetNumber;
     724             :              offno <= maxoffno;
     725      356628 :              offno = OffsetNumberNext(offno))
     726             :         {
     727             :             ItemPointer htup;
     728             :             IndexTuple  itup;
     729             :             Bucket      bucket;
     730      356628 :             bool        kill_tuple = false;
     731             : 
     732      356628 :             itup = (IndexTuple) PageGetItem(page,
     733             :                                             PageGetItemId(page, offno));
     734      356628 :             htup = &(itup->t_tid);
     735             : 
     736             :             /*
      737             :              * To remove dead tuples, we rely strictly on the results of the
      738             :              * callback function; see btvacuumpage for the detailed reason.
     739             :              */
     740      356628 :             if (callback && callback(htup, callback_state))
     741             :             {
     742       31862 :                 kill_tuple = true;
     743       31862 :                 if (tuples_removed)
     744       31862 :                     *tuples_removed += 1;
     745             :             }
     746      324766 :             else if (split_cleanup)
     747             :             {
     748             :                 /* delete the tuples that are moved by split. */
     749      305222 :                 bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
     750             :                                               maxbucket,
     751             :                                               highmask,
     752             :                                               lowmask);
     753             :                 /* mark the item for deletion */
     754      305222 :                 if (bucket != cur_bucket)
     755             :                 {
     756             :                     /*
      757             :                      * We expect tuples to belong to either the current
      758             :                      * bucket or new_bucket.  This is ensured because we
      759             :                      * don't allow further splits from a bucket that contains
      760             :                      * garbage.  See comments in _hash_expandtable.
     761             :                      */
     762             :                     Assert(bucket == new_bucket);
     763      124966 :                     kill_tuple = true;
     764             :                 }
     765             :             }
     766             : 
     767      356628 :             if (kill_tuple)
     768             :             {
     769             :                 /* mark the item for deletion */
     770      156828 :                 deletable[ndeletable++] = offno;
     771             :             }
     772             :             else
     773             :             {
     774             :                 /* we're keeping it, so count it */
     775      199800 :                 if (num_index_tuples)
     776       19544 :                     *num_index_tuples += 1;
     777             :             }
     778             :         }
     779             : 
     780             :         /* retain the pin on primary bucket page till end of bucket scan */
     781        1984 :         if (blkno == bucket_blkno)
     782        1562 :             retain_pin = true;
     783             :         else
     784         422 :             retain_pin = false;
     785             : 
     786        1984 :         blkno = opaque->hasho_nextblkno;
     787             : 
     788             :         /*
     789             :          * Apply deletions, advance to next page and write page if needed.
     790             :          */
     791        1984 :         if (ndeletable > 0)
     792             :         {
     793             :             /* No ereport(ERROR) until changes are logged */
     794        1520 :             START_CRIT_SECTION();
     795             : 
     796        1520 :             PageIndexMultiDelete(page, deletable, ndeletable);
     797        1520 :             bucket_dirty = true;
     798             : 
     799             :             /*
      800             :              * Mark the page as clean if vacuum removed the DEAD tuples from
      801             :              * an index page.  We do this by clearing the
      802             :              * LH_PAGE_HAS_DEAD_TUPLES flag.
     803             :              */
     804        1520 :             if (tuples_removed && *tuples_removed > 0 &&
     805         132 :                 H_HAS_DEAD_TUPLES(opaque))
     806             :             {
     807           0 :                 opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
     808           0 :                 clear_dead_marking = true;
     809             :             }
     810             : 
     811        1520 :             MarkBufferDirty(buf);
     812             : 
     813             :             /* XLOG stuff */
     814        1520 :             if (RelationNeedsWAL(rel))
     815             :             {
     816             :                 xl_hash_delete xlrec;
     817             :                 XLogRecPtr  recptr;
     818             : 
     819        1268 :                 xlrec.clear_dead_marking = clear_dead_marking;
     820        1268 :                 xlrec.is_primary_bucket_page = (buf == bucket_buf);
     821             : 
     822        1268 :                 XLogBeginInsert();
     823        1268 :                 XLogRegisterData((char *) &xlrec, SizeOfHashDelete);
     824             : 
     825             :                 /*
      826             :                  * The bucket buffer was not changed, but it still needs to
      827             :                  * be registered to ensure that we can acquire a cleanup lock
      828             :                  * on it during replay.
     829             :                  */
     830        1268 :                 if (!xlrec.is_primary_bucket_page)
     831             :                 {
     832         194 :                     uint8       flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
     833             : 
     834         194 :                     XLogRegisterBuffer(0, bucket_buf, flags);
     835             :                 }
     836             : 
     837        1268 :                 XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
     838        1268 :                 XLogRegisterBufData(1, (char *) deletable,
     839             :                                     ndeletable * sizeof(OffsetNumber));
     840             : 
     841        1268 :                 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_DELETE);
     842        1268 :                 PageSetLSN(BufferGetPage(buf), recptr);
     843             :             }
     844             : 
     845        1520 :             END_CRIT_SECTION();
     846             :         }
     847             : 
     848             :         /* bail out if there are no more pages to scan. */
     849        1984 :         if (!BlockNumberIsValid(blkno))
     850        1562 :             break;
     851             : 
     852         422 :         next_buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
     853             :                                               LH_OVERFLOW_PAGE,
     854             :                                               bstrategy);
     855             : 
     856             :         /*
      857             :          * Release the lock on the previous page only after acquiring the
      858             :          * lock on the next page.
     859             :          */
     860         422 :         if (retain_pin)
     861          80 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     862             :         else
     863         342 :             _hash_relbuf(rel, buf);
     864             : 
     865         422 :         buf = next_buf;
     866             :     }
     867             : 
     868             :     /*
      869             :      * Lock the bucket page to clear the garbage flag and squeeze the
      870             :      * bucket.  If the current buffer is the same as the bucket buffer, we
      871             :      * already have a lock on the bucket page.
     872             :      */
     873        1562 :     if (buf != bucket_buf)
     874             :     {
     875          80 :         _hash_relbuf(rel, buf);
     876          80 :         LockBuffer(bucket_buf, BUFFER_LOCK_EXCLUSIVE);
     877             :     }
     878             : 
     879             :     /*
      880             :      * Clear the garbage flag from the bucket after deleting the tuples that
      881             :      * were moved by a split.  We purposefully clear the flag before
      882             :      * squeezing the bucket, so that after a restart, vacuum won't again try
      883             :      * to delete the moved-by-split tuples.
     884             :      */
     885        1562 :     if (split_cleanup)
     886             :     {
     887             :         HashPageOpaque bucket_opaque;
     888             :         Page        page;
     889             : 
     890        1332 :         page = BufferGetPage(bucket_buf);
     891        1332 :         bucket_opaque = HashPageGetOpaque(page);
     892             : 
     893             :         /* No ereport(ERROR) until changes are logged */
     894        1332 :         START_CRIT_SECTION();
     895             : 
     896        1332 :         bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
     897        1332 :         MarkBufferDirty(bucket_buf);
     898             : 
     899             :         /* XLOG stuff */
     900        1332 :         if (RelationNeedsWAL(rel))
     901             :         {
     902             :             XLogRecPtr  recptr;
     903             : 
     904        1080 :             XLogBeginInsert();
     905        1080 :             XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD);
     906             : 
     907        1080 :             recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_CLEANUP);
     908        1080 :             PageSetLSN(page, recptr);
     909             :         }
     910             : 
     911        1332 :         END_CRIT_SECTION();
     912             :     }
     913             : 
     914             :     /*
      915             :      * If we have deleted anything, try to compact free space.  To squeeze
      916             :      * the bucket we must have a cleanup lock; otherwise the squeeze could
      917             :      * disturb the ordering of tuples for a scan that started before it.
     918             :      */
     919        1562 :     if (bucket_dirty && IsBufferCleanupOK(bucket_buf))
     920        1350 :         _hash_squeezebucket(rel, cur_bucket, bucket_blkno, bucket_buf,
     921             :                             bstrategy);
     922             :     else
     923         212 :         LockBuffer(bucket_buf, BUFFER_LOCK_UNLOCK);
     924        1562 : }

Generated by: LCOV version 1.14