LCOV - code coverage report
Current view: top level - src/backend/access/hash - hash.c
Test:     PostgreSQL 13devel
Date:     2019-11-13 21:06:57
Coverage: Lines: 280 of 299 (93.6 %)    Functions: 13 of 13 (100.0 %)
Legend:   Lines: hit / not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * hash.c
       4             :  *    Implementation of Margo Seltzer's Hashing package for postgres.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/hash/hash.c
      12             :  *
      13             :  * NOTES
      14             :  *    This file contains only the public interface routines.
      15             :  *
      16             :  *-------------------------------------------------------------------------
      17             :  */
      18             : 
      19             : #include "postgres.h"
      20             : 
      21             : #include "access/hash.h"
      22             : #include "access/hash_xlog.h"
      23             : #include "access/relscan.h"
      24             : #include "access/tableam.h"
      25             : #include "catalog/index.h"
      26             : #include "commands/progress.h"
      27             : #include "commands/vacuum.h"
      28             : #include "miscadmin.h"
      29             : #include "optimizer/plancat.h"
      30             : #include "pgstat.h"
      31             : #include "utils/builtins.h"
      32             : #include "utils/index_selfuncs.h"
      33             : #include "utils/rel.h"
      34             : 
      35             : /* Working state for hashbuild and its callback */
      36             : typedef struct
      37             : {
      38             :     HSpool     *spool;          /* NULL if not using spooling */
      39             :     double      indtuples;      /* # tuples accepted into index */
      40             :     Relation    heapRel;        /* heap relation descriptor */
      41             : } HashBuildState;
      42             : 
      43             : static void hashbuildCallback(Relation index,
      44             :                               ItemPointer tid,
      45             :                               Datum *values,
      46             :                               bool *isnull,
      47             :                               bool tupleIsAlive,
      48             :                               void *state);
      49             : 
      50             : 
      51             : /*
      52             :  * Hash handler function: return IndexAmRoutine with access method parameters
      53             :  * and callbacks.
      54             :  */
      55             : Datum
      56        1288 : hashhandler(PG_FUNCTION_ARGS)
      57             : {
      58        1288 :     IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
      59             : 
      60        1288 :     amroutine->amstrategies = HTMaxStrategyNumber;
      61        1288 :     amroutine->amsupport = HASHNProcs;
      62        1288 :     amroutine->amcanorder = false;
      63        1288 :     amroutine->amcanorderbyop = false;
      64        1288 :     amroutine->amcanbackward = true;
      65        1288 :     amroutine->amcanunique = false;
      66        1288 :     amroutine->amcanmulticol = false;
      67        1288 :     amroutine->amoptionalkey = false;
      68        1288 :     amroutine->amsearcharray = false;
      69        1288 :     amroutine->amsearchnulls = false;
      70        1288 :     amroutine->amstorage = false;
      71        1288 :     amroutine->amclusterable = false;
      72        1288 :     amroutine->ampredlocks = true;
      73        1288 :     amroutine->amcanparallel = false;
      74        1288 :     amroutine->amcaninclude = false;
      75        1288 :     amroutine->amkeytype = INT4OID;
      76             : 
      77        1288 :     amroutine->ambuild = hashbuild;
      78        1288 :     amroutine->ambuildempty = hashbuildempty;
      79        1288 :     amroutine->aminsert = hashinsert;
      80        1288 :     amroutine->ambulkdelete = hashbulkdelete;
      81        1288 :     amroutine->amvacuumcleanup = hashvacuumcleanup;
      82        1288 :     amroutine->amcanreturn = NULL;
      83        1288 :     amroutine->amcostestimate = hashcostestimate;
      84        1288 :     amroutine->amoptions = hashoptions;
      85        1288 :     amroutine->amproperty = NULL;
      86        1288 :     amroutine->ambuildphasename = NULL;
      87        1288 :     amroutine->amvalidate = hashvalidate;
      88        1288 :     amroutine->ambeginscan = hashbeginscan;
      89        1288 :     amroutine->amrescan = hashrescan;
      90        1288 :     amroutine->amgettuple = hashgettuple;
      91        1288 :     amroutine->amgetbitmap = hashgetbitmap;
      92        1288 :     amroutine->amendscan = hashendscan;
      93        1288 :     amroutine->ammarkpos = NULL;
      94        1288 :     amroutine->amrestrpos = NULL;
      95        1288 :     amroutine->amestimateparallelscan = NULL;
      96        1288 :     amroutine->aminitparallelscan = NULL;
      97        1288 :     amroutine->amparallelrescan = NULL;
      98             : 
      99        1288 :     PG_RETURN_POINTER(amroutine);
     100             : }
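
The function above is the complete callback table for the built-in hash access method. The same IndexAmRoutine pattern is how a loadable module would register its own index AM; the following is a minimal, hypothetical sketch (the names my_am_handler and myam are invented for illustration, and only a couple of fields are shown -- a real handler would fill in every flag and callback just as hashhandler() does):

#include "postgres.h"

#include "access/amapi.h"
#include "fmgr.h"

PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(my_am_handler);

Datum
my_am_handler(PG_FUNCTION_ARGS)
{
    IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

    /* makeNode zeroes the struct; set whatever differs from 0/false/NULL. */
    amroutine->amstrategies = 1;
    amroutine->amsupport = 1;
    /* ... set the remaining flags and callbacks as hashhandler() does ... */

    PG_RETURN_POINTER(amroutine);
}

On the SQL side such a handler would be wired up with CREATE FUNCTION ... RETURNS index_am_handler and CREATE ACCESS METHOD myam TYPE INDEX HANDLER my_am_handler; the built-in hash AM is instead registered directly in the system catalogs.
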
     101             : 
     102             : /*
     103             :  *  hashbuild() -- build a new hash index.
     104             :  */
     105             : IndexBuildResult *
     106         172 : hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     107             : {
     108             :     IndexBuildResult *result;
     109             :     BlockNumber relpages;
     110             :     double      reltuples;
     111             :     double      allvisfrac;
     112             :     uint32      num_buckets;
     113             :     long        sort_threshold;
     114             :     HashBuildState buildstate;
     115             : 
     116             :     /*
     117             :      * We expect to be called exactly once for any index relation. If that's
     118             :      * not the case, big trouble's what we have.
     119             :      */
     120         172 :     if (RelationGetNumberOfBlocks(index) != 0)
     121           0 :         elog(ERROR, "index \"%s\" already contains data",
     122             :              RelationGetRelationName(index));
     123             : 
     124             :     /* Estimate the number of rows currently present in the table */
     125         172 :     estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);
     126             : 
     127             :     /* Initialize the hash index metadata page and initial buckets */
     128         172 :     num_buckets = _hash_init(index, reltuples, MAIN_FORKNUM);
     129             : 
     130             :     /*
     131             :      * If we just insert the tuples into the index in scan order, then
     132             :      * (assuming their hash codes are pretty random) there will be no locality
     133             :      * of access to the index, and if the index is bigger than available RAM
     134             :      * then we'll thrash horribly.  To prevent that scenario, we can sort the
     135             :      * tuples by (expected) bucket number.  However, such a sort is useless
     136             :      * overhead when the index does fit in RAM.  We choose to sort if the
     137             :      * initial index size exceeds maintenance_work_mem, or the number of
     138             :      * buffers usable for the index, whichever is less.  (Limiting by the
     139             :      * number of buffers should reduce thrashing between PG buffers and kernel
     140             :      * buffers, which seems useful even if no physical I/O results.  Limiting
     141             :      * by maintenance_work_mem is useful to allow easy testing of the sort
     142             :      * code path, and may be useful to DBAs as an additional control knob.)
     143             :      *
     144             :      * NOTE: this test will need adjustment if a bucket is ever different from
     145             :      * one page.  Also, "initial index size" accounting does not include the
     146             :      * metapage, nor the first bitmap page.
     147             :      */
     148         172 :     sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ;
     149         172 :     if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
     150         168 :         sort_threshold = Min(sort_threshold, NBuffers);
     151             :     else
     152           4 :         sort_threshold = Min(sort_threshold, NLocBuffer);
     153             : 
     154         172 :     if (num_buckets >= (uint32) sort_threshold)
     155           4 :         buildstate.spool = _h_spoolinit(heap, index, num_buckets);
     156             :     else
     157         168 :         buildstate.spool = NULL;
     158             : 
     159             :     /* prepare to build the index */
     160         172 :     buildstate.indtuples = 0;
     161         172 :     buildstate.heapRel = heap;
     162             : 
     163             :     /* do the heap scan */
     164         172 :     reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
     165             :                                        hashbuildCallback,
     166             :                                        (void *) &buildstate, NULL);
     167         172 :     pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_TOTAL,
     168         172 :                                  buildstate.indtuples);
     169             : 
     170         172 :     if (buildstate.spool)
     171             :     {
     172             :         /* sort the tuples and insert them into the index */
     173           4 :         _h_indexbuild(buildstate.spool, buildstate.heapRel);
     174           4 :         _h_spooldestroy(buildstate.spool);
     175             :     }
     176             : 
     177             :     /*
     178             :      * Return statistics
     179             :      */
     180         172 :     result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
     181             : 
     182         172 :     result->heap_tuples = reltuples;
     183         172 :     result->index_tuples = buildstate.indtuples;
     184             : 
     185         172 :     return result;
     186             : }
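
To make the sort-threshold arithmetic in hashbuild() concrete, here is a small standalone sketch of the same computation. The settings used (maintenance_work_mem = 64MB, 8kB pages, 16384 shared buffers, i.e. 128MB) are assumed example values, not values taken from this build:

#include <stdio.h>

#define Min(x, y)  ((x) < (y) ? (x) : (y))

int
main(void)
{
    long    maintenance_work_mem_kb = 65536;    /* assumed: 64MB */
    long    block_size = 8192;                  /* assumed: BLCKSZ = 8kB */
    long    n_shared_buffers = 16384;           /* assumed: 128MB of shared buffers */
    long    sort_threshold;

    /* How many index pages fit within maintenance_work_mem? */
    sort_threshold = (maintenance_work_mem_kb * 1024L) / block_size;

    /* For a non-temporary index, also cap by the shared buffer count. */
    sort_threshold = Min(sort_threshold, n_shared_buffers);

    /* hashbuild() spools and sorts when the initial bucket count reaches this. */
    printf("sort if num_buckets >= %ld pages\n", sort_threshold);
    return 0;
}

With these assumed settings the threshold comes out to 8192 pages, so only an index whose initial bucket count is at least that large takes the spool-and-sort path.
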
     187             : 
     188             : /*
     189             :  *  hashbuildempty() -- build an empty hash index in the initialization fork
     190             :  */
     191             : void
     192           4 : hashbuildempty(Relation index)
     193             : {
     194           4 :     _hash_init(index, 0, INIT_FORKNUM);
     195           4 : }
     196             : 
     197             : /*
     198             :  * Per-tuple callback for table_index_build_scan
     199             :  */
     200             : static void
     201      315174 : hashbuildCallback(Relation index,
     202             :                   ItemPointer tid,
     203             :                   Datum *values,
     204             :                   bool *isnull,
     205             :                   bool tupleIsAlive,
     206             :                   void *state)
     207             : {
     208      315174 :     HashBuildState *buildstate = (HashBuildState *) state;
     209             :     Datum       index_values[1];
     210             :     bool        index_isnull[1];
     211             :     IndexTuple  itup;
     212             : 
     213             :     /* convert data to a hash key; on failure, do not insert anything */
     214      315174 :     if (!_hash_convert_tuple(index,
     215             :                              values, isnull,
     216             :                              index_values, index_isnull))
     217           0 :         return;
     218             : 
     219             :     /* Either spool the tuple for sorting, or just put it into the index */
     220      315174 :     if (buildstate->spool)
     221       40000 :         _h_spool(buildstate->spool, tid, index_values, index_isnull);
     222             :     else
     223             :     {
     224             :         /* form an index tuple and point it at the heap tuple */
     225      275174 :         itup = index_form_tuple(RelationGetDescr(index),
     226             :                                 index_values, index_isnull);
     227      275174 :         itup->t_tid = *tid;
     228      275174 :         _hash_doinsert(index, itup, buildstate->heapRel);
     229      275174 :         pfree(itup);
     230             :     }
     231             : 
     232      315174 :     buildstate->indtuples += 1;
     233             : }
     234             : 
     235             : /*
     236             :  *  hashinsert() -- insert an index tuple into a hash table.
     237             :  *
     238             :  *  Hash on the heap tuple's key, form an index tuple with hash code.
     239             :  *  Find the appropriate location for the new tuple, and put it there.
     240             :  */
     241             : bool
     242      124758 : hashinsert(Relation rel, Datum *values, bool *isnull,
     243             :            ItemPointer ht_ctid, Relation heapRel,
     244             :            IndexUniqueCheck checkUnique,
     245             :            IndexInfo *indexInfo)
     246             : {
     247             :     Datum       index_values[1];
     248             :     bool        index_isnull[1];
     249             :     IndexTuple  itup;
     250             : 
     251             :     /* convert data to a hash key; on failure, do not insert anything */
     252      124758 :     if (!_hash_convert_tuple(rel,
     253             :                              values, isnull,
     254             :                              index_values, index_isnull))
     255           0 :         return false;
     256             : 
     257             :     /* form an index tuple and point it at the heap tuple */
     258      124758 :     itup = index_form_tuple(RelationGetDescr(rel), index_values, index_isnull);
     259      124758 :     itup->t_tid = *ht_ctid;
     260             : 
     261      124758 :     _hash_doinsert(rel, itup, heapRel);
     262             : 
     263      124746 :     pfree(itup);
     264             : 
     265      124746 :     return false;
     266             : }
     267             : 
     268             : 
     269             : /*
     270             :  *  hashgettuple() -- Get the next tuple in the scan.
     271             :  */
     272             : bool
     273       68146 : hashgettuple(IndexScanDesc scan, ScanDirection dir)
     274             : {
     275       68146 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     276             :     bool        res;
     277             : 
     278             :     /* Hash indexes are always lossy since we store only the hash code */
     279       68146 :     scan->xs_recheck = true;
     280             : 
     281             :     /*
     282             :      * If we've already initialized this scan, we can just advance it in the
     283             :      * appropriate direction.  If we haven't done so yet, we call a routine to
     284             :      * get the first item in the scan.
     285             :      */
     286       68146 :     if (!HashScanPosIsValid(so->currPos))
     287         286 :         res = _hash_first(scan, dir);
     288             :     else
     289             :     {
     290             :         /*
     291             :          * Check to see if we should kill the previously-fetched tuple.
     292             :          */
     293       67860 :         if (scan->kill_prior_tuple)
     294             :         {
     295             :             /*
     296             :              * Yes, so remember it for later. (We'll deal with all such tuples
     297             :              * at once right after leaving the index page or at end of scan.)
     298             :              * If the caller reverses the indexscan direction, it is quite
     299             :              * possible that the same item will get entered multiple times.
     300             :              * We don't detect that; instead, we just forget any excess
     301             :              * entries.
     302             :              */
     303           0 :             if (so->killedItems == NULL)
     304           0 :                 so->killedItems = (int *)
     305           0 :                     palloc(MaxIndexTuplesPerPage * sizeof(int));
     306             : 
     307           0 :             if (so->numKilled < MaxIndexTuplesPerPage)
     308           0 :                 so->killedItems[so->numKilled++] = so->currPos.itemIndex;
     309             :         }
     310             : 
     311             :         /*
     312             :          * Now continue the scan.
     313             :          */
     314       67860 :         res = _hash_next(scan, dir);
     315             :     }
     316             : 
     317       68144 :     return res;
     318             : }
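
The reason scan->xs_recheck is unconditionally true above is that the index stores only a 32-bit hash code, never the original key, so two distinct keys can collide and the executor must re-evaluate the qual against the heap tuple. A standalone toy illustration of such a collision (the hash below is a deliberately simple multiplicative hash, not PostgreSQL's hash_any()):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy 31-multiplier string hash, used here only to demonstrate a collision. */
static uint32_t
toy_hash(const char *s)
{
    uint32_t    h = 0;

    while (*s)
        h = h * 31 + (unsigned char) *s++;
    return h;
}

int
main(void)
{
    const char *a = "Aa";
    const char *b = "BB";       /* classic collision pair for this hash */

    printf("hash(%s) = %u, hash(%s) = %u\n", a, toy_hash(a), b, toy_hash(b));

    /*
     * Equal hash codes, different keys: an index that stores only the hash
     * code cannot tell these apart, so the caller must recheck the real key.
     */
    if (toy_hash(a) == toy_hash(b) && strcmp(a, b) != 0)
        printf("collision: recheck against the heap tuple is required\n");
    return 0;
}
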
     319             : 
     320             : 
     321             : /*
     322             :  *  hashgetbitmap() -- get all tuples at once
     323             :  */
     324             : int64
     325          16 : hashgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
     326             : {
     327          16 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     328             :     bool        res;
     329          16 :     int64       ntids = 0;
     330             :     HashScanPosItem *currItem;
     331             : 
     332          16 :     res = _hash_first(scan, ForwardScanDirection);
     333             : 
     334         100 :     while (res)
     335             :     {
     336          68 :         currItem = &so->currPos.items[so->currPos.itemIndex];
     337             : 
     338             :         /*
     339             :          * _hash_first and _hash_next take care of eliminating dead index
     340             :          * entries whenever scan->ignore_killed_tuples is true.  Therefore,
     341             :          * there's nothing to do here except add the results to the TIDBitmap.
     342             :          */
     343          68 :         tbm_add_tuples(tbm, &(currItem->heapTid), 1, true);
     344          68 :         ntids++;
     345             : 
     346          68 :         res = _hash_next(scan, ForwardScanDirection);
     347             :     }
     348             : 
     349          16 :     return ntids;
     350             : }
     351             : 
     352             : 
     353             : /*
     354             :  *  hashbeginscan() -- start a scan on a hash index
     355             :  */
     356             : IndexScanDesc
     357         278 : hashbeginscan(Relation rel, int nkeys, int norderbys)
     358             : {
     359             :     IndexScanDesc scan;
     360             :     HashScanOpaque so;
     361             : 
     362             :     /* no order by operators allowed */
     363             :     Assert(norderbys == 0);
     364             : 
     365         278 :     scan = RelationGetIndexScan(rel, nkeys, norderbys);
     366             : 
     367         278 :     so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
     368         278 :     HashScanPosInvalidate(so->currPos);
     369         278 :     so->hashso_bucket_buf = InvalidBuffer;
     370         278 :     so->hashso_split_bucket_buf = InvalidBuffer;
     371             : 
     372         278 :     so->hashso_buc_populated = false;
     373         278 :     so->hashso_buc_split = false;
     374             : 
     375         278 :     so->killedItems = NULL;
     376         278 :     so->numKilled = 0;
     377             : 
     378         278 :     scan->opaque = so;
     379             : 
     380         278 :     return scan;
     381             : }
     382             : 
     383             : /*
     384             :  *  hashrescan() -- rescan an index relation
     385             :  */
     386             : void
     387         302 : hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
     388             :            ScanKey orderbys, int norderbys)
     389             : {
     390         302 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     391         302 :     Relation    rel = scan->indexRelation;
     392             : 
     393         302 :     if (HashScanPosIsValid(so->currPos))
     394             :     {
     395             :         /* Before leaving current page, deal with any killed items */
     396           8 :         if (so->numKilled > 0)
     397           0 :             _hash_kill_items(scan);
     398             :     }
     399             : 
     400         302 :     _hash_dropscanbuf(rel, so);
     401             : 
     402             :     /* set position invalid (this will cause _hash_first call) */
     403         302 :     HashScanPosInvalidate(so->currPos);
     404             : 
     405             :     /* Update scan key, if a new one is given */
     406         302 :     if (scankey && scan->numberOfKeys > 0)
     407             :     {
     408         302 :         memmove(scan->keyData,
     409             :                 scankey,
     410         302 :                 scan->numberOfKeys * sizeof(ScanKeyData));
     411             :     }
     412             : 
     413         302 :     so->hashso_buc_populated = false;
     414         302 :     so->hashso_buc_split = false;
     415         302 : }
     416             : 
     417             : /*
     418             :  *  hashendscan() -- close down a scan
     419             :  */
     420             : void
     421         276 : hashendscan(IndexScanDesc scan)
     422             : {
     423         276 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     424         276 :     Relation    rel = scan->indexRelation;
     425             : 
     426         276 :     if (HashScanPosIsValid(so->currPos))
     427             :     {
     428             :         /* Before leaving current page, deal with any killed items */
     429          24 :         if (so->numKilled > 0)
     430           0 :             _hash_kill_items(scan);
     431             :     }
     432             : 
     433         276 :     _hash_dropscanbuf(rel, so);
     434             : 
     435         276 :     if (so->killedItems != NULL)
     436           0 :         pfree(so->killedItems);
     437         276 :     pfree(so);
     438         276 :     scan->opaque = NULL;
     439         276 : }
     440             : 
     441             : /*
     442             :  * Bulk deletion of all index entries pointing to a set of heap tuples.
     443             :  * The set of target tuples is specified via a callback routine that tells
     444             :  * whether any given heap tuple (identified by ItemPointer) is being deleted.
     445             :  *
     446             :  * This function also deletes the tuples that were moved by a split to
     447             :  * another bucket.
     448             :  *
     449             :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     450             :  */
     451             : IndexBulkDeleteResult *
     452           2 : hashbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     453             :                IndexBulkDeleteCallback callback, void *callback_state)
     454             : {
     455           2 :     Relation    rel = info->index;
     456             :     double      tuples_removed;
     457             :     double      num_index_tuples;
     458             :     double      orig_ntuples;
     459             :     Bucket      orig_maxbucket;
     460             :     Bucket      cur_maxbucket;
     461             :     Bucket      cur_bucket;
     462           2 :     Buffer      metabuf = InvalidBuffer;
     463             :     HashMetaPage metap;
     464             :     HashMetaPage cachedmetap;
     465             : 
     466           2 :     tuples_removed = 0;
     467           2 :     num_index_tuples = 0;
     468             : 
     469             :     /*
     470             :      * We need a copy of the metapage so that we can use its hashm_spares[]
     471             :      * values to compute bucket page addresses, but a cached copy should be
     472             :      * good enough.  (If not, we'll detect that further down and refresh the
     473             :      * cache as necessary.)
     474             :      */
     475           2 :     cachedmetap = _hash_getcachedmetap(rel, &metabuf, false);
     476             :     Assert(cachedmetap != NULL);
     477             : 
     478           2 :     orig_maxbucket = cachedmetap->hashm_maxbucket;
     479           2 :     orig_ntuples = cachedmetap->hashm_ntuples;
     480             : 
     481             :     /* Scan the buckets that we know exist */
     482           2 :     cur_bucket = 0;
     483           2 :     cur_maxbucket = orig_maxbucket;
     484             : 
     485             : loop_top:
     486         208 :     while (cur_bucket <= cur_maxbucket)
     487             :     {
     488             :         BlockNumber bucket_blkno;
     489             :         BlockNumber blkno;
     490             :         Buffer      bucket_buf;
     491             :         Buffer      buf;
     492             :         HashPageOpaque bucket_opaque;
     493             :         Page        page;
     494         200 :         bool        split_cleanup = false;
     495             : 
     496             :         /* Get address of bucket's start page */
     497         200 :         bucket_blkno = BUCKET_TO_BLKNO(cachedmetap, cur_bucket);
     498             : 
     499         200 :         blkno = bucket_blkno;
     500             : 
     501             :         /*
     502             :          * We need to acquire a cleanup lock on the primary bucket page to
     503             :          * wait out concurrent scans before deleting the dead tuples.
     504             :          */
     505         200 :         buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy);
     506         200 :         LockBufferForCleanup(buf);
     507         200 :         _hash_checkpage(rel, buf, LH_BUCKET_PAGE);
     508             : 
     509         200 :         page = BufferGetPage(buf);
     510         200 :         bucket_opaque = (HashPageOpaque) PageGetSpecialPointer(page);
     511             : 
     512             :         /*
     513             :          * If the bucket contains tuples that were moved by a split, then we
     514             :          * need to delete those tuples.  We can't delete them while the split
     515             :          * operation on the bucket is unfinished, because scans still need them.
     516             :          */
     517         400 :         if (!H_BUCKET_BEING_SPLIT(bucket_opaque) &&
     518         200 :             H_NEEDS_SPLIT_CLEANUP(bucket_opaque))
     519             :         {
     520           0 :             split_cleanup = true;
     521             : 
     522             :             /*
     523             :              * This bucket might have been split since we last held a lock on
     524             :              * the metapage.  If so, hashm_maxbucket, hashm_highmask and
     525             :              * hashm_lowmask might be old enough to cause us to fail to remove
     526             :              * tuples left behind by the most recent split.  To prevent that,
     527             :              * now that the primary page of the target bucket has been locked
     528             :              * (and thus can't be further split), check whether we need to
     529             :              * update our cached metapage data.
     530             :              */
     531             :             Assert(bucket_opaque->hasho_prevblkno != InvalidBlockNumber);
     532           0 :             if (bucket_opaque->hasho_prevblkno > cachedmetap->hashm_maxbucket)
     533             :             {
     534           0 :                 cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
     535             :                 Assert(cachedmetap != NULL);
     536             :             }
     537             :         }
     538             : 
     539         200 :         bucket_buf = buf;
     540             : 
     541         200 :         hashbucketcleanup(rel, cur_bucket, bucket_buf, blkno, info->strategy,
     542             :                           cachedmetap->hashm_maxbucket,
     543             :                           cachedmetap->hashm_highmask,
     544             :                           cachedmetap->hashm_lowmask, &tuples_removed,
     545             :                           &num_index_tuples, split_cleanup,
     546             :                           callback, callback_state);
     547             : 
     548         200 :         _hash_dropbuf(rel, bucket_buf);
     549             : 
     550             :         /* Advance to next bucket */
     551         200 :         cur_bucket++;
     552             :     }
     553             : 
     554           4 :     if (BufferIsInvalid(metabuf))
     555           2 :         metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
     556             : 
     557             :     /* Write-lock metapage and check for split since we started */
     558           4 :     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     559           4 :     metap = HashPageGetMeta(BufferGetPage(metabuf));
     560             : 
     561           4 :     if (cur_maxbucket != metap->hashm_maxbucket)
     562             :     {
     563             :         /* There's been a split, so process the additional bucket(s) */
     564           2 :         LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     565           2 :         cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
     566             :         Assert(cachedmetap != NULL);
     567           2 :         cur_maxbucket = cachedmetap->hashm_maxbucket;
     568           2 :         goto loop_top;
     569             :     }
     570             : 
     571             :     /* Okay, we're really done.  Update tuple count in metapage. */
     572           2 :     START_CRIT_SECTION();
     573             : 
     574           2 :     if (orig_maxbucket == metap->hashm_maxbucket &&
     575           0 :         orig_ntuples == metap->hashm_ntuples)
     576             :     {
     577             :         /*
     578             :          * No one has split or inserted anything since start of scan, so
     579             :          * believe our count as gospel.
     580             :          */
     581           0 :         metap->hashm_ntuples = num_index_tuples;
     582             :     }
     583             :     else
     584             :     {
     585             :         /*
     586             :          * Otherwise, our count is untrustworthy since we may have
     587             :          * double-scanned tuples in split buckets.  Proceed by dead-reckoning.
     588             :          * (Note: we still return estimated_count = false, because using this
     589             :          * count is better than not updating reltuples at all.)
     590             :          */
     591           2 :         if (metap->hashm_ntuples > tuples_removed)
     592           2 :             metap->hashm_ntuples -= tuples_removed;
     593             :         else
     594           0 :             metap->hashm_ntuples = 0;
     595           2 :         num_index_tuples = metap->hashm_ntuples;
     596             :     }
     597             : 
     598           2 :     MarkBufferDirty(metabuf);
     599             : 
     600             :     /* XLOG stuff */
     601           2 :     if (RelationNeedsWAL(rel))
     602             :     {
     603             :         xl_hash_update_meta_page xlrec;
     604             :         XLogRecPtr  recptr;
     605             : 
     606           2 :         xlrec.ntuples = metap->hashm_ntuples;
     607             : 
     608           2 :         XLogBeginInsert();
     609           2 :         XLogRegisterData((char *) &xlrec, SizeOfHashUpdateMetaPage);
     610             : 
     611           2 :         XLogRegisterBuffer(0, metabuf, REGBUF_STANDARD);
     612             : 
     613           2 :         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_UPDATE_META_PAGE);
     614           2 :         PageSetLSN(BufferGetPage(metabuf), recptr);
     615             :     }
     616             : 
     617           2 :     END_CRIT_SECTION();
     618             : 
     619           2 :     _hash_relbuf(rel, metabuf);
     620             : 
     621             :     /* return statistics */
     622           2 :     if (stats == NULL)
     623           2 :         stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
     624           2 :     stats->estimated_count = false;
     625           2 :     stats->num_index_tuples = num_index_tuples;
     626           2 :     stats->tuples_removed += tuples_removed;
     627             :     /* hashvacuumcleanup will fill in num_pages */
     628             : 
     629           2 :     return stats;
     630             : }
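
The split-cleanup logic above (and in hashbucketcleanup() below) decides whether a tuple has moved to the new bucket by recomputing its bucket number from the hash code using the cached hashm_maxbucket/highmask/lowmask values. A simplified standalone rendering of that mapping, patterned on _hash_hashkey2bucket() and using assumed mask values (treat it as a sketch, not the authoritative definition):

#include <stdint.h>
#include <stdio.h>

/* Map a hash code to a bucket, given the current bucket count and masks. */
static uint32_t
hashkey_to_bucket(uint32_t hashkey, uint32_t maxbucket,
                  uint32_t highmask, uint32_t lowmask)
{
    uint32_t    bucket = hashkey & highmask;

    /* Buckets beyond maxbucket don't exist yet; fold back with the low mask. */
    if (bucket > maxbucket)
        bucket = bucket & lowmask;
    return bucket;
}

int
main(void)
{
    /* Assumed state: buckets 0..5 exist, so lowmask = 3 and highmask = 7. */
    uint32_t    maxbucket = 5;
    uint32_t    highmask = 7;
    uint32_t    lowmask = 3;
    uint32_t    hashkey = 0x2e;     /* arbitrary example hash code */

    printf("hash 0x%x -> bucket %u\n",
           hashkey, hashkey_to_bucket(hashkey, maxbucket, highmask, lowmask));
    return 0;
}

After a split increases maxbucket, some hash codes that previously folded back onto an old bucket now map to the new one; those are exactly the tuples that split cleanup removes from the old bucket.
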
     631             : 
     632             : /*
     633             :  * Post-VACUUM cleanup.
     634             :  *
     635             :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     636             :  */
     637             : IndexBulkDeleteResult *
     638          46 : hashvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
     639             : {
     640          46 :     Relation    rel = info->index;
     641             :     BlockNumber num_pages;
     642             : 
     643             :     /* If hashbulkdelete wasn't called, return NULL signifying no change */
     644             :     /* Note: this covers the analyze_only case too */
     645          46 :     if (stats == NULL)
     646          44 :         return NULL;
     647             : 
     648             :     /* update statistics */
     649           2 :     num_pages = RelationGetNumberOfBlocks(rel);
     650           2 :     stats->num_pages = num_pages;
     651             : 
     652           2 :     return stats;
     653             : }
     654             : 
     655             : /*
     656             :  * Helper function to perform deletion of index entries from a bucket.
     657             :  *
     658             :  * This function expects that the caller has acquired a cleanup lock on the
     659             :  * primary bucket page, and will return with a write lock again held on the
     660             :  * primary bucket page.  The lock won't necessarily be held continuously,
     661             :  * though, because we'll release it when visiting overflow pages.
     662             :  *
     663             :  * There can't be any concurrent scans in progress when we first enter this
     664             :  * function because of the cleanup lock we hold on the primary bucket page,
     665             :  * but as soon as we release that lock, there might be.  If those scans got
     666             :  * ahead of our cleanup scan, they might see a tuple before we kill it and
     667             :  * wake up only after VACUUM has completed and the TID has been recycled for
     668             :  * an unrelated tuple.  To avoid that calamity, we prevent scans from passing
     669             :  * our cleanup scan by locking the next page in the bucket chain before
     670             :  * releasing the lock on the previous page.  (This type of lock chaining is not
     671             :  * ideal, so we might want to look for a better solution at some point.)
     672             :  *
     673             :  * We need to retain a pin on the primary bucket to ensure that no concurrent
     674             :  * split can start.
     675             :  */
     676             : void
     677         826 : hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
     678             :                   BlockNumber bucket_blkno, BufferAccessStrategy bstrategy,
     679             :                   uint32 maxbucket, uint32 highmask, uint32 lowmask,
     680             :                   double *tuples_removed, double *num_index_tuples,
     681             :                   bool split_cleanup,
     682             :                   IndexBulkDeleteCallback callback, void *callback_state)
     683             : {
     684             :     BlockNumber blkno;
     685             :     Buffer      buf;
     686         826 :     Bucket      new_bucket PG_USED_FOR_ASSERTS_ONLY = InvalidBucket;
     687         826 :     bool        bucket_dirty = false;
     688             : 
     689         826 :     blkno = bucket_blkno;
     690         826 :     buf = bucket_buf;
     691             : 
     692         826 :     if (split_cleanup)
     693         626 :         new_bucket = _hash_get_newbucket_from_oldbucket(rel, cur_bucket,
     694             :                                                         lowmask, maxbucket);
     695             : 
     696             :     /* Scan each page in bucket */
     697             :     for (;;)
     698         258 :     {
     699             :         HashPageOpaque opaque;
     700             :         OffsetNumber offno;
     701             :         OffsetNumber maxoffno;
     702             :         Buffer      next_buf;
     703             :         Page        page;
     704             :         OffsetNumber deletable[MaxOffsetNumber];
     705        1084 :         int         ndeletable = 0;
     706        1084 :         bool        retain_pin = false;
     707        1084 :         bool        clear_dead_marking = false;
     708             : 
     709        1084 :         vacuum_delay_point();
     710             : 
     711        1084 :         page = BufferGetPage(buf);
     712        1084 :         opaque = (HashPageOpaque) PageGetSpecialPointer(page);
     713             : 
     714             :         /* Scan each tuple in page */
     715        1084 :         maxoffno = PageGetMaxOffsetNumber(page);
     716      248702 :         for (offno = FirstOffsetNumber;
     717             :              offno <= maxoffno;
     718      246534 :              offno = OffsetNumberNext(offno))
     719             :         {
     720             :             ItemPointer htup;
     721             :             IndexTuple  itup;
     722             :             Bucket      bucket;
     723      246534 :             bool        kill_tuple = false;
     724             : 
     725      246534 :             itup = (IndexTuple) PageGetItem(page,
     726             :                                             PageGetItemId(page, offno));
     727      246534 :             htup = &(itup->t_tid);
     728             : 
     729             :             /*
     730             :              * To remove the dead tuples, we rely strictly on the results of
     731             :              * the callback function; see btvacuumpage for the detailed reason.
     732             :              */
     733      246534 :             if (callback && callback(htup, callback_state))
     734             :             {
     735       11000 :                 kill_tuple = true;
     736       22000 :                 if (tuples_removed)
     737       11000 :                     *tuples_removed += 1;
     738             :             }
     739      235534 :             else if (split_cleanup)
     740             :             {
     741             :                 /* delete the tuples that were moved by the split */
     742      185534 :                 bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
     743             :                                               maxbucket,
     744             :                                               highmask,
     745             :                                               lowmask);
     746             :                 /* mark the item for deletion */
     747      185534 :                 if (bucket != cur_bucket)
     748             :                 {
     749             :                     /*
     750             :                      * We expect tuples to belong either to the current bucket
     751             :                      * or to new_bucket.  This is ensured because we don't allow
     752             :                      * further splits of a bucket that contains garbage. See
     753             :                      * comments in _hash_expandtable.
     754             :                      */
     755             :                     Assert(bucket == new_bucket);
     756       74224 :                     kill_tuple = true;
     757             :                 }
     758             :             }
     759             : 
     760      246534 :             if (kill_tuple)
     761             :             {
     762             :                 /* mark the item for deletion */
     763       85224 :                 deletable[ndeletable++] = offno;
     764             :             }
     765             :             else
     766             :             {
     767             :                 /* we're keeping it, so count it */
     768      161310 :                 if (num_index_tuples)
     769       50000 :                     *num_index_tuples += 1;
     770             :             }
     771             :         }
     772             : 
     773             :         /* retain the pin on primary bucket page till end of bucket scan */
     774        1084 :         if (blkno == bucket_blkno)
     775         826 :             retain_pin = true;
     776             :         else
     777         258 :             retain_pin = false;
     778             : 
     779        1084 :         blkno = opaque->hasho_nextblkno;
     780             : 
     781             :         /*
     782             :          * Apply deletions, advance to next page and write page if needed.
     783             :          */
     784        1084 :         if (ndeletable > 0)
     785             :         {
     786             :             /* No ereport(ERROR) until changes are logged */
     787         686 :             START_CRIT_SECTION();
     788             : 
     789         686 :             PageIndexMultiDelete(page, deletable, ndeletable);
     790         686 :             bucket_dirty = true;
     791             : 
     792             :             /*
     793             :              * Mark the page as clean if vacuum has removed DEAD tuples from
     794             :              * this index page.  We do this by clearing the
     795             :              * LH_PAGE_HAS_DEAD_TUPLES flag.
     796             :              */
     797         714 :             if (tuples_removed && *tuples_removed > 0 &&
     798          28 :                 H_HAS_DEAD_TUPLES(opaque))
     799             :             {
     800           0 :                 opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
     801           0 :                 clear_dead_marking = true;
     802             :             }
     803             : 
     804         686 :             MarkBufferDirty(buf);
     805             : 
     806             :             /* XLOG stuff */
     807         686 :             if (RelationNeedsWAL(rel))
     808             :             {
     809             :                 xl_hash_delete xlrec;
     810             :                 XLogRecPtr  recptr;
     811             : 
     812         686 :                 xlrec.clear_dead_marking = clear_dead_marking;
     813         686 :                 xlrec.is_primary_bucket_page = (buf == bucket_buf) ? true : false;
     814             : 
     815         686 :                 XLogBeginInsert();
     816         686 :                 XLogRegisterData((char *) &xlrec, SizeOfHashDelete);
     817             : 
     818             :                 /*
     819             :                  * bucket buffer needs to be registered to ensure that we can
     820             :                  * acquire a cleanup lock on it during replay.
     821             :                  */
     822         686 :                 if (!xlrec.is_primary_bucket_page)
     823         110 :                     XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
     824             : 
     825         686 :                 XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
     826         686 :                 XLogRegisterBufData(1, (char *) deletable,
     827         686 :                                     ndeletable * sizeof(OffsetNumber));
     828             : 
     829         686 :                 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_DELETE);
     830         686 :                 PageSetLSN(BufferGetPage(buf), recptr);
     831             :             }
     832             : 
     833         686 :             END_CRIT_SECTION();
     834             :         }
     835             : 
     836             :         /* bail out if there are no more pages to scan. */
     837        1084 :         if (!BlockNumberIsValid(blkno))
     838         826 :             break;
     839             : 
     840         258 :         next_buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
     841             :                                               LH_OVERFLOW_PAGE,
     842             :                                               bstrategy);
     843             : 
     844             :         /*
     845             :          * release the lock on previous page after acquiring the lock on next
     846             :          * page
     847             :          */
     848         258 :         if (retain_pin)
     849          46 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     850             :         else
     851         212 :             _hash_relbuf(rel, buf);
     852             : 
     853         258 :         buf = next_buf;
     854             :     }
     855             : 
     856             :     /*
     857             :      * Lock the bucket page to clear the garbage flag and squeeze the bucket.
     858             :      * If the current buffer is the same as the bucket buffer, then we
     859             :      * already have a lock on the bucket page.
     860             :      */
     861         826 :     if (buf != bucket_buf)
     862             :     {
     863          46 :         _hash_relbuf(rel, buf);
     864          46 :         LockBuffer(bucket_buf, BUFFER_LOCK_EXCLUSIVE);
     865             :     }
     866             : 
     867             :     /*
     868             :      * Clear the garbage flag from the bucket after deleting the tuples that
     869             :      * were moved by the split.  We purposefully clear the flag before
     870             :      * squeezing the bucket, so that after a restart, vacuum won't again try
     871             :      * to delete the moved-by-split tuples.
     872             :      */
     873         826 :     if (split_cleanup)
     874             :     {
     875             :         HashPageOpaque bucket_opaque;
     876             :         Page        page;
     877             : 
     878         626 :         page = BufferGetPage(bucket_buf);
     879         626 :         bucket_opaque = (HashPageOpaque) PageGetSpecialPointer(page);
     880             : 
     881             :         /* No ereport(ERROR) until changes are logged */
     882         626 :         START_CRIT_SECTION();
     883             : 
     884         626 :         bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
     885         626 :         MarkBufferDirty(bucket_buf);
     886             : 
     887             :         /* XLOG stuff */
     888         626 :         if (RelationNeedsWAL(rel))
     889             :         {
     890             :             XLogRecPtr  recptr;
     891             : 
     892         626 :             XLogBeginInsert();
     893         626 :             XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD);
     894             : 
     895         626 :             recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_CLEANUP);
     896         626 :             PageSetLSN(page, recptr);
     897             :         }
     898             : 
     899         626 :         END_CRIT_SECTION();
     900             :     }
     901             : 
     902             :     /*
     903             :      * If we have deleted anything, try to compact free space.  To squeeze
     904             :      * the bucket we must hold a cleanup lock; otherwise the squeeze could
     905             :      * change the ordering of tuples seen by a scan that started before it.
     906             :      */
     907         826 :     if (bucket_dirty && IsBufferCleanupOK(bucket_buf))
     908         584 :         _hash_squeezebucket(rel, cur_bucket, bucket_blkno, bucket_buf,
     909             :                             bstrategy);
     910             :     else
     911         242 :         LockBuffer(bucket_buf, BUFFER_LOCK_UNLOCK);
     912         826 : }
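
The header comment of hashbucketcleanup() describes hand-over-hand lock chaining: the lock on the next page in the bucket chain is taken before the lock on the current page is released, so a concurrent scan can never overtake the cleanup scan. A generic, standalone sketch of that pattern over a plain linked list with pthread mutexes (purely to illustrate the idea, not PostgreSQL's buffer locking):

#include <pthread.h>
#include <stdio.h>

typedef struct Node
{
    int             value;
    struct Node    *next;
    pthread_mutex_t lock;
} Node;

/*
 * Walk the chain holding at most two locks at a time: acquire the next
 * node's lock before releasing the current one, so no other walker can
 * slip past us in the chain.
 */
static void
walk_chain(Node *head)
{
    Node       *cur = head;

    pthread_mutex_lock(&cur->lock);
    while (cur != NULL)
    {
        Node       *next = cur->next;

        if (next != NULL)
            pthread_mutex_lock(&next->lock);    /* lock next before unlocking cur */

        printf("visiting node %d\n", cur->value);

        pthread_mutex_unlock(&cur->lock);
        cur = next;
    }
}

int
main(void)
{
    static Node c = {3, NULL, PTHREAD_MUTEX_INITIALIZER};
    static Node b = {2, &c, PTHREAD_MUTEX_INITIALIZER};
    static Node a = {1, &b, PTHREAD_MUTEX_INITIALIZER};

    walk_chain(&a);
    return 0;
}
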

Generated by: LCOV version 1.13