LCOV - code coverage report
Current view: top level - src/backend/access/hash - hashsort.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 100.0 % 23 23
Test Date: 2026-03-03 18:14:56 Functions: 100.0 % 4 4
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * hashsort.c
       4              :  *      Sort tuples for insertion into a new hash index.
       5              :  *
       6              :  * When building a very large hash index, we pre-sort the tuples by bucket
       7              :  * number to improve locality of access to the index, and thereby avoid
       8              :  * thrashing.  We use tuplesort.c to sort the given index tuples into order.
       9              :  *
      10              :  * Note: if the number of rows in the table has been underestimated,
      11              :  * bucket splits may occur during the index build.  In that case we'd
      12              :  * be inserting into two or more buckets for each possible masked-off
      13              :  * hash code value.  That's no big problem though, since we'll still have
      14              :  * plenty of locality of access.
      15              :  *
      16              :  *
      17              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      18              :  * Portions Copyright (c) 1994, Regents of the University of California
      19              :  *
      20              :  * IDENTIFICATION
      21              :  *    src/backend/access/hash/hashsort.c
      22              :  *
      23              :  *-------------------------------------------------------------------------
      24              :  */
      25              : 
      26              : #include "postgres.h"
      27              : 
      28              : #include "access/hash.h"
      29              : #include "commands/progress.h"
      30              : #include "miscadmin.h"
      31              : #include "pgstat.h"
      32              : #include "port/pg_bitutils.h"
      33              : #include "utils/tuplesort.h"
      34              : 
      35              : 
      36              : /*
      37              :  * Status record for spooling/sorting phase.
      38              :  */
      39              : struct HSpool
      40              : {
      41              :     Tuplesortstate *sortstate;  /* state data for tuplesort.c */
      42              :     Relation    index;
      43              : 
      44              :     /*
      45              :      * We sort the hash keys based on the buckets they belong to, then by the
      46              :      * hash values themselves, to optimize insertions onto hash pages.  The
      47              :      * masks below are used in _hash_hashkey2bucket to determine the bucket of
      48              :      * a given hash key.
      49              :      */
      50              :     uint32      high_mask;
      51              :     uint32      low_mask;
      52              :     uint32      max_buckets;
      53              : };
      54              : 
      55              : 
      56              : /*
      57              :  * create and initialize a spool structure
      58              :  */
      59              : HSpool *
      60            4 : _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
      61              : {
      62            4 :     HSpool     *hspool = palloc0_object(HSpool);
      63              : 
      64            4 :     hspool->index = index;
      65              : 
      66              :     /*
      67              :      * Determine the bitmask for hash code values.  Since there are currently
      68              :      * num_buckets buckets in the index, the appropriate mask can be computed
      69              :      * as follows.
      70              :      *
      71              :      * NOTE : This hash mask calculation should be in sync with similar
      72              :      * calculation in _hash_init_metabuffer.
      73              :      */
      74            4 :     hspool->high_mask = pg_nextpower2_32(num_buckets + 1) - 1;
      75            4 :     hspool->low_mask = (hspool->high_mask >> 1);
      76            4 :     hspool->max_buckets = num_buckets - 1;
      77              : 
      78              :     /*
      79              :      * We size the sort area as maintenance_work_mem rather than work_mem to
      80              :      * speed index creation.  This should be OK since a single backend can't
      81              :      * run multiple index creations in parallel.
      82              :      */
      83            4 :     hspool->sortstate = tuplesort_begin_index_hash(heap,
      84              :                                                    index,
      85              :                                                    hspool->high_mask,
      86              :                                                    hspool->low_mask,
      87              :                                                    hspool->max_buckets,
      88              :                                                    maintenance_work_mem,
      89              :                                                    NULL,
      90              :                                                    TUPLESORT_NONE);
      91              : 
      92            4 :     return hspool;
      93              : }
      94              : 
      95              : /*
      96              :  * clean up a spool structure and its substructures.
      97              :  */
      98              : void
      99            4 : _h_spooldestroy(HSpool *hspool)
     100              : {
     101            4 :     tuplesort_end(hspool->sortstate);
     102            4 :     pfree(hspool);
     103            4 : }
     104              : 
     105              : /*
     106              :  * spool an index entry into the sort file.
     107              :  */
     108              : void
     109        60500 : _h_spool(HSpool *hspool, const ItemPointerData *self, const Datum *values, const bool *isnull)
     110              : {
     111        60500 :     tuplesort_putindextuplevalues(hspool->sortstate, hspool->index,
     112              :                                   self, values, isnull);
     113        60500 : }
     114              : 
     115              : /*
     116              :  * given a spool loaded by successive calls to _h_spool,
     117              :  * create an entire index.
     118              :  */
     119              : void
     120            4 : _h_indexbuild(HSpool *hspool, Relation heapRel)
     121              : {
     122              :     IndexTuple  itup;
     123            4 :     int64       tups_done = 0;
     124              : #ifdef USE_ASSERT_CHECKING
     125              :     uint32      hashkey = 0;
     126              : #endif
     127              : 
     128            4 :     tuplesort_performsort(hspool->sortstate);
     129              : 
     130        60504 :     while ((itup = tuplesort_getindextuple(hspool->sortstate, true)) != NULL)
     131              :     {
     132              :         /*
     133              :          * Technically, it isn't critical that hash keys be found in sorted
     134              :          * order, since this sorting is only used to increase locality of
     135              :          * access as a performance optimization.  It still seems like a good
     136              :          * idea to test tuplesort.c's handling of hash index tuple sorts
     137              :          * through an assertion, though.
     138              :          */
     139              : #ifdef USE_ASSERT_CHECKING
     140              :         uint32      lasthashkey = hashkey;
     141              : 
     142              :         hashkey = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
     143              :                                        hspool->max_buckets, hspool->high_mask,
     144              :                                        hspool->low_mask);
     145              :         Assert(hashkey >= lasthashkey);
     146              : #endif
     147              : 
     148              :         /* the tuples are sorted by hashkey, so pass 'sorted' as true */
     149        60500 :         _hash_doinsert(hspool->index, itup, heapRel, true);
     150              : 
     151              :         /* allow insertion phase to be interrupted, and track progress */
     152        60500 :         CHECK_FOR_INTERRUPTS();
     153              : 
     154        60500 :         pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
     155              :                                      ++tups_done);
     156              :     }
     157            4 : }
        

Generated by: LCOV version 2.0-1