LCOV - code coverage report
Current view: top level - src/backend/executor - execGrouping.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 103 114 90.4 %
Date: 2025-01-18 04:15:08 Functions: 10 11 90.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * execGrouping.c
       4             :  *    executor utility routines for grouping, hashing, and aggregation
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/executor/execGrouping.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/parallel.h"
      18             : #include "common/hashfn.h"
      19             : #include "executor/executor.h"
      20             : #include "miscadmin.h"
      21             : #include "utils/lsyscache.h"
      22             : 
      23             : static int  TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
      24             : static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
      25             :                                                  const MinimalTuple tuple);
      26             : static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
      27             :                                                            TupleTableSlot *slot,
      28             :                                                            bool *isnew, uint32 hash);
      29             : 
      30             : /*
      31             :  * Define parameters for tuple hash table code generation. The interface is
      32             :  * *also* declared in execnodes.h (to generate the types, which are externally
      33             :  * visible).
      34             :  */
      35             : #define SH_PREFIX tuplehash
      36             : #define SH_ELEMENT_TYPE TupleHashEntryData
      37             : #define SH_KEY_TYPE MinimalTuple
      38             : #define SH_KEY firstTuple
      39             : #define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
      40             : #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
      41             : #define SH_SCOPE extern
      42             : #define SH_STORE_HASH
      43             : #define SH_GET_HASH(tb, a) a->hash
      44             : #define SH_DEFINE
      45             : #include "lib/simplehash.h"
      46             : 
      47             : 
      48             : /*****************************************************************************
      49             :  *      Utility routines for grouping tuples together
      50             :  *****************************************************************************/
      51             : 
      52             : /*
      53             :  * execTuplesMatchPrepare
      54             :  *      Build expression that can be evaluated using ExecQual(), returning
      55             :  *      whether an ExprContext's inner/outer tuples are NOT DISTINCT
      56             :  */
      57             : ExprState *
      58       10696 : execTuplesMatchPrepare(TupleDesc desc,
      59             :                        int numCols,
      60             :                        const AttrNumber *keyColIdx,
      61             :                        const Oid *eqOperators,
      62             :                        const Oid *collations,
      63             :                        PlanState *parent)
      64             : {
      65             :     Oid        *eqFunctions;
      66             :     int         i;
      67             :     ExprState  *expr;
      68             : 
      69       10696 :     if (numCols == 0)
      70          54 :         return NULL;
      71             : 
      72       10642 :     eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
      73             : 
      74             :     /* lookup equality functions */
      75       29120 :     for (i = 0; i < numCols; i++)
      76       18478 :         eqFunctions[i] = get_opcode(eqOperators[i]);
      77             : 
      78             :     /* build actual expression */
      79       10642 :     expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
      80             :                                   numCols, keyColIdx, eqFunctions, collations,
      81             :                                   parent);
      82             : 
      83       10642 :     return expr;
      84             : }
      85             : 
      86             : /*
      87             :  * execTuplesHashPrepare
      88             :  *      Look up the equality and hashing functions needed for a TupleHashTable.
      89             :  *
      90             :  * This is similar to execTuplesMatchPrepare, but we also need to find the
      91             :  * hash functions associated with the equality operators.  *eqFunctions and
      92             :  * *hashFunctions receive the palloc'd result arrays.
      93             :  *
      94             :  * Note: we expect that the given operators are not cross-type comparisons.
      95             :  */
      96             : void
      97        6758 : execTuplesHashPrepare(int numCols,
      98             :                       const Oid *eqOperators,
      99             :                       Oid **eqFuncOids,
     100             :                       FmgrInfo **hashFunctions)
     101             : {
     102             :     int         i;
     103             : 
     104        6758 :     *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
     105        6758 :     *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
     106             : 
     107       18018 :     for (i = 0; i < numCols; i++)
     108             :     {
     109       11260 :         Oid         eq_opr = eqOperators[i];
     110             :         Oid         eq_function;
     111             :         Oid         left_hash_function;
     112             :         Oid         right_hash_function;
     113             : 
     114       11260 :         eq_function = get_opcode(eq_opr);
     115       11260 :         if (!get_op_hash_functions(eq_opr,
     116             :                                    &left_hash_function, &right_hash_function))
     117           0 :             elog(ERROR, "could not find hash function for hash operator %u",
     118             :                  eq_opr);
     119             :         /* We're not supporting cross-type cases here */
     120             :         Assert(left_hash_function == right_hash_function);
     121       11260 :         (*eqFuncOids)[i] = eq_function;
     122       11260 :         fmgr_info(right_hash_function, &(*hashFunctions)[i]);
     123             :     }
     124        6758 : }
     125             : 
     126             : 
     127             : /*****************************************************************************
     128             :  *      Utility routines for all-in-memory hash tables
     129             :  *
     130             :  * These routines build hash tables for grouping tuples together (eg, for
     131             :  * hash aggregation).  There is one entry for each not-distinct set of tuples
     132             :  * presented.
     133             :  *****************************************************************************/
     134             : 
     135             : /*
     136             :  * Construct an empty TupleHashTable
     137             :  *
     138             :  *  parent: PlanState node that will own this hash table
     139             :  *  inputDesc: tuple descriptor for input tuples
     140             :  *  inputOps: slot ops for input tuples, or NULL if unknown or not fixed
     141             :  *  numCols: number of columns to be compared (length of next 4 arrays)
     142             :  *  keyColIdx: indexes of tuple columns to compare
     143             :  *  eqfuncoids: OIDs of equality comparison functions to use
     144             :  *  hashfunctions: FmgrInfos of datatype-specific hashing functions to use
     145             :  *  collations: collations to use in comparisons
     146             :  *  nbuckets: initial estimate of hashtable size
     147             :  *  additionalsize: size of data stored in ->additional
     148             :  *  metacxt: memory context for long-lived allocation, but not per-entry data
     149             :  *  tablecxt: memory context in which to store table entries
     150             :  *  tempcxt: short-lived context for evaluation hash and comparison functions
     151             :  *  use_variable_hash_iv: if true, adjust hash IV per-parallel-worker
     152             :  *
     153             :  * The hashfunctions array may be made with execTuplesHashPrepare().  Note they
     154             :  * are not cross-type functions, but expect to see the table datatype(s)
     155             :  * on both sides.
     156             :  *
     157             :  * Note that the keyColIdx, hashfunctions, and collations arrays must be
     158             :  * allocated in storage that will live as long as the hashtable does.
     159             :  */
     160             : TupleHashTable
     161        6190 : BuildTupleHashTable(PlanState *parent,
     162             :                     TupleDesc inputDesc,
     163             :                     const TupleTableSlotOps *inputOps,
     164             :                     int numCols,
     165             :                     AttrNumber *keyColIdx,
     166             :                     const Oid *eqfuncoids,
     167             :                     FmgrInfo *hashfunctions,
     168             :                     Oid *collations,
     169             :                     long nbuckets,
     170             :                     Size additionalsize,
     171             :                     MemoryContext metacxt,
     172             :                     MemoryContext tablecxt,
     173             :                     MemoryContext tempcxt,
     174             :                     bool use_variable_hash_iv)
     175             : {
     176             :     TupleHashTable hashtable;
     177        6190 :     Size        entrysize = sizeof(TupleHashEntryData) + additionalsize;
     178             :     Size        hash_mem_limit;
     179             :     MemoryContext oldcontext;
     180             :     bool        allow_jit;
     181        6190 :     uint32      hash_iv = 0;
     182             : 
     183             :     Assert(nbuckets > 0);
     184             : 
     185             :     /* Limit initial table size request to not more than hash_mem */
     186        6190 :     hash_mem_limit = get_hash_memory_limit() / entrysize;
     187        6190 :     if (nbuckets > hash_mem_limit)
     188          24 :         nbuckets = hash_mem_limit;
     189             : 
     190        6190 :     oldcontext = MemoryContextSwitchTo(metacxt);
     191             : 
     192        6190 :     hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
     193             : 
     194        6190 :     hashtable->numCols = numCols;
     195        6190 :     hashtable->keyColIdx = keyColIdx;
     196        6190 :     hashtable->tab_collations = collations;
     197        6190 :     hashtable->tablecxt = tablecxt;
     198        6190 :     hashtable->tempcxt = tempcxt;
     199        6190 :     hashtable->tableslot = NULL; /* will be made on first lookup */
     200        6190 :     hashtable->inputslot = NULL;
     201        6190 :     hashtable->in_hash_expr = NULL;
     202        6190 :     hashtable->cur_eq_func = NULL;
     203             : 
     204             :     /*
     205             :      * If parallelism is in use, even if the leader backend is performing the
     206             :      * scan itself, we don't want to create the hashtable exactly the same way
     207             :      * in all workers. As hashtables are iterated over in keyspace-order,
     208             :      * doing so in all processes in the same way is likely to lead to
     209             :      * "unbalanced" hashtables when the table size initially is
     210             :      * underestimated.
     211             :      */
     212        6190 :     if (use_variable_hash_iv)
     213         690 :         hash_iv = murmurhash32(ParallelWorkerNumber);
     214             : 
     215        6190 :     hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
     216             : 
     217             :     /*
     218             :      * We copy the input tuple descriptor just for safety --- we assume all
     219             :      * input tuples will have equivalent descriptors.
     220             :      */
     221        6190 :     hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
     222             :                                                     &TTSOpsMinimalTuple);
     223             : 
     224             :     /*
     225             :      * If the caller fails to make the metacxt different from the tablecxt,
     226             :      * allowing JIT would lead to the generated functions to a) live longer
     227             :      * than the query or b) be re-generated each time the table is being
     228             :      * reset. Therefore prevent JIT from being used in that case, by not
     229             :      * providing a parent node (which prevents accessing the JitContext in the
     230             :      * EState).
     231             :      */
     232        6190 :     allow_jit = (metacxt != tablecxt);
     233             : 
     234             :     /* build hash ExprState for all columns */
     235        6190 :     hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc,
     236             :                                                         inputOps,
     237             :                                                         hashfunctions,
     238             :                                                         collations,
     239             :                                                         numCols,
     240             :                                                         keyColIdx,
     241             :                                                         allow_jit ? parent : NULL,
     242             :                                                         hash_iv);
     243             : 
     244             :     /* build comparator for all columns */
     245        6190 :     hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
     246             :                                                     inputOps,
     247             :                                                     &TTSOpsMinimalTuple,
     248             :                                                     numCols,
     249             :                                                     keyColIdx, eqfuncoids, collations,
     250             :                                                     allow_jit ? parent : NULL);
     251             : 
     252             :     /*
     253             :      * While not pretty, it's ok to not shut down this context, but instead
     254             :      * rely on the containing memory context being reset, as
     255             :      * ExecBuildGroupingEqual() only builds a very simple expression calling
     256             :      * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
     257             :      */
     258        6190 :     hashtable->exprcontext = CreateStandaloneExprContext();
     259             : 
     260        6190 :     MemoryContextSwitchTo(oldcontext);
     261             : 
     262        6190 :     return hashtable;
     263             : }
     264             : 
     265             : /*
     266             :  * Reset contents of the hashtable to be empty, preserving all the non-content
     267             :  * state. Note that the tablecxt passed to BuildTupleHashTable() should
     268             :  * also be reset, otherwise there will be leaks.
     269             :  */
     270             : void
     271      193328 : ResetTupleHashTable(TupleHashTable hashtable)
     272             : {
     273      193328 :     tuplehash_reset(hashtable->hashtab);
     274      193328 : }
     275             : 
     276             : /*
     277             :  * Find or create a hashtable entry for the tuple group containing the
     278             :  * given tuple.  The tuple must be the same type as the hashtable entries.
     279             :  *
     280             :  * If isnew is NULL, we do not create new entries; we return NULL if no
     281             :  * match is found.
     282             :  *
     283             :  * If hash is not NULL, we set it to the calculated hash value. This allows
     284             :  * callers access to the hash value even if no entry is returned.
     285             :  *
     286             :  * If isnew isn't NULL, then a new entry is created if no existing entry
     287             :  * matches.  On return, *isnew is true if the entry is newly created,
     288             :  * false if it existed already.  ->additional_data in the new entry has
     289             :  * been zeroed.
     290             :  */
     291             : TupleHashEntry
     292     6571658 : LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
     293             :                      bool *isnew, uint32 *hash)
     294             : {
     295             :     TupleHashEntry entry;
     296             :     MemoryContext oldContext;
     297             :     uint32      local_hash;
     298             : 
     299             :     /* Need to run the hash functions in short-lived context */
     300     6571658 :     oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
     301             : 
     302             :     /* set up data needed by hash and match functions */
     303     6571658 :     hashtable->inputslot = slot;
     304     6571658 :     hashtable->in_hash_expr = hashtable->tab_hash_expr;
     305     6571658 :     hashtable->cur_eq_func = hashtable->tab_eq_func;
     306             : 
     307     6571658 :     local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
     308     6571652 :     entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
     309             : 
     310     6571652 :     if (hash != NULL)
     311     5624628 :         *hash = local_hash;
     312             : 
     313             :     Assert(entry == NULL || entry->hash == local_hash);
     314             : 
     315     6571652 :     MemoryContextSwitchTo(oldContext);
     316             : 
     317     6571652 :     return entry;
     318             : }
     319             : 
     320             : /*
     321             :  * Compute the hash value for a tuple
     322             :  */
     323             : uint32
     324           0 : TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
     325             : {
     326             :     MemoryContext oldContext;
     327             :     uint32      hash;
     328             : 
     329           0 :     hashtable->inputslot = slot;
     330           0 :     hashtable->in_hash_expr = hashtable->tab_hash_expr;
     331             : 
     332             :     /* Need to run the hash functions in short-lived context */
     333           0 :     oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
     334             : 
     335           0 :     hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
     336             : 
     337           0 :     MemoryContextSwitchTo(oldContext);
     338             : 
     339           0 :     return hash;
     340             : }
     341             : 
     342             : /*
     343             :  * A variant of LookupTupleHashEntry for callers that have already computed
     344             :  * the hash value.
     345             :  */
     346             : TupleHashEntry
     347      657036 : LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
     348             :                          bool *isnew, uint32 hash)
     349             : {
     350             :     TupleHashEntry entry;
     351             :     MemoryContext oldContext;
     352             : 
     353             :     /* Need to run the hash functions in short-lived context */
     354      657036 :     oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
     355             : 
     356             :     /* set up data needed by hash and match functions */
     357      657036 :     hashtable->inputslot = slot;
     358      657036 :     hashtable->in_hash_expr = hashtable->tab_hash_expr;
     359      657036 :     hashtable->cur_eq_func = hashtable->tab_eq_func;
     360             : 
     361      657036 :     entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
     362             :     Assert(entry == NULL || entry->hash == hash);
     363             : 
     364      657036 :     MemoryContextSwitchTo(oldContext);
     365             : 
     366      657036 :     return entry;
     367             : }
     368             : 
     369             : /*
     370             :  * Search for a hashtable entry matching the given tuple.  No entry is
     371             :  * created if there's not a match.  This is similar to the non-creating
     372             :  * case of LookupTupleHashEntry, except that it supports cross-type
     373             :  * comparisons, in which the given tuple is not of the same type as the
     374             :  * table entries.  The caller must provide the hash ExprState to use for
     375             :  * the input tuple, as well as the equality ExprState, since these may be
     376             :  * different from the table's internal functions.
     377             :  */
     378             : TupleHashEntry
     379      842954 : FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
     380             :                    ExprState *eqcomp,
     381             :                    ExprState *hashexpr)
     382             : {
     383             :     TupleHashEntry entry;
     384             :     MemoryContext oldContext;
     385             :     MinimalTuple key;
     386             : 
     387             :     /* Need to run the hash functions in short-lived context */
     388      842954 :     oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
     389             : 
     390             :     /* Set up data needed by hash and match functions */
     391      842954 :     hashtable->inputslot = slot;
     392      842954 :     hashtable->in_hash_expr = hashexpr;
     393      842954 :     hashtable->cur_eq_func = eqcomp;
     394             : 
     395             :     /* Search the hash table */
     396      842954 :     key = NULL;                 /* flag to reference inputslot */
     397      842954 :     entry = tuplehash_lookup(hashtable->hashtab, key);
     398      842954 :     MemoryContextSwitchTo(oldContext);
     399             : 
     400      842954 :     return entry;
     401             : }
     402             : 
     403             : /*
     404             :  * If tuple is NULL, use the input slot instead. This convention avoids the
     405             :  * need to materialize virtual input tuples unless they actually need to get
     406             :  * copied into the table.
     407             :  *
     408             :  * Also, the caller must select an appropriate memory context for running
     409             :  * the hash functions.
     410             :  */
     411             : static uint32
     412     7414612 : TupleHashTableHash_internal(struct tuplehash_hash *tb,
     413             :                             const MinimalTuple tuple)
     414             : {
     415     7414612 :     TupleHashTable hashtable = (TupleHashTable) tb->private_data;
     416             :     uint32      hashkey;
     417             :     TupleTableSlot *slot;
     418             :     bool        isnull;
     419             : 
     420     7414612 :     if (tuple == NULL)
     421             :     {
     422             :         /* Process the current input tuple for the table */
     423     7414612 :         hashtable->exprcontext->ecxt_innertuple = hashtable->inputslot;
     424     7414612 :         hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->in_hash_expr,
     425             :                                               hashtable->exprcontext,
     426             :                                               &isnull));
     427             :     }
     428             :     else
     429             :     {
     430             :         /*
     431             :          * Process a tuple already stored in the table.
     432             :          *
     433             :          * (this case never actually occurs due to the way simplehash.h is
     434             :          * used, as the hash-value is stored in the entries)
     435             :          */
     436           0 :         slot = hashtable->exprcontext->ecxt_innertuple = hashtable->tableslot;
     437           0 :         ExecStoreMinimalTuple(tuple, slot, false);
     438           0 :         hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->tab_hash_expr,
     439             :                                               hashtable->exprcontext,
     440             :                                               &isnull));
     441             :     }
     442             : 
     443             :     /*
     444             :      * The hashing done above, even with an initial value, doesn't tend to
     445             :      * result in good hash perturbation.  Running the value produced above
     446             :      * through murmurhash32 leads to near perfect hash perturbation.
     447             :      */
     448     7414606 :     return murmurhash32(hashkey);
     449             : }
     450             : 
     451             : /*
     452             :  * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
     453             :  * so that we can avoid switching the memory context multiple times for
     454             :  * LookupTupleHashEntry.
     455             :  *
     456             :  * NB: This function may or may not change the memory context. Caller is
     457             :  * expected to change it back.
     458             :  */
     459             : static inline TupleHashEntry
     460     7228688 : LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
     461             :                               bool *isnew, uint32 hash)
     462             : {
     463             :     TupleHashEntryData *entry;
     464             :     bool        found;
     465             :     MinimalTuple key;
     466             : 
     467     7228688 :     key = NULL;                 /* flag to reference inputslot */
     468             : 
     469     7228688 :     if (isnew)
     470             :     {
     471     6108980 :         entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
     472             : 
     473     6108980 :         if (found)
     474             :         {
     475             :             /* found pre-existing entry */
     476     5188582 :             *isnew = false;
     477             :         }
     478             :         else
     479             :         {
     480             :             /* created new entry */
     481      920398 :             *isnew = true;
     482             :             /* zero caller data */
     483      920398 :             entry->additional = NULL;
     484      920398 :             MemoryContextSwitchTo(hashtable->tablecxt);
     485             :             /* Copy the first tuple into the table context */
     486      920398 :             entry->firstTuple = ExecCopySlotMinimalTuple(slot);
     487             :         }
     488             :     }
     489             :     else
     490             :     {
     491     1119708 :         entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
     492             :     }
     493             : 
     494     7228688 :     return entry;
     495             : }
     496             : 
     497             : /*
     498             :  * See whether two tuples (presumably of the same hash value) match
     499             :  */
     500             : static int
     501     5697160 : TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
     502             : {
     503             :     TupleTableSlot *slot1;
     504             :     TupleTableSlot *slot2;
     505     5697160 :     TupleHashTable hashtable = (TupleHashTable) tb->private_data;
     506     5697160 :     ExprContext *econtext = hashtable->exprcontext;
     507             : 
     508             :     /*
     509             :      * We assume that simplehash.h will only ever call us with the first
     510             :      * argument being an actual table entry, and the second argument being
     511             :      * LookupTupleHashEntry's dummy TupleHashEntryData.  The other direction
     512             :      * could be supported too, but is not currently required.
     513             :      */
     514             :     Assert(tuple1 != NULL);
     515     5697160 :     slot1 = hashtable->tableslot;
     516     5697160 :     ExecStoreMinimalTuple(tuple1, slot1, false);
     517             :     Assert(tuple2 == NULL);
     518     5697160 :     slot2 = hashtable->inputslot;
     519             : 
     520             :     /* For crosstype comparisons, the inputslot must be first */
     521     5697160 :     econtext->ecxt_innertuple = slot2;
     522     5697160 :     econtext->ecxt_outertuple = slot1;
     523     5697160 :     return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
     524             : }

Generated by: LCOV version 1.14