Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * execGrouping.c
4 : * executor utility routines for grouping, hashing, and aggregation
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/execGrouping.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/parallel.h"
18 : #include "common/hashfn.h"
19 : #include "executor/executor.h"
20 : #include "miscadmin.h"
21 : #include "utils/lsyscache.h"
22 :
23 : static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
24 : static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
25 : const MinimalTuple tuple);
26 : static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
27 : TupleTableSlot *slot,
28 : bool *isnew, uint32 hash);
29 :
30 : /*
31 : * Define parameters for tuple hash table code generation. The interface is
32 : * *also* declared in execnodes.h (to generate the types, which are externally
33 : * visible).
34 : */
35 : #define SH_PREFIX tuplehash
36 : #define SH_ELEMENT_TYPE TupleHashEntryData
37 : #define SH_KEY_TYPE MinimalTuple
38 : #define SH_KEY firstTuple
39 : #define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
40 : #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
41 : #define SH_SCOPE extern
42 : #define SH_STORE_HASH
43 : #define SH_GET_HASH(tb, a) a->hash
44 : #define SH_DEFINE
45 : #include "lib/simplehash.h"
46 :
47 :
48 : /*****************************************************************************
49 : * Utility routines for grouping tuples together
50 : *****************************************************************************/
51 :
52 : /*
53 : * execTuplesMatchPrepare
54 : * Build expression that can be evaluated using ExecQual(), returning
55 : * whether an ExprContext's inner/outer tuples are NOT DISTINCT
56 : */
57 : ExprState *
58 10696 : execTuplesMatchPrepare(TupleDesc desc,
59 : int numCols,
60 : const AttrNumber *keyColIdx,
61 : const Oid *eqOperators,
62 : const Oid *collations,
63 : PlanState *parent)
64 : {
65 : Oid *eqFunctions;
66 : int i;
67 : ExprState *expr;
68 :
69 10696 : if (numCols == 0)
70 54 : return NULL;
71 :
72 10642 : eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
73 :
74 : /* lookup equality functions */
75 29120 : for (i = 0; i < numCols; i++)
76 18478 : eqFunctions[i] = get_opcode(eqOperators[i]);
77 :
78 : /* build actual expression */
79 10642 : expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
80 : numCols, keyColIdx, eqFunctions, collations,
81 : parent);
82 :
83 10642 : return expr;
84 : }
85 :
86 : /*
87 : * execTuplesHashPrepare
88 : * Look up the equality and hashing functions needed for a TupleHashTable.
89 : *
90 : * This is similar to execTuplesMatchPrepare, but we also need to find the
91 : * hash functions associated with the equality operators. *eqFunctions and
92 : * *hashFunctions receive the palloc'd result arrays.
93 : *
94 : * Note: we expect that the given operators are not cross-type comparisons.
95 : */
96 : void
97 6758 : execTuplesHashPrepare(int numCols,
98 : const Oid *eqOperators,
99 : Oid **eqFuncOids,
100 : FmgrInfo **hashFunctions)
101 : {
102 : int i;
103 :
104 6758 : *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
105 6758 : *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
106 :
107 18018 : for (i = 0; i < numCols; i++)
108 : {
109 11260 : Oid eq_opr = eqOperators[i];
110 : Oid eq_function;
111 : Oid left_hash_function;
112 : Oid right_hash_function;
113 :
114 11260 : eq_function = get_opcode(eq_opr);
115 11260 : if (!get_op_hash_functions(eq_opr,
116 : &left_hash_function, &right_hash_function))
117 0 : elog(ERROR, "could not find hash function for hash operator %u",
118 : eq_opr);
119 : /* We're not supporting cross-type cases here */
120 : Assert(left_hash_function == right_hash_function);
121 11260 : (*eqFuncOids)[i] = eq_function;
122 11260 : fmgr_info(right_hash_function, &(*hashFunctions)[i]);
123 : }
124 6758 : }
125 :
126 :
127 : /*****************************************************************************
128 : * Utility routines for all-in-memory hash tables
129 : *
130 : * These routines build hash tables for grouping tuples together (eg, for
131 : * hash aggregation). There is one entry for each not-distinct set of tuples
132 : * presented.
133 : *****************************************************************************/
134 :
135 : /*
136 : * Construct an empty TupleHashTable
137 : *
138 : * parent: PlanState node that will own this hash table
139 : * inputDesc: tuple descriptor for input tuples
140 : * inputOps: slot ops for input tuples, or NULL if unknown or not fixed
141 : * numCols: number of columns to be compared (length of next 4 arrays)
142 : * keyColIdx: indexes of tuple columns to compare
143 : * eqfuncoids: OIDs of equality comparison functions to use
144 : * hashfunctions: FmgrInfos of datatype-specific hashing functions to use
145 : * collations: collations to use in comparisons
146 : * nbuckets: initial estimate of hashtable size
147 : * additionalsize: size of data stored in ->additional
148 : * metacxt: memory context for long-lived allocation, but not per-entry data
149 : * tablecxt: memory context in which to store table entries
150 : * tempcxt: short-lived context for evaluation hash and comparison functions
151 : * use_variable_hash_iv: if true, adjust hash IV per-parallel-worker
152 : *
153 : * The hashfunctions array may be made with execTuplesHashPrepare(). Note they
154 : * are not cross-type functions, but expect to see the table datatype(s)
155 : * on both sides.
156 : *
157 : * Note that the keyColIdx, hashfunctions, and collations arrays must be
158 : * allocated in storage that will live as long as the hashtable does.
159 : */
160 : TupleHashTable
161 6190 : BuildTupleHashTable(PlanState *parent,
162 : TupleDesc inputDesc,
163 : const TupleTableSlotOps *inputOps,
164 : int numCols,
165 : AttrNumber *keyColIdx,
166 : const Oid *eqfuncoids,
167 : FmgrInfo *hashfunctions,
168 : Oid *collations,
169 : long nbuckets,
170 : Size additionalsize,
171 : MemoryContext metacxt,
172 : MemoryContext tablecxt,
173 : MemoryContext tempcxt,
174 : bool use_variable_hash_iv)
175 : {
176 : TupleHashTable hashtable;
177 6190 : Size entrysize = sizeof(TupleHashEntryData) + additionalsize;
178 : Size hash_mem_limit;
179 : MemoryContext oldcontext;
180 : bool allow_jit;
181 6190 : uint32 hash_iv = 0;
182 :
183 : Assert(nbuckets > 0);
184 :
185 : /* Limit initial table size request to not more than hash_mem */
186 6190 : hash_mem_limit = get_hash_memory_limit() / entrysize;
187 6190 : if (nbuckets > hash_mem_limit)
188 24 : nbuckets = hash_mem_limit;
189 :
190 6190 : oldcontext = MemoryContextSwitchTo(metacxt);
191 :
192 6190 : hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
193 :
194 6190 : hashtable->numCols = numCols;
195 6190 : hashtable->keyColIdx = keyColIdx;
196 6190 : hashtable->tab_collations = collations;
197 6190 : hashtable->tablecxt = tablecxt;
198 6190 : hashtable->tempcxt = tempcxt;
199 6190 : hashtable->tableslot = NULL; /* will be made on first lookup */
200 6190 : hashtable->inputslot = NULL;
201 6190 : hashtable->in_hash_expr = NULL;
202 6190 : hashtable->cur_eq_func = NULL;
203 :
204 : /*
205 : * If parallelism is in use, even if the leader backend is performing the
206 : * scan itself, we don't want to create the hashtable exactly the same way
207 : * in all workers. As hashtables are iterated over in keyspace-order,
208 : * doing so in all processes in the same way is likely to lead to
209 : * "unbalanced" hashtables when the table size initially is
210 : * underestimated.
211 : */
212 6190 : if (use_variable_hash_iv)
213 690 : hash_iv = murmurhash32(ParallelWorkerNumber);
214 :
215 6190 : hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
216 :
217 : /*
218 : * We copy the input tuple descriptor just for safety --- we assume all
219 : * input tuples will have equivalent descriptors.
220 : */
221 6190 : hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
222 : &TTSOpsMinimalTuple);
223 :
224 : /*
225 : * If the caller fails to make the metacxt different from the tablecxt,
226 : * allowing JIT would lead to the generated functions to a) live longer
227 : * than the query or b) be re-generated each time the table is being
228 : * reset. Therefore prevent JIT from being used in that case, by not
229 : * providing a parent node (which prevents accessing the JitContext in the
230 : * EState).
231 : */
232 6190 : allow_jit = (metacxt != tablecxt);
233 :
234 : /* build hash ExprState for all columns */
235 6190 : hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc,
236 : inputOps,
237 : hashfunctions,
238 : collations,
239 : numCols,
240 : keyColIdx,
241 : allow_jit ? parent : NULL,
242 : hash_iv);
243 :
244 : /* build comparator for all columns */
245 6190 : hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
246 : inputOps,
247 : &TTSOpsMinimalTuple,
248 : numCols,
249 : keyColIdx, eqfuncoids, collations,
250 : allow_jit ? parent : NULL);
251 :
252 : /*
253 : * While not pretty, it's ok to not shut down this context, but instead
254 : * rely on the containing memory context being reset, as
255 : * ExecBuildGroupingEqual() only builds a very simple expression calling
256 : * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
257 : */
258 6190 : hashtable->exprcontext = CreateStandaloneExprContext();
259 :
260 6190 : MemoryContextSwitchTo(oldcontext);
261 :
262 6190 : return hashtable;
263 : }
264 :
265 : /*
266 : * Reset contents of the hashtable to be empty, preserving all the non-content
267 : * state. Note that the tablecxt passed to BuildTupleHashTable() should
268 : * also be reset, otherwise there will be leaks.
269 : */
270 : void
271 193328 : ResetTupleHashTable(TupleHashTable hashtable)
272 : {
273 193328 : tuplehash_reset(hashtable->hashtab);
274 193328 : }
275 :
276 : /*
277 : * Find or create a hashtable entry for the tuple group containing the
278 : * given tuple. The tuple must be the same type as the hashtable entries.
279 : *
280 : * If isnew is NULL, we do not create new entries; we return NULL if no
281 : * match is found.
282 : *
283 : * If hash is not NULL, we set it to the calculated hash value. This allows
284 : * callers access to the hash value even if no entry is returned.
285 : *
286 : * If isnew isn't NULL, then a new entry is created if no existing entry
287 : * matches. On return, *isnew is true if the entry is newly created,
288 : * false if it existed already. ->additional_data in the new entry has
289 : * been zeroed.
290 : */
291 : TupleHashEntry
292 6571658 : LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
293 : bool *isnew, uint32 *hash)
294 : {
295 : TupleHashEntry entry;
296 : MemoryContext oldContext;
297 : uint32 local_hash;
298 :
299 : /* Need to run the hash functions in short-lived context */
300 6571658 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
301 :
302 : /* set up data needed by hash and match functions */
303 6571658 : hashtable->inputslot = slot;
304 6571658 : hashtable->in_hash_expr = hashtable->tab_hash_expr;
305 6571658 : hashtable->cur_eq_func = hashtable->tab_eq_func;
306 :
307 6571658 : local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
308 6571652 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
309 :
310 6571652 : if (hash != NULL)
311 5624628 : *hash = local_hash;
312 :
313 : Assert(entry == NULL || entry->hash == local_hash);
314 :
315 6571652 : MemoryContextSwitchTo(oldContext);
316 :
317 6571652 : return entry;
318 : }
319 :
320 : /*
321 : * Compute the hash value for a tuple
322 : */
323 : uint32
324 0 : TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
325 : {
326 : MemoryContext oldContext;
327 : uint32 hash;
328 :
329 0 : hashtable->inputslot = slot;
330 0 : hashtable->in_hash_expr = hashtable->tab_hash_expr;
331 :
332 : /* Need to run the hash functions in short-lived context */
333 0 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
334 :
335 0 : hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
336 :
337 0 : MemoryContextSwitchTo(oldContext);
338 :
339 0 : return hash;
340 : }
341 :
342 : /*
343 : * A variant of LookupTupleHashEntry for callers that have already computed
344 : * the hash value.
345 : */
346 : TupleHashEntry
347 657036 : LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
348 : bool *isnew, uint32 hash)
349 : {
350 : TupleHashEntry entry;
351 : MemoryContext oldContext;
352 :
353 : /* Need to run the hash functions in short-lived context */
354 657036 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
355 :
356 : /* set up data needed by hash and match functions */
357 657036 : hashtable->inputslot = slot;
358 657036 : hashtable->in_hash_expr = hashtable->tab_hash_expr;
359 657036 : hashtable->cur_eq_func = hashtable->tab_eq_func;
360 :
361 657036 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
362 : Assert(entry == NULL || entry->hash == hash);
363 :
364 657036 : MemoryContextSwitchTo(oldContext);
365 :
366 657036 : return entry;
367 : }
368 :
369 : /*
370 : * Search for a hashtable entry matching the given tuple. No entry is
371 : * created if there's not a match. This is similar to the non-creating
372 : * case of LookupTupleHashEntry, except that it supports cross-type
373 : * comparisons, in which the given tuple is not of the same type as the
374 : * table entries. The caller must provide the hash ExprState to use for
375 : * the input tuple, as well as the equality ExprState, since these may be
376 : * different from the table's internal functions.
377 : */
378 : TupleHashEntry
379 842954 : FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
380 : ExprState *eqcomp,
381 : ExprState *hashexpr)
382 : {
383 : TupleHashEntry entry;
384 : MemoryContext oldContext;
385 : MinimalTuple key;
386 :
387 : /* Need to run the hash functions in short-lived context */
388 842954 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
389 :
390 : /* Set up data needed by hash and match functions */
391 842954 : hashtable->inputslot = slot;
392 842954 : hashtable->in_hash_expr = hashexpr;
393 842954 : hashtable->cur_eq_func = eqcomp;
394 :
395 : /* Search the hash table */
396 842954 : key = NULL; /* flag to reference inputslot */
397 842954 : entry = tuplehash_lookup(hashtable->hashtab, key);
398 842954 : MemoryContextSwitchTo(oldContext);
399 :
400 842954 : return entry;
401 : }
402 :
403 : /*
404 : * If tuple is NULL, use the input slot instead. This convention avoids the
405 : * need to materialize virtual input tuples unless they actually need to get
406 : * copied into the table.
407 : *
408 : * Also, the caller must select an appropriate memory context for running
409 : * the hash functions.
410 : */
411 : static uint32
412 7414612 : TupleHashTableHash_internal(struct tuplehash_hash *tb,
413 : const MinimalTuple tuple)
414 : {
415 7414612 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
416 : uint32 hashkey;
417 : TupleTableSlot *slot;
418 : bool isnull;
419 :
420 7414612 : if (tuple == NULL)
421 : {
422 : /* Process the current input tuple for the table */
423 7414612 : hashtable->exprcontext->ecxt_innertuple = hashtable->inputslot;
424 7414612 : hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->in_hash_expr,
425 : hashtable->exprcontext,
426 : &isnull));
427 : }
428 : else
429 : {
430 : /*
431 : * Process a tuple already stored in the table.
432 : *
433 : * (this case never actually occurs due to the way simplehash.h is
434 : * used, as the hash-value is stored in the entries)
435 : */
436 0 : slot = hashtable->exprcontext->ecxt_innertuple = hashtable->tableslot;
437 0 : ExecStoreMinimalTuple(tuple, slot, false);
438 0 : hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->tab_hash_expr,
439 : hashtable->exprcontext,
440 : &isnull));
441 : }
442 :
443 : /*
444 : * The hashing done above, even with an initial value, doesn't tend to
445 : * result in good hash perturbation. Running the value produced above
446 : * through murmurhash32 leads to near perfect hash perturbation.
447 : */
448 7414606 : return murmurhash32(hashkey);
449 : }
450 :
451 : /*
452 : * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
453 : * so that we can avoid switching the memory context multiple times for
454 : * LookupTupleHashEntry.
455 : *
456 : * NB: This function may or may not change the memory context. Caller is
457 : * expected to change it back.
458 : */
459 : static inline TupleHashEntry
460 7228688 : LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
461 : bool *isnew, uint32 hash)
462 : {
463 : TupleHashEntryData *entry;
464 : bool found;
465 : MinimalTuple key;
466 :
467 7228688 : key = NULL; /* flag to reference inputslot */
468 :
469 7228688 : if (isnew)
470 : {
471 6108980 : entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
472 :
473 6108980 : if (found)
474 : {
475 : /* found pre-existing entry */
476 5188582 : *isnew = false;
477 : }
478 : else
479 : {
480 : /* created new entry */
481 920398 : *isnew = true;
482 : /* zero caller data */
483 920398 : entry->additional = NULL;
484 920398 : MemoryContextSwitchTo(hashtable->tablecxt);
485 : /* Copy the first tuple into the table context */
486 920398 : entry->firstTuple = ExecCopySlotMinimalTuple(slot);
487 : }
488 : }
489 : else
490 : {
491 1119708 : entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
492 : }
493 :
494 7228688 : return entry;
495 : }
496 :
497 : /*
498 : * See whether two tuples (presumably of the same hash value) match
499 : */
500 : static int
501 5697160 : TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
502 : {
503 : TupleTableSlot *slot1;
504 : TupleTableSlot *slot2;
505 5697160 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
506 5697160 : ExprContext *econtext = hashtable->exprcontext;
507 :
508 : /*
509 : * We assume that simplehash.h will only ever call us with the first
510 : * argument being an actual table entry, and the second argument being
511 : * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
512 : * could be supported too, but is not currently required.
513 : */
514 : Assert(tuple1 != NULL);
515 5697160 : slot1 = hashtable->tableslot;
516 5697160 : ExecStoreMinimalTuple(tuple1, slot1, false);
517 : Assert(tuple2 == NULL);
518 5697160 : slot2 = hashtable->inputslot;
519 :
520 : /* For crosstype comparisons, the inputslot must be first */
521 5697160 : econtext->ecxt_innertuple = slot2;
522 5697160 : econtext->ecxt_outertuple = slot1;
523 5697160 : return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
524 : }
|