Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * execGrouping.c
4 : * executor utility routines for grouping, hashing, and aggregation
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/execGrouping.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/parallel.h"
18 : #include "common/hashfn.h"
19 : #include "executor/executor.h"
20 : #include "miscadmin.h"
21 : #include "utils/lsyscache.h"
22 :
23 : static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
24 : static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
25 : const MinimalTuple tuple);
26 : static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
27 : TupleTableSlot *slot,
28 : bool *isnew, uint32 hash);
29 :
30 : /*
31 : * Define parameters for tuple hash table code generation. The interface is
32 : * *also* declared in execnodes.h (to generate the types, which are externally
33 : * visible).
34 : */
35 : #define SH_PREFIX tuplehash
36 : #define SH_ELEMENT_TYPE TupleHashEntryData
37 : #define SH_KEY_TYPE MinimalTuple
38 : #define SH_KEY firstTuple
39 : #define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
40 : #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
41 : #define SH_SCOPE extern
42 : #define SH_STORE_HASH
43 : #define SH_GET_HASH(tb, a) a->hash
44 : #define SH_DEFINE
45 : #include "lib/simplehash.h"
46 :
47 :
48 : /*****************************************************************************
49 : * Utility routines for grouping tuples together
50 : *****************************************************************************/
51 :
52 : /*
53 : * execTuplesMatchPrepare
54 : * Build expression that can be evaluated using ExecQual(), returning
55 : * whether an ExprContext's inner/outer tuples are NOT DISTINCT
56 : */
57 : ExprState *
58 11096 : execTuplesMatchPrepare(TupleDesc desc,
59 : int numCols,
60 : const AttrNumber *keyColIdx,
61 : const Oid *eqOperators,
62 : const Oid *collations,
63 : PlanState *parent)
64 : {
65 : Oid *eqFunctions;
66 : int i;
67 : ExprState *expr;
68 :
69 11096 : if (numCols == 0)
70 54 : return NULL;
71 :
72 11042 : eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
73 :
74 : /* lookup equality functions */
75 30314 : for (i = 0; i < numCols; i++)
76 19272 : eqFunctions[i] = get_opcode(eqOperators[i]);
77 :
78 : /* build actual expression */
79 11042 : expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
80 : numCols, keyColIdx, eqFunctions, collations,
81 : parent);
82 :
83 11042 : return expr;
84 : }
85 :
86 : /*
87 : * execTuplesHashPrepare
88 : * Look up the equality and hashing functions needed for a TupleHashTable.
89 : *
90 : * This is similar to execTuplesMatchPrepare, but we also need to find the
91 : * hash functions associated with the equality operators. *eqFunctions and
92 : * *hashFunctions receive the palloc'd result arrays.
93 : *
94 : * Note: we expect that the given operators are not cross-type comparisons.
95 : */
96 : void
97 6978 : execTuplesHashPrepare(int numCols,
98 : const Oid *eqOperators,
99 : Oid **eqFuncOids,
100 : FmgrInfo **hashFunctions)
101 : {
102 : int i;
103 :
104 6978 : *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
105 6978 : *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
106 :
107 18506 : for (i = 0; i < numCols; i++)
108 : {
109 11528 : Oid eq_opr = eqOperators[i];
110 : Oid eq_function;
111 : Oid left_hash_function;
112 : Oid right_hash_function;
113 :
114 11528 : eq_function = get_opcode(eq_opr);
115 11528 : if (!get_op_hash_functions(eq_opr,
116 : &left_hash_function, &right_hash_function))
117 0 : elog(ERROR, "could not find hash function for hash operator %u",
118 : eq_opr);
119 : /* We're not supporting cross-type cases here */
120 : Assert(left_hash_function == right_hash_function);
121 11528 : (*eqFuncOids)[i] = eq_function;
122 11528 : fmgr_info(right_hash_function, &(*hashFunctions)[i]);
123 : }
124 6978 : }
125 :
126 :
127 : /*****************************************************************************
128 : * Utility routines for all-in-memory hash tables
129 : *
130 : * These routines build hash tables for grouping tuples together (eg, for
131 : * hash aggregation). There is one entry for each not-distinct set of tuples
132 : * presented.
133 : *****************************************************************************/
134 :
135 : /*
136 : * Construct an empty TupleHashTable
137 : *
138 : * parent: PlanState node that will own this hash table
139 : * inputDesc: tuple descriptor for input tuples
140 : * inputOps: slot ops for input tuples, or NULL if unknown or not fixed
141 : * numCols: number of columns to be compared (length of next 4 arrays)
142 : * keyColIdx: indexes of tuple columns to compare
143 : * eqfuncoids: OIDs of equality comparison functions to use
144 : * hashfunctions: FmgrInfos of datatype-specific hashing functions to use
145 : * collations: collations to use in comparisons
146 : * nbuckets: initial estimate of hashtable size
147 : * additionalsize: size of data stored in ->additional
148 : * metacxt: memory context for long-lived allocation, but not per-entry data
149 : * tablecxt: memory context in which to store table entries
150 : * tempcxt: short-lived context for evaluation hash and comparison functions
151 : * use_variable_hash_iv: if true, adjust hash IV per-parallel-worker
152 : *
153 : * The hashfunctions array may be made with execTuplesHashPrepare(). Note they
154 : * are not cross-type functions, but expect to see the table datatype(s)
155 : * on both sides.
156 : *
157 : * Note that the keyColIdx, hashfunctions, and collations arrays must be
158 : * allocated in storage that will live as long as the hashtable does.
159 : */
160 : TupleHashTable
161 6486 : BuildTupleHashTable(PlanState *parent,
162 : TupleDesc inputDesc,
163 : const TupleTableSlotOps *inputOps,
164 : int numCols,
165 : AttrNumber *keyColIdx,
166 : const Oid *eqfuncoids,
167 : FmgrInfo *hashfunctions,
168 : Oid *collations,
169 : long nbuckets,
170 : Size additionalsize,
171 : MemoryContext metacxt,
172 : MemoryContext tablecxt,
173 : MemoryContext tempcxt,
174 : bool use_variable_hash_iv)
175 : {
176 : TupleHashTable hashtable;
177 : Size entrysize;
178 : Size hash_mem_limit;
179 : MemoryContext oldcontext;
180 : bool allow_jit;
181 6486 : uint32 hash_iv = 0;
182 :
183 : Assert(nbuckets > 0);
184 6486 : additionalsize = MAXALIGN(additionalsize);
185 6486 : entrysize = sizeof(TupleHashEntryData) + additionalsize;
186 :
187 : /* Limit initial table size request to not more than hash_mem */
188 6486 : hash_mem_limit = get_hash_memory_limit() / entrysize;
189 6486 : if (nbuckets > hash_mem_limit)
190 18 : nbuckets = hash_mem_limit;
191 :
192 6486 : oldcontext = MemoryContextSwitchTo(metacxt);
193 :
194 6486 : hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
195 :
196 6486 : hashtable->numCols = numCols;
197 6486 : hashtable->keyColIdx = keyColIdx;
198 6486 : hashtable->tab_collations = collations;
199 6486 : hashtable->tablecxt = tablecxt;
200 6486 : hashtable->tempcxt = tempcxt;
201 6486 : hashtable->additionalsize = additionalsize;
202 6486 : hashtable->tableslot = NULL; /* will be made on first lookup */
203 6486 : hashtable->inputslot = NULL;
204 6486 : hashtable->in_hash_expr = NULL;
205 6486 : hashtable->cur_eq_func = NULL;
206 :
207 : /*
208 : * If parallelism is in use, even if the leader backend is performing the
209 : * scan itself, we don't want to create the hashtable exactly the same way
210 : * in all workers. As hashtables are iterated over in keyspace-order,
211 : * doing so in all processes in the same way is likely to lead to
212 : * "unbalanced" hashtables when the table size initially is
213 : * underestimated.
214 : */
215 6486 : if (use_variable_hash_iv)
216 690 : hash_iv = murmurhash32(ParallelWorkerNumber);
217 :
218 6486 : hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
219 :
220 : /*
221 : * We copy the input tuple descriptor just for safety --- we assume all
222 : * input tuples will have equivalent descriptors.
223 : */
224 6486 : hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
225 : &TTSOpsMinimalTuple);
226 :
227 : /*
228 : * If the caller fails to make the metacxt different from the tablecxt,
229 : * allowing JIT would lead to the generated functions to a) live longer
230 : * than the query or b) be re-generated each time the table is being
231 : * reset. Therefore prevent JIT from being used in that case, by not
232 : * providing a parent node (which prevents accessing the JitContext in the
233 : * EState).
234 : */
235 6486 : allow_jit = (metacxt != tablecxt);
236 :
237 : /* build hash ExprState for all columns */
238 6486 : hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc,
239 : inputOps,
240 : hashfunctions,
241 : collations,
242 : numCols,
243 : keyColIdx,
244 : allow_jit ? parent : NULL,
245 : hash_iv);
246 :
247 : /* build comparator for all columns */
248 6486 : hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
249 : inputOps,
250 : &TTSOpsMinimalTuple,
251 : numCols,
252 : keyColIdx, eqfuncoids, collations,
253 : allow_jit ? parent : NULL);
254 :
255 : /*
256 : * While not pretty, it's ok to not shut down this context, but instead
257 : * rely on the containing memory context being reset, as
258 : * ExecBuildGroupingEqual() only builds a very simple expression calling
259 : * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
260 : */
261 6486 : hashtable->exprcontext = CreateStandaloneExprContext();
262 :
263 6486 : MemoryContextSwitchTo(oldcontext);
264 :
265 6486 : return hashtable;
266 : }
267 :
268 : /*
269 : * Reset contents of the hashtable to be empty, preserving all the non-content
270 : * state. Note that the tablecxt passed to BuildTupleHashTable() should
271 : * also be reset, otherwise there will be leaks.
272 : */
273 : void
274 193372 : ResetTupleHashTable(TupleHashTable hashtable)
275 : {
276 193372 : tuplehash_reset(hashtable->hashtab);
277 193372 : }
278 :
279 : /*
280 : * Find or create a hashtable entry for the tuple group containing the
281 : * given tuple. The tuple must be the same type as the hashtable entries.
282 : *
283 : * If isnew is NULL, we do not create new entries; we return NULL if no
284 : * match is found.
285 : *
286 : * If hash is not NULL, we set it to the calculated hash value. This allows
287 : * callers access to the hash value even if no entry is returned.
288 : *
289 : * If isnew isn't NULL, then a new entry is created if no existing entry
290 : * matches. On return, *isnew is true if the entry is newly created,
291 : * false if it existed already. ->additional_data in the new entry has
292 : * been zeroed.
293 : */
294 : TupleHashEntry
295 7227582 : LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
296 : bool *isnew, uint32 *hash)
297 : {
298 : TupleHashEntry entry;
299 : MemoryContext oldContext;
300 : uint32 local_hash;
301 :
302 : /* Need to run the hash functions in short-lived context */
303 7227582 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
304 :
305 : /* set up data needed by hash and match functions */
306 7227582 : hashtable->inputslot = slot;
307 7227582 : hashtable->in_hash_expr = hashtable->tab_hash_expr;
308 7227582 : hashtable->cur_eq_func = hashtable->tab_eq_func;
309 :
310 7227582 : local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
311 7227576 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
312 :
313 7227576 : if (hash != NULL)
314 6279636 : *hash = local_hash;
315 :
316 : Assert(entry == NULL || entry->hash == local_hash);
317 :
318 7227576 : MemoryContextSwitchTo(oldContext);
319 :
320 7227576 : return entry;
321 : }
322 :
323 : /*
324 : * Compute the hash value for a tuple
325 : */
326 : uint32
327 0 : TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
328 : {
329 : MemoryContext oldContext;
330 : uint32 hash;
331 :
332 0 : hashtable->inputslot = slot;
333 0 : hashtable->in_hash_expr = hashtable->tab_hash_expr;
334 :
335 : /* Need to run the hash functions in short-lived context */
336 0 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
337 :
338 0 : hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
339 :
340 0 : MemoryContextSwitchTo(oldContext);
341 :
342 0 : return hash;
343 : }
344 :
345 : /*
346 : * A variant of LookupTupleHashEntry for callers that have already computed
347 : * the hash value.
348 : */
349 : TupleHashEntry
350 1216776 : LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
351 : bool *isnew, uint32 hash)
352 : {
353 : TupleHashEntry entry;
354 : MemoryContext oldContext;
355 :
356 : /* Need to run the hash functions in short-lived context */
357 1216776 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
358 :
359 : /* set up data needed by hash and match functions */
360 1216776 : hashtable->inputslot = slot;
361 1216776 : hashtable->in_hash_expr = hashtable->tab_hash_expr;
362 1216776 : hashtable->cur_eq_func = hashtable->tab_eq_func;
363 :
364 1216776 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
365 : Assert(entry == NULL || entry->hash == hash);
366 :
367 1216776 : MemoryContextSwitchTo(oldContext);
368 :
369 1216776 : return entry;
370 : }
371 :
372 : /*
373 : * Search for a hashtable entry matching the given tuple. No entry is
374 : * created if there's not a match. This is similar to the non-creating
375 : * case of LookupTupleHashEntry, except that it supports cross-type
376 : * comparisons, in which the given tuple is not of the same type as the
377 : * table entries. The caller must provide the hash ExprState to use for
378 : * the input tuple, as well as the equality ExprState, since these may be
379 : * different from the table's internal functions.
380 : */
381 : TupleHashEntry
382 998908 : FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
383 : ExprState *eqcomp,
384 : ExprState *hashexpr)
385 : {
386 : TupleHashEntry entry;
387 : MemoryContext oldContext;
388 : MinimalTuple key;
389 :
390 : /* Need to run the hash functions in short-lived context */
391 998908 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
392 :
393 : /* Set up data needed by hash and match functions */
394 998908 : hashtable->inputslot = slot;
395 998908 : hashtable->in_hash_expr = hashexpr;
396 998908 : hashtable->cur_eq_func = eqcomp;
397 :
398 : /* Search the hash table */
399 998908 : key = NULL; /* flag to reference inputslot */
400 998908 : entry = tuplehash_lookup(hashtable->hashtab, key);
401 998908 : MemoryContextSwitchTo(oldContext);
402 :
403 998908 : return entry;
404 : }
405 :
406 : /*
407 : * If tuple is NULL, use the input slot instead. This convention avoids the
408 : * need to materialize virtual input tuples unless they actually need to get
409 : * copied into the table.
410 : *
411 : * Also, the caller must select an appropriate memory context for running
412 : * the hash functions.
413 : */
414 : static uint32
415 8226490 : TupleHashTableHash_internal(struct tuplehash_hash *tb,
416 : const MinimalTuple tuple)
417 : {
418 8226490 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
419 : uint32 hashkey;
420 : TupleTableSlot *slot;
421 : bool isnull;
422 :
423 8226490 : if (tuple == NULL)
424 : {
425 : /* Process the current input tuple for the table */
426 8226490 : hashtable->exprcontext->ecxt_innertuple = hashtable->inputslot;
427 8226490 : hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->in_hash_expr,
428 : hashtable->exprcontext,
429 : &isnull));
430 : }
431 : else
432 : {
433 : /*
434 : * Process a tuple already stored in the table.
435 : *
436 : * (this case never actually occurs due to the way simplehash.h is
437 : * used, as the hash-value is stored in the entries)
438 : */
439 0 : slot = hashtable->exprcontext->ecxt_innertuple = hashtable->tableslot;
440 0 : ExecStoreMinimalTuple(tuple, slot, false);
441 0 : hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->tab_hash_expr,
442 : hashtable->exprcontext,
443 : &isnull));
444 : }
445 :
446 : /*
447 : * The hashing done above, even with an initial value, doesn't tend to
448 : * result in good hash perturbation. Running the value produced above
449 : * through murmurhash32 leads to near perfect hash perturbation.
450 : */
451 8226484 : return murmurhash32(hashkey);
452 : }
453 :
454 : /*
455 : * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
456 : * so that we can avoid switching the memory context multiple times for
457 : * LookupTupleHashEntry.
458 : *
459 : * NB: This function may or may not change the memory context. Caller is
460 : * expected to change it back.
461 : */
462 : static inline TupleHashEntry
463 8444352 : LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
464 : bool *isnew, uint32 hash)
465 : {
466 : TupleHashEntryData *entry;
467 : bool found;
468 : MinimalTuple key;
469 :
470 8444352 : key = NULL; /* flag to reference inputslot */
471 :
472 8444352 : if (isnew)
473 : {
474 6710814 : entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
475 :
476 6710814 : if (found)
477 : {
478 : /* found pre-existing entry */
479 5697296 : *isnew = false;
480 : }
481 : else
482 : {
483 : /* created new entry */
484 1013518 : *isnew = true;
485 :
486 1013518 : MemoryContextSwitchTo(hashtable->tablecxt);
487 :
488 : /*
489 : * Copy the first tuple into the table context, and request
490 : * additionalsize extra bytes before the allocation.
491 : *
492 : * The caller can get a pointer to the additional data with
493 : * TupleHashEntryGetAdditional(), and store arbitrary data there.
494 : * Placing both the tuple and additional data in the same
495 : * allocation avoids the need to store an extra pointer in
496 : * TupleHashEntryData or allocate an additional chunk.
497 : */
498 1013518 : entry->firstTuple = ExecCopySlotMinimalTupleExtra(slot,
499 : hashtable->additionalsize);
500 : }
501 : }
502 : else
503 : {
504 1733538 : entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
505 : }
506 :
507 8444352 : return entry;
508 : }
509 :
510 : /*
511 : * See whether two tuples (presumably of the same hash value) match
512 : */
513 : static int
514 6259998 : TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
515 : {
516 : TupleTableSlot *slot1;
517 : TupleTableSlot *slot2;
518 6259998 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
519 6259998 : ExprContext *econtext = hashtable->exprcontext;
520 :
521 : /*
522 : * We assume that simplehash.h will only ever call us with the first
523 : * argument being an actual table entry, and the second argument being
524 : * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
525 : * could be supported too, but is not currently required.
526 : */
527 : Assert(tuple1 != NULL);
528 6259998 : slot1 = hashtable->tableslot;
529 6259998 : ExecStoreMinimalTuple(tuple1, slot1, false);
530 : Assert(tuple2 == NULL);
531 6259998 : slot2 = hashtable->inputslot;
532 :
533 : /* For crosstype comparisons, the inputslot must be first */
534 6259998 : econtext->ecxt_innertuple = slot2;
535 6259998 : econtext->ecxt_outertuple = slot1;
536 6259998 : return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
537 : }
|