Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * execGrouping.c
4 : * executor utility routines for grouping, hashing, and aggregation
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/execGrouping.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/parallel.h"
18 : #include "common/hashfn.h"
19 : #include "executor/executor.h"
20 : #include "miscadmin.h"
21 : #include "utils/lsyscache.h"
22 :
23 : static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
24 : static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
25 : const MinimalTuple tuple);
26 : static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
27 : TupleTableSlot *slot,
28 : bool *isnew, uint32 hash);
29 :
30 : /*
31 : * Define parameters for tuple hash table code generation. The interface is
32 : * *also* declared in execnodes.h (to generate the types, which are externally
33 : * visible).
34 : */
35 : #define SH_PREFIX tuplehash
36 : #define SH_ELEMENT_TYPE TupleHashEntryData
37 : #define SH_KEY_TYPE MinimalTuple
38 : #define SH_KEY firstTuple
39 : #define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
40 : #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
41 : #define SH_SCOPE extern
42 : #define SH_STORE_HASH
43 : #define SH_GET_HASH(tb, a) a->hash
44 : #define SH_DEFINE
45 : #include "lib/simplehash.h"
46 :
47 :
48 : /*****************************************************************************
49 : * Utility routines for grouping tuples together
50 : *****************************************************************************/
51 :
52 : /*
53 : * execTuplesMatchPrepare
54 : * Build expression that can be evaluated using ExecQual(), returning
55 : * whether an ExprContext's inner/outer tuples are NOT DISTINCT
56 : */
57 : ExprState *
58 11440 : execTuplesMatchPrepare(TupleDesc desc,
59 : int numCols,
60 : const AttrNumber *keyColIdx,
61 : const Oid *eqOperators,
62 : const Oid *collations,
63 : PlanState *parent)
64 : {
65 : Oid *eqFunctions;
66 : int i;
67 : ExprState *expr;
68 :
69 11440 : if (numCols == 0)
70 54 : return NULL;
71 :
72 11386 : eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
73 :
74 : /* lookup equality functions */
75 31112 : for (i = 0; i < numCols; i++)
76 19726 : eqFunctions[i] = get_opcode(eqOperators[i]);
77 :
78 : /* build actual expression */
79 11386 : expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
80 : numCols, keyColIdx, eqFunctions, collations,
81 : parent);
82 :
83 11386 : return expr;
84 : }
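
/*
 * Editorial sketch (not part of execGrouping.c): how a calling node might
 * evaluate the ExprState returned by execTuplesMatchPrepare().  The function
 * name is hypothetical; nodeGroup.c and similar callers follow this pattern.
 */
static bool
example_tuples_match(ExprState *eqexpr, ExprContext *econtext,
					 TupleTableSlot *tuple1, TupleTableSlot *tuple2)
{
	/* the grouping-equal expression compares the outer and inner tuples */
	econtext->ecxt_outertuple = tuple1;
	econtext->ecxt_innertuple = tuple2;

	/* true if the key columns are NOT DISTINCT; also resets per-tuple memory */
	return ExecQualAndReset(eqexpr, econtext);
}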
85 :
86 : /*
87 : * execTuplesHashPrepare
88 : * Look up the equality and hashing functions needed for a TupleHashTable.
89 : *
90 : * This is similar to execTuplesMatchPrepare, but we also need to find the
91 : * hash functions associated with the equality operators. *eqFunctions and
92 : * *hashFunctions receive the palloc'd result arrays.
93 : *
94 : * Note: we expect that the given operators are not cross-type comparisons.
95 : */
96 : void
97 6972 : execTuplesHashPrepare(int numCols,
98 : const Oid *eqOperators,
99 : Oid **eqFuncOids,
100 : FmgrInfo **hashFunctions)
101 : {
102 : int i;
103 :
104 6972 : *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
105 6972 : *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
106 :
107 18500 : for (i = 0; i < numCols; i++)
108 : {
109 11528 : Oid eq_opr = eqOperators[i];
110 : Oid eq_function;
111 : Oid left_hash_function;
112 : Oid right_hash_function;
113 :
114 11528 : eq_function = get_opcode(eq_opr);
115 11528 : if (!get_op_hash_functions(eq_opr,
116 : &left_hash_function, &right_hash_function))
117 0 : elog(ERROR, "could not find hash function for hash operator %u",
118 : eq_opr);
119 : /* We're not supporting cross-type cases here */
120 : Assert(left_hash_function == right_hash_function);
121 11528 : (*eqFuncOids)[i] = eq_function;
122 11528 : fmgr_info(right_hash_function, &(*hashFunctions)[i]);
123 : }
124 6972 : }
125 :
126 :
127 : /*****************************************************************************
128 : * Utility routines for all-in-memory hash tables
129 : *
130 : * These routines build hash tables for grouping tuples together (eg, for
131 : * hash aggregation). There is one entry for each not-distinct set of tuples
132 : * presented.
133 : *****************************************************************************/
134 :
135 : /*
136 : * Construct an empty TupleHashTable
137 : *
138 : * parent: PlanState node that will own this hash table
139 : * inputDesc: tuple descriptor for input tuples
140 : * inputOps: slot ops for input tuples, or NULL if unknown or not fixed
141 : * numCols: number of columns to be compared (length of next 4 arrays)
142 : * keyColIdx: indexes of tuple columns to compare
143 : * eqfuncoids: OIDs of equality comparison functions to use
144 : * hashfunctions: FmgrInfos of datatype-specific hashing functions to use
145 : * collations: collations to use in comparisons
146 : * nbuckets: initial estimate of hashtable size
147 : * additionalsize: size of data that may be stored along with the hash entry
148 : * metacxt: memory context for long-lived allocation, but not per-entry data
149 : * tablecxt: memory context in which to store table entries
 151 150 : * tempcxt: short-lived context for evaluating hash and comparison functions
151 : * use_variable_hash_iv: if true, adjust hash IV per-parallel-worker
152 : *
153 : * The hashfunctions array may be made with execTuplesHashPrepare(). Note they
154 : * are not cross-type functions, but expect to see the table datatype(s)
155 : * on both sides.
156 : *
157 : * Note that the keyColIdx, hashfunctions, and collations arrays must be
158 : * allocated in storage that will live as long as the hashtable does.
159 : *
160 : * LookupTupleHashEntry, FindTupleHashEntry, and related functions may leak
161 : * memory in the tempcxt. It is caller's responsibility to reset that context
162 : * reasonably often, typically once per tuple. (We do it that way, rather
163 : * than managing an extra context within the hashtable, because in many cases
164 : * the caller can specify a tempcxt that it needs to reset per-tuple anyway.)
165 : */
166 : TupleHashTable
167 6538 : BuildTupleHashTable(PlanState *parent,
168 : TupleDesc inputDesc,
169 : const TupleTableSlotOps *inputOps,
170 : int numCols,
171 : AttrNumber *keyColIdx,
172 : const Oid *eqfuncoids,
173 : FmgrInfo *hashfunctions,
174 : Oid *collations,
175 : long nbuckets,
176 : Size additionalsize,
177 : MemoryContext metacxt,
178 : MemoryContext tablecxt,
179 : MemoryContext tempcxt,
180 : bool use_variable_hash_iv)
181 : {
182 : TupleHashTable hashtable;
183 : Size entrysize;
184 : Size hash_mem_limit;
185 : MemoryContext oldcontext;
186 : bool allow_jit;
187 6538 : uint32 hash_iv = 0;
188 :
189 : Assert(nbuckets > 0);
190 6538 : additionalsize = MAXALIGN(additionalsize);
191 6538 : entrysize = sizeof(TupleHashEntryData) + additionalsize;
192 :
193 : /* Limit initial table size request to not more than hash_mem */
194 6538 : hash_mem_limit = get_hash_memory_limit() / entrysize;
195 6538 : if (nbuckets > hash_mem_limit)
196 18 : nbuckets = hash_mem_limit;
197 :
198 6538 : oldcontext = MemoryContextSwitchTo(metacxt);
199 :
200 6538 : hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
201 :
202 6538 : hashtable->numCols = numCols;
203 6538 : hashtable->keyColIdx = keyColIdx;
204 6538 : hashtable->tab_collations = collations;
205 6538 : hashtable->tablecxt = tablecxt;
206 6538 : hashtable->tempcxt = tempcxt;
207 6538 : hashtable->additionalsize = additionalsize;
208 6538 : hashtable->tableslot = NULL; /* will be made on first lookup */
209 6538 : hashtable->inputslot = NULL;
210 6538 : hashtable->in_hash_expr = NULL;
211 6538 : hashtable->cur_eq_func = NULL;
212 :
213 : /*
214 : * If parallelism is in use, even if the leader backend is performing the
215 : * scan itself, we don't want to create the hashtable exactly the same way
216 : * in all workers. As hashtables are iterated over in keyspace-order,
217 : * doing so in all processes in the same way is likely to lead to
 219 218 : * "unbalanced" hashtables when the table size is initially
 220 219 : * underestimated.
220 : */
221 6538 : if (use_variable_hash_iv)
222 690 : hash_iv = murmurhash32(ParallelWorkerNumber);
223 :
224 6538 : hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
225 :
226 : /*
227 : * We copy the input tuple descriptor just for safety --- we assume all
228 : * input tuples will have equivalent descriptors.
229 : */
230 6538 : hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
231 : &TTSOpsMinimalTuple);
232 :
233 : /*
234 : * If the caller fails to make the metacxt different from the tablecxt,
 236 235 : * allowing JIT would cause the generated functions to either a) outlive
 237 236 : * the query or b) be re-generated each time the table is reset.
 238 237 : * Therefore prevent JIT from being used in that case, by not
238 : * providing a parent node (which prevents accessing the JitContext in the
239 : * EState).
240 : */
241 6538 : allow_jit = (metacxt != tablecxt);
242 :
243 : /* build hash ExprState for all columns */
244 6538 : hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc,
245 : inputOps,
246 : hashfunctions,
247 : collations,
248 : numCols,
249 : keyColIdx,
250 : allow_jit ? parent : NULL,
251 : hash_iv);
252 :
253 : /* build comparator for all columns */
254 6538 : hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
255 : inputOps,
256 : &TTSOpsMinimalTuple,
257 : numCols,
258 : keyColIdx, eqfuncoids, collations,
259 : allow_jit ? parent : NULL);
260 :
261 : /*
262 : * While not pretty, it's ok to not shut down this context, but instead
263 : * rely on the containing memory context being reset, as
264 : * ExecBuildGroupingEqual() only builds a very simple expression calling
265 : * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
266 : */
267 6538 : hashtable->exprcontext = CreateStandaloneExprContext();
268 :
269 6538 : MemoryContextSwitchTo(oldcontext);
270 :
271 6538 : return hashtable;
272 : }
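
/*
 * Editorial sketch (not part of execGrouping.c): typical construction of a
 * TupleHashTable from a set of grouping columns.  All names here are
 * hypothetical; real callers such as nodeAgg.c and nodeSetOp.c differ in the
 * contexts and sizes they pass.
 */
static TupleHashTable
example_build_hashtable(PlanState *parent, TupleDesc inputDesc,
						int numCols, AttrNumber *keyColIdx,
						const Oid *eqOperators, Oid *collations,
						long nbuckets,
						MemoryContext metacxt, MemoryContext tablecxt,
						MemoryContext tempcxt)
{
	Oid		   *eqFuncOids;
	FmgrInfo   *hashFunctions;

	/* resolve the equality operators into equality and hash support functions */
	execTuplesHashPrepare(numCols, eqOperators, &eqFuncOids, &hashFunctions);

	return BuildTupleHashTable(parent, inputDesc,
							   NULL,	/* input slot ops not known/fixed */
							   numCols, keyColIdx,
							   eqFuncOids, hashFunctions, collations,
							   nbuckets,
							   0,		/* no additional per-entry data */
							   metacxt, tablecxt, tempcxt,
							   false);	/* no per-worker hash IV */
}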
273 :
274 : /*
275 : * Reset contents of the hashtable to be empty, preserving all the non-content
276 : * state. Note that the tablecxt passed to BuildTupleHashTable() should
277 : * also be reset, otherwise there will be leaks.
278 : */
279 : void
280 194644 : ResetTupleHashTable(TupleHashTable hashtable)
281 : {
282 194644 : tuplehash_reset(hashtable->hashtab);
283 194644 : }
284 :
285 : /*
286 : * Find or create a hashtable entry for the tuple group containing the
287 : * given tuple. The tuple must be the same type as the hashtable entries.
288 : *
289 : * If isnew is NULL, we do not create new entries; we return NULL if no
290 : * match is found.
291 : *
292 : * If hash is not NULL, we set it to the calculated hash value. This allows
293 : * callers access to the hash value even if no entry is returned.
294 : *
295 : * If isnew isn't NULL, then a new entry is created if no existing entry
296 : * matches. On return, *isnew is true if the entry is newly created,
297 : * false if it existed already. The additional data in the new entry has
298 : * been zeroed.
299 : */
300 : TupleHashEntry
301 6998946 : LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
302 : bool *isnew, uint32 *hash)
303 : {
304 : TupleHashEntry entry;
305 : MemoryContext oldContext;
306 : uint32 local_hash;
307 :
308 : /* Need to run the hash functions in short-lived context */
309 6998946 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
310 :
311 : /* set up data needed by hash and match functions */
312 6998946 : hashtable->inputslot = slot;
313 6998946 : hashtable->in_hash_expr = hashtable->tab_hash_expr;
314 6998946 : hashtable->cur_eq_func = hashtable->tab_eq_func;
315 :
316 6998946 : local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
317 6998940 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
318 :
319 6998940 : if (hash != NULL)
320 6049764 : *hash = local_hash;
321 :
322 : Assert(entry == NULL || entry->hash == local_hash);
323 :
324 6998940 : MemoryContextSwitchTo(oldContext);
325 :
326 6998940 : return entry;
327 : }
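
/*
 * Editorial sketch (not part of execGrouping.c): a per-tuple insert-or-find
 * step following the contract documented above BuildTupleHashTable(), i.e.
 * the caller resets tempcxt once per tuple.  It assumes the table was built
 * with additionalsize >= sizeof(int64), and that the
 * TupleHashEntryGetAdditional() accessor mentioned further below takes the
 * hashtable and entry; the function name here is hypothetical.
 */
static void
example_count_group_member(TupleHashTable hashtable, TupleTableSlot *slot)
{
	TupleHashEntry entry;
	bool		isnew;
	int64	   *count;

	/* discard whatever the previous lookup left in the short-lived context */
	MemoryContextReset(hashtable->tempcxt);

	/* find or create the entry for this tuple's group */
	entry = LookupTupleHashEntry(hashtable, slot, &isnew, NULL);

	count = (int64 *) TupleHashEntryGetAdditional(hashtable, entry);
	if (isnew)
		*count = 1;				/* the additional data starts out zeroed */
	else
		(*count)++;
}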
328 :
329 : /*
330 : * Compute the hash value for a tuple
331 : */
332 : uint32
333 0 : TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
334 : {
335 : MemoryContext oldContext;
336 : uint32 hash;
337 :
338 0 : hashtable->inputslot = slot;
339 0 : hashtable->in_hash_expr = hashtable->tab_hash_expr;
340 :
341 : /* Need to run the hash functions in short-lived context */
342 0 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
343 :
344 0 : hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
345 :
346 0 : MemoryContextSwitchTo(oldContext);
347 :
348 0 : return hash;
349 : }
350 :
351 : /*
352 : * A variant of LookupTupleHashEntry for callers that have already computed
353 : * the hash value.
354 : */
355 : TupleHashEntry
356 1216776 : LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
357 : bool *isnew, uint32 hash)
358 : {
359 : TupleHashEntry entry;
360 : MemoryContext oldContext;
361 :
362 : /* Need to run the hash functions in short-lived context */
363 1216776 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
364 :
365 : /* set up data needed by hash and match functions */
366 1216776 : hashtable->inputslot = slot;
367 1216776 : hashtable->in_hash_expr = hashtable->tab_hash_expr;
368 1216776 : hashtable->cur_eq_func = hashtable->tab_eq_func;
369 :
370 1216776 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
371 : Assert(entry == NULL || entry->hash == hash);
372 :
373 1216776 : MemoryContextSwitchTo(oldContext);
374 :
375 1216776 : return entry;
376 : }
377 :
378 : /*
379 : * Search for a hashtable entry matching the given tuple. No entry is
380 : * created if there's not a match. This is similar to the non-creating
381 : * case of LookupTupleHashEntry, except that it supports cross-type
382 : * comparisons, in which the given tuple is not of the same type as the
383 : * table entries. The caller must provide the hash ExprState to use for
384 : * the input tuple, as well as the equality ExprState, since these may be
385 : * different from the table's internal functions.
386 : */
387 : TupleHashEntry
388 1051566 : FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
389 : ExprState *eqcomp,
390 : ExprState *hashexpr)
391 : {
392 : TupleHashEntry entry;
393 : MemoryContext oldContext;
394 : MinimalTuple key;
395 :
396 : /* Need to run the hash functions in short-lived context */
397 1051566 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
398 :
399 : /* Set up data needed by hash and match functions */
400 1051566 : hashtable->inputslot = slot;
401 1051566 : hashtable->in_hash_expr = hashexpr;
402 1051566 : hashtable->cur_eq_func = eqcomp;
403 :
404 : /* Search the hash table */
405 1051566 : key = NULL; /* flag to reference inputslot */
406 1051566 : entry = tuplehash_lookup(hashtable->hashtab, key);
407 1051566 : MemoryContextSwitchTo(oldContext);
408 :
409 1051566 : return entry;
410 : }
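
/*
 * Editorial sketch (not part of execGrouping.c): probing the table with a
 * possibly differently-typed slot, given caller-built cross-type hash and
 * equality ExprStates (hypothetical names).  As above, the accessor's exact
 * signature is assumed from its use elsewhere in the executor.
 */
static int64
example_probe_count(TupleHashTable hashtable, TupleTableSlot *probeslot,
					ExprState *cross_eq, ExprState *cross_hash)
{
	TupleHashEntry entry;

	/* keep the short-lived context from accumulating across probes */
	MemoryContextReset(hashtable->tempcxt);

	entry = FindTupleHashEntry(hashtable, probeslot, cross_eq, cross_hash);
	if (entry == NULL)
		return 0;				/* no matching group */

	return *((int64 *) TupleHashEntryGetAdditional(hashtable, entry));
}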
411 :
412 : /*
413 : * If tuple is NULL, use the input slot instead. This convention avoids the
414 : * need to materialize virtual input tuples unless they actually need to get
415 : * copied into the table.
416 : *
417 : * Also, the caller must select an appropriate memory context for running
418 : * the hash functions.
419 : */
420 : static uint32
421 8050512 : TupleHashTableHash_internal(struct tuplehash_hash *tb,
422 : const MinimalTuple tuple)
423 : {
424 8050512 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
425 : uint32 hashkey;
426 : TupleTableSlot *slot;
427 : bool isnull;
428 :
429 8050512 : if (tuple == NULL)
430 : {
431 : /* Process the current input tuple for the table */
432 8050512 : hashtable->exprcontext->ecxt_innertuple = hashtable->inputslot;
433 8050512 : hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->in_hash_expr,
434 : hashtable->exprcontext,
435 : &isnull));
436 : }
437 : else
438 : {
439 : /*
440 : * Process a tuple already stored in the table.
441 : *
442 : * (this case never actually occurs due to the way simplehash.h is
443 : * used, as the hash-value is stored in the entries)
444 : */
445 0 : slot = hashtable->exprcontext->ecxt_innertuple = hashtable->tableslot;
446 0 : ExecStoreMinimalTuple(tuple, slot, false);
447 0 : hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->tab_hash_expr,
448 : hashtable->exprcontext,
449 : &isnull));
450 : }
451 :
452 : /*
453 : * The hashing done above, even with an initial value, doesn't tend to
454 : * result in good hash perturbation. Running the value produced above
455 : * through murmurhash32 leads to near perfect hash perturbation.
456 : */
457 8050506 : return murmurhash32(hashkey);
458 : }
459 :
460 : /*
461 : * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
462 : * so that we can avoid switching the memory context multiple times for
463 : * LookupTupleHashEntry.
464 : *
465 : * NB: This function may or may not change the memory context. Caller is
466 : * expected to change it back.
467 : */
468 : static inline TupleHashEntry
469 8215716 : LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
470 : bool *isnew, uint32 hash)
471 : {
472 : TupleHashEntryData *entry;
473 : bool found;
474 : MinimalTuple key;
475 :
476 8215716 : key = NULL; /* flag to reference inputslot */
477 :
478 8215716 : if (isnew)
479 : {
480 6482158 : entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
481 :
482 6482158 : if (found)
483 : {
484 : /* found pre-existing entry */
485 5471510 : *isnew = false;
486 : }
487 : else
488 : {
489 : /* created new entry */
490 1010648 : *isnew = true;
491 :
492 1010648 : MemoryContextSwitchTo(hashtable->tablecxt);
493 :
494 : /*
495 : * Copy the first tuple into the table context, and request
496 : * additionalsize extra bytes before the allocation.
497 : *
498 : * The caller can get a pointer to the additional data with
499 : * TupleHashEntryGetAdditional(), and store arbitrary data there.
500 : * Placing both the tuple and additional data in the same
501 : * allocation avoids the need to store an extra pointer in
502 : * TupleHashEntryData or allocate an additional chunk.
503 : */
504 1010648 : entry->firstTuple = ExecCopySlotMinimalTupleExtra(slot,
505 : hashtable->additionalsize);
506 : }
507 : }
508 : else
509 : {
510 1733558 : entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
511 : }
512 :
513 8215716 : return entry;
514 : }
515 :
516 : /*
517 : * See whether two tuples (presumably of the same hash value) match
518 : */
519 : static int
520 6034214 : TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
521 : {
522 : TupleTableSlot *slot1;
523 : TupleTableSlot *slot2;
524 6034214 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
525 6034214 : ExprContext *econtext = hashtable->exprcontext;
526 :
527 : /*
528 : * We assume that simplehash.h will only ever call us with the first
529 : * argument being an actual table entry, and the second argument being
530 : * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
531 : * could be supported too, but is not currently required.
532 : */
533 : Assert(tuple1 != NULL);
534 6034214 : slot1 = hashtable->tableslot;
535 6034214 : ExecStoreMinimalTuple(tuple1, slot1, false);
536 : Assert(tuple2 == NULL);
537 6034214 : slot2 = hashtable->inputslot;
538 :
539 : /* For crosstype comparisons, the inputslot must be first */
540 6034214 : econtext->ecxt_innertuple = slot2;
541 6034214 : econtext->ecxt_outertuple = slot1;
542 6034214 : return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
543 : }
|