Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * execGrouping.c
4 : * executor utility routines for grouping, hashing, and aggregation
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/execGrouping.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/parallel.h"
18 : #include "common/hashfn.h"
19 : #include "executor/executor.h"
20 : #include "miscadmin.h"
21 : #include "utils/lsyscache.h"
22 :
23 : static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
24 : static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
25 : const MinimalTuple tuple);
26 : static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
27 : TupleTableSlot *slot,
28 : bool *isnew, uint32 hash);
29 :
30 : /*
31 : * Define parameters for tuple hash table code generation. The interface is
32 : * *also* declared in execnodes.h (to generate the types, which are externally
33 : * visible).
34 : */
35 : #define SH_PREFIX tuplehash
36 : #define SH_ELEMENT_TYPE TupleHashEntryData
37 : #define SH_KEY_TYPE MinimalTuple
38 : #define SH_KEY firstTuple
39 : #define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
40 : #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
41 : #define SH_SCOPE extern
42 : #define SH_STORE_HASH
43 : #define SH_GET_HASH(tb, a) a->hash
44 : #define SH_DEFINE
45 : #include "lib/simplehash.h"
46 :
47 :
48 : /*****************************************************************************
49 : * Utility routines for grouping tuples together
50 : *****************************************************************************/
51 :
52 : /*
53 : * execTuplesMatchPrepare
54 : * Build expression that can be evaluated using ExecQual(), returning
55 : * whether an ExprContext's inner/outer tuples are NOT DISTINCT
56 : */
57 : ExprState *
58 10656 : execTuplesMatchPrepare(TupleDesc desc,
59 : int numCols,
60 : const AttrNumber *keyColIdx,
61 : const Oid *eqOperators,
62 : const Oid *collations,
63 : PlanState *parent)
64 : {
65 : Oid *eqFunctions;
66 : int i;
67 : ExprState *expr;
68 :
69 10656 : if (numCols == 0)
70 84 : return NULL;
71 :
72 10572 : eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
73 :
74 : /* lookup equality functions */
75 28824 : for (i = 0; i < numCols; i++)
76 18252 : eqFunctions[i] = get_opcode(eqOperators[i]);
77 :
78 : /* build actual expression */
79 10572 : expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
80 : numCols, keyColIdx, eqFunctions, collations,
81 : parent);
82 :
83 10572 : return expr;
84 : }
85 :
86 : /*
87 : * execTuplesHashPrepare
88 : * Look up the equality and hashing functions needed for a TupleHashTable.
89 : *
90 : * This is similar to execTuplesMatchPrepare, but we also need to find the
91 : * hash functions associated with the equality operators. *eqFunctions and
92 : * *hashFunctions receive the palloc'd result arrays.
93 : *
94 : * Note: we expect that the given operators are not cross-type comparisons.
95 : */
96 : void
97 6818 : execTuplesHashPrepare(int numCols,
98 : const Oid *eqOperators,
99 : Oid **eqFuncOids,
100 : FmgrInfo **hashFunctions)
101 : {
102 : int i;
103 :
104 6818 : *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
105 6818 : *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
106 :
107 19538 : for (i = 0; i < numCols; i++)
108 : {
109 12720 : Oid eq_opr = eqOperators[i];
110 : Oid eq_function;
111 : Oid left_hash_function;
112 : Oid right_hash_function;
113 :
114 12720 : eq_function = get_opcode(eq_opr);
115 12720 : if (!get_op_hash_functions(eq_opr,
116 : &left_hash_function, &right_hash_function))
117 0 : elog(ERROR, "could not find hash function for hash operator %u",
118 : eq_opr);
119 : /* We're not supporting cross-type cases here */
120 : Assert(left_hash_function == right_hash_function);
121 12720 : (*eqFuncOids)[i] = eq_function;
122 12720 : fmgr_info(right_hash_function, &(*hashFunctions)[i]);
123 : }
124 6818 : }
125 :
126 :
127 : /*****************************************************************************
128 : * Utility routines for all-in-memory hash tables
129 : *
130 : * These routines build hash tables for grouping tuples together (eg, for
131 : * hash aggregation). There is one entry for each not-distinct set of tuples
132 : * presented.
133 : *****************************************************************************/
134 :
135 : /*
136 : * Construct an empty TupleHashTable
137 : *
138 : * numCols, keyColIdx: identify the tuple fields to use as lookup key
139 : * eqfunctions: equality comparison functions to use
140 : * hashfunctions: datatype-specific hashing functions to use
141 : * nbuckets: initial estimate of hashtable size
142 : * additionalsize: size of data stored in ->additional
143 : * metacxt: memory context for long-lived allocation, but not per-entry data
144 : * tablecxt: memory context in which to store table entries
145 : * tempcxt: short-lived context for evaluation hash and comparison functions
146 : *
147 : * The function arrays may be made with execTuplesHashPrepare(). Note they
148 : * are not cross-type functions, but expect to see the table datatype(s)
149 : * on both sides.
150 : *
151 : * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
152 : * storage that will live as long as the hashtable does.
153 : */
154 : TupleHashTable
155 6294 : BuildTupleHashTableExt(PlanState *parent,
156 : TupleDesc inputDesc,
157 : int numCols, AttrNumber *keyColIdx,
158 : const Oid *eqfuncoids,
159 : FmgrInfo *hashfunctions,
160 : Oid *collations,
161 : long nbuckets, Size additionalsize,
162 : MemoryContext metacxt,
163 : MemoryContext tablecxt,
164 : MemoryContext tempcxt,
165 : bool use_variable_hash_iv)
166 : {
167 : TupleHashTable hashtable;
168 6294 : Size entrysize = sizeof(TupleHashEntryData) + additionalsize;
169 : Size hash_mem_limit;
170 : MemoryContext oldcontext;
171 : bool allow_jit;
172 :
173 : Assert(nbuckets > 0);
174 :
175 : /* Limit initial table size request to not more than hash_mem */
176 6294 : hash_mem_limit = get_hash_memory_limit() / entrysize;
177 6294 : if (nbuckets > hash_mem_limit)
178 24 : nbuckets = hash_mem_limit;
179 :
180 6294 : oldcontext = MemoryContextSwitchTo(metacxt);
181 :
182 6294 : hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
183 :
184 6294 : hashtable->numCols = numCols;
185 6294 : hashtable->keyColIdx = keyColIdx;
186 6294 : hashtable->tab_hash_funcs = hashfunctions;
187 6294 : hashtable->tab_collations = collations;
188 6294 : hashtable->tablecxt = tablecxt;
189 6294 : hashtable->tempcxt = tempcxt;
190 6294 : hashtable->entrysize = entrysize;
191 6294 : hashtable->tableslot = NULL; /* will be made on first lookup */
192 6294 : hashtable->inputslot = NULL;
193 6294 : hashtable->in_hash_funcs = NULL;
194 6294 : hashtable->cur_eq_func = NULL;
195 :
196 : /*
197 : * If parallelism is in use, even if the leader backend is performing the
198 : * scan itself, we don't want to create the hashtable exactly the same way
199 : * in all workers. As hashtables are iterated over in keyspace-order,
200 : * doing so in all processes in the same way is likely to lead to
201 : * "unbalanced" hashtables when the table size initially is
202 : * underestimated.
203 : */
204 6294 : if (use_variable_hash_iv)
205 690 : hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
206 : else
207 5604 : hashtable->hash_iv = 0;
208 :
209 6294 : hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
210 :
211 : /*
212 : * We copy the input tuple descriptor just for safety --- we assume all
213 : * input tuples will have equivalent descriptors.
214 : */
215 6294 : hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
216 : &TTSOpsMinimalTuple);
217 :
218 : /*
219 : * If the old reset interface is used (i.e. BuildTupleHashTable, rather
220 : * than BuildTupleHashTableExt), allowing JIT would lead to the generated
221 : * functions to a) live longer than the query b) be re-generated each time
222 : * the table is being reset. Therefore prevent JIT from being used in that
223 : * case, by not providing a parent node (which prevents accessing the
224 : * JitContext in the EState).
225 : */
226 6294 : allow_jit = metacxt != tablecxt;
227 :
228 : /* build comparator for all columns */
229 : /* XXX: should we support non-minimal tuples for the inputslot? */
230 6294 : hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
231 : &TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
232 : numCols,
233 : keyColIdx, eqfuncoids, collations,
234 : allow_jit ? parent : NULL);
235 :
236 : /*
237 : * While not pretty, it's ok to not shut down this context, but instead
238 : * rely on the containing memory context being reset, as
239 : * ExecBuildGroupingEqual() only builds a very simple expression calling
240 : * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
241 : */
242 6294 : hashtable->exprcontext = CreateStandaloneExprContext();
243 :
244 6294 : MemoryContextSwitchTo(oldcontext);
245 :
246 6294 : return hashtable;
247 : }
248 :
249 : /*
250 : * BuildTupleHashTable is a backwards-compatibility wrapper for
251 : * BuildTupleHashTableExt(), that allocates the hashtable's metadata in
252 : * tablecxt. Note that hashtables created this way cannot be reset leak-free
253 : * with ResetTupleHashTable().
254 : */
255 : TupleHashTable
256 0 : BuildTupleHashTable(PlanState *parent,
257 : TupleDesc inputDesc,
258 : int numCols, AttrNumber *keyColIdx,
259 : const Oid *eqfuncoids,
260 : FmgrInfo *hashfunctions,
261 : Oid *collations,
262 : long nbuckets, Size additionalsize,
263 : MemoryContext tablecxt,
264 : MemoryContext tempcxt,
265 : bool use_variable_hash_iv)
266 : {
267 0 : return BuildTupleHashTableExt(parent,
268 : inputDesc,
269 : numCols, keyColIdx,
270 : eqfuncoids,
271 : hashfunctions,
272 : collations,
273 : nbuckets, additionalsize,
274 : tablecxt,
275 : tablecxt,
276 : tempcxt,
277 : use_variable_hash_iv);
278 : }
279 :
280 : /*
281 : * Reset contents of the hashtable to be empty, preserving all the non-content
282 : * state. Note that the tablecxt passed to BuildTupleHashTableExt() should
283 : * also be reset, otherwise there will be leaks.
284 : */
285 : void
286 193328 : ResetTupleHashTable(TupleHashTable hashtable)
287 : {
288 193328 : tuplehash_reset(hashtable->hashtab);
289 193328 : }
290 :
291 : /*
292 : * Find or create a hashtable entry for the tuple group containing the
293 : * given tuple. The tuple must be the same type as the hashtable entries.
294 : *
295 : * If isnew is NULL, we do not create new entries; we return NULL if no
296 : * match is found.
297 : *
298 : * If hash is not NULL, we set it to the calculated hash value. This allows
299 : * callers access to the hash value even if no entry is returned.
300 : *
301 : * If isnew isn't NULL, then a new entry is created if no existing entry
302 : * matches. On return, *isnew is true if the entry is newly created,
303 : * false if it existed already. ->additional_data in the new entry has
304 : * been zeroed.
305 : */
306 : TupleHashEntry
307 6482634 : LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
308 : bool *isnew, uint32 *hash)
309 : {
310 : TupleHashEntry entry;
311 : MemoryContext oldContext;
312 : uint32 local_hash;
313 :
314 : /* Need to run the hash functions in short-lived context */
315 6482634 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
316 :
317 : /* set up data needed by hash and match functions */
318 6482634 : hashtable->inputslot = slot;
319 6482634 : hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
320 6482634 : hashtable->cur_eq_func = hashtable->tab_eq_func;
321 :
322 6482634 : local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
323 6482628 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
324 :
325 6482628 : if (hash != NULL)
326 5535254 : *hash = local_hash;
327 :
328 : Assert(entry == NULL || entry->hash == local_hash);
329 :
330 6482628 : MemoryContextSwitchTo(oldContext);
331 :
332 6482628 : return entry;
333 : }
334 :
335 : /*
336 : * Compute the hash value for a tuple
337 : */
338 : uint32
339 0 : TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
340 : {
341 : MemoryContext oldContext;
342 : uint32 hash;
343 :
344 0 : hashtable->inputslot = slot;
345 0 : hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
346 :
347 : /* Need to run the hash functions in short-lived context */
348 0 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
349 :
350 0 : hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
351 :
352 0 : MemoryContextSwitchTo(oldContext);
353 :
354 0 : return hash;
355 : }
356 :
357 : /*
358 : * A variant of LookupTupleHashEntry for callers that have already computed
359 : * the hash value.
360 : */
361 : TupleHashEntry
362 655824 : LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
363 : bool *isnew, uint32 hash)
364 : {
365 : TupleHashEntry entry;
366 : MemoryContext oldContext;
367 :
368 : /* Need to run the hash functions in short-lived context */
369 655824 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
370 :
371 : /* set up data needed by hash and match functions */
372 655824 : hashtable->inputslot = slot;
373 655824 : hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
374 655824 : hashtable->cur_eq_func = hashtable->tab_eq_func;
375 :
376 655824 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
377 : Assert(entry == NULL || entry->hash == hash);
378 :
379 655824 : MemoryContextSwitchTo(oldContext);
380 :
381 655824 : return entry;
382 : }
383 :
384 : /*
385 : * Search for a hashtable entry matching the given tuple. No entry is
386 : * created if there's not a match. This is similar to the non-creating
387 : * case of LookupTupleHashEntry, except that it supports cross-type
388 : * comparisons, in which the given tuple is not of the same type as the
389 : * table entries. The caller must provide the hash functions to use for
390 : * the input tuple, as well as the equality functions, since these may be
391 : * different from the table's internal functions.
392 : */
393 : TupleHashEntry
394 842032 : FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
395 : ExprState *eqcomp,
396 : FmgrInfo *hashfunctions)
397 : {
398 : TupleHashEntry entry;
399 : MemoryContext oldContext;
400 : MinimalTuple key;
401 :
402 : /* Need to run the hash functions in short-lived context */
403 842032 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
404 :
405 : /* Set up data needed by hash and match functions */
406 842032 : hashtable->inputslot = slot;
407 842032 : hashtable->in_hash_funcs = hashfunctions;
408 842032 : hashtable->cur_eq_func = eqcomp;
409 :
410 : /* Search the hash table */
411 842032 : key = NULL; /* flag to reference inputslot */
412 842032 : entry = tuplehash_lookup(hashtable->hashtab, key);
413 842032 : MemoryContextSwitchTo(oldContext);
414 :
415 842032 : return entry;
416 : }
417 :
418 : /*
419 : * If tuple is NULL, use the input slot instead. This convention avoids the
420 : * need to materialize virtual input tuples unless they actually need to get
421 : * copied into the table.
422 : *
423 : * Also, the caller must select an appropriate memory context for running
424 : * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
425 : */
426 : static uint32
427 7324666 : TupleHashTableHash_internal(struct tuplehash_hash *tb,
428 : const MinimalTuple tuple)
429 : {
430 7324666 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
431 7324666 : int numCols = hashtable->numCols;
432 7324666 : AttrNumber *keyColIdx = hashtable->keyColIdx;
433 7324666 : uint32 hashkey = hashtable->hash_iv;
434 : TupleTableSlot *slot;
435 : FmgrInfo *hashfunctions;
436 : int i;
437 :
438 7324666 : if (tuple == NULL)
439 : {
440 : /* Process the current input tuple for the table */
441 7324666 : slot = hashtable->inputslot;
442 7324666 : hashfunctions = hashtable->in_hash_funcs;
443 : }
444 : else
445 : {
446 : /*
447 : * Process a tuple already stored in the table.
448 : *
449 : * (this case never actually occurs due to the way simplehash.h is
450 : * used, as the hash-value is stored in the entries)
451 : */
452 0 : slot = hashtable->tableslot;
453 0 : ExecStoreMinimalTuple(tuple, slot, false);
454 0 : hashfunctions = hashtable->tab_hash_funcs;
455 : }
456 :
457 18973362 : for (i = 0; i < numCols; i++)
458 : {
459 11648702 : AttrNumber att = keyColIdx[i];
460 : Datum attr;
461 : bool isNull;
462 :
463 : /* combine successive hashkeys by rotating */
464 11648702 : hashkey = pg_rotate_left32(hashkey, 1);
465 :
466 11648702 : attr = slot_getattr(slot, att, &isNull);
467 :
468 11648702 : if (!isNull) /* treat nulls as having hash key 0 */
469 : {
470 : uint32 hkey;
471 :
472 11427670 : hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
473 11427676 : hashtable->tab_collations[i],
474 : attr));
475 11427670 : hashkey ^= hkey;
476 : }
477 : }
478 :
479 : /*
480 : * The way hashes are combined above, among each other and with the IV,
481 : * doesn't lead to good bit perturbation. As the IV's goal is to lead to
482 : * achieve that, perform a round of hashing of the combined hash -
483 : * resulting in near perfect perturbation.
484 : */
485 7324660 : return murmurhash32(hashkey);
486 : }
487 :
488 : /*
489 : * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
490 : * so that we can avoid switching the memory context multiple times for
491 : * LookupTupleHashEntry.
492 : *
493 : * NB: This function may or may not change the memory context. Caller is
494 : * expected to change it back.
495 : */
496 : static inline TupleHashEntry
497 7138452 : LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
498 : bool *isnew, uint32 hash)
499 : {
500 : TupleHashEntryData *entry;
501 : bool found;
502 : MinimalTuple key;
503 :
504 7138452 : key = NULL; /* flag to reference inputslot */
505 :
506 7138452 : if (isnew)
507 : {
508 6019810 : entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
509 :
510 6019810 : if (found)
511 : {
512 : /* found pre-existing entry */
513 5098708 : *isnew = false;
514 : }
515 : else
516 : {
517 : /* created new entry */
518 921102 : *isnew = true;
519 : /* zero caller data */
520 921102 : entry->additional = NULL;
521 921102 : MemoryContextSwitchTo(hashtable->tablecxt);
522 : /* Copy the first tuple into the table context */
523 921102 : entry->firstTuple = ExecCopySlotMinimalTuple(slot);
524 : }
525 : }
526 : else
527 : {
528 1118642 : entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
529 : }
530 :
531 7138452 : return entry;
532 : }
533 :
534 : /*
535 : * See whether two tuples (presumably of the same hash value) match
536 : */
537 : static int
538 5607252 : TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
539 : {
540 : TupleTableSlot *slot1;
541 : TupleTableSlot *slot2;
542 5607252 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
543 5607252 : ExprContext *econtext = hashtable->exprcontext;
544 :
545 : /*
546 : * We assume that simplehash.h will only ever call us with the first
547 : * argument being an actual table entry, and the second argument being
548 : * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
549 : * could be supported too, but is not currently required.
550 : */
551 : Assert(tuple1 != NULL);
552 5607252 : slot1 = hashtable->tableslot;
553 5607252 : ExecStoreMinimalTuple(tuple1, slot1, false);
554 : Assert(tuple2 == NULL);
555 5607252 : slot2 = hashtable->inputslot;
556 :
557 : /* For crosstype comparisons, the inputslot must be first */
558 5607252 : econtext->ecxt_innertuple = slot2;
559 5607252 : econtext->ecxt_outertuple = slot1;
560 5607252 : return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
561 : }
|