Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * execGrouping.c
4 : * executor utility routines for grouping, hashing, and aggregation
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/execGrouping.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/parallel.h"
18 : #include "common/hashfn.h"
19 : #include "executor/executor.h"
20 : #include "miscadmin.h"
21 : #include "utils/lsyscache.h"
22 :
23 : static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
24 : static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
25 : const MinimalTuple tuple);
26 : static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
27 : TupleTableSlot *slot,
28 : bool *isnew, uint32 hash);
29 :
30 : /*
31 : * Define parameters for tuple hash table code generation. The interface is
32 : * *also* declared in execnodes.h (to generate the types, which are externally
33 : * visible).
34 : */
35 : #define SH_PREFIX tuplehash
36 : #define SH_ELEMENT_TYPE TupleHashEntryData
37 : #define SH_KEY_TYPE MinimalTuple
38 : #define SH_KEY firstTuple
39 : #define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
40 : #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
41 : #define SH_SCOPE extern
42 : #define SH_STORE_HASH
43 : #define SH_GET_HASH(tb, a) a->hash
44 : #define SH_DEFINE
45 : #include "lib/simplehash.h"
46 :
47 :
48 : /*****************************************************************************
49 : * Utility routines for grouping tuples together
50 : *****************************************************************************/
51 :
52 : /*
53 : * execTuplesMatchPrepare
54 : * Build expression that can be evaluated using ExecQual(), returning
55 : * whether an ExprContext's inner/outer tuples are NOT DISTINCT
56 : */
57 : ExprState *
58 10474 : execTuplesMatchPrepare(TupleDesc desc,
59 : int numCols,
60 : const AttrNumber *keyColIdx,
61 : const Oid *eqOperators,
62 : const Oid *collations,
63 : PlanState *parent)
64 : {
65 10474 : Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
66 : int i;
67 : ExprState *expr;
68 :
69 10474 : if (numCols == 0)
70 160 : return NULL;
71 :
72 : /* lookup equality functions */
73 28048 : for (i = 0; i < numCols; i++)
74 17734 : eqFunctions[i] = get_opcode(eqOperators[i]);
75 :
76 : /* build actual expression */
77 10314 : expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
78 : numCols, keyColIdx, eqFunctions, collations,
79 : parent);
80 :
81 10314 : return expr;
82 : }
83 :
84 : /*
85 : * execTuplesHashPrepare
86 : * Look up the equality and hashing functions needed for a TupleHashTable.
87 : *
88 : * This is similar to execTuplesMatchPrepare, but we also need to find the
89 : * hash functions associated with the equality operators. *eqFunctions and
90 : * *hashFunctions receive the palloc'd result arrays.
91 : *
92 : * Note: we expect that the given operators are not cross-type comparisons.
93 : */
94 : void
95 6278 : execTuplesHashPrepare(int numCols,
96 : const Oid *eqOperators,
97 : Oid **eqFuncOids,
98 : FmgrInfo **hashFunctions)
99 : {
100 : int i;
101 :
102 6278 : *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
103 6278 : *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
104 :
105 16930 : for (i = 0; i < numCols; i++)
106 : {
107 10652 : Oid eq_opr = eqOperators[i];
108 : Oid eq_function;
109 : Oid left_hash_function;
110 : Oid right_hash_function;
111 :
112 10652 : eq_function = get_opcode(eq_opr);
113 10652 : if (!get_op_hash_functions(eq_opr,
114 : &left_hash_function, &right_hash_function))
115 0 : elog(ERROR, "could not find hash function for hash operator %u",
116 : eq_opr);
117 : /* We're not supporting cross-type cases here */
118 : Assert(left_hash_function == right_hash_function);
119 10652 : (*eqFuncOids)[i] = eq_function;
120 10652 : fmgr_info(right_hash_function, &(*hashFunctions)[i]);
121 : }
122 6278 : }
123 :
124 :
125 : /*****************************************************************************
126 : * Utility routines for all-in-memory hash tables
127 : *
128 : * These routines build hash tables for grouping tuples together (eg, for
129 : * hash aggregation). There is one entry for each not-distinct set of tuples
130 : * presented.
131 : *****************************************************************************/
132 :
133 : /*
134 : * Construct an empty TupleHashTable
135 : *
136 : * numCols, keyColIdx: identify the tuple fields to use as lookup key
137 : * eqfunctions: equality comparison functions to use
138 : * hashfunctions: datatype-specific hashing functions to use
139 : * nbuckets: initial estimate of hashtable size
140 : * additionalsize: size of data stored in ->additional
141 : * metacxt: memory context for long-lived allocation, but not per-entry data
142 : * tablecxt: memory context in which to store table entries
143 : * tempcxt: short-lived context for evaluation hash and comparison functions
144 : *
145 : * The function arrays may be made with execTuplesHashPrepare(). Note they
146 : * are not cross-type functions, but expect to see the table datatype(s)
147 : * on both sides.
148 : *
149 : * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
150 : * storage that will live as long as the hashtable does.
151 : */
152 : TupleHashTable
153 5940 : BuildTupleHashTableExt(PlanState *parent,
154 : TupleDesc inputDesc,
155 : int numCols, AttrNumber *keyColIdx,
156 : const Oid *eqfuncoids,
157 : FmgrInfo *hashfunctions,
158 : Oid *collations,
159 : long nbuckets, Size additionalsize,
160 : MemoryContext metacxt,
161 : MemoryContext tablecxt,
162 : MemoryContext tempcxt,
163 : bool use_variable_hash_iv)
164 : {
165 : TupleHashTable hashtable;
166 5940 : Size entrysize = sizeof(TupleHashEntryData) + additionalsize;
167 : Size hash_mem_limit;
168 : MemoryContext oldcontext;
169 : bool allow_jit;
170 :
171 : Assert(nbuckets > 0);
172 :
173 : /* Limit initial table size request to not more than hash_mem */
174 5940 : hash_mem_limit = get_hash_memory_limit() / entrysize;
175 5940 : if (nbuckets > hash_mem_limit)
176 24 : nbuckets = hash_mem_limit;
177 :
178 5940 : oldcontext = MemoryContextSwitchTo(metacxt);
179 :
180 5940 : hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
181 :
182 5940 : hashtable->numCols = numCols;
183 5940 : hashtable->keyColIdx = keyColIdx;
184 5940 : hashtable->tab_hash_funcs = hashfunctions;
185 5940 : hashtable->tab_collations = collations;
186 5940 : hashtable->tablecxt = tablecxt;
187 5940 : hashtable->tempcxt = tempcxt;
188 5940 : hashtable->entrysize = entrysize;
189 5940 : hashtable->tableslot = NULL; /* will be made on first lookup */
190 5940 : hashtable->inputslot = NULL;
191 5940 : hashtable->in_hash_funcs = NULL;
192 5940 : hashtable->cur_eq_func = NULL;
193 :
194 : /*
195 : * If parallelism is in use, even if the leader backend is performing the
196 : * scan itself, we don't want to create the hashtable exactly the same way
197 : * in all workers. As hashtables are iterated over in keyspace-order,
198 : * doing so in all processes in the same way is likely to lead to
199 : * "unbalanced" hashtables when the table size initially is
200 : * underestimated.
201 : */
202 5940 : if (use_variable_hash_iv)
203 678 : hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
204 : else
205 5262 : hashtable->hash_iv = 0;
206 :
207 5940 : hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
208 :
209 : /*
210 : * We copy the input tuple descriptor just for safety --- we assume all
211 : * input tuples will have equivalent descriptors.
212 : */
213 5940 : hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
214 : &TTSOpsMinimalTuple);
215 :
216 : /*
217 : * If the old reset interface is used (i.e. BuildTupleHashTable, rather
218 : * than BuildTupleHashTableExt), allowing JIT would lead to the generated
219 : * functions to a) live longer than the query b) be re-generated each time
220 : * the table is being reset. Therefore prevent JIT from being used in that
221 : * case, by not providing a parent node (which prevents accessing the
222 : * JitContext in the EState).
223 : */
224 5940 : allow_jit = metacxt != tablecxt;
225 :
226 : /* build comparator for all columns */
227 : /* XXX: should we support non-minimal tuples for the inputslot? */
228 5940 : hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
229 : &TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
230 : numCols,
231 : keyColIdx, eqfuncoids, collations,
232 : allow_jit ? parent : NULL);
233 :
234 : /*
235 : * While not pretty, it's ok to not shut down this context, but instead
236 : * rely on the containing memory context being reset, as
237 : * ExecBuildGroupingEqual() only builds a very simple expression calling
238 : * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
239 : */
240 5940 : hashtable->exprcontext = CreateStandaloneExprContext();
241 :
242 5940 : MemoryContextSwitchTo(oldcontext);
243 :
244 5940 : return hashtable;
245 : }
246 :
247 : /*
248 : * BuildTupleHashTable is a backwards-compatibility wrapper for
249 : * BuildTupleHashTableExt(), that allocates the hashtable's metadata in
250 : * tablecxt. Note that hashtables created this way cannot be reset leak-free
251 : * with ResetTupleHashTable().
252 : */
253 : TupleHashTable
254 0 : BuildTupleHashTable(PlanState *parent,
255 : TupleDesc inputDesc,
256 : int numCols, AttrNumber *keyColIdx,
257 : const Oid *eqfuncoids,
258 : FmgrInfo *hashfunctions,
259 : Oid *collations,
260 : long nbuckets, Size additionalsize,
261 : MemoryContext tablecxt,
262 : MemoryContext tempcxt,
263 : bool use_variable_hash_iv)
264 : {
265 0 : return BuildTupleHashTableExt(parent,
266 : inputDesc,
267 : numCols, keyColIdx,
268 : eqfuncoids,
269 : hashfunctions,
270 : collations,
271 : nbuckets, additionalsize,
272 : tablecxt,
273 : tablecxt,
274 : tempcxt,
275 : use_variable_hash_iv);
276 : }
277 :
278 : /*
279 : * Reset contents of the hashtable to be empty, preserving all the non-content
280 : * state. Note that the tablecxt passed to BuildTupleHashTableExt() should
281 : * also be reset, otherwise there will be leaks.
282 : */
283 : void
284 182382 : ResetTupleHashTable(TupleHashTable hashtable)
285 : {
286 182382 : tuplehash_reset(hashtable->hashtab);
287 182382 : }
288 :
289 : /*
290 : * Find or create a hashtable entry for the tuple group containing the
291 : * given tuple. The tuple must be the same type as the hashtable entries.
292 : *
293 : * If isnew is NULL, we do not create new entries; we return NULL if no
294 : * match is found.
295 : *
296 : * If hash is not NULL, we set it to the calculated hash value. This allows
297 : * callers access to the hash value even if no entry is returned.
298 : *
299 : * If isnew isn't NULL, then a new entry is created if no existing entry
300 : * matches. On return, *isnew is true if the entry is newly created,
301 : * false if it existed already. ->additional_data in the new entry has
302 : * been zeroed.
303 : */
304 : TupleHashEntry
305 6455664 : LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
306 : bool *isnew, uint32 *hash)
307 : {
308 : TupleHashEntry entry;
309 : MemoryContext oldContext;
310 : uint32 local_hash;
311 :
312 : /* Need to run the hash functions in short-lived context */
313 6455664 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
314 :
315 : /* set up data needed by hash and match functions */
316 6455664 : hashtable->inputslot = slot;
317 6455664 : hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
318 6455664 : hashtable->cur_eq_func = hashtable->tab_eq_func;
319 :
320 6455664 : local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
321 6455658 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
322 :
323 6455658 : if (hash != NULL)
324 5509912 : *hash = local_hash;
325 :
326 : Assert(entry == NULL || entry->hash == local_hash);
327 :
328 6455658 : MemoryContextSwitchTo(oldContext);
329 :
330 6455658 : return entry;
331 : }
332 :
333 : /*
334 : * Compute the hash value for a tuple
335 : */
336 : uint32
337 0 : TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
338 : {
339 : MemoryContext oldContext;
340 : uint32 hash;
341 :
342 0 : hashtable->inputslot = slot;
343 0 : hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
344 :
345 : /* Need to run the hash functions in short-lived context */
346 0 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
347 :
348 0 : hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
349 :
350 0 : MemoryContextSwitchTo(oldContext);
351 :
352 0 : return hash;
353 : }
354 :
355 : /*
356 : * A variant of LookupTupleHashEntry for callers that have already computed
357 : * the hash value.
358 : */
359 : TupleHashEntry
360 655824 : LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
361 : bool *isnew, uint32 hash)
362 : {
363 : TupleHashEntry entry;
364 : MemoryContext oldContext;
365 :
366 : /* Need to run the hash functions in short-lived context */
367 655824 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
368 :
369 : /* set up data needed by hash and match functions */
370 655824 : hashtable->inputslot = slot;
371 655824 : hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
372 655824 : hashtable->cur_eq_func = hashtable->tab_eq_func;
373 :
374 655824 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
375 : Assert(entry == NULL || entry->hash == hash);
376 :
377 655824 : MemoryContextSwitchTo(oldContext);
378 :
379 655824 : return entry;
380 : }
381 :
382 : /*
383 : * Search for a hashtable entry matching the given tuple. No entry is
384 : * created if there's not a match. This is similar to the non-creating
385 : * case of LookupTupleHashEntry, except that it supports cross-type
386 : * comparisons, in which the given tuple is not of the same type as the
387 : * table entries. The caller must provide the hash functions to use for
388 : * the input tuple, as well as the equality functions, since these may be
389 : * different from the table's internal functions.
390 : */
391 : TupleHashEntry
392 824424 : FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
393 : ExprState *eqcomp,
394 : FmgrInfo *hashfunctions)
395 : {
396 : TupleHashEntry entry;
397 : MemoryContext oldContext;
398 : MinimalTuple key;
399 :
400 : /* Need to run the hash functions in short-lived context */
401 824424 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
402 :
403 : /* Set up data needed by hash and match functions */
404 824424 : hashtable->inputslot = slot;
405 824424 : hashtable->in_hash_funcs = hashfunctions;
406 824424 : hashtable->cur_eq_func = eqcomp;
407 :
408 : /* Search the hash table */
409 824424 : key = NULL; /* flag to reference inputslot */
410 824424 : entry = tuplehash_lookup(hashtable->hashtab, key);
411 824424 : MemoryContextSwitchTo(oldContext);
412 :
413 824424 : return entry;
414 : }
415 :
416 : /*
417 : * If tuple is NULL, use the input slot instead. This convention avoids the
418 : * need to materialize virtual input tuples unless they actually need to get
419 : * copied into the table.
420 : *
421 : * Also, the caller must select an appropriate memory context for running
422 : * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
423 : */
424 : static uint32
425 7280088 : TupleHashTableHash_internal(struct tuplehash_hash *tb,
426 : const MinimalTuple tuple)
427 : {
428 7280088 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
429 7280088 : int numCols = hashtable->numCols;
430 7280088 : AttrNumber *keyColIdx = hashtable->keyColIdx;
431 7280088 : uint32 hashkey = hashtable->hash_iv;
432 : TupleTableSlot *slot;
433 : FmgrInfo *hashfunctions;
434 : int i;
435 :
436 7280088 : if (tuple == NULL)
437 : {
438 : /* Process the current input tuple for the table */
439 7280088 : slot = hashtable->inputslot;
440 7280088 : hashfunctions = hashtable->in_hash_funcs;
441 : }
442 : else
443 : {
444 : /*
445 : * Process a tuple already stored in the table.
446 : *
447 : * (this case never actually occurs due to the way simplehash.h is
448 : * used, as the hash-value is stored in the entries)
449 : */
450 0 : slot = hashtable->tableslot;
451 0 : ExecStoreMinimalTuple(tuple, slot, false);
452 0 : hashfunctions = hashtable->tab_hash_funcs;
453 : }
454 :
455 18876438 : for (i = 0; i < numCols; i++)
456 : {
457 11596356 : AttrNumber att = keyColIdx[i];
458 : Datum attr;
459 : bool isNull;
460 :
461 : /* combine successive hashkeys by rotating */
462 11596356 : hashkey = pg_rotate_left32(hashkey, 1);
463 :
464 11596356 : attr = slot_getattr(slot, att, &isNull);
465 :
466 11596356 : if (!isNull) /* treat nulls as having hash key 0 */
467 : {
468 : uint32 hkey;
469 :
470 11378972 : hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
471 11378978 : hashtable->tab_collations[i],
472 : attr));
473 11378972 : hashkey ^= hkey;
474 : }
475 : }
476 :
477 : /*
478 : * The way hashes are combined above, among each other and with the IV,
479 : * doesn't lead to good bit perturbation. As the IV's goal is to lead to
480 : * achieve that, perform a round of hashing of the combined hash -
481 : * resulting in near perfect perturbation.
482 : */
483 7280082 : return murmurhash32(hashkey);
484 : }
485 :
486 : /*
487 : * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
488 : * so that we can avoid switching the memory context multiple times for
489 : * LookupTupleHashEntry.
490 : *
491 : * NB: This function may or may not change the memory context. Caller is
492 : * expected to change it back.
493 : */
494 : static inline TupleHashEntry
495 7111482 : LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
496 : bool *isnew, uint32 hash)
497 : {
498 : TupleHashEntryData *entry;
499 : bool found;
500 : MinimalTuple key;
501 :
502 7111482 : key = NULL; /* flag to reference inputslot */
503 :
504 7111482 : if (isnew)
505 : {
506 5993364 : entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
507 :
508 5993364 : if (found)
509 : {
510 : /* found pre-existing entry */
511 5084738 : *isnew = false;
512 : }
513 : else
514 : {
515 : /* created new entry */
516 908626 : *isnew = true;
517 : /* zero caller data */
518 908626 : entry->additional = NULL;
519 908626 : MemoryContextSwitchTo(hashtable->tablecxt);
520 : /* Copy the first tuple into the table context */
521 908626 : entry->firstTuple = ExecCopySlotMinimalTuple(slot);
522 : }
523 : }
524 : else
525 : {
526 1118118 : entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
527 : }
528 :
529 7111482 : return entry;
530 : }
531 :
532 : /*
533 : * See whether two tuples (presumably of the same hash value) match
534 : */
535 : static int
536 5592672 : TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
537 : {
538 : TupleTableSlot *slot1;
539 : TupleTableSlot *slot2;
540 5592672 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
541 5592672 : ExprContext *econtext = hashtable->exprcontext;
542 :
543 : /*
544 : * We assume that simplehash.h will only ever call us with the first
545 : * argument being an actual table entry, and the second argument being
546 : * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
547 : * could be supported too, but is not currently required.
548 : */
549 : Assert(tuple1 != NULL);
550 5592672 : slot1 = hashtable->tableslot;
551 5592672 : ExecStoreMinimalTuple(tuple1, slot1, false);
552 : Assert(tuple2 == NULL);
553 5592672 : slot2 = hashtable->inputslot;
554 :
555 : /* For crosstype comparisons, the inputslot must be first */
556 5592672 : econtext->ecxt_innertuple = slot2;
557 5592672 : econtext->ecxt_outertuple = slot1;
558 5592672 : return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
559 : }
|