Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * execGrouping.c
4 : * executor utility routines for grouping, hashing, and aggregation
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/execGrouping.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/parallel.h"
18 : #include "common/hashfn.h"
19 : #include "executor/executor.h"
20 : #include "miscadmin.h"
21 : #include "utils/lsyscache.h"
22 : #include "utils/memutils.h"
23 :
24 : static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
25 : static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
26 : const MinimalTuple tuple);
27 : static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
28 : TupleTableSlot *slot,
29 : bool *isnew, uint32 hash);
30 :
31 : /*
32 : * Define parameters for tuple hash table code generation. The interface is
33 : * *also* declared in execnodes.h (to generate the types, which are externally
34 : * visible).
35 : */
36 : #define SH_PREFIX tuplehash
37 : #define SH_ELEMENT_TYPE TupleHashEntryData
38 : #define SH_KEY_TYPE MinimalTuple
39 : #define SH_KEY firstTuple
40 : #define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
41 : #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
42 : #define SH_SCOPE extern
43 : #define SH_STORE_HASH
44 : #define SH_GET_HASH(tb, a) a->hash
45 : #define SH_DEFINE
46 : #include "lib/simplehash.h"
47 :
48 :
49 : /*****************************************************************************
50 : * Utility routines for grouping tuples together
51 : *****************************************************************************/
52 :
53 : /*
54 : * execTuplesMatchPrepare
55 : * Build expression that can be evaluated using ExecQual(), returning
56 : * whether an ExprContext's inner/outer tuples are NOT DISTINCT
57 : */
58 : ExprState *
59 7310 : execTuplesMatchPrepare(TupleDesc desc,
60 : int numCols,
61 : const AttrNumber *keyColIdx,
62 : const Oid *eqOperators,
63 : const Oid *collations,
64 : PlanState *parent)
65 : {
66 7310 : Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
67 : int i;
68 : ExprState *expr;
69 :
70 7310 : if (numCols == 0)
71 72 : return NULL;
72 :
73 : /* lookup equality functions */
74 16060 : for (i = 0; i < numCols; i++)
75 8822 : eqFunctions[i] = get_opcode(eqOperators[i]);
76 :
77 : /* build actual expression */
78 7238 : expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
79 : numCols, keyColIdx, eqFunctions, collations,
80 : parent);
81 :
82 7238 : return expr;
83 : }
84 :
85 : /*
86 : * execTuplesHashPrepare
87 : * Look up the equality and hashing functions needed for a TupleHashTable.
88 : *
89 : * This is similar to execTuplesMatchPrepare, but we also need to find the
90 : * hash functions associated with the equality operators. *eqFunctions and
91 : * *hashFunctions receive the palloc'd result arrays.
92 : *
93 : * Note: we expect that the given operators are not cross-type comparisons.
94 : */
95 : void
96 8652 : execTuplesHashPrepare(int numCols,
97 : const Oid *eqOperators,
98 : Oid **eqFuncOids,
99 : FmgrInfo **hashFunctions)
100 : {
101 : int i;
102 :
103 8652 : *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
104 8652 : *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
105 :
106 26570 : for (i = 0; i < numCols; i++)
107 : {
108 17918 : Oid eq_opr = eqOperators[i];
109 : Oid eq_function;
110 : Oid left_hash_function;
111 : Oid right_hash_function;
112 :
113 17918 : eq_function = get_opcode(eq_opr);
114 17918 : if (!get_op_hash_functions(eq_opr,
115 : &left_hash_function, &right_hash_function))
116 0 : elog(ERROR, "could not find hash function for hash operator %u",
117 : eq_opr);
118 : /* We're not supporting cross-type cases here */
119 : Assert(left_hash_function == right_hash_function);
120 17918 : (*eqFuncOids)[i] = eq_function;
121 17918 : fmgr_info(right_hash_function, &(*hashFunctions)[i]);
122 : }
123 8652 : }
124 :
125 :
126 : /*****************************************************************************
127 : * Utility routines for all-in-memory hash tables
128 : *
129 : * These routines build hash tables for grouping tuples together (eg, for
130 : * hash aggregation). There is one entry for each not-distinct set of tuples
131 : * presented.
132 : *****************************************************************************/
133 :
134 : /*
135 : * Construct an empty TupleHashTable
136 : *
137 : * numCols, keyColIdx: identify the tuple fields to use as lookup key
138 : * eqfunctions: equality comparison functions to use
139 : * hashfunctions: datatype-specific hashing functions to use
140 : * nbuckets: initial estimate of hashtable size
141 : * additionalsize: size of data stored in ->additional
142 : * metacxt: memory context for long-lived allocation, but not per-entry data
143 : * tablecxt: memory context in which to store table entries
144 : * tempcxt: short-lived context for evaluation hash and comparison functions
145 : *
146 : * The function arrays may be made with execTuplesHashPrepare(). Note they
147 : * are not cross-type functions, but expect to see the table datatype(s)
148 : * on both sides.
149 : *
150 : * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
151 : * storage that will live as long as the hashtable does.
152 : */
153 : TupleHashTable
154 8338 : BuildTupleHashTableExt(PlanState *parent,
155 : TupleDesc inputDesc,
156 : int numCols, AttrNumber *keyColIdx,
157 : const Oid *eqfuncoids,
158 : FmgrInfo *hashfunctions,
159 : Oid *collations,
160 : long nbuckets, Size additionalsize,
161 : MemoryContext metacxt,
162 : MemoryContext tablecxt,
163 : MemoryContext tempcxt,
164 : bool use_variable_hash_iv)
165 : {
166 : TupleHashTable hashtable;
167 8338 : Size entrysize = sizeof(TupleHashEntryData) + additionalsize;
168 : Size hash_mem_limit;
169 : MemoryContext oldcontext;
170 : bool allow_jit;
171 :
172 : Assert(nbuckets > 0);
173 :
174 : /* Limit initial table size request to not more than hash_mem */
175 8338 : hash_mem_limit = get_hash_memory_limit() / entrysize;
176 8338 : if (nbuckets > hash_mem_limit)
177 24 : nbuckets = hash_mem_limit;
178 :
179 8338 : oldcontext = MemoryContextSwitchTo(metacxt);
180 :
181 8338 : hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
182 :
183 8338 : hashtable->numCols = numCols;
184 8338 : hashtable->keyColIdx = keyColIdx;
185 8338 : hashtable->tab_hash_funcs = hashfunctions;
186 8338 : hashtable->tab_collations = collations;
187 8338 : hashtable->tablecxt = tablecxt;
188 8338 : hashtable->tempcxt = tempcxt;
189 8338 : hashtable->entrysize = entrysize;
190 8338 : hashtable->tableslot = NULL; /* will be made on first lookup */
191 8338 : hashtable->inputslot = NULL;
192 8338 : hashtable->in_hash_funcs = NULL;
193 8338 : hashtable->cur_eq_func = NULL;
194 :
195 : /*
196 : * If parallelism is in use, even if the leader backend is performing the
197 : * scan itself, we don't want to create the hashtable exactly the same way
198 : * in all workers. As hashtables are iterated over in keyspace-order,
199 : * doing so in all processes in the same way is likely to lead to
200 : * "unbalanced" hashtables when the table size initially is
201 : * underestimated.
202 : */
203 8338 : if (use_variable_hash_iv)
204 678 : hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
205 : else
206 7660 : hashtable->hash_iv = 0;
207 :
208 8338 : hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
209 :
210 : /*
211 : * We copy the input tuple descriptor just for safety --- we assume all
212 : * input tuples will have equivalent descriptors.
213 : */
214 8338 : hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
215 : &TTSOpsMinimalTuple);
216 :
217 : /*
218 : * If the old reset interface is used (i.e. BuildTupleHashTable, rather
219 : * than BuildTupleHashTableExt), allowing JIT would lead to the generated
220 : * functions to a) live longer than the query b) be re-generated each time
221 : * the table is being reset. Therefore prevent JIT from being used in that
222 : * case, by not providing a parent node (which prevents accessing the
223 : * JitContext in the EState).
224 : */
225 8338 : allow_jit = metacxt != tablecxt;
226 :
227 : /* build comparator for all columns */
228 : /* XXX: should we support non-minimal tuples for the inputslot? */
229 8338 : hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
230 : &TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
231 : numCols,
232 : keyColIdx, eqfuncoids, collations,
233 : allow_jit ? parent : NULL);
234 :
235 : /*
236 : * While not pretty, it's ok to not shut down this context, but instead
237 : * rely on the containing memory context being reset, as
238 : * ExecBuildGroupingEqual() only builds a very simple expression calling
239 : * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
240 : */
241 8338 : hashtable->exprcontext = CreateStandaloneExprContext();
242 :
243 8338 : MemoryContextSwitchTo(oldcontext);
244 :
245 8338 : return hashtable;
246 : }
247 :
248 : /*
249 : * BuildTupleHashTable is a backwards-compatibility wrapper for
250 : * BuildTupleHashTableExt(), that allocates the hashtable's metadata in
251 : * tablecxt. Note that hashtables created this way cannot be reset leak-free
252 : * with ResetTupleHashTable().
253 : */
254 : TupleHashTable
255 0 : BuildTupleHashTable(PlanState *parent,
256 : TupleDesc inputDesc,
257 : int numCols, AttrNumber *keyColIdx,
258 : const Oid *eqfuncoids,
259 : FmgrInfo *hashfunctions,
260 : Oid *collations,
261 : long nbuckets, Size additionalsize,
262 : MemoryContext tablecxt,
263 : MemoryContext tempcxt,
264 : bool use_variable_hash_iv)
265 : {
266 0 : return BuildTupleHashTableExt(parent,
267 : inputDesc,
268 : numCols, keyColIdx,
269 : eqfuncoids,
270 : hashfunctions,
271 : collations,
272 : nbuckets, additionalsize,
273 : tablecxt,
274 : tablecxt,
275 : tempcxt,
276 : use_variable_hash_iv);
277 : }
278 :
279 : /*
280 : * Reset contents of the hashtable to be empty, preserving all the non-content
281 : * state. Note that the tablecxt passed to BuildTupleHashTableExt() should
282 : * also be reset, otherwise there will be leaks.
283 : */
284 : void
285 190002 : ResetTupleHashTable(TupleHashTable hashtable)
286 : {
287 190002 : tuplehash_reset(hashtable->hashtab);
288 190002 : }
289 :
290 : /*
291 : * Find or create a hashtable entry for the tuple group containing the
292 : * given tuple. The tuple must be the same type as the hashtable entries.
293 : *
294 : * If isnew is NULL, we do not create new entries; we return NULL if no
295 : * match is found.
296 : *
297 : * If hash is not NULL, we set it to the calculated hash value. This allows
298 : * callers access to the hash value even if no entry is returned.
299 : *
300 : * If isnew isn't NULL, then a new entry is created if no existing entry
301 : * matches. On return, *isnew is true if the entry is newly created,
302 : * false if it existed already. ->additional_data in the new entry has
303 : * been zeroed.
304 : */
305 : TupleHashEntry
306 6393172 : LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
307 : bool *isnew, uint32 *hash)
308 : {
309 : TupleHashEntry entry;
310 : MemoryContext oldContext;
311 : uint32 local_hash;
312 :
313 : /* Need to run the hash functions in short-lived context */
314 6393172 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
315 :
316 : /* set up data needed by hash and match functions */
317 6393172 : hashtable->inputslot = slot;
318 6393172 : hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
319 6393172 : hashtable->cur_eq_func = hashtable->tab_eq_func;
320 :
321 6393172 : local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
322 6393166 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
323 :
324 6393166 : if (hash != NULL)
325 5448746 : *hash = local_hash;
326 :
327 : Assert(entry == NULL || entry->hash == local_hash);
328 :
329 6393166 : MemoryContextSwitchTo(oldContext);
330 :
331 6393166 : return entry;
332 : }
333 :
334 : /*
335 : * Compute the hash value for a tuple
336 : */
337 : uint32
338 0 : TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
339 : {
340 : MemoryContext oldContext;
341 : uint32 hash;
342 :
343 0 : hashtable->inputslot = slot;
344 0 : hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
345 :
346 : /* Need to run the hash functions in short-lived context */
347 0 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
348 :
349 0 : hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
350 :
351 0 : MemoryContextSwitchTo(oldContext);
352 :
353 0 : return hash;
354 : }
355 :
356 : /*
357 : * A variant of LookupTupleHashEntry for callers that have already computed
358 : * the hash value.
359 : */
360 : TupleHashEntry
361 655824 : LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
362 : bool *isnew, uint32 hash)
363 : {
364 : TupleHashEntry entry;
365 : MemoryContext oldContext;
366 :
367 : /* Need to run the hash functions in short-lived context */
368 655824 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
369 :
370 : /* set up data needed by hash and match functions */
371 655824 : hashtable->inputslot = slot;
372 655824 : hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
373 655824 : hashtable->cur_eq_func = hashtable->tab_eq_func;
374 :
375 655824 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
376 : Assert(entry == NULL || entry->hash == hash);
377 :
378 655824 : MemoryContextSwitchTo(oldContext);
379 :
380 655824 : return entry;
381 : }
382 :
383 : /*
384 : * Search for a hashtable entry matching the given tuple. No entry is
385 : * created if there's not a match. This is similar to the non-creating
386 : * case of LookupTupleHashEntry, except that it supports cross-type
387 : * comparisons, in which the given tuple is not of the same type as the
388 : * table entries. The caller must provide the hash functions to use for
389 : * the input tuple, as well as the equality functions, since these may be
390 : * different from the table's internal functions.
391 : */
392 : TupleHashEntry
393 808834 : FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
394 : ExprState *eqcomp,
395 : FmgrInfo *hashfunctions)
396 : {
397 : TupleHashEntry entry;
398 : MemoryContext oldContext;
399 : MinimalTuple key;
400 :
401 : /* Need to run the hash functions in short-lived context */
402 808834 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
403 :
404 : /* Set up data needed by hash and match functions */
405 808834 : hashtable->inputslot = slot;
406 808834 : hashtable->in_hash_funcs = hashfunctions;
407 808834 : hashtable->cur_eq_func = eqcomp;
408 :
409 : /* Search the hash table */
410 808834 : key = NULL; /* flag to reference inputslot */
411 808834 : entry = tuplehash_lookup(hashtable->hashtab, key);
412 808834 : MemoryContextSwitchTo(oldContext);
413 :
414 808834 : return entry;
415 : }
416 :
417 : /*
418 : * If tuple is NULL, use the input slot instead. This convention avoids the
419 : * need to materialize virtual input tuples unless they actually need to get
420 : * copied into the table.
421 : *
422 : * Also, the caller must select an appropriate memory context for running
423 : * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
424 : */
425 : static uint32
426 7202006 : TupleHashTableHash_internal(struct tuplehash_hash *tb,
427 : const MinimalTuple tuple)
428 : {
429 7202006 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
430 7202006 : int numCols = hashtable->numCols;
431 7202006 : AttrNumber *keyColIdx = hashtable->keyColIdx;
432 7202006 : uint32 hashkey = hashtable->hash_iv;
433 : TupleTableSlot *slot;
434 : FmgrInfo *hashfunctions;
435 : int i;
436 :
437 7202006 : if (tuple == NULL)
438 : {
439 : /* Process the current input tuple for the table */
440 7202006 : slot = hashtable->inputslot;
441 7202006 : hashfunctions = hashtable->in_hash_funcs;
442 : }
443 : else
444 : {
445 : /*
446 : * Process a tuple already stored in the table.
447 : *
448 : * (this case never actually occurs due to the way simplehash.h is
449 : * used, as the hash-value is stored in the entries)
450 : */
451 0 : slot = hashtable->tableslot;
452 0 : ExecStoreMinimalTuple(tuple, slot, false);
453 0 : hashfunctions = hashtable->tab_hash_funcs;
454 : }
455 :
456 18729730 : for (i = 0; i < numCols; i++)
457 : {
458 11527730 : AttrNumber att = keyColIdx[i];
459 : Datum attr;
460 : bool isNull;
461 :
462 : /* combine successive hashkeys by rotating */
463 11527730 : hashkey = pg_rotate_left32(hashkey, 1);
464 :
465 11527730 : attr = slot_getattr(slot, att, &isNull);
466 :
467 11527730 : if (!isNull) /* treat nulls as having hash key 0 */
468 : {
469 : uint32 hkey;
470 :
471 11304192 : hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
472 11304198 : hashtable->tab_collations[i],
473 : attr));
474 11304192 : hashkey ^= hkey;
475 : }
476 : }
477 :
478 : /*
479 : * The way hashes are combined above, among each other and with the IV,
480 : * doesn't lead to good bit perturbation. As the IV's goal is to lead to
481 : * achieve that, perform a round of hashing of the combined hash -
482 : * resulting in near perfect perturbation.
483 : */
484 7202000 : return murmurhash32(hashkey);
485 : }
486 :
487 : /*
488 : * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
489 : * so that we can avoid switching the memory context multiple times for
490 : * LookupTupleHashEntry.
491 : *
492 : * NB: This function may or may not change the memory context. Caller is
493 : * expected to change it back.
494 : */
495 : static inline TupleHashEntry
496 7048990 : LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
497 : bool *isnew, uint32 hash)
498 : {
499 : TupleHashEntryData *entry;
500 : bool found;
501 : MinimalTuple key;
502 :
503 7048990 : key = NULL; /* flag to reference inputslot */
504 :
505 7048990 : if (isnew)
506 : {
507 5931474 : entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
508 :
509 5931474 : if (found)
510 : {
511 : /* found pre-existing entry */
512 5039148 : *isnew = false;
513 : }
514 : else
515 : {
516 : /* created new entry */
517 892326 : *isnew = true;
518 : /* zero caller data */
519 892326 : entry->additional = NULL;
520 892326 : MemoryContextSwitchTo(hashtable->tablecxt);
521 : /* Copy the first tuple into the table context */
522 892326 : entry->firstTuple = ExecCopySlotMinimalTuple(slot);
523 : }
524 : }
525 : else
526 : {
527 1117516 : entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
528 : }
529 :
530 7048990 : return entry;
531 : }
532 :
533 : /*
534 : * See whether two tuples (presumably of the same hash value) match
535 : */
536 : static int
537 5539426 : TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
538 : {
539 : TupleTableSlot *slot1;
540 : TupleTableSlot *slot2;
541 5539426 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
542 5539426 : ExprContext *econtext = hashtable->exprcontext;
543 :
544 : /*
545 : * We assume that simplehash.h will only ever call us with the first
546 : * argument being an actual table entry, and the second argument being
547 : * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
548 : * could be supported too, but is not currently required.
549 : */
550 : Assert(tuple1 != NULL);
551 5539426 : slot1 = hashtable->tableslot;
552 5539426 : ExecStoreMinimalTuple(tuple1, slot1, false);
553 : Assert(tuple2 == NULL);
554 5539426 : slot2 = hashtable->inputslot;
555 :
556 : /* For crosstype comparisons, the inputslot must be first */
557 5539426 : econtext->ecxt_innertuple = slot2;
558 5539426 : econtext->ecxt_outertuple = slot1;
559 5539426 : return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
560 : }
|