Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * hashfunc.c
4 : * Support functions for hash access method.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/hash/hashfunc.c
12 : *
13 : * NOTES
14 : * These functions are stored in pg_amproc. For each operator class
15 : * defined for hash indexes, they compute the hash value of the argument.
16 : *
17 : * Additional hash functions appear in /utils/adt/ files for various
18 : * specialized datatypes.
19 : *
20 : * It is expected that every bit of a hash function's 32-bit result is
21 : * as random as every other; failure to ensure this is likely to lead
22 : * to poor performance of hash joins, for example. In most cases a hash
23 : * function should use hash_any() or its variant hash_uint32().
24 : *-------------------------------------------------------------------------
25 : */
26 :
27 : #include "postgres.h"
28 :
29 : #include "common/hashfn.h"
30 : #include "utils/builtins.h"
31 : #include "utils/float.h"
32 : #include "utils/fmgrprotos.h"
33 : #include "utils/pg_locale.h"
34 : #include "varatt.h"
35 :
36 : /*
37 : * Datatype-specific hash functions.
38 : *
39 : * These support both hash indexes and hash joins.
40 : *
41 : * NOTE: some of these are also used by catcache operations, without
42 : * any direct connection to hash indexes. The common hash_any
43 : * routine is likewise used by dynahash tables.
44 : */
45 :
46 : /* Note: this is used for both "char" and boolean datatypes */
47 : Datum
48 149052 : hashchar(PG_FUNCTION_ARGS)
49 : {
50 149052 : return hash_uint32((int32) PG_GETARG_CHAR(0));
51 : }
52 :
53 : Datum
54 66 : hashcharextended(PG_FUNCTION_ARGS)
55 : {
56 66 : return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
57 : }
58 :
59 : Datum
60 462732 : hashint2(PG_FUNCTION_ARGS)
61 : {
62 462732 : return hash_uint32((int32) PG_GETARG_INT16(0));
63 : }
64 :
65 : Datum
66 48 : hashint2extended(PG_FUNCTION_ARGS)
67 : {
68 48 : return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
69 : }
70 :
71 : Datum
72 28745510 : hashint4(PG_FUNCTION_ARGS)
73 : {
74 28745510 : return hash_uint32(PG_GETARG_INT32(0));
75 : }
76 :
77 : Datum
78 205208 : hashint4extended(PG_FUNCTION_ARGS)
79 : {
80 205208 : return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
81 : }
82 :
83 : Datum
84 633182 : hashint8(PG_FUNCTION_ARGS)
85 : {
86 : /*
87 : * The idea here is to produce a hash value compatible with the values
88 : * produced by hashint4 and hashint2 for logically equal inputs; this is
89 : * necessary to support cross-type hash joins across these input types.
90 : * Since all three types are signed, we can xor the high half of the int8
91 : * value if the sign is positive, or the complement of the high half when
92 : * the sign is negative.
93 : */
94 633182 : int64 val = PG_GETARG_INT64(0);
95 633182 : uint32 lohalf = (uint32) val;
96 633182 : uint32 hihalf = (uint32) (val >> 32);
97 :
98 633182 : lohalf ^= (val >= 0) ? hihalf : ~hihalf;
99 :
100 633182 : return hash_uint32(lohalf);
101 : }
102 :
103 : Datum
104 444 : hashint8extended(PG_FUNCTION_ARGS)
105 : {
106 : /* Same approach as hashint8 */
107 444 : int64 val = PG_GETARG_INT64(0);
108 444 : uint32 lohalf = (uint32) val;
109 444 : uint32 hihalf = (uint32) (val >> 32);
110 :
111 444 : lohalf ^= (val >= 0) ? hihalf : ~hihalf;
112 :
113 444 : return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
114 : }
115 :
116 : Datum
117 17847022 : hashoid(PG_FUNCTION_ARGS)
118 : {
119 17847022 : return hash_uint32((uint32) PG_GETARG_OID(0));
120 : }
121 :
122 : Datum
123 72 : hashoidextended(PG_FUNCTION_ARGS)
124 : {
125 72 : return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
126 : }
127 :
128 : Datum
129 3142 : hashenum(PG_FUNCTION_ARGS)
130 : {
131 3142 : return hash_uint32((uint32) PG_GETARG_OID(0));
132 : }
133 :
134 : Datum
135 4036 : hashenumextended(PG_FUNCTION_ARGS)
136 : {
137 4036 : return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
138 : }
139 :
140 : Datum
141 42318 : hashfloat4(PG_FUNCTION_ARGS)
142 : {
143 42318 : float4 key = PG_GETARG_FLOAT4(0);
144 : float8 key8;
145 :
146 : /*
147 : * On IEEE-float machines, minus zero and zero have different bit patterns
148 : * but should compare as equal. We must ensure that they have the same
149 : * hash value, which is most reliably done this way:
150 : */
151 42318 : if (key == (float4) 0)
152 24 : PG_RETURN_UINT32(0);
153 :
154 : /*
155 : * To support cross-type hashing of float8 and float4, we want to return
156 : * the same hash value hashfloat8 would produce for an equal float8 value.
157 : * So, widen the value to float8 and hash that. (We must do this rather
158 : * than have hashfloat8 try to narrow its value to float4; that could fail
159 : * on overflow.)
160 : */
161 42294 : key8 = key;
162 :
163 : /*
164 : * Similarly, NaNs can have different bit patterns but they should all
165 : * compare as equal. For backwards-compatibility reasons we force them to
166 : * have the hash value of a standard float8 NaN. (You'd think we could
167 : * replace key with a float4 NaN and then widen it; but on some old
168 : * platforms, that way produces a different bit pattern.)
169 : */
170 42294 : if (isnan(key8))
171 18 : key8 = get_float8_nan();
172 :
173 42294 : return hash_any((unsigned char *) &key8, sizeof(key8));
174 : }
175 :
176 : Datum
177 72 : hashfloat4extended(PG_FUNCTION_ARGS)
178 : {
179 72 : float4 key = PG_GETARG_FLOAT4(0);
180 72 : uint64 seed = PG_GETARG_INT64(1);
181 : float8 key8;
182 :
183 : /* Same approach as hashfloat4 */
184 72 : if (key == (float4) 0)
185 12 : PG_RETURN_UINT64(seed);
186 60 : key8 = key;
187 60 : if (isnan(key8))
188 0 : key8 = get_float8_nan();
189 :
190 60 : return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
191 : }
192 :
193 : Datum
194 137580 : hashfloat8(PG_FUNCTION_ARGS)
195 : {
196 137580 : float8 key = PG_GETARG_FLOAT8(0);
197 :
198 : /*
199 : * On IEEE-float machines, minus zero and zero have different bit patterns
200 : * but should compare as equal. We must ensure that they have the same
201 : * hash value, which is most reliably done this way:
202 : */
203 137580 : if (key == (float8) 0)
204 690 : PG_RETURN_UINT32(0);
205 :
206 : /*
207 : * Similarly, NaNs can have different bit patterns but they should all
208 : * compare as equal. For backwards-compatibility reasons we force them to
209 : * have the hash value of a standard NaN.
210 : */
211 136890 : if (isnan(key))
212 18 : key = get_float8_nan();
213 :
214 136890 : return hash_any((unsigned char *) &key, sizeof(key));
215 : }
216 :
217 : Datum
218 72 : hashfloat8extended(PG_FUNCTION_ARGS)
219 : {
220 72 : float8 key = PG_GETARG_FLOAT8(0);
221 72 : uint64 seed = PG_GETARG_INT64(1);
222 :
223 : /* Same approach as hashfloat8 */
224 72 : if (key == (float8) 0)
225 12 : PG_RETURN_UINT64(seed);
226 60 : if (isnan(key))
227 0 : key = get_float8_nan();
228 :
229 60 : return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
230 : }
231 :
232 : Datum
233 465322 : hashoidvector(PG_FUNCTION_ARGS)
234 : {
235 465322 : oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
236 :
237 465322 : check_valid_oidvector(key);
238 465322 : return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
239 : }
240 :
241 : Datum
242 60 : hashoidvectorextended(PG_FUNCTION_ARGS)
243 : {
244 60 : oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
245 :
246 60 : check_valid_oidvector(key);
247 120 : return hash_any_extended((unsigned char *) key->values,
248 60 : key->dim1 * sizeof(Oid),
249 60 : PG_GETARG_INT64(1));
250 : }
251 :
252 : Datum
253 565016 : hashname(PG_FUNCTION_ARGS)
254 : {
255 565016 : char *key = NameStr(*PG_GETARG_NAME(0));
256 :
257 565016 : return hash_any((unsigned char *) key, strlen(key));
258 : }
259 :
260 : Datum
261 60 : hashnameextended(PG_FUNCTION_ARGS)
262 : {
263 60 : char *key = NameStr(*PG_GETARG_NAME(0));
264 :
265 60 : return hash_any_extended((unsigned char *) key, strlen(key),
266 60 : PG_GETARG_INT64(1));
267 : }
268 :
269 : Datum
270 1533308 : hashtext(PG_FUNCTION_ARGS)
271 : {
272 1533308 : text *key = PG_GETARG_TEXT_PP(0);
273 1533308 : Oid collid = PG_GET_COLLATION();
274 : pg_locale_t mylocale;
275 : Datum result;
276 :
277 1533308 : if (!collid)
278 6 : ereport(ERROR,
279 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
280 : errmsg("could not determine which collation to use for string hashing"),
281 : errhint("Use the COLLATE clause to set the collation explicitly.")));
282 :
283 1533302 : mylocale = pg_newlocale_from_collation(collid);
284 :
285 1533302 : if (mylocale->deterministic)
286 : {
287 1530632 : result = hash_any((unsigned char *) VARDATA_ANY(key),
288 1530632 : VARSIZE_ANY_EXHDR(key));
289 : }
290 : else
291 : {
292 : Size bsize,
293 : rsize;
294 : char *buf;
295 2670 : const char *keydata = VARDATA_ANY(key);
296 2670 : size_t keylen = VARSIZE_ANY_EXHDR(key);
297 :
298 :
299 2670 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
300 2670 : buf = palloc(bsize + 1);
301 :
302 2670 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
303 :
304 : /* the second call may return a smaller value than the first */
305 2670 : if (rsize > bsize)
306 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
307 :
308 : /*
309 : * In principle, there's no reason to include the terminating NUL
310 : * character in the hash, but it was done before and the behavior must
311 : * be preserved.
312 : */
313 2670 : result = hash_any((uint8_t *) buf, bsize + 1);
314 :
315 2670 : pfree(buf);
316 : }
317 :
318 : /* Avoid leaking memory for toasted inputs */
319 1533302 : PG_FREE_IF_COPY(key, 0);
320 :
321 1533302 : return result;
322 : }
323 :
324 : Datum
325 4068 : hashtextextended(PG_FUNCTION_ARGS)
326 : {
327 4068 : text *key = PG_GETARG_TEXT_PP(0);
328 4068 : Oid collid = PG_GET_COLLATION();
329 : pg_locale_t mylocale;
330 : Datum result;
331 :
332 4068 : if (!collid)
333 0 : ereport(ERROR,
334 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
335 : errmsg("could not determine which collation to use for string hashing"),
336 : errhint("Use the COLLATE clause to set the collation explicitly.")));
337 :
338 4068 : mylocale = pg_newlocale_from_collation(collid);
339 :
340 4068 : if (mylocale->deterministic)
341 : {
342 4044 : result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
343 4044 : VARSIZE_ANY_EXHDR(key),
344 4044 : PG_GETARG_INT64(1));
345 : }
346 : else
347 : {
348 : Size bsize,
349 : rsize;
350 : char *buf;
351 24 : const char *keydata = VARDATA_ANY(key);
352 24 : size_t keylen = VARSIZE_ANY_EXHDR(key);
353 :
354 24 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
355 24 : buf = palloc(bsize + 1);
356 :
357 24 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
358 :
359 : /* the second call may return a smaller value than the first */
360 24 : if (rsize > bsize)
361 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
362 :
363 : /*
364 : * In principle, there's no reason to include the terminating NUL
365 : * character in the hash, but it was done before and the behavior must
366 : * be preserved.
367 : */
368 24 : result = hash_any_extended((uint8_t *) buf, bsize + 1,
369 24 : PG_GETARG_INT64(1));
370 :
371 24 : pfree(buf);
372 : }
373 :
374 4068 : PG_FREE_IF_COPY(key, 0);
375 :
376 4068 : return result;
377 : }
378 :
379 : /*
380 : * hashvarlena() can be used for any varlena datatype in which there are
381 : * no non-significant bits, ie, distinct bitpatterns never compare as equal.
382 : *
383 : * (However, you need to define an SQL-level wrapper function around it with
384 : * the concrete input data type; otherwise hashvalidate() won't accept it.
385 : * Moreover, at least for built-in types, a C-level wrapper function is also
386 : * recommended; otherwise, the opr_sanity test will get upset.)
387 : */
388 : Datum
389 6146 : hashvarlena(PG_FUNCTION_ARGS)
390 : {
391 6146 : varlena *key = PG_GETARG_VARLENA_PP(0);
392 : Datum result;
393 :
394 6146 : result = hash_any((unsigned char *) VARDATA_ANY(key),
395 6146 : VARSIZE_ANY_EXHDR(key));
396 :
397 : /* Avoid leaking memory for toasted inputs */
398 6146 : PG_FREE_IF_COPY(key, 0);
399 :
400 6146 : return result;
401 : }
402 :
403 : Datum
404 0 : hashvarlenaextended(PG_FUNCTION_ARGS)
405 : {
406 0 : varlena *key = PG_GETARG_VARLENA_PP(0);
407 : Datum result;
408 :
409 0 : result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
410 0 : VARSIZE_ANY_EXHDR(key),
411 0 : PG_GETARG_INT64(1));
412 :
413 0 : PG_FREE_IF_COPY(key, 0);
414 :
415 0 : return result;
416 : }
417 :
418 : Datum
419 6146 : hashbytea(PG_FUNCTION_ARGS)
420 : {
421 6146 : return hashvarlena(fcinfo);
422 : }
423 :
424 : Datum
425 0 : hashbyteaextended(PG_FUNCTION_ARGS)
426 : {
427 0 : return hashvarlenaextended(fcinfo);
428 : }
|