Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * hashfunc.c
4 : * Support functions for hash access method.
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/hash/hashfunc.c
12 : *
13 : * NOTES
14 : * These functions are stored in pg_amproc. For each operator class
15 : * defined for hash indexes, they compute the hash value of the argument.
16 : *
17 : * Additional hash functions appear in /utils/adt/ files for various
18 : * specialized datatypes.
19 : *
20 : * It is expected that every bit of a hash function's 32-bit result is
21 : * as random as every other; failure to ensure this is likely to lead
22 : * to poor performance of hash joins, for example. In most cases a hash
23 : * function should use hash_any() or its variant hash_uint32().
24 : *-------------------------------------------------------------------------
25 : */
26 :
27 : #include "postgres.h"
28 :
29 : #include "common/hashfn.h"
30 : #include "utils/float.h"
31 : #include "utils/fmgrprotos.h"
32 : #include "utils/pg_locale.h"
33 : #include "varatt.h"
34 :
35 : /*
36 : * Datatype-specific hash functions.
37 : *
38 : * These support both hash indexes and hash joins.
39 : *
40 : * NOTE: some of these are also used by catcache operations, without
41 : * any direct connection to hash indexes. Also, the common hash_any
42 : * routine is also used by dynahash tables.
43 : */
44 :
45 : /* Note: this is used for both "char" and boolean datatypes */
46 : Datum
47 105494 : hashchar(PG_FUNCTION_ARGS)
48 : {
49 105494 : return hash_uint32((int32) PG_GETARG_CHAR(0));
50 : }
51 :
52 : Datum
53 66 : hashcharextended(PG_FUNCTION_ARGS)
54 : {
55 66 : return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
56 : }
57 :
58 : Datum
59 565102 : hashint2(PG_FUNCTION_ARGS)
60 : {
61 565102 : return hash_uint32((int32) PG_GETARG_INT16(0));
62 : }
63 :
64 : Datum
65 48 : hashint2extended(PG_FUNCTION_ARGS)
66 : {
67 48 : return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
68 : }
69 :
70 : Datum
71 21361172 : hashint4(PG_FUNCTION_ARGS)
72 : {
73 21361172 : return hash_uint32(PG_GETARG_INT32(0));
74 : }
75 :
76 : Datum
77 207208 : hashint4extended(PG_FUNCTION_ARGS)
78 : {
79 207208 : return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
80 : }
81 :
82 : Datum
83 630896 : hashint8(PG_FUNCTION_ARGS)
84 : {
85 : /*
86 : * The idea here is to produce a hash value compatible with the values
87 : * produced by hashint4 and hashint2 for logically equal inputs; this is
88 : * necessary to support cross-type hash joins across these input types.
89 : * Since all three types are signed, we can xor the high half of the int8
90 : * value if the sign is positive, or the complement of the high half when
91 : * the sign is negative.
92 : */
93 630896 : int64 val = PG_GETARG_INT64(0);
94 630896 : uint32 lohalf = (uint32) val;
95 630896 : uint32 hihalf = (uint32) (val >> 32);
96 :
97 630896 : lohalf ^= (val >= 0) ? hihalf : ~hihalf;
98 :
99 630896 : return hash_uint32(lohalf);
100 : }
101 :
102 : Datum
103 372 : hashint8extended(PG_FUNCTION_ARGS)
104 : {
105 : /* Same approach as hashint8 */
106 372 : int64 val = PG_GETARG_INT64(0);
107 372 : uint32 lohalf = (uint32) val;
108 372 : uint32 hihalf = (uint32) (val >> 32);
109 :
110 372 : lohalf ^= (val >= 0) ? hihalf : ~hihalf;
111 :
112 372 : return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
113 : }
114 :
115 : Datum
116 12496218 : hashoid(PG_FUNCTION_ARGS)
117 : {
118 12496218 : return hash_uint32((uint32) PG_GETARG_OID(0));
119 : }
120 :
121 : Datum
122 72 : hashoidextended(PG_FUNCTION_ARGS)
123 : {
124 72 : return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
125 : }
126 :
127 : Datum
128 3142 : hashenum(PG_FUNCTION_ARGS)
129 : {
130 3142 : return hash_uint32((uint32) PG_GETARG_OID(0));
131 : }
132 :
133 : Datum
134 6036 : hashenumextended(PG_FUNCTION_ARGS)
135 : {
136 6036 : return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
137 : }
138 :
139 : Datum
140 48550 : hashfloat4(PG_FUNCTION_ARGS)
141 : {
142 48550 : float4 key = PG_GETARG_FLOAT4(0);
143 : float8 key8;
144 :
145 : /*
146 : * On IEEE-float machines, minus zero and zero have different bit patterns
147 : * but should compare as equal. We must ensure that they have the same
148 : * hash value, which is most reliably done this way:
149 : */
150 48550 : if (key == (float4) 0)
151 264 : PG_RETURN_UINT32(0);
152 :
153 : /*
154 : * To support cross-type hashing of float8 and float4, we want to return
155 : * the same hash value hashfloat8 would produce for an equal float8 value.
156 : * So, widen the value to float8 and hash that. (We must do this rather
157 : * than have hashfloat8 try to narrow its value to float4; that could fail
158 : * on overflow.)
159 : */
160 48286 : key8 = key;
161 :
162 : /*
163 : * Similarly, NaNs can have different bit patterns but they should all
164 : * compare as equal. For backwards-compatibility reasons we force them to
165 : * have the hash value of a standard float8 NaN. (You'd think we could
166 : * replace key with a float4 NaN and then widen it; but on some old
167 : * platforms, that way produces a different bit pattern.)
168 : */
169 48286 : if (isnan(key8))
170 18 : key8 = get_float8_nan();
171 :
172 48286 : return hash_any((unsigned char *) &key8, sizeof(key8));
173 : }
174 :
175 : Datum
176 72 : hashfloat4extended(PG_FUNCTION_ARGS)
177 : {
178 72 : float4 key = PG_GETARG_FLOAT4(0);
179 72 : uint64 seed = PG_GETARG_INT64(1);
180 : float8 key8;
181 :
182 : /* Same approach as hashfloat4 */
183 72 : if (key == (float4) 0)
184 12 : PG_RETURN_UINT64(seed);
185 60 : key8 = key;
186 60 : if (isnan(key8))
187 0 : key8 = get_float8_nan();
188 :
189 60 : return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
190 : }
191 :
192 : Datum
193 136292 : hashfloat8(PG_FUNCTION_ARGS)
194 : {
195 136292 : float8 key = PG_GETARG_FLOAT8(0);
196 :
197 : /*
198 : * On IEEE-float machines, minus zero and zero have different bit patterns
199 : * but should compare as equal. We must ensure that they have the same
200 : * hash value, which is most reliably done this way:
201 : */
202 136292 : if (key == (float8) 0)
203 686 : PG_RETURN_UINT32(0);
204 :
205 : /*
206 : * Similarly, NaNs can have different bit patterns but they should all
207 : * compare as equal. For backwards-compatibility reasons we force them to
208 : * have the hash value of a standard NaN.
209 : */
210 135606 : if (isnan(key))
211 18 : key = get_float8_nan();
212 :
213 135606 : return hash_any((unsigned char *) &key, sizeof(key));
214 : }
215 :
216 : Datum
217 72 : hashfloat8extended(PG_FUNCTION_ARGS)
218 : {
219 72 : float8 key = PG_GETARG_FLOAT8(0);
220 72 : uint64 seed = PG_GETARG_INT64(1);
221 :
222 : /* Same approach as hashfloat8 */
223 72 : if (key == (float8) 0)
224 12 : PG_RETURN_UINT64(seed);
225 60 : if (isnan(key))
226 0 : key = get_float8_nan();
227 :
228 60 : return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
229 : }
230 :
231 : Datum
232 402884 : hashoidvector(PG_FUNCTION_ARGS)
233 : {
234 402884 : oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
235 :
236 402884 : return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
237 : }
238 :
239 : Datum
240 60 : hashoidvectorextended(PG_FUNCTION_ARGS)
241 : {
242 60 : oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
243 :
244 120 : return hash_any_extended((unsigned char *) key->values,
245 60 : key->dim1 * sizeof(Oid),
246 60 : PG_GETARG_INT64(1));
247 : }
248 :
249 : Datum
250 503512 : hashname(PG_FUNCTION_ARGS)
251 : {
252 503512 : char *key = NameStr(*PG_GETARG_NAME(0));
253 :
254 503512 : return hash_any((unsigned char *) key, strlen(key));
255 : }
256 :
257 : Datum
258 60 : hashnameextended(PG_FUNCTION_ARGS)
259 : {
260 60 : char *key = NameStr(*PG_GETARG_NAME(0));
261 :
262 60 : return hash_any_extended((unsigned char *) key, strlen(key),
263 60 : PG_GETARG_INT64(1));
264 : }
265 :
266 : Datum
267 1454772 : hashtext(PG_FUNCTION_ARGS)
268 : {
269 1454772 : text *key = PG_GETARG_TEXT_PP(0);
270 1454772 : Oid collid = PG_GET_COLLATION();
271 : pg_locale_t mylocale;
272 : Datum result;
273 :
274 1454772 : if (!collid)
275 6 : ereport(ERROR,
276 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
277 : errmsg("could not determine which collation to use for string hashing"),
278 : errhint("Use the COLLATE clause to set the collation explicitly.")));
279 :
280 1454766 : mylocale = pg_newlocale_from_collation(collid);
281 :
282 1454766 : if (mylocale->deterministic)
283 : {
284 1449960 : result = hash_any((unsigned char *) VARDATA_ANY(key),
285 1449960 : VARSIZE_ANY_EXHDR(key));
286 : }
287 : else
288 : {
289 : Size bsize,
290 : rsize;
291 : char *buf;
292 4806 : const char *keydata = VARDATA_ANY(key);
293 4806 : size_t keylen = VARSIZE_ANY_EXHDR(key);
294 :
295 :
296 4806 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
297 4806 : buf = palloc(bsize + 1);
298 :
299 4806 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
300 :
301 : /* the second call may return a smaller value than the first */
302 4806 : if (rsize > bsize)
303 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
304 :
305 : /*
306 : * In principle, there's no reason to include the terminating NUL
307 : * character in the hash, but it was done before and the behavior must
308 : * be preserved.
309 : */
310 4806 : result = hash_any((uint8_t *) buf, bsize + 1);
311 :
312 4806 : pfree(buf);
313 : }
314 :
315 : /* Avoid leaking memory for toasted inputs */
316 1454766 : PG_FREE_IF_COPY(key, 0);
317 :
318 1454766 : return result;
319 : }
320 :
321 : Datum
322 4068 : hashtextextended(PG_FUNCTION_ARGS)
323 : {
324 4068 : text *key = PG_GETARG_TEXT_PP(0);
325 4068 : Oid collid = PG_GET_COLLATION();
326 : pg_locale_t mylocale;
327 : Datum result;
328 :
329 4068 : if (!collid)
330 0 : ereport(ERROR,
331 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
332 : errmsg("could not determine which collation to use for string hashing"),
333 : errhint("Use the COLLATE clause to set the collation explicitly.")));
334 :
335 4068 : mylocale = pg_newlocale_from_collation(collid);
336 :
337 4068 : if (mylocale->deterministic)
338 : {
339 4044 : result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
340 4044 : VARSIZE_ANY_EXHDR(key),
341 4044 : PG_GETARG_INT64(1));
342 : }
343 : else
344 : {
345 : Size bsize,
346 : rsize;
347 : char *buf;
348 24 : const char *keydata = VARDATA_ANY(key);
349 24 : size_t keylen = VARSIZE_ANY_EXHDR(key);
350 :
351 24 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
352 24 : buf = palloc(bsize + 1);
353 :
354 24 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
355 :
356 : /* the second call may return a smaller value than the first */
357 24 : if (rsize > bsize)
358 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
359 :
360 : /*
361 : * In principle, there's no reason to include the terminating NUL
362 : * character in the hash, but it was done before and the behavior must
363 : * be preserved.
364 : */
365 24 : result = hash_any_extended((uint8_t *) buf, bsize + 1,
366 24 : PG_GETARG_INT64(1));
367 :
368 24 : pfree(buf);
369 : }
370 :
371 4068 : PG_FREE_IF_COPY(key, 0);
372 :
373 4068 : return result;
374 : }
375 :
376 : /*
377 : * hashvarlena() can be used for any varlena datatype in which there are
378 : * no non-significant bits, ie, distinct bitpatterns never compare as equal.
379 : *
380 : * (However, you need to define an SQL-level wrapper function around it with
381 : * the concrete input data type; otherwise hashvalidate() won't accept it.
382 : * Moreover, at least for built-in types, a C-level wrapper function is also
383 : * recommended; otherwise, the opr_sanity test will get upset.)
384 : */
385 : Datum
386 6138 : hashvarlena(PG_FUNCTION_ARGS)
387 : {
388 6138 : struct varlena *key = PG_GETARG_VARLENA_PP(0);
389 : Datum result;
390 :
391 6138 : result = hash_any((unsigned char *) VARDATA_ANY(key),
392 6138 : VARSIZE_ANY_EXHDR(key));
393 :
394 : /* Avoid leaking memory for toasted inputs */
395 6138 : PG_FREE_IF_COPY(key, 0);
396 :
397 6138 : return result;
398 : }
399 :
400 : Datum
401 0 : hashvarlenaextended(PG_FUNCTION_ARGS)
402 : {
403 0 : struct varlena *key = PG_GETARG_VARLENA_PP(0);
404 : Datum result;
405 :
406 0 : result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
407 0 : VARSIZE_ANY_EXHDR(key),
408 0 : PG_GETARG_INT64(1));
409 :
410 0 : PG_FREE_IF_COPY(key, 0);
411 :
412 0 : return result;
413 : }
414 :
415 : Datum
416 6138 : hashbytea(PG_FUNCTION_ARGS)
417 : {
418 6138 : return hashvarlena(fcinfo);
419 : }
420 :
421 : Datum
422 0 : hashbyteaextended(PG_FUNCTION_ARGS)
423 : {
424 0 : return hashvarlenaextended(fcinfo);
425 : }
|