Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * hashfunc.c
4 : * Support functions for hash access method.
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/hash/hashfunc.c
12 : *
13 : * NOTES
14 : * These functions are stored in pg_amproc. For each operator class
15 : * defined for hash indexes, they compute the hash value of the argument.
16 : *
17 : * Additional hash functions appear in /utils/adt/ files for various
18 : * specialized datatypes.
19 : *
20 : * It is expected that every bit of a hash function's 32-bit result is
21 : * as random as every other; failure to ensure this is likely to lead
22 : * to poor performance of hash joins, for example. In most cases a hash
23 : * function should use hash_any() or its variant hash_uint32().
24 : *-------------------------------------------------------------------------
25 : */
26 :
27 : #include "postgres.h"
28 :
29 : #include "common/hashfn.h"
30 : #include "utils/float.h"
31 : #include "utils/fmgrprotos.h"
32 : #include "utils/pg_locale.h"
33 : #include "varatt.h"
34 :
35 : /*
36 : * Datatype-specific hash functions.
37 : *
38 : * These support both hash indexes and hash joins.
39 : *
40 : * NOTE: some of these are also used by catcache operations, without
41 : * any direct connection to hash indexes. Also, the common hash_any
42 : * routine is also used by dynahash tables.
43 : */
44 :
45 : /* Note: this is used for both "char" and boolean datatypes */
46 : Datum
47 185402 : hashchar(PG_FUNCTION_ARGS)
48 : {
49 185402 : return hash_uint32((int32) PG_GETARG_CHAR(0));
50 : }
51 :
52 : Datum
53 66 : hashcharextended(PG_FUNCTION_ARGS)
54 : {
55 66 : return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
56 : }
57 :
58 : Datum
59 502672 : hashint2(PG_FUNCTION_ARGS)
60 : {
61 502672 : return hash_uint32((int32) PG_GETARG_INT16(0));
62 : }
63 :
64 : Datum
65 48 : hashint2extended(PG_FUNCTION_ARGS)
66 : {
67 48 : return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
68 : }
69 :
70 : Datum
71 24982680 : hashint4(PG_FUNCTION_ARGS)
72 : {
73 24982680 : return hash_uint32(PG_GETARG_INT32(0));
74 : }
75 :
76 : Datum
77 207208 : hashint4extended(PG_FUNCTION_ARGS)
78 : {
79 207208 : return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
80 : }
81 :
82 : Datum
83 630704 : hashint8(PG_FUNCTION_ARGS)
84 : {
85 : /*
86 : * The idea here is to produce a hash value compatible with the values
87 : * produced by hashint4 and hashint2 for logically equal inputs; this is
88 : * necessary to support cross-type hash joins across these input types.
89 : * Since all three types are signed, we can xor the high half of the int8
90 : * value if the sign is positive, or the complement of the high half when
91 : * the sign is negative.
92 : */
93 630704 : int64 val = PG_GETARG_INT64(0);
94 630704 : uint32 lohalf = (uint32) val;
95 630704 : uint32 hihalf = (uint32) (val >> 32);
96 :
97 630704 : lohalf ^= (val >= 0) ? hihalf : ~hihalf;
98 :
99 630704 : return hash_uint32(lohalf);
100 : }
101 :
102 : Datum
103 372 : hashint8extended(PG_FUNCTION_ARGS)
104 : {
105 : /* Same approach as hashint8 */
106 372 : int64 val = PG_GETARG_INT64(0);
107 372 : uint32 lohalf = (uint32) val;
108 372 : uint32 hihalf = (uint32) (val >> 32);
109 :
110 372 : lohalf ^= (val >= 0) ? hihalf : ~hihalf;
111 :
112 372 : return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
113 : }
114 :
115 : Datum
116 12113558 : hashoid(PG_FUNCTION_ARGS)
117 : {
118 12113558 : return hash_uint32((uint32) PG_GETARG_OID(0));
119 : }
120 :
121 : Datum
122 72 : hashoidextended(PG_FUNCTION_ARGS)
123 : {
124 72 : return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
125 : }
126 :
127 : Datum
128 3142 : hashenum(PG_FUNCTION_ARGS)
129 : {
130 3142 : return hash_uint32((uint32) PG_GETARG_OID(0));
131 : }
132 :
133 : Datum
134 6036 : hashenumextended(PG_FUNCTION_ARGS)
135 : {
136 6036 : return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
137 : }
138 :
139 : Datum
140 42310 : hashfloat4(PG_FUNCTION_ARGS)
141 : {
142 42310 : float4 key = PG_GETARG_FLOAT4(0);
143 : float8 key8;
144 :
145 : /*
146 : * On IEEE-float machines, minus zero and zero have different bit patterns
147 : * but should compare as equal. We must ensure that they have the same
148 : * hash value, which is most reliably done this way:
149 : */
150 42310 : if (key == (float4) 0)
151 24 : PG_RETURN_UINT32(0);
152 :
153 : /*
154 : * To support cross-type hashing of float8 and float4, we want to return
155 : * the same hash value hashfloat8 would produce for an equal float8 value.
156 : * So, widen the value to float8 and hash that. (We must do this rather
157 : * than have hashfloat8 try to narrow its value to float4; that could fail
158 : * on overflow.)
159 : */
160 42286 : key8 = key;
161 :
162 : /*
163 : * Similarly, NaNs can have different bit patterns but they should all
164 : * compare as equal. For backwards-compatibility reasons we force them to
165 : * have the hash value of a standard float8 NaN. (You'd think we could
166 : * replace key with a float4 NaN and then widen it; but on some old
167 : * platforms, that way produces a different bit pattern.)
168 : */
169 42286 : if (isnan(key8))
170 18 : key8 = get_float8_nan();
171 :
172 42286 : return hash_any((unsigned char *) &key8, sizeof(key8));
173 : }
174 :
175 : Datum
176 72 : hashfloat4extended(PG_FUNCTION_ARGS)
177 : {
178 72 : float4 key = PG_GETARG_FLOAT4(0);
179 72 : uint64 seed = PG_GETARG_INT64(1);
180 : float8 key8;
181 :
182 : /* Same approach as hashfloat4 */
183 72 : if (key == (float4) 0)
184 12 : PG_RETURN_UINT64(seed);
185 60 : key8 = key;
186 60 : if (isnan(key8))
187 0 : key8 = get_float8_nan();
188 :
189 60 : return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
190 : }
191 :
192 : Datum
193 136184 : hashfloat8(PG_FUNCTION_ARGS)
194 : {
195 136184 : float8 key = PG_GETARG_FLOAT8(0);
196 :
197 : /*
198 : * On IEEE-float machines, minus zero and zero have different bit patterns
199 : * but should compare as equal. We must ensure that they have the same
200 : * hash value, which is most reliably done this way:
201 : */
202 136184 : if (key == (float8) 0)
203 686 : PG_RETURN_UINT32(0);
204 :
205 : /*
206 : * Similarly, NaNs can have different bit patterns but they should all
207 : * compare as equal. For backwards-compatibility reasons we force them to
208 : * have the hash value of a standard NaN.
209 : */
210 135498 : if (isnan(key))
211 18 : key = get_float8_nan();
212 :
213 135498 : return hash_any((unsigned char *) &key, sizeof(key));
214 : }
215 :
216 : Datum
217 72 : hashfloat8extended(PG_FUNCTION_ARGS)
218 : {
219 72 : float8 key = PG_GETARG_FLOAT8(0);
220 72 : uint64 seed = PG_GETARG_INT64(1);
221 :
222 : /* Same approach as hashfloat8 */
223 72 : if (key == (float8) 0)
224 12 : PG_RETURN_UINT64(seed);
225 60 : if (isnan(key))
226 0 : key = get_float8_nan();
227 :
228 60 : return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
229 : }
230 :
231 : Datum
232 366372 : hashoidvector(PG_FUNCTION_ARGS)
233 : {
234 366372 : oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
235 :
236 366372 : return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
237 : }
238 :
239 : Datum
240 60 : hashoidvectorextended(PG_FUNCTION_ARGS)
241 : {
242 60 : oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
243 :
244 120 : return hash_any_extended((unsigned char *) key->values,
245 60 : key->dim1 * sizeof(Oid),
246 60 : PG_GETARG_INT64(1));
247 : }
248 :
249 : Datum
250 515306 : hashname(PG_FUNCTION_ARGS)
251 : {
252 515306 : char *key = NameStr(*PG_GETARG_NAME(0));
253 :
254 515306 : return hash_any((unsigned char *) key, strlen(key));
255 : }
256 :
257 : Datum
258 60 : hashnameextended(PG_FUNCTION_ARGS)
259 : {
260 60 : char *key = NameStr(*PG_GETARG_NAME(0));
261 :
262 60 : return hash_any_extended((unsigned char *) key, strlen(key),
263 60 : PG_GETARG_INT64(1));
264 : }
265 :
266 : Datum
267 1405736 : hashtext(PG_FUNCTION_ARGS)
268 : {
269 1405736 : text *key = PG_GETARG_TEXT_PP(0);
270 1405736 : Oid collid = PG_GET_COLLATION();
271 1405736 : pg_locale_t mylocale = 0;
272 : Datum result;
273 :
274 1405736 : if (!collid)
275 6 : ereport(ERROR,
276 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
277 : errmsg("could not determine which collation to use for string hashing"),
278 : errhint("Use the COLLATE clause to set the collation explicitly.")));
279 :
280 1405730 : if (!lc_collate_is_c(collid))
281 990042 : mylocale = pg_newlocale_from_collation(collid);
282 :
283 1405730 : if (pg_locale_deterministic(mylocale))
284 : {
285 1405556 : result = hash_any((unsigned char *) VARDATA_ANY(key),
286 1405556 : VARSIZE_ANY_EXHDR(key));
287 : }
288 : else
289 : {
290 : Size bsize,
291 : rsize;
292 : char *buf;
293 174 : const char *keydata = VARDATA_ANY(key);
294 174 : size_t keylen = VARSIZE_ANY_EXHDR(key);
295 :
296 :
297 174 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
298 174 : buf = palloc(bsize + 1);
299 :
300 174 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
301 174 : if (rsize != bsize)
302 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
303 :
304 : /*
305 : * In principle, there's no reason to include the terminating NUL
306 : * character in the hash, but it was done before and the behavior must
307 : * be preserved.
308 : */
309 174 : result = hash_any((uint8_t *) buf, bsize + 1);
310 :
311 174 : pfree(buf);
312 : }
313 :
314 : /* Avoid leaking memory for toasted inputs */
315 1405730 : PG_FREE_IF_COPY(key, 0);
316 :
317 1405730 : return result;
318 : }
319 :
320 : Datum
321 4068 : hashtextextended(PG_FUNCTION_ARGS)
322 : {
323 4068 : text *key = PG_GETARG_TEXT_PP(0);
324 4068 : Oid collid = PG_GET_COLLATION();
325 4068 : pg_locale_t mylocale = 0;
326 : Datum result;
327 :
328 4068 : if (!collid)
329 0 : ereport(ERROR,
330 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
331 : errmsg("could not determine which collation to use for string hashing"),
332 : errhint("Use the COLLATE clause to set the collation explicitly.")));
333 :
334 4068 : if (!lc_collate_is_c(collid))
335 2728 : mylocale = pg_newlocale_from_collation(collid);
336 :
337 4068 : if (pg_locale_deterministic(mylocale))
338 : {
339 4044 : result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
340 4044 : VARSIZE_ANY_EXHDR(key),
341 4044 : PG_GETARG_INT64(1));
342 : }
343 : else
344 : {
345 : Size bsize,
346 : rsize;
347 : char *buf;
348 24 : const char *keydata = VARDATA_ANY(key);
349 24 : size_t keylen = VARSIZE_ANY_EXHDR(key);
350 :
351 24 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
352 24 : buf = palloc(bsize + 1);
353 :
354 24 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
355 24 : if (rsize != bsize)
356 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
357 :
358 : /*
359 : * In principle, there's no reason to include the terminating NUL
360 : * character in the hash, but it was done before and the behavior must
361 : * be preserved.
362 : */
363 24 : result = hash_any_extended((uint8_t *) buf, bsize + 1,
364 24 : PG_GETARG_INT64(1));
365 :
366 24 : pfree(buf);
367 : }
368 :
369 4068 : PG_FREE_IF_COPY(key, 0);
370 :
371 4068 : return result;
372 : }
373 :
374 : /*
375 : * hashvarlena() can be used for any varlena datatype in which there are
376 : * no non-significant bits, ie, distinct bitpatterns never compare as equal.
377 : */
378 : Datum
379 6138 : hashvarlena(PG_FUNCTION_ARGS)
380 : {
381 6138 : struct varlena *key = PG_GETARG_VARLENA_PP(0);
382 : Datum result;
383 :
384 6138 : result = hash_any((unsigned char *) VARDATA_ANY(key),
385 6138 : VARSIZE_ANY_EXHDR(key));
386 :
387 : /* Avoid leaking memory for toasted inputs */
388 6138 : PG_FREE_IF_COPY(key, 0);
389 :
390 6138 : return result;
391 : }
392 :
393 : Datum
394 0 : hashvarlenaextended(PG_FUNCTION_ARGS)
395 : {
396 0 : struct varlena *key = PG_GETARG_VARLENA_PP(0);
397 : Datum result;
398 :
399 0 : result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
400 0 : VARSIZE_ANY_EXHDR(key),
401 0 : PG_GETARG_INT64(1));
402 :
403 0 : PG_FREE_IF_COPY(key, 0);
404 :
405 0 : return result;
406 : }
|