LCOV - code coverage report
Current view: top level - src/backend/access/hash - hashfunc.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 90.0 % 140 126
Test Date: 2026-03-03 18:14:56 Functions: 92.3 % 26 24
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * hashfunc.c
       4              :  *    Support functions for hash access method.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/access/hash/hashfunc.c
      12              :  *
      13              :  * NOTES
      14              :  *    These functions are stored in pg_amproc.  For each operator class
      15              :  *    defined for hash indexes, they compute the hash value of the argument.
      16              :  *
      17              :  *    Additional hash functions appear in /utils/adt/ files for various
      18              :  *    specialized datatypes.
      19              :  *
      20              :  *    It is expected that every bit of a hash function's 32-bit result is
      21              :  *    as random as every other; failure to ensure this is likely to lead
      22              :  *    to poor performance of hash joins, for example.  In most cases a hash
      23              :  *    function should use hash_any() or its variant hash_uint32().
      24              :  *-------------------------------------------------------------------------
      25              :  */
      26              : 
      27              : #include "postgres.h"
      28              : 
      29              : #include "common/hashfn.h"
      30              : #include "utils/builtins.h"
      31              : #include "utils/float.h"
      32              : #include "utils/fmgrprotos.h"
      33              : #include "utils/pg_locale.h"
      34              : #include "varatt.h"
      35              : 
      36              : /*
      37              :  * Datatype-specific hash functions.
      38              :  *
      39              :  * These support both hash indexes and hash joins.
      40              :  *
      41              :  * NOTE: some of these are also used by catcache operations, without
      42              :  * any direct connection to hash indexes.  Also, the common hash_any
      43              :  * routine is used by dynahash tables.
      44              :  */
      45              : 
      46              : /* Note: this is used for both "char" and boolean datatypes */
      47              : Datum
      48        73723 : hashchar(PG_FUNCTION_ARGS)
      49              : {
      50        73723 :     return hash_uint32((int32) PG_GETARG_CHAR(0));
      51              : }
      52              : 
      53              : Datum
      54           33 : hashcharextended(PG_FUNCTION_ARGS)
      55              : {
      56           33 :     return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
      57              : }
      58              : 
      59              : Datum
      60       260403 : hashint2(PG_FUNCTION_ARGS)
      61              : {
      62       260403 :     return hash_uint32((int32) PG_GETARG_INT16(0));
      63              : }
      64              : 
      65              : Datum
      66           24 : hashint2extended(PG_FUNCTION_ARGS)
      67              : {
      68           24 :     return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
      69              : }
      70              : 
      71              : Datum
      72     14475515 : hashint4(PG_FUNCTION_ARGS)
      73              : {
      74     14475515 :     return hash_uint32(PG_GETARG_INT32(0));
      75              : }
      76              : 
      77              : Datum
      78       103604 : hashint4extended(PG_FUNCTION_ARGS)
      79              : {
      80       103604 :     return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
      81              : }
      82              : 
      83              : Datum
      84       316577 : hashint8(PG_FUNCTION_ARGS)
      85              : {
      86              :     /*
      87              :      * The idea here is to produce a hash value compatible with the values
      88              :      * produced by hashint4 and hashint2 for logically equal inputs; this is
      89              :      * necessary to support cross-type hash joins across these input types.
      90              :      * Since all three types are signed, we can xor the low half of the int8
      91              :      * value with the high half if the value is non-negative, or with the
      92              :      * complement of the high half if it is negative.
      93              :      */
      94       316577 :     int64       val = PG_GETARG_INT64(0);
      95       316577 :     uint32      lohalf = (uint32) val;
      96       316577 :     uint32      hihalf = (uint32) (val >> 32);
      97              : 
      98       316577 :     lohalf ^= (val >= 0) ? hihalf : ~hihalf;
      99              : 
     100       316577 :     return hash_uint32(lohalf);
     101              : }
     102              : 
     103              : Datum
     104          222 : hashint8extended(PG_FUNCTION_ARGS)
     105              : {
     106              :     /* Same approach as hashint8 */
     107          222 :     int64       val = PG_GETARG_INT64(0);
     108          222 :     uint32      lohalf = (uint32) val;
     109          222 :     uint32      hihalf = (uint32) (val >> 32);
     110              : 
     111          222 :     lohalf ^= (val >= 0) ? hihalf : ~hihalf;
     112              : 
     113          222 :     return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
     114              : }
     115              : 
     116              : Datum
     117      9387044 : hashoid(PG_FUNCTION_ARGS)
     118              : {
     119      9387044 :     return hash_uint32((uint32) PG_GETARG_OID(0));
     120              : }
     121              : 
     122              : Datum
     123           36 : hashoidextended(PG_FUNCTION_ARGS)
     124              : {
     125           36 :     return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     126              : }
     127              : 
     128              : Datum
     129         1571 : hashenum(PG_FUNCTION_ARGS)
     130              : {
     131         1571 :     return hash_uint32((uint32) PG_GETARG_OID(0));
     132              : }
     133              : 
     134              : Datum
     135         3018 : hashenumextended(PG_FUNCTION_ARGS)
     136              : {
     137         3018 :     return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     138              : }
     139              : 
     140              : Datum
     141        21157 : hashfloat4(PG_FUNCTION_ARGS)
     142              : {
     143        21157 :     float4      key = PG_GETARG_FLOAT4(0);
     144              :     float8      key8;
     145              : 
     146              :     /*
     147              :      * On IEEE-float machines, minus zero and zero have different bit patterns
     148              :      * but should compare as equal.  We must ensure that they have the same
     149              :      * hash value, which is most reliably done this way:
     150              :      */
     151        21157 :     if (key == (float4) 0)
     152           12 :         PG_RETURN_UINT32(0);
     153              : 
     154              :     /*
     155              :      * To support cross-type hashing of float8 and float4, we want to return
     156              :      * the same hash value hashfloat8 would produce for an equal float8 value.
     157              :      * So, widen the value to float8 and hash that.  (We must do this rather
     158              :      * than have hashfloat8 try to narrow its value to float4; that could fail
     159              :      * on overflow.)
     160              :      */
     161        21145 :     key8 = key;
     162              : 
     163              :     /*
     164              :      * Similarly, NaNs can have different bit patterns but they should all
     165              :      * compare as equal.  For backwards-compatibility reasons we force them to
     166              :      * have the hash value of a standard float8 NaN.  (You'd think we could
     167              :      * replace key with a float4 NaN and then widen it; but on some old
     168              :      * platforms, that way produces a different bit pattern.)
     169              :      */
     170        21145 :     if (isnan(key8))
     171            9 :         key8 = get_float8_nan();
     172              : 
     173        21145 :     return hash_any((unsigned char *) &key8, sizeof(key8));
     174              : }
     175              : 
     176              : Datum
     177           36 : hashfloat4extended(PG_FUNCTION_ARGS)
     178              : {
     179           36 :     float4      key = PG_GETARG_FLOAT4(0);
     180           36 :     uint64      seed = PG_GETARG_INT64(1);
     181              :     float8      key8;
     182              : 
     183              :     /* Same approach as hashfloat4 */
     184           36 :     if (key == (float4) 0)
     185            6 :         PG_RETURN_UINT64(seed);
     186           30 :     key8 = key;
     187           30 :     if (isnan(key8))
     188            0 :         key8 = get_float8_nan();
     189              : 
     190           30 :     return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
     191              : }
     192              : 
     193              : Datum
     194        68788 : hashfloat8(PG_FUNCTION_ARGS)
     195              : {
     196        68788 :     float8      key = PG_GETARG_FLOAT8(0);
     197              : 
     198              :     /*
     199              :      * On IEEE-float machines, minus zero and zero have different bit patterns
     200              :      * but should compare as equal.  We must ensure that they have the same
     201              :      * hash value, which is most reliably done this way:
     202              :      */
     203        68788 :     if (key == (float8) 0)
     204          344 :         PG_RETURN_UINT32(0);
     205              : 
     206              :     /*
     207              :      * Similarly, NaNs can have different bit patterns but they should all
     208              :      * compare as equal.  For backwards-compatibility reasons we force them to
     209              :      * have the hash value of a standard NaN.
     210              :      */
     211        68444 :     if (isnan(key))
     212            9 :         key = get_float8_nan();
     213              : 
     214        68444 :     return hash_any((unsigned char *) &key, sizeof(key));
     215              : }
     216              : 
     217              : Datum
     218           36 : hashfloat8extended(PG_FUNCTION_ARGS)
     219              : {
     220           36 :     float8      key = PG_GETARG_FLOAT8(0);
     221           36 :     uint64      seed = PG_GETARG_INT64(1);
     222              : 
     223              :     /* Same approach as hashfloat8 */
     224           36 :     if (key == (float8) 0)
     225            6 :         PG_RETURN_UINT64(seed);
     226           30 :     if (isnan(key))
     227            0 :         key = get_float8_nan();
     228              : 
     229           30 :     return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
     230              : }
     231              : 
     232              : Datum
     233       232451 : hashoidvector(PG_FUNCTION_ARGS)
     234              : {
     235       232451 :     oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     236              : 
     237       232451 :     check_valid_oidvector(key);
     238       232451 :     return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
     239              : }
     240              : 
     241              : Datum
     242           30 : hashoidvectorextended(PG_FUNCTION_ARGS)
     243              : {
     244           30 :     oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     245              : 
     246           30 :     check_valid_oidvector(key);
     247           60 :     return hash_any_extended((unsigned char *) key->values,
     248           30 :                              key->dim1 * sizeof(Oid),
     249           30 :                              PG_GETARG_INT64(1));
     250              : }
     251              : 
     252              : Datum
     253       278188 : hashname(PG_FUNCTION_ARGS)
     254              : {
     255       278188 :     char       *key = NameStr(*PG_GETARG_NAME(0));
     256              : 
     257       278188 :     return hash_any((unsigned char *) key, strlen(key));
     258              : }
     259              : 
     260              : Datum
     261           30 : hashnameextended(PG_FUNCTION_ARGS)
     262              : {
     263           30 :     char       *key = NameStr(*PG_GETARG_NAME(0));
     264              : 
     265           30 :     return hash_any_extended((unsigned char *) key, strlen(key),
     266           30 :                              PG_GETARG_INT64(1));
     267              : }
     268              : 
     269              : Datum
     270       764536 : hashtext(PG_FUNCTION_ARGS)
     271              : {
     272       764536 :     text       *key = PG_GETARG_TEXT_PP(0);
     273       764536 :     Oid         collid = PG_GET_COLLATION();
     274              :     pg_locale_t mylocale;
     275              :     Datum       result;
     276              : 
     277       764536 :     if (!collid)
     278            3 :         ereport(ERROR,
     279              :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     280              :                  errmsg("could not determine which collation to use for string hashing"),
     281              :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     282              : 
     283       764533 :     mylocale = pg_newlocale_from_collation(collid);
     284              : 
     285       764533 :     if (mylocale->deterministic)
     286              :     {
     287       763198 :         result = hash_any((unsigned char *) VARDATA_ANY(key),
     288       763198 :                           VARSIZE_ANY_EXHDR(key));
     289              :     }
     290              :     else
     291              :     {
     292              :         Size        bsize,
     293              :                     rsize;
     294              :         char       *buf;
     295         1335 :         const char *keydata = VARDATA_ANY(key);
     296         1335 :         size_t      keylen = VARSIZE_ANY_EXHDR(key);
     297              : 
     298              : 
     299         1335 :         bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
     300         1335 :         buf = palloc(bsize + 1);
     301              : 
     302         1335 :         rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
     303              : 
     304              :         /* the second call may return a smaller value than the first */
     305         1335 :         if (rsize > bsize)
     306            0 :             elog(ERROR, "pg_strnxfrm() returned unexpected result");
     307              : 
     308              :         /*
     309              :          * In principle, there's no reason to include the terminating NUL
     310              :          * character in the hash, but it was done before and the behavior must
     311              :          * be preserved.
     312              :          */
     313         1335 :         result = hash_any((uint8_t *) buf, bsize + 1);
     314              : 
     315         1335 :         pfree(buf);
     316              :     }
     317              : 
     318              :     /* Avoid leaking memory for toasted inputs */
     319       764533 :     PG_FREE_IF_COPY(key, 0);
     320              : 
     321       764533 :     return result;
     322              : }
     323              : 
     324              : Datum
     325         2034 : hashtextextended(PG_FUNCTION_ARGS)
     326              : {
     327         2034 :     text       *key = PG_GETARG_TEXT_PP(0);
     328         2034 :     Oid         collid = PG_GET_COLLATION();
     329              :     pg_locale_t mylocale;
     330              :     Datum       result;
     331              : 
     332         2034 :     if (!collid)
     333            0 :         ereport(ERROR,
     334              :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     335              :                  errmsg("could not determine which collation to use for string hashing"),
     336              :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     337              : 
     338         2034 :     mylocale = pg_newlocale_from_collation(collid);
     339              : 
     340         2034 :     if (mylocale->deterministic)
     341              :     {
     342         2022 :         result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     343         2022 :                                    VARSIZE_ANY_EXHDR(key),
     344         2022 :                                    PG_GETARG_INT64(1));
     345              :     }
     346              :     else
     347              :     {
     348              :         Size        bsize,
     349              :                     rsize;
     350              :         char       *buf;
     351           12 :         const char *keydata = VARDATA_ANY(key);
     352           12 :         size_t      keylen = VARSIZE_ANY_EXHDR(key);
     353              : 
     354           12 :         bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
     355           12 :         buf = palloc(bsize + 1);
     356              : 
     357           12 :         rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
     358              : 
     359              :         /* the second call may return a smaller value than the first */
     360           12 :         if (rsize > bsize)
     361            0 :             elog(ERROR, "pg_strnxfrm() returned unexpected result");
     362              : 
     363              :         /*
     364              :          * In principle, there's no reason to include the terminating NUL
     365              :          * character in the hash, but it was done before and the behavior must
     366              :          * be preserved.
     367              :          */
     368           12 :         result = hash_any_extended((uint8_t *) buf, bsize + 1,
     369           12 :                                    PG_GETARG_INT64(1));
     370              : 
     371           12 :         pfree(buf);
     372              :     }
     373              : 
     374         2034 :     PG_FREE_IF_COPY(key, 0);
     375              : 
     376         2034 :     return result;
     377              : }
     378              : 
     379              : /*
     380              :  * hashvarlena() can be used for any varlena datatype in which there are
     381              :  * no non-significant bits, i.e., distinct bit patterns never compare as equal.
     382              :  *
     383              :  * (However, you need to define an SQL-level wrapper function around it with
     384              :  * the concrete input data type; otherwise hashvalidate() won't accept it.
     385              :  * Moreover, at least for built-in types, a C-level wrapper function is also
     386              :  * recommended; otherwise, the opr_sanity test will get upset.)
     387              :  */
     388              : Datum
     389         3071 : hashvarlena(PG_FUNCTION_ARGS)
     390              : {
     391         3071 :     varlena    *key = PG_GETARG_VARLENA_PP(0);
     392              :     Datum       result;
     393              : 
     394         3071 :     result = hash_any((unsigned char *) VARDATA_ANY(key),
     395         3071 :                       VARSIZE_ANY_EXHDR(key));
     396              : 
     397              :     /* Avoid leaking memory for toasted inputs */
     398         3071 :     PG_FREE_IF_COPY(key, 0);
     399              : 
     400         3071 :     return result;
     401              : }
     402              : 
     403              : Datum
     404            0 : hashvarlenaextended(PG_FUNCTION_ARGS)
     405              : {
     406            0 :     varlena    *key = PG_GETARG_VARLENA_PP(0);
     407              :     Datum       result;
     408              : 
     409            0 :     result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     410            0 :                                VARSIZE_ANY_EXHDR(key),
     411            0 :                                PG_GETARG_INT64(1));
     412              : 
     413            0 :     PG_FREE_IF_COPY(key, 0);
     414              : 
     415            0 :     return result;
     416              : }
     417              : 
     418              : Datum
     419         3071 : hashbytea(PG_FUNCTION_ARGS)
     420              : {
     421         3071 :     return hashvarlena(fcinfo);
     422              : }
     423              : 
     424              : Datum
     425            0 : hashbyteaextended(PG_FUNCTION_ARGS)
     426              : {
     427            0 :     return hashvarlenaextended(fcinfo);
     428              : }
        

Generated by: LCOV version 2.0-1