LCOV - code coverage report
Current view: top level - src/backend/access/hash - hashfunc.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 126 140 90.0 %
Date: 2026-02-11 00:17:59 Functions: 24 26 92.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * hashfunc.c
       4             :  *    Support functions for hash access method.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/hash/hashfunc.c
      12             :  *
      13             :  * NOTES
      14             :  *    These functions are stored in pg_amproc.  For each operator class
      15             :  *    defined for hash indexes, they compute the hash value of the argument.
      16             :  *
      17             :  *    Additional hash functions appear in /utils/adt/ files for various
      18             :  *    specialized datatypes.
      19             :  *
      20             :  *    It is expected that every bit of a hash function's 32-bit result is
      21             :  *    as random as every other; failure to ensure this is likely to lead
      22             :  *    to poor performance of hash joins, for example.  In most cases a hash
      23             :  *    function should use hash_any() or its variant hash_uint32().
      24             :  *-------------------------------------------------------------------------
      25             :  */
      26             : 
      27             : #include "postgres.h"
      28             : 
      29             : #include "common/hashfn.h"
      30             : #include "utils/builtins.h"
      31             : #include "utils/float.h"
      32             : #include "utils/fmgrprotos.h"
      33             : #include "utils/pg_locale.h"
      34             : #include "varatt.h"
      35             : 
      36             : /*
      37             :  * Datatype-specific hash functions.
      38             :  *
      39             :  * These support both hash indexes and hash joins.
      40             :  *
      41             :  * NOTE: some of these are also used by catcache operations, without
      42             :  * any direct connection to hash indexes.  Also, the common hash_any
      43             :  * routine is also used by dynahash tables.
      44             :  */
      45             : 
      46             : /* Note: this is used for both "char" and boolean datatypes */
      47             : Datum
      48      149052 : hashchar(PG_FUNCTION_ARGS)
      49             : {
      50      149052 :     return hash_uint32((int32) PG_GETARG_CHAR(0));
      51             : }
      52             : 
      53             : Datum
      54          66 : hashcharextended(PG_FUNCTION_ARGS)
      55             : {
      56          66 :     return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
      57             : }
      58             : 
      59             : Datum
      60      462732 : hashint2(PG_FUNCTION_ARGS)
      61             : {
      62      462732 :     return hash_uint32((int32) PG_GETARG_INT16(0));
      63             : }
      64             : 
      65             : Datum
      66          48 : hashint2extended(PG_FUNCTION_ARGS)
      67             : {
      68          48 :     return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
      69             : }
      70             : 
      71             : Datum
      72    28745510 : hashint4(PG_FUNCTION_ARGS)
      73             : {
      74    28745510 :     return hash_uint32(PG_GETARG_INT32(0));
      75             : }
      76             : 
      77             : Datum
      78      205208 : hashint4extended(PG_FUNCTION_ARGS)
      79             : {
      80      205208 :     return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
      81             : }
      82             : 
      83             : Datum
      84      633182 : hashint8(PG_FUNCTION_ARGS)
      85             : {
      86             :     /*
      87             :      * The idea here is to produce a hash value compatible with the values
      88             :      * produced by hashint4 and hashint2 for logically equal inputs; this is
      89             :      * necessary to support cross-type hash joins across these input types.
      90             :      * Since all three types are signed, we can xor the high half of the int8
      91             :      * value if the sign is positive, or the complement of the high half when
      92             :      * the sign is negative.
      93             :      */
      94      633182 :     int64       val = PG_GETARG_INT64(0);
      95      633182 :     uint32      lohalf = (uint32) val;
      96      633182 :     uint32      hihalf = (uint32) (val >> 32);
      97             : 
      98      633182 :     lohalf ^= (val >= 0) ? hihalf : ~hihalf;
      99             : 
     100      633182 :     return hash_uint32(lohalf);
     101             : }
     102             : 
     103             : Datum
     104         444 : hashint8extended(PG_FUNCTION_ARGS)
     105             : {
     106             :     /* Same approach as hashint8 */
     107         444 :     int64       val = PG_GETARG_INT64(0);
     108         444 :     uint32      lohalf = (uint32) val;
     109         444 :     uint32      hihalf = (uint32) (val >> 32);
     110             : 
     111         444 :     lohalf ^= (val >= 0) ? hihalf : ~hihalf;
     112             : 
     113         444 :     return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
     114             : }
     115             : 
     116             : Datum
     117    17847022 : hashoid(PG_FUNCTION_ARGS)
     118             : {
     119    17847022 :     return hash_uint32((uint32) PG_GETARG_OID(0));
     120             : }
     121             : 
     122             : Datum
     123          72 : hashoidextended(PG_FUNCTION_ARGS)
     124             : {
     125          72 :     return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     126             : }
     127             : 
     128             : Datum
     129        3142 : hashenum(PG_FUNCTION_ARGS)
     130             : {
     131        3142 :     return hash_uint32((uint32) PG_GETARG_OID(0));
     132             : }
     133             : 
     134             : Datum
     135        4036 : hashenumextended(PG_FUNCTION_ARGS)
     136             : {
     137        4036 :     return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     138             : }
     139             : 
     140             : Datum
     141       42318 : hashfloat4(PG_FUNCTION_ARGS)
     142             : {
     143       42318 :     float4      key = PG_GETARG_FLOAT4(0);
     144             :     float8      key8;
     145             : 
     146             :     /*
     147             :      * On IEEE-float machines, minus zero and zero have different bit patterns
     148             :      * but should compare as equal.  We must ensure that they have the same
     149             :      * hash value, which is most reliably done this way:
     150             :      */
     151       42318 :     if (key == (float4) 0)
     152          24 :         PG_RETURN_UINT32(0);
     153             : 
     154             :     /*
     155             :      * To support cross-type hashing of float8 and float4, we want to return
     156             :      * the same hash value hashfloat8 would produce for an equal float8 value.
     157             :      * So, widen the value to float8 and hash that.  (We must do this rather
     158             :      * than have hashfloat8 try to narrow its value to float4; that could fail
     159             :      * on overflow.)
     160             :      */
     161       42294 :     key8 = key;
     162             : 
     163             :     /*
     164             :      * Similarly, NaNs can have different bit patterns but they should all
     165             :      * compare as equal.  For backwards-compatibility reasons we force them to
     166             :      * have the hash value of a standard float8 NaN.  (You'd think we could
     167             :      * replace key with a float4 NaN and then widen it; but on some old
     168             :      * platforms, that way produces a different bit pattern.)
     169             :      */
     170       42294 :     if (isnan(key8))
     171          18 :         key8 = get_float8_nan();
     172             : 
     173       42294 :     return hash_any((unsigned char *) &key8, sizeof(key8));
     174             : }
     175             : 
     176             : Datum
     177          72 : hashfloat4extended(PG_FUNCTION_ARGS)
     178             : {
     179          72 :     float4      key = PG_GETARG_FLOAT4(0);
     180          72 :     uint64      seed = PG_GETARG_INT64(1);
     181             :     float8      key8;
     182             : 
     183             :     /* Same approach as hashfloat4 */
     184          72 :     if (key == (float4) 0)
     185          12 :         PG_RETURN_UINT64(seed);
     186          60 :     key8 = key;
     187          60 :     if (isnan(key8))
     188           0 :         key8 = get_float8_nan();
     189             : 
     190          60 :     return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
     191             : }
     192             : 
     193             : Datum
     194      137580 : hashfloat8(PG_FUNCTION_ARGS)
     195             : {
     196      137580 :     float8      key = PG_GETARG_FLOAT8(0);
     197             : 
     198             :     /*
     199             :      * On IEEE-float machines, minus zero and zero have different bit patterns
     200             :      * but should compare as equal.  We must ensure that they have the same
     201             :      * hash value, which is most reliably done this way:
     202             :      */
     203      137580 :     if (key == (float8) 0)
     204         690 :         PG_RETURN_UINT32(0);
     205             : 
     206             :     /*
     207             :      * Similarly, NaNs can have different bit patterns but they should all
     208             :      * compare as equal.  For backwards-compatibility reasons we force them to
     209             :      * have the hash value of a standard NaN.
     210             :      */
     211      136890 :     if (isnan(key))
     212          18 :         key = get_float8_nan();
     213             : 
     214      136890 :     return hash_any((unsigned char *) &key, sizeof(key));
     215             : }
     216             : 
     217             : Datum
     218          72 : hashfloat8extended(PG_FUNCTION_ARGS)
     219             : {
     220          72 :     float8      key = PG_GETARG_FLOAT8(0);
     221          72 :     uint64      seed = PG_GETARG_INT64(1);
     222             : 
     223             :     /* Same approach as hashfloat8 */
     224          72 :     if (key == (float8) 0)
     225          12 :         PG_RETURN_UINT64(seed);
     226          60 :     if (isnan(key))
     227           0 :         key = get_float8_nan();
     228             : 
     229          60 :     return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
     230             : }
     231             : 
     232             : Datum
     233      465322 : hashoidvector(PG_FUNCTION_ARGS)
     234             : {
     235      465322 :     oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     236             : 
     237      465322 :     check_valid_oidvector(key);
     238      465322 :     return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
     239             : }
     240             : 
     241             : Datum
     242          60 : hashoidvectorextended(PG_FUNCTION_ARGS)
     243             : {
     244          60 :     oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     245             : 
     246          60 :     check_valid_oidvector(key);
     247         120 :     return hash_any_extended((unsigned char *) key->values,
     248          60 :                              key->dim1 * sizeof(Oid),
     249          60 :                              PG_GETARG_INT64(1));
     250             : }
     251             : 
     252             : Datum
     253      565016 : hashname(PG_FUNCTION_ARGS)
     254             : {
     255      565016 :     char       *key = NameStr(*PG_GETARG_NAME(0));
     256             : 
     257      565016 :     return hash_any((unsigned char *) key, strlen(key));
     258             : }
     259             : 
     260             : Datum
     261          60 : hashnameextended(PG_FUNCTION_ARGS)
     262             : {
     263          60 :     char       *key = NameStr(*PG_GETARG_NAME(0));
     264             : 
     265          60 :     return hash_any_extended((unsigned char *) key, strlen(key),
     266          60 :                              PG_GETARG_INT64(1));
     267             : }
     268             : 
     269             : Datum
     270     1533308 : hashtext(PG_FUNCTION_ARGS)
     271             : {
     272     1533308 :     text       *key = PG_GETARG_TEXT_PP(0);
     273     1533308 :     Oid         collid = PG_GET_COLLATION();
     274             :     pg_locale_t mylocale;
     275             :     Datum       result;
     276             : 
     277     1533308 :     if (!collid)
     278           6 :         ereport(ERROR,
     279             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     280             :                  errmsg("could not determine which collation to use for string hashing"),
     281             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     282             : 
     283     1533302 :     mylocale = pg_newlocale_from_collation(collid);
     284             : 
     285     1533302 :     if (mylocale->deterministic)
     286             :     {
     287     1530632 :         result = hash_any((unsigned char *) VARDATA_ANY(key),
     288     1530632 :                           VARSIZE_ANY_EXHDR(key));
     289             :     }
     290             :     else
     291             :     {
     292             :         Size        bsize,
     293             :                     rsize;
     294             :         char       *buf;
     295        2670 :         const char *keydata = VARDATA_ANY(key);
     296        2670 :         size_t      keylen = VARSIZE_ANY_EXHDR(key);
     297             : 
     298             : 
     299        2670 :         bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
     300        2670 :         buf = palloc(bsize + 1);
     301             : 
     302        2670 :         rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
     303             : 
     304             :         /* the second call may return a smaller value than the first */
     305        2670 :         if (rsize > bsize)
     306           0 :             elog(ERROR, "pg_strnxfrm() returned unexpected result");
     307             : 
     308             :         /*
     309             :          * In principle, there's no reason to include the terminating NUL
     310             :          * character in the hash, but it was done before and the behavior must
     311             :          * be preserved.
     312             :          */
     313        2670 :         result = hash_any((uint8_t *) buf, bsize + 1);
     314             : 
     315        2670 :         pfree(buf);
     316             :     }
     317             : 
     318             :     /* Avoid leaking memory for toasted inputs */
     319     1533302 :     PG_FREE_IF_COPY(key, 0);
     320             : 
     321     1533302 :     return result;
     322             : }
     323             : 
     324             : Datum
     325        4068 : hashtextextended(PG_FUNCTION_ARGS)
     326             : {
     327        4068 :     text       *key = PG_GETARG_TEXT_PP(0);
     328        4068 :     Oid         collid = PG_GET_COLLATION();
     329             :     pg_locale_t mylocale;
     330             :     Datum       result;
     331             : 
     332        4068 :     if (!collid)
     333           0 :         ereport(ERROR,
     334             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     335             :                  errmsg("could not determine which collation to use for string hashing"),
     336             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     337             : 
     338        4068 :     mylocale = pg_newlocale_from_collation(collid);
     339             : 
     340        4068 :     if (mylocale->deterministic)
     341             :     {
     342        4044 :         result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     343        4044 :                                    VARSIZE_ANY_EXHDR(key),
     344        4044 :                                    PG_GETARG_INT64(1));
     345             :     }
     346             :     else
     347             :     {
     348             :         Size        bsize,
     349             :                     rsize;
     350             :         char       *buf;
     351          24 :         const char *keydata = VARDATA_ANY(key);
     352          24 :         size_t      keylen = VARSIZE_ANY_EXHDR(key);
     353             : 
     354          24 :         bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
     355          24 :         buf = palloc(bsize + 1);
     356             : 
     357          24 :         rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
     358             : 
     359             :         /* the second call may return a smaller value than the first */
     360          24 :         if (rsize > bsize)
     361           0 :             elog(ERROR, "pg_strnxfrm() returned unexpected result");
     362             : 
     363             :         /*
     364             :          * In principle, there's no reason to include the terminating NUL
     365             :          * character in the hash, but it was done before and the behavior must
     366             :          * be preserved.
     367             :          */
     368          24 :         result = hash_any_extended((uint8_t *) buf, bsize + 1,
     369          24 :                                    PG_GETARG_INT64(1));
     370             : 
     371          24 :         pfree(buf);
     372             :     }
     373             : 
     374        4068 :     PG_FREE_IF_COPY(key, 0);
     375             : 
     376        4068 :     return result;
     377             : }
     378             : 
     379             : /*
     380             :  * hashvarlena() can be used for any varlena datatype in which there are
     381             :  * no non-significant bits, ie, distinct bitpatterns never compare as equal.
     382             :  *
     383             :  * (However, you need to define an SQL-level wrapper function around it with
     384             :  * the concrete input data type; otherwise hashvalidate() won't accept it.
     385             :  * Moreover, at least for built-in types, a C-level wrapper function is also
     386             :  * recommended; otherwise, the opr_sanity test will get upset.)
     387             :  */
     388             : Datum
     389        6146 : hashvarlena(PG_FUNCTION_ARGS)
     390             : {
     391        6146 :     varlena    *key = PG_GETARG_VARLENA_PP(0);
     392             :     Datum       result;
     393             : 
     394        6146 :     result = hash_any((unsigned char *) VARDATA_ANY(key),
     395        6146 :                       VARSIZE_ANY_EXHDR(key));
     396             : 
     397             :     /* Avoid leaking memory for toasted inputs */
     398        6146 :     PG_FREE_IF_COPY(key, 0);
     399             : 
     400        6146 :     return result;
     401             : }
     402             : 
     403             : Datum
     404           0 : hashvarlenaextended(PG_FUNCTION_ARGS)
     405             : {
     406           0 :     varlena    *key = PG_GETARG_VARLENA_PP(0);
     407             :     Datum       result;
     408             : 
     409           0 :     result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     410           0 :                                VARSIZE_ANY_EXHDR(key),
     411           0 :                                PG_GETARG_INT64(1));
     412             : 
     413           0 :     PG_FREE_IF_COPY(key, 0);
     414             : 
     415           0 :     return result;
     416             : }
     417             : 
     418             : Datum
     419        6146 : hashbytea(PG_FUNCTION_ARGS)
     420             : {
     421        6146 :     return hashvarlena(fcinfo);
     422             : }
     423             : 
     424             : Datum
     425           0 : hashbyteaextended(PG_FUNCTION_ARGS)
     426             : {
     427           0 :     return hashvarlenaextended(fcinfo);
     428             : }

Generated by: LCOV version 1.16