LCOV - code coverage report
Current view: top level - src/backend/access/hash - hashfunc.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 90.0 % 140 126
Test Date: 2026-04-07 14:16:30 Functions: 92.3 % 26 24
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * hashfunc.c
       4              :  *    Support functions for hash access method.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/access/hash/hashfunc.c
      12              :  *
      13              :  * NOTES
      14              :  *    These functions are stored in pg_amproc.  For each operator class
      15              :  *    defined for hash indexes, they compute the hash value of the argument.
      16              :  *
      17              :  *    Additional hash functions appear in /utils/adt/ files for various
      18              :  *    specialized datatypes.
      19              :  *
      20              :  *    It is expected that every bit of a hash function's 32-bit result is
      21              :  *    as random as every other; failure to ensure this is likely to lead
      22              :  *    to poor performance of hash joins, for example.  In most cases a hash
      23              :  *    function should use hash_any() or its variant hash_uint32().
      24              :  *-------------------------------------------------------------------------
      25              :  */
      26              : 
      27              : #include "postgres.h"
      28              : 
      29              : #include "common/hashfn.h"
      30              : #include "utils/builtins.h"
      31              : #include "utils/float.h"
      32              : #include "utils/fmgrprotos.h"
      33              : #include "utils/pg_locale.h"
      34              : #include "varatt.h"
      35              : 
      36              : /*
      37              :  * Datatype-specific hash functions.
      38              :  *
      39              :  * These support both hash indexes and hash joins.
      40              :  *
      41              :  * NOTE: some of these are also used by catcache operations, without
      42              :  * any direct connection to hash indexes.  In addition, the common
      43              :  * hash_any routine is used by dynahash tables.
      44              :  */
      45              : 
      46              : /* Note: this is used for both "char" and boolean datatypes */
      47              : Datum
      48        85350 : hashchar(PG_FUNCTION_ARGS)
      49              : {
      50        85350 :     return hash_uint32((int32) PG_GETARG_CHAR(0));
      51              : }
      52              : 
      53              : Datum
      54           44 : hashcharextended(PG_FUNCTION_ARGS)
      55              : {
      56           44 :     return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
      57              : }
      58              : 
      59              : Datum
      60      1060191 : hashint2(PG_FUNCTION_ARGS)
      61              : {
      62      1060191 :     return hash_uint32((int32) PG_GETARG_INT16(0));
      63              : }
      64              : 
      65              : Datum
      66           32 : hashint2extended(PG_FUNCTION_ARGS)
      67              : {
      68           32 :     return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
      69              : }
      70              : 
      71              : Datum
      72     19189717 : hashint4(PG_FUNCTION_ARGS)
      73              : {
      74     19189717 :     return hash_uint32(PG_GETARG_INT32(0));
      75              : }
      76              : 
      77              : Datum
      78       102747 : hashint4extended(PG_FUNCTION_ARGS)
      79              : {
      80       102747 :     return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
      81              : }
      82              : 
      83              : Datum
      84       421581 : hashint8(PG_FUNCTION_ARGS)
      85              : {
      86              :     /*
      87              :      * The idea here is to produce a hash value compatible with the values
      88              :      * produced by hashint4 and hashint2 for logically equal inputs; this is
      89              :      * necessary to support cross-type hash joins across these input types.
      90              :      * Since all three types are signed, we can xor the high half of the int8
      91              :      * value if the sign is positive, or the complement of the high half when
      92              :      * the sign is negative.
      93              :      */
      94       421581 :     int64       val = PG_GETARG_INT64(0);
      95       421581 :     uint32      lohalf = (uint32) val;
      96       421581 :     uint32      hihalf = (uint32) (val >> 32);
      97              : 
      98       421581 :     lohalf ^= (val >= 0) ? hihalf : ~hihalf;
      99              : 
     100       421581 :     return hash_uint32(lohalf);
     101              : }
     102              : 
     103              : Datum
     104          296 : hashint8extended(PG_FUNCTION_ARGS)
     105              : {
     106              :     /* Same approach as hashint8 */
     107          296 :     int64       val = PG_GETARG_INT64(0);
     108          296 :     uint32      lohalf = (uint32) val;
     109          296 :     uint32      hihalf = (uint32) (val >> 32);
     110              : 
     111          296 :     lohalf ^= (val >= 0) ? hihalf : ~hihalf;
     112              : 
     113          296 :     return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
     114              : }
     115              : 
     116              : Datum
     117     12405420 : hashoid(PG_FUNCTION_ARGS)
     118              : {
     119     12405420 :     return hash_uint32((uint32) PG_GETARG_OID(0));
     120              : }
     121              : 
     122              : Datum
     123           48 : hashoidextended(PG_FUNCTION_ARGS)
     124              : {
     125           48 :     return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     126              : }
     127              : 
     128              : Datum
     129         1584 : hashenum(PG_FUNCTION_ARGS)
     130              : {
     131         1584 :     return hash_uint32((uint32) PG_GETARG_OID(0));
     132              : }
     133              : 
     134              : Datum
     135         2024 : hashenumextended(PG_FUNCTION_ARGS)
     136              : {
     137         2024 :     return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     138              : }
     139              : 
     140              : Datum
     141        23803 : hashfloat4(PG_FUNCTION_ARGS)
     142              : {
     143        23803 :     float4      key = PG_GETARG_FLOAT4(0);
     144              :     float8      key8;
     145              : 
     146              :     /*
     147              :      * On IEEE-float machines, minus zero and zero have different bit patterns
     148              :      * but should compare as equal.  We must ensure that they have the same
     149              :      * hash value, which is most reliably done this way:
     150              :      */
     151        23803 :     if (key == (float4) 0)
     152          146 :         PG_RETURN_UINT32(0);
     153              : 
     154              :     /*
     155              :      * To support cross-type hashing of float8 and float4, we want to return
     156              :      * the same hash value hashfloat8 would produce for an equal float8 value.
     157              :      * So, widen the value to float8 and hash that.  (We must do this rather
     158              :      * than have hashfloat8 try to narrow its value to float4; that could fail
     159              :      * on overflow.)
     160              :      */
     161        23657 :     key8 = key;
     162              : 
     163              :     /*
     164              :      * Similarly, NaNs can have different bit patterns but they should all
     165              :      * compare as equal.  For backwards-compatibility reasons we force them to
     166              :      * have the hash value of a standard float8 NaN.  (You'd think we could
     167              :      * replace key with a float4 NaN and then widen it; but on some old
     168              :      * platforms, that way produces a different bit pattern.)
     169              :      */
     170        23657 :     if (isnan(key8))
     171           15 :         key8 = get_float8_nan();
     172              : 
     173        23657 :     return hash_any((unsigned char *) &key8, sizeof(key8));
     174              : }
     175              : 
     176              : Datum
     177           48 : hashfloat4extended(PG_FUNCTION_ARGS)
     178              : {
     179           48 :     float4      key = PG_GETARG_FLOAT4(0);
     180           48 :     uint64      seed = PG_GETARG_INT64(1);
     181              :     float8      key8;
     182              : 
     183              :     /* Same approach as hashfloat4 */
     184           48 :     if (key == (float4) 0)
     185            8 :         PG_RETURN_UINT64(seed);
     186           40 :     key8 = key;
     187           40 :     if (isnan(key8))
     188            0 :         key8 = get_float8_nan();
     189              : 
     190           40 :     return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
     191              : }
     192              : 
     193              : Datum
     194        91837 : hashfloat8(PG_FUNCTION_ARGS)
     195              : {
     196        91837 :     float8      key = PG_GETARG_FLOAT8(0);
     197              : 
     198              :     /*
     199              :      * On IEEE-float machines, minus zero and zero have different bit patterns
     200              :      * but should compare as equal.  We must ensure that they have the same
     201              :      * hash value, which is most reliably done this way:
     202              :      */
     203        91837 :     if (key == (float8) 0)
     204          427 :         PG_RETURN_UINT32(0);
     205              : 
     206              :     /*
     207              :      * Similarly, NaNs can have different bit patterns but they should all
     208              :      * compare as equal.  For backwards-compatibility reasons we force them to
     209              :      * have the hash value of a standard NaN.
     210              :      */
     211        91410 :     if (isnan(key))
     212           15 :         key = get_float8_nan();
     213              : 
     214        91410 :     return hash_any((unsigned char *) &key, sizeof(key));
     215              : }
     216              : 
     217              : Datum
     218           48 : hashfloat8extended(PG_FUNCTION_ARGS)
     219              : {
     220           48 :     float8      key = PG_GETARG_FLOAT8(0);
     221           48 :     uint64      seed = PG_GETARG_INT64(1);
     222              : 
     223              :     /* Same approach as hashfloat8 */
     224           48 :     if (key == (float8) 0)
     225            8 :         PG_RETURN_UINT64(seed);
     226           40 :     if (isnan(key))
     227            0 :         key = get_float8_nan();
     228              : 
     229           40 :     return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
     230              : }
     231              : 
     232              : Datum
     233       265237 : hashoidvector(PG_FUNCTION_ARGS)
     234              : {
     235       265237 :     oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     236              : 
     237       265237 :     check_valid_oidvector(key);
     238       265237 :     return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
     239              : }
     240              : 
     241              : Datum
     242           40 : hashoidvectorextended(PG_FUNCTION_ARGS)
     243              : {
     244           40 :     oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     245              : 
     246           40 :     check_valid_oidvector(key);
     247           80 :     return hash_any_extended((unsigned char *) key->values,
     248           40 :                              key->dim1 * sizeof(Oid),
     249           40 :                              PG_GETARG_INT64(1));
     250              : }
     251              : 
     252              : Datum
     253       342163 : hashname(PG_FUNCTION_ARGS)
     254              : {
     255       342163 :     char       *key = NameStr(*PG_GETARG_NAME(0));
     256              : 
     257       342163 :     return hash_any((unsigned char *) key, strlen(key));
     258              : }
     259              : 
     260              : Datum
     261           40 : hashnameextended(PG_FUNCTION_ARGS)
     262              : {
     263           40 :     char       *key = NameStr(*PG_GETARG_NAME(0));
     264              : 
     265           40 :     return hash_any_extended((unsigned char *) key, strlen(key),
     266           40 :                              PG_GETARG_INT64(1));
     267              : }
     268              : 
     269              : Datum
     270       930490 : hashtext(PG_FUNCTION_ARGS)
     271              : {
     272       930490 :     text       *key = PG_GETARG_TEXT_PP(0);
     273       930490 :     Oid         collid = PG_GET_COLLATION();
     274              :     pg_locale_t mylocale;
     275              :     Datum       result;
     276              : 
     277       930490 :     if (!collid)
     278            4 :         ereport(ERROR,
     279              :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     280              :                  errmsg("could not determine which collation to use for string hashing"),
     281              :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     282              : 
     283       930486 :     mylocale = pg_newlocale_from_collation(collid);
     284              : 
     285       930486 :     if (mylocale->deterministic)
     286              :     {
     287       927082 :         result = hash_any((unsigned char *) VARDATA_ANY(key),
     288       927082 :                           VARSIZE_ANY_EXHDR(key));
     289              :     }
     290              :     else
     291              :     {
     292              :         Size        bsize,
     293              :                     rsize;
     294              :         char       *buf;
     295         3404 :         const char *keydata = VARDATA_ANY(key);
     296         3404 :         size_t      keylen = VARSIZE_ANY_EXHDR(key);
     297              : 
     298              : 
     299         3404 :         bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
     300         3404 :         buf = palloc(bsize + 1);
     301              : 
     302         3404 :         rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
     303              : 
     304              :         /* the second call may return a smaller value than the first */
     305         3404 :         if (rsize > bsize)
     306            0 :             elog(ERROR, "pg_strnxfrm() returned unexpected result");
     307              : 
     308              :         /*
     309              :          * In principle, there's no reason to include the terminating NUL
     310              :          * character in the hash, but it was done before and the behavior must
     311              :          * be preserved.
     312              :          */
     313         3404 :         result = hash_any((uint8_t *) buf, bsize + 1);
     314              : 
     315         3404 :         pfree(buf);
     316              :     }
     317              : 
     318              :     /* Avoid leaking memory for toasted inputs */
     319       930486 :     PG_FREE_IF_COPY(key, 0);
     320              : 
     321       930486 :     return result;
     322              : }
     323              : 
     324              : Datum
     325         2718 : hashtextextended(PG_FUNCTION_ARGS)
     326              : {
     327         2718 :     text       *key = PG_GETARG_TEXT_PP(0);
     328         2718 :     Oid         collid = PG_GET_COLLATION();
     329              :     pg_locale_t mylocale;
     330              :     Datum       result;
     331              : 
     332         2718 :     if (!collid)
     333            0 :         ereport(ERROR,
     334              :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     335              :                  errmsg("could not determine which collation to use for string hashing"),
     336              :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     337              : 
     338         2718 :     mylocale = pg_newlocale_from_collation(collid);
     339              : 
     340         2718 :     if (mylocale->deterministic)
     341              :     {
     342         2702 :         result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     343         2702 :                                    VARSIZE_ANY_EXHDR(key),
     344         2702 :                                    PG_GETARG_INT64(1));
     345              :     }
     346              :     else
     347              :     {
     348              :         Size        bsize,
     349              :                     rsize;
     350              :         char       *buf;
     351           16 :         const char *keydata = VARDATA_ANY(key);
     352           16 :         size_t      keylen = VARSIZE_ANY_EXHDR(key);
     353              : 
     354           16 :         bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
     355           16 :         buf = palloc(bsize + 1);
     356              : 
     357           16 :         rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
     358              : 
     359              :         /* the second call may return a smaller value than the first */
     360           16 :         if (rsize > bsize)
     361            0 :             elog(ERROR, "pg_strnxfrm() returned unexpected result");
     362              : 
     363              :         /*
     364              :          * In principle, there's no reason to include the terminating NUL
     365              :          * character in the hash, but it was done before and the behavior must
     366              :          * be preserved.
     367              :          */
     368           16 :         result = hash_any_extended((uint8_t *) buf, bsize + 1,
     369           16 :                                    PG_GETARG_INT64(1));
     370              : 
     371           16 :         pfree(buf);
     372              :     }
     373              : 
     374         2718 :     PG_FREE_IF_COPY(key, 0);
     375              : 
     376         2718 :     return result;
     377              : }
     378              : 
     379              : /*
     380              :  * hashvarlena() can be used for any varlena datatype in which there are
     381              :  * no non-significant bits, ie, distinct bitpatterns never compare as equal.
     382              :  *
     383              :  * (However, you need to define an SQL-level wrapper function around it with
     384              :  * the concrete input data type; otherwise hashvalidate() won't accept it.
     385              :  * Moreover, at least for built-in types, a C-level wrapper function is also
     386              :  * recommended; otherwise, the opr_sanity test will get upset.)
     387              :  */
     388              : Datum
     389         4158 : hashvarlena(PG_FUNCTION_ARGS)
     390              : {
     391         4158 :     varlena    *key = PG_GETARG_VARLENA_PP(0);
     392              :     Datum       result;
     393              : 
     394         4158 :     result = hash_any((unsigned char *) VARDATA_ANY(key),
     395         4158 :                       VARSIZE_ANY_EXHDR(key));
     396              : 
     397              :     /* Avoid leaking memory for toasted inputs */
     398         4158 :     PG_FREE_IF_COPY(key, 0);
     399              : 
     400         4158 :     return result;
     401              : }
     402              : 
     403              : Datum
     404            0 : hashvarlenaextended(PG_FUNCTION_ARGS)
     405              : {
     406            0 :     varlena    *key = PG_GETARG_VARLENA_PP(0);
     407              :     Datum       result;
     408              : 
     409            0 :     result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     410            0 :                                VARSIZE_ANY_EXHDR(key),
     411            0 :                                PG_GETARG_INT64(1));
     412              : 
     413            0 :     PG_FREE_IF_COPY(key, 0);
     414              : 
     415            0 :     return result;
     416              : }
     417              : 
     418              : Datum
     419         4158 : hashbytea(PG_FUNCTION_ARGS)
     420              : {
     421         4158 :     return hashvarlena(fcinfo);
     422              : }
     423              : 
     424              : Datum
     425            0 : hashbyteaextended(PG_FUNCTION_ARGS)
     426              : {
     427            0 :     return hashvarlenaextended(fcinfo);
     428              : }
        

Generated by: LCOV version 2.0-1