LCOV - code coverage report
Current view: top level - src/backend/access/hash - hashfunc.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 124 138 89.9 %
Date: 2025-01-18 05:15:39 Functions: 24 26 92.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * hashfunc.c
       4             :  *    Support functions for hash access method.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/hash/hashfunc.c
      12             :  *
      13             :  * NOTES
      14             :  *    These functions are stored in pg_amproc.  For each operator class
      15             :  *    defined for hash indexes, they compute the hash value of the argument.
      16             :  *
      17             :  *    Additional hash functions appear in /utils/adt/ files for various
      18             :  *    specialized datatypes.
      19             :  *
      20             :  *    It is expected that every bit of a hash function's 32-bit result is
      21             :  *    as random as every other; failure to ensure this is likely to lead
      22             :  *    to poor performance of hash joins, for example.  In most cases a hash
      23             :  *    function should use hash_any() or its variant hash_uint32().
      24             :  *-------------------------------------------------------------------------
      25             :  */
      26             : 
      27             : #include "postgres.h"
      28             : 
      29             : #include "common/hashfn.h"
      30             : #include "utils/float.h"
      31             : #include "utils/fmgrprotos.h"
      32             : #include "utils/pg_locale.h"
      33             : #include "varatt.h"
      34             : 
      35             : /*
      36             :  * Datatype-specific hash functions.
      37             :  *
      38             :  * These support both hash indexes and hash joins.
      39             :  *
      40             :  * NOTE: some of these are also used by catcache operations, without
      41             :  * any direct connection to hash indexes.  Also, the common hash_any
      42             :  * routine is also used by dynahash tables.
      43             :  */
      44             : 
      45             : /* Note: this is used for both "char" and boolean datatypes */
      46             : Datum
      47      111984 : hashchar(PG_FUNCTION_ARGS)
      48             : {
      49      111984 :     return hash_uint32((int32) PG_GETARG_CHAR(0));
      50             : }
      51             : 
      52             : Datum
      53          66 : hashcharextended(PG_FUNCTION_ARGS)
      54             : {
      55          66 :     return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
      56             : }
      57             : 
      58             : Datum
      59      397016 : hashint2(PG_FUNCTION_ARGS)
      60             : {
      61      397016 :     return hash_uint32((int32) PG_GETARG_INT16(0));
      62             : }
      63             : 
      64             : Datum
      65          48 : hashint2extended(PG_FUNCTION_ARGS)
      66             : {
      67          48 :     return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
      68             : }
      69             : 
      70             : Datum
      71    21924298 : hashint4(PG_FUNCTION_ARGS)
      72             : {
      73    21924298 :     return hash_uint32(PG_GETARG_INT32(0));
      74             : }
      75             : 
      76             : Datum
      77      207208 : hashint4extended(PG_FUNCTION_ARGS)
      78             : {
      79      207208 :     return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
      80             : }
      81             : 
      82             : Datum
      83      631978 : hashint8(PG_FUNCTION_ARGS)
      84             : {
      85             :     /*
      86             :      * The idea here is to produce a hash value compatible with the values
      87             :      * produced by hashint4 and hashint2 for logically equal inputs; this is
      88             :      * necessary to support cross-type hash joins across these input types.
      89             :      * Since all three types are signed, we can xor the high half of the int8
      90             :      * value if the sign is positive, or the complement of the high half when
      91             :      * the sign is negative.
      92             :      */
      93      631978 :     int64       val = PG_GETARG_INT64(0);
      94      631978 :     uint32      lohalf = (uint32) val;
      95      631978 :     uint32      hihalf = (uint32) (val >> 32);
      96             : 
      97      631978 :     lohalf ^= (val >= 0) ? hihalf : ~hihalf;
      98             : 
      99      631978 :     return hash_uint32(lohalf);
     100             : }
     101             : 
     102             : Datum
     103         372 : hashint8extended(PG_FUNCTION_ARGS)
     104             : {
     105             :     /* Same approach as hashint8 */
     106         372 :     int64       val = PG_GETARG_INT64(0);
     107         372 :     uint32      lohalf = (uint32) val;
     108         372 :     uint32      hihalf = (uint32) (val >> 32);
     109             : 
     110         372 :     lohalf ^= (val >= 0) ? hihalf : ~hihalf;
     111             : 
     112         372 :     return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
     113             : }
     114             : 
     115             : Datum
     116    12015654 : hashoid(PG_FUNCTION_ARGS)
     117             : {
     118    12015654 :     return hash_uint32((uint32) PG_GETARG_OID(0));
     119             : }
     120             : 
     121             : Datum
     122          72 : hashoidextended(PG_FUNCTION_ARGS)
     123             : {
     124          72 :     return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     125             : }
     126             : 
     127             : Datum
     128        3142 : hashenum(PG_FUNCTION_ARGS)
     129             : {
     130        3142 :     return hash_uint32((uint32) PG_GETARG_OID(0));
     131             : }
     132             : 
     133             : Datum
     134        6036 : hashenumextended(PG_FUNCTION_ARGS)
     135             : {
     136        6036 :     return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     137             : }
     138             : 
     139             : Datum
     140       42318 : hashfloat4(PG_FUNCTION_ARGS)
     141             : {
     142       42318 :     float4      key = PG_GETARG_FLOAT4(0);
     143             :     float8      key8;
     144             : 
     145             :     /*
     146             :      * On IEEE-float machines, minus zero and zero have different bit patterns
     147             :      * but should compare as equal.  We must ensure that they have the same
     148             :      * hash value, which is most reliably done this way:
     149             :      */
     150       42318 :     if (key == (float4) 0)
     151          24 :         PG_RETURN_UINT32(0);
     152             : 
     153             :     /*
     154             :      * To support cross-type hashing of float8 and float4, we want to return
     155             :      * the same hash value hashfloat8 would produce for an equal float8 value.
     156             :      * So, widen the value to float8 and hash that.  (We must do this rather
     157             :      * than have hashfloat8 try to narrow its value to float4; that could fail
     158             :      * on overflow.)
     159             :      */
     160       42294 :     key8 = key;
     161             : 
     162             :     /*
     163             :      * Similarly, NaNs can have different bit patterns but they should all
     164             :      * compare as equal.  For backwards-compatibility reasons we force them to
     165             :      * have the hash value of a standard float8 NaN.  (You'd think we could
     166             :      * replace key with a float4 NaN and then widen it; but on some old
     167             :      * platforms, that way produces a different bit pattern.)
     168             :      */
     169       42294 :     if (isnan(key8))
     170          18 :         key8 = get_float8_nan();
     171             : 
     172       42294 :     return hash_any((unsigned char *) &key8, sizeof(key8));
     173             : }
     174             : 
     175             : Datum
     176          72 : hashfloat4extended(PG_FUNCTION_ARGS)
     177             : {
     178          72 :     float4      key = PG_GETARG_FLOAT4(0);
     179          72 :     uint64      seed = PG_GETARG_INT64(1);
     180             :     float8      key8;
     181             : 
     182             :     /* Same approach as hashfloat4 */
     183          72 :     if (key == (float4) 0)
     184          12 :         PG_RETURN_UINT64(seed);
     185          60 :     key8 = key;
     186          60 :     if (isnan(key8))
     187           0 :         key8 = get_float8_nan();
     188             : 
     189          60 :     return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
     190             : }
     191             : 
     192             : Datum
     193      136316 : hashfloat8(PG_FUNCTION_ARGS)
     194             : {
     195      136316 :     float8      key = PG_GETARG_FLOAT8(0);
     196             : 
     197             :     /*
     198             :      * On IEEE-float machines, minus zero and zero have different bit patterns
     199             :      * but should compare as equal.  We must ensure that they have the same
     200             :      * hash value, which is most reliably done this way:
     201             :      */
     202      136316 :     if (key == (float8) 0)
     203         690 :         PG_RETURN_UINT32(0);
     204             : 
     205             :     /*
     206             :      * Similarly, NaNs can have different bit patterns but they should all
     207             :      * compare as equal.  For backwards-compatibility reasons we force them to
     208             :      * have the hash value of a standard NaN.
     209             :      */
     210      135626 :     if (isnan(key))
     211          18 :         key = get_float8_nan();
     212             : 
     213      135626 :     return hash_any((unsigned char *) &key, sizeof(key));
     214             : }
     215             : 
     216             : Datum
     217          72 : hashfloat8extended(PG_FUNCTION_ARGS)
     218             : {
     219          72 :     float8      key = PG_GETARG_FLOAT8(0);
     220          72 :     uint64      seed = PG_GETARG_INT64(1);
     221             : 
     222             :     /* Same approach as hashfloat8 */
     223          72 :     if (key == (float8) 0)
     224          12 :         PG_RETURN_UINT64(seed);
     225          60 :     if (isnan(key))
     226           0 :         key = get_float8_nan();
     227             : 
     228          60 :     return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
     229             : }
     230             : 
     231             : Datum
     232      405394 : hashoidvector(PG_FUNCTION_ARGS)
     233             : {
     234      405394 :     oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     235             : 
     236      405394 :     return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
     237             : }
     238             : 
     239             : Datum
     240          60 : hashoidvectorextended(PG_FUNCTION_ARGS)
     241             : {
     242          60 :     oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     243             : 
     244         120 :     return hash_any_extended((unsigned char *) key->values,
     245          60 :                              key->dim1 * sizeof(Oid),
     246          60 :                              PG_GETARG_INT64(1));
     247             : }
     248             : 
     249             : Datum
     250      503682 : hashname(PG_FUNCTION_ARGS)
     251             : {
     252      503682 :     char       *key = NameStr(*PG_GETARG_NAME(0));
     253             : 
     254      503682 :     return hash_any((unsigned char *) key, strlen(key));
     255             : }
     256             : 
     257             : Datum
     258          60 : hashnameextended(PG_FUNCTION_ARGS)
     259             : {
     260          60 :     char       *key = NameStr(*PG_GETARG_NAME(0));
     261             : 
     262          60 :     return hash_any_extended((unsigned char *) key, strlen(key),
     263          60 :                              PG_GETARG_INT64(1));
     264             : }
     265             : 
     266             : Datum
     267     1471006 : hashtext(PG_FUNCTION_ARGS)
     268             : {
     269     1471006 :     text       *key = PG_GETARG_TEXT_PP(0);
     270     1471006 :     Oid         collid = PG_GET_COLLATION();
     271             :     pg_locale_t mylocale;
     272             :     Datum       result;
     273             : 
     274     1471006 :     if (!collid)
     275           6 :         ereport(ERROR,
     276             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     277             :                  errmsg("could not determine which collation to use for string hashing"),
     278             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     279             : 
     280     1471000 :     mylocale = pg_newlocale_from_collation(collid);
     281             : 
     282     1471000 :     if (mylocale->deterministic)
     283             :     {
     284     1466194 :         result = hash_any((unsigned char *) VARDATA_ANY(key),
     285     1466194 :                           VARSIZE_ANY_EXHDR(key));
     286             :     }
     287             :     else
     288             :     {
     289             :         Size        bsize,
     290             :                     rsize;
     291             :         char       *buf;
     292        4806 :         const char *keydata = VARDATA_ANY(key);
     293        4806 :         size_t      keylen = VARSIZE_ANY_EXHDR(key);
     294             : 
     295             : 
     296        4806 :         bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
     297        4806 :         buf = palloc(bsize + 1);
     298             : 
     299        4806 :         rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
     300             : 
     301             :         /* the second call may return a smaller value than the first */
     302        4806 :         if (rsize > bsize)
     303           0 :             elog(ERROR, "pg_strnxfrm() returned unexpected result");
     304             : 
     305             :         /*
     306             :          * In principle, there's no reason to include the terminating NUL
     307             :          * character in the hash, but it was done before and the behavior must
     308             :          * be preserved.
     309             :          */
     310        4806 :         result = hash_any((uint8_t *) buf, bsize + 1);
     311             : 
     312        4806 :         pfree(buf);
     313             :     }
     314             : 
     315             :     /* Avoid leaking memory for toasted inputs */
     316     1471000 :     PG_FREE_IF_COPY(key, 0);
     317             : 
     318     1471000 :     return result;
     319             : }
     320             : 
     321             : Datum
     322        4068 : hashtextextended(PG_FUNCTION_ARGS)
     323             : {
     324        4068 :     text       *key = PG_GETARG_TEXT_PP(0);
     325        4068 :     Oid         collid = PG_GET_COLLATION();
     326             :     pg_locale_t mylocale;
     327             :     Datum       result;
     328             : 
     329        4068 :     if (!collid)
     330           0 :         ereport(ERROR,
     331             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     332             :                  errmsg("could not determine which collation to use for string hashing"),
     333             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     334             : 
     335        4068 :     mylocale = pg_newlocale_from_collation(collid);
     336             : 
     337        4068 :     if (mylocale->deterministic)
     338             :     {
     339        4044 :         result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     340        4044 :                                    VARSIZE_ANY_EXHDR(key),
     341        4044 :                                    PG_GETARG_INT64(1));
     342             :     }
     343             :     else
     344             :     {
     345             :         Size        bsize,
     346             :                     rsize;
     347             :         char       *buf;
     348          24 :         const char *keydata = VARDATA_ANY(key);
     349          24 :         size_t      keylen = VARSIZE_ANY_EXHDR(key);
     350             : 
     351          24 :         bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
     352          24 :         buf = palloc(bsize + 1);
     353             : 
     354          24 :         rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
     355             : 
     356             :         /* the second call may return a smaller value than the first */
     357          24 :         if (rsize > bsize)
     358           0 :             elog(ERROR, "pg_strnxfrm() returned unexpected result");
     359             : 
     360             :         /*
     361             :          * In principle, there's no reason to include the terminating NUL
     362             :          * character in the hash, but it was done before and the behavior must
     363             :          * be preserved.
     364             :          */
     365          24 :         result = hash_any_extended((uint8_t *) buf, bsize + 1,
     366          24 :                                    PG_GETARG_INT64(1));
     367             : 
     368          24 :         pfree(buf);
     369             :     }
     370             : 
     371        4068 :     PG_FREE_IF_COPY(key, 0);
     372             : 
     373        4068 :     return result;
     374             : }
     375             : 
     376             : /*
     377             :  * hashvarlena() can be used for any varlena datatype in which there are
     378             :  * no non-significant bits, ie, distinct bitpatterns never compare as equal.
     379             :  *
     380             :  * (However, you need to define an SQL-level wrapper function around it with
     381             :  * the concrete input data type; otherwise hashvalidate() won't accept it.
     382             :  * Moreover, at least for built-in types, a C-level wrapper function is also
     383             :  * recommended; otherwise, the opr_sanity test will get upset.)
     384             :  */
     385             : Datum
     386        6146 : hashvarlena(PG_FUNCTION_ARGS)
     387             : {
     388        6146 :     struct varlena *key = PG_GETARG_VARLENA_PP(0);
     389             :     Datum       result;
     390             : 
     391        6146 :     result = hash_any((unsigned char *) VARDATA_ANY(key),
     392        6146 :                       VARSIZE_ANY_EXHDR(key));
     393             : 
     394             :     /* Avoid leaking memory for toasted inputs */
     395        6146 :     PG_FREE_IF_COPY(key, 0);
     396             : 
     397        6146 :     return result;
     398             : }
     399             : 
     400             : Datum
     401           0 : hashvarlenaextended(PG_FUNCTION_ARGS)
     402             : {
     403           0 :     struct varlena *key = PG_GETARG_VARLENA_PP(0);
     404             :     Datum       result;
     405             : 
     406           0 :     result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     407           0 :                                VARSIZE_ANY_EXHDR(key),
     408           0 :                                PG_GETARG_INT64(1));
     409             : 
     410           0 :     PG_FREE_IF_COPY(key, 0);
     411             : 
     412           0 :     return result;
     413             : }
     414             : 
     415             : Datum
     416        6146 : hashbytea(PG_FUNCTION_ARGS)
     417             : {
     418        6146 :     return hashvarlena(fcinfo);
     419             : }
     420             : 
     421             : Datum
     422           0 : hashbyteaextended(PG_FUNCTION_ARGS)
     423             : {
     424           0 :     return hashvarlenaextended(fcinfo);
     425             : }

Generated by: LCOV version 1.14