LCOV - code coverage report
Current view: top level - src/backend/access/hash - hashfunc.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 126 138 91.3 %
Date: 2024-05-01 21:11:23 Functions: 23 24 95.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * hashfunc.c
       4             :  *    Support functions for hash access method.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/hash/hashfunc.c
      12             :  *
      13             :  * NOTES
      14             :  *    These functions are stored in pg_amproc.  For each operator class
      15             :  *    defined for hash indexes, they compute the hash value of the argument.
      16             :  *
      17             :  *    Additional hash functions appear in /utils/adt/ files for various
      18             :  *    specialized datatypes.
      19             :  *
      20             :  *    It is expected that every bit of a hash function's 32-bit result is
      21             :  *    as random as every other; failure to ensure this is likely to lead
      22             :  *    to poor performance of hash joins, for example.  In most cases a hash
      23             :  *    function should use hash_any() or its variant hash_uint32().
      24             :  *-------------------------------------------------------------------------
      25             :  */
      26             : 
      27             : #include "postgres.h"
      28             : 
      29             : #include "common/hashfn.h"
      30             : #include "utils/float.h"
      31             : #include "utils/fmgrprotos.h"
      32             : #include "utils/pg_locale.h"
      33             : #include "varatt.h"
      34             : 
      35             : /*
      36             :  * Datatype-specific hash functions.
      37             :  *
      38             :  * These support both hash indexes and hash joins.
      39             :  *
      40             :  * NOTE: some of these are also used by catcache operations, without
      41             :  * any direct connection to hash indexes.  Also, the common hash_any
      42             :  * routine is also used by dynahash tables.
      43             :  */
      44             : 
      45             : /* Note: this is used for both "char" and boolean datatypes */
      46             : Datum
      47      185416 : hashchar(PG_FUNCTION_ARGS)
      48             : {
      49      185416 :     return hash_uint32((int32) PG_GETARG_CHAR(0));
      50             : }
      51             : 
      52             : Datum
      53          66 : hashcharextended(PG_FUNCTION_ARGS)
      54             : {
      55          66 :     return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
      56             : }
      57             : 
      58             : Datum
      59      561630 : hashint2(PG_FUNCTION_ARGS)
      60             : {
      61      561630 :     return hash_uint32((int32) PG_GETARG_INT16(0));
      62             : }
      63             : 
      64             : Datum
      65          48 : hashint2extended(PG_FUNCTION_ARGS)
      66             : {
      67          48 :     return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
      68             : }
      69             : 
      70             : Datum
      71    25009714 : hashint4(PG_FUNCTION_ARGS)
      72             : {
      73    25009714 :     return hash_uint32(PG_GETARG_INT32(0));
      74             : }
      75             : 
      76             : Datum
      77      207208 : hashint4extended(PG_FUNCTION_ARGS)
      78             : {
      79      207208 :     return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
      80             : }
      81             : 
      82             : Datum
      83      630704 : hashint8(PG_FUNCTION_ARGS)
      84             : {
      85             :     /*
      86             :      * The idea here is to produce a hash value compatible with the values
      87             :      * produced by hashint4 and hashint2 for logically equal inputs; this is
      88             :      * necessary to support cross-type hash joins across these input types.
      89             :      * Since all three types are signed, we can xor the high half of the int8
      90             :      * value if the sign is positive, or the complement of the high half when
      91             :      * the sign is negative.
      92             :      */
      93      630704 :     int64       val = PG_GETARG_INT64(0);
      94      630704 :     uint32      lohalf = (uint32) val;
      95      630704 :     uint32      hihalf = (uint32) (val >> 32);
      96             : 
      97      630704 :     lohalf ^= (val >= 0) ? hihalf : ~hihalf;
      98             : 
      99      630704 :     return hash_uint32(lohalf);
     100             : }
     101             : 
     102             : Datum
     103         372 : hashint8extended(PG_FUNCTION_ARGS)
     104             : {
     105             :     /* Same approach as hashint8 */
     106         372 :     int64       val = PG_GETARG_INT64(0);
     107         372 :     uint32      lohalf = (uint32) val;
     108         372 :     uint32      hihalf = (uint32) (val >> 32);
     109             : 
     110         372 :     lohalf ^= (val >= 0) ? hihalf : ~hihalf;
     111             : 
     112         372 :     return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
     113             : }
     114             : 
     115             : Datum
     116    12173734 : hashoid(PG_FUNCTION_ARGS)
     117             : {
     118    12173734 :     return hash_uint32((uint32) PG_GETARG_OID(0));
     119             : }
     120             : 
     121             : Datum
     122          72 : hashoidextended(PG_FUNCTION_ARGS)
     123             : {
     124          72 :     return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     125             : }
     126             : 
     127             : Datum
     128        3142 : hashenum(PG_FUNCTION_ARGS)
     129             : {
     130        3142 :     return hash_uint32((uint32) PG_GETARG_OID(0));
     131             : }
     132             : 
     133             : Datum
     134        6036 : hashenumextended(PG_FUNCTION_ARGS)
     135             : {
     136        6036 :     return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     137             : }
     138             : 
     139             : Datum
     140       42310 : hashfloat4(PG_FUNCTION_ARGS)
     141             : {
     142       42310 :     float4      key = PG_GETARG_FLOAT4(0);
     143             :     float8      key8;
     144             : 
     145             :     /*
     146             :      * On IEEE-float machines, minus zero and zero have different bit patterns
     147             :      * but should compare as equal.  We must ensure that they have the same
     148             :      * hash value, which is most reliably done this way:
     149             :      */
     150       42310 :     if (key == (float4) 0)
     151          24 :         PG_RETURN_UINT32(0);
     152             : 
     153             :     /*
     154             :      * To support cross-type hashing of float8 and float4, we want to return
     155             :      * the same hash value hashfloat8 would produce for an equal float8 value.
     156             :      * So, widen the value to float8 and hash that.  (We must do this rather
     157             :      * than have hashfloat8 try to narrow its value to float4; that could fail
     158             :      * on overflow.)
     159             :      */
     160       42286 :     key8 = key;
     161             : 
     162             :     /*
     163             :      * Similarly, NaNs can have different bit patterns but they should all
     164             :      * compare as equal.  For backwards-compatibility reasons we force them to
     165             :      * have the hash value of a standard float8 NaN.  (You'd think we could
     166             :      * replace key with a float4 NaN and then widen it; but on some old
     167             :      * platforms, that way produces a different bit pattern.)
     168             :      */
     169       42286 :     if (isnan(key8))
     170          18 :         key8 = get_float8_nan();
     171             : 
     172       42286 :     return hash_any((unsigned char *) &key8, sizeof(key8));
     173             : }
     174             : 
     175             : Datum
     176          72 : hashfloat4extended(PG_FUNCTION_ARGS)
     177             : {
     178          72 :     float4      key = PG_GETARG_FLOAT4(0);
     179          72 :     uint64      seed = PG_GETARG_INT64(1);
     180             :     float8      key8;
     181             : 
     182             :     /* Same approach as hashfloat4 */
     183          72 :     if (key == (float4) 0)
     184          12 :         PG_RETURN_UINT64(seed);
     185          60 :     key8 = key;
     186          60 :     if (isnan(key8))
     187           0 :         key8 = get_float8_nan();
     188             : 
     189          60 :     return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
     190             : }
     191             : 
     192             : Datum
     193      136120 : hashfloat8(PG_FUNCTION_ARGS)
     194             : {
     195      136120 :     float8      key = PG_GETARG_FLOAT8(0);
     196             : 
     197             :     /*
     198             :      * On IEEE-float machines, minus zero and zero have different bit patterns
     199             :      * but should compare as equal.  We must ensure that they have the same
     200             :      * hash value, which is most reliably done this way:
     201             :      */
     202      136120 :     if (key == (float8) 0)
     203         686 :         PG_RETURN_UINT32(0);
     204             : 
     205             :     /*
     206             :      * Similarly, NaNs can have different bit patterns but they should all
     207             :      * compare as equal.  For backwards-compatibility reasons we force them to
     208             :      * have the hash value of a standard NaN.
     209             :      */
     210      135434 :     if (isnan(key))
     211          18 :         key = get_float8_nan();
     212             : 
     213      135434 :     return hash_any((unsigned char *) &key, sizeof(key));
     214             : }
     215             : 
     216             : Datum
     217          72 : hashfloat8extended(PG_FUNCTION_ARGS)
     218             : {
     219          72 :     float8      key = PG_GETARG_FLOAT8(0);
     220          72 :     uint64      seed = PG_GETARG_INT64(1);
     221             : 
     222             :     /* Same approach as hashfloat8 */
     223          72 :     if (key == (float8) 0)
     224          12 :         PG_RETURN_UINT64(seed);
     225          60 :     if (isnan(key))
     226           0 :         key = get_float8_nan();
     227             : 
     228          60 :     return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
     229             : }
     230             : 
     231             : Datum
     232      367432 : hashoidvector(PG_FUNCTION_ARGS)
     233             : {
     234      367432 :     oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     235             : 
     236      367432 :     return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
     237             : }
     238             : 
     239             : Datum
     240          60 : hashoidvectorextended(PG_FUNCTION_ARGS)
     241             : {
     242          60 :     oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     243             : 
     244         120 :     return hash_any_extended((unsigned char *) key->values,
     245          60 :                              key->dim1 * sizeof(Oid),
     246          60 :                              PG_GETARG_INT64(1));
     247             : }
     248             : 
     249             : Datum
     250      515424 : hashname(PG_FUNCTION_ARGS)
     251             : {
     252      515424 :     char       *key = NameStr(*PG_GETARG_NAME(0));
     253             : 
     254      515424 :     return hash_any((unsigned char *) key, strlen(key));
     255             : }
     256             : 
     257             : Datum
     258          60 : hashnameextended(PG_FUNCTION_ARGS)
     259             : {
     260          60 :     char       *key = NameStr(*PG_GETARG_NAME(0));
     261             : 
     262          60 :     return hash_any_extended((unsigned char *) key, strlen(key),
     263          60 :                              PG_GETARG_INT64(1));
     264             : }
     265             : 
     266             : Datum
     267     1405534 : hashtext(PG_FUNCTION_ARGS)
     268             : {
     269     1405534 :     text       *key = PG_GETARG_TEXT_PP(0);
     270     1405534 :     Oid         collid = PG_GET_COLLATION();
     271     1405534 :     pg_locale_t mylocale = 0;
     272             :     Datum       result;
     273             : 
     274     1405534 :     if (!collid)
     275           6 :         ereport(ERROR,
     276             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     277             :                  errmsg("could not determine which collation to use for string hashing"),
     278             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     279             : 
     280     1405528 :     if (!lc_collate_is_c(collid))
     281      989978 :         mylocale = pg_newlocale_from_collation(collid);
     282             : 
     283     1405528 :     if (pg_locale_deterministic(mylocale))
     284             :     {
     285     1405354 :         result = hash_any((unsigned char *) VARDATA_ANY(key),
     286     1405354 :                           VARSIZE_ANY_EXHDR(key));
     287             :     }
     288             :     else
     289             :     {
     290             :         Size        bsize,
     291             :                     rsize;
     292             :         char       *buf;
     293         174 :         const char *keydata = VARDATA_ANY(key);
     294         174 :         size_t      keylen = VARSIZE_ANY_EXHDR(key);
     295             : 
     296             : 
     297         174 :         bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
     298         174 :         buf = palloc(bsize + 1);
     299             : 
     300         174 :         rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
     301         174 :         if (rsize != bsize)
     302           0 :             elog(ERROR, "pg_strnxfrm() returned unexpected result");
     303             : 
     304             :         /*
     305             :          * In principle, there's no reason to include the terminating NUL
     306             :          * character in the hash, but it was done before and the behavior must
     307             :          * be preserved.
     308             :          */
     309         174 :         result = hash_any((uint8_t *) buf, bsize + 1);
     310             : 
     311         174 :         pfree(buf);
     312             :     }
     313             : 
     314             :     /* Avoid leaking memory for toasted inputs */
     315     1405528 :     PG_FREE_IF_COPY(key, 0);
     316             : 
     317     1405528 :     return result;
     318             : }
     319             : 
     320             : Datum
     321        4068 : hashtextextended(PG_FUNCTION_ARGS)
     322             : {
     323        4068 :     text       *key = PG_GETARG_TEXT_PP(0);
     324        4068 :     Oid         collid = PG_GET_COLLATION();
     325        4068 :     pg_locale_t mylocale = 0;
     326             :     Datum       result;
     327             : 
     328        4068 :     if (!collid)
     329           0 :         ereport(ERROR,
     330             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     331             :                  errmsg("could not determine which collation to use for string hashing"),
     332             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     333             : 
     334        4068 :     if (!lc_collate_is_c(collid))
     335        2728 :         mylocale = pg_newlocale_from_collation(collid);
     336             : 
     337        4068 :     if (pg_locale_deterministic(mylocale))
     338             :     {
     339        4044 :         result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     340        4044 :                                    VARSIZE_ANY_EXHDR(key),
     341        4044 :                                    PG_GETARG_INT64(1));
     342             :     }
     343             :     else
     344             :     {
     345             :         Size        bsize,
     346             :                     rsize;
     347             :         char       *buf;
     348          24 :         const char *keydata = VARDATA_ANY(key);
     349          24 :         size_t      keylen = VARSIZE_ANY_EXHDR(key);
     350             : 
     351          24 :         bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
     352          24 :         buf = palloc(bsize + 1);
     353             : 
     354          24 :         rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
     355          24 :         if (rsize != bsize)
     356           0 :             elog(ERROR, "pg_strnxfrm() returned unexpected result");
     357             : 
     358             :         /*
     359             :          * In principle, there's no reason to include the terminating NUL
     360             :          * character in the hash, but it was done before and the behavior must
     361             :          * be preserved.
     362             :          */
     363          24 :         result = hash_any_extended((uint8_t *) buf, bsize + 1,
     364          24 :                                    PG_GETARG_INT64(1));
     365             : 
     366          24 :         pfree(buf);
     367             :     }
     368             : 
     369        4068 :     PG_FREE_IF_COPY(key, 0);
     370             : 
     371        4068 :     return result;
     372             : }
     373             : 
     374             : /*
     375             :  * hashvarlena() can be used for any varlena datatype in which there are
     376             :  * no non-significant bits, ie, distinct bitpatterns never compare as equal.
     377             :  */
     378             : Datum
     379        6138 : hashvarlena(PG_FUNCTION_ARGS)
     380             : {
     381        6138 :     struct varlena *key = PG_GETARG_VARLENA_PP(0);
     382             :     Datum       result;
     383             : 
     384        6138 :     result = hash_any((unsigned char *) VARDATA_ANY(key),
     385        6138 :                       VARSIZE_ANY_EXHDR(key));
     386             : 
     387             :     /* Avoid leaking memory for toasted inputs */
     388        6138 :     PG_FREE_IF_COPY(key, 0);
     389             : 
     390        6138 :     return result;
     391             : }
     392             : 
     393             : Datum
     394           0 : hashvarlenaextended(PG_FUNCTION_ARGS)
     395             : {
     396           0 :     struct varlena *key = PG_GETARG_VARLENA_PP(0);
     397             :     Datum       result;
     398             : 
     399           0 :     result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     400           0 :                                VARSIZE_ANY_EXHDR(key),
     401           0 :                                PG_GETARG_INT64(1));
     402             : 
     403           0 :     PG_FREE_IF_COPY(key, 0);
     404             : 
     405           0 :     return result;
     406             : }

Generated by: LCOV version 1.14