LCOV - code coverage report
Current view: top level - src/backend/access/hash - hashfunc.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 101 113 89.4 %
Date: 2019-11-15 23:07:02 Functions: 23 24 95.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * hashfunc.c
       4             :  *    Support functions for hash access method.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/hash/hashfunc.c
      12             :  *
      13             :  * NOTES
      14             :  *    These functions are stored in pg_amproc.  For each operator class
      15             :  *    defined for hash indexes, they compute the hash value of the argument.
      16             :  *
      17             :  *    Additional hash functions appear in /utils/adt/ files for various
      18             :  *    specialized datatypes.
      19             :  *
      20             :  *    It is expected that every bit of a hash function's 32-bit result is
      21             :  *    as random as every other; failure to ensure this is likely to lead
      22             :  *    to poor performance of hash joins, for example.  In most cases a hash
      23             :  *    function should use hash_any() or its variant hash_uint32().
      24             :  *-------------------------------------------------------------------------
      25             :  */
      26             : 
      27             : #include "postgres.h"
      28             : 
      29             : #include "access/hash.h"
      30             : #include "catalog/pg_collation.h"
      31             : #include "utils/builtins.h"
      32             : #include "utils/hashutils.h"
      33             : #include "utils/pg_locale.h"
      34             : 
      35             : /*
      36             :  * Datatype-specific hash functions.
      37             :  *
      38             :  * These support both hash indexes and hash joins.
      39             :  *
      40             :  * NOTE: some of these are also used by catcache operations, without
      41             :  * any direct connection to hash indexes.  In addition, the common
      42             :  * hash_any routine is used by dynahash tables.
      43             :  */
      44             : 
      45             : /* hashchar: hash argument 0 (fetched as a char, cast to int32) with hash_uint32().  Note: this is used for both "char" and boolean datatypes */
      46             : Datum
      47      213764 : hashchar(PG_FUNCTION_ARGS)
      48             : {
      49      213764 :     return hash_uint32((int32) PG_GETARG_CHAR(0));
      50             : }
      51             : /* hashcharextended: same input handling as hashchar, but calls hash_uint32_extended() and passes the int64 argument 1 through as its second parameter */
      52             : Datum
      53          44 : hashcharextended(PG_FUNCTION_ARGS)
      54             : {
      55          44 :     return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
      56             : }
      57             : /* hashint2: hash argument 0 (an int16, widened to int32) with hash_uint32() */
      58             : Datum
      59       96630 : hashint2(PG_FUNCTION_ARGS)
      60             : {
      61       96630 :     return hash_uint32((int32) PG_GETARG_INT16(0));
      62             : }
      63             : /* hashint2extended: widen the int16 argument 0 to int32 and hash it with hash_uint32_extended(), passing the int64 argument 1 as the second parameter */
      64             : Datum
      65          32 : hashint2extended(PG_FUNCTION_ARGS)
      66             : {
      67          32 :     return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
      68             : }
      69             : /* hashint4: hash the int32 argument 0 with hash_uint32() */
      70             : Datum
      71    14010978 : hashint4(PG_FUNCTION_ARGS)
      72             : {
      73    14010978 :     return hash_uint32(PG_GETARG_INT32(0));
      74             : }
      75             : /* hashint4extended: hash the int32 argument 0 with hash_uint32_extended(), passing the int64 argument 1 as the second parameter */
      76             : Datum
      77      200384 : hashint4extended(PG_FUNCTION_ARGS)
      78             : {
      79      200384 :     return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
      80             : }
      81             : /* hashint8: fold the high 32 bits of the int64 argument 0 into its low 32 bits, then hash the result with hash_uint32() */
      82             : Datum
      83      118050 : hashint8(PG_FUNCTION_ARGS)
      84             : {
      85             :     /*
      86             :      * The idea here is to produce a hash value compatible with the values
      87             :      * produced by hashint4 and hashint2 for logically equal inputs; this is
      88             :      * necessary to support cross-type hash joins across these input types.
      89             :      * Since all three types are signed, we can xor the high half of the int8
      90             :      * value if the sign is positive, or the complement of the high half when
      91             :      * the sign is negative.
      92             :      */
      93      118050 :     int64       val = PG_GETARG_INT64(0);
      94      118050 :     uint32      lohalf = (uint32) val;
      95      118050 :     uint32      hihalf = (uint32) (val >> 32);
      96             : 
      97      118050 :     lohalf ^= (val >= 0) ? hihalf : ~hihalf;
      98             : 
      99      118050 :     return hash_uint32(lohalf);
     100             : }
     101             : /* hashint8extended: fold the int64 argument 0 to 32 bits exactly as hashint8 does, then hash with hash_uint32_extended(), passing the int64 argument 1 */
     102             : Datum
     103         248 : hashint8extended(PG_FUNCTION_ARGS)
     104             : {
     105             :     /* Same approach as hashint8 */
     106         248 :     int64       val = PG_GETARG_INT64(0);
     107         248 :     uint32      lohalf = (uint32) val;
     108         248 :     uint32      hihalf = (uint32) (val >> 32);
     109             : 
     110         248 :     lohalf ^= (val >= 0) ? hihalf : ~hihalf;
     111             : 
     112         248 :     return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
     113             : }
     114             : /* hashoid: hash the Oid argument 0 (cast to uint32) with hash_uint32() */
     115             : Datum
     116     7568432 : hashoid(PG_FUNCTION_ARGS)
     117             : {
     118     7568432 :     return hash_uint32((uint32) PG_GETARG_OID(0));
     119             : }
     120             : /* hashoidextended: hash the Oid argument 0 (cast to uint32) with hash_uint32_extended(), passing the int64 argument 1 as the second parameter */
     121             : Datum
     122          48 : hashoidextended(PG_FUNCTION_ARGS)
     123             : {
     124          48 :     return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     125             : }
     126             : /* hashenum: argument 0 is fetched as an Oid and hashed with hash_uint32(), the same computation hashoid performs */
     127             : Datum
     128        3116 : hashenum(PG_FUNCTION_ARGS)
     129             : {
     130        3116 :     return hash_uint32((uint32) PG_GETARG_OID(0));
     131             : }
     132             : /* hashenumextended: argument 0 is fetched as an Oid and hashed with hash_uint32_extended(), passing the int64 argument 1; same computation as hashoidextended */
     133             : Datum
     134          24 : hashenumextended(PG_FUNCTION_ARGS)
     135             : {
     136          24 :     return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     137             : }
     138             : /* hashfloat4: returns 0 when the float4 argument compares equal to zero; otherwise widens it to float8 and hashes those bytes with hash_any() */
     139             : Datum
     140          52 : hashfloat4(PG_FUNCTION_ARGS)
     141             : {
     142          52 :     float4      key = PG_GETARG_FLOAT4(0);
     143             :     float8      key8;
     144             : 
     145             :     /*
     146             :      * On IEEE-float machines, minus zero and zero have different bit patterns
     147             :      * but should compare as equal.  We must ensure that they have the same
     148             :      * hash value, which is most reliably done this way:
     149             :      */
     150          52 :     if (key == (float4) 0)
     151           8 :         PG_RETURN_UINT32(0);
     152             : 
     153             :     /*
     154             :      * To support cross-type hashing of float8 and float4, we want to return
     155             :      * the same hash value hashfloat8 would produce for an equal float8 value.
     156             :      * So, widen the value to float8 and hash that.  (We must do this rather
     157             :      * than have hashfloat8 try to narrow its value to float4; that could fail
     158             :      * on overflow.)
     159             :      */
     160          44 :     key8 = key;
     161             :     /* NOTE(review): the raw bytes are hashed, so NaNs with differing bit patterns would hash differently -- confirm whether float equality can treat such values as equal */
     162          44 :     return hash_any((unsigned char *) &key8, sizeof(key8));
     163             : }
     164             : /* hashfloat4extended: returns the seed (argument 1) unchanged when the float4 argument compares equal to zero; otherwise widens it to float8 and hashes with hash_any_extended() */
     165             : Datum
     166          48 : hashfloat4extended(PG_FUNCTION_ARGS)
     167             : {
     168          48 :     float4      key = PG_GETARG_FLOAT4(0);
     169          48 :     uint64      seed = PG_GETARG_INT64(1);
     170             :     float8      key8;
     171             : 
     172             :     /* Same approach as hashfloat4 */
     173          48 :     if (key == (float4) 0)
     174           8 :         PG_RETURN_UINT64(seed);
     175          40 :     key8 = key;
     176             : 
     177          40 :     return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
     178             : }
     179             : /* hashfloat8: returns 0 when the float8 argument compares equal to zero; otherwise hashes the value's bytes with hash_any() */
     180             : Datum
     181       70284 : hashfloat8(PG_FUNCTION_ARGS)
     182             : {
     183       70284 :     float8      key = PG_GETARG_FLOAT8(0);
     184             : 
     185             :     /*
     186             :      * On IEEE-float machines, minus zero and zero have different bit patterns
     187             :      * but should compare as equal.  We must ensure that they have the same
     188             :      * hash value, which is most reliably done this way:
     189             :      */
     190       70284 :     if (key == (float8) 0)
     191         312 :         PG_RETURN_UINT32(0);
     192             :     /* NOTE(review): the raw bytes are hashed, so NaNs with differing bit patterns would hash differently -- confirm whether float equality can treat such values as equal */
     193       69972 :     return hash_any((unsigned char *) &key, sizeof(key));
     194             : }
     195             : /* hashfloat8extended: returns the seed (argument 1) unchanged when the float8 argument compares equal to zero; otherwise hashes the value's bytes with hash_any_extended() */
     196             : Datum
     197          48 : hashfloat8extended(PG_FUNCTION_ARGS)
     198             : {
     199          48 :     float8      key = PG_GETARG_FLOAT8(0);
     200          48 :     uint64      seed = PG_GETARG_INT64(1);
     201             : 
     202             :     /* Same approach as hashfloat8 */
     203          48 :     if (key == (float8) 0)
     204           8 :         PG_RETURN_UINT64(seed);
     205             : 
     206          40 :     return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
     207             : }
     208             : /* hashoidvector: hash the first key->dim1 Oid entries of the oidvector's values array, as raw bytes, with hash_any() */
     209             : Datum
     210      313658 : hashoidvector(PG_FUNCTION_ARGS)
     211             : {
     212      313658 :     oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     213             : 
     214      313658 :     return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
     215             : }
     216             : /* hashoidvectorextended: hash the same bytes as hashoidvector, but with hash_any_extended() and the int64 argument 1 as its third parameter */
     217             : Datum
     218          40 : hashoidvectorextended(PG_FUNCTION_ARGS)
     219             : {
     220          40 :     oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     221             : 
     222          40 :     return hash_any_extended((unsigned char *) key->values,
     223          40 :                              key->dim1 * sizeof(Oid),
     224          40 :                              PG_GETARG_INT64(1));
     225             : }
     226             : /* hashname: hash the Name argument's string with hash_any(); only the strlen() bytes before the terminating NUL are hashed */
     227             : Datum
     228     1480638 : hashname(PG_FUNCTION_ARGS)
     229             : {
     230     1480638 :     char       *key = NameStr(*PG_GETARG_NAME(0));
     231             : 
     232     1480638 :     return hash_any((unsigned char *) key, strlen(key));
     233             : }
     234             : /* hashnameextended: hash the same strlen() bytes as hashname, but with hash_any_extended() and the int64 argument 1 as its third parameter */
     235             : Datum
     236          40 : hashnameextended(PG_FUNCTION_ARGS)
     237             : {
     238          40 :     char       *key = NameStr(*PG_GETARG_NAME(0));
     239             : 
     240          40 :     return hash_any_extended((unsigned char *) key, strlen(key),
     241          40 :                              PG_GETARG_INT64(1));
     242             : }
     243             : 
     244             : Datum
     245      862932 : hashtext(PG_FUNCTION_ARGS)
     246             : {
     247      862932 :     text       *key = PG_GETARG_TEXT_PP(0);
     248      862932 :     Oid         collid = PG_GET_COLLATION();
     249      862932 :     pg_locale_t mylocale = 0;
     250             :     Datum       result;
     251             : 
     252      862932 :     if (!collid)
     253           4 :         ereport(ERROR,
     254             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     255             :                  errmsg("could not determine which collation to use for string hashing"),
     256             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     257             : 
     258      862928 :     if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
     259           0 :         mylocale = pg_newlocale_from_collation(collid);
     260             : 
     261      862928 :     if (!mylocale || mylocale->deterministic)
     262             :     {
     263     2588784 :         result = hash_any((unsigned char *) VARDATA_ANY(key),
     264     2588784 :                           VARSIZE_ANY_EXHDR(key));
     265             :     }
     266             :     else
     267             :     {
     268             : #ifdef USE_ICU
     269             :         if (mylocale->provider == COLLPROVIDER_ICU)
     270             :         {
     271             :             int32_t     ulen = -1;
     272             :             UChar      *uchar = NULL;
     273             :             Size        bsize;
     274             :             uint8_t    *buf;
     275             : 
     276             :             ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
     277             : 
     278             :             bsize = ucol_getSortKey(mylocale->info.icu.ucol,
     279             :                                     uchar, ulen, NULL, 0);
     280             :             buf = palloc(bsize);
     281             :             ucol_getSortKey(mylocale->info.icu.ucol,
     282             :                             uchar, ulen, buf, bsize);
     283             : 
     284             :             result = hash_any(buf, bsize);
     285             : 
     286             :             pfree(buf);
     287             :         }
     288             :         else
     289             : #endif
     290             :             /* shouldn't happen */
     291           0 :             elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
     292             :     }
     293             : 
     294             :     /* Avoid leaking memory for toasted inputs */
     295      862928 :     PG_FREE_IF_COPY(key, 0);
     296             : 
     297      862928 :     return result;
     298             : }
     299             : 
     300             : Datum
     301        2648 : hashtextextended(PG_FUNCTION_ARGS)
     302             : {
     303        2648 :     text       *key = PG_GETARG_TEXT_PP(0);
     304        2648 :     Oid         collid = PG_GET_COLLATION();
     305        2648 :     pg_locale_t mylocale = 0;
     306             :     Datum       result;
     307             : 
     308        2648 :     if (!collid)
     309           0 :         ereport(ERROR,
     310             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     311             :                  errmsg("could not determine which collation to use for string hashing"),
     312             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     313             : 
     314        2648 :     if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
     315           0 :         mylocale = pg_newlocale_from_collation(collid);
     316             : 
     317        2648 :     if (!mylocale || mylocale->deterministic)
     318             :     {
     319        7944 :         result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     320        7944 :                                    VARSIZE_ANY_EXHDR(key),
     321        2648 :                                    PG_GETARG_INT64(1));
     322             :     }
     323             :     else
     324             :     {
     325             : #ifdef USE_ICU
     326             :         if (mylocale->provider == COLLPROVIDER_ICU)
     327             :         {
     328             :             int32_t     ulen = -1;
     329             :             UChar      *uchar = NULL;
     330             :             Size        bsize;
     331             :             uint8_t    *buf;
     332             : 
     333             :             ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
     334             : 
     335             :             bsize = ucol_getSortKey(mylocale->info.icu.ucol,
     336             :                                     uchar, ulen, NULL, 0);
     337             :             buf = palloc(bsize);
     338             :             ucol_getSortKey(mylocale->info.icu.ucol,
     339             :                             uchar, ulen, buf, bsize);
     340             : 
     341             :             result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
     342             : 
     343             :             pfree(buf);
     344             :         }
     345             :         else
     346             : #endif
     347             :             /* shouldn't happen */
     348           0 :             elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
     349             :     }
     350             : 
     351        2648 :     PG_FREE_IF_COPY(key, 0);
     352             : 
     353        2648 :     return result;
     354             : }
     355             : 
     356             : /*
     357             :  * hashvarlena() can be used for any varlena datatype in which there are
     358             :  * no non-significant bits, i.e., distinct bit patterns never compare as equal.
     359             :  * It hashes the value's data bytes (excluding the header) with hash_any().
     360             : Datum
     361        2624 : hashvarlena(PG_FUNCTION_ARGS)
     362             : {
     363        2624 :     struct varlena *key = PG_GETARG_VARLENA_PP(0);
     364             :     Datum       result;
     365             : 
     366        7872 :     result = hash_any((unsigned char *) VARDATA_ANY(key),
     367        7872 :                       VARSIZE_ANY_EXHDR(key));
     368             : 
     369             :     /* Avoid leaking memory for toasted inputs */
     370        2624 :     PG_FREE_IF_COPY(key, 0);
     371             : 
     372        2624 :     return result;
     373             : }
     374             : /* hashvarlenaextended: hash the same data bytes as hashvarlena, but with hash_any_extended() and the int64 argument 1 as its third parameter */
     375             : Datum
     376           0 : hashvarlenaextended(PG_FUNCTION_ARGS)
     377             : {
     378           0 :     struct varlena *key = PG_GETARG_VARLENA_PP(0);
     379             :     Datum       result;
     380             : 
     381           0 :     result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     382           0 :                                VARSIZE_ANY_EXHDR(key),
     383           0 :                                PG_GETARG_INT64(1));
     384             :     /* Avoid leaking memory for toasted inputs */
     385           0 :     PG_FREE_IF_COPY(key, 0);
     386             : 
     387           0 :     return result;
     388             : }

Generated by: LCOV version 1.13