LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_icu.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 48.6 % 385 187
Test Date: 2026-05-24 06:16:20 Functions: 61.1 % 54 33
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-----------------------------------------------------------------------
       2              :  *
       3              :  * PostgreSQL locale utilities for ICU
       4              :  *
       5              :  * Portions Copyright (c) 2002-2026, PostgreSQL Global Development Group
       6              :  *
       7              :  * src/backend/utils/adt/pg_locale_icu.c
       8              :  *
       9              :  *-----------------------------------------------------------------------
      10              :  */
      11              : 
      12              : #include "postgres.h"
      13              : 
      14              : #ifdef USE_ICU
      15              : #include <unicode/ucasemap.h>
      16              : #include <unicode/ucnv.h>
      17              : #include <unicode/ucol.h>
      18              : #include <unicode/ustring.h>
      19              : 
      20              : /*
      21              :  * ucol_strcollUTF8() was introduced in ICU 50, but it is buggy before ICU 53.
      22              :  * (see
      23              :  * <https://www.postgresql.org/message-id/flat/f1438ec6-22aa-4029-9a3b-26f79d330e72%40manitou-mail.org>)
      24              :  */
      25              : #if U_ICU_VERSION_MAJOR_NUM >= 53
      26              : #define HAVE_UCOL_STRCOLLUTF8 1
      27              : #else
      28              : #undef HAVE_UCOL_STRCOLLUTF8
      29              : #endif
      30              : 
      31              : #endif
      32              : 
      33              : #include "access/htup_details.h"
      34              : #include "catalog/pg_database.h"
      35              : #include "catalog/pg_collation.h"
      36              : #include "mb/pg_wchar.h"
      37              : #include "miscadmin.h"
      38              : #include "utils/builtins.h"
      39              : #include "utils/formatting.h"
      40              : #include "utils/memutils.h"
      41              : #include "utils/pg_locale.h"
      42              : #include "utils/syscache.h"
      43              : 
      44              : /*
      45              :  * Size of stack buffer to use for string transformations, used to avoid heap
      46              :  * allocations in typical cases. This should be large enough that most strings
      47              :  * will fit, but small enough that we feel comfortable putting it on the
      48              :  * stack.
      49              :  */
      50              : #define     TEXTBUFLEN          1024
      51              : 
      52              : extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
      53              : 
      54              : #ifdef USE_ICU
      55              : 
      56              : extern UCollator *pg_ucol_open(const char *loc_str);
      57              : static UCaseMap *pg_ucasemap_open(const char *loc_str);
      58              : 
      59              : static size_t strlower_icu(char *dest, size_t destsize, const char *src,
      60              :                            size_t srclen, pg_locale_t locale);
      61              : static size_t strtitle_icu(char *dest, size_t destsize, const char *src,
      62              :                            size_t srclen, pg_locale_t locale);
      63              : static size_t strupper_icu(char *dest, size_t destsize, const char *src,
      64              :                            size_t srclen, pg_locale_t locale);
      65              : static size_t strfold_icu(char *dest, size_t destsize, const char *src,
      66              :                           size_t srclen, pg_locale_t locale);
      67              : static size_t strlower_icu_utf8(char *dest, size_t destsize, const char *src,
      68              :                                 size_t srclen, pg_locale_t locale);
      69              : static size_t strtitle_icu_utf8(char *dest, size_t destsize, const char *src,
      70              :                                 size_t srclen, pg_locale_t locale);
      71              : static size_t strupper_icu_utf8(char *dest, size_t destsize, const char *src,
      72              :                                 size_t srclen, pg_locale_t locale);
      73              : static size_t strfold_icu_utf8(char *dest, size_t destsize, const char *src,
      74              :                                size_t srclen, pg_locale_t locale);
      75              : static size_t downcase_ident_icu(char *dst, size_t dstsize, const char *src,
      76              :                                  size_t srclen, pg_locale_t locale);
      77              : static int  strncoll_icu(const char *arg1, size_t len1,
      78              :                          const char *arg2, size_t len2,
      79              :                          pg_locale_t locale);
      80              : static int  strcoll_icu(const char *arg1, const char *arg2,
      81              :                         pg_locale_t locale);
      82              : static size_t strnxfrm_icu(char *dest, size_t destsize,
      83              :                            const char *src, size_t srclen,
      84              :                            pg_locale_t locale);
      85              : static size_t strxfrm_icu(char *dest, size_t destsize, const char *src,
      86              :                           pg_locale_t locale);
      87              : extern char *get_collation_actual_version_icu(const char *collcollate);
      88              : /*
                      :  * Common signature for the UChar-based case-conversion callbacks that are
                      :  * passed as the 'func' argument of icu_convert_case(); for example,
                      :  * u_strToTitle_default_BI() and u_strFoldCase_default() declared below have
                      :  * exactly this shape.  The callback reads srcLength UChars from src, writes
                      :  * into dest (capacity destCapacity) for the given locale name, and reports
                      :  * problems through *pErrorCode.
                      :  *
                      :  * NOTE(review): the int32_t result is presumably the converted length, as
                      :  * with ICU's own u_strToUpper() family -- confirm.
                      :  */
      89              : typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
      90              :                                      const UChar *src, int32_t srcLength,
      91              :                                      const char *locale,
      92              :                                      UErrorCode *pErrorCode);
      93              : 
      94              : /*
      95              :  * Converter object for converting between ICU's UChar strings and C strings
      96              :  * in database encoding.  Since the database encoding doesn't change, we only
      97              :  * need one of these per session.
                      :  *
                      :  * Starts out NULL.  NOTE(review): presumably opened on first use by
                      :  * init_icu_converter() -- confirm against that function.
      98              :  */
      99              : static UConverter *icu_converter = NULL;
     100              : 
     101              : static UCollator *make_icu_collator(const char *iculocstr,
     102              :                                     const char *icurules);
     103              : static size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
     104              :                                   const char *src, size_t srclen,
     105              :                                   pg_locale_t locale);
     106              : static size_t strxfrm_prefix_icu(char *dest, size_t destsize, const char *src,
     107              :                                  pg_locale_t locale);
     108              : #ifdef HAVE_UCOL_STRCOLLUTF8
     109              : static int  strncoll_icu_utf8(const char *arg1, size_t len1,
     110              :                               const char *arg2, size_t len2,
     111              :                               pg_locale_t locale);
     112              : static int  strcoll_icu_utf8(const char *arg1,
     113              :                              const char *arg2,
     114              :                              pg_locale_t locale);
     115              : #endif
     116              : static size_t strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
     117              :                                        const char *src, size_t srclen,
     118              :                                        pg_locale_t locale);
     119              : static size_t strxfrm_prefix_icu_utf8(char *dest, size_t destsize, const char *src,
     120              :                                       pg_locale_t locale);
     121              : static void init_icu_converter(void);
     122              : static int32_t uchar_length(UConverter *converter,
     123              :                             const char *str, int32_t len);
     124              : static int32_t uchar_convert(UConverter *converter,
     125              :                              UChar *dest, int32_t destlen,
     126              :                              const char *src, int32_t srclen);
     127              : static int32_t icu_to_uchar(UChar **buff_uchar, const char *buff,
     128              :                             size_t nbytes);
     129              : static size_t icu_from_uchar(char *dest, size_t destsize,
     130              :                              const UChar *buff_uchar, int32_t len_uchar);
     131              : static void icu_set_collation_attributes(UCollator *collator, const char *loc,
     132              :                                          UErrorCode *status);
     133              : static int32_t icu_convert_case(ICU_Convert_Func func, char *dest,
     134              :                                 size_t destsize, const char *src,
     135              :                                 size_t srclen, pg_locale_t locale);
     136              : static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
     137              :                                        const UChar *src, int32_t srcLength,
     138              :                                        const char *locale,
     139              :                                        UErrorCode *pErrorCode);
     140              : static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
     141              :                                      const UChar *src, int32_t srcLength,
     142              :                                      const char *locale,
     143              :                                      UErrorCode *pErrorCode);
     144              : static int32_t foldcase_options(const char *locale);
     145              : 
     146              : /*
     147              :  * XXX: many of the functions below rely on casts directly from pg_wchar to
     148              :  * UChar32, which is correct for UTF-8 and LATIN1, but not in general.
     149              :  */
     150              : /* wc_toupper method: returns u_toupper(wc); the 'locale' argument is not consulted. */
     151              : static pg_wchar
     152           72 : toupper_icu(pg_wchar wc, pg_locale_t locale)
     153              : {
     154           72 :     return u_toupper(wc);
     155              : }
     156              : /* wc_tolower method: returns u_tolower(wc); the 'locale' argument is not consulted. */
     157              : static pg_wchar
     158           72 : tolower_icu(pg_wchar wc, pg_locale_t locale)
     159              : {
     160           72 :     return u_tolower(wc);
     161              : }
     162              : /*
                      :  * Collation method table installed by create_pg_locale_icu() when the
                      :  * database encoding is not UTF-8.  Every entry is the generic (non-"_utf8")
                      :  * implementation.
                      :  */
     163              : static const struct collate_methods collate_methods_icu = {
     164              :     .strncoll = strncoll_icu,
     165              :     .strcoll = strcoll_icu,
     166              :     .strnxfrm = strnxfrm_icu,
     167              :     .strxfrm = strxfrm_icu,
     168              :     .strnxfrm_prefix = strnxfrm_prefix_icu,
     169              :     .strxfrm_prefix = strxfrm_prefix_icu,
     170              :     .strxfrm_is_safe = true,
     171              : };
     172              : /*
                      :  * Collation method table installed by create_pg_locale_icu() when the
                      :  * database encoding is UTF-8.  The comparison entries use the UTF-8
                      :  * specific functions only when HAVE_UCOL_STRCOLLUTF8 is defined and fall
                      :  * back to the generic ones otherwise; the prefix-transform entries always
                      :  * use the "_utf8" variants, while strnxfrm/strxfrm are shared with the
                      :  * non-UTF-8 table.
                      :  */
     173              : static const struct collate_methods collate_methods_icu_utf8 = {
     174              : #ifdef HAVE_UCOL_STRCOLLUTF8
     175              :     .strncoll = strncoll_icu_utf8,
     176              :     .strcoll = strcoll_icu_utf8,
     177              : #else
     178              :     .strncoll = strncoll_icu,
     179              :     .strcoll = strcoll_icu,
     180              : #endif
     181              :     .strnxfrm = strnxfrm_icu,
     182              :     .strxfrm = strxfrm_icu,
     183              :     .strnxfrm_prefix = strnxfrm_prefix_icu_utf8,
     184              :     .strxfrm_prefix = strxfrm_prefix_icu_utf8,
     185              :     .strxfrm_is_safe = true,
     186              : };
     187              : /* wc_isdigit method: returns ICU's u_isdigit(wc); 'locale' is unused. */
     188              : static bool
     189         8192 : wc_isdigit_icu(pg_wchar wc, pg_locale_t locale)
     190              : {
     191         8192 :     return u_isdigit(wc);
     192              : }
     193              : /* wc_isalpha method: returns ICU's u_isalpha(wc); 'locale' is unused. */
     194              : static bool
     195         8192 : wc_isalpha_icu(pg_wchar wc, pg_locale_t locale)
     196              : {
     197         8192 :     return u_isalpha(wc);
     198              : }
     199              : /* wc_isalnum method: returns ICU's u_isalnum(wc); 'locale' is unused. */
     200              : static bool
     201         8192 : wc_isalnum_icu(pg_wchar wc, pg_locale_t locale)
     202              : {
     203         8192 :     return u_isalnum(wc);
     204              : }
     205              : /* wc_isupper method: returns ICU's u_isupper(wc); 'locale' is unused. */
     206              : static bool
     207         8192 : wc_isupper_icu(pg_wchar wc, pg_locale_t locale)
     208              : {
     209         8192 :     return u_isupper(wc);
     210              : }
     211              : /* wc_islower method: returns ICU's u_islower(wc); 'locale' is unused. */
     212              : static bool
     213         8192 : wc_islower_icu(pg_wchar wc, pg_locale_t locale)
     214              : {
     215         8192 :     return u_islower(wc);
     216              : }
     217              : /* wc_isgraph method: returns ICU's u_isgraph(wc); 'locale' is unused. */
     218              : static bool
     219         8192 : wc_isgraph_icu(pg_wchar wc, pg_locale_t locale)
     220              : {
     221         8192 :     return u_isgraph(wc);
     222              : }
     223              : /* wc_isprint method: returns ICU's u_isprint(wc); 'locale' is unused. */
     224              : static bool
     225         8192 : wc_isprint_icu(pg_wchar wc, pg_locale_t locale)
     226              : {
     227         8192 :     return u_isprint(wc);
     228              : }
     229              : /* wc_ispunct method: returns ICU's u_ispunct(wc); 'locale' is unused. */
     230              : static bool
     231         8192 : wc_ispunct_icu(pg_wchar wc, pg_locale_t locale)
     232              : {
     233         8192 :     return u_ispunct(wc);
     234              : }
     235              : /* wc_isspace method: returns ICU's u_isspace(wc); 'locale' is unused. */
     236              : static bool
     237         8192 : wc_isspace_icu(pg_wchar wc, pg_locale_t locale)
     238              : {
     239         8192 :     return u_isspace(wc);
     240              : }
     241              : /* wc_isxdigit method: returns ICU's u_isxdigit(wc); 'locale' is unused. */
     242              : static bool
     243            0 : wc_isxdigit_icu(pg_wchar wc, pg_locale_t locale)
     244              : {
     245            0 :     return u_isxdigit(wc);
     246              : }
     247              : /* wc_iscased method: tests the UCHAR_CASED binary property of wc; 'locale' is unused. */
     248              : static bool
     249          105 : wc_iscased_icu(pg_wchar wc, pg_locale_t locale)
     250              : {
     251          105 :     return u_hasBinaryProperty(wc, UCHAR_CASED);
     252              : }
     253              : /*
                      :  * Character-classification and case-mapping method table installed by
                      :  * create_pg_locale_icu() when the database encoding is not UTF-8.  The
                      :  * string case-mapping entries are the generic (non-"_utf8") functions, and
                      :  * downcase_ident is provided by downcase_ident_icu().
                      :  */
     254              : static const struct ctype_methods ctype_methods_icu = {
     255              :     .strlower = strlower_icu,
     256              :     .strtitle = strtitle_icu,
     257              :     .strupper = strupper_icu,
     258              :     .strfold = strfold_icu,
     259              :     .downcase_ident = downcase_ident_icu,
     260              :     .wc_isdigit = wc_isdigit_icu,
     261              :     .wc_isalpha = wc_isalpha_icu,
     262              :     .wc_isalnum = wc_isalnum_icu,
     263              :     .wc_isupper = wc_isupper_icu,
     264              :     .wc_islower = wc_islower_icu,
     265              :     .wc_isgraph = wc_isgraph_icu,
     266              :     .wc_isprint = wc_isprint_icu,
     267              :     .wc_ispunct = wc_ispunct_icu,
     268              :     .wc_isspace = wc_isspace_icu,
     269              :     .wc_isxdigit = wc_isxdigit_icu,
     270              :     .wc_iscased = wc_iscased_icu,
     271              :     .wc_toupper = toupper_icu,
     272              :     .wc_tolower = tolower_icu,
     273              : };
     274              : /*
                      :  * Character-classification and case-mapping method table installed by
                      :  * create_pg_locale_icu() when the database encoding is UTF-8.  It differs
                      :  * from ctype_methods_icu only in the four string case-mapping entries
                      :  * (the "_utf8" variants) and in leaving downcase_ident NULL; the
                      :  * per-character wc_* entries are the same functions in both tables.
                      :  */
     275              : static const struct ctype_methods ctype_methods_icu_utf8 = {
     276              :     .strlower = strlower_icu_utf8,
     277              :     .strtitle = strtitle_icu_utf8,
     278              :     .strupper = strupper_icu_utf8,
     279              :     .strfold = strfold_icu_utf8,
     280              :     /* uses plain ASCII semantics for historical reasons */
     281              :     .downcase_ident = NULL,
     282              :     .wc_isdigit = wc_isdigit_icu,
     283              :     .wc_isalpha = wc_isalpha_icu,
     284              :     .wc_isalnum = wc_isalnum_icu,
     285              :     .wc_isupper = wc_isupper_icu,
     286              :     .wc_islower = wc_islower_icu,
     287              :     .wc_isgraph = wc_isgraph_icu,
     288              :     .wc_isprint = wc_isprint_icu,
     289              :     .wc_ispunct = wc_ispunct_icu,
     290              :     .wc_isspace = wc_isspace_icu,
     291              :     .wc_isxdigit = wc_isxdigit_icu,
     292              :     .wc_iscased = wc_iscased_icu,
     293              :     .wc_toupper = toupper_icu,
     294              :     .wc_tolower = tolower_icu,
     295              : };
     296              : 
     297              : /*
     298              :  * ICU still depends on libc for compatibility with certain historical
     299              :  * behavior for single-byte encodings.  See downcase_ident_icu().
     300              :  *
     301              :  * XXX: consider fixing by decoding the single byte into a code point, and
     302              :  * using u_tolower().
                      :  *
                      :  * Creates and returns a libc locale_t for the locale name 'ctype':
                      :  * newlocale() with LC_CTYPE_MASK on non-Windows builds, _create_locale()
                      :  * with LC_ALL under WIN32.  A null result is passed to
                      :  * report_newlocale_failure().  NOTE(review): that helper presumably raises
                      :  * an ERROR and does not return -- confirm.
     303              :  */
     304              : static locale_t
     305            0 : make_libc_ctype_locale(const char *ctype)
     306              : {
     307              :     locale_t    loc;
     308              : 
     309              : #ifndef WIN32
     310            0 :     loc = newlocale(LC_CTYPE_MASK, ctype, NULL);
     311              : #else
     312              :     loc = _create_locale(LC_ALL, ctype);
     313              : #endif
     314            0 :     if (!loc)
     315            0 :         report_newlocale_failure(ctype);
     316              : 
     317            0 :     return loc;
     318              : }
     319              : #endif
     320              : /*
                      :  * Build a pg_locale_t for the ICU collation 'collid'.
                      :  *
                      :  * For DEFAULT_COLLATION_OID the locale string and optional rules are read
                      :  * from the current database's pg_database row (datlocale, daticurules) and
                      :  * the collation is treated as deterministic.  For any other OID they are
                      :  * read from the pg_collation row (colllocale, collicurules), together with
                      :  * collisdeterministic.
                      :  *
                      :  * The result struct and its copy of the locale string are allocated in
                      :  * 'context'.  The collate/ctype method tables are chosen by database
                      :  * encoding: the "_utf8" tables for PG_UTF8, the generic ones otherwise.
                      :  * A UCaseMap is opened only in the UTF-8 case, and a libc locale_t is
                      :  * created only for the default collation in a single-byte encoding.
                      :  *
                      :  * In a build without USE_ICU this always raises
                      :  * ERRCODE_FEATURE_NOT_SUPPORTED.
                      :  */
     321              : pg_locale_t
     322          137 : create_pg_locale_icu(Oid collid, MemoryContext context)
     323              : {
     324              : #ifdef USE_ICU
     325              :     bool        deterministic;
     326              :     const char *iculocstr;
     327          137 :     const char *icurules = NULL;
     328              :     UCollator  *collator;
     329          137 :     locale_t    loc = (locale_t) 0;
     330              :     pg_locale_t result;
     331              : 
     332          137 :     if (collid == DEFAULT_COLLATION_OID)
     333              :     {
     334              :         HeapTuple   tp;
     335              :         Datum       datum;
     336              :         bool        isnull;
     337              : 
     338           13 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     339           13 :         if (!HeapTupleIsValid(tp))
     340            0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     341              : 
     342              :         /* default database collation is always deterministic */
     343           13 :         deterministic = true;
     344           13 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     345              :                                        Anum_pg_database_datlocale);
     346           13 :         iculocstr = TextDatumGetCString(datum);
     347           13 :         datum = SysCacheGetAttr(DATABASEOID, tp,
     348              :                                 Anum_pg_database_daticurules, &isnull);
     349           13 :         if (!isnull)
     350            0 :             icurules = TextDatumGetCString(datum);
     351              : 
     352              :         /* libc only needed for default locale and single-byte encoding */
     353           13 :         if (pg_database_encoding_max_length() == 1)
     354              :         {
     355              :             const char *ctype;
     356              : 
     357            0 :             datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     358              :                                            Anum_pg_database_datctype);
     359            0 :             ctype = TextDatumGetCString(datum);
     360              : 
     361            0 :             loc = make_libc_ctype_locale(ctype);
     362              :         }
     363              : 
     364           13 :         ReleaseSysCache(tp);
     365              :     }
     366              :     else
     367              :     {
     368              :         Form_pg_collation collform;
     369              :         HeapTuple   tp;
     370              :         Datum       datum;
     371              :         bool        isnull;
     372              : 
     373          124 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     374          124 :         if (!HeapTupleIsValid(tp))
     375            0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     376          124 :         collform = (Form_pg_collation) GETSTRUCT(tp);
     377          124 :         deterministic = collform->collisdeterministic;
     378          124 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     379              :                                        Anum_pg_collation_colllocale);
     380          124 :         iculocstr = TextDatumGetCString(datum);
     381          124 :         datum = SysCacheGetAttr(COLLOID, tp,
     382              :                                 Anum_pg_collation_collicurules, &isnull);
     383          124 :         if (!isnull)
     384           12 :             icurules = TextDatumGetCString(datum);
     385              : 
     386          124 :         ReleaseSysCache(tp);
     387              :     }
     388              : /* The syscache tuple is already released here; the strings were copied out by TextDatumGetCString(). */
     389          137 :     collator = make_icu_collator(iculocstr, icurules);  /* reaches pg_ucol_open(), which raises ERROR if the collator cannot be opened */
     390              : 
     391          131 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     392          131 :     result->icu.locale = MemoryContextStrdup(context, iculocstr);
     393          131 :     result->icu.ucol = collator;
     394          131 :     result->icu.lt = loc;   /* stays (locale_t) 0 unless the single-byte default-collation path above ran */
     395          131 :     result->deterministic = deterministic;
     396          131 :     result->collate_is_c = false;
     397          131 :     result->ctype_is_c = false;
     398          131 :     if (GetDatabaseEncoding() == PG_UTF8)
     399              :     {
     400          131 :         result->icu.ucasemap = pg_ucasemap_open(iculocstr);
     401          131 :         result->collate = &collate_methods_icu_utf8;
     402          131 :         result->ctype = &ctype_methods_icu_utf8;
     403              :     }
     404              :     else    /* non-UTF-8: icu.ucasemap is not assigned and keeps the zero from MemoryContextAllocZero() */
     405              :     {
     406            0 :         result->collate = &collate_methods_icu;
     407            0 :         result->ctype = &ctype_methods_icu;
     408              :     }
     409              : 
     410          131 :     return result;
     411              : #else
     412              :     /* could get here if a collation was created by a build with ICU */
     413              :     ereport(ERROR,
     414              :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     415              :              errmsg("ICU is not supported in this build")));
     416              : 
     417              :     return NULL;
     418              : #endif
     419              : }
     420              : 
     421              : #ifdef USE_ICU
     422              : 
     423              : /*
     424              :  * Check locale string and fix it if necessary. Returns a new palloc'd string.
     425              :  *
     426              :  * In ICU versions 54 and earlier, "und" is not a recognized spelling of the
     427              :  * root locale. If the first component of the locale is "und", replace with
     428              :  * "root" before opening.
                      :  *
                      :  * A NULL loc_str raises an ERROR.  The version test below is an ordinary
                      :  * "if" on the compile-time constant U_ICU_VERSION_MAJOR_NUM rather than an
                      :  * #if.  Inside it, a failure (or a not-terminated warning) from
                      :  * uloc_getLanguage() is reported as ERRCODE_INVALID_PARAMETER_VALUE.  When
                      :  * no rewrite applies, the result is a plain pstrdup() of loc_str, so the
                      :  * caller can always pfree() the returned string.
     429              :  */
     430              : static char *
     431        48512 : fix_icu_locale_str(const char *loc_str)
     432              : {
     433              :     /*
     434              :      * Must never open default collator, because it depends on the environment
     435              :      * and may change at any time. Should not happen, but check here to catch
     436              :      * bugs that might be hard to catch otherwise.
     437              :      *
     438              :      * NB: the default collator is not the same as the collator for the root
     439              :      * locale. The root locale may be specified as the empty string, "und", or
     440              :      * "root". The default collator is opened by passing NULL to ucol_open().
     441              :      */
     442        48512 :     if (loc_str == NULL)
     443            0 :         elog(ERROR, "opening default collator is not supported");
     444              : 
     445              :     if (U_ICU_VERSION_MAJOR_NUM < 55)
     446              :     {
     447              :         char        lang[ULOC_LANG_CAPACITY];
     448              :         UErrorCode  status = U_ZERO_ERROR;
     449              : 
     450              :         uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
     451              :         if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
     452              :         {
     453              :             ereport(ERROR,
     454              :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     455              :                      errmsg("could not get language from locale \"%s\": %s",
     456              :                             loc_str, u_errorName(status))));
     457              :         }
     458              : 
     459              :         if (strcmp(lang, "und") == 0)
     460              :         {
     461              :             const char *remainder = loc_str + strlen("und");    /* everything after the first three bytes of loc_str */
     462              :             char       *fixed_str;
     463              : 
     464              :             fixed_str = palloc(strlen("root") + strlen(remainder) + 1); /* +1 for the terminating NUL */
     465              :             strcpy(fixed_str, "root");
     466              :             strcat(fixed_str, remainder);
     467              : 
     468              :             return fixed_str;
     469              :         }
     470              :     }
     471              : 
     472        48512 :     return pstrdup(loc_str);
     473              : }
     474              : 
     475              : /*
     476              :  * Wrapper around ucol_open() to handle API differences for older ICU
     477              :  * versions.
     478              :  *
     479              :  * Ensure that no path leaks a UCollator.
                      :  *
                      :  * The locale string is first normalized with fix_icu_locale_str(), and the
                      :  * normalized copy is what gets passed to ucol_open(); error messages quote
                      :  * the caller's original loc_str.  When compiled against ICU older than 54,
                      :  * icu_set_collation_attributes() is applied afterwards, and on failure the
                      :  * collator is closed before the ERROR is raised.  On success the temporary
                      :  * string is freed and the open collator is returned without being closed
                      :  * here, so releasing it is left to the caller.
     480              :  */
     481              : UCollator *
     482        48381 : pg_ucol_open(const char *loc_str)
     483              : {
     484              :     UCollator  *collator;
     485              :     UErrorCode  status;
     486              :     char       *fixed_str;
     487              : 
     488        48381 :     fixed_str = fix_icu_locale_str(loc_str);
     489              : 
     490        48381 :     status = U_ZERO_ERROR;
     491        48381 :     collator = ucol_open(fixed_str, &status);
     492        48381 :     if (U_FAILURE(status))
     493            7 :         ereport(ERROR,
     494              :         /* use original string for error report */
     495              :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     496              :                  errmsg("could not open collator for locale \"%s\": %s",
     497              :                         loc_str, u_errorName(status))));
     498              : 
     499              :     if (U_ICU_VERSION_MAJOR_NUM < 54)
     500              :     {
     501              :         status = U_ZERO_ERROR;
     502              :         icu_set_collation_attributes(collator, fixed_str, &status);
     503              : 
     504              :         /*
     505              :          * Pretend the error came from ucol_open(), for consistent error
     506              :          * message across ICU versions.
     507              :          */
     508              :         if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
     509              :         {
     510              :             ucol_close(collator);
     511              :             ereport(ERROR,
     512              :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     513              :                      errmsg("could not open collator for locale \"%s\": %s",
     514              :                             loc_str, u_errorName(status))));
     515              :         }
     516              :     }
     517              : 
     518        48374 :     pfree(fixed_str);
     519              : 
     520        48374 :     return collator;
     521              : }
     522              : 
     523              : /*
     524              :  * Wrapper around ucasemap_open() to handle API differences for older ICU
     525              :  * versions.
     526              :  *
     527              :  * Additionally makes sure we get the right options for case folding.
                      :  *
                      :  * The locale string is normalized with fix_icu_locale_str(); the
                      :  * normalized copy is used both for ucasemap_open() and to compute the
                      :  * options via foldcase_options().  A failure raises
                      :  * ERRCODE_INVALID_PARAMETER_VALUE quoting the caller's original loc_str.
                      :  * On success the temporary string is freed and the open UCaseMap is
                      :  * returned.
     528              :  */
     529              : static UCaseMap *
     530          131 : pg_ucasemap_open(const char *loc_str)
     531              : {
     532          131 :     UErrorCode  status = U_ZERO_ERROR;
     533              :     UCaseMap   *casemap;
     534              :     char       *fixed_str;
     535              : 
     536          131 :     fixed_str = fix_icu_locale_str(loc_str);
     537              : 
     538          131 :     casemap = ucasemap_open(fixed_str, foldcase_options(fixed_str), &status);
     539          131 :     if (U_FAILURE(status))
     540              :         /* use original string for error report */
     541            0 :         ereport(ERROR,
     542              :                 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     543              :                 errmsg("could not open casemap for locale \"%s\": %s",
     544              :                        loc_str, u_errorName(status)));
     545              : 
     546          131 :     pfree(fixed_str);
     547              : 
     548          131 :     return casemap;
     549              : }
     550              : 
     551              : /*
     552              :  * Create a UCollator with the given locale string and rules.
     553              :  *
     554              :  * Ensure that no path leaks a UCollator.
     555              :  */
     556              : static UCollator *
     557          137 : make_icu_collator(const char *iculocstr, const char *icurules)
     558              : {
     559          137 :     if (!icurules)
     560              :     {
     561              :         /* simple case without rules */
     562          125 :         return pg_ucol_open(iculocstr);
     563              :     }
     564              :     else
     565              :     {
     566              :         UCollator  *collator_std_rules;
     567              :         UCollator  *collator_all_rules;
     568              :         const UChar *std_rules;
     569              :         UChar      *my_rules;
     570              :         UChar      *all_rules;
     571              :         int32_t     length;
     572              :         int32_t     total;
     573              :         UErrorCode  status;
     574              : 
     575              :         /*
     576              :          * If rules are specified, we extract the rules of the standard
     577              :          * collation, add our own rules, and make a new collator with the
     578              :          * combined rules.
     579              :          */
     580           12 :         icu_to_uchar(&my_rules, icurules, strlen(icurules));
     581              : 
     582           12 :         collator_std_rules = pg_ucol_open(iculocstr);
     583              : 
     584           12 :         std_rules = ucol_getRules(collator_std_rules, &length);
     585              : 
     586           12 :         total = u_strlen(std_rules) + u_strlen(my_rules) + 1;
     587              : 
     588              :         /* avoid leaking collator on OOM */
     589           12 :         all_rules = palloc_array_extended(UChar, total, MCXT_ALLOC_NO_OOM);
     590           12 :         if (!all_rules)
     591              :         {
     592            0 :             ucol_close(collator_std_rules);
     593            0 :             ereport(ERROR,
     594              :                     (errcode(ERRCODE_OUT_OF_MEMORY),
     595              :                      errmsg("out of memory")));
     596              :         }
     597              : 
     598           12 :         u_strcpy(all_rules, std_rules);
     599           12 :         u_strcat(all_rules, my_rules);
     600              : 
     601           12 :         ucol_close(collator_std_rules);
     602              : 
     603           12 :         status = U_ZERO_ERROR;
     604           12 :         collator_all_rules = ucol_openRules(all_rules, u_strlen(all_rules),
     605              :                                             UCOL_DEFAULT, UCOL_DEFAULT,
     606              :                                             NULL, &status);
     607           12 :         if (U_FAILURE(status))
     608              :         {
     609            4 :             ereport(ERROR,
     610              :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     611              :                      errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
     612              :                             iculocstr, icurules, u_errorName(status))));
     613              :         }
     614              : 
     615            8 :         pfree(my_rules);
     616            8 :         pfree(all_rules);
     617            8 :         return collator_all_rules;
     618              :     }
     619              : }
     620              : 
     621              : static size_t
     622            0 : strlower_icu(char *dest, size_t destsize, const char *src, size_t srclen,
     623              :              pg_locale_t locale)
     624              : {
     625            0 :     return icu_convert_case(u_strToLower, dest, destsize, src, srclen, locale);
     626              : }
     627              : 
     628              : static size_t
     629            0 : strtitle_icu(char *dest, size_t destsize, const char *src, size_t srclen,
     630              :              pg_locale_t locale)
     631              : {
     632            0 :     return icu_convert_case(u_strToTitle_default_BI, dest, destsize, src, srclen, locale);
     633              : }
     634              : 
     635              : static size_t
     636            0 : strupper_icu(char *dest, size_t destsize, const char *src, size_t srclen,
     637              :              pg_locale_t locale)
     638              : {
     639            0 :     return icu_convert_case(u_strToUpper, dest, destsize, src, srclen, locale);
     640              : }
     641              : 
     642              : static size_t
     643            0 : strfold_icu(char *dest, size_t destsize, const char *src, size_t srclen,
     644              :             pg_locale_t locale)
     645              : {
     646            0 :     return icu_convert_case(u_strFoldCase_default, dest, destsize, src, srclen, locale);
     647              : }
     648              : 
     649              : static size_t
     650          364 : strlower_icu_utf8(char *dest, size_t destsize, const char *src, size_t srclen,
     651              :                   pg_locale_t locale)
     652              : {
     653          364 :     UErrorCode  status = U_ZERO_ERROR;
     654              :     int32_t     needed;
     655              : 
     656          364 :     needed = ucasemap_utf8ToLower(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
     657          364 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
     658            0 :         ereport(ERROR,
     659              :                 errmsg("case conversion failed: %s", u_errorName(status)));
     660          364 :     return needed;
     661              : }
     662              : 
     663              : static size_t
     664           20 : strtitle_icu_utf8(char *dest, size_t destsize, const char *src, size_t srclen,
     665              :                   pg_locale_t locale)
     666              : {
     667           20 :     UErrorCode  status = U_ZERO_ERROR;
     668              :     int32_t     needed;
     669              : 
     670           20 :     needed = ucasemap_utf8ToTitle(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
     671           20 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
     672            0 :         ereport(ERROR,
     673              :                 errmsg("case conversion failed: %s", u_errorName(status)));
     674           20 :     return needed;
     675              : }
     676              : 
     677              : static size_t
     678           76 : strupper_icu_utf8(char *dest, size_t destsize, const char *src, size_t srclen,
     679              :                   pg_locale_t locale)
     680              : {
     681           76 :     UErrorCode  status = U_ZERO_ERROR;
     682              :     int32_t     needed;
     683              : 
     684           76 :     needed = ucasemap_utf8ToUpper(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
     685           76 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
     686            0 :         ereport(ERROR,
     687              :                 errmsg("case conversion failed: %s", u_errorName(status)));
     688           76 :     return needed;
     689              : }
     690              : 
     691              : static size_t
     692           10 : strfold_icu_utf8(char *dest, size_t destsize, const char *src, size_t srclen,
     693              :                  pg_locale_t locale)
     694              : {
     695           10 :     UErrorCode  status = U_ZERO_ERROR;
     696              :     int32_t     needed;
     697              : 
     698           10 :     needed = ucasemap_utf8FoldCase(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
     699           10 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
     700            0 :         ereport(ERROR,
     701              :                 errmsg("case conversion failed: %s", u_errorName(status)));
     702           10 :     return needed;
     703              : }
     704              : 
     705              : /*
     706              :  * For historical compatibility, behavior is not multibyte-aware.
     707              :  *
     708              :  * NB: uses libc tolower() for single-byte encodings (also for historical
     709              :  * compatibility), and therefore relies on the global LC_CTYPE setting.
     710              :  */
     711              : static size_t
     712            0 : downcase_ident_icu(char *dst, size_t dstsize, const char *src,
     713              :                    size_t srclen, pg_locale_t locale)
     714              : {
     715              :     int         i;
     716              :     bool        libc_lower;
     717            0 :     locale_t    lt = locale->icu.lt;
     718              : 
     719            0 :     libc_lower = lt && (pg_database_encoding_max_length() == 1);
     720              : 
     721            0 :     for (i = 0; i < srclen && i < dstsize; i++)
     722              :     {
     723            0 :         unsigned char ch = (unsigned char) src[i];
     724              : 
     725            0 :         if (ch >= 'A' && ch <= 'Z')
     726            0 :             ch = pg_ascii_tolower(ch);
     727            0 :         else if (libc_lower && IS_HIGHBIT_SET(ch) && isupper_l(ch, lt))
     728            0 :             ch = tolower_l(ch, lt);
     729            0 :         dst[i] = (char) ch;
     730              :     }
     731              : 
     732            0 :     if (i < dstsize)
     733            0 :         dst[i] = '\0';
     734              : 
     735            0 :     return srclen;
     736              : }
     737              : 
     738              : /*
     739              :  * strncoll_icu_utf8
     740              :  *
     741              :  * Compare two UTF-8 strings directly with ucol_strcollUTF8(), with no
     742              :  * conversion to UChar; the database encoding must be UTF-8.
     743              :  */
     744              : #ifdef HAVE_UCOL_STRCOLLUTF8
     745              : int
     746        17454 : strncoll_icu_utf8(const char *arg1, size_t len1, const char *arg2, size_t len2,
     747              :                   pg_locale_t locale)
     748              : {
     749              :     int         result;
     750              :     UErrorCode  status;
     751              : 
     752              :     Assert(GetDatabaseEncoding() == PG_UTF8);
     753              : 
     754        17454 :     status = U_ZERO_ERROR;
     755        17454 :     result = ucol_strcollUTF8(locale->icu.ucol,
     756              :                               arg1, len1,
     757              :                               arg2, len2,
     758              :                               &status);
     759        17454 :     if (U_FAILURE(status))
     760            0 :         ereport(ERROR,
     761              :                 (errmsg("collation failed: %s", u_errorName(status))));
     762              : 
     763        17454 :     return result;
     764              : }
     765              : 
     766              : int
     767         1180 : strcoll_icu_utf8(const char *arg1, const char *arg2, pg_locale_t locale)
     768              : {
     769              :     int         result;
     770              :     UErrorCode  status;
     771              : 
     772              :     Assert(GetDatabaseEncoding() == PG_UTF8);
     773              : 
     774         1180 :     status = U_ZERO_ERROR;
     775         1180 :     result = ucol_strcollUTF8(locale->icu.ucol,
     776              :                               arg1, -1,
     777              :                               arg2, -1,
     778              :                               &status);
     779         1180 :     if (U_FAILURE(status))
     780            0 :         ereport(ERROR,
     781              :                 (errmsg("collation failed: %s", u_errorName(status))));
     782              : 
     783         1180 :     return result;
     784              : }
     785              : #endif
     786              : 
     787              : static size_t
     788         7928 : strnxfrm_icu_internal(char *dest, size_t destsize, const char *src, ssize_t srclen,
     789              :                       pg_locale_t locale)
     790              : {
     791              :     UChar       sbuf[TEXTBUFLEN / sizeof(UChar)];
     792         7928 :     UChar      *uchar = sbuf;
     793              :     int32_t     ulen;
     794              :     Size        result_bsize;
     795              : 
     796         7928 :     init_icu_converter();
     797              : 
     798         7928 :     ulen = uchar_length(icu_converter, src, srclen);
     799              : 
     800         7928 :     if (ulen >= lengthof(sbuf))
     801            0 :         uchar = palloc_array(UChar, ulen + 1);
     802              : 
     803         7928 :     ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
     804              : 
     805         7928 :     result_bsize = ucol_getSortKey(locale->icu.ucol,
     806              :                                    uchar, ulen,
     807              :                                    (uint8_t *) dest, destsize);
     808              : 
     809              :     /*
     810              :      * ucol_getSortKey() counts the nul-terminator in the result length, but
     811              :      * this function should not.
     812              :      */
     813              :     Assert(result_bsize > 0);
     814         7928 :     result_bsize--;
     815              : 
     816         7928 :     if (uchar != sbuf)
     817            0 :         pfree(uchar);
     818              : 
     819              :     /* if dest is defined, it should be nul-terminated */
     820              :     Assert(result_bsize >= destsize || dest[result_bsize] == '\0');
     821              : 
     822         7928 :     return result_bsize;
     823              : }
     824              : 
     825              : static size_t
     826         7928 : strnxfrm_icu(char *dest, size_t destsize, const char *src, size_t srclen,
     827              :              pg_locale_t locale)
     828              : {
     829         7928 :     return strnxfrm_icu_internal(dest, destsize, src, srclen, locale);
     830              : }
     831              : 
     832              : static size_t
     833            0 : strxfrm_icu(char *dest, size_t destsize, const char *src,
     834              :             pg_locale_t locale)
     835              : {
     836            0 :     return strnxfrm_icu_internal(dest, destsize, src, -1, locale);
     837              : }
     838              : 
     839              : static size_t
     840         1306 : strnxfrm_prefix_icu_utf8_internal(char *dest, size_t destsize,
     841              :                                   const char *src, ssize_t srclen,
     842              :                                   pg_locale_t locale)
     843              : {
     844              :     size_t      result;
     845              :     UCharIterator iter;
     846              :     uint32_t    state[2];
     847              :     UErrorCode  status;
     848              : 
     849              :     Assert(GetDatabaseEncoding() == PG_UTF8);
     850              : 
     851         1306 :     uiter_setUTF8(&iter, src, srclen);
     852         1306 :     state[0] = state[1] = 0;    /* won't need that again */
     853         1306 :     status = U_ZERO_ERROR;
     854         1306 :     result = ucol_nextSortKeyPart(locale->icu.ucol,
     855              :                                   &iter,
     856              :                                   state,
     857              :                                   (uint8_t *) dest,
     858              :                                   destsize,
     859              :                                   &status);
     860         1306 :     if (U_FAILURE(status))
     861            0 :         ereport(ERROR,
     862              :                 (errmsg("sort key generation failed: %s",
     863              :                         u_errorName(status))));
     864              : 
     865         1306 :     return result;
     866              : }
     867              : 
     868              : static size_t
     869            0 : strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
     870              :                          const char *src, size_t srclen,
     871              :                          pg_locale_t locale)
     872              : {
     873            0 :     return strnxfrm_prefix_icu_utf8_internal(dest, destsize, src, srclen, locale);
     874              : }
     875              : 
     876              : static size_t
     877         1306 : strxfrm_prefix_icu_utf8(char *dest, size_t destsize, const char *src,
     878              :                         pg_locale_t locale)
     879              : {
     880         1306 :     return strnxfrm_prefix_icu_utf8_internal(dest, destsize, src, -1, locale);
     881              : }
     882              : 
     883              : char *
     884        48147 : get_collation_actual_version_icu(const char *collcollate)
     885              : {
     886              :     UCollator  *collator;
     887              :     UVersionInfo versioninfo;
     888              :     char        buf[U_MAX_VERSION_STRING_LENGTH];
     889              : 
     890        48147 :     collator = pg_ucol_open(collcollate);
     891              : 
     892        48147 :     ucol_getVersion(collator, versioninfo);
     893        48147 :     ucol_close(collator);
     894              : 
     895        48147 :     u_versionToString(versioninfo, buf);
     896        48147 :     return pstrdup(buf);
     897              : }
     898              : 
     899              : /*
     900              :  * Convert a string in the database encoding into a string of UChars.
     901              :  *
     902              :  * The source string at buff is of length nbytes
     903              :  * (it needn't be nul-terminated)
     904              :  *
     905              :  * *buff_uchar receives a pointer to the palloc'd result string, and
     906              :  * the function's result is the number of UChars generated.
     907              :  *
     908              :  * The result string is nul-terminated, though most callers rely on the
     909              :  * result length instead.
     910              :  */
     911              : static int32_t
     912           12 : icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
     913              : {
     914              :     int32_t     len_uchar;
     915              : 
     916           12 :     init_icu_converter();
     917              : 
     918           12 :     len_uchar = uchar_length(icu_converter, buff, nbytes);
     919              : 
     920           12 :     *buff_uchar = palloc_array(UChar, len_uchar + 1);
     921           12 :     len_uchar = uchar_convert(icu_converter,
     922              :                               *buff_uchar, len_uchar + 1, buff, nbytes);
     923              : 
     924           12 :     return len_uchar;
     925              : }
     926              : 
     927              : /*
     928              :  * Convert a string of UChars into the database encoding.
     929              :  *
     930              :  * The source string at buff_uchar is of length len_uchar
     931              :  * (it needn't be nul-terminated)
     932              :  *
     933              :  * The converted string is stored in dest, nul-terminated, only if it fits
     934              :  * in destsize bytes including the nul.  The function's result is the number
     935              :  * of bytes required (not counting nul) in either case, so a result >=
     936              :  * destsize means that nothing was stored in dest.
     937              :  */
     938              : static size_t
     939            0 : icu_from_uchar(char *dest, size_t destsize, const UChar *buff_uchar, int32_t len_uchar)
     940              : {
     941              :     UErrorCode  status;
     942              :     int32_t     len_result;
     943              : 
     944            0 :     init_icu_converter();
     945              : 
     946            0 :     status = U_ZERO_ERROR;
     947            0 :     len_result = ucnv_fromUChars(icu_converter, NULL, 0,
     948              :                                  buff_uchar, len_uchar, &status);
     949            0 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
     950            0 :         ereport(ERROR,
     951              :                 (errmsg("%s failed: %s", "ucnv_fromUChars",
     952              :                         u_errorName(status))));
     953              : 
     954            0 :     if (len_result + 1 > destsize)
     955            0 :         return len_result;
     956              : 
     957            0 :     status = U_ZERO_ERROR;
     958            0 :     len_result = ucnv_fromUChars(icu_converter, dest, len_result + 1,
     959              :                                  buff_uchar, len_uchar, &status);
     960            0 :     if (U_FAILURE(status) ||
     961            0 :         status == U_STRING_NOT_TERMINATED_WARNING)
     962            0 :         ereport(ERROR,
     963              :                 (errmsg("%s failed: %s", "ucnv_fromUChars",
     964              :                         u_errorName(status))));
     965              : 
     966            0 :     return len_result;
     967              : }
     968              : 
     969              : static int32_t
     970            0 : convert_case_uchar(ICU_Convert_Func func, pg_locale_t mylocale,
     971              :                    UChar **buff_dest, UChar *buff_source, int32_t len_source)
     972              : {
     973              :     UErrorCode  status;
     974              :     int32_t     len_dest;
     975              : 
     976            0 :     len_dest = len_source;      /* try first with same length */
     977            0 :     *buff_dest = palloc_array(UChar, len_dest);
     978            0 :     status = U_ZERO_ERROR;
     979            0 :     len_dest = func(*buff_dest, len_dest, buff_source, len_source,
     980              :                     mylocale->icu.locale, &status);
     981            0 :     if (status == U_BUFFER_OVERFLOW_ERROR)
     982              :     {
     983              :         /* try again with adjusted length */
     984            0 :         pfree(*buff_dest);
     985            0 :         *buff_dest = palloc_array(UChar, len_dest);
     986            0 :         status = U_ZERO_ERROR;
     987            0 :         len_dest = func(*buff_dest, len_dest, buff_source, len_source,
     988              :                         mylocale->icu.locale, &status);
     989              :     }
     990            0 :     if (U_FAILURE(status))
     991            0 :         ereport(ERROR,
     992              :                 (errmsg("case conversion failed: %s", u_errorName(status))));
     993            0 :     return len_dest;
     994              : }
     995              : 
     996              : static int32_t
     997            0 : icu_convert_case(ICU_Convert_Func func, char *dest, size_t destsize,
     998              :                  const char *src, size_t srclen, pg_locale_t locale)
     999              : {
    1000              :     int32_t     len_uchar;
    1001              :     int32_t     len_conv;
    1002              :     UChar      *buff_uchar;
    1003              :     UChar      *buff_conv;
    1004              :     size_t      result_len;
    1005              : 
    1006            0 :     len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
    1007            0 :     len_conv = convert_case_uchar(func, locale, &buff_conv,
    1008              :                                   buff_uchar, len_uchar);
    1009            0 :     result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
    1010            0 :     pfree(buff_uchar);
    1011            0 :     pfree(buff_conv);
    1012              : 
    1013            0 :     return result_len;
    1014              : }
    1015              : 
    1016              : static int32_t
    1017            0 : u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
    1018              :                         const UChar *src, int32_t srcLength,
    1019              :                         const char *locale,
    1020              :                         UErrorCode *pErrorCode)
    1021              : {
    1022            0 :     return u_strToTitle(dest, destCapacity, src, srcLength,
    1023              :                         NULL, locale, pErrorCode);
    1024              : }
    1025              : 
    1026              : static int32_t
    1027            0 : u_strFoldCase_default(UChar *dest, int32_t destCapacity,
    1028              :                       const UChar *src, int32_t srcLength,
    1029              :                       const char *locale,
    1030              :                       UErrorCode *pErrorCode)
    1031              : {
    1032            0 :     return u_strFoldCase(dest, destCapacity, src, srcLength,
    1033            0 :                          foldcase_options(locale), pErrorCode);
    1034              : }
    1035              : 
    1036              : /*
    1037              :  * Return the correct u_strFoldCase() options for the given locale.
    1038              :  *
    1039              :  * Unlike the ICU APIs for lowercasing, titlecasing, and uppercasing, case
    1040              :  * folding does not accept a locale. Instead it just supports a single option
    1041              :  * relevant to Turkic languages 'az' and 'tr'; check for those languages.
    1042              :  */
    1043              : static int32_t
    1044          131 : foldcase_options(const char *locale)
    1045              : {
    1046          131 :     uint32      options = U_FOLD_CASE_DEFAULT;
    1047              :     char        lang[ULOC_LANG_CAPACITY];
    1048          131 :     UErrorCode  status = U_ZERO_ERROR;
    1049              : 
    1050          131 :     uloc_getLanguage(locale, lang, ULOC_LANG_CAPACITY, &status);
    1051          131 :     if (U_SUCCESS(status) && status != U_STRING_NOT_TERMINATED_WARNING)
    1052              :     {
    1053              :         /*
    1054              :          * The option name is confusing, but it causes u_strFoldCase to use
    1055              :          * the 'T' mappings, which are ignored for U_FOLD_CASE_DEFAULT.
    1056              :          */
    1057          131 :         if (strcmp(lang, "tr") == 0 || strcmp(lang, "az") == 0)
    1058            4 :             options = U_FOLD_CASE_EXCLUDE_SPECIAL_I;
    1059              :     }
    1060              : 
    1061          131 :     return options;
    1062              : }
    1063              : 
    1064              : /*
    1065              :  * strncoll_icu_internal
    1066              :  *
    1067              :  * Convert the arguments from the database encoding to UChar strings, then
    1068              :  * call ucol_strcoll().
    1069              :  *
    1070              :  * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
    1071              :  * caller should call that instead.
    1072              :  */
    1073              : static int
    1074            0 : strncoll_icu_internal(const char *arg1, ssize_t len1,
    1075              :                       const char *arg2, ssize_t len2,
    1076              :                       pg_locale_t locale)
    1077              : {
    1078              :     UChar       sbuf[TEXTBUFLEN / sizeof(UChar)];
    1079            0 :     UChar      *buf = sbuf;
    1080              :     int32_t     ulen1;
    1081              :     int32_t     ulen2;
    1082              :     size_t      bufsize;
    1083              :     UChar      *uchar1,
    1084              :                *uchar2;
    1085              :     int         result;
    1086              : 
    1087              :     /* if encoding is UTF8, use more efficient strncoll_icu_utf8 */
    1088              : #ifdef HAVE_UCOL_STRCOLLUTF8
    1089              :     Assert(GetDatabaseEncoding() != PG_UTF8);
    1090              : #endif
    1091              : 
    1092            0 :     init_icu_converter();
    1093              : 
    1094            0 :     ulen1 = uchar_length(icu_converter, arg1, len1);
    1095            0 :     ulen2 = uchar_length(icu_converter, arg2, len2);
    1096              : 
    1097              :     /* ulen1+1 or ulen2+1 doesn't risk overflow, but summing them might */
    1098            0 :     bufsize = add_size(ulen1 + 1, ulen2 + 1);
    1099            0 :     if (bufsize > lengthof(sbuf))
    1100            0 :         buf = palloc_array(UChar, bufsize);
    1101              : 
    1102            0 :     uchar1 = buf;
    1103            0 :     uchar2 = buf + ulen1 + 1;
    1104              : 
    1105            0 :     ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
    1106            0 :     ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
    1107              : 
    1108            0 :     result = ucol_strcoll(locale->icu.ucol,
    1109              :                           uchar1, ulen1,
    1110              :                           uchar2, ulen2);
    1111              : 
    1112            0 :     if (buf != sbuf)
    1113            0 :         pfree(buf);
    1114              : 
    1115            0 :     return result;
    1116              : }
    1117              : 
    1118              : static int
    1119            0 : strncoll_icu(const char *arg1, size_t len1, const char *arg2, size_t len2,
    1120              :              pg_locale_t locale)
    1121              : {
    1122            0 :     return strncoll_icu_internal(arg1, len1, arg2, len2, locale);
    1123              : }
    1124              : 
    1125              : static int
    1126            0 : strcoll_icu(const char *arg1, const char *arg2, pg_locale_t locale)
    1127              : {
    1128            0 :     return strncoll_icu_internal(arg1, -1, arg2, -1, locale);
    1129              : }
    1130              : 
    1131              : static size_t
    1132            0 : strnxfrm_prefix_icu_internal(char *dest, size_t destsize,
    1133              :                              const char *src, ssize_t srclen,
    1134              :                              pg_locale_t locale)
    1135              : {
    1136              :     UChar       sbuf[TEXTBUFLEN / sizeof(UChar)];
    1137            0 :     UChar      *uchar = sbuf;
    1138              :     UCharIterator iter;
    1139              :     uint32_t    state[2];
    1140              :     UErrorCode  status;
    1141              :     int32_t     ulen;
    1142              :     Size        result_bsize;
    1143              : 
    1144              :     /* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */
    1145              :     Assert(GetDatabaseEncoding() != PG_UTF8);
    1146              : 
    1147            0 :     init_icu_converter();
    1148              : 
    1149            0 :     ulen = uchar_length(icu_converter, src, srclen);
    1150              : 
    1151            0 :     if (ulen >= lengthof(sbuf))
    1152            0 :         uchar = palloc_array(UChar, ulen + 1);
    1153              : 
    1154            0 :     ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
    1155              : 
    1156            0 :     uiter_setString(&iter, uchar, ulen);
    1157            0 :     state[0] = state[1] = 0;    /* won't need that again */
    1158            0 :     status = U_ZERO_ERROR;
    1159            0 :     result_bsize = ucol_nextSortKeyPart(locale->icu.ucol,
    1160              :                                         &iter,
    1161              :                                         state,
    1162              :                                         (uint8_t *) dest,
    1163              :                                         destsize,
    1164              :                                         &status);
    1165            0 :     if (U_FAILURE(status))
    1166            0 :         ereport(ERROR,
    1167              :                 (errmsg("sort key generation failed: %s",
    1168              :                         u_errorName(status))));
    1169              : 
    1170            0 :     if (uchar != sbuf)
    1171            0 :         pfree(uchar);
    1172              : 
    1173            0 :     return result_bsize;
    1174              : }
    1175              : 
    1176              : static size_t
    1177            0 : strnxfrm_prefix_icu(char *dest, size_t destsize, const char *src, size_t srclen,
    1178              :                     pg_locale_t locale)
    1179              : {
    1180            0 :     return strnxfrm_prefix_icu_internal(dest, destsize, src, srclen, locale);
    1181              : }
    1182              : 
    1183              : static size_t
    1184            0 : strxfrm_prefix_icu(char *dest, size_t destsize, const char *src,
    1185              :                    pg_locale_t locale)
    1186              : {
    1187            0 :     return strnxfrm_prefix_icu_internal(dest, destsize, src, -1, locale);
    1188              : }
    1189              : 
    1190              : static void
    1191         7940 : init_icu_converter(void)
    1192              : {
    1193              :     const char *icu_encoding_name;
    1194              :     UErrorCode  status;
    1195              :     UConverter *conv;
    1196              : 
    1197         7940 :     if (icu_converter)
    1198         7936 :         return;                 /* already done */
    1199              : 
    1200            4 :     icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
    1201            4 :     if (!icu_encoding_name)
    1202            0 :         ereport(ERROR,
    1203              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1204              :                  errmsg("encoding \"%s\" not supported by ICU",
    1205              :                         pg_encoding_to_char(GetDatabaseEncoding()))));
    1206              : 
    1207            4 :     status = U_ZERO_ERROR;
    1208            4 :     conv = ucnv_open(icu_encoding_name, &status);
    1209            4 :     if (U_FAILURE(status))
    1210            0 :         ereport(ERROR,
    1211              :                 (errmsg("could not open ICU converter for encoding \"%s\": %s",
    1212              :                         icu_encoding_name, u_errorName(status))));
    1213              : 
    1214            4 :     icu_converter = conv;
    1215              : }
    1216              : 
    1217              : /*
    1218              :  * Find length, in UChars, of given string if converted to UChar string.
    1219              :  *
    1220              :  * A length of -1 indicates that the input string is NUL-terminated.
    1221              :  *
    1222              :  * Note: given the assumption that the input string fits in MaxAllocSize,
    1223              :  * the result cannot overflow int32_t.  But callers must be careful about
    1224              :  * multiplying the result by sizeof(UChar).
    1225              :  */
    1226              : static int32_t
    1227         7940 : uchar_length(UConverter *converter, const char *str, int32_t len)
    1228              : {
    1229         7940 :     UErrorCode  status = U_ZERO_ERROR;
    1230              :     int32_t     ulen;
    1231              : 
    1232         7940 :     ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);
    1233         7940 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
    1234            0 :         ereport(ERROR,
    1235              :                 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
    1236         7940 :     return ulen;
    1237              : }
    1238              : 
    1239              : /*
    1240              :  * Convert the given source string into a UChar string, stored in dest, and
    1241              :  * return the length (in UChars).
    1242              :  *
    1243              :  * A srclen of -1 indicates that the input string is NUL-terminated.
    1244              :  */
    1245              : static int32_t
    1246         7940 : uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
    1247              :               const char *src, int32_t srclen)
    1248              : {
    1249         7940 :     UErrorCode  status = U_ZERO_ERROR;
    1250              :     int32_t     ulen;
    1251              : 
    1252         7940 :     ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
    1253         7940 :     if (U_FAILURE(status))
    1254            0 :         ereport(ERROR,
    1255              :                 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
    1256         7940 :     return ulen;
    1257              : }
    1258              : 
    1259              : /*
    1260              :  * Parse collation attributes from the given locale string and apply them to
    1261              :  * the open collator.
    1262              :  *
    1263              :  * First, the locale string is canonicalized to an ICU format locale ID such
    1264              :  * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
    1265              :  * the key-value arguments.
    1266              :  *
    1267              :  * Starting with ICU version 54, the attributes are processed automatically by
    1268              :  * ucol_open(), so this is only necessary for emulating this behavior on older
    1269              :  * versions.
    1270              :  */
    1271              : pg_attribute_unused()
    1272              : static void
    1273            0 : icu_set_collation_attributes(UCollator *collator, const char *loc,
    1274              :                              UErrorCode *status)
    1275              : {
    1276              :     int32_t     len;
    1277              :     char       *icu_locale_id;
    1278              :     char       *lower_str;
    1279              :     char       *str;
    1280              :     char       *token;
    1281              : 
    1282              :     /*
    1283              :      * The input locale may be a BCP 47 language tag, e.g.
    1284              :      * "und-u-kc-ks-level1", which expresses the same attributes in a
    1285              :      * different form. It will be converted to the equivalent ICU format
    1286              :      * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
    1287              :      * uloc_canonicalize().
    1288              :      */
    1289            0 :     *status = U_ZERO_ERROR;
    1290            0 :     len = uloc_canonicalize(loc, NULL, 0, status);
    1291            0 :     icu_locale_id = palloc(len + 1);
    1292            0 :     *status = U_ZERO_ERROR;
    1293            0 :     len = uloc_canonicalize(loc, icu_locale_id, len + 1, status);
    1294            0 :     if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
    1295            0 :         return;
    1296              : 
    1297            0 :     lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
    1298              : 
    1299            0 :     pfree(icu_locale_id);
    1300              : 
    1301            0 :     str = strchr(lower_str, '@');
    1302            0 :     if (!str)
    1303            0 :         return;
    1304            0 :     str++;
    1305              : 
    1306            0 :     while ((token = strsep(&str, ";")))
    1307              :     {
    1308            0 :         char       *e = strchr(token, '=');
    1309              : 
    1310            0 :         if (e)
    1311              :         {
    1312              :             char       *name;
    1313              :             char       *value;
    1314              :             UColAttribute uattr;
    1315              :             UColAttributeValue uvalue;
    1316              : 
    1317            0 :             *status = U_ZERO_ERROR;
    1318              : 
    1319            0 :             *e = '\0';
    1320            0 :             name = token;
    1321            0 :             value = e + 1;
    1322              : 
    1323              :             /*
    1324              :              * See attribute name and value lists in ICU i18n/coll.cpp
    1325              :              */
    1326            0 :             if (strcmp(name, "colstrength") == 0)
    1327            0 :                 uattr = UCOL_STRENGTH;
    1328            0 :             else if (strcmp(name, "colbackwards") == 0)
    1329            0 :                 uattr = UCOL_FRENCH_COLLATION;
    1330            0 :             else if (strcmp(name, "colcaselevel") == 0)
    1331            0 :                 uattr = UCOL_CASE_LEVEL;
    1332            0 :             else if (strcmp(name, "colcasefirst") == 0)
    1333            0 :                 uattr = UCOL_CASE_FIRST;
    1334            0 :             else if (strcmp(name, "colalternate") == 0)
    1335            0 :                 uattr = UCOL_ALTERNATE_HANDLING;
    1336            0 :             else if (strcmp(name, "colnormalization") == 0)
    1337            0 :                 uattr = UCOL_NORMALIZATION_MODE;
    1338            0 :             else if (strcmp(name, "colnumeric") == 0)
    1339            0 :                 uattr = UCOL_NUMERIC_COLLATION;
    1340              :             else
    1341              :                 /* ignore if unknown */
    1342            0 :                 continue;
    1343              : 
    1344            0 :             if (strcmp(value, "primary") == 0)
    1345            0 :                 uvalue = UCOL_PRIMARY;
    1346            0 :             else if (strcmp(value, "secondary") == 0)
    1347            0 :                 uvalue = UCOL_SECONDARY;
    1348            0 :             else if (strcmp(value, "tertiary") == 0)
    1349            0 :                 uvalue = UCOL_TERTIARY;
    1350            0 :             else if (strcmp(value, "quaternary") == 0)
    1351            0 :                 uvalue = UCOL_QUATERNARY;
    1352            0 :             else if (strcmp(value, "identical") == 0)
    1353            0 :                 uvalue = UCOL_IDENTICAL;
    1354            0 :             else if (strcmp(value, "no") == 0)
    1355            0 :                 uvalue = UCOL_OFF;
    1356            0 :             else if (strcmp(value, "yes") == 0)
    1357            0 :                 uvalue = UCOL_ON;
    1358            0 :             else if (strcmp(value, "shifted") == 0)
    1359            0 :                 uvalue = UCOL_SHIFTED;
    1360            0 :             else if (strcmp(value, "non-ignorable") == 0)
    1361            0 :                 uvalue = UCOL_NON_IGNORABLE;
    1362            0 :             else if (strcmp(value, "lower") == 0)
    1363            0 :                 uvalue = UCOL_LOWER_FIRST;
    1364            0 :             else if (strcmp(value, "upper") == 0)
    1365            0 :                 uvalue = UCOL_UPPER_FIRST;
    1366              :             else
    1367              :             {
    1368            0 :                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    1369            0 :                 break;
    1370              :             }
    1371              : 
    1372            0 :             ucol_setAttribute(collator, uattr, uvalue, status);
    1373              :         }
    1374              :     }
    1375              : 
    1376            0 :     pfree(lower_str);
    1377              : }
    1378              : 
    1379              : #endif                          /* USE_ICU */
        

Generated by: LCOV version 2.0-1