LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_icu.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 216 343 63.0 %
Date: 2025-12-23 21:18:42 Functions: 32 37 86.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-----------------------------------------------------------------------
       2             :  *
       3             :  * PostgreSQL locale utilities for ICU
       4             :  *
       5             :  * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
       6             :  *
       7             :  * src/backend/utils/adt/pg_locale_icu.c
       8             :  *
       9             :  *-----------------------------------------------------------------------
      10             :  */
      11             : 
      12             : #include "postgres.h"
      13             : 
      14             : #ifdef USE_ICU
      15             : #include <unicode/ucnv.h>
      16             : #include <unicode/ustring.h>
      17             : 
      18             : /*
      19             :  * ucol_strcollUTF8() was introduced in ICU 50, but it is buggy before ICU 53.
      20             :  * (see
      21             :  * <https://www.postgresql.org/message-id/flat/f1438ec6-22aa-4029-9a3b-26f79d330e72%40manitou-mail.org>)
      22             :  */
      23             : #if U_ICU_VERSION_MAJOR_NUM >= 53
      24             : #define HAVE_UCOL_STRCOLLUTF8 1
      25             : #else
      26             : #undef HAVE_UCOL_STRCOLLUTF8
      27             : #endif
      28             : 
      29             : #endif
      30             : 
      31             : #include "access/htup_details.h"
      32             : #include "catalog/pg_database.h"
      33             : #include "catalog/pg_collation.h"
      34             : #include "mb/pg_wchar.h"
      35             : #include "miscadmin.h"
      36             : #include "utils/builtins.h"
      37             : #include "utils/formatting.h"
      38             : #include "utils/memutils.h"
      39             : #include "utils/pg_locale.h"
      40             : #include "utils/syscache.h"
      41             : 
      42             : /*
      43             :  * Size of stack buffer to use for string transformations, used to avoid heap
      44             :  * allocations in typical cases. This should be large enough that most strings
      45             :  * will fit, but small enough that we feel comfortable putting it on the
      46             :  * stack.
      47             :  */
      48             : #define     TEXTBUFLEN          1024
      49             : 
      50             : extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
      51             : 
      52             : #ifdef USE_ICU
      53             : 
      54             : extern UCollator *pg_ucol_open(const char *loc_str);
      55             : 
      56             : static size_t strlower_icu(char *dest, size_t destsize, const char *src,
      57             :                            ssize_t srclen, pg_locale_t locale);
      58             : static size_t strtitle_icu(char *dest, size_t destsize, const char *src,
      59             :                            ssize_t srclen, pg_locale_t locale);
      60             : static size_t strupper_icu(char *dest, size_t destsize, const char *src,
      61             :                            ssize_t srclen, pg_locale_t locale);
      62             : static size_t strfold_icu(char *dest, size_t destsize, const char *src,
      63             :                           ssize_t srclen, pg_locale_t locale);
      64             : static size_t downcase_ident_icu(char *dst, size_t dstsize, const char *src,
      65             :                                  ssize_t srclen, pg_locale_t locale);
      66             : static int  strncoll_icu(const char *arg1, ssize_t len1,
      67             :                          const char *arg2, ssize_t len2,
      68             :                          pg_locale_t locale);
      69             : static size_t strnxfrm_icu(char *dest, size_t destsize,
      70             :                            const char *src, ssize_t srclen,
      71             :                            pg_locale_t locale);
      72             : extern char *get_collation_actual_version_icu(const char *collcollate);
      73             : 
      74             : typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
      75             :                                      const UChar *src, int32_t srcLength,
      76             :                                      const char *locale,
      77             :                                      UErrorCode *pErrorCode);
      78             : 
      79             : /*
      80             :  * Converter object for converting between ICU's UChar strings and C strings
      81             :  * in database encoding.  Since the database encoding doesn't change, we only
      82             :  * need one of these per session.
      83             :  */
      84             : static UConverter *icu_converter = NULL;
      85             : 
      86             : static UCollator *make_icu_collator(const char *iculocstr,
      87             :                                     const char *icurules);
      88             : static int  strncoll_icu(const char *arg1, ssize_t len1,
      89             :                          const char *arg2, ssize_t len2,
      90             :                          pg_locale_t locale);
      91             : static size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
      92             :                                   const char *src, ssize_t srclen,
      93             :                                   pg_locale_t locale);
      94             : #ifdef HAVE_UCOL_STRCOLLUTF8
      95             : static int  strncoll_icu_utf8(const char *arg1, ssize_t len1,
      96             :                               const char *arg2, ssize_t len2,
      97             :                               pg_locale_t locale);
      98             : #endif
      99             : static size_t strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
     100             :                                        const char *src, ssize_t srclen,
     101             :                                        pg_locale_t locale);
     102             : static void init_icu_converter(void);
     103             : static size_t uchar_length(UConverter *converter,
     104             :                            const char *str, int32_t len);
     105             : static int32_t uchar_convert(UConverter *converter,
     106             :                              UChar *dest, int32_t destlen,
     107             :                              const char *src, int32_t srclen);
     108             : static int32_t icu_to_uchar(UChar **buff_uchar, const char *buff,
     109             :                             size_t nbytes);
     110             : static size_t icu_from_uchar(char *dest, size_t destsize,
     111             :                              const UChar *buff_uchar, int32_t len_uchar);
     112             : static void icu_set_collation_attributes(UCollator *collator, const char *loc,
     113             :                                          UErrorCode *status);
     114             : static int32_t icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
     115             :                                 UChar **buff_dest, UChar *buff_source,
     116             :                                 int32_t len_source);
     117             : static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
     118             :                                        const UChar *src, int32_t srcLength,
     119             :                                        const char *locale,
     120             :                                        UErrorCode *pErrorCode);
     121             : static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
     122             :                                      const UChar *src, int32_t srcLength,
     123             :                                      const char *locale,
     124             :                                      UErrorCode *pErrorCode);
     125             : 
     126             : /*
     127             :  * XXX: many of the functions below rely on casts directly from pg_wchar to
     128             :  * UChar32, which is correct for UTF-8 and LATIN1, but not in general.
     129             :  */
     130             : 
     131             : static pg_wchar
     132         108 : toupper_icu(pg_wchar wc, pg_locale_t locale)
     133             : {
     134         108 :     return u_toupper(wc);
     135             : }
     136             : 
     137             : static pg_wchar
     138         108 : tolower_icu(pg_wchar wc, pg_locale_t locale)
     139             : {
     140         108 :     return u_tolower(wc);
     141             : }
     142             : 
     143             : static const struct collate_methods collate_methods_icu = {
     144             :     .strncoll = strncoll_icu,
     145             :     .strnxfrm = strnxfrm_icu,
     146             :     .strnxfrm_prefix = strnxfrm_prefix_icu,
     147             :     .strxfrm_is_safe = true,
     148             : };
     149             : 
     150             : static const struct collate_methods collate_methods_icu_utf8 = {
     151             : #ifdef HAVE_UCOL_STRCOLLUTF8
     152             :     .strncoll = strncoll_icu_utf8,
     153             : #else
     154             :     .strncoll = strncoll_icu,
     155             : #endif
     156             :     .strnxfrm = strnxfrm_icu,
     157             :     .strnxfrm_prefix = strnxfrm_prefix_icu_utf8,
     158             :     .strxfrm_is_safe = true,
     159             : };
     160             : 
     161             : static bool
     162       12288 : wc_isdigit_icu(pg_wchar wc, pg_locale_t locale)
     163             : {
     164       12288 :     return u_isdigit(wc);
     165             : }
     166             : 
     167             : static bool
     168       12288 : wc_isalpha_icu(pg_wchar wc, pg_locale_t locale)
     169             : {
     170       12288 :     return u_isalpha(wc);
     171             : }
     172             : 
     173             : static bool
     174       12288 : wc_isalnum_icu(pg_wchar wc, pg_locale_t locale)
     175             : {
     176       12288 :     return u_isalnum(wc);
     177             : }
     178             : 
     179             : static bool
     180       12288 : wc_isupper_icu(pg_wchar wc, pg_locale_t locale)
     181             : {
     182       12288 :     return u_isupper(wc);
     183             : }
     184             : 
     185             : static bool
     186       12288 : wc_islower_icu(pg_wchar wc, pg_locale_t locale)
     187             : {
     188       12288 :     return u_islower(wc);
     189             : }
     190             : 
     191             : static bool
     192       12288 : wc_isgraph_icu(pg_wchar wc, pg_locale_t locale)
     193             : {
     194       12288 :     return u_isgraph(wc);
     195             : }
     196             : 
     197             : static bool
     198       12288 : wc_isprint_icu(pg_wchar wc, pg_locale_t locale)
     199             : {
     200       12288 :     return u_isprint(wc);
     201             : }
     202             : 
     203             : static bool
     204       12288 : wc_ispunct_icu(pg_wchar wc, pg_locale_t locale)
     205             : {
     206       12288 :     return u_ispunct(wc);
     207             : }
     208             : 
     209             : static bool
     210       12288 : wc_isspace_icu(pg_wchar wc, pg_locale_t locale)
     211             : {
     212       12288 :     return u_isspace(wc);
     213             : }
     214             : 
     215             : static bool
     216           0 : wc_isxdigit_icu(pg_wchar wc, pg_locale_t locale)
     217             : {
     218           0 :     return u_isxdigit(wc);
     219             : }
     220             : 
     221             : static bool
     222         126 : wc_iscased_icu(pg_wchar wc, pg_locale_t locale)
     223             : {
     224         126 :     return u_hasBinaryProperty(wc, UCHAR_CASED);
     225             : }
     226             : 
     227             : static const struct ctype_methods ctype_methods_icu = {
     228             :     .strlower = strlower_icu,
     229             :     .strtitle = strtitle_icu,
     230             :     .strupper = strupper_icu,
     231             :     .strfold = strfold_icu,
     232             :     .downcase_ident = downcase_ident_icu,
     233             :     .wc_isdigit = wc_isdigit_icu,
     234             :     .wc_isalpha = wc_isalpha_icu,
     235             :     .wc_isalnum = wc_isalnum_icu,
     236             :     .wc_isupper = wc_isupper_icu,
     237             :     .wc_islower = wc_islower_icu,
     238             :     .wc_isgraph = wc_isgraph_icu,
     239             :     .wc_isprint = wc_isprint_icu,
     240             :     .wc_ispunct = wc_ispunct_icu,
     241             :     .wc_isspace = wc_isspace_icu,
     242             :     .wc_isxdigit = wc_isxdigit_icu,
     243             :     .wc_iscased = wc_iscased_icu,
     244             :     .wc_toupper = toupper_icu,
     245             :     .wc_tolower = tolower_icu,
     246             : };
     247             : 
     248             : /*
     249             :  * ICU still depends on libc for compatibility with certain historical
     250             :  * behavior for single-byte encodings.  See downcase_ident_icu().
     251             :  *
     252             :  * XXX: consider fixing by decoding the single byte into a code point, and
     253             :  * using u_tolower().
     254             :  */
     255             : static locale_t
     256           0 : make_libc_ctype_locale(const char *ctype)
     257             : {
     258             :     locale_t    loc;
     259             : 
     260             : #ifndef WIN32
     261           0 :     loc = newlocale(LC_CTYPE_MASK, ctype, NULL);
     262             : #else
     263             :     loc = _create_locale(LC_ALL, ctype);
     264             : #endif
     265           0 :     if (!loc)
     266           0 :         report_newlocale_failure(ctype);
     267             : 
     268           0 :     return loc;
     269             : }
     270             : #endif
     271             : 
     272             : pg_locale_t
     273         210 : create_pg_locale_icu(Oid collid, MemoryContext context)
     274             : {
     275             : #ifdef USE_ICU
     276             :     bool        deterministic;
     277             :     const char *iculocstr;
     278         210 :     const char *icurules = NULL;
     279             :     UCollator  *collator;
     280         210 :     locale_t    loc = (locale_t) 0;
     281             :     pg_locale_t result;
     282             : 
     283         210 :     if (collid == DEFAULT_COLLATION_OID)
     284             :     {
     285             :         HeapTuple   tp;
     286             :         Datum       datum;
     287             :         bool        isnull;
     288             : 
     289          26 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     290          26 :         if (!HeapTupleIsValid(tp))
     291           0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     292             : 
     293             :         /* default database collation is always deterministic */
     294          26 :         deterministic = true;
     295          26 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     296             :                                        Anum_pg_database_datlocale);
     297          26 :         iculocstr = TextDatumGetCString(datum);
     298          26 :         datum = SysCacheGetAttr(DATABASEOID, tp,
     299             :                                 Anum_pg_database_daticurules, &isnull);
     300          26 :         if (!isnull)
     301           0 :             icurules = TextDatumGetCString(datum);
     302             : 
     303             :         /* libc only needed for default locale and single-byte encoding */
     304          26 :         if (pg_database_encoding_max_length() == 1)
     305             :         {
     306             :             const char *ctype;
     307             : 
     308           0 :             datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     309             :                                            Anum_pg_database_datctype);
     310           0 :             ctype = TextDatumGetCString(datum);
     311             : 
     312           0 :             loc = make_libc_ctype_locale(ctype);
     313             :         }
     314             : 
     315          26 :         ReleaseSysCache(tp);
     316             :     }
     317             :     else
     318             :     {
     319             :         Form_pg_collation collform;
     320             :         HeapTuple   tp;
     321             :         Datum       datum;
     322             :         bool        isnull;
     323             : 
     324         184 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     325         184 :         if (!HeapTupleIsValid(tp))
     326           0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     327         184 :         collform = (Form_pg_collation) GETSTRUCT(tp);
     328         184 :         deterministic = collform->collisdeterministic;
     329         184 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     330             :                                        Anum_pg_collation_colllocale);
     331         184 :         iculocstr = TextDatumGetCString(datum);
     332         184 :         datum = SysCacheGetAttr(COLLOID, tp,
     333             :                                 Anum_pg_collation_collicurules, &isnull);
     334         184 :         if (!isnull)
     335          12 :             icurules = TextDatumGetCString(datum);
     336             : 
     337         184 :         ReleaseSysCache(tp);
     338             :     }
     339             : 
     340         210 :     collator = make_icu_collator(iculocstr, icurules);
     341             : 
     342         200 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     343         200 :     result->icu.locale = MemoryContextStrdup(context, iculocstr);
     344         200 :     result->icu.ucol = collator;
     345         200 :     result->icu.lt = loc;
     346         200 :     result->deterministic = deterministic;
     347         200 :     result->collate_is_c = false;
     348         200 :     result->ctype_is_c = false;
     349         200 :     if (GetDatabaseEncoding() == PG_UTF8)
     350         200 :         result->collate = &collate_methods_icu_utf8;
     351             :     else
     352           0 :         result->collate = &collate_methods_icu;
     353         200 :     result->ctype = &ctype_methods_icu;
     354             : 
     355         200 :     return result;
     356             : #else
     357             :     /* could get here if a collation was created by a build with ICU */
     358             :     ereport(ERROR,
     359             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     360             :              errmsg("ICU is not supported in this build")));
     361             : 
     362             :     return NULL;
     363             : #endif
     364             : }
     365             : 
     366             : #ifdef USE_ICU
     367             : 
     368             : /*
     369             :  * Wrapper around ucol_open() to handle API differences for older ICU
     370             :  * versions.
     371             :  *
     372             :  * Ensure that no path leaks a UCollator.
     373             :  */
     374             : UCollator *
     375       79812 : pg_ucol_open(const char *loc_str)
     376             : {
     377             :     UCollator  *collator;
     378             :     UErrorCode  status;
     379       79812 :     const char *orig_str = loc_str;
     380       79812 :     char       *fixed_str = NULL;
     381             : 
     382             :     /*
     383             :      * Must never open default collator, because it depends on the environment
     384             :      * and may change at any time. Should not happen, but check here to catch
     385             :      * bugs that might be hard to catch otherwise.
     386             :      *
     387             :      * NB: the default collator is not the same as the collator for the root
     388             :      * locale. The root locale may be specified as the empty string, "und", or
     389             :      * "root". The default collator is opened by passing NULL to ucol_open().
     390             :      */
     391       79812 :     if (loc_str == NULL)
     392           0 :         elog(ERROR, "opening default collator is not supported");
     393             : 
     394             :     /*
     395             :      * In ICU versions 54 and earlier, "und" is not a recognized spelling of
     396             :      * the root locale. If the first component of the locale is "und", replace
     397             :      * with "root" before opening.
     398             :      */
     399             :     if (U_ICU_VERSION_MAJOR_NUM < 55)
     400             :     {
     401             :         char        lang[ULOC_LANG_CAPACITY];
     402             : 
     403             :         status = U_ZERO_ERROR;
     404             :         uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
     405             :         if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
     406             :         {
     407             :             ereport(ERROR,
     408             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     409             :                      errmsg("could not get language from locale \"%s\": %s",
     410             :                             loc_str, u_errorName(status))));
     411             :         }
     412             : 
     413             :         if (strcmp(lang, "und") == 0)
     414             :         {
     415             :             const char *remainder = loc_str + strlen("und");
     416             : 
     417             :             fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
     418             :             strcpy(fixed_str, "root");
     419             :             strcat(fixed_str, remainder);
     420             : 
     421             :             loc_str = fixed_str;
     422             :         }
     423             :     }
     424             : 
     425       79812 :     status = U_ZERO_ERROR;
     426       79812 :     collator = ucol_open(loc_str, &status);
     427       79812 :     if (U_FAILURE(status))
     428          12 :         ereport(ERROR,
     429             :         /* use original string for error report */
     430             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     431             :                  errmsg("could not open collator for locale \"%s\": %s",
     432             :                         orig_str, u_errorName(status))));
     433             : 
     434             :     if (U_ICU_VERSION_MAJOR_NUM < 54)
     435             :     {
     436             :         status = U_ZERO_ERROR;
     437             :         icu_set_collation_attributes(collator, loc_str, &status);
     438             : 
     439             :         /*
     440             :          * Pretend the error came from ucol_open(), for consistent error
     441             :          * message across ICU versions.
     442             :          */
     443             :         if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
     444             :         {
     445             :             ucol_close(collator);
     446             :             ereport(ERROR,
     447             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     448             :                      errmsg("could not open collator for locale \"%s\": %s",
     449             :                             orig_str, u_errorName(status))));
     450             :         }
     451             :     }
     452             : 
     453       79800 :     if (fixed_str != NULL)
     454           0 :         pfree(fixed_str);
     455             : 
     456       79800 :     return collator;
     457             : }
     458             : 
     459             : /*
     460             :  * Create a UCollator with the given locale string and rules.
     461             :  *
     462             :  * Ensure that no path leaks a UCollator.
     463             :  */
     464             : static UCollator *
     465         210 : make_icu_collator(const char *iculocstr, const char *icurules)
     466             : {
     467         210 :     if (!icurules)
     468             :     {
     469             :         /* simple case without rules */
     470         198 :         return pg_ucol_open(iculocstr);
     471             :     }
     472             :     else
     473             :     {
     474             :         UCollator  *collator_std_rules;
     475             :         UCollator  *collator_all_rules;
     476             :         const UChar *std_rules;
     477             :         UChar      *my_rules;
     478             :         UChar      *all_rules;
     479             :         int32_t     length;
     480             :         int32_t     total;
     481             :         UErrorCode  status;
     482             : 
     483             :         /*
     484             :          * If rules are specified, we extract the rules of the standard
     485             :          * collation, add our own rules, and make a new collator with the
     486             :          * combined rules.
     487             :          */
     488          12 :         icu_to_uchar(&my_rules, icurules, strlen(icurules));
     489             : 
     490          12 :         collator_std_rules = pg_ucol_open(iculocstr);
     491             : 
     492          12 :         std_rules = ucol_getRules(collator_std_rules, &length);
     493             : 
     494          12 :         total = u_strlen(std_rules) + u_strlen(my_rules) + 1;
     495             : 
     496             :         /* avoid leaking collator on OOM */
     497          12 :         all_rules = palloc_extended(sizeof(UChar) * total, MCXT_ALLOC_NO_OOM);
     498          12 :         if (!all_rules)
     499             :         {
     500           0 :             ucol_close(collator_std_rules);
     501           0 :             ereport(ERROR,
     502             :                     (errcode(ERRCODE_OUT_OF_MEMORY),
     503             :                      errmsg("out of memory")));
     504             :         }
     505             : 
     506          12 :         u_strcpy(all_rules, std_rules);
     507          12 :         u_strcat(all_rules, my_rules);
     508             : 
     509          12 :         ucol_close(collator_std_rules);
     510             : 
     511          12 :         status = U_ZERO_ERROR;
     512          12 :         collator_all_rules = ucol_openRules(all_rules, u_strlen(all_rules),
     513             :                                             UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,
     514             :                                             NULL, &status);
     515          12 :         if (U_FAILURE(status))
     516             :         {
     517           6 :             ereport(ERROR,
     518             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     519             :                      errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
     520             :                             iculocstr, icurules, u_errorName(status))));
     521             :         }
     522             : 
     523           6 :         return collator_all_rules;
     524             :     }
     525             : }
     526             : 
     527             : static size_t
     528         528 : strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     529             :              pg_locale_t locale)
     530             : {
     531             :     int32_t     len_uchar;
     532             :     int32_t     len_conv;
     533             :     UChar      *buff_uchar;
     534             :     UChar      *buff_conv;
     535             :     size_t      result_len;
     536             : 
     537         528 :     len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
     538         528 :     len_conv = icu_convert_case(u_strToLower, locale,
     539             :                                 &buff_conv, buff_uchar, len_uchar);
     540         528 :     result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
     541         528 :     pfree(buff_uchar);
     542         528 :     pfree(buff_conv);
     543             : 
     544         528 :     return result_len;
     545             : }
     546             : 
     547             : static size_t
     548          30 : strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     549             :              pg_locale_t locale)
     550             : {
     551             :     int32_t     len_uchar;
     552             :     int32_t     len_conv;
     553             :     UChar      *buff_uchar;
     554             :     UChar      *buff_conv;
     555             :     size_t      result_len;
     556             : 
     557          30 :     len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
     558          30 :     len_conv = icu_convert_case(u_strToTitle_default_BI, locale,
     559             :                                 &buff_conv, buff_uchar, len_uchar);
     560          30 :     result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
     561          30 :     pfree(buff_uchar);
     562          30 :     pfree(buff_conv);
     563             : 
     564          30 :     return result_len;
     565             : }
     566             : /*
                     :  * strupper_icu
                     :  *
                     :  * Upper-case src into dest.  The input is converted to UChars by
                     :  * icu_to_uchar(), case-mapped by icu_convert_case() using u_strToUpper,
                     :  * and converted back into dest by icu_from_uchar().
                     :  *
                     :  * The return value is the one produced by icu_from_uchar(): the byte
                     :  * length of the converted string, not counting the terminating NUL.  When
                     :  * that length plus one exceeds destsize, icu_from_uchar() returns the
                     :  * length without writing anything to dest.
                     :  */
     567             : static size_t
     568          54 : strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     569             :              pg_locale_t locale)
     570             : {
     571             :     int32_t     len_uchar;
     572             :     int32_t     len_conv;
     573             :     UChar      *buff_uchar;
     574             :     UChar      *buff_conv;
     575             :     size_t      result_len;
     576             : 
     577          54 :     len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
     578          54 :     len_conv = icu_convert_case(u_strToUpper, locale,
     579             :                                 &buff_conv, buff_uchar, len_uchar);
     580          54 :     result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
     581          54 :     pfree(buff_uchar);          /* allocated by icu_to_uchar() */
     582          54 :     pfree(buff_conv);           /* allocated by icu_convert_case() */
     583             : 
     584          54 :     return result_len;
     585             : }
     586             : /*
                     :  * strfold_icu
                     :  *
                     :  * Case-fold src into dest.  The input is converted to UChars by
                     :  * icu_to_uchar(), folded by icu_convert_case() using
                     :  * u_strFoldCase_default (which picks the folding options from the ICU
                     :  * locale name), and converted back into dest by icu_from_uchar().
                     :  *
                     :  * The return value is the one produced by icu_from_uchar(): the byte
                     :  * length of the converted string, not counting the terminating NUL.  When
                     :  * that length plus one exceeds destsize, icu_from_uchar() returns the
                     :  * length without writing anything to dest.
                     :  */
     587             : static size_t
     588          12 : strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     589             :             pg_locale_t locale)
     590             : {
     591             :     int32_t     len_uchar;
     592             :     int32_t     len_conv;
     593             :     UChar      *buff_uchar;
     594             :     UChar      *buff_conv;
     595             :     size_t      result_len;
     596             : 
     597          12 :     len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
     598          12 :     len_conv = icu_convert_case(u_strFoldCase_default, locale,
     599             :                                 &buff_conv, buff_uchar, len_uchar);
     600          12 :     result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
     601          12 :     pfree(buff_uchar);          /* allocated by icu_to_uchar() */
     602          12 :     pfree(buff_conv);           /* allocated by icu_convert_case() */
     603             : 
     604          12 :     return result_len;
     605             : }
     606             : 
     607             : /*
                     :  * downcase_ident_icu
                     :  *
                     :  * Copy src to dst one byte at a time, lower-casing as it goes.
                     :  *
     608             :  * For historical compatibility, behavior is not multibyte-aware.
     609             :  *
     610             :  * NB: uses libc tolower() for single-byte encodings (also for historical
     611             :  * compatibility), and therefore relies on the global LC_CTYPE setting.
                     :  *
                     :  * NOTE(review): the code below calls isupper_l()/tolower_l() with the
                     :  * explicit locale_t taken from locale->icu.lt, not plain tolower(), so
                     :  * the reference to the global LC_CTYPE setting may be outdated; confirm.
                     :  *
                     :  * At most dstsize bytes are written.  The NUL terminator is stored only
                     :  * if there is room left for it.  The return value is always srclen, even
                     :  * when the copy was cut short by dstsize.
                     :  *
                     :  * NOTE(review): a srclen of -1 gets no special treatment here: the loop
                     :  * would not run and (size_t) -1 would be returned.  Presumably callers
                     :  * always pass an explicit length; verify against callers.
     612             :  */
     613             : static size_t
     614       64968 : downcase_ident_icu(char *dst, size_t dstsize, const char *src,
     615             :                    ssize_t srclen, pg_locale_t locale)
     616             : {
     617             :     int         i;
     618             :     bool        libc_lower;
     619       64968 :     locale_t    lt = locale->icu.lt;
     620             : 
     621       64968 :     libc_lower = lt && (pg_database_encoding_max_length() == 1); /* single-byte encoding and a locale_t is available */
     622             : 
     623      612630 :     for (i = 0; i < srclen && i < dstsize; i++)
     624             :     {
     625      547662 :         unsigned char ch = (unsigned char) src[i];
     626             : 
     627      547662 :         if (ch >= 'A' && ch <= 'Z')
     628       11770 :             ch = pg_ascii_tolower(ch);
     629      535892 :         else if (libc_lower && IS_HIGHBIT_SET(ch) && isupper_l(ch, lt)) /* non-ASCII byte: defer to the libc locale */
     630           0 :             ch = tolower_l(ch, lt);
     631      547662 :         dst[i] = (char) ch;
     632             :     }
     633             : 
     634       64968 :     if (i < dstsize)            /* terminate only when there is room */
     635       64968 :         dst[i] = '\0';
     636             : 
     637       64968 :     return srclen;
     638             : }
     639             : 
     640             : /*
     641             :  * strncoll_icu_utf8
     642             :  *
     643             :  * Compare two strings with ucol_strcollUTF8(), which is handed the input
     644             :  * bytes directly; the database encoding must therefore be UTF-8 (asserted
     645             :  * below). An argument length of -1 means the string is NUL-terminated.
                     :  *
                     :  * Only compiled when HAVE_UCOL_STRCOLLUTF8 is defined.  Returns the value
                     :  * produced by ucol_strcollUTF8(); raises ERROR if ICU reports a failure
                     :  * status.
     646             :  */
     647             : #ifdef HAVE_UCOL_STRCOLLUTF8
     648             : int
     649       23792 : strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
     650             :                   pg_locale_t locale)
     651             : {
     652             :     int         result;
     653             :     UErrorCode  status;
     654             : 
     655             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     656             : 
     657       23792 :     status = U_ZERO_ERROR;
     658       23792 :     result = ucol_strcollUTF8(locale->icu.ucol,
     659             :                               arg1, len1,
     660             :                               arg2, len2,
     661             :                               &status);
     662       23792 :     if (U_FAILURE(status))
     663           0 :         ereport(ERROR,
     664             :                 (errmsg("collation failed: %s", u_errorName(status))));
     665             : 
     666       23792 :     return result;
     667             : }
     668             : #endif
     669             : 
     670             : /*
                     :  * strnxfrm_icu
                     :  *
                     :  * Build the ICU sort key for src with ucol_getSortKey(), storing it in
                     :  * dest (capacity destsize), and return the key length in bytes, not
                     :  * counting the terminating NUL.
                     :  *
                     :  * 'srclen' of -1 means the strings are NUL-terminated
                     :  *
                     :  * The source is first converted to UChars.  The conversion buffer is the
                     :  * on-stack sbuf when (ulen + 1) UChars fit in TEXTBUFLEN bytes, otherwise
                     :  * a palloc chunk that is freed before returning.
                     :  *
                     :  * The final assertion allows result_bsize >= destsize, i.e. the returned
                     :  * length may exceed what dest can hold.
                     :  */
     671             : size_t
     672        5748 : strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     673             :              pg_locale_t locale)
     674             : {
     675             :     char        sbuf[TEXTBUFLEN];
     676        5748 :     char       *buf = sbuf;
     677             :     UChar      *uchar;
     678             :     int32_t     ulen;
     679             :     size_t      uchar_bsize;
     680             :     Size        result_bsize;
     681             : 
     682        5748 :     init_icu_converter();
     683             : 
     684        5748 :     ulen = uchar_length(icu_converter, src, srclen);
     685             : 
     686        5748 :     uchar_bsize = (ulen + 1) * sizeof(UChar);   /* + 1 leaves room for a terminator */
     687             : 
     688        5748 :     if (uchar_bsize > TEXTBUFLEN)
     689           0 :         buf = palloc(uchar_bsize);
     690             : 
     691        5748 :     uchar = (UChar *) buf;
     692             : 
     693        5748 :     ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
     694             : 
     695        5748 :     result_bsize = ucol_getSortKey(locale->icu.ucol,
     696             :                                    uchar, ulen,
     697             :                                    (uint8_t *) dest, destsize);
     698             : 
     699             :     /*
     700             :      * ucol_getSortKey() counts the nul-terminator in the result length, but
     701             :      * this function should not.
     702             :      */
     703             :     Assert(result_bsize > 0);
     704        5748 :     result_bsize--;
     705             : 
     706        5748 :     if (buf != sbuf)            /* only the palloc fallback needs freeing */
     707           0 :         pfree(buf);
     708             : 
     709             :     /* if dest is defined, it should be nul-terminated */
     710             :     Assert(result_bsize >= destsize || dest[result_bsize] == '\0');
     711             : 
     712        5748 :     return result_bsize;
     713             : }
     714             : 
     715             : /*
                     :  * strnxfrm_prefix_icu_utf8
                     :  *
                     :  * Write up to destsize bytes of the ICU sort key for src into dest using
                     :  * ucol_nextSortKeyPart(), and return the value that call produced.  The
                     :  * source bytes are read in place through a UTF-8 UCharIterator, so the
                     :  * database encoding must be UTF-8 (asserted below).
                     :  *
                     :  * 'srclen' of -1 means the strings are NUL-terminated
                     :  *
                     :  * Raises ERROR if ICU reports a failure status.
                     :  */
     716             : size_t
     717        1668 : strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
     718             :                          const char *src, ssize_t srclen,
     719             :                          pg_locale_t locale)
     720             : {
     721             :     size_t      result;
     722             :     UCharIterator iter;
     723             :     uint32_t    state[2];
     724             :     UErrorCode  status;
     725             : 
     726             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     727             : 
     728        1668 :     uiter_setUTF8(&iter, src, srclen);
     729        1668 :     state[0] = state[1] = 0;    /* won't need that again */
     730        1668 :     status = U_ZERO_ERROR;
     731        1668 :     result = ucol_nextSortKeyPart(locale->icu.ucol,
     732             :                                   &iter,
     733             :                                   state,
     734             :                                   (uint8_t *) dest,
     735             :                                   destsize,
     736             :                                   &status);
     737        1668 :     if (U_FAILURE(status))
     738           0 :         ereport(ERROR,
     739             :                 (errmsg("sort key generation failed: %s",
     740             :                         u_errorName(status))));
     741             : 
     742        1668 :     return result;
     743             : }
     744             : /*
                     :  * get_collation_actual_version_icu
                     :  *
                     :  * Return the collator version for the locale named by collcollate as a
                     :  * pstrdup-allocated string.  A collator is opened with pg_ucol_open()
                     :  * just long enough to read its version with ucol_getVersion(), then
                     :  * closed again; the version is formatted with u_versionToString().
                     :  */
     745             : char *
     746       79454 : get_collation_actual_version_icu(const char *collcollate)
     747             : {
     748             :     UCollator  *collator;
     749             :     UVersionInfo versioninfo;
     750             :     char        buf[U_MAX_VERSION_STRING_LENGTH];
     751             : 
     752       79454 :     collator = pg_ucol_open(collcollate);
     753             : 
     754       79454 :     ucol_getVersion(collator, versioninfo);
     755       79454 :     ucol_close(collator);
     756             : 
     757       79454 :     u_versionToString(versioninfo, buf);
     758       79454 :     return pstrdup(buf);
     759             : }
     760             : 
     761             : /*
     762             :  * Convert a string in the database encoding into a string of UChars.
     763             :  *
     764             :  * The source string at buff is of length nbytes
     765             :  * (it needn't be nul-terminated)
     766             :  *
     767             :  * *buff_uchar receives a pointer to the palloc'd result string, and
     768             :  * the function's result is the number of UChars generated.
     769             :  *
     770             :  * The result string is nul-terminated, though most callers rely on the
     771             :  * result length instead.
                     :  *
                     :  * The conversion is done in two passes: uchar_length() measures the
                     :  * output, then uchar_convert() fills a buffer sized for that many UChars
                     :  * plus one.
                     :  *
                     :  * NOTE(review): nbytes is a size_t here, but uchar_length() and
                     :  * uchar_convert() take the length as int32_t, and the case-mapping
                     :  * callers in this file pass an ssize_t srclen.  Confirm that lengths
                     :  * outside the int32_t range cannot reach this function.
     772             :  */
     773             : static int32_t
     774         636 : icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
     775             : {
     776             :     int32_t     len_uchar;
     777             : 
     778         636 :     init_icu_converter();
     779             : 
     780         636 :     len_uchar = uchar_length(icu_converter, buff, nbytes);
     781             : 
     782         636 :     *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
     783         636 :     len_uchar = uchar_convert(icu_converter,
     784             :                               *buff_uchar, len_uchar + 1, buff, nbytes);
     785             : 
     786         636 :     return len_uchar;
     787             : }
     788             : 
     789             : /*
     790             :  * Convert a string of UChars into the database encoding.
     791             :  *
     792             :  * The source string at buff_uchar is of length len_uchar
     793             :  * (it needn't be nul-terminated)
     794             :  *
     795             :  * The result is written into the caller-supplied buffer dest, whose
     796             :  * capacity is destsize bytes.  The function's result is the number of
                     :  * bytes the converted string occupies (not counting nul).
     797             :  *
     798             :  * If that length plus one for the nul does not fit in destsize, the
                     :  * length is returned and nothing is written to dest.  Otherwise dest
                     :  * receives the nul-terminated result.
     799             :  */
     800             : static size_t
     801         624 : icu_from_uchar(char *dest, size_t destsize, const UChar *buff_uchar, int32_t len_uchar)
     802             : {
     803             :     UErrorCode  status;
     804             :     int32_t     len_result;
     805             : 
     806         624 :     init_icu_converter();
     807             : 
     808         624 :     status = U_ZERO_ERROR;
     809         624 :     len_result = ucnv_fromUChars(icu_converter, NULL, 0,
     810             :                                  buff_uchar, len_uchar, &status); /* measuring pass: no output buffer */
     811         624 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) /* overflow is tolerated when only measuring */
     812           0 :         ereport(ERROR,
     813             :                 (errmsg("%s failed: %s", "ucnv_fromUChars",
     814             :                         u_errorName(status))));
     815             : 
     816         624 :     if (len_result + 1 > destsize)  /* no room for result plus nul: report the length only */
     817          60 :         return len_result;
     818             : 
     819         564 :     status = U_ZERO_ERROR;
     820         564 :     len_result = ucnv_fromUChars(icu_converter, dest, len_result + 1,
     821             :                                  buff_uchar, len_uchar, &status);
     822         564 :     if (U_FAILURE(status) ||
     823         564 :         status == U_STRING_NOT_TERMINATED_WARNING) /* an unterminated result is treated as an error too */
     824           0 :         ereport(ERROR,
     825             :                 (errmsg("%s failed: %s", "ucnv_fromUChars",
     826             :                         u_errorName(status))));
     827             : 
     828         564 :     return len_result;
     829             : }
     830             : /*
                     :  * icu_convert_case
                     :  *
                     :  * Run the case-mapping function func over the UChar string buff_source
                     :  * (len_source UChars), passing it mylocale->icu.locale.
                     :  *
                     :  * *buff_dest receives a palloc-allocated buffer holding the result, and
                     :  * the return value is the result length in UChars as reported by func.
                     :  * The first attempt uses a buffer of len_source UChars; if func reports
                     :  * U_BUFFER_OVERFLOW_ERROR, the buffer is reallocated to the length func
                     :  * returned and the call is repeated once.  Any remaining failure status
                     :  * raises ERROR.
                     :  *
                     :  * NOTE(review): the buffer has no spare UChar for a terminator, so the
                     :  * result is presumably not guaranteed to be nul-terminated; callers in
                     :  * this file use the returned length.
                     :  */
     831             : static int32_t
     832         624 : icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
     833             :                  UChar **buff_dest, UChar *buff_source, int32_t len_source)
     834             : {
     835             :     UErrorCode  status;
     836             :     int32_t     len_dest;
     837             : 
     838         624 :     len_dest = len_source;      /* try first with same length */
     839         624 :     *buff_dest = palloc(len_dest * sizeof(**buff_dest));
     840         624 :     status = U_ZERO_ERROR;
     841         624 :     len_dest = func(*buff_dest, len_dest, buff_source, len_source,
     842             :                     mylocale->icu.locale, &status);
     843         624 :     if (status == U_BUFFER_OVERFLOW_ERROR)
     844             :     {
     845             :         /* try again with adjusted length */
     846          18 :         pfree(*buff_dest);
     847          18 :         *buff_dest = palloc(len_dest * sizeof(**buff_dest));
     848          18 :         status = U_ZERO_ERROR;
     849          18 :         len_dest = func(*buff_dest, len_dest, buff_source, len_source,
     850             :                         mylocale->icu.locale, &status);
     851             :     }
     852         624 :     if (U_FAILURE(status))
     853           0 :         ereport(ERROR,
     854             :                 (errmsg("case conversion failed: %s", u_errorName(status))));
     855         624 :     return len_dest;
     856             : }
     857             : /*
                     :  * u_strToTitle_default_BI
                     :  *
                     :  * Adapter that gives u_strToTitle() the same six-argument shape that
                     :  * icu_convert_case() uses to call its func argument.  The one extra
                     :  * argument u_strToTitle() takes, ahead of the locale, is passed as NULL.
                     :  *
                     :  * NOTE(review): going by the _BI suffix, that argument is the break
                     :  * iterator and NULL presumably selects the ICU default; confirm
                     :  * against the ICU documentation.
                     :  */
     858             : static int32_t
     859          30 : u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
     860             :                         const UChar *src, int32_t srcLength,
     861             :                         const char *locale,
     862             :                         UErrorCode *pErrorCode)
     863             : {
     864          30 :     return u_strToTitle(dest, destCapacity, src, srcLength,
     865             :                         NULL, locale, pErrorCode);
     866             : }
     867             : /*
                     :  * u_strFoldCase_default
                     :  *
                     :  * Adapter that gives u_strFoldCase() the same six-argument shape that
                     :  * icu_convert_case() uses to call its func argument.  The locale
                     :  * argument is used only to choose the folding options; the return
                     :  * value and *pErrorCode come straight from u_strFoldCase().
                     :  *
                     :  * A failure of uloc_getLanguage() is not reported: the local status is
                     :  * only tested, and the default options are used in that case.
                     :  *
                     :  * NOTE(review): lang holds two characters plus a terminator; a longer
                     :  * language code presumably makes uloc_getLanguage() fail and so falls
                     :  * back to the default options.  Confirm against the ICU documentation.
                     :  */
     868             : static int32_t
     869          24 : u_strFoldCase_default(UChar *dest, int32_t destCapacity,
     870             :                       const UChar *src, int32_t srcLength,
     871             :                       const char *locale,
     872             :                       UErrorCode *pErrorCode)
     873             : {
     874          24 :     uint32      options = U_FOLD_CASE_DEFAULT;
     875             :     char        lang[3];
     876             :     UErrorCode  status;
     877             : 
     878             :     /*
     879             :      * Unlike the ICU APIs for lowercasing, titlecasing, and uppercasing, case
     880             :      * folding does not accept a locale. Instead it just supports a single
     881             :      * option relevant to Turkic languages 'az' and 'tr'; check for those
     882             :      * languages to enable the option.
     883             :      */
     884          24 :     status = U_ZERO_ERROR;
     885          24 :     uloc_getLanguage(locale, lang, 3, &status);
     886          24 :     if (U_SUCCESS(status))
     887             :     {
     888             :         /*
     889             :          * The option name is confusing, but it causes u_strFoldCase to use
     890             :          * the 'T' mappings, which are ignored for U_FOLD_CASE_DEFAULT.
     891             :          */
     892          24 :         if (strcmp(lang, "tr") == 0 || strcmp(lang, "az") == 0)
     893          12 :             options = U_FOLD_CASE_EXCLUDE_SPECIAL_I;
     894             :     }
     895             : 
     896          24 :     return u_strFoldCase(dest, destCapacity, src, srcLength,
     897             :                          options, pErrorCode);
     898             : }
     899             : 
     900             : /*
     901             :  * strncoll_icu
     902             :  *
     903             :  * Convert the arguments from the database encoding to UChar strings, then
     904             :  * call ucol_strcoll(). An argument length of -1 means that the string is
     905             :  * NUL-terminated.
     906             :  *
     907             :  * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
     908             :  * caller should call that instead.
                     :  *
                     :  * Both converted strings share one buffer: the on-stack sbuf when their
                     :  * combined size fits in TEXTBUFLEN bytes, otherwise a single palloc
                     :  * chunk that is freed before returning.  The return value is the one
                     :  * produced by ucol_strcoll().
     909             :  */
     910             : static int
     911           0 : strncoll_icu(const char *arg1, ssize_t len1,
     912             :              const char *arg2, ssize_t len2, pg_locale_t locale)
     913             : {
     914             :     char        sbuf[TEXTBUFLEN];
     915           0 :     char       *buf = sbuf;
     916             :     int32_t     ulen1;
     917             :     int32_t     ulen2;
     918             :     size_t      bufsize1;
     919             :     size_t      bufsize2;
     920             :     UChar      *uchar1,
     921             :                *uchar2;
     922             :     int         result;
     923             : 
     924             :     /* if encoding is UTF8, use more efficient strncoll_icu_utf8 */
     925             : #ifdef HAVE_UCOL_STRCOLLUTF8
     926             :     Assert(GetDatabaseEncoding() != PG_UTF8);
     927             : #endif
     928             : 
     929           0 :     init_icu_converter();
     930             : 
     931           0 :     ulen1 = uchar_length(icu_converter, arg1, len1);
     932           0 :     ulen2 = uchar_length(icu_converter, arg2, len2);
     933             : 
     934           0 :     bufsize1 = (ulen1 + 1) * sizeof(UChar);
     935           0 :     bufsize2 = (ulen2 + 1) * sizeof(UChar);
     936             : 
     937           0 :     if (bufsize1 + bufsize2 > TEXTBUFLEN)
     938           0 :         buf = palloc(bufsize1 + bufsize2);
     939             : 
     940           0 :     uchar1 = (UChar *) buf;
     941           0 :     uchar2 = (UChar *) (buf + bufsize1);    /* second string starts right after the first */
     942             : 
     943           0 :     ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
     944           0 :     ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
     945             : 
     946           0 :     result = ucol_strcoll(locale->icu.ucol,
     947             :                           uchar1, ulen1,
     948             :                           uchar2, ulen2);
     949             : 
     950           0 :     if (buf != sbuf)            /* only the palloc fallback needs freeing */
     951           0 :         pfree(buf);
     952             : 
     953           0 :     return result;
     954             : }
     955             : 
     956             : /*
                     :  * strnxfrm_prefix_icu
                     :  *
                     :  * Write up to destsize bytes of the ICU sort key for src into dest using
                     :  * ucol_nextSortKeyPart(), and return the value that call produced.  The
                     :  * source is first converted from the database encoding to UChars and
                     :  * then read through a UCharIterator over that buffer.
                     :  *
                     :  * 'srclen' of -1 means the strings are NUL-terminated
                     :  *
                     :  * Raises ERROR if ICU reports a failure status.
                     :  *
                     :  * NOTE(review): when uchar_bsize exceeds TEXTBUFLEN the conversion
                     :  * buffer is palloc-allocated, but this function never frees it;
                     :  * strnxfrm_icu() and strncoll_icu() both end with
                     :  * "if (buf != sbuf) pfree(buf);".  This looks like a leak into the
                     :  * current memory context on every call with a long input; the fix
                     :  * belongs in the underlying source file.
                     :  */
     957             : static size_t
     958           0 : strnxfrm_prefix_icu(char *dest, size_t destsize,
     959             :                     const char *src, ssize_t srclen,
     960             :                     pg_locale_t locale)
     961             : {
     962             :     char        sbuf[TEXTBUFLEN];
     963           0 :     char       *buf = sbuf;
     964             :     UCharIterator iter;
     965             :     uint32_t    state[2];
     966             :     UErrorCode  status;
     967           0 :     int32_t     ulen = -1;
     968           0 :     UChar      *uchar = NULL;
     969             :     size_t      uchar_bsize;
     970             :     Size        result_bsize;
     971             : 
     972             :     /* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */
     973             :     Assert(GetDatabaseEncoding() != PG_UTF8);
     974             : 
     975           0 :     init_icu_converter();
     976             : 
     977           0 :     ulen = uchar_length(icu_converter, src, srclen);
     978             : 
     979           0 :     uchar_bsize = (ulen + 1) * sizeof(UChar);
     980             : 
     981           0 :     if (uchar_bsize > TEXTBUFLEN)
     982           0 :         buf = palloc(uchar_bsize);  /* NOTE(review): never freed below */
     983             : 
     984           0 :     uchar = (UChar *) buf;
     985             : 
     986           0 :     ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
     987             : 
     988           0 :     uiter_setString(&iter, uchar, ulen);
     989           0 :     state[0] = state[1] = 0;    /* won't need that again */
     990           0 :     status = U_ZERO_ERROR;
     991           0 :     result_bsize = ucol_nextSortKeyPart(locale->icu.ucol,
     992             :                                         &iter,
     993             :                                         state,
     994             :                                         (uint8_t *) dest,
     995             :                                         destsize,
     996             :                                         &status);
     997           0 :     if (U_FAILURE(status))
     998           0 :         ereport(ERROR,
     999             :                 (errmsg("sort key generation failed: %s",
    1000             :                         u_errorName(status))));
    1001             : 
    1002           0 :     return result_bsize;
    1003             : }
    1004             : /*
                     :  * init_icu_converter
                     :  *
                     :  * Make sure icu_converter (declared outside this section) holds an open
                     :  * ICU converter for the database encoding.  Returns immediately when it
                     :  * is already set.  Otherwise the ICU name of the database encoding is
                     :  * looked up and a converter is opened with ucnv_open().
                     :  *
                     :  * Raises ERROR if the encoding has no ICU name or if ucnv_open() fails.
                     :  * icu_converter is assigned only after the open succeeded.
                     :  */
    1005             : static void
    1006        7008 : init_icu_converter(void)
    1007             : {
    1008             :     const char *icu_encoding_name;
    1009             :     UErrorCode  status;
    1010             :     UConverter *conv;
    1011             : 
    1012        7008 :     if (icu_converter)
    1013        7002 :         return;                 /* already done */
    1014             : 
    1015           6 :     icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
    1016           6 :     if (!icu_encoding_name)
    1017           0 :         ereport(ERROR,
    1018             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1019             :                  errmsg("encoding \"%s\" not supported by ICU",
    1020             :                         pg_encoding_to_char(GetDatabaseEncoding()))));
    1021             : 
    1022           6 :     status = U_ZERO_ERROR;
    1023           6 :     conv = ucnv_open(icu_encoding_name, &status);
    1024           6 :     if (U_FAILURE(status))
    1025           0 :         ereport(ERROR,
    1026             :                 (errmsg("could not open ICU converter for encoding \"%s\": %s",
    1027             :                         icu_encoding_name, u_errorName(status))));
    1028             : 
    1029           6 :     icu_converter = conv;
    1030             : }
    1031             : 
    1032             : /*
    1033             :  * Find length, in UChars, of given string if converted to UChar string.
    1034             :  *
    1035             :  * A length of -1 indicates that the input string is NUL-terminated.
                     :  *
                     :  * This is a measuring call: ucnv_toUChars() is given a NULL destination
                     :  * with capacity 0, so U_BUFFER_OVERFLOW_ERROR is tolerated.  Any other
                     :  * failure status raises ERROR.
                     :  *
                     :  * NOTE(review): the declared return type is size_t, but the value
                     :  * returned is the int32_t from ucnv_toUChars() and every caller in this
                     :  * file stores it in an int32_t.  The len parameter is int32_t as well,
                     :  * while callers pass ssize_t or size_t lengths.
    1036             :  */
    1037             : static size_t
    1038        6384 : uchar_length(UConverter *converter, const char *str, int32_t len)
    1039             : {
    1040        6384 :     UErrorCode  status = U_ZERO_ERROR;
    1041             :     int32_t     ulen;
    1042             : 
    1043        6384 :     ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);
    1044        6384 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
    1045           0 :         ereport(ERROR,
    1046             :                 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
    1047        6384 :     return ulen;
    1048             : }
    1049             : 
    1050             : /*
    1051             :  * Convert the given source string into a UChar string, stored in dest, and
    1052             :  * return the length (in UChars).
    1053             :  *
    1054             :  * A srclen of -1 indicates that the input string is NUL-terminated.
                     :  *
                     :  * destlen is the capacity of dest in UChars.  Unlike uchar_length(),
                     :  * every failure status raises ERROR here, including a buffer overflow.
    1055             :  */
    1056             : static int32_t
    1057        6384 : uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
    1058             :               const char *src, int32_t srclen)
    1059             : {
    1060        6384 :     UErrorCode  status = U_ZERO_ERROR;
    1061             :     int32_t     ulen;
    1062             : 
    1063        6384 :     status = U_ZERO_ERROR;      /* NOTE(review): redundant, status is already initialized above */
    1064        6384 :     ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
    1065        6384 :     if (U_FAILURE(status))
    1066           0 :         ereport(ERROR,
    1067             :                 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
    1068        6384 :     return ulen;
    1069             : }
    1070             : 
    1071             : /*
    1072             :  * Parse collation attributes from the given locale string and apply them to
    1073             :  * the open collator.
    1074             :  *
    1075             :  * First, the locale string is canonicalized to an ICU format locale ID such
    1076             :  * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
    1077             :  * the key-value arguments.
    1078             :  *
    1079             :  * Starting with ICU version 54, the attributes are processed automatically by
    1080             :  * ucol_open(), so this is only necessary for emulating this behavior on older
    1081             :  * versions.
                     :  *
                     :  * Nothing is returned; the outcome is reported through *status.  Tokens
                     :  * without an equals sign and unknown attribute names are skipped.  An
                     :  * unknown attribute value sets *status to U_ILLEGAL_ARGUMENT_ERROR and
                     :  * stops the parse.
                     :  *
                     :  * NOTE(review): *status is reset to U_ZERO_ERROR for every key=value
                     :  * token, so a failure reported by ucol_setAttribute() for one token is
                     :  * overwritten when a later token is processed; confirm this is intended.
                     :  *
                     :  * NOTE(review): the two early returns below skip a pfree: the first
                     :  * leaves icu_locale_id allocated, the second leaves lower_str allocated.
    1082             :  */
    1083             : pg_attribute_unused()
    1084             : static void
    1085           0 : icu_set_collation_attributes(UCollator *collator, const char *loc,
    1086             :                              UErrorCode *status)
    1087             : {
    1088             :     int32_t     len;
    1089             :     char       *icu_locale_id;
    1090             :     char       *lower_str;
    1091             :     char       *str;
    1092             :     char       *token;
    1093             : 
    1094             :     /*
    1095             :      * The input locale may be a BCP 47 language tag, e.g.
    1096             :      * "und-u-kc-ks-level1", which expresses the same attributes in a
    1097             :      * different form. It will be converted to the equivalent ICU format
    1098             :      * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
    1099             :      * uloc_canonicalize().
    1100             :      */
    1101           0 :     *status = U_ZERO_ERROR;
    1102           0 :     len = uloc_canonicalize(loc, NULL, 0, status);  /* measuring call; its status is discarded below */
    1103           0 :     icu_locale_id = palloc(len + 1);
    1104           0 :     *status = U_ZERO_ERROR;
    1105           0 :     len = uloc_canonicalize(loc, icu_locale_id, len + 1, status);
    1106           0 :     if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
    1107           0 :         return;                 /* NOTE(review): icu_locale_id is not freed on this path */
    1108             : 
    1109           0 :     lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
    1110             : 
    1111           0 :     pfree(icu_locale_id);
    1112             : 
    1113           0 :     str = strchr(lower_str, '@');
    1114           0 :     if (!str)
    1115           0 :         return;                 /* NOTE(review): lower_str is not freed on this path */
    1116           0 :     str++;
    1117             : 
    1118           0 :     while ((token = strsep(&str, ";")))
    1119             :     {
    1120           0 :         char       *e = strchr(token, '=');
    1121             : 
    1122           0 :         if (e)
    1123             :         {
    1124             :             char       *name;
    1125             :             char       *value;
    1126             :             UColAttribute uattr;
    1127             :             UColAttributeValue uvalue;
    1128             : 
    1129           0 :             *status = U_ZERO_ERROR;
    1130             : 
    1131           0 :             *e = '\0';          /* split the token in place into name and value */
    1132           0 :             name = token;
    1133           0 :             value = e + 1;
    1134             : 
    1135             :             /*
    1136             :              * See attribute name and value lists in ICU i18n/coll.cpp
    1137             :              */
    1138           0 :             if (strcmp(name, "colstrength") == 0)
    1139           0 :                 uattr = UCOL_STRENGTH;
    1140           0 :             else if (strcmp(name, "colbackwards") == 0)
    1141           0 :                 uattr = UCOL_FRENCH_COLLATION;
    1142           0 :             else if (strcmp(name, "colcaselevel") == 0)
    1143           0 :                 uattr = UCOL_CASE_LEVEL;
    1144           0 :             else if (strcmp(name, "colcasefirst") == 0)
    1145           0 :                 uattr = UCOL_CASE_FIRST;
    1146           0 :             else if (strcmp(name, "colalternate") == 0)
    1147           0 :                 uattr = UCOL_ALTERNATE_HANDLING;
    1148           0 :             else if (strcmp(name, "colnormalization") == 0)
    1149           0 :                 uattr = UCOL_NORMALIZATION_MODE;
    1150           0 :             else if (strcmp(name, "colnumeric") == 0)
    1151           0 :                 uattr = UCOL_NUMERIC_COLLATION;
    1152             :             else
    1153             :                 /* ignore if unknown */
    1154           0 :                 continue;
    1155             : 
    1156           0 :             if (strcmp(value, "primary") == 0)
    1157           0 :                 uvalue = UCOL_PRIMARY;
    1158           0 :             else if (strcmp(value, "secondary") == 0)
    1159           0 :                 uvalue = UCOL_SECONDARY;
    1160           0 :             else if (strcmp(value, "tertiary") == 0)
    1161           0 :                 uvalue = UCOL_TERTIARY;
    1162           0 :             else if (strcmp(value, "quaternary") == 0)
    1163           0 :                 uvalue = UCOL_QUATERNARY;
    1164           0 :             else if (strcmp(value, "identical") == 0)
    1165           0 :                 uvalue = UCOL_IDENTICAL;
    1166           0 :             else if (strcmp(value, "no") == 0)
    1167           0 :                 uvalue = UCOL_OFF;
    1168           0 :             else if (strcmp(value, "yes") == 0)
    1169           0 :                 uvalue = UCOL_ON;
    1170           0 :             else if (strcmp(value, "shifted") == 0)
    1171           0 :                 uvalue = UCOL_SHIFTED;
    1172           0 :             else if (strcmp(value, "non-ignorable") == 0)
    1173           0 :                 uvalue = UCOL_NON_IGNORABLE;
    1174           0 :             else if (strcmp(value, "lower") == 0)
    1175           0 :                 uvalue = UCOL_LOWER_FIRST;
    1176           0 :             else if (strcmp(value, "upper") == 0)
    1177           0 :                 uvalue = UCOL_UPPER_FIRST;
    1178             :             else
    1179             :             {
    1180           0 :                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    1181           0 :                 break;          /* leaves the loop; lower_str is still freed below */
    1182             :             }
    1183             : 
    1184           0 :             ucol_setAttribute(collator, uattr, uvalue, status);
    1185             :         }
    1186             :     }
    1187             : 
    1188           0 :     pfree(lower_str);
    1189             : }
    1190             : 
    1191             : #endif                          /* USE_ICU */

Generated by: LCOV version 1.16