LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_icu.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 179 367 48.8 %
Date: 2026-01-22 20:17:48 Functions: 30 45 66.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-----------------------------------------------------------------------
       2             :  *
       3             :  * PostgreSQL locale utilities for ICU
       4             :  *
       5             :  * Portions Copyright (c) 2002-2026, PostgreSQL Global Development Group
       6             :  *
       7             :  * src/backend/utils/adt/pg_locale_icu.c
       8             :  *
       9             :  *-----------------------------------------------------------------------
      10             :  */
      11             : 
      12             : #include "postgres.h"
      13             : 
      14             : #ifdef USE_ICU
      15             : #include <unicode/ucasemap.h>
      16             : #include <unicode/ucnv.h>
      17             : #include <unicode/ucol.h>
      18             : #include <unicode/ustring.h>
      19             : 
      20             : /*
      21             :  * ucol_strcollUTF8() was introduced in ICU 50, but it is buggy before ICU 53.
      22             :  * (see
      23             :  * <https://www.postgresql.org/message-id/flat/f1438ec6-22aa-4029-9a3b-26f79d330e72%40manitou-mail.org>)
      24             :  */
      25             : #if U_ICU_VERSION_MAJOR_NUM >= 53
      26             : #define HAVE_UCOL_STRCOLLUTF8 1
      27             : #else
      28             : #undef HAVE_UCOL_STRCOLLUTF8
      29             : #endif
      30             : 
      31             : #endif
      32             : 
      33             : #include "access/htup_details.h"
      34             : #include "catalog/pg_database.h"
      35             : #include "catalog/pg_collation.h"
      36             : #include "mb/pg_wchar.h"
      37             : #include "miscadmin.h"
      38             : #include "utils/builtins.h"
      39             : #include "utils/formatting.h"
      40             : #include "utils/memutils.h"
      41             : #include "utils/pg_locale.h"
      42             : #include "utils/syscache.h"
      43             : 
      44             : /*
      45             :  * Size of stack buffer to use for string transformations, used to avoid heap
      46             :  * allocations in typical cases. This should be large enough that most strings
      47             :  * will fit, but small enough that we feel comfortable putting it on the
      48             :  * stack.
      49             :  */
      50             : #define     TEXTBUFLEN          1024
      51             : 
      52             : extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
      53             : 
      54             : #ifdef USE_ICU
      55             : 
      56             : extern UCollator *pg_ucol_open(const char *loc_str);
      57             : static UCaseMap *pg_ucasemap_open(const char *loc_str);
      58             : 
      59             : static size_t strlower_icu(char *dest, size_t destsize, const char *src,
      60             :                            ssize_t srclen, pg_locale_t locale);
      61             : static size_t strtitle_icu(char *dest, size_t destsize, const char *src,
      62             :                            ssize_t srclen, pg_locale_t locale);
      63             : static size_t strupper_icu(char *dest, size_t destsize, const char *src,
      64             :                            ssize_t srclen, pg_locale_t locale);
      65             : static size_t strfold_icu(char *dest, size_t destsize, const char *src,
      66             :                           ssize_t srclen, pg_locale_t locale);
      67             : static size_t strlower_icu_utf8(char *dest, size_t destsize, const char *src,
      68             :                                 ssize_t srclen, pg_locale_t locale);
      69             : static size_t strtitle_icu_utf8(char *dest, size_t destsize, const char *src,
      70             :                                 ssize_t srclen, pg_locale_t locale);
      71             : static size_t strupper_icu_utf8(char *dest, size_t destsize, const char *src,
      72             :                                 ssize_t srclen, pg_locale_t locale);
      73             : static size_t strfold_icu_utf8(char *dest, size_t destsize, const char *src,
      74             :                                ssize_t srclen, pg_locale_t locale);
      75             : static size_t downcase_ident_icu(char *dst, size_t dstsize, const char *src,
      76             :                                  ssize_t srclen, pg_locale_t locale);
      77             : static int  strncoll_icu(const char *arg1, ssize_t len1,
      78             :                          const char *arg2, ssize_t len2,
      79             :                          pg_locale_t locale);
      80             : static size_t strnxfrm_icu(char *dest, size_t destsize,
      81             :                            const char *src, ssize_t srclen,
      82             :                            pg_locale_t locale);
      83             : extern char *get_collation_actual_version_icu(const char *collcollate);
      84             : 
      85             : typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
      86             :                                      const UChar *src, int32_t srcLength,
      87             :                                      const char *locale,
      88             :                                      UErrorCode *pErrorCode);
      89             : 
      90             : /*
      91             :  * Converter object for converting between ICU's UChar strings and C strings
      92             :  * in database encoding.  Since the database encoding doesn't change, we only
      93             :  * need one of these per session.
      94             :  */
      95             : static UConverter *icu_converter = NULL;
      96             : 
      97             : static UCollator *make_icu_collator(const char *iculocstr,
      98             :                                     const char *icurules);
      99             : static int  strncoll_icu(const char *arg1, ssize_t len1,
     100             :                          const char *arg2, ssize_t len2,
     101             :                          pg_locale_t locale);
     102             : static size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
     103             :                                   const char *src, ssize_t srclen,
     104             :                                   pg_locale_t locale);
     105             : #ifdef HAVE_UCOL_STRCOLLUTF8
     106             : static int  strncoll_icu_utf8(const char *arg1, ssize_t len1,
     107             :                               const char *arg2, ssize_t len2,
     108             :                               pg_locale_t locale);
     109             : #endif
     110             : static size_t strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
     111             :                                        const char *src, ssize_t srclen,
     112             :                                        pg_locale_t locale);
     113             : static void init_icu_converter(void);
     114             : static size_t uchar_length(UConverter *converter,
     115             :                            const char *str, int32_t len);
     116             : static int32_t uchar_convert(UConverter *converter,
     117             :                              UChar *dest, int32_t destlen,
     118             :                              const char *src, int32_t srclen);
     119             : static int32_t icu_to_uchar(UChar **buff_uchar, const char *buff,
     120             :                             size_t nbytes);
     121             : static size_t icu_from_uchar(char *dest, size_t destsize,
     122             :                              const UChar *buff_uchar, int32_t len_uchar);
     123             : static void icu_set_collation_attributes(UCollator *collator, const char *loc,
     124             :                                          UErrorCode *status);
     125             : static int32_t icu_convert_case(ICU_Convert_Func func, char *dest,
     126             :                                 size_t destsize, const char *src,
     127             :                                 ssize_t srclen, pg_locale_t locale);
     128             : static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
     129             :                                        const UChar *src, int32_t srcLength,
     130             :                                        const char *locale,
     131             :                                        UErrorCode *pErrorCode);
     132             : static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
     133             :                                      const UChar *src, int32_t srcLength,
     134             :                                      const char *locale,
     135             :                                      UErrorCode *pErrorCode);
     136             : static int32_t foldcase_options(const char *locale);
     137             : 
     138             : /*
     139             :  * XXX: many of the functions below rely on casts directly from pg_wchar to
     140             :  * UChar32, which is correct for UTF-8 and LATIN1, but not in general.
     141             :  */
     142             : 
     143             : static pg_wchar
     144         108 : toupper_icu(pg_wchar wc, pg_locale_t locale)
     145             : {
     146         108 :     return u_toupper(wc);
     147             : }
     148             : 
     149             : static pg_wchar
     150         108 : tolower_icu(pg_wchar wc, pg_locale_t locale)
     151             : {
     152         108 :     return u_tolower(wc);
     153             : }
     154             : 
     155             : static const struct collate_methods collate_methods_icu = {
     156             :     .strncoll = strncoll_icu,
     157             :     .strnxfrm = strnxfrm_icu,
     158             :     .strnxfrm_prefix = strnxfrm_prefix_icu,
     159             :     .strxfrm_is_safe = true,
     160             : };
     161             : 
     162             : static const struct collate_methods collate_methods_icu_utf8 = {
     163             : #ifdef HAVE_UCOL_STRCOLLUTF8
     164             :     .strncoll = strncoll_icu_utf8,
     165             : #else
     166             :     .strncoll = strncoll_icu,
     167             : #endif
     168             :     .strnxfrm = strnxfrm_icu,
     169             :     .strnxfrm_prefix = strnxfrm_prefix_icu_utf8,
     170             :     .strxfrm_is_safe = true,
     171             : };
     172             : 
     173             : static bool
     174       12288 : wc_isdigit_icu(pg_wchar wc, pg_locale_t locale)
     175             : {
     176       12288 :     return u_isdigit(wc);
     177             : }
     178             : 
     179             : static bool
     180       12288 : wc_isalpha_icu(pg_wchar wc, pg_locale_t locale)
     181             : {
     182       12288 :     return u_isalpha(wc);
     183             : }
     184             : 
     185             : static bool
     186       12288 : wc_isalnum_icu(pg_wchar wc, pg_locale_t locale)
     187             : {
     188       12288 :     return u_isalnum(wc);
     189             : }
     190             : 
     191             : static bool
     192       12288 : wc_isupper_icu(pg_wchar wc, pg_locale_t locale)
     193             : {
     194       12288 :     return u_isupper(wc);
     195             : }
     196             : 
     197             : static bool
     198       12288 : wc_islower_icu(pg_wchar wc, pg_locale_t locale)
     199             : {
     200       12288 :     return u_islower(wc);
     201             : }
     202             : 
     203             : static bool
     204       12288 : wc_isgraph_icu(pg_wchar wc, pg_locale_t locale)
     205             : {
     206       12288 :     return u_isgraph(wc);
     207             : }
     208             : 
     209             : static bool
     210       12288 : wc_isprint_icu(pg_wchar wc, pg_locale_t locale)
     211             : {
     212       12288 :     return u_isprint(wc);
     213             : }
     214             : 
     215             : static bool
     216       12288 : wc_ispunct_icu(pg_wchar wc, pg_locale_t locale)
     217             : {
     218       12288 :     return u_ispunct(wc);
     219             : }
     220             : 
     221             : static bool
     222       12288 : wc_isspace_icu(pg_wchar wc, pg_locale_t locale)
     223             : {
     224       12288 :     return u_isspace(wc);
     225             : }
     226             : 
     227             : static bool
     228           0 : wc_isxdigit_icu(pg_wchar wc, pg_locale_t locale)
     229             : {
     230           0 :     return u_isxdigit(wc);
     231             : }
     232             : 
     233             : static bool
     234         126 : wc_iscased_icu(pg_wchar wc, pg_locale_t locale)
     235             : {
     236         126 :     return u_hasBinaryProperty(wc, UCHAR_CASED);
     237             : }
     238             : 
     239             : static const struct ctype_methods ctype_methods_icu = {
     240             :     .strlower = strlower_icu,
     241             :     .strtitle = strtitle_icu,
     242             :     .strupper = strupper_icu,
     243             :     .strfold = strfold_icu,
     244             :     .downcase_ident = downcase_ident_icu,
     245             :     .wc_isdigit = wc_isdigit_icu,
     246             :     .wc_isalpha = wc_isalpha_icu,
     247             :     .wc_isalnum = wc_isalnum_icu,
     248             :     .wc_isupper = wc_isupper_icu,
     249             :     .wc_islower = wc_islower_icu,
     250             :     .wc_isgraph = wc_isgraph_icu,
     251             :     .wc_isprint = wc_isprint_icu,
     252             :     .wc_ispunct = wc_ispunct_icu,
     253             :     .wc_isspace = wc_isspace_icu,
     254             :     .wc_isxdigit = wc_isxdigit_icu,
     255             :     .wc_iscased = wc_iscased_icu,
     256             :     .wc_toupper = toupper_icu,
     257             :     .wc_tolower = tolower_icu,
     258             : };
     259             : 
     260             : static const struct ctype_methods ctype_methods_icu_utf8 = {
     261             :     .strlower = strlower_icu_utf8,
     262             :     .strtitle = strtitle_icu_utf8,
     263             :     .strupper = strupper_icu_utf8,
     264             :     .strfold = strfold_icu_utf8,
     265             :     /* uses plain ASCII semantics for historical reasons */
     266             :     .downcase_ident = NULL,
     267             :     .wc_isdigit = wc_isdigit_icu,
     268             :     .wc_isalpha = wc_isalpha_icu,
     269             :     .wc_isalnum = wc_isalnum_icu,
     270             :     .wc_isupper = wc_isupper_icu,
     271             :     .wc_islower = wc_islower_icu,
     272             :     .wc_isgraph = wc_isgraph_icu,
     273             :     .wc_isprint = wc_isprint_icu,
     274             :     .wc_ispunct = wc_ispunct_icu,
     275             :     .wc_isspace = wc_isspace_icu,
     276             :     .wc_isxdigit = wc_isxdigit_icu,
     277             :     .wc_iscased = wc_iscased_icu,
     278             :     .wc_toupper = toupper_icu,
     279             :     .wc_tolower = tolower_icu,
     280             : };
     281             : 
     282             : /*
     283             :  * ICU still depends on libc for compatibility with certain historical
     284             :  * behavior for single-byte encodings.  See downcase_ident_icu().
     285             :  *
     286             :  * XXX: consider fixing by decoding the single byte into a code point, and
     287             :  * using u_tolower().
     288             :  */
     289             : static locale_t
     290           0 : make_libc_ctype_locale(const char *ctype)
     291             : {
     292             :     locale_t    loc;
     293             : 
     294             : #ifndef WIN32
     295           0 :     loc = newlocale(LC_CTYPE_MASK, ctype, NULL);
     296             : #else
     297             :     loc = _create_locale(LC_ALL, ctype);
     298             : #endif
     299           0 :     if (!loc)
     300           0 :         report_newlocale_failure(ctype);
     301             : 
     302           0 :     return loc;
     303             : }
     304             : #endif
     305             : 
     306             : pg_locale_t
     307         210 : create_pg_locale_icu(Oid collid, MemoryContext context)
     308             : {
     309             : #ifdef USE_ICU
     310             :     bool        deterministic;
     311             :     const char *iculocstr;
     312         210 :     const char *icurules = NULL;
     313             :     UCollator  *collator;
     314         210 :     locale_t    loc = (locale_t) 0;
     315             :     pg_locale_t result;
     316             : 
     317         210 :     if (collid == DEFAULT_COLLATION_OID)
     318             :     {
     319             :         HeapTuple   tp;
     320             :         Datum       datum;
     321             :         bool        isnull;
     322             : 
     323          26 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     324          26 :         if (!HeapTupleIsValid(tp))
     325           0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     326             : 
     327             :         /* default database collation is always deterministic */
     328          26 :         deterministic = true;
     329          26 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     330             :                                        Anum_pg_database_datlocale);
     331          26 :         iculocstr = TextDatumGetCString(datum);
     332          26 :         datum = SysCacheGetAttr(DATABASEOID, tp,
     333             :                                 Anum_pg_database_daticurules, &isnull);
     334          26 :         if (!isnull)
     335           0 :             icurules = TextDatumGetCString(datum);
     336             : 
     337             :         /* libc only needed for default locale and single-byte encoding */
     338          26 :         if (pg_database_encoding_max_length() == 1)
     339             :         {
     340             :             const char *ctype;
     341             : 
     342           0 :             datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     343             :                                            Anum_pg_database_datctype);
     344           0 :             ctype = TextDatumGetCString(datum);
     345             : 
     346           0 :             loc = make_libc_ctype_locale(ctype);
     347             :         }
     348             : 
     349          26 :         ReleaseSysCache(tp);
     350             :     }
     351             :     else
     352             :     {
     353             :         Form_pg_collation collform;
     354             :         HeapTuple   tp;
     355             :         Datum       datum;
     356             :         bool        isnull;
     357             : 
     358         184 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     359         184 :         if (!HeapTupleIsValid(tp))
     360           0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     361         184 :         collform = (Form_pg_collation) GETSTRUCT(tp);
     362         184 :         deterministic = collform->collisdeterministic;
     363         184 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     364             :                                        Anum_pg_collation_colllocale);
     365         184 :         iculocstr = TextDatumGetCString(datum);
     366         184 :         datum = SysCacheGetAttr(COLLOID, tp,
     367             :                                 Anum_pg_collation_collicurules, &isnull);
     368         184 :         if (!isnull)
     369          12 :             icurules = TextDatumGetCString(datum);
     370             : 
     371         184 :         ReleaseSysCache(tp);
     372             :     }
     373             : 
     374         210 :     collator = make_icu_collator(iculocstr, icurules);
     375             : 
     376         200 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     377         200 :     result->icu.locale = MemoryContextStrdup(context, iculocstr);
     378         200 :     result->icu.ucol = collator;
     379         200 :     result->icu.lt = loc;
     380         200 :     result->deterministic = deterministic;
     381         200 :     result->collate_is_c = false;
     382         200 :     result->ctype_is_c = false;
     383         200 :     if (GetDatabaseEncoding() == PG_UTF8)
     384             :     {
     385         200 :         result->icu.ucasemap = pg_ucasemap_open(iculocstr);
     386         200 :         result->collate = &collate_methods_icu_utf8;
     387         200 :         result->ctype = &ctype_methods_icu_utf8;
     388             :     }
     389             :     else
     390             :     {
     391           0 :         result->collate = &collate_methods_icu;
     392           0 :         result->ctype = &ctype_methods_icu;
     393             :     }
     394             : 
     395         200 :     return result;
     396             : #else
     397             :     /* could get here if a collation was created by a build with ICU */
     398             :     ereport(ERROR,
     399             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     400             :              errmsg("ICU is not supported in this build")));
     401             : 
     402             :     return NULL;
     403             : #endif
     404             : }
     405             : 
     406             : #ifdef USE_ICU
     407             : 
     408             : /*
     409             :  * Check locale string and fix it if necessary. Returns a new palloc'd string.
     410             :  *
     411             :  * In ICU versions 54 and earlier, "und" is not a recognized spelling of the
     412             :  * root locale. If the first component of the locale is "und", replace with
     413             :  * "root" before opening.
     414             :  */
     415             : static char *
     416       80012 : fix_icu_locale_str(const char *loc_str)
     417             : {
     418             :     /*
     419             :      * Must never open default collator, because it depends on the environment
     420             :      * and may change at any time. Should not happen, but check here to catch
     421             :      * bugs that might be hard to catch otherwise.
     422             :      *
     423             :      * NB: the default collator is not the same as the collator for the root
     424             :      * locale. The root locale may be specified as the empty string, "und", or
     425             :      * "root". The default collator is opened by passing NULL to ucol_open().
     426             :      */
     427       80012 :     if (loc_str == NULL)
     428           0 :         elog(ERROR, "opening default collator is not supported");
     429             : 
     430             :     if (U_ICU_VERSION_MAJOR_NUM < 55)
     431             :     {
     432             :         char        lang[ULOC_LANG_CAPACITY];
     433             :         UErrorCode  status = U_ZERO_ERROR;
     434             : 
     435             :         uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
     436             :         if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
     437             :         {
     438             :             ereport(ERROR,
     439             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     440             :                      errmsg("could not get language from locale \"%s\": %s",
     441             :                             loc_str, u_errorName(status))));
     442             :         }
     443             : 
     444             :         if (strcmp(lang, "und") == 0)
     445             :         {
     446             :             const char *remainder = loc_str + strlen("und");
     447             :             char       *fixed_str;
     448             : 
     449             :             fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
     450             :             strcpy(fixed_str, "root");
     451             :             strcat(fixed_str, remainder);
     452             : 
     453             :             return fixed_str;
     454             :         }
     455             :     }
     456             : 
     457       80012 :     return pstrdup(loc_str);
     458             : }
     459             : 
     460             : /*
     461             :  * Wrapper around ucol_open() to handle API differences for older ICU
     462             :  * versions.
     463             :  *
     464             :  * Ensure that no path leaks a UCollator.
     465             :  */
     466             : UCollator *
     467       79812 : pg_ucol_open(const char *loc_str)
     468             : {
     469             :     UCollator  *collator;
     470             :     UErrorCode  status;
     471             :     char       *fixed_str;
     472             : 
     473       79812 :     fixed_str = fix_icu_locale_str(loc_str);
     474             : 
     475       79812 :     status = U_ZERO_ERROR;
     476       79812 :     collator = ucol_open(fixed_str, &status);
     477       79812 :     if (U_FAILURE(status))
     478          12 :         ereport(ERROR,
     479             :         /* use original string for error report */
     480             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     481             :                  errmsg("could not open collator for locale \"%s\": %s",
     482             :                         loc_str, u_errorName(status))));
     483             : 
     484             :     if (U_ICU_VERSION_MAJOR_NUM < 54)
     485             :     {
     486             :         status = U_ZERO_ERROR;
     487             :         icu_set_collation_attributes(collator, fixed_str, &status);
     488             : 
     489             :         /*
     490             :          * Pretend the error came from ucol_open(), for consistent error
     491             :          * message across ICU versions.
     492             :          */
     493             :         if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
     494             :         {
     495             :             ucol_close(collator);
     496             :             ereport(ERROR,
     497             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     498             :                      errmsg("could not open collator for locale \"%s\": %s",
     499             :                             loc_str, u_errorName(status))));
     500             :         }
     501             :     }
     502             : 
     503       79800 :     pfree(fixed_str);
     504             : 
     505       79800 :     return collator;
     506             : }
     507             : 
     508             : /*
     509             :  * Wrapper around ucasemap_open() to handle API differences for older ICU
     510             :  * versions.
     511             :  *
     512             :  * Additionally makes sure we get the right options for case folding.
     513             :  */
     514             : static UCaseMap *
     515         200 : pg_ucasemap_open(const char *loc_str)
     516             : {
     517         200 :     UErrorCode  status = U_ZERO_ERROR;
     518             :     UCaseMap   *casemap;
     519             :     char       *fixed_str;
     520             : 
     521         200 :     fixed_str = fix_icu_locale_str(loc_str);
     522             : 
     523         200 :     casemap = ucasemap_open(fixed_str, foldcase_options(fixed_str), &status);
     524         200 :     if (U_FAILURE(status))
     525             :         /* use original string for error report */
     526           0 :         ereport(ERROR,
     527             :                 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     528             :                 errmsg("could not open casemap for locale \"%s\": %s",
     529             :                        loc_str, u_errorName(status)));
     530             : 
     531         200 :     pfree(fixed_str);
     532             : 
     533         200 :     return casemap;
     534             : }
     535             : 
     536             : /*
     537             :  * Create a UCollator with the given locale string and rules.
     538             :  *
     539             :  * Ensure that no path leaks a UCollator.
     540             :  */
     541             : static UCollator *
     542         210 : make_icu_collator(const char *iculocstr, const char *icurules)
     543             : {
     544         210 :     if (!icurules)
     545             :     {
     546             :         /* simple case without rules */
     547         198 :         return pg_ucol_open(iculocstr);
     548             :     }
     549             :     else
     550             :     {
     551             :         UCollator  *collator_std_rules;
     552             :         UCollator  *collator_all_rules;
     553             :         const UChar *std_rules;
     554             :         UChar      *my_rules;
     555             :         UChar      *all_rules;
     556             :         int32_t     length;
     557             :         int32_t     total;
     558             :         UErrorCode  status;
     559             : 
     560             :         /*
     561             :          * If rules are specified, we extract the rules of the standard
     562             :          * collation, add our own rules, and make a new collator with the
     563             :          * combined rules.
     564             :          */
     565          12 :         icu_to_uchar(&my_rules, icurules, strlen(icurules));
     566             : 
     567          12 :         collator_std_rules = pg_ucol_open(iculocstr);
     568             : 
     569          12 :         std_rules = ucol_getRules(collator_std_rules, &length);
     570             : 
     571          12 :         total = u_strlen(std_rules) + u_strlen(my_rules) + 1;
     572             : 
     573             :         /* avoid leaking collator on OOM */
     574          12 :         all_rules = palloc_extended(sizeof(UChar) * total, MCXT_ALLOC_NO_OOM);
     575          12 :         if (!all_rules)
     576             :         {
     577           0 :             ucol_close(collator_std_rules);
     578           0 :             ereport(ERROR,
     579             :                     (errcode(ERRCODE_OUT_OF_MEMORY),
     580             :                      errmsg("out of memory")));
     581             :         }
     582             : 
     583          12 :         u_strcpy(all_rules, std_rules);
     584          12 :         u_strcat(all_rules, my_rules);
     585             : 
     586          12 :         ucol_close(collator_std_rules);
     587             : 
     588          12 :         status = U_ZERO_ERROR;
     589          12 :         collator_all_rules = ucol_openRules(all_rules, u_strlen(all_rules),
     590             :                                             UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,
     591             :                                             NULL, &status);
     592          12 :         if (U_FAILURE(status))
     593             :         {
     594           6 :             ereport(ERROR,
     595             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     596             :                      errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
     597             :                             iculocstr, icurules, u_errorName(status))));
     598             :         }
     599             : 
     600           6 :         return collator_all_rules;
     601             :     }
     602             : }
     603             : 
     604             : static size_t
     605           0 : strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     606             :              pg_locale_t locale)
     607             : {
     608           0 :     return icu_convert_case(u_strToLower, dest, destsize, src, srclen, locale);
     609             : }
     610             : 
     611             : static size_t
     612           0 : strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     613             :              pg_locale_t locale)
     614             : {
     615           0 :     return icu_convert_case(u_strToTitle_default_BI, dest, destsize, src, srclen, locale);
     616             : }
     617             : 
     618             : static size_t
     619           0 : strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     620             :              pg_locale_t locale)
     621             : {
     622           0 :     return icu_convert_case(u_strToUpper, dest, destsize, src, srclen, locale);
     623             : }
     624             : 
     625             : static size_t
     626           0 : strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     627             :             pg_locale_t locale)
     628             : {
     629           0 :     return icu_convert_case(u_strFoldCase_default, dest, destsize, src, srclen, locale);
     630             : }
     631             : 
     632             : static size_t
     633         528 : strlower_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
     634             :                   pg_locale_t locale)
     635             : {
     636         528 :     UErrorCode  status = U_ZERO_ERROR;
     637             :     int32_t     needed;
     638             : 
     639         528 :     needed = ucasemap_utf8ToLower(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
     640         528 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
     641           0 :         ereport(ERROR,
     642             :                 errmsg("case conversion failed: %s", u_errorName(status)));
     643         528 :     return needed;
     644             : }
     645             : 
     646             : static size_t
     647          30 : strtitle_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
     648             :                   pg_locale_t locale)
     649             : {
     650          30 :     UErrorCode  status = U_ZERO_ERROR;
     651             :     int32_t     needed;
     652             : 
     653          30 :     needed = ucasemap_utf8ToTitle(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
     654          30 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
     655           0 :         ereport(ERROR,
     656             :                 errmsg("case conversion failed: %s", u_errorName(status)));
     657          30 :     return needed;
     658             : }
     659             : 
     660             : static size_t
     661          54 : strupper_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
     662             :                   pg_locale_t locale)
     663             : {
     664          54 :     UErrorCode  status = U_ZERO_ERROR;
     665             :     int32_t     needed;
     666             : 
     667          54 :     needed = ucasemap_utf8ToUpper(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
     668          54 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
     669           0 :         ereport(ERROR,
     670             :                 errmsg("case conversion failed: %s", u_errorName(status)));
     671          54 :     return needed;
     672             : }
     673             : 
     674             : static size_t
     675          12 : strfold_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
     676             :                  pg_locale_t locale)
     677             : {
     678          12 :     UErrorCode  status = U_ZERO_ERROR;
     679             :     int32_t     needed;
     680             : 
     681          12 :     needed = ucasemap_utf8FoldCase(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
     682          12 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
     683           0 :         ereport(ERROR,
     684             :                 errmsg("case conversion failed: %s", u_errorName(status)));
     685          12 :     return needed;
     686             : }
     687             : 
     688             : /*
     689             :  * For historical compatibility, behavior is not multibyte-aware.
     690             :  *
     691             :  * NB: uses libc tolower() for single-byte encodings (also for historical
     692             :  * compatibility), and therefore relies on the global LC_CTYPE setting.
     693             :  */
     694             : static size_t
     695           0 : downcase_ident_icu(char *dst, size_t dstsize, const char *src,
     696             :                    ssize_t srclen, pg_locale_t locale)
     697             : {
     698             :     int         i;
     699             :     bool        libc_lower;
     700           0 :     locale_t    lt = locale->icu.lt;
     701             : 
     702           0 :     libc_lower = lt && (pg_database_encoding_max_length() == 1);
     703             : 
     704           0 :     for (i = 0; i < srclen && i < dstsize; i++)
     705             :     {
     706           0 :         unsigned char ch = (unsigned char) src[i];
     707             : 
     708           0 :         if (ch >= 'A' && ch <= 'Z')
     709           0 :             ch = pg_ascii_tolower(ch);
     710           0 :         else if (libc_lower && IS_HIGHBIT_SET(ch) && isupper_l(ch, lt))
     711           0 :             ch = tolower_l(ch, lt);
     712           0 :         dst[i] = (char) ch;
     713             :     }
     714             : 
     715           0 :     if (i < dstsize)
     716           0 :         dst[i] = '\0';
     717             : 
     718           0 :     return srclen;
     719             : }
     720             : 
     721             : /*
     722             :  * strncoll_icu_utf8
     723             :  *
     724             :  * Call ucol_strcollUTF8() on the arguments; the database encoding must be
     725             :  * UTF-8 (this is asserted). An argument length of -1 means the string is
     726             :  * NUL-terminated.
     727             :  */
     728             : #ifdef HAVE_UCOL_STRCOLLUTF8
     729             : int
     730       23966 : strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
     731             :                   pg_locale_t locale)
     732             : {
     733             :     int         result;
     734             :     UErrorCode  status;
     735             : 
     736             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     737             : 
     738       23966 :     status = U_ZERO_ERROR;
     739       23966 :     result = ucol_strcollUTF8(locale->icu.ucol,
     740             :                               arg1, len1,
     741             :                               arg2, len2,
     742             :                               &status);
     743       23966 :     if (U_FAILURE(status))
     744           0 :         ereport(ERROR,
     745             :                 (errmsg("collation failed: %s", u_errorName(status))));
     746             : 
     747       23966 :     return result;
     748             : }
     749             : #endif
     750             : 
     751             : /* 'srclen' of -1 means the string is NUL-terminated */
     752             : size_t
     753        5748 : strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     754             :              pg_locale_t locale)
     755             : {
     756             :     char        sbuf[TEXTBUFLEN];
     757        5748 :     char       *buf = sbuf;
     758             :     UChar      *uchar;
     759             :     int32_t     ulen;
     760             :     size_t      uchar_bsize;
     761             :     Size        result_bsize;
     762             : 
     763        5748 :     init_icu_converter();
     764             : 
     765        5748 :     ulen = uchar_length(icu_converter, src, srclen);
     766             : 
     767        5748 :     uchar_bsize = (ulen + 1) * sizeof(UChar);
     768             : 
     769        5748 :     if (uchar_bsize > TEXTBUFLEN)
     770           0 :         buf = palloc(uchar_bsize);
     771             : 
     772        5748 :     uchar = (UChar *) buf;
     773             : 
     774        5748 :     ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
     775             : 
     776        5748 :     result_bsize = ucol_getSortKey(locale->icu.ucol,
     777             :                                    uchar, ulen,
     778             :                                    (uint8_t *) dest, destsize);
     779             : 
     780             :     /*
     781             :      * ucol_getSortKey() counts the nul-terminator in the result length, but
     782             :      * this function should not.
     783             :      */
     784             :     Assert(result_bsize > 0);
     785        5748 :     result_bsize--;
     786             : 
     787        5748 :     if (buf != sbuf)
     788           0 :         pfree(buf);
     789             : 
     790             :     /* if dest is defined, it should be nul-terminated */
     791             :     Assert(result_bsize >= destsize || dest[result_bsize] == '\0');
     792             : 
     793        5748 :     return result_bsize;
     794             : }
     795             : 
     796             : /* 'srclen' of -1 means the string is NUL-terminated */
     797             : size_t
     798        1668 : strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
     799             :                          const char *src, ssize_t srclen,
     800             :                          pg_locale_t locale)
     801             : {
     802             :     size_t      result;
     803             :     UCharIterator iter;
     804             :     uint32_t    state[2];
     805             :     UErrorCode  status;
     806             : 
     807             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     808             : 
     809        1668 :     uiter_setUTF8(&iter, src, srclen);
     810        1668 :     state[0] = state[1] = 0;    /* won't need that again */
     811        1668 :     status = U_ZERO_ERROR;
     812        1668 :     result = ucol_nextSortKeyPart(locale->icu.ucol,
     813             :                                   &iter,
     814             :                                   state,
     815             :                                   (uint8_t *) dest,
     816             :                                   destsize,
     817             :                                   &status);
     818        1668 :     if (U_FAILURE(status))
     819           0 :         ereport(ERROR,
     820             :                 (errmsg("sort key generation failed: %s",
     821             :                         u_errorName(status))));
     822             : 
     823        1668 :     return result;
     824             : }
     825             : 
     826             : char *
     827       79454 : get_collation_actual_version_icu(const char *collcollate)
     828             : {
     829             :     UCollator  *collator;
     830             :     UVersionInfo versioninfo;
     831             :     char        buf[U_MAX_VERSION_STRING_LENGTH];
     832             : 
     833       79454 :     collator = pg_ucol_open(collcollate);
     834             : 
     835       79454 :     ucol_getVersion(collator, versioninfo);
     836       79454 :     ucol_close(collator);
     837             : 
     838       79454 :     u_versionToString(versioninfo, buf);
     839       79454 :     return pstrdup(buf);
     840             : }
     841             : 
     842             : /*
     843             :  * Convert a string in the database encoding into a string of UChars.
     844             :  *
     845             :  * The source string at buff is of length nbytes
     846             :  * (it needn't be nul-terminated)
     847             :  *
     848             :  * *buff_uchar receives a pointer to the palloc'd result string, and
     849             :  * the function's result is the number of UChars generated.
     850             :  *
     851             :  * The result string is nul-terminated, though most callers rely on the
     852             :  * result length instead.
     853             :  */
     854             : static int32_t
     855          12 : icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
     856             : {
     857             :     int32_t     len_uchar;
     858             : 
     859          12 :     init_icu_converter();
     860             : 
     861          12 :     len_uchar = uchar_length(icu_converter, buff, nbytes);
     862             : 
     863          12 :     *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
     864          12 :     len_uchar = uchar_convert(icu_converter,
     865             :                               *buff_uchar, len_uchar + 1, buff, nbytes);
     866             : 
     867          12 :     return len_uchar;
     868             : }
     869             : 
     870             : /*
     871             :  * Convert a string of UChars into the database encoding.
     872             :  *
     873             :  * The source string at buff_uchar is of length len_uchar
     874             :  * (it needn't be nul-terminated)
     875             :  *
     876             :  * The result is written to dest, which has room for destsize bytes. The
     877             :  * function's result is the number of bytes needed (not counting nul). If
     878             :  * that plus the nul exceeds destsize, nothing is written; otherwise the
     879             :  * result string is nul-terminated.
     880             :  */
     881             : static size_t
     882           0 : icu_from_uchar(char *dest, size_t destsize, const UChar *buff_uchar, int32_t len_uchar)
     883             : {
     884             :     UErrorCode  status;
     885             :     int32_t     len_result;
     886             : 
     887           0 :     init_icu_converter();
     888             : 
     889           0 :     status = U_ZERO_ERROR;
     890           0 :     len_result = ucnv_fromUChars(icu_converter, NULL, 0,
     891             :                                  buff_uchar, len_uchar, &status);
     892           0 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
     893           0 :         ereport(ERROR,
     894             :                 (errmsg("%s failed: %s", "ucnv_fromUChars",
     895             :                         u_errorName(status))));
     896             : 
     897           0 :     if (len_result + 1 > destsize)
     898           0 :         return len_result;
     899             : 
     900           0 :     status = U_ZERO_ERROR;
     901           0 :     len_result = ucnv_fromUChars(icu_converter, dest, len_result + 1,
     902             :                                  buff_uchar, len_uchar, &status);
     903           0 :     if (U_FAILURE(status) ||
     904           0 :         status == U_STRING_NOT_TERMINATED_WARNING)
     905           0 :         ereport(ERROR,
     906             :                 (errmsg("%s failed: %s", "ucnv_fromUChars",
     907             :                         u_errorName(status))));
     908             : 
     909           0 :     return len_result;
     910             : }
     911             : 
     912             : static int32_t
     913           0 : convert_case_uchar(ICU_Convert_Func func, pg_locale_t mylocale,
     914             :                    UChar **buff_dest, UChar *buff_source, int32_t len_source)
     915             : {
     916             :     UErrorCode  status;
     917             :     int32_t     len_dest;
     918             : 
     919           0 :     len_dest = len_source;      /* try first with same length */
     920           0 :     *buff_dest = palloc(len_dest * sizeof(**buff_dest));
     921           0 :     status = U_ZERO_ERROR;
     922           0 :     len_dest = func(*buff_dest, len_dest, buff_source, len_source,
     923             :                     mylocale->icu.locale, &status);
     924           0 :     if (status == U_BUFFER_OVERFLOW_ERROR)
     925             :     {
     926             :         /* try again with adjusted length */
     927           0 :         pfree(*buff_dest);
     928           0 :         *buff_dest = palloc(len_dest * sizeof(**buff_dest));
     929           0 :         status = U_ZERO_ERROR;
     930           0 :         len_dest = func(*buff_dest, len_dest, buff_source, len_source,
     931             :                         mylocale->icu.locale, &status);
     932             :     }
     933           0 :     if (U_FAILURE(status))
     934           0 :         ereport(ERROR,
     935             :                 (errmsg("case conversion failed: %s", u_errorName(status))));
     936           0 :     return len_dest;
     937             : }
     938             : 
     939             : static int32_t
     940           0 : icu_convert_case(ICU_Convert_Func func, char *dest, size_t destsize,
     941             :                  const char *src, ssize_t srclen, pg_locale_t locale)
     942             : {
     943             :     int32_t     len_uchar;
     944             :     int32_t     len_conv;
     945             :     UChar      *buff_uchar;
     946             :     UChar      *buff_conv;
     947             :     size_t      result_len;
     948             : 
     949           0 :     len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
     950           0 :     len_conv = convert_case_uchar(func, locale, &buff_conv,
     951             :                                   buff_uchar, len_uchar);
     952           0 :     result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
     953           0 :     pfree(buff_uchar);
     954           0 :     pfree(buff_conv);
     955             : 
     956           0 :     return result_len;
     957             : }
     958             : 
     959             : static int32_t
     960           0 : u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
     961             :                         const UChar *src, int32_t srcLength,
     962             :                         const char *locale,
     963             :                         UErrorCode *pErrorCode)
     964             : {
     965           0 :     return u_strToTitle(dest, destCapacity, src, srcLength,
     966             :                         NULL, locale, pErrorCode);
     967             : }
     968             : 
     969             : static int32_t
     970           0 : u_strFoldCase_default(UChar *dest, int32_t destCapacity,
     971             :                       const UChar *src, int32_t srcLength,
     972             :                       const char *locale,
     973             :                       UErrorCode *pErrorCode)
     974             : {
     975           0 :     return u_strFoldCase(dest, destCapacity, src, srcLength,
     976           0 :                          foldcase_options(locale), pErrorCode);
     977             : }
     978             : 
     979             : /*
     980             :  * Return the correct u_strFoldCase() options for the given locale.
     981             :  *
     982             :  * Unlike the ICU APIs for lowercasing, titlecasing, and uppercasing, case
     983             :  * folding does not accept a locale. Instead it just supports a single option
     984             :  * relevant to Turkic languages 'az' and 'tr'; check for those languages.
     985             :  */
     986             : static int32_t
     987         200 : foldcase_options(const char *locale)
     988             : {
     989         200 :     uint32      options = U_FOLD_CASE_DEFAULT;
     990             :     char        lang[3];
     991         200 :     UErrorCode  status = U_ZERO_ERROR;
     992             : 
     993         200 :     uloc_getLanguage(locale, lang, 3, &status);
     994         200 :     if (U_SUCCESS(status))
     995             :     {
     996             :         /*
     997             :          * The option name is confusing, but it causes u_strFoldCase to use
     998             :          * the 'T' mappings, which are ignored for U_FOLD_CASE_DEFAULT.
     999             :          */
    1000         194 :         if (strcmp(lang, "tr") == 0 || strcmp(lang, "az") == 0)
    1001           6 :             options = U_FOLD_CASE_EXCLUDE_SPECIAL_I;
    1002             :     }
    1003             : 
    1004         200 :     return options;
    1005             : }
    1006             : 
    1007             : /*
    1008             :  * strncoll_icu
    1009             :  *
    1010             :  * Convert the arguments from the database encoding to UChar strings, then
    1011             :  * call ucol_strcoll(). An argument length of -1 means that the string is
    1012             :  * NUL-terminated.
    1013             :  *
    1014             :  * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
    1015             :  * caller should call that instead.
    1016             :  */
    1017             : static int
    1018           0 : strncoll_icu(const char *arg1, ssize_t len1,
    1019             :              const char *arg2, ssize_t len2, pg_locale_t locale)
    1020             : {
    1021             :     char        sbuf[TEXTBUFLEN];
    1022           0 :     char       *buf = sbuf;
    1023             :     int32_t     ulen1;
    1024             :     int32_t     ulen2;
    1025             :     size_t      bufsize1;
    1026             :     size_t      bufsize2;
    1027             :     UChar      *uchar1,
    1028             :                *uchar2;
    1029             :     int         result;
    1030             : 
    1031             :     /* if encoding is UTF8, use more efficient strncoll_icu_utf8 */
    1032             : #ifdef HAVE_UCOL_STRCOLLUTF8
    1033             :     Assert(GetDatabaseEncoding() != PG_UTF8);
    1034             : #endif
    1035             : 
    1036           0 :     init_icu_converter();
    1037             : 
    1038           0 :     ulen1 = uchar_length(icu_converter, arg1, len1);
    1039           0 :     ulen2 = uchar_length(icu_converter, arg2, len2);
    1040             : 
    1041           0 :     bufsize1 = (ulen1 + 1) * sizeof(UChar);
    1042           0 :     bufsize2 = (ulen2 + 1) * sizeof(UChar);
    1043             : 
    1044           0 :     if (bufsize1 + bufsize2 > TEXTBUFLEN)
    1045           0 :         buf = palloc(bufsize1 + bufsize2);
    1046             : 
    1047           0 :     uchar1 = (UChar *) buf;
    1048           0 :     uchar2 = (UChar *) (buf + bufsize1);
    1049             : 
    1050           0 :     ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
    1051           0 :     ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
    1052             : 
    1053           0 :     result = ucol_strcoll(locale->icu.ucol,
    1054             :                           uchar1, ulen1,
    1055             :                           uchar2, ulen2);
    1056             : 
    1057           0 :     if (buf != sbuf)
    1058           0 :         pfree(buf);
    1059             : 
    1060           0 :     return result;
    1061             : }
    1062             : 
    1063             : /* 'srclen' of -1 means the string is NUL-terminated */
    1064             : static size_t
    1065           0 : strnxfrm_prefix_icu(char *dest, size_t destsize,
    1066             :                     const char *src, ssize_t srclen,
    1067             :                     pg_locale_t locale)
    1068             : {
    1069             :     char        sbuf[TEXTBUFLEN];
    1070           0 :     char       *buf = sbuf;
    1071             :     UCharIterator iter;
    1072             :     uint32_t    state[2];
    1073             :     UErrorCode  status;
    1074           0 :     int32_t     ulen = -1;
    1075           0 :     UChar      *uchar = NULL;
    1076             :     size_t      uchar_bsize;
    1077             :     Size        result_bsize;
    1078             : 
    1079             :     /* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */
    1080             :     Assert(GetDatabaseEncoding() != PG_UTF8);
    1081             : 
    1082           0 :     init_icu_converter();
    1083             : 
    1084           0 :     ulen = uchar_length(icu_converter, src, srclen);
    1085             : 
    1086           0 :     uchar_bsize = (ulen + 1) * sizeof(UChar);
    1087             : 
    1088           0 :     if (uchar_bsize > TEXTBUFLEN)
    1089           0 :         buf = palloc(uchar_bsize);
    1090             : 
    1091           0 :     uchar = (UChar *) buf;
    1092             : 
    1093           0 :     ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
    1094             : 
    1095           0 :     uiter_setString(&iter, uchar, ulen);
    1096           0 :     state[0] = state[1] = 0;    /* won't need that again */
    1097           0 :     status = U_ZERO_ERROR;
    1098           0 :     result_bsize = ucol_nextSortKeyPart(locale->icu.ucol,
    1099             :                                         &iter,
    1100             :                                         state,
    1101             :                                         (uint8_t *) dest,
    1102             :                                         destsize,
    1103             :                                         &status);
    1104           0 :     if (U_FAILURE(status))
    1105           0 :         ereport(ERROR,
    1106             :                 (errmsg("sort key generation failed: %s",
    1107             :                         u_errorName(status))));
    1108             : 
    1109           0 :     return result_bsize;
    1110             : }
    1111             : 
    1112             : static void
    1113        5760 : init_icu_converter(void)
    1114             : {
    1115             :     const char *icu_encoding_name;
    1116             :     UErrorCode  status;
    1117             :     UConverter *conv;
    1118             : 
    1119        5760 :     if (icu_converter)
    1120        5754 :         return;                 /* already done */
    1121             : 
    1122           6 :     icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
    1123           6 :     if (!icu_encoding_name)
    1124           0 :         ereport(ERROR,
    1125             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1126             :                  errmsg("encoding \"%s\" not supported by ICU",
    1127             :                         pg_encoding_to_char(GetDatabaseEncoding()))));
    1128             : 
    1129           6 :     status = U_ZERO_ERROR;
    1130           6 :     conv = ucnv_open(icu_encoding_name, &status);
    1131           6 :     if (U_FAILURE(status))
    1132           0 :         ereport(ERROR,
    1133             :                 (errmsg("could not open ICU converter for encoding \"%s\": %s",
    1134             :                         icu_encoding_name, u_errorName(status))));
    1135             : 
    1136           6 :     icu_converter = conv;
    1137             : }
    1138             : 
    1139             : /*
    1140             :  * Find length, in UChars, of given string if converted to UChar string.
    1141             :  *
    1142             :  * A length of -1 indicates that the input string is NUL-terminated.
    1143             :  */
    1144             : static size_t
    1145        5760 : uchar_length(UConverter *converter, const char *str, int32_t len)
    1146             : {
    1147        5760 :     UErrorCode  status = U_ZERO_ERROR;
    1148             :     int32_t     ulen;
    1149             : 
    1150        5760 :     ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);
    1151        5760 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
    1152           0 :         ereport(ERROR,
    1153             :                 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
    1154        5760 :     return ulen;
    1155             : }
    1156             : 
    1157             : /*
    1158             :  * Convert the given source string into a UChar string, stored in dest, and
    1159             :  * return the length (in UChars).
    1160             :  *
    1161             :  * A srclen of -1 indicates that the input string is NUL-terminated.
    1162             :  */
    1163             : static int32_t
    1164        5760 : uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
    1165             :               const char *src, int32_t srclen)
    1166             : {
    1167        5760 :     UErrorCode  status = U_ZERO_ERROR;
    1168             :     int32_t     ulen;
    1169             : 
    1170        5760 :     status = U_ZERO_ERROR;
    1171        5760 :     ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
    1172        5760 :     if (U_FAILURE(status))
    1173           0 :         ereport(ERROR,
    1174             :                 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
    1175        5760 :     return ulen;
    1176             : }
    1177             : 
    1178             : /*
    1179             :  * Parse collation attributes from the given locale string and apply them to
    1180             :  * the open collator.
    1181             :  *
    1182             :  * First, the locale string is canonicalized to an ICU format locale ID such
    1183             :  * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
    1184             :  * the key-value arguments.
    1185             :  *
    1186             :  * Starting with ICU version 54, the attributes are processed automatically by
    1187             :  * ucol_open(), so this is only necessary for emulating this behavior on older
    1188             :  * versions.
    1189             :  */
    1190             : pg_attribute_unused()
    1191             : static void
    1192           0 : icu_set_collation_attributes(UCollator *collator, const char *loc,
    1193             :                              UErrorCode *status)
    1194             : {
    1195             :     int32_t     len;
    1196             :     char       *icu_locale_id;
    1197             :     char       *lower_str;
    1198             :     char       *str;
    1199             :     char       *token;
    1200             : 
    1201             :     /*
    1202             :      * The input locale may be a BCP 47 language tag, e.g.
    1203             :      * "und-u-kc-ks-level1", which expresses the same attributes in a
    1204             :      * different form. It will be converted to the equivalent ICU format
    1205             :      * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
    1206             :      * uloc_canonicalize().
    1207             :      */
    1208           0 :     *status = U_ZERO_ERROR;
    1209           0 :     len = uloc_canonicalize(loc, NULL, 0, status);
    1210           0 :     icu_locale_id = palloc(len + 1);
    1211           0 :     *status = U_ZERO_ERROR;
    1212           0 :     len = uloc_canonicalize(loc, icu_locale_id, len + 1, status);
    1213           0 :     if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
    1214           0 :         return;
    1215             : 
    1216           0 :     lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
    1217             : 
    1218           0 :     pfree(icu_locale_id);
    1219             : 
    1220           0 :     str = strchr(lower_str, '@');
    1221           0 :     if (!str)
    1222           0 :         return;
    1223           0 :     str++;
    1224             : 
    1225           0 :     while ((token = strsep(&str, ";")))
    1226             :     {
    1227           0 :         char       *e = strchr(token, '=');
    1228             : 
    1229           0 :         if (e)
    1230             :         {
    1231             :             char       *name;
    1232             :             char       *value;
    1233             :             UColAttribute uattr;
    1234             :             UColAttributeValue uvalue;
    1235             : 
    1236           0 :             *status = U_ZERO_ERROR;
    1237             : 
    1238           0 :             *e = '\0';
    1239           0 :             name = token;
    1240           0 :             value = e + 1;
    1241             : 
    1242             :             /*
    1243             :              * See attribute name and value lists in ICU i18n/coll.cpp
    1244             :              */
    1245           0 :             if (strcmp(name, "colstrength") == 0)
    1246           0 :                 uattr = UCOL_STRENGTH;
    1247           0 :             else if (strcmp(name, "colbackwards") == 0)
    1248           0 :                 uattr = UCOL_FRENCH_COLLATION;
    1249           0 :             else if (strcmp(name, "colcaselevel") == 0)
    1250           0 :                 uattr = UCOL_CASE_LEVEL;
    1251           0 :             else if (strcmp(name, "colcasefirst") == 0)
    1252           0 :                 uattr = UCOL_CASE_FIRST;
    1253           0 :             else if (strcmp(name, "colalternate") == 0)
    1254           0 :                 uattr = UCOL_ALTERNATE_HANDLING;
    1255           0 :             else if (strcmp(name, "colnormalization") == 0)
    1256           0 :                 uattr = UCOL_NORMALIZATION_MODE;
    1257           0 :             else if (strcmp(name, "colnumeric") == 0)
    1258           0 :                 uattr = UCOL_NUMERIC_COLLATION;
    1259             :             else
    1260             :                 /* ignore if unknown */
    1261           0 :                 continue;
    1262             : 
    1263           0 :             if (strcmp(value, "primary") == 0)
    1264           0 :                 uvalue = UCOL_PRIMARY;
    1265           0 :             else if (strcmp(value, "secondary") == 0)
    1266           0 :                 uvalue = UCOL_SECONDARY;
    1267           0 :             else if (strcmp(value, "tertiary") == 0)
    1268           0 :                 uvalue = UCOL_TERTIARY;
    1269           0 :             else if (strcmp(value, "quaternary") == 0)
    1270           0 :                 uvalue = UCOL_QUATERNARY;
    1271           0 :             else if (strcmp(value, "identical") == 0)
    1272           0 :                 uvalue = UCOL_IDENTICAL;
    1273           0 :             else if (strcmp(value, "no") == 0)
    1274           0 :                 uvalue = UCOL_OFF;
    1275           0 :             else if (strcmp(value, "yes") == 0)
    1276           0 :                 uvalue = UCOL_ON;
    1277           0 :             else if (strcmp(value, "shifted") == 0)
    1278           0 :                 uvalue = UCOL_SHIFTED;
    1279           0 :             else if (strcmp(value, "non-ignorable") == 0)
    1280           0 :                 uvalue = UCOL_NON_IGNORABLE;
    1281           0 :             else if (strcmp(value, "lower") == 0)
    1282           0 :                 uvalue = UCOL_LOWER_FIRST;
    1283           0 :             else if (strcmp(value, "upper") == 0)
    1284           0 :                 uvalue = UCOL_UPPER_FIRST;
    1285             :             else
    1286             :             {
    1287           0 :                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    1288           0 :                 break;
    1289             :             }
    1290             : 
    1291           0 :             ucol_setAttribute(collator, uattr, uvalue, status);
    1292             :         }
    1293             :     }
    1294             : 
    1295           0 :     pfree(lower_str);
    1296             : }
    1297             : 
    1298             : #endif                          /* USE_ICU */

Generated by: LCOV version 1.16