LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_icu.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 48.8 % 371 181
Test Date: 2026-02-27 02:15:10 Functions: 66.7 % 45 30
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-----------------------------------------------------------------------
       2              :  *
       3              :  * PostgreSQL locale utilities for ICU
       4              :  *
       5              :  * Portions Copyright (c) 2002-2026, PostgreSQL Global Development Group
       6              :  *
       7              :  * src/backend/utils/adt/pg_locale_icu.c
       8              :  *
       9              :  *-----------------------------------------------------------------------
      10              :  */
      11              : 
      12              : #include "postgres.h"
      13              : 
      14              : #ifdef USE_ICU
      15              : #include <unicode/ucasemap.h>
      16              : #include <unicode/ucnv.h>
      17              : #include <unicode/ucol.h>
      18              : #include <unicode/ustring.h>
      19              : 
      20              : /*
      21              :  * ucol_strcollUTF8() was introduced in ICU 50, but it is buggy before ICU 53.
      22              :  * (see
      23              :  * <https://www.postgresql.org/message-id/flat/f1438ec6-22aa-4029-9a3b-26f79d330e72%40manitou-mail.org>)
      24              :  */
      25              : #if U_ICU_VERSION_MAJOR_NUM >= 53
      26              : #define HAVE_UCOL_STRCOLLUTF8 1
      27              : #else
      28              : #undef HAVE_UCOL_STRCOLLUTF8
      29              : #endif
      30              : 
      31              : #endif
      32              : 
      33              : #include "access/htup_details.h"
      34              : #include "catalog/pg_database.h"
      35              : #include "catalog/pg_collation.h"
      36              : #include "mb/pg_wchar.h"
      37              : #include "miscadmin.h"
      38              : #include "utils/builtins.h"
      39              : #include "utils/formatting.h"
      40              : #include "utils/memutils.h"
      41              : #include "utils/pg_locale.h"
      42              : #include "utils/syscache.h"
      43              : 
      44              : /*
      45              :  * Size of stack buffer to use for string transformations, used to avoid heap
      46              :  * allocations in typical cases. This should be large enough that most strings
      47              :  * will fit, but small enough that we feel comfortable putting it on the
      48              :  * stack.
      49              :  */
      50              : #define     TEXTBUFLEN          1024
      51              : /*
                      :  * Builds the pg_locale_t for an ICU collation.  Declared outside the USE_ICU
                      :  * guard because the definition later in this file also has a non-ICU branch
                      :  * that only reports an error.
                      :  */
      52              : extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
      53              : 
      54              : #ifdef USE_ICU
      55              : /*
                      :  * Prototypes for ICU-only routines.  The case-mapping, downcase_ident,
                      :  * strncoll and strnxfrm functions declared here are the ones installed in
                      :  * the collate_methods and ctype_methods tables later in this file.
                      :  */
      56              : extern UCollator *pg_ucol_open(const char *loc_str);
      57              : static UCaseMap *pg_ucasemap_open(const char *loc_str);
      58              : 
      59              : static size_t strlower_icu(char *dest, size_t destsize, const char *src,
      60              :                            ssize_t srclen, pg_locale_t locale);
      61              : static size_t strtitle_icu(char *dest, size_t destsize, const char *src,
      62              :                            ssize_t srclen, pg_locale_t locale);
      63              : static size_t strupper_icu(char *dest, size_t destsize, const char *src,
      64              :                            ssize_t srclen, pg_locale_t locale);
      65              : static size_t strfold_icu(char *dest, size_t destsize, const char *src,
      66              :                           ssize_t srclen, pg_locale_t locale);
      67              : static size_t strlower_icu_utf8(char *dest, size_t destsize, const char *src,
      68              :                                 ssize_t srclen, pg_locale_t locale);
      69              : static size_t strtitle_icu_utf8(char *dest, size_t destsize, const char *src,
      70              :                                 ssize_t srclen, pg_locale_t locale);
      71              : static size_t strupper_icu_utf8(char *dest, size_t destsize, const char *src,
      72              :                                 ssize_t srclen, pg_locale_t locale);
      73              : static size_t strfold_icu_utf8(char *dest, size_t destsize, const char *src,
      74              :                                ssize_t srclen, pg_locale_t locale);
      75              : static size_t downcase_ident_icu(char *dst, size_t dstsize, const char *src,
      76              :                                  ssize_t srclen, pg_locale_t locale);
      77              : static int  strncoll_icu(const char *arg1, ssize_t len1,
      78              :                          const char *arg2, ssize_t len2,
      79              :                          pg_locale_t locale);
      80              : static size_t strnxfrm_icu(char *dest, size_t destsize,
      81              :                            const char *src, ssize_t srclen,
      82              :                            pg_locale_t locale);
      83              : extern char *get_collation_actual_version_icu(const char *collcollate);
      84              : /*
                      :  * Common signature of the UChar-based case-conversion callbacks that are
                      :  * handed to icu_convert_case() (declared below).
                      :  */
      85              : typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
      86              :                                      const UChar *src, int32_t srcLength,
      87              :                                      const char *locale,
      88              :                                      UErrorCode *pErrorCode);
      89              : 
      90              : /*
      91              :  * Converter object for converting between ICU's UChar strings and C strings
      92              :  * in database encoding.  Since the database encoding doesn't change, we only
      93              :  * need one of these per session.
                      :  *
                      :  * Starts out NULL; presumably created by init_icu_converter() (declared
                      :  * below) -- TODO confirm against that function.
      94              :  */
      95              : static UConverter *icu_converter = NULL;
      96              : /*
                      :  * Prototypes for internal helpers.  Note that strncoll_icu is declared here
                      :  * a second time; the declaration repeats the one given earlier in this file.
                      :  */
      97              : static UCollator *make_icu_collator(const char *iculocstr,
      98              :                                     const char *icurules);
      99              : static int  strncoll_icu(const char *arg1, ssize_t len1,
     100              :                          const char *arg2, ssize_t len2,
     101              :                          pg_locale_t locale);
     102              : static size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
     103              :                                   const char *src, ssize_t srclen,
     104              :                                   pg_locale_t locale);
     105              : #ifdef HAVE_UCOL_STRCOLLUTF8
     106              : static int  strncoll_icu_utf8(const char *arg1, ssize_t len1,
     107              :                               const char *arg2, ssize_t len2,
     108              :                               pg_locale_t locale);
     109              : #endif                          /* HAVE_UCOL_STRCOLLUTF8, i.e. ICU 53 or later */
     110              : static size_t strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
     111              :                                        const char *src, ssize_t srclen,
     112              :                                        pg_locale_t locale);
     113              : static void init_icu_converter(void);
     114              : static size_t uchar_length(UConverter *converter,
     115              :                            const char *str, int32_t len);
     116              : static int32_t uchar_convert(UConverter *converter,
     117              :                              UChar *dest, int32_t destlen,
     118              :                              const char *src, int32_t srclen);
     119              : static int32_t icu_to_uchar(UChar **buff_uchar, const char *buff,
     120              :                             size_t nbytes);
     121              : static size_t icu_from_uchar(char *dest, size_t destsize,
     122              :                              const UChar *buff_uchar, int32_t len_uchar);
     123              : static void icu_set_collation_attributes(UCollator *collator, const char *loc,
     124              :                                          UErrorCode *status);
     125              : static int32_t icu_convert_case(ICU_Convert_Func func, char *dest,
     126              :                                 size_t destsize, const char *src,
     127              :                                 ssize_t srclen, pg_locale_t locale);
     128              : static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
     129              :                                        const UChar *src, int32_t srcLength,
     130              :                                        const char *locale,
     131              :                                        UErrorCode *pErrorCode);
     132              : static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
     133              :                                      const UChar *src, int32_t srcLength,
     134              :                                      const char *locale,
     135              :                                      UErrorCode *pErrorCode);
     136              : static int32_t foldcase_options(const char *locale);
     137              : 
     138              : /*
     139              :  * XXX: many of the functions below rely on casts directly from pg_wchar to
     140              :  * UChar32, which is correct for UTF-8 and LATIN1, but not in general.
     141              :  */
     142              : /*
                      :  * Return the upper-case mapping of wc as computed by the ICU function
                      :  * u_toupper().  The locale argument is not consulted, and wc is handed to
                      :  * ICU unchanged (see the XXX note above about pg_wchar versus UChar32).
                      :  */
     143              : static pg_wchar
     144           54 : toupper_icu(pg_wchar wc, pg_locale_t locale)
     145              : {
     146           54 :     return u_toupper(wc);
     147              : }
     148              : /*
                      :  * Return the lower-case mapping of wc as computed by the ICU function
                      :  * u_tolower().  The locale argument is not consulted, and wc is handed to
                      :  * ICU unchanged.
                      :  */
     149              : static pg_wchar
     150           54 : tolower_icu(pg_wchar wc, pg_locale_t locale)
     151              : {
     152           54 :     return u_tolower(wc);
     153              : }
     154              : /*
                      :  * Collation method table that create_pg_locale_icu() installs when the
                      :  * database encoding is not UTF-8.
                      :  */
     155              : static const struct collate_methods collate_methods_icu = {
     156              :     .strncoll = strncoll_icu,
     157              :     .strnxfrm = strnxfrm_icu,
     158              :     .strnxfrm_prefix = strnxfrm_prefix_icu,
     159              :     .strxfrm_is_safe = true,
     160              : };
     161              : /*
                      :  * Collation method table that create_pg_locale_icu() installs when the
                      :  * database encoding is UTF-8.  strncoll uses the UTF-8 specific routine
                      :  * only when HAVE_UCOL_STRCOLLUTF8 is defined (ICU 53 or later) and falls
                      :  * back to strncoll_icu otherwise.  strnxfrm is the same function as in
                      :  * collate_methods_icu; only the prefix variant is UTF-8 specific.
                      :  */
     162              : static const struct collate_methods collate_methods_icu_utf8 = {
     163              : #ifdef HAVE_UCOL_STRCOLLUTF8
     164              :     .strncoll = strncoll_icu_utf8,
     165              : #else
     166              :     .strncoll = strncoll_icu,
     167              : #endif
     168              :     .strnxfrm = strnxfrm_icu,
     169              :     .strnxfrm_prefix = strnxfrm_prefix_icu_utf8,
     170              :     .strxfrm_is_safe = true,
     171              : };
     172              : /*
                      :  * Report whether wc is a digit according to the ICU function u_isdigit().
                      :  * The locale argument is not consulted.
                      :  */
     173              : static bool
     174         6144 : wc_isdigit_icu(pg_wchar wc, pg_locale_t locale)
     175              : {
     176         6144 :     return u_isdigit(wc);
     177              : }
     178              : /*
                      :  * Report whether wc is alphabetic according to the ICU function
                      :  * u_isalpha().  The locale argument is not consulted.
                      :  */
     179              : static bool
     180         6144 : wc_isalpha_icu(pg_wchar wc, pg_locale_t locale)
     181              : {
     182         6144 :     return u_isalpha(wc);
     183              : }
     184              : /*
                      :  * Report whether wc is alphanumeric according to the ICU function
                      :  * u_isalnum().  The locale argument is not consulted.
                      :  */
     185              : static bool
     186         6144 : wc_isalnum_icu(pg_wchar wc, pg_locale_t locale)
     187              : {
     188         6144 :     return u_isalnum(wc);
     189              : }
     190              : /*
                      :  * Report whether wc is upper case according to the ICU function
                      :  * u_isupper().  The locale argument is not consulted.
                      :  */
     191              : static bool
     192         6144 : wc_isupper_icu(pg_wchar wc, pg_locale_t locale)
     193              : {
     194         6144 :     return u_isupper(wc);
     195              : }
     196              : /*
                      :  * Report whether wc is lower case according to the ICU function
                      :  * u_islower().  The locale argument is not consulted.
                      :  */
     197              : static bool
     198         6144 : wc_islower_icu(pg_wchar wc, pg_locale_t locale)
     199              : {
     200         6144 :     return u_islower(wc);
     201              : }
     202              : /*
                      :  * Report whether wc is a graphic character according to the ICU function
                      :  * u_isgraph().  The locale argument is not consulted.
                      :  */
     203              : static bool
     204         6144 : wc_isgraph_icu(pg_wchar wc, pg_locale_t locale)
     205              : {
     206         6144 :     return u_isgraph(wc);
     207              : }
     208              : /*
                      :  * Report whether wc is printable according to the ICU function
                      :  * u_isprint().  The locale argument is not consulted.
                      :  */
     209              : static bool
     210         6144 : wc_isprint_icu(pg_wchar wc, pg_locale_t locale)
     211              : {
     212         6144 :     return u_isprint(wc);
     213              : }
     214              : /*
                      :  * Report whether wc is punctuation according to the ICU function
                      :  * u_ispunct().  The locale argument is not consulted.
                      :  */
     215              : static bool
     216         6144 : wc_ispunct_icu(pg_wchar wc, pg_locale_t locale)
     217              : {
     218         6144 :     return u_ispunct(wc);
     219              : }
     220              : /*
                      :  * Report whether wc is white space according to the ICU function
                      :  * u_isspace().  The locale argument is not consulted.
                      :  */
     221              : static bool
     222         6144 : wc_isspace_icu(pg_wchar wc, pg_locale_t locale)
     223              : {
     224         6144 :     return u_isspace(wc);
     225              : }
     226              : /*
                      :  * Report whether wc is a hexadecimal digit according to the ICU function
                      :  * u_isxdigit().  The locale argument is not consulted.
                      :  */
     227              : static bool
     228            0 : wc_isxdigit_icu(pg_wchar wc, pg_locale_t locale)
     229              : {
     230            0 :     return u_isxdigit(wc);
     231              : }
     232              : /*
                      :  * Report whether wc has the Unicode binary property UCHAR_CASED, queried
                      :  * through u_hasBinaryProperty().  The locale argument is not consulted.
                      :  */
     233              : static bool
     234           63 : wc_iscased_icu(pg_wchar wc, pg_locale_t locale)
     235              : {
     236           63 :     return u_hasBinaryProperty(wc, UCHAR_CASED);
     237              : }
     238              : /*
                      :  * Character-classification and case-mapping method table that
                      :  * create_pg_locale_icu() installs when the database encoding is not UTF-8.
                      :  * String-level mapping uses the strlower_icu family and identifier
                      :  * downcasing uses downcase_ident_icu; the per-character entries are the
                      :  * ICU wrappers defined above.
                      :  */
     239              : static const struct ctype_methods ctype_methods_icu = {
     240              :     .strlower = strlower_icu,
     241              :     .strtitle = strtitle_icu,
     242              :     .strupper = strupper_icu,
     243              :     .strfold = strfold_icu,
     244              :     .downcase_ident = downcase_ident_icu,
     245              :     .wc_isdigit = wc_isdigit_icu,
     246              :     .wc_isalpha = wc_isalpha_icu,
     247              :     .wc_isalnum = wc_isalnum_icu,
     248              :     .wc_isupper = wc_isupper_icu,
     249              :     .wc_islower = wc_islower_icu,
     250              :     .wc_isgraph = wc_isgraph_icu,
     251              :     .wc_isprint = wc_isprint_icu,
     252              :     .wc_ispunct = wc_ispunct_icu,
     253              :     .wc_isspace = wc_isspace_icu,
     254              :     .wc_isxdigit = wc_isxdigit_icu,
     255              :     .wc_iscased = wc_iscased_icu,
     256              :     .wc_toupper = toupper_icu,
     257              :     .wc_tolower = tolower_icu,
     258              : };
     259              : /*
                      :  * Character-classification and case-mapping method table that
                      :  * create_pg_locale_icu() installs when the database encoding is UTF-8.
                      :  * It differs from ctype_methods_icu only in the string-level mappers (the
                      :  * _utf8 variants) and in leaving downcase_ident unset.
                      :  */
     260              : static const struct ctype_methods ctype_methods_icu_utf8 = {
     261              :     .strlower = strlower_icu_utf8,
     262              :     .strtitle = strtitle_icu_utf8,
     263              :     .strupper = strupper_icu_utf8,
     264              :     .strfold = strfold_icu_utf8,
     265              :     /* uses plain ASCII semantics for historical reasons */
     266              :     .downcase_ident = NULL,
     267              :     .wc_isdigit = wc_isdigit_icu,
     268              :     .wc_isalpha = wc_isalpha_icu,
     269              :     .wc_isalnum = wc_isalnum_icu,
     270              :     .wc_isupper = wc_isupper_icu,
     271              :     .wc_islower = wc_islower_icu,
     272              :     .wc_isgraph = wc_isgraph_icu,
     273              :     .wc_isprint = wc_isprint_icu,
     274              :     .wc_ispunct = wc_ispunct_icu,
     275              :     .wc_isspace = wc_isspace_icu,
     276              :     .wc_isxdigit = wc_isxdigit_icu,
     277              :     .wc_iscased = wc_iscased_icu,
     278              :     .wc_toupper = toupper_icu,
     279              :     .wc_tolower = tolower_icu,
     280              : };
     281              : 
     282              : /*
     283              :  * ICU still depends on libc for compatibility with certain historical
     284              :  * behavior for single-byte encodings.  See downcase_ident_icu().
     285              :  *
     286              :  * XXX: consider fixing by decoding the single byte into a code point, and
     287              :  * using u_tolower().
                      :  *
                      :  * Creates a libc locale_t for the given ctype name: newlocale() with
                      :  * LC_CTYPE_MASK on non-Windows builds, _create_locale() with LC_ALL on
                      :  * WIN32.  If that yields a null handle, report_newlocale_failure() is
                      :  * called (presumably it raises an error and does not return -- confirm).
                      :  * create_pg_locale_icu() stores the returned handle in icu.lt.
     288              :  */
     289              : static locale_t
     290            0 : make_libc_ctype_locale(const char *ctype)
     291              : {
     292              :     locale_t    loc;
     293              : 
     294              : #ifndef WIN32
     295            0 :     loc = newlocale(LC_CTYPE_MASK, ctype, NULL);
     296              : #else
     297              :     loc = _create_locale(LC_ALL, ctype);
     298              : #endif
     299            0 :     if (!loc)
     300            0 :         report_newlocale_failure(ctype);
     301              : 
     302            0 :     return loc;
     303              : }
     304              : #endif
     305              : /*
                      :  * Build the pg_locale_t for ICU collation collid, allocating the result in
                      :  * context.
                      :  *
                      :  * For DEFAULT_COLLATION_OID the locale string and optional rules come from
                      :  * the pg_database row of MyDatabaseId and the result is always
                      :  * deterministic; if the database encoding is single-byte, a libc ctype
                      :  * locale is created from datctype as well.  For any other collation the
                      :  * locale string, rules and determinism flag come from its pg_collation row.
                      :  *
                      :  * The collator is opened with make_icu_collator().  When the database
                      :  * encoding is UTF-8 a UCaseMap is opened too and the UTF-8 method tables
                      :  * are installed; otherwise the non-UTF-8 ICU tables are used.
                      :  *
                      :  * In a build without USE_ICU this only raises
                      :  * ERRCODE_FEATURE_NOT_SUPPORTED.
                      :  *
                      :  * NOTE(review): if make_icu_collator() or pg_ucasemap_open() raises an
                      :  * error, nothing here releases the libc locale held in loc or the collator
                      :  * that was already opened -- confirm whether that is acceptable.
                      :  */
     306              : pg_locale_t
     307          105 : create_pg_locale_icu(Oid collid, MemoryContext context)
     308              : {
     309              : #ifdef USE_ICU
     310              :     bool        deterministic;
     311              :     const char *iculocstr;
     312          105 :     const char *icurules = NULL;
     313              :     UCollator  *collator;
     314          105 :     locale_t    loc = (locale_t) 0; /* stays 0 unless the single-byte default-collation path sets it */
     315              :     pg_locale_t result;
     316              : 
     317          105 :     if (collid == DEFAULT_COLLATION_OID)
     318              :     {
     319              :         HeapTuple   tp;
     320              :         Datum       datum;
     321              :         bool        isnull;
     322              : 
     323           13 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     324           13 :         if (!HeapTupleIsValid(tp))
     325            0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     326              : 
     327              :         /* default database collation is always deterministic */
     328           13 :         deterministic = true;
     329           13 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     330              :                                        Anum_pg_database_datlocale);
     331           13 :         iculocstr = TextDatumGetCString(datum);
     332           13 :         datum = SysCacheGetAttr(DATABASEOID, tp,
     333              :                                 Anum_pg_database_daticurules, &isnull);
     334           13 :         if (!isnull)
     335            0 :             icurules = TextDatumGetCString(datum);
     336              : 
     337              :         /* libc only needed for default locale and single-byte encoding */
     338           13 :         if (pg_database_encoding_max_length() == 1)
     339              :         {
     340              :             const char *ctype;
     341              : 
     342            0 :             datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     343              :                                            Anum_pg_database_datctype);
     344            0 :             ctype = TextDatumGetCString(datum);
     345              : 
     346            0 :             loc = make_libc_ctype_locale(ctype);
     347              :         }
     348              : 
     349           13 :         ReleaseSysCache(tp);
     350              :     }
     351              :     else
     352              :     {
     353              :         Form_pg_collation collform;
     354              :         HeapTuple   tp;
     355              :         Datum       datum;
     356              :         bool        isnull;
     357              : 
     358           92 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     359           92 :         if (!HeapTupleIsValid(tp))
     360            0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     361           92 :         collform = (Form_pg_collation) GETSTRUCT(tp);
     362           92 :         deterministic = collform->collisdeterministic;
     363           92 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     364              :                                        Anum_pg_collation_colllocale);
     365           92 :         iculocstr = TextDatumGetCString(datum);
     366           92 :         datum = SysCacheGetAttr(COLLOID, tp,
     367              :                                 Anum_pg_collation_collicurules, &isnull);
     368           92 :         if (!isnull)
     369            6 :             icurules = TextDatumGetCString(datum);
     370              : 
     371           92 :         ReleaseSysCache(tp);
     372              :     }
     373              : 
     374          105 :     collator = make_icu_collator(iculocstr, icurules);
     375              : 
     376          100 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     377          100 :     result->icu.locale = MemoryContextStrdup(context, iculocstr);
     378          100 :     result->icu.ucol = collator;
     379          100 :     result->icu.lt = loc;
     380          100 :     result->deterministic = deterministic;
     381          100 :     result->collate_is_c = false;
     382          100 :     result->ctype_is_c = false;
     383          100 :     if (GetDatabaseEncoding() == PG_UTF8)
     384              :     {
     385          100 :         result->icu.ucasemap = pg_ucasemap_open(iculocstr);
     386          100 :         result->collate = &collate_methods_icu_utf8;
     387          100 :         result->ctype = &ctype_methods_icu_utf8;
     388              :     }
     389              :     else
     390              :     {
     391            0 :         result->collate = &collate_methods_icu;
     392            0 :         result->ctype = &ctype_methods_icu;
     393              :     }
     394              : 
     395          100 :     return result;
     396              : #else
     397              :     /* could get here if a collation was created by a build with ICU */
     398              :     ereport(ERROR,
     399              :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     400              :              errmsg("ICU is not supported in this build")));
     401              : 
     402              :     return NULL; /* follows ereport(ERROR); presumably only present to satisfy the compiler */
     403              : #endif
     404              : }
     405              : 
     406              : #ifdef USE_ICU
     407              : 
     408              : /*
     409              :  * Check locale string and fix it if necessary. Returns a new palloc'd string.
     410              :  *
     411              :  * In ICU versions 54 and earlier, "und" is not a recognized spelling of the
     412              :  * root locale. If the first component of the locale is "und", replace with
     413              :  * "root" before opening.
                      :  *
                      :  * Raises an error if loc_str is NULL.  The version test in the body is on
                      :  * a compile-time constant, so the rewrite code is presumably compiled out
                      :  * when building against ICU 55 or later (this listing shows no execution
                      :  * counts for it).  In every other case the result is a plain copy of
                      :  * loc_str.
     414              :  */
     415              : static char *
     416        43140 : fix_icu_locale_str(const char *loc_str)
     417              : {
     418              :     /*
     419              :      * Must never open default collator, because it depends on the environment
     420              :      * and may change at any time. Should not happen, but check here to catch
     421              :      * bugs that might be hard to catch otherwise.
     422              :      *
     423              :      * NB: the default collator is not the same as the collator for the root
     424              :      * locale. The root locale may be specified as the empty string, "und", or
     425              :      * "root". The default collator is opened by passing NULL to ucol_open().
     426              :      */
     427        43140 :     if (loc_str == NULL)
     428            0 :         elog(ERROR, "opening default collator is not supported");
     429              : 
     430              :     if (U_ICU_VERSION_MAJOR_NUM < 55)
     431              :     {
     432              :         char        lang[ULOC_LANG_CAPACITY];
     433              :         UErrorCode  status = U_ZERO_ERROR;
     434              : 
     435              :         uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
     436              :         if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
     437              :         {
     438              :             ereport(ERROR,
     439              :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     440              :                      errmsg("could not get language from locale \"%s\": %s",
     441              :                             loc_str, u_errorName(status))));
     442              :         }
     443              : 
     444              :         if (strcmp(lang, "und") == 0)
     445              :         {
     446              :             const char *remainder = loc_str + strlen("und"); /* everything after the three-character language code */
     447              :             char       *fixed_str;
     448              : 
     449              :             fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
     450              :             strcpy(fixed_str, "root");
     451              :             strcat(fixed_str, remainder);
     452              : 
     453              :             return fixed_str;
     454              :         }
     455              :     }
     456              : 
     457        43140 :     return pstrdup(loc_str);
     458              : }
     459              : 
     460              : /*
     461              :  * Wrapper around ucol_open() to handle API differences for older ICU
     462              :  * versions.
     463              :  *
     464              :  * Ensure that no path leaks a UCollator.
                      :  *
                      :  * loc_str is first normalized by fix_icu_locale_str(); error messages show
                      :  * the caller-supplied string rather than the normalized one.  When built
                      :  * against ICU older than 54, icu_set_collation_attributes() is applied to
                      :  * the new collator, and the collator is closed before a failure of that
                      :  * step is reported.  The normalized copy is pfree()d on the success path
                      :  * only.
     465              :  */
     466              : UCollator *
     467        43040 : pg_ucol_open(const char *loc_str)
     468              : {
     469              :     UCollator  *collator;
     470              :     UErrorCode  status;
     471              :     char       *fixed_str;
     472              : 
     473        43040 :     fixed_str = fix_icu_locale_str(loc_str);
     474              : 
     475        43040 :     status = U_ZERO_ERROR;
     476        43040 :     collator = ucol_open(fixed_str, &status);
     477        43040 :     if (U_FAILURE(status))
     478            6 :         ereport(ERROR,
     479              :         /* use original string for error report */
     480              :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     481              :                  errmsg("could not open collator for locale \"%s\": %s",
     482              :                         loc_str, u_errorName(status))));
     483              : 
     484              :     if (U_ICU_VERSION_MAJOR_NUM < 54)
     485              :     {
     486              :         status = U_ZERO_ERROR;
     487              :         icu_set_collation_attributes(collator, fixed_str, &status);
     488              : 
     489              :         /*
     490              :          * Pretend the error came from ucol_open(), for consistent error
     491              :          * message across ICU versions.
     492              :          */
     493              :         if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
     494              :         {
     495              :             ucol_close(collator);
     496              :             ereport(ERROR,
     497              :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     498              :                      errmsg("could not open collator for locale \"%s\": %s",
     499              :                             loc_str, u_errorName(status))));
     500              :         }
     501              :     }
     502              : 
     503        43034 :     pfree(fixed_str);
     504              : 
     505        43034 :     return collator;
     506              : }
     507              : 
     508              : /*
     509              :  * Wrapper around ucasemap_open() to handle API differences for older ICU
     510              :  * versions.
     511              :  *
     512              :  * Additionally makes sure we get the right options for case folding.
                      :  *
                      :  * The locale string is normalized by fix_icu_locale_str(), and the same
                      :  * normalized string is given to foldcase_options() to choose the options
                      :  * passed to ucasemap_open().  A failure is reported with the original
                      :  * string; the normalized copy is pfree()d on the success path only.
     513              :  */
     514              : static UCaseMap *
     515          100 : pg_ucasemap_open(const char *loc_str)
     516              : {
     517          100 :     UErrorCode  status = U_ZERO_ERROR;
     518              :     UCaseMap   *casemap;
     519              :     char       *fixed_str;
     520              : 
     521          100 :     fixed_str = fix_icu_locale_str(loc_str);
     522              : 
     523          100 :     casemap = ucasemap_open(fixed_str, foldcase_options(fixed_str), &status);
     524          100 :     if (U_FAILURE(status))
     525              :         /* use original string for error report */
     526            0 :         ereport(ERROR,
     527              :                 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     528              :                 errmsg("could not open casemap for locale \"%s\": %s",
     529              :                        loc_str, u_errorName(status)));
     530              : 
     531          100 :     pfree(fixed_str);
     532              : 
     533          100 :     return casemap;
     534              : }
     535              : 
     536              : /*
     537              :  * Create a UCollator with the given locale string and rules.
     538              :  *
     539              :  * Ensure that no path leaks a UCollator.
                      :  *
                      :  * Without rules this is just pg_ucol_open().  With rules, the rule string
                      :  * of the standard collator for iculocstr is concatenated with icurules
                      :  * (converted to UChar by icu_to_uchar()) and a new collator is opened from
                      :  * the combined text with ucol_openRules(); the standard collator is closed
                      :  * once its rules have been copied.
                      :  *
                      :  * NOTE(review): the length reported by ucol_getRules() is stored but the
                      :  * code measures std_rules with u_strlen() instead.  On ucol_openRules()
                      :  * failure my_rules and all_rules are not pfree()d before ereport(ERROR);
                      :  * presumably memory-context cleanup reclaims them -- confirm.
     540              :  */
     541              : static UCollator *
     542          105 : make_icu_collator(const char *iculocstr, const char *icurules)
     543              : {
     544          105 :     if (!icurules)
     545              :     {
     546              :         /* simple case without rules */
     547           99 :         return pg_ucol_open(iculocstr);
     548              :     }
     549              :     else
     550              :     {
     551              :         UCollator  *collator_std_rules;
     552              :         UCollator  *collator_all_rules;
     553              :         const UChar *std_rules;
     554              :         UChar      *my_rules;
     555              :         UChar      *all_rules;
     556              :         int32_t     length;
     557              :         int32_t     total;
     558              :         UErrorCode  status;
     559              : 
     560              :         /*
     561              :          * If rules are specified, we extract the rules of the standard
     562              :          * collation, add our own rules, and make a new collator with the
     563              :          * combined rules.
     564              :          */
     565            6 :         icu_to_uchar(&my_rules, icurules, strlen(icurules));
     566              : 
     567            6 :         collator_std_rules = pg_ucol_open(iculocstr);
     568              : 
     569            6 :         std_rules = ucol_getRules(collator_std_rules, &length);
     570              : 
     571            6 :         total = u_strlen(std_rules) + u_strlen(my_rules) + 1; /* +1 leaves room for the terminator */
     572              : 
     573              :         /* avoid leaking collator on OOM */
     574            6 :         all_rules = palloc_extended(sizeof(UChar) * total, MCXT_ALLOC_NO_OOM);
     575            6 :         if (!all_rules)
     576              :         {
     577            0 :             ucol_close(collator_std_rules);
     578            0 :             ereport(ERROR,
     579              :                     (errcode(ERRCODE_OUT_OF_MEMORY),
     580              :                      errmsg("out of memory")));
     581              :         }
     582              : 
     583            6 :         u_strcpy(all_rules, std_rules);
     584            6 :         u_strcat(all_rules, my_rules);
     585              : 
     586            6 :         ucol_close(collator_std_rules);
     587              : 
     588            6 :         status = U_ZERO_ERROR;
     589            6 :         collator_all_rules = ucol_openRules(all_rules, u_strlen(all_rules),
     590              :                                             UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,
     591              :                                             NULL, &status);
     592            6 :         if (U_FAILURE(status))
     593              :         {
     594            3 :             ereport(ERROR,
     595              :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     596              :                      errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
     597              :                             iculocstr, icurules, u_errorName(status))));
     598              :         }
     599              : 
     600            3 :         pfree(my_rules);
     601            3 :         pfree(all_rules);
     602            3 :         return collator_all_rules;
     603              :     }
     604              : }
     605              : 
     606              : static size_t
     607            0 : strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     608              :              pg_locale_t locale)
     609              : {                               /* lowercase via icu_convert_case() with u_strToLower as the callback */
     610            0 :     return icu_convert_case(u_strToLower, dest, destsize, src, srclen, locale);
     611              : }
     612              : 
     613              : static size_t
     614            0 : strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     615              :              pg_locale_t locale)
     616              : {                               /* titlecase via icu_convert_case() with the u_strToTitle_default_BI wrapper */
     617            0 :     return icu_convert_case(u_strToTitle_default_BI, dest, destsize, src, srclen, locale);
     618              : }
     619              : 
     620              : static size_t
     621            0 : strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     622              :              pg_locale_t locale)
     623              : {                               /* uppercase via icu_convert_case() with u_strToUpper as the callback */
     624            0 :     return icu_convert_case(u_strToUpper, dest, destsize, src, srclen, locale);
     625              : }
     626              : 
     627              : static size_t
     628            0 : strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     629              :             pg_locale_t locale)
     630              : {                               /* case-fold via icu_convert_case() with the u_strFoldCase_default wrapper */
     631            0 :     return icu_convert_case(u_strFoldCase_default, dest, destsize, src, srclen, locale);
     632              : }
     633              : 
     634              : static size_t
     635          264 : strlower_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
     636              :                   pg_locale_t locale)
     637              : {                               /* lowercase with ucasemap_utf8ToLower() on locale->icu.ucasemap */
     638          264 :     UErrorCode  status = U_ZERO_ERROR;
     639              :     int32_t     needed;
     640              : 
     641          264 :     needed = ucasemap_utf8ToLower(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
     642          264 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)    /* overflow is not reported as an error */
     643            0 :         ereport(ERROR,
     644              :                 errmsg("case conversion failed: %s", u_errorName(status)));
     645          264 :     return needed;              /* value returned by ICU, passed back unchanged */
     646              : }
     647              : 
     648              : static size_t
     649           15 : strtitle_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
     650              :                   pg_locale_t locale)
     651              : {                               /* titlecase with ucasemap_utf8ToTitle() on locale->icu.ucasemap */
     652           15 :     UErrorCode  status = U_ZERO_ERROR;
     653              :     int32_t     needed;
     654              : 
     655           15 :     needed = ucasemap_utf8ToTitle(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
     656           15 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)    /* overflow is not reported as an error */
     657            0 :         ereport(ERROR,
     658              :                 errmsg("case conversion failed: %s", u_errorName(status)));
     659           15 :     return needed;              /* value returned by ICU, passed back unchanged */
     660              : }
     661              : 
     662              : static size_t
     663           27 : strupper_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
     664              :                   pg_locale_t locale)
     665              : {                               /* uppercase with ucasemap_utf8ToUpper() on locale->icu.ucasemap */
     666           27 :     UErrorCode  status = U_ZERO_ERROR;
     667              :     int32_t     needed;
     668              : 
     669           27 :     needed = ucasemap_utf8ToUpper(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
     670           27 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)    /* overflow is not reported as an error */
     671            0 :         ereport(ERROR,
     672              :                 errmsg("case conversion failed: %s", u_errorName(status)));
     673           27 :     return needed;              /* value returned by ICU, passed back unchanged */
     674              : }
     675              : 
     676              : static size_t
     677            6 : strfold_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
     678              :                  pg_locale_t locale)
     679              : {                               /* case-fold with ucasemap_utf8FoldCase() on locale->icu.ucasemap */
     680            6 :     UErrorCode  status = U_ZERO_ERROR;
     681              :     int32_t     needed;
     682              : 
     683            6 :     needed = ucasemap_utf8FoldCase(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
     684            6 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)    /* overflow is not reported as an error */
     685            0 :         ereport(ERROR,
     686              :                 errmsg("case conversion failed: %s", u_errorName(status)));
     687            6 :     return needed;              /* value returned by ICU, passed back unchanged */
     688              : }
     689              : 
     690              : /*
     691              :  * For historical compatibility, behavior is not multibyte-aware.
     692              :  *
     693              :  * NB: uses libc isupper_l()/tolower_l() for single-byte encodings (also for
     694              :  * historical compatibility), and therefore relies on the global LC_CTYPE setting.
     695              :  */
     696              : static size_t
     697            0 : downcase_ident_icu(char *dst, size_t dstsize, const char *src,
     698              :                    ssize_t srclen, pg_locale_t locale)
     699              : {
     700              :     int         i;
     701              :     bool        libc_lower;
     702            0 :     locale_t    lt = locale->icu.lt;
     703              : 
     704            0 :     libc_lower = lt && (pg_database_encoding_max_length() == 1);   /* libc path needs an lt and a single-byte encoding */
     705              : 
     706            0 :     for (i = 0; i < srclen && i < dstsize; i++) /* copies at most dstsize bytes */
     707              :     {
     708            0 :         unsigned char ch = (unsigned char) src[i];
     709              : 
     710            0 :         if (ch >= 'A' && ch <= 'Z')
     711            0 :             ch = pg_ascii_tolower(ch);
     712            0 :         else if (libc_lower && IS_HIGHBIT_SET(ch) && isupper_l(ch, lt))
     713            0 :             ch = tolower_l(ch, lt);
     714            0 :         dst[i] = (char) ch;
     715              :     }
     716              : 
     717            0 :     if (i < dstsize)            /* terminate only when there is room left */
     718            0 :         dst[i] = '\0';
     719              : 
     720            0 :     return srclen;              /* srclen is returned regardless of how many bytes were copied */
     721              : }
     722              : 
     723              : /*
     724              :  * strncoll_icu_utf8
     725              :  *
     726              :  * Compare two strings with ucol_strcollUTF8(). The database encoding must be
     727              :  * UTF-8 (asserted below). An argument length of -1 means the string is
     728              :  * NUL-terminated.
     729              :  */
     730              : #ifdef HAVE_UCOL_STRCOLLUTF8
     731              : int
     732        12028 : strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
     733              :                   pg_locale_t locale)
     734              : {
     735              :     int         result;
     736              :     UErrorCode  status;
     737              : 
     738              :     Assert(GetDatabaseEncoding() == PG_UTF8);
     739              : 
     740        12028 :     status = U_ZERO_ERROR;
     741        12028 :     result = ucol_strcollUTF8(locale->icu.ucol,
     742              :                               arg1, len1,
     743              :                               arg2, len2,
     744              :                               &status);
     745        12028 :     if (U_FAILURE(status))
     746            0 :         ereport(ERROR,
     747              :                 (errmsg("collation failed: %s", u_errorName(status))));
     748              : 
     749        12028 :     return result;              /* comparison result from ucol_strcollUTF8(), unchanged */
     750              : }
     751              : #endif
     752              : 
     753              : /* Build a sort key for src into dest; 'srclen' of -1 means the string is NUL-terminated */
     754              : size_t
     755         2874 : strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     756              :              pg_locale_t locale)
     757              : {
     758              :     char        sbuf[TEXTBUFLEN];
     759         2874 :     char       *buf = sbuf;
     760              :     UChar      *uchar;
     761              :     int32_t     ulen;
     762              :     size_t      uchar_bsize;
     763              :     Size        result_bsize;
     764              : 
     765         2874 :     init_icu_converter();
     766              : 
     767         2874 :     ulen = uchar_length(icu_converter, src, srclen);
     768              : 
     769         2874 :     uchar_bsize = (ulen + 1) * sizeof(UChar);   /* bytes for ulen UChars plus one more */
     770              : 
     771         2874 :     if (uchar_bsize > TEXTBUFLEN)   /* stack buffer too small: use palloc instead */
     772            0 :         buf = palloc(uchar_bsize);
     773              : 
     774         2874 :     uchar = (UChar *) buf;
     775              : 
     776         2874 :     ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
     777              : 
     778         2874 :     result_bsize = ucol_getSortKey(locale->icu.ucol,
     779              :                                    uchar, ulen,
     780              :                                    (uint8_t *) dest, destsize);
     781              : 
     782              :     /*
     783              :      * ucol_getSortKey() counts the nul-terminator in the result length, but
     784              :      * this function should not.
     785              :      */
     786              :     Assert(result_bsize > 0);
     787         2874 :     result_bsize--;
     788              : 
     789         2874 :     if (buf != sbuf)            /* free only if the heap buffer was used */
     790            0 :         pfree(buf);
     791              : 
     792              :     /* if dest is defined, it should be nul-terminated */
     793              :     Assert(result_bsize >= destsize || dest[result_bsize] == '\0');
     794              : 
     795         2874 :     return result_bsize;
     796              : }
     797              : 
     798              : /* Sort-key prefix for UTF-8 input; 'srclen' of -1 means the string is NUL-terminated */
     799              : size_t
     800          834 : strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
     801              :                          const char *src, ssize_t srclen,
     802              :                          pg_locale_t locale)
     803              : {
     804              :     size_t      result;
     805              :     UCharIterator iter;
     806              :     uint32_t    state[2];
     807              :     UErrorCode  status;
     808              : 
     809              :     Assert(GetDatabaseEncoding() == PG_UTF8);
     810              : 
     811          834 :     uiter_setUTF8(&iter, src, srclen);  /* iterate over src directly, no UChar conversion */
     812          834 :     state[0] = state[1] = 0;    /* won't need that again */
     813          834 :     status = U_ZERO_ERROR;
     814          834 :     result = ucol_nextSortKeyPart(locale->icu.ucol,
     815              :                                   &iter,
     816              :                                   state,
     817              :                                   (uint8_t *) dest,
     818              :                                   destsize,
     819              :                                   &status);
     820          834 :     if (U_FAILURE(status))
     821            0 :         ereport(ERROR,
     822              :                 (errmsg("sort key generation failed: %s",
     823              :                         u_errorName(status))));
     824              : 
     825          834 :     return result;              /* value returned by ucol_nextSortKeyPart() */
     826              : }
     827              : 
     828              : char *
     829        42862 : get_collation_actual_version_icu(const char *collcollate)
     830              : {                               /* returns a pstrdup'd version string for the collator opened on collcollate */
     831              :     UCollator  *collator;
     832              :     UVersionInfo versioninfo;
     833              :     char        buf[U_MAX_VERSION_STRING_LENGTH];
     834              : 
     835        42862 :     collator = pg_ucol_open(collcollate);
     836              : 
     837        42862 :     ucol_getVersion(collator, versioninfo);
     838        42862 :     ucol_close(collator);       /* collator is only needed to read the version */
     839              : 
     840        42862 :     u_versionToString(versioninfo, buf);
     841        42862 :     return pstrdup(buf);
     842              : }
     843              : 
     844              : /*
     845              :  * Convert a string in the database encoding into a string of UChars.
     846              :  *
     847              :  * The source string at buff is of length nbytes
     848              :  * (it needn't be nul-terminated)
     849              :  *
     850              :  * *buff_uchar receives a pointer to the palloc'd result string, and
     851              :  * the function's result is the number of UChars generated.
     852              :  *
     853              :  * The result string is nul-terminated, though most callers rely on the
     854              :  * result length instead.
     855              :  */
     856              : static int32_t
     857            6 : icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
     858              : {                               /* NOTE(review): nbytes (size_t) is passed to helpers that take int32_t lengths */
     859              :     int32_t     len_uchar;
     860              : 
     861            6 :     init_icu_converter();
     862              : 
     863            6 :     len_uchar = uchar_length(icu_converter, buff, nbytes);  /* first pass: measure only */
     864              : 
     865            6 :     *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
     866            6 :     len_uchar = uchar_convert(icu_converter,
     867              :                               *buff_uchar, len_uchar + 1, buff, nbytes);    /* second pass: convert */
     868              : 
     869            6 :     return len_uchar;
     870              : }
     871              : 
     872              : /*
     873              :  * Convert a string of UChars into the database encoding.
     874              :  *
     875              :  * The source string at buff_uchar is of length len_uchar
     876              :  * (it needn't be nul-terminated)
     877              :  *
     878              :  * dest (capacity destsize bytes) receives the converted string, and the
     879              :  * function's result is the number of bytes generated (not counting nul).
     880              :  * If that length plus a nul would not fit in destsize, dest is not written
     881              :  * and only the length is returned; otherwise the result is nul-terminated.
     882              :  */
     883              : static size_t
     884            0 : icu_from_uchar(char *dest, size_t destsize, const UChar *buff_uchar, int32_t len_uchar)
     885              : {
     886              :     UErrorCode  status;
     887              :     int32_t     len_result;
     888              : 
     889            0 :     init_icu_converter();
     890              : 
     891            0 :     status = U_ZERO_ERROR;
     892            0 :     len_result = ucnv_fromUChars(icu_converter, NULL, 0,
     893              :                                  buff_uchar, len_uchar, &status);   /* NULL/0 destination: measure only */
     894            0 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
     895            0 :         ereport(ERROR,
     896              :                 (errmsg("%s failed: %s", "ucnv_fromUChars",
     897              :                         u_errorName(status))));
     898              : 
     899            0 :     if (len_result + 1 > destsize)  /* no room for result plus nul: report length only */
     900            0 :         return len_result;
     901              : 
     902            0 :     status = U_ZERO_ERROR;
     903            0 :     len_result = ucnv_fromUChars(icu_converter, dest, len_result + 1,
     904              :                                  buff_uchar, len_uchar, &status);
     905            0 :     if (U_FAILURE(status) ||
     906            0 :         status == U_STRING_NOT_TERMINATED_WARNING)  /* an unterminated result is treated as an error too */
     907            0 :         ereport(ERROR,
     908              :                 (errmsg("%s failed: %s", "ucnv_fromUChars",
     909              :                         u_errorName(status))));
     910              : 
     911            0 :     return len_result;
     912              : }
     913              : 
     914              : static int32_t
     915            0 : convert_case_uchar(ICU_Convert_Func func, pg_locale_t mylocale,
     916              :                    UChar **buff_dest, UChar *buff_source, int32_t len_source)
     917              : {                               /* applies func to buff_source; *buff_dest gets a palloc'd buffer, result is func's length */
     918              :     UErrorCode  status;
     919              :     int32_t     len_dest;
     920              : 
     921            0 :     len_dest = len_source;      /* try first with same length */
     922            0 :     *buff_dest = palloc(len_dest * sizeof(**buff_dest));
     923            0 :     status = U_ZERO_ERROR;
     924            0 :     len_dest = func(*buff_dest, len_dest, buff_source, len_source,
     925              :                     mylocale->icu.locale, &status);
     926            0 :     if (status == U_BUFFER_OVERFLOW_ERROR)
     927              :     {
     928              :         /* try again with adjusted length */
     929            0 :         pfree(*buff_dest);
     930            0 :         *buff_dest = palloc(len_dest * sizeof(**buff_dest));    /* len_dest is what the first call returned */
     931            0 :         status = U_ZERO_ERROR;
     932            0 :         len_dest = func(*buff_dest, len_dest, buff_source, len_source,
     933              :                         mylocale->icu.locale, &status);
     934              :     }
     935            0 :     if (U_FAILURE(status))      /* covers a failure from either call */
     936            0 :         ereport(ERROR,
     937              :                 (errmsg("case conversion failed: %s", u_errorName(status))));
     938            0 :     return len_dest;
     939              : }
     940              : 
     941              : static int32_t
     942            0 : icu_convert_case(ICU_Convert_Func func, char *dest, size_t destsize,
     943              :                  const char *src, ssize_t srclen, pg_locale_t locale)
     944              : {                               /* src -> UChars -> func -> back to database encoding in dest */
     945              :     int32_t     len_uchar;
     946              :     int32_t     len_conv;
     947              :     UChar      *buff_uchar;
     948              :     UChar      *buff_conv;
     949              :     size_t      result_len;
     950              : 
     951            0 :     len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
     952            0 :     len_conv = convert_case_uchar(func, locale, &buff_conv,
     953              :                                   buff_uchar, len_uchar);
     954            0 :     result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
     955            0 :     pfree(buff_uchar);          /* both temporaries were palloc'd by the helpers above */
     956            0 :     pfree(buff_conv);
     957              : 
     958            0 :     return result_len;          /* NOTE(review): size_t value returned through an int32_t return type */
     959              : }
     960              : 
     961              : static int32_t
     962            0 : u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
     963              :                         const UChar *src, int32_t srcLength,
     964              :                         const char *locale,
     965              :                         UErrorCode *pErrorCode)
     966              : {                               /* wraps u_strToTitle(), passing NULL as its fifth argument (presumably the break iterator -- confirm) */
     967            0 :     return u_strToTitle(dest, destCapacity, src, srcLength,
     968              :                         NULL, locale, pErrorCode);
     969              : }
     970              : 
     971              : static int32_t
     972            0 : u_strFoldCase_default(UChar *dest, int32_t destCapacity,
     973              :                       const UChar *src, int32_t srcLength,
     974              :                       const char *locale,
     975              :                       UErrorCode *pErrorCode)
     976              : {                               /* wraps u_strFoldCase(); options are derived from locale by foldcase_options() */
     977            0 :     return u_strFoldCase(dest, destCapacity, src, srcLength,
     978            0 :                          foldcase_options(locale), pErrorCode);
     979              : }
     980              : 
     981              : /*
     982              :  * Return the correct u_strFoldCase() options for the given locale.
     983              :  *
     984              :  * Unlike the ICU APIs for lowercasing, titlecasing, and uppercasing, case
     985              :  * folding does not accept a locale. Instead it just supports a single option
     986              :  * relevant to Turkic languages 'az' and 'tr'; check for those languages.
     987              :  */
     988              : static int32_t
     989          100 : foldcase_options(const char *locale)
     990              : {
     991          100 :     uint32      options = U_FOLD_CASE_DEFAULT;  /* NOTE(review): uint32 here, returned as int32_t */
     992              :     char        lang[3];        /* room for a two-letter code plus nul */
     993          100 :     UErrorCode  status = U_ZERO_ERROR;
     994              : 
     995          100 :     uloc_getLanguage(locale, lang, 3, &status);
     996          100 :     if (U_SUCCESS(status))      /* on failure the default options are kept */
     997              :     {
     998              :         /*
     999              :          * The option name is confusing, but it causes u_strFoldCase to use
    1000              :          * the 'T' mappings, which are ignored for U_FOLD_CASE_DEFAULT.
    1001              :          */
    1002           97 :         if (strcmp(lang, "tr") == 0 || strcmp(lang, "az") == 0)
    1003            3 :             options = U_FOLD_CASE_EXCLUDE_SPECIAL_I;
    1004              :     }
    1005              : 
    1006          100 :     return options;
    1007              : }
    1008              : 
    1009              : /*
    1010              :  * strncoll_icu
    1011              :  *
    1012              :  * Convert the arguments from the database encoding to UChar strings, then
    1013              :  * call ucol_strcoll(). An argument length of -1 means that the string is
    1014              :  * NUL-terminated.
    1015              :  *
    1016              :  * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
    1017              :  * caller should call that instead.
    1018              :  */
    1019              : static int
    1020            0 : strncoll_icu(const char *arg1, ssize_t len1,
    1021              :              const char *arg2, ssize_t len2, pg_locale_t locale)
    1022              : {
    1023              :     char        sbuf[TEXTBUFLEN];
    1024            0 :     char       *buf = sbuf;
    1025              :     int32_t     ulen1;
    1026              :     int32_t     ulen2;
    1027              :     size_t      bufsize1;
    1028              :     size_t      bufsize2;
    1029              :     UChar      *uchar1,
    1030              :                *uchar2;
    1031              :     int         result;
    1032              : 
    1033              :     /* if encoding is UTF8, use more efficient strncoll_icu_utf8 */
    1034              : #ifdef HAVE_UCOL_STRCOLLUTF8
    1035              :     Assert(GetDatabaseEncoding() != PG_UTF8);
    1036              : #endif
    1037              : 
    1038            0 :     init_icu_converter();
    1039              : 
    1040            0 :     ulen1 = uchar_length(icu_converter, arg1, len1);
    1041            0 :     ulen2 = uchar_length(icu_converter, arg2, len2);
    1042              : 
    1043            0 :     bufsize1 = (ulen1 + 1) * sizeof(UChar);
    1044            0 :     bufsize2 = (ulen2 + 1) * sizeof(UChar);
    1045              : 
    1046            0 :     if (bufsize1 + bufsize2 > TEXTBUFLEN)   /* one buffer holds both converted strings */
    1047            0 :         buf = palloc(bufsize1 + bufsize2);
    1048              : 
    1049            0 :     uchar1 = (UChar *) buf;
    1050            0 :     uchar2 = (UChar *) (buf + bufsize1);    /* second string starts right after the first */
    1051              : 
    1052            0 :     ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
    1053            0 :     ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
    1054              : 
    1055            0 :     result = ucol_strcoll(locale->icu.ucol,
    1056              :                           uchar1, ulen1,
    1057              :                           uchar2, ulen2);
    1058              : 
    1059            0 :     if (buf != sbuf)            /* free only if the heap buffer was used */
    1060            0 :         pfree(buf);
    1061              : 
    1062            0 :     return result;
    1063              : }
    1064              : 
    1065              : /* Sort-key prefix for non-UTF-8 input; 'srclen' of -1 means the string is NUL-terminated */
    1066              : static size_t
    1067            0 : strnxfrm_prefix_icu(char *dest, size_t destsize,
    1068              :                     const char *src, ssize_t srclen,
    1069              :                     pg_locale_t locale)
    1070              : {
    1071              :     char        sbuf[TEXTBUFLEN];
    1072            0 :     char       *buf = sbuf;
    1073              :     UCharIterator iter;
    1074              :     uint32_t    state[2];
    1075              :     UErrorCode  status;
    1076            0 :     int32_t     ulen = -1;
    1077            0 :     UChar      *uchar = NULL;
    1078              :     size_t      uchar_bsize;
    1079              :     Size        result_bsize;
    1080              : 
    1081              :     /* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */
    1082              :     Assert(GetDatabaseEncoding() != PG_UTF8);
    1083              : 
    1084            0 :     init_icu_converter();
    1085              : 
    1086            0 :     ulen = uchar_length(icu_converter, src, srclen);
    1087              : 
    1088            0 :     uchar_bsize = (ulen + 1) * sizeof(UChar);   /* bytes for ulen UChars plus one more */
    1089              : 
    1090            0 :     if (uchar_bsize > TEXTBUFLEN)   /* stack buffer too small: use palloc instead */
    1091            0 :         buf = palloc(uchar_bsize);
    1092              : 
    1093            0 :     uchar = (UChar *) buf;
    1094              : 
    1095            0 :     ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
    1096              : 
    1097            0 :     uiter_setString(&iter, uchar, ulen);    /* iterate over the converted UChar string */
    1098            0 :     state[0] = state[1] = 0;    /* won't need that again */
    1099            0 :     status = U_ZERO_ERROR;
    1100            0 :     result_bsize = ucol_nextSortKeyPart(locale->icu.ucol,
    1101              :                                         &iter,
    1102              :                                         state,
    1103              :                                         (uint8_t *) dest,
    1104              :                                         destsize,
    1105              :                                         &status);
    1106            0 :     if (U_FAILURE(status))
    1107            0 :         ereport(ERROR,
    1108              :                 (errmsg("sort key generation failed: %s",
    1109              :                         u_errorName(status))));
    1110              : 
    1111            0 :     if (buf != sbuf)            /* free only if the heap buffer was used */
    1112            0 :         pfree(buf);
    1113              : 
    1114            0 :     return result_bsize;
    1115              : }
    1116              : 
    1117              : static void
    1118         2880 : init_icu_converter(void)
    1119              : {                               /* opens a converter for the database encoding once and stores it in icu_converter */
    1120              :     const char *icu_encoding_name;
    1121              :     UErrorCode  status;
    1122              :     UConverter *conv;
    1123              : 
    1124         2880 :     if (icu_converter)
    1125         2877 :         return;                 /* already done */
    1126              : 
    1127            3 :     icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
    1128            3 :     if (!icu_encoding_name)     /* no ICU name for this encoding */
    1129            0 :         ereport(ERROR,
    1130              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1131              :                  errmsg("encoding \"%s\" not supported by ICU",
    1132              :                         pg_encoding_to_char(GetDatabaseEncoding()))));
    1133              : 
    1134            3 :     status = U_ZERO_ERROR;
    1135            3 :     conv = ucnv_open(icu_encoding_name, &status);
    1136            3 :     if (U_FAILURE(status))
    1137            0 :         ereport(ERROR,
    1138              :                 (errmsg("could not open ICU converter for encoding \"%s\": %s",
    1139              :                         icu_encoding_name, u_errorName(status))));
    1140              : 
    1141            3 :     icu_converter = conv;       /* assigned only after a successful open */
    1142              : }
    1143              : 
    1144              : /*
    1145              :  * Find length, in UChars, of given string if converted to UChar string.
    1146              :  *
    1147              :  * A length of -1 indicates that the input string is NUL-terminated.
    1148              :  */
    1149              : static size_t
    1150         2880 : uchar_length(UConverter *converter, const char *str, int32_t len)
    1151              : {                               /* NOTE(review): returns size_t although ulen is an int32_t */
    1152         2880 :     UErrorCode  status = U_ZERO_ERROR;
    1153              :     int32_t     ulen;
    1154              : 
    1155         2880 :     ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);    /* NULL/0 destination: measure only */
    1156         2880 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)    /* overflow is not reported as an error */
    1157            0 :         ereport(ERROR,
    1158              :                 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
    1159         2880 :     return ulen;
    1160              : }
    1161              : 
    1162              : /*
    1163              :  * Convert the given source string into a UChar string, stored in dest, and
    1164              :  * return the length (in UChars).
    1165              :  *
    1166              :  * A srclen of -1 indicates that the input string is NUL-terminated.
    1167              :  */
    1168              : static int32_t
    1169         2880 : uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
    1170              :               const char *src, int32_t srclen)
    1171              : {
    1172         2880 :     UErrorCode  status = U_ZERO_ERROR;
    1173              :     int32_t     ulen;
    1174              : 
    1175         2880 :     status = U_ZERO_ERROR;
    1176         2880 :     ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
    1177         2880 :     if (U_FAILURE(status))
    1178            0 :         ereport(ERROR,
    1179              :                 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
    1180         2880 :     return ulen;
    1181              : }
    1182              : 
    1183              : /*
    1184              :  * Parse collation attributes from the given locale string and apply them to
    1185              :  * the open collator.
    1186              :  *
    1187              :  * First, the locale string is canonicalized to an ICU format locale ID such
    1188              :  * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
    1189              :  * the key-value arguments.
    1190              :  *
    1191              :  * Starting with ICU version 54, the attributes are processed automatically by
    1192              :  * ucol_open(), so this is only necessary for emulating this behavior on older
    1193              :  * versions.
    1194              :  */
    1195              : pg_attribute_unused()
    1196              : static void
    1197            0 : icu_set_collation_attributes(UCollator *collator, const char *loc,
    1198              :                              UErrorCode *status)
    1199              : {
    1200              :     int32_t     len;
    1201              :     char       *icu_locale_id;
    1202              :     char       *lower_str;
    1203              :     char       *str;
    1204              :     char       *token;
    1205              : 
    1206              :     /*
    1207              :      * The input locale may be a BCP 47 language tag, e.g.
    1208              :      * "und-u-kc-ks-level1", which expresses the same attributes in a
    1209              :      * different form. It will be converted to the equivalent ICU format
    1210              :      * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
    1211              :      * uloc_canonicalize().
    1212              :      */
    1213            0 :     *status = U_ZERO_ERROR;
    1214            0 :     len = uloc_canonicalize(loc, NULL, 0, status);
    1215            0 :     icu_locale_id = palloc(len + 1);
    1216            0 :     *status = U_ZERO_ERROR;
    1217            0 :     len = uloc_canonicalize(loc, icu_locale_id, len + 1, status);
    1218            0 :     if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
    1219            0 :         return;
    1220              : 
    1221            0 :     lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
    1222              : 
    1223            0 :     pfree(icu_locale_id);
    1224              : 
    1225            0 :     str = strchr(lower_str, '@');
    1226            0 :     if (!str)
    1227            0 :         return;
    1228            0 :     str++;
    1229              : 
    1230            0 :     while ((token = strsep(&str, ";")))
    1231              :     {
    1232            0 :         char       *e = strchr(token, '=');
    1233              : 
    1234            0 :         if (e)
    1235              :         {
    1236              :             char       *name;
    1237              :             char       *value;
    1238              :             UColAttribute uattr;
    1239              :             UColAttributeValue uvalue;
    1240              : 
    1241            0 :             *status = U_ZERO_ERROR;
    1242              : 
    1243            0 :             *e = '\0';
    1244            0 :             name = token;
    1245            0 :             value = e + 1;
    1246              : 
    1247              :             /*
    1248              :              * See attribute name and value lists in ICU i18n/coll.cpp
    1249              :              */
    1250            0 :             if (strcmp(name, "colstrength") == 0)
    1251            0 :                 uattr = UCOL_STRENGTH;
    1252            0 :             else if (strcmp(name, "colbackwards") == 0)
    1253            0 :                 uattr = UCOL_FRENCH_COLLATION;
    1254            0 :             else if (strcmp(name, "colcaselevel") == 0)
    1255            0 :                 uattr = UCOL_CASE_LEVEL;
    1256            0 :             else if (strcmp(name, "colcasefirst") == 0)
    1257            0 :                 uattr = UCOL_CASE_FIRST;
    1258            0 :             else if (strcmp(name, "colalternate") == 0)
    1259            0 :                 uattr = UCOL_ALTERNATE_HANDLING;
    1260            0 :             else if (strcmp(name, "colnormalization") == 0)
    1261            0 :                 uattr = UCOL_NORMALIZATION_MODE;
    1262            0 :             else if (strcmp(name, "colnumeric") == 0)
    1263            0 :                 uattr = UCOL_NUMERIC_COLLATION;
    1264              :             else
    1265              :                 /* ignore if unknown */
    1266            0 :                 continue;
    1267              : 
    1268            0 :             if (strcmp(value, "primary") == 0)
    1269            0 :                 uvalue = UCOL_PRIMARY;
    1270            0 :             else if (strcmp(value, "secondary") == 0)
    1271            0 :                 uvalue = UCOL_SECONDARY;
    1272            0 :             else if (strcmp(value, "tertiary") == 0)
    1273            0 :                 uvalue = UCOL_TERTIARY;
    1274            0 :             else if (strcmp(value, "quaternary") == 0)
    1275            0 :                 uvalue = UCOL_QUATERNARY;
    1276            0 :             else if (strcmp(value, "identical") == 0)
    1277            0 :                 uvalue = UCOL_IDENTICAL;
    1278            0 :             else if (strcmp(value, "no") == 0)
    1279            0 :                 uvalue = UCOL_OFF;
    1280            0 :             else if (strcmp(value, "yes") == 0)
    1281            0 :                 uvalue = UCOL_ON;
    1282            0 :             else if (strcmp(value, "shifted") == 0)
    1283            0 :                 uvalue = UCOL_SHIFTED;
    1284            0 :             else if (strcmp(value, "non-ignorable") == 0)
    1285            0 :                 uvalue = UCOL_NON_IGNORABLE;
    1286            0 :             else if (strcmp(value, "lower") == 0)
    1287            0 :                 uvalue = UCOL_LOWER_FIRST;
    1288            0 :             else if (strcmp(value, "upper") == 0)
    1289            0 :                 uvalue = UCOL_UPPER_FIRST;
    1290              :             else
    1291              :             {
    1292            0 :                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    1293            0 :                 break;
    1294              :             }
    1295              : 
    1296            0 :             ucol_setAttribute(collator, uattr, uvalue, status);
    1297              :         }
    1298              :     }
    1299              : 
    1300            0 :     pfree(lower_str);
    1301              : }
    1302              : 
    1303              : #endif                          /* USE_ICU */
        

Generated by: LCOV version 2.0-1