LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_icu.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 90 203 44.3 %
Date: 2024-11-21 08:14:44 Functions: 10 13 76.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-----------------------------------------------------------------------
       2             :  *
       3             :  * PostgreSQL locale utilities for ICU
       4             :  *
       5             :  * Portions Copyright (c) 2002-2024, PostgreSQL Global Development Group
       6             :  *
       7             :  * src/backend/utils/adt/pg_locale_icu.c
       8             :  *
       9             :  *-----------------------------------------------------------------------
      10             :  */
      11             : 
      12             : #include "postgres.h"
      13             : 
      14             : #ifdef USE_ICU
      15             : 
      16             : #include <unicode/ucnv.h>
      17             : #include <unicode/ustring.h>
      18             : 
      19             : #include "catalog/pg_collation.h"
      20             : #include "mb/pg_wchar.h"
      21             : #include "utils/formatting.h"
      22             : #include "utils/pg_locale.h"
      23             : 
      24             : /*
      25             :  * Size of stack buffer to use for string transformations, used to avoid heap
      26             :  * allocations in typical cases. This should be large enough that most strings
      27             :  * will fit, but small enough that we feel comfortable putting it on the
      28             :  * stack.
      29             :  */
      30             : #define     TEXTBUFLEN          1024
      31             : 
      32             : extern UCollator *pg_ucol_open(const char *loc_str);
      33             : extern UCollator *make_icu_collator(const char *iculocstr,
      34             :                                     const char *icurules);
      35             : extern int  strncoll_icu(const char *arg1, ssize_t len1,
      36             :                          const char *arg2, ssize_t len2,
      37             :                          pg_locale_t locale);
      38             : extern size_t strnxfrm_icu(char *dest, size_t destsize,
      39             :                            const char *src, ssize_t srclen,
      40             :                            pg_locale_t locale);
      41             : extern size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
      42             :                                   const char *src, ssize_t srclen,
      43             :                                   pg_locale_t locale);
      44             : 
      45             : /*
      46             :  * Converter object for converting between ICU's UChar strings and C strings
      47             :  * in database encoding.  Since the database encoding doesn't change, we only
      48             :  * need one of these per session.
      49             :  */
      50             : static UConverter *icu_converter = NULL;
      51             : 
      52             : static int  strncoll_icu_no_utf8(const char *arg1, ssize_t len1,
      53             :                                  const char *arg2, ssize_t len2,
      54             :                                  pg_locale_t locale);
      55             : static size_t strnxfrm_prefix_icu_no_utf8(char *dest, size_t destsize,
      56             :                                           const char *src, ssize_t srclen,
      57             :                                           pg_locale_t locale);
      58             : static void init_icu_converter(void);
      59             : static size_t uchar_length(UConverter *converter,
      60             :                            const char *str, int32_t len);
      61             : static int32_t uchar_convert(UConverter *converter,
      62             :                              UChar *dest, int32_t destlen,
      63             :                              const char *src, int32_t srclen);
      64             : static void icu_set_collation_attributes(UCollator *collator, const char *loc,
      65             :                                          UErrorCode *status);
      66             : 
      67             : /*
      68             :  * Wrapper around ucol_open() to handle API differences for older ICU
      69             :  * versions.
      70             :  *
      71             :  * Ensure that no path leaks a UCollator.
      72             :  */
      73             : UCollator *
      74       68150 : pg_ucol_open(const char *loc_str)
      75             : {
      76             :     UCollator  *collator;
      77             :     UErrorCode  status;
      78       68150 :     const char *orig_str = loc_str;
      79       68150 :     char       *fixed_str = NULL;
      80             : 
      81             :     /*
      82             :      * Must never open default collator, because it depends on the environment
      83             :      * and may change at any time. Should not happen, but check here to catch
      84             :      * bugs that might be hard to catch otherwise.
      85             :      *
      86             :      * NB: the default collator is not the same as the collator for the root
      87             :      * locale. The root locale may be specified as the empty string, "und", or
      88             :      * "root". The default collator is opened by passing NULL to ucol_open().
      89             :      */
      90       68150 :     if (loc_str == NULL)
      91           0 :         elog(ERROR, "opening default collator is not supported");
      92             : 
      93             :     /*
      94             :      * In ICU versions 54 and earlier, "und" is not a recognized spelling of
      95             :      * the root locale. If the first component of the locale is "und", replace
      96             :      * with "root" before opening.
      97             :      */
      98             :     if (U_ICU_VERSION_MAJOR_NUM < 55)
      99             :     {
     100             :         char        lang[ULOC_LANG_CAPACITY];
     101             : 
     102             :         status = U_ZERO_ERROR;
     103             :         uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
     104             :         if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
     105             :         {
     106             :             ereport(ERROR,
     107             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     108             :                      errmsg("could not get language from locale \"%s\": %s",
     109             :                             loc_str, u_errorName(status))));
     110             :         }
     111             : 
     112             :         if (strcmp(lang, "und") == 0)
     113             :         {
     114             :             const char *remainder = loc_str + strlen("und");
     115             : 
     116             :             fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
     117             :             strcpy(fixed_str, "root");
     118             :             strcat(fixed_str, remainder);
     119             : 
     120             :             loc_str = fixed_str;
     121             :         }
     122             :     }
     123             : 
     124       68150 :     status = U_ZERO_ERROR;
     125       68150 :     collator = ucol_open(loc_str, &status);
     126       68150 :     if (U_FAILURE(status))
     127          12 :         ereport(ERROR,
     128             :         /* use original string for error report */
     129             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     130             :                  errmsg("could not open collator for locale \"%s\": %s",
     131             :                         orig_str, u_errorName(status))));
     132             : 
     133             :     if (U_ICU_VERSION_MAJOR_NUM < 54)
     134             :     {
     135             :         status = U_ZERO_ERROR;
     136             :         icu_set_collation_attributes(collator, loc_str, &status);
     137             : 
     138             :         /*
     139             :          * Pretend the error came from ucol_open(), for consistent error
     140             :          * message across ICU versions.
     141             :          */
     142             :         if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
     143             :         {
     144             :             ucol_close(collator);
     145             :             ereport(ERROR,
     146             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     147             :                      errmsg("could not open collator for locale \"%s\": %s",
     148             :                             orig_str, u_errorName(status))));
     149             :         }
     150             :     }
     151             : 
     152       68138 :     if (fixed_str != NULL)
     153           0 :         pfree(fixed_str);
     154             : 
     155       68138 :     return collator;
     156             : }
     157             : 
     158             : /*
     159             :  * Create a UCollator with the given locale string and rules.
     160             :  *
     161             :  * Ensure that no path leaks a UCollator.
     162             :  */
     163             : UCollator *
     164         210 : make_icu_collator(const char *iculocstr, const char *icurules)
     165             : {
     166         210 :     if (!icurules)
     167             :     {
     168             :         /* simple case without rules */
     169         198 :         return pg_ucol_open(iculocstr);
     170             :     }
     171             :     else
     172             :     {
     173             :         UCollator  *collator_std_rules;
     174             :         UCollator  *collator_all_rules;
     175             :         const UChar *std_rules;
     176             :         UChar      *my_rules;
     177             :         UChar      *all_rules;
     178             :         int32_t     length;
     179             :         int32_t     total;
     180             :         UErrorCode  status;
     181             : 
     182             :         /*
     183             :          * If rules are specified, we extract the rules of the standard
     184             :          * collation, add our own rules, and make a new collator with the
     185             :          * combined rules.
     186             :          */
     187          12 :         icu_to_uchar(&my_rules, icurules, strlen(icurules));
     188             : 
     189          12 :         collator_std_rules = pg_ucol_open(iculocstr);
     190             : 
     191          12 :         std_rules = ucol_getRules(collator_std_rules, &length);
     192             : 
     193          12 :         total = u_strlen(std_rules) + u_strlen(my_rules) + 1;
     194             : 
     195             :         /* avoid leaking collator on OOM */
     196          12 :         all_rules = palloc_extended(sizeof(UChar) * total, MCXT_ALLOC_NO_OOM);
     197          12 :         if (!all_rules)
     198             :         {
     199           0 :             ucol_close(collator_std_rules);
     200           0 :             ereport(ERROR,
     201             :                     (errcode(ERRCODE_OUT_OF_MEMORY),
     202             :                      errmsg("out of memory")));
     203             :         }
     204             : 
     205          12 :         u_strcpy(all_rules, std_rules);
     206          12 :         u_strcat(all_rules, my_rules);
     207             : 
     208          12 :         ucol_close(collator_std_rules);
     209             : 
     210          12 :         status = U_ZERO_ERROR;
     211          12 :         collator_all_rules = ucol_openRules(all_rules, u_strlen(all_rules),
     212             :                                             UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,
     213             :                                             NULL, &status);
     214          12 :         if (U_FAILURE(status))
     215             :         {
     216           6 :             ereport(ERROR,
     217             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     218             :                      errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
     219             :                             iculocstr, icurules, u_errorName(status))));
     220             :         }
     221             : 
     222           6 :         return collator_all_rules;
     223             :     }
     224             : }
     225             : 
     226             : /*
     227             :  * strncoll_icu
     228             :  *
     229             :  * Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given
     230             :  * database encoding. An argument length of -1 means the string is
     231             :  * NUL-terminated.
     232             :  */
     233             : int
     234       21946 : strncoll_icu(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
     235             :              pg_locale_t locale)
     236             : {
     237             :     int         result;
     238             : 
     239             :     Assert(locale->provider == COLLPROVIDER_ICU);
     240             : 
     241             : #ifdef HAVE_UCOL_STRCOLLUTF8
     242       21946 :     if (GetDatabaseEncoding() == PG_UTF8)
     243             :     {
     244             :         UErrorCode  status;
     245             : 
     246       21946 :         status = U_ZERO_ERROR;
     247       21946 :         result = ucol_strcollUTF8(locale->info.icu.ucol,
     248             :                                   arg1, len1,
     249             :                                   arg2, len2,
     250             :                                   &status);
     251       21946 :         if (U_FAILURE(status))
     252           0 :             ereport(ERROR,
     253             :                     (errmsg("collation failed: %s", u_errorName(status))));
     254             :     }
     255             :     else
     256             : #endif
     257             :     {
     258           0 :         result = strncoll_icu_no_utf8(arg1, len1, arg2, len2, locale);
     259             :     }
     260             : 
     261       21946 :     return result;
     262             : }
     263             : 
     264             : /* 'srclen' of -1 means the strings are NUL-terminated */
     265             : size_t
     266       10020 : strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
     267             :              pg_locale_t locale)
     268             : {
     269             :     char        sbuf[TEXTBUFLEN];
     270       10020 :     char       *buf = sbuf;
     271             :     UChar      *uchar;
     272             :     int32_t     ulen;
     273             :     size_t      uchar_bsize;
     274             :     Size        result_bsize;
     275             : 
     276             :     Assert(locale->provider == COLLPROVIDER_ICU);
     277             : 
     278       10020 :     init_icu_converter();
     279             : 
     280       10020 :     ulen = uchar_length(icu_converter, src, srclen);
     281             : 
     282       10020 :     uchar_bsize = (ulen + 1) * sizeof(UChar);
     283             : 
     284       10020 :     if (uchar_bsize > TEXTBUFLEN)
     285           0 :         buf = palloc(uchar_bsize);
     286             : 
     287       10020 :     uchar = (UChar *) buf;
     288             : 
     289       10020 :     ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
     290             : 
     291       10020 :     result_bsize = ucol_getSortKey(locale->info.icu.ucol,
     292             :                                    uchar, ulen,
     293             :                                    (uint8_t *) dest, destsize);
     294             : 
     295             :     /*
     296             :      * ucol_getSortKey() counts the nul-terminator in the result length, but
     297             :      * this function should not.
     298             :      */
     299             :     Assert(result_bsize > 0);
     300       10020 :     result_bsize--;
     301             : 
     302       10020 :     if (buf != sbuf)
     303           0 :         pfree(buf);
     304             : 
     305             :     /* if dest is defined, it should be nul-terminated */
     306             :     Assert(result_bsize >= destsize || dest[result_bsize] == '\0');
     307             : 
     308       10020 :     return result_bsize;
     309             : }
     310             : 
     311             : /* 'srclen' of -1 means the strings are NUL-terminated */
     312             : size_t
     313        1650 : strnxfrm_prefix_icu(char *dest, size_t destsize,
     314             :                     const char *src, ssize_t srclen,
     315             :                     pg_locale_t locale)
     316             : {
     317             :     size_t      result;
     318             : 
     319             :     Assert(locale->provider == COLLPROVIDER_ICU);
     320             : 
     321        1650 :     if (GetDatabaseEncoding() == PG_UTF8)
     322             :     {
     323             :         UCharIterator iter;
     324             :         uint32_t    state[2];
     325             :         UErrorCode  status;
     326             : 
     327        1650 :         uiter_setUTF8(&iter, src, srclen);
     328        1650 :         state[0] = state[1] = 0;    /* won't need that again */
     329        1650 :         status = U_ZERO_ERROR;
     330        1650 :         result = ucol_nextSortKeyPart(locale->info.icu.ucol,
     331             :                                       &iter,
     332             :                                       state,
     333             :                                       (uint8_t *) dest,
     334             :                                       destsize,
     335             :                                       &status);
     336        1650 :         if (U_FAILURE(status))
     337           0 :             ereport(ERROR,
     338             :                     (errmsg("sort key generation failed: %s",
     339             :                             u_errorName(status))));
     340             :     }
     341             :     else
     342           0 :         result = strnxfrm_prefix_icu_no_utf8(dest, destsize, src, srclen,
     343             :                                              locale);
     344             : 
     345        1650 :     return result;
     346             : }
     347             : 
     348             : /*
     349             :  * Convert a string in the database encoding into a string of UChars.
     350             :  *
     351             :  * The source string at buff is of length nbytes
     352             :  * (it needn't be nul-terminated)
     353             :  *
     354             :  * *buff_uchar receives a pointer to the palloc'd result string, and
     355             :  * the function's result is the number of UChars generated.
     356             :  *
     357             :  * The result string is nul-terminated, though most callers rely on the
     358             :  * result length instead.
     359             :  */
     360             : int32_t
     361         552 : icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
     362             : {
     363             :     int32_t     len_uchar;
     364             : 
     365         552 :     init_icu_converter();
     366             : 
     367         552 :     len_uchar = uchar_length(icu_converter, buff, nbytes);
     368             : 
     369         552 :     *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
     370         552 :     len_uchar = uchar_convert(icu_converter,
     371             :                               *buff_uchar, len_uchar + 1, buff, nbytes);
     372             : 
     373         552 :     return len_uchar;
     374             : }
     375             : 
     376             : /*
     377             :  * Convert a string of UChars into the database encoding.
     378             :  *
     379             :  * The source string at buff_uchar is of length len_uchar
     380             :  * (it needn't be nul-terminated)
     381             :  *
     382             :  * *result receives a pointer to the palloc'd result string, and the
     383             :  * function's result is the number of bytes generated (not counting nul).
     384             :  *
     385             :  * The result string is nul-terminated.
     386             :  */
     387             : int32_t
     388         540 : icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
     389             : {
     390             :     UErrorCode  status;
     391             :     int32_t     len_result;
     392             : 
     393         540 :     init_icu_converter();
     394             : 
     395         540 :     status = U_ZERO_ERROR;
     396         540 :     len_result = ucnv_fromUChars(icu_converter, NULL, 0,
     397             :                                  buff_uchar, len_uchar, &status);
     398         540 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
     399           0 :         ereport(ERROR,
     400             :                 (errmsg("%s failed: %s", "ucnv_fromUChars",
     401             :                         u_errorName(status))));
     402             : 
     403         540 :     *result = palloc(len_result + 1);
     404             : 
     405         540 :     status = U_ZERO_ERROR;
     406         540 :     len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
     407             :                                  buff_uchar, len_uchar, &status);
     408         540 :     if (U_FAILURE(status) ||
     409         540 :         status == U_STRING_NOT_TERMINATED_WARNING)
     410           0 :         ereport(ERROR,
     411             :                 (errmsg("%s failed: %s", "ucnv_fromUChars",
     412             :                         u_errorName(status))));
     413             : 
     414         540 :     return len_result;
     415             : }
     416             : 
     417             : /*
     418             :  * strncoll_icu_no_utf8
     419             :  *
     420             :  * Convert the arguments from the database encoding to UChar strings, then
     421             :  * call ucol_strcoll(). An argument length of -1 means that the string is
     422             :  * NUL-terminated.
     423             :  *
     424             :  * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
     425             :  * caller should call that instead.
     426             :  */
     427             : static int
     428           0 : strncoll_icu_no_utf8(const char *arg1, ssize_t len1,
     429             :                      const char *arg2, ssize_t len2, pg_locale_t locale)
     430             : {
     431             :     char        sbuf[TEXTBUFLEN];
     432           0 :     char       *buf = sbuf;
     433             :     int32_t     ulen1;
     434             :     int32_t     ulen2;
     435             :     size_t      bufsize1;
     436             :     size_t      bufsize2;
     437             :     UChar      *uchar1,
     438             :                *uchar2;
     439             :     int         result;
     440             : 
     441             :     Assert(locale->provider == COLLPROVIDER_ICU);
     442             : #ifdef HAVE_UCOL_STRCOLLUTF8
     443             :     Assert(GetDatabaseEncoding() != PG_UTF8);
     444             : #endif
     445             : 
     446           0 :     init_icu_converter();
     447             : 
     448           0 :     ulen1 = uchar_length(icu_converter, arg1, len1);
     449           0 :     ulen2 = uchar_length(icu_converter, arg2, len2);
     450             : 
     451           0 :     bufsize1 = (ulen1 + 1) * sizeof(UChar);
     452           0 :     bufsize2 = (ulen2 + 1) * sizeof(UChar);
     453             : 
     454           0 :     if (bufsize1 + bufsize2 > TEXTBUFLEN)
     455           0 :         buf = palloc(bufsize1 + bufsize2);
     456             : 
     457           0 :     uchar1 = (UChar *) buf;
     458           0 :     uchar2 = (UChar *) (buf + bufsize1);
     459             : 
     460           0 :     ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
     461           0 :     ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
     462             : 
     463           0 :     result = ucol_strcoll(locale->info.icu.ucol,
     464             :                           uchar1, ulen1,
     465             :                           uchar2, ulen2);
     466             : 
     467           0 :     if (buf != sbuf)
     468           0 :         pfree(buf);
     469             : 
     470           0 :     return result;
     471             : }
     472             : 
     473             : /* 'srclen' of -1 means the strings are NUL-terminated */
     474             : static size_t
     475           0 : strnxfrm_prefix_icu_no_utf8(char *dest, size_t destsize,
     476             :                             const char *src, ssize_t srclen,
     477             :                             pg_locale_t locale)
     478             : {
     479             :     char        sbuf[TEXTBUFLEN];
     480           0 :     char       *buf = sbuf;
     481             :     UCharIterator iter;
     482             :     uint32_t    state[2];
     483             :     UErrorCode  status;
     484           0 :     int32_t     ulen = -1;
     485           0 :     UChar      *uchar = NULL;
     486             :     size_t      uchar_bsize;
     487             :     Size        result_bsize;
     488             : 
     489             :     Assert(locale->provider == COLLPROVIDER_ICU);
     490             :     Assert(GetDatabaseEncoding() != PG_UTF8);
     491             : 
     492           0 :     init_icu_converter();
     493             : 
     494           0 :     ulen = uchar_length(icu_converter, src, srclen);
     495             : 
     496           0 :     uchar_bsize = (ulen + 1) * sizeof(UChar);
     497             : 
     498           0 :     if (uchar_bsize > TEXTBUFLEN)
     499           0 :         buf = palloc(uchar_bsize);
     500             : 
     501           0 :     uchar = (UChar *) buf;
     502             : 
     503           0 :     ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
     504             : 
     505           0 :     uiter_setString(&iter, uchar, ulen);
     506           0 :     state[0] = state[1] = 0;    /* won't need that again */
     507           0 :     status = U_ZERO_ERROR;
     508           0 :     result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol,
     509             :                                         &iter,
     510             :                                         state,
     511             :                                         (uint8_t *) dest,
     512             :                                         destsize,
     513             :                                         &status);
     514           0 :     if (U_FAILURE(status))
     515           0 :         ereport(ERROR,
     516             :                 (errmsg("sort key generation failed: %s",
     517             :                         u_errorName(status))));
     518             : 
     519           0 :     return result_bsize;
     520             : }
     521             : 
     522             : static void
     523       11112 : init_icu_converter(void)
     524             : {
     525             :     const char *icu_encoding_name;
     526             :     UErrorCode  status;
     527             :     UConverter *conv;
     528             : 
     529       11112 :     if (icu_converter)
     530       11106 :         return;                 /* already done */
     531             : 
     532           6 :     icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
     533           6 :     if (!icu_encoding_name)
     534           0 :         ereport(ERROR,
     535             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     536             :                  errmsg("encoding \"%s\" not supported by ICU",
     537             :                         pg_encoding_to_char(GetDatabaseEncoding()))));
     538             : 
     539           6 :     status = U_ZERO_ERROR;
     540           6 :     conv = ucnv_open(icu_encoding_name, &status);
     541           6 :     if (U_FAILURE(status))
     542           0 :         ereport(ERROR,
     543             :                 (errmsg("could not open ICU converter for encoding \"%s\": %s",
     544             :                         icu_encoding_name, u_errorName(status))));
     545             : 
     546           6 :     icu_converter = conv;
     547             : }
     548             : 
     549             : /*
     550             :  * Find length, in UChars, of given string if converted to UChar string.
     551             :  *
     552             :  * A length of -1 indicates that the input string is NUL-terminated.
     553             :  */
     554             : static size_t
     555       10572 : uchar_length(UConverter *converter, const char *str, int32_t len)
     556             : {
     557       10572 :     UErrorCode  status = U_ZERO_ERROR;
     558             :     int32_t     ulen;
     559             : 
     560       10572 :     ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);
     561       10572 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
     562           0 :         ereport(ERROR,
     563             :                 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
     564       10572 :     return ulen;
     565             : }
     566             : 
     567             : /*
     568             :  * Convert the given source string into a UChar string, stored in dest, and
     569             :  * return the length (in UChars).
     570             :  *
     571             :  * A srclen of -1 indicates that the input string is NUL-terminated.
     572             :  */
     573             : static int32_t
     574       10572 : uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
     575             :               const char *src, int32_t srclen)
     576             : {
     577       10572 :     UErrorCode  status = U_ZERO_ERROR;
     578             :     int32_t     ulen;
     579             : 
     580       10572 :     status = U_ZERO_ERROR;
     581       10572 :     ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
     582       10572 :     if (U_FAILURE(status))
     583           0 :         ereport(ERROR,
     584             :                 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
     585       10572 :     return ulen;
     586             : }
     587             : 
     588             : /*
     589             :  * Parse collation attributes from the given locale string and apply them to
     590             :  * the open collator.
     591             :  *
     592             :  * First, the locale string is canonicalized to an ICU format locale ID such
     593             :  * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
     594             :  * the key-value arguments.
     595             :  *
     596             :  * Starting with ICU version 54, the attributes are processed automatically by
     597             :  * ucol_open(), so this is only necessary for emulating this behavior on older
     598             :  * versions.
     599             :  */
     600             : pg_attribute_unused()
     601             : static void
     602           0 : icu_set_collation_attributes(UCollator *collator, const char *loc,
     603             :                              UErrorCode *status)
     604             : {
     605             :     int32_t     len;
     606             :     char       *icu_locale_id;
     607             :     char       *lower_str;
     608             :     char       *str;
     609             :     char       *token;
     610             : 
     611             :     /*
     612             :      * The input locale may be a BCP 47 language tag, e.g.
     613             :      * "und-u-kc-ks-level1", which expresses the same attributes in a
     614             :      * different form. It will be converted to the equivalent ICU format
     615             :      * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
     616             :      * uloc_canonicalize().
     617             :      */
     618           0 :     *status = U_ZERO_ERROR;
     619           0 :     len = uloc_canonicalize(loc, NULL, 0, status);
     620           0 :     icu_locale_id = palloc(len + 1);
     621           0 :     *status = U_ZERO_ERROR;
     622           0 :     len = uloc_canonicalize(loc, icu_locale_id, len + 1, status);
     623           0 :     if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
     624           0 :         return;
     625             : 
     626           0 :     lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
     627             : 
     628           0 :     pfree(icu_locale_id);
     629             : 
     630           0 :     str = strchr(lower_str, '@');
     631           0 :     if (!str)
     632           0 :         return;
     633           0 :     str++;
     634             : 
     635           0 :     while ((token = strsep(&str, ";")))
     636             :     {
     637           0 :         char       *e = strchr(token, '=');
     638             : 
     639           0 :         if (e)
     640             :         {
     641             :             char       *name;
     642             :             char       *value;
     643             :             UColAttribute uattr;
     644             :             UColAttributeValue uvalue;
     645             : 
     646           0 :             *status = U_ZERO_ERROR;
     647             : 
     648           0 :             *e = '\0';
     649           0 :             name = token;
     650           0 :             value = e + 1;
     651             : 
     652             :             /*
     653             :              * See attribute name and value lists in ICU i18n/coll.cpp
     654             :              */
     655           0 :             if (strcmp(name, "colstrength") == 0)
     656           0 :                 uattr = UCOL_STRENGTH;
     657           0 :             else if (strcmp(name, "colbackwards") == 0)
     658           0 :                 uattr = UCOL_FRENCH_COLLATION;
     659           0 :             else if (strcmp(name, "colcaselevel") == 0)
     660           0 :                 uattr = UCOL_CASE_LEVEL;
     661           0 :             else if (strcmp(name, "colcasefirst") == 0)
     662           0 :                 uattr = UCOL_CASE_FIRST;
     663           0 :             else if (strcmp(name, "colalternate") == 0)
     664           0 :                 uattr = UCOL_ALTERNATE_HANDLING;
     665           0 :             else if (strcmp(name, "colnormalization") == 0)
     666           0 :                 uattr = UCOL_NORMALIZATION_MODE;
     667           0 :             else if (strcmp(name, "colnumeric") == 0)
     668           0 :                 uattr = UCOL_NUMERIC_COLLATION;
     669             :             else
     670             :                 /* ignore if unknown */
     671           0 :                 continue;
     672             : 
     673           0 :             if (strcmp(value, "primary") == 0)
     674           0 :                 uvalue = UCOL_PRIMARY;
     675           0 :             else if (strcmp(value, "secondary") == 0)
     676           0 :                 uvalue = UCOL_SECONDARY;
     677           0 :             else if (strcmp(value, "tertiary") == 0)
     678           0 :                 uvalue = UCOL_TERTIARY;
     679           0 :             else if (strcmp(value, "quaternary") == 0)
     680           0 :                 uvalue = UCOL_QUATERNARY;
     681           0 :             else if (strcmp(value, "identical") == 0)
     682           0 :                 uvalue = UCOL_IDENTICAL;
     683           0 :             else if (strcmp(value, "no") == 0)
     684           0 :                 uvalue = UCOL_OFF;
     685           0 :             else if (strcmp(value, "yes") == 0)
     686           0 :                 uvalue = UCOL_ON;
     687           0 :             else if (strcmp(value, "shifted") == 0)
     688           0 :                 uvalue = UCOL_SHIFTED;
     689           0 :             else if (strcmp(value, "non-ignorable") == 0)
     690           0 :                 uvalue = UCOL_NON_IGNORABLE;
     691           0 :             else if (strcmp(value, "lower") == 0)
     692           0 :                 uvalue = UCOL_LOWER_FIRST;
     693           0 :             else if (strcmp(value, "upper") == 0)
     694           0 :                 uvalue = UCOL_UPPER_FIRST;
     695             :             else
     696             :             {
     697           0 :                 *status = U_ILLEGAL_ARGUMENT_ERROR;
     698           0 :                 break;
     699             :             }
     700             : 
     701           0 :             ucol_setAttribute(collator, uattr, uvalue, status);
     702             :         }
     703             :     }
     704             : 
     705           0 :     pfree(lower_str);
     706             : }
     707             : 
     708             : #endif                          /* USE_ICU */

Generated by: LCOV version 1.14