Line data Source code
1 : /*-----------------------------------------------------------------------
2 : *
3 : * PostgreSQL locale utilities for ICU
4 : *
5 : * Portions Copyright (c) 2002-2026, PostgreSQL Global Development Group
6 : *
7 : * src/backend/utils/adt/pg_locale_icu.c
8 : *
9 : *-----------------------------------------------------------------------
10 : */
11 :
12 : #include "postgres.h"
13 :
14 : #ifdef USE_ICU
15 : #include <unicode/ucasemap.h>
16 : #include <unicode/ucnv.h>
17 : #include <unicode/ucol.h>
18 : #include <unicode/ustring.h>
19 :
20 : /*
21 : * ucol_strcollUTF8() was introduced in ICU 50, but it is buggy before ICU 53.
22 : * (see
23 : * <https://www.postgresql.org/message-id/flat/f1438ec6-22aa-4029-9a3b-26f79d330e72%40manitou-mail.org>)
24 : */
25 : #if U_ICU_VERSION_MAJOR_NUM >= 53
26 : #define HAVE_UCOL_STRCOLLUTF8 1
27 : #else
28 : #undef HAVE_UCOL_STRCOLLUTF8
29 : #endif
30 :
31 : #endif
32 :
33 : #include "access/htup_details.h"
34 : #include "catalog/pg_database.h"
35 : #include "catalog/pg_collation.h"
36 : #include "mb/pg_wchar.h"
37 : #include "miscadmin.h"
38 : #include "utils/builtins.h"
39 : #include "utils/formatting.h"
40 : #include "utils/memutils.h"
41 : #include "utils/pg_locale.h"
42 : #include "utils/syscache.h"
43 :
44 : /*
45 : * Size of stack buffer to use for string transformations, used to avoid heap
46 : * allocations in typical cases. This should be large enough that most strings
47 : * will fit, but small enough that we feel comfortable putting it on the
48 : * stack.
49 : */
50 : #define TEXTBUFLEN 1024
51 :
52 : extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
53 :
54 : #ifdef USE_ICU
55 :
56 : extern UCollator *pg_ucol_open(const char *loc_str);
57 : static UCaseMap *pg_ucasemap_open(const char *loc_str);
58 :
59 : static size_t strlower_icu(char *dest, size_t destsize, const char *src,
60 : ssize_t srclen, pg_locale_t locale);
61 : static size_t strtitle_icu(char *dest, size_t destsize, const char *src,
62 : ssize_t srclen, pg_locale_t locale);
63 : static size_t strupper_icu(char *dest, size_t destsize, const char *src,
64 : ssize_t srclen, pg_locale_t locale);
65 : static size_t strfold_icu(char *dest, size_t destsize, const char *src,
66 : ssize_t srclen, pg_locale_t locale);
67 : static size_t strlower_icu_utf8(char *dest, size_t destsize, const char *src,
68 : ssize_t srclen, pg_locale_t locale);
69 : static size_t strtitle_icu_utf8(char *dest, size_t destsize, const char *src,
70 : ssize_t srclen, pg_locale_t locale);
71 : static size_t strupper_icu_utf8(char *dest, size_t destsize, const char *src,
72 : ssize_t srclen, pg_locale_t locale);
73 : static size_t strfold_icu_utf8(char *dest, size_t destsize, const char *src,
74 : ssize_t srclen, pg_locale_t locale);
75 : static size_t downcase_ident_icu(char *dst, size_t dstsize, const char *src,
76 : ssize_t srclen, pg_locale_t locale);
77 : static int strncoll_icu(const char *arg1, ssize_t len1,
78 : const char *arg2, ssize_t len2,
79 : pg_locale_t locale);
80 : static size_t strnxfrm_icu(char *dest, size_t destsize,
81 : const char *src, ssize_t srclen,
82 : pg_locale_t locale);
83 : extern char *get_collation_actual_version_icu(const char *collcollate);
84 :
85 : typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
86 : const UChar *src, int32_t srcLength,
87 : const char *locale,
88 : UErrorCode *pErrorCode);
89 :
90 : /*
91 : * Converter object for converting between ICU's UChar strings and C strings
92 : * in database encoding. Since the database encoding doesn't change, we only
93 : * need one of these per session.
94 : */
95 : static UConverter *icu_converter = NULL;
96 :
97 : static UCollator *make_icu_collator(const char *iculocstr,
98 : const char *icurules);
99 : static int strncoll_icu(const char *arg1, ssize_t len1,
100 : const char *arg2, ssize_t len2,
101 : pg_locale_t locale);
102 : static size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
103 : const char *src, ssize_t srclen,
104 : pg_locale_t locale);
105 : #ifdef HAVE_UCOL_STRCOLLUTF8
106 : static int strncoll_icu_utf8(const char *arg1, ssize_t len1,
107 : const char *arg2, ssize_t len2,
108 : pg_locale_t locale);
109 : #endif
110 : static size_t strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
111 : const char *src, ssize_t srclen,
112 : pg_locale_t locale);
113 : static void init_icu_converter(void);
114 : static size_t uchar_length(UConverter *converter,
115 : const char *str, int32_t len);
116 : static int32_t uchar_convert(UConverter *converter,
117 : UChar *dest, int32_t destlen,
118 : const char *src, int32_t srclen);
119 : static int32_t icu_to_uchar(UChar **buff_uchar, const char *buff,
120 : size_t nbytes);
121 : static size_t icu_from_uchar(char *dest, size_t destsize,
122 : const UChar *buff_uchar, int32_t len_uchar);
123 : static void icu_set_collation_attributes(UCollator *collator, const char *loc,
124 : UErrorCode *status);
125 : static int32_t icu_convert_case(ICU_Convert_Func func, char *dest,
126 : size_t destsize, const char *src,
127 : ssize_t srclen, pg_locale_t locale);
128 : static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
129 : const UChar *src, int32_t srcLength,
130 : const char *locale,
131 : UErrorCode *pErrorCode);
132 : static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
133 : const UChar *src, int32_t srcLength,
134 : const char *locale,
135 : UErrorCode *pErrorCode);
136 : static int32_t foldcase_options(const char *locale);
137 :
138 : /*
139 : * XXX: many of the functions below rely on casts directly from pg_wchar to
140 : * UChar32, which is correct for UTF-8 and LATIN1, but not in general.
141 : */
142 :
143 : static pg_wchar
144 108 : toupper_icu(pg_wchar wc, pg_locale_t locale)
145 : {
146 108 : return u_toupper(wc);
147 : }
148 :
149 : static pg_wchar
150 108 : tolower_icu(pg_wchar wc, pg_locale_t locale)
151 : {
152 108 : return u_tolower(wc);
153 : }
154 :
155 : static const struct collate_methods collate_methods_icu = {
156 : .strncoll = strncoll_icu,
157 : .strnxfrm = strnxfrm_icu,
158 : .strnxfrm_prefix = strnxfrm_prefix_icu,
159 : .strxfrm_is_safe = true,
160 : };
161 :
162 : static const struct collate_methods collate_methods_icu_utf8 = {
163 : #ifdef HAVE_UCOL_STRCOLLUTF8
164 : .strncoll = strncoll_icu_utf8,
165 : #else
166 : .strncoll = strncoll_icu,
167 : #endif
168 : .strnxfrm = strnxfrm_icu,
169 : .strnxfrm_prefix = strnxfrm_prefix_icu_utf8,
170 : .strxfrm_is_safe = true,
171 : };
172 :
173 : static bool
174 12288 : wc_isdigit_icu(pg_wchar wc, pg_locale_t locale)
175 : {
176 12288 : return u_isdigit(wc);
177 : }
178 :
179 : static bool
180 12288 : wc_isalpha_icu(pg_wchar wc, pg_locale_t locale)
181 : {
182 12288 : return u_isalpha(wc);
183 : }
184 :
185 : static bool
186 12288 : wc_isalnum_icu(pg_wchar wc, pg_locale_t locale)
187 : {
188 12288 : return u_isalnum(wc);
189 : }
190 :
191 : static bool
192 12288 : wc_isupper_icu(pg_wchar wc, pg_locale_t locale)
193 : {
194 12288 : return u_isupper(wc);
195 : }
196 :
197 : static bool
198 12288 : wc_islower_icu(pg_wchar wc, pg_locale_t locale)
199 : {
200 12288 : return u_islower(wc);
201 : }
202 :
203 : static bool
204 12288 : wc_isgraph_icu(pg_wchar wc, pg_locale_t locale)
205 : {
206 12288 : return u_isgraph(wc);
207 : }
208 :
209 : static bool
210 12288 : wc_isprint_icu(pg_wchar wc, pg_locale_t locale)
211 : {
212 12288 : return u_isprint(wc);
213 : }
214 :
215 : static bool
216 12288 : wc_ispunct_icu(pg_wchar wc, pg_locale_t locale)
217 : {
218 12288 : return u_ispunct(wc);
219 : }
220 :
221 : static bool
222 12288 : wc_isspace_icu(pg_wchar wc, pg_locale_t locale)
223 : {
224 12288 : return u_isspace(wc);
225 : }
226 :
227 : static bool
228 0 : wc_isxdigit_icu(pg_wchar wc, pg_locale_t locale)
229 : {
230 0 : return u_isxdigit(wc);
231 : }
232 :
233 : static bool
234 126 : wc_iscased_icu(pg_wchar wc, pg_locale_t locale)
235 : {
236 126 : return u_hasBinaryProperty(wc, UCHAR_CASED);
237 : }
238 :
239 : static const struct ctype_methods ctype_methods_icu = {
240 : .strlower = strlower_icu,
241 : .strtitle = strtitle_icu,
242 : .strupper = strupper_icu,
243 : .strfold = strfold_icu,
244 : .downcase_ident = downcase_ident_icu,
245 : .wc_isdigit = wc_isdigit_icu,
246 : .wc_isalpha = wc_isalpha_icu,
247 : .wc_isalnum = wc_isalnum_icu,
248 : .wc_isupper = wc_isupper_icu,
249 : .wc_islower = wc_islower_icu,
250 : .wc_isgraph = wc_isgraph_icu,
251 : .wc_isprint = wc_isprint_icu,
252 : .wc_ispunct = wc_ispunct_icu,
253 : .wc_isspace = wc_isspace_icu,
254 : .wc_isxdigit = wc_isxdigit_icu,
255 : .wc_iscased = wc_iscased_icu,
256 : .wc_toupper = toupper_icu,
257 : .wc_tolower = tolower_icu,
258 : };
259 :
260 : static const struct ctype_methods ctype_methods_icu_utf8 = {
261 : .strlower = strlower_icu_utf8,
262 : .strtitle = strtitle_icu_utf8,
263 : .strupper = strupper_icu_utf8,
264 : .strfold = strfold_icu_utf8,
265 : /* uses plain ASCII semantics for historical reasons */
266 : .downcase_ident = NULL,
267 : .wc_isdigit = wc_isdigit_icu,
268 : .wc_isalpha = wc_isalpha_icu,
269 : .wc_isalnum = wc_isalnum_icu,
270 : .wc_isupper = wc_isupper_icu,
271 : .wc_islower = wc_islower_icu,
272 : .wc_isgraph = wc_isgraph_icu,
273 : .wc_isprint = wc_isprint_icu,
274 : .wc_ispunct = wc_ispunct_icu,
275 : .wc_isspace = wc_isspace_icu,
276 : .wc_isxdigit = wc_isxdigit_icu,
277 : .wc_iscased = wc_iscased_icu,
278 : .wc_toupper = toupper_icu,
279 : .wc_tolower = tolower_icu,
280 : };
281 :
282 : /*
283 : * ICU still depends on libc for compatibility with certain historical
284 : * behavior for single-byte encodings. See downcase_ident_icu().
285 : *
286 : * XXX: consider fixing by decoding the single byte into a code point, and
287 : * using u_tolower().
288 : */
289 : static locale_t
290 0 : make_libc_ctype_locale(const char *ctype)
291 : {
292 : locale_t loc;
293 :
294 : #ifndef WIN32
295 0 : loc = newlocale(LC_CTYPE_MASK, ctype, NULL);
296 : #else
297 : loc = _create_locale(LC_ALL, ctype);
298 : #endif
299 0 : if (!loc)
300 0 : report_newlocale_failure(ctype);
301 :
302 0 : return loc;
303 : }
304 : #endif
305 :
306 : pg_locale_t
307 210 : create_pg_locale_icu(Oid collid, MemoryContext context)
308 : {
309 : #ifdef USE_ICU
310 : bool deterministic;
311 : const char *iculocstr;
312 210 : const char *icurules = NULL;
313 : UCollator *collator;
314 210 : locale_t loc = (locale_t) 0;
315 : pg_locale_t result;
316 :
317 210 : if (collid == DEFAULT_COLLATION_OID)
318 : {
319 : HeapTuple tp;
320 : Datum datum;
321 : bool isnull;
322 :
323 26 : tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
324 26 : if (!HeapTupleIsValid(tp))
325 0 : elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
326 :
327 : /* default database collation is always deterministic */
328 26 : deterministic = true;
329 26 : datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
330 : Anum_pg_database_datlocale);
331 26 : iculocstr = TextDatumGetCString(datum);
332 26 : datum = SysCacheGetAttr(DATABASEOID, tp,
333 : Anum_pg_database_daticurules, &isnull);
334 26 : if (!isnull)
335 0 : icurules = TextDatumGetCString(datum);
336 :
337 : /* libc only needed for default locale and single-byte encoding */
338 26 : if (pg_database_encoding_max_length() == 1)
339 : {
340 : const char *ctype;
341 :
342 0 : datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
343 : Anum_pg_database_datctype);
344 0 : ctype = TextDatumGetCString(datum);
345 :
346 0 : loc = make_libc_ctype_locale(ctype);
347 : }
348 :
349 26 : ReleaseSysCache(tp);
350 : }
351 : else
352 : {
353 : Form_pg_collation collform;
354 : HeapTuple tp;
355 : Datum datum;
356 : bool isnull;
357 :
358 184 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
359 184 : if (!HeapTupleIsValid(tp))
360 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
361 184 : collform = (Form_pg_collation) GETSTRUCT(tp);
362 184 : deterministic = collform->collisdeterministic;
363 184 : datum = SysCacheGetAttrNotNull(COLLOID, tp,
364 : Anum_pg_collation_colllocale);
365 184 : iculocstr = TextDatumGetCString(datum);
366 184 : datum = SysCacheGetAttr(COLLOID, tp,
367 : Anum_pg_collation_collicurules, &isnull);
368 184 : if (!isnull)
369 12 : icurules = TextDatumGetCString(datum);
370 :
371 184 : ReleaseSysCache(tp);
372 : }
373 :
374 210 : collator = make_icu_collator(iculocstr, icurules);
375 :
376 200 : result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
377 200 : result->icu.locale = MemoryContextStrdup(context, iculocstr);
378 200 : result->icu.ucol = collator;
379 200 : result->icu.lt = loc;
380 200 : result->deterministic = deterministic;
381 200 : result->collate_is_c = false;
382 200 : result->ctype_is_c = false;
383 200 : if (GetDatabaseEncoding() == PG_UTF8)
384 : {
385 200 : result->icu.ucasemap = pg_ucasemap_open(iculocstr);
386 200 : result->collate = &collate_methods_icu_utf8;
387 200 : result->ctype = &ctype_methods_icu_utf8;
388 : }
389 : else
390 : {
391 0 : result->collate = &collate_methods_icu;
392 0 : result->ctype = &ctype_methods_icu;
393 : }
394 :
395 200 : return result;
396 : #else
397 : /* could get here if a collation was created by a build with ICU */
398 : ereport(ERROR,
399 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
400 : errmsg("ICU is not supported in this build")));
401 :
402 : return NULL;
403 : #endif
404 : }
405 :
406 : #ifdef USE_ICU
407 :
408 : /*
409 : * Check locale string and fix it if necessary. Returns a new palloc'd string.
410 : *
411 : * In ICU versions 54 and earlier, "und" is not a recognized spelling of the
412 : * root locale. If the first component of the locale is "und", replace with
413 : * "root" before opening.
414 : */
415 : static char *
416 80012 : fix_icu_locale_str(const char *loc_str)
417 : {
418 : /*
419 : * Must never open default collator, because it depends on the environment
420 : * and may change at any time. Should not happen, but check here to catch
421 : * bugs that might be hard to catch otherwise.
422 : *
423 : * NB: the default collator is not the same as the collator for the root
424 : * locale. The root locale may be specified as the empty string, "und", or
425 : * "root". The default collator is opened by passing NULL to ucol_open().
426 : */
427 80012 : if (loc_str == NULL)
428 0 : elog(ERROR, "opening default collator is not supported");
429 :
430 : if (U_ICU_VERSION_MAJOR_NUM < 55)
431 : {
432 : char lang[ULOC_LANG_CAPACITY];
433 : UErrorCode status = U_ZERO_ERROR;
434 :
435 : uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
436 : if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
437 : {
438 : ereport(ERROR,
439 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
440 : errmsg("could not get language from locale \"%s\": %s",
441 : loc_str, u_errorName(status))));
442 : }
443 :
444 : if (strcmp(lang, "und") == 0)
445 : {
446 : const char *remainder = loc_str + strlen("und");
447 : char *fixed_str;
448 :
449 : fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
450 : strcpy(fixed_str, "root");
451 : strcat(fixed_str, remainder);
452 :
453 : return fixed_str;
454 : }
455 : }
456 :
457 80012 : return pstrdup(loc_str);
458 : }
459 :
460 : /*
461 : * Wrapper around ucol_open() to handle API differences for older ICU
462 : * versions.
463 : *
464 : * Ensure that no path leaks a UCollator.
465 : */
466 : UCollator *
467 79812 : pg_ucol_open(const char *loc_str)
468 : {
469 : UCollator *collator;
470 : UErrorCode status;
471 : char *fixed_str;
472 :
473 79812 : fixed_str = fix_icu_locale_str(loc_str);
474 :
475 79812 : status = U_ZERO_ERROR;
476 79812 : collator = ucol_open(fixed_str, &status);
477 79812 : if (U_FAILURE(status))
478 12 : ereport(ERROR,
479 : /* use original string for error report */
480 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
481 : errmsg("could not open collator for locale \"%s\": %s",
482 : loc_str, u_errorName(status))));
483 :
484 : if (U_ICU_VERSION_MAJOR_NUM < 54)
485 : {
486 : status = U_ZERO_ERROR;
487 : icu_set_collation_attributes(collator, fixed_str, &status);
488 :
489 : /*
490 : * Pretend the error came from ucol_open(), for consistent error
491 : * message across ICU versions.
492 : */
493 : if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
494 : {
495 : ucol_close(collator);
496 : ereport(ERROR,
497 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
498 : errmsg("could not open collator for locale \"%s\": %s",
499 : loc_str, u_errorName(status))));
500 : }
501 : }
502 :
503 79800 : pfree(fixed_str);
504 :
505 79800 : return collator;
506 : }
507 :
508 : /*
509 : * Wrapper around ucasemap_open() to handle API differences for older ICU
510 : * versions.
511 : *
512 : * Additionally makes sure we get the right options for case folding.
513 : */
514 : static UCaseMap *
515 200 : pg_ucasemap_open(const char *loc_str)
516 : {
517 200 : UErrorCode status = U_ZERO_ERROR;
518 : UCaseMap *casemap;
519 : char *fixed_str;
520 :
521 200 : fixed_str = fix_icu_locale_str(loc_str);
522 :
523 200 : casemap = ucasemap_open(fixed_str, foldcase_options(fixed_str), &status);
524 200 : if (U_FAILURE(status))
525 : /* use original string for error report */
526 0 : ereport(ERROR,
527 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
528 : errmsg("could not open casemap for locale \"%s\": %s",
529 : loc_str, u_errorName(status)));
530 :
531 200 : pfree(fixed_str);
532 :
533 200 : return casemap;
534 : }
535 :
536 : /*
537 : * Create a UCollator with the given locale string and rules.
538 : *
539 : * Ensure that no path leaks a UCollator.
540 : */
541 : static UCollator *
542 210 : make_icu_collator(const char *iculocstr, const char *icurules)
543 : {
544 210 : if (!icurules)
545 : {
546 : /* simple case without rules */
547 198 : return pg_ucol_open(iculocstr);
548 : }
549 : else
550 : {
551 : UCollator *collator_std_rules;
552 : UCollator *collator_all_rules;
553 : const UChar *std_rules;
554 : UChar *my_rules;
555 : UChar *all_rules;
556 : int32_t length;
557 : int32_t total;
558 : UErrorCode status;
559 :
560 : /*
561 : * If rules are specified, we extract the rules of the standard
562 : * collation, add our own rules, and make a new collator with the
563 : * combined rules.
564 : */
565 12 : icu_to_uchar(&my_rules, icurules, strlen(icurules));
566 :
567 12 : collator_std_rules = pg_ucol_open(iculocstr);
568 :
569 12 : std_rules = ucol_getRules(collator_std_rules, &length);
570 :
571 12 : total = u_strlen(std_rules) + u_strlen(my_rules) + 1;
572 :
573 : /* avoid leaking collator on OOM */
574 12 : all_rules = palloc_extended(sizeof(UChar) * total, MCXT_ALLOC_NO_OOM);
575 12 : if (!all_rules)
576 : {
577 0 : ucol_close(collator_std_rules);
578 0 : ereport(ERROR,
579 : (errcode(ERRCODE_OUT_OF_MEMORY),
580 : errmsg("out of memory")));
581 : }
582 :
583 12 : u_strcpy(all_rules, std_rules);
584 12 : u_strcat(all_rules, my_rules);
585 :
586 12 : ucol_close(collator_std_rules);
587 :
588 12 : status = U_ZERO_ERROR;
589 12 : collator_all_rules = ucol_openRules(all_rules, u_strlen(all_rules),
590 : UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,
591 : NULL, &status);
592 12 : if (U_FAILURE(status))
593 : {
594 6 : ereport(ERROR,
595 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
596 : errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
597 : iculocstr, icurules, u_errorName(status))));
598 : }
599 :
600 6 : return collator_all_rules;
601 : }
602 : }
603 :
604 : static size_t
605 0 : strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
606 : pg_locale_t locale)
607 : {
608 0 : return icu_convert_case(u_strToLower, dest, destsize, src, srclen, locale);
609 : }
610 :
611 : static size_t
612 0 : strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
613 : pg_locale_t locale)
614 : {
615 0 : return icu_convert_case(u_strToTitle_default_BI, dest, destsize, src, srclen, locale);
616 : }
617 :
618 : static size_t
619 0 : strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
620 : pg_locale_t locale)
621 : {
622 0 : return icu_convert_case(u_strToUpper, dest, destsize, src, srclen, locale);
623 : }
624 :
625 : static size_t
626 0 : strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
627 : pg_locale_t locale)
628 : {
629 0 : return icu_convert_case(u_strFoldCase_default, dest, destsize, src, srclen, locale);
630 : }
631 :
632 : static size_t
633 528 : strlower_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
634 : pg_locale_t locale)
635 : {
636 528 : UErrorCode status = U_ZERO_ERROR;
637 : int32_t needed;
638 :
639 528 : needed = ucasemap_utf8ToLower(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
640 528 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
641 0 : ereport(ERROR,
642 : errmsg("case conversion failed: %s", u_errorName(status)));
643 528 : return needed;
644 : }
645 :
646 : static size_t
647 30 : strtitle_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
648 : pg_locale_t locale)
649 : {
650 30 : UErrorCode status = U_ZERO_ERROR;
651 : int32_t needed;
652 :
653 30 : needed = ucasemap_utf8ToTitle(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
654 30 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
655 0 : ereport(ERROR,
656 : errmsg("case conversion failed: %s", u_errorName(status)));
657 30 : return needed;
658 : }
659 :
660 : static size_t
661 54 : strupper_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
662 : pg_locale_t locale)
663 : {
664 54 : UErrorCode status = U_ZERO_ERROR;
665 : int32_t needed;
666 :
667 54 : needed = ucasemap_utf8ToUpper(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
668 54 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
669 0 : ereport(ERROR,
670 : errmsg("case conversion failed: %s", u_errorName(status)));
671 54 : return needed;
672 : }
673 :
674 : static size_t
675 12 : strfold_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
676 : pg_locale_t locale)
677 : {
678 12 : UErrorCode status = U_ZERO_ERROR;
679 : int32_t needed;
680 :
681 12 : needed = ucasemap_utf8FoldCase(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
682 12 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
683 0 : ereport(ERROR,
684 : errmsg("case conversion failed: %s", u_errorName(status)));
685 12 : return needed;
686 : }
687 :
/*
 * For historical compatibility, behavior is not multibyte-aware.
 *
 * NB: for single-byte encodings, high-bit characters are lowercased with
 * libc's tolower_l() (also for historical compatibility), using the libc
 * locale stored in locale->icu.lt rather than the global LC_CTYPE setting.
 */
694 : static size_t
695 0 : downcase_ident_icu(char *dst, size_t dstsize, const char *src,
696 : ssize_t srclen, pg_locale_t locale)
697 : {
698 : int i;
699 : bool libc_lower;
700 0 : locale_t lt = locale->icu.lt;
701 :
702 0 : libc_lower = lt && (pg_database_encoding_max_length() == 1);
703 :
704 0 : for (i = 0; i < srclen && i < dstsize; i++)
705 : {
706 0 : unsigned char ch = (unsigned char) src[i];
707 :
708 0 : if (ch >= 'A' && ch <= 'Z')
709 0 : ch = pg_ascii_tolower(ch);
710 0 : else if (libc_lower && IS_HIGHBIT_SET(ch) && isupper_l(ch, lt))
711 0 : ch = tolower_l(ch, lt);
712 0 : dst[i] = (char) ch;
713 : }
714 :
715 0 : if (i < dstsize)
716 0 : dst[i] = '\0';
717 :
718 0 : return srclen;
719 : }
720 :
/*
 * strncoll_icu_utf8
 *
 * Call ucol_strcollUTF8() directly on the UTF-8 arguments; only valid when
 * the database encoding is UTF-8. An argument length of -1 means the string
 * is NUL-terminated.
 */
#ifdef HAVE_UCOL_STRCOLLUTF8
int
strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
				  pg_locale_t locale)
{
	UErrorCode	uerr = U_ZERO_ERROR;
	int			cmp;

	Assert(GetDatabaseEncoding() == PG_UTF8);

	/* compare the UTF-8 byte strings without converting to UChar */
	cmp = ucol_strcollUTF8(locale->icu.ucol, arg1, len1, arg2, len2, &uerr);
	if (U_FAILURE(uerr))
		ereport(ERROR,
				(errmsg("collation failed: %s", u_errorName(uerr))));

	return cmp;
}
#endif
750 :
751 : /* 'srclen' of -1 means the strings are NUL-terminated */
752 : size_t
753 5748 : strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
754 : pg_locale_t locale)
755 : {
756 : char sbuf[TEXTBUFLEN];
757 5748 : char *buf = sbuf;
758 : UChar *uchar;
759 : int32_t ulen;
760 : size_t uchar_bsize;
761 : Size result_bsize;
762 :
763 5748 : init_icu_converter();
764 :
765 5748 : ulen = uchar_length(icu_converter, src, srclen);
766 :
767 5748 : uchar_bsize = (ulen + 1) * sizeof(UChar);
768 :
769 5748 : if (uchar_bsize > TEXTBUFLEN)
770 0 : buf = palloc(uchar_bsize);
771 :
772 5748 : uchar = (UChar *) buf;
773 :
774 5748 : ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
775 :
776 5748 : result_bsize = ucol_getSortKey(locale->icu.ucol,
777 : uchar, ulen,
778 : (uint8_t *) dest, destsize);
779 :
780 : /*
781 : * ucol_getSortKey() counts the nul-terminator in the result length, but
782 : * this function should not.
783 : */
784 : Assert(result_bsize > 0);
785 5748 : result_bsize--;
786 :
787 5748 : if (buf != sbuf)
788 0 : pfree(buf);
789 :
790 : /* if dest is defined, it should be nul-terminated */
791 : Assert(result_bsize >= destsize || dest[result_bsize] == '\0');
792 :
793 5748 : return result_bsize;
794 : }
795 :
796 : /* 'srclen' of -1 means the strings are NUL-terminated */
797 : size_t
798 1668 : strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
799 : const char *src, ssize_t srclen,
800 : pg_locale_t locale)
801 : {
802 : size_t result;
803 : UCharIterator iter;
804 : uint32_t state[2];
805 : UErrorCode status;
806 :
807 : Assert(GetDatabaseEncoding() == PG_UTF8);
808 :
809 1668 : uiter_setUTF8(&iter, src, srclen);
810 1668 : state[0] = state[1] = 0; /* won't need that again */
811 1668 : status = U_ZERO_ERROR;
812 1668 : result = ucol_nextSortKeyPart(locale->icu.ucol,
813 : &iter,
814 : state,
815 : (uint8_t *) dest,
816 : destsize,
817 : &status);
818 1668 : if (U_FAILURE(status))
819 0 : ereport(ERROR,
820 : (errmsg("sort key generation failed: %s",
821 : u_errorName(status))));
822 :
823 1668 : return result;
824 : }
825 :
826 : char *
827 79454 : get_collation_actual_version_icu(const char *collcollate)
828 : {
829 : UCollator *collator;
830 : UVersionInfo versioninfo;
831 : char buf[U_MAX_VERSION_STRING_LENGTH];
832 :
833 79454 : collator = pg_ucol_open(collcollate);
834 :
835 79454 : ucol_getVersion(collator, versioninfo);
836 79454 : ucol_close(collator);
837 :
838 79454 : u_versionToString(versioninfo, buf);
839 79454 : return pstrdup(buf);
840 : }
841 :
842 : /*
843 : * Convert a string in the database encoding into a string of UChars.
844 : *
845 : * The source string at buff is of length nbytes
846 : * (it needn't be nul-terminated)
847 : *
848 : * *buff_uchar receives a pointer to the palloc'd result string, and
849 : * the function's result is the number of UChars generated.
850 : *
851 : * The result string is nul-terminated, though most callers rely on the
852 : * result length instead.
853 : */
854 : static int32_t
855 12 : icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
856 : {
857 : int32_t len_uchar;
858 :
859 12 : init_icu_converter();
860 :
861 12 : len_uchar = uchar_length(icu_converter, buff, nbytes);
862 :
863 12 : *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
864 12 : len_uchar = uchar_convert(icu_converter,
865 : *buff_uchar, len_uchar + 1, buff, nbytes);
866 :
867 12 : return len_uchar;
868 : }
869 :
/*
 * Convert a string of UChars into the database encoding.
 *
 * The source string at buff_uchar is of length len_uchar
 * (it needn't be nul-terminated)
 *
 * The result is written into the caller-supplied buffer 'dest' of size
 * 'destsize', and the function's result is the number of bytes required
 * (not counting nul). If the result plus nul does not fit in 'destsize',
 * nothing is written to 'dest'.
 *
 * When written, the result string is nul-terminated.
 */
881 : static size_t
882 0 : icu_from_uchar(char *dest, size_t destsize, const UChar *buff_uchar, int32_t len_uchar)
883 : {
884 : UErrorCode status;
885 : int32_t len_result;
886 :
887 0 : init_icu_converter();
888 :
889 0 : status = U_ZERO_ERROR;
890 0 : len_result = ucnv_fromUChars(icu_converter, NULL, 0,
891 : buff_uchar, len_uchar, &status);
892 0 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
893 0 : ereport(ERROR,
894 : (errmsg("%s failed: %s", "ucnv_fromUChars",
895 : u_errorName(status))));
896 :
897 0 : if (len_result + 1 > destsize)
898 0 : return len_result;
899 :
900 0 : status = U_ZERO_ERROR;
901 0 : len_result = ucnv_fromUChars(icu_converter, dest, len_result + 1,
902 : buff_uchar, len_uchar, &status);
903 0 : if (U_FAILURE(status) ||
904 0 : status == U_STRING_NOT_TERMINATED_WARNING)
905 0 : ereport(ERROR,
906 : (errmsg("%s failed: %s", "ucnv_fromUChars",
907 : u_errorName(status))));
908 :
909 0 : return len_result;
910 : }
911 :
912 : static int32_t
913 0 : convert_case_uchar(ICU_Convert_Func func, pg_locale_t mylocale,
914 : UChar **buff_dest, UChar *buff_source, int32_t len_source)
915 : {
916 : UErrorCode status;
917 : int32_t len_dest;
918 :
919 0 : len_dest = len_source; /* try first with same length */
920 0 : *buff_dest = palloc(len_dest * sizeof(**buff_dest));
921 0 : status = U_ZERO_ERROR;
922 0 : len_dest = func(*buff_dest, len_dest, buff_source, len_source,
923 : mylocale->icu.locale, &status);
924 0 : if (status == U_BUFFER_OVERFLOW_ERROR)
925 : {
926 : /* try again with adjusted length */
927 0 : pfree(*buff_dest);
928 0 : *buff_dest = palloc(len_dest * sizeof(**buff_dest));
929 0 : status = U_ZERO_ERROR;
930 0 : len_dest = func(*buff_dest, len_dest, buff_source, len_source,
931 : mylocale->icu.locale, &status);
932 : }
933 0 : if (U_FAILURE(status))
934 0 : ereport(ERROR,
935 : (errmsg("case conversion failed: %s", u_errorName(status))));
936 0 : return len_dest;
937 : }
938 :
939 : static int32_t
940 0 : icu_convert_case(ICU_Convert_Func func, char *dest, size_t destsize,
941 : const char *src, ssize_t srclen, pg_locale_t locale)
942 : {
943 : int32_t len_uchar;
944 : int32_t len_conv;
945 : UChar *buff_uchar;
946 : UChar *buff_conv;
947 : size_t result_len;
948 :
949 0 : len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
950 0 : len_conv = convert_case_uchar(func, locale, &buff_conv,
951 : buff_uchar, len_uchar);
952 0 : result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
953 0 : pfree(buff_uchar);
954 0 : pfree(buff_conv);
955 :
956 0 : return result_len;
957 : }
958 :
959 : static int32_t
960 0 : u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
961 : const UChar *src, int32_t srcLength,
962 : const char *locale,
963 : UErrorCode *pErrorCode)
964 : {
965 0 : return u_strToTitle(dest, destCapacity, src, srcLength,
966 : NULL, locale, pErrorCode);
967 : }
968 :
969 : static int32_t
970 0 : u_strFoldCase_default(UChar *dest, int32_t destCapacity,
971 : const UChar *src, int32_t srcLength,
972 : const char *locale,
973 : UErrorCode *pErrorCode)
974 : {
975 0 : return u_strFoldCase(dest, destCapacity, src, srcLength,
976 0 : foldcase_options(locale), pErrorCode);
977 : }
978 :
979 : /*
980 : * Return the correct u_strFoldCase() options for the given locale.
981 : *
982 : * Unlike the ICU APIs for lowercasing, titlecasing, and uppercasing, case
983 : * folding does not accept a locale. Instead it just supports a single option
984 : * relevant to Turkic languages 'az' and 'tr'; check for those languages.
985 : */
986 : static int32_t
987 200 : foldcase_options(const char *locale)
988 : {
989 200 : uint32 options = U_FOLD_CASE_DEFAULT;
990 : char lang[3];
991 200 : UErrorCode status = U_ZERO_ERROR;
992 :
993 200 : uloc_getLanguage(locale, lang, 3, &status);
994 200 : if (U_SUCCESS(status))
995 : {
996 : /*
997 : * The option name is confusing, but it causes u_strFoldCase to use
998 : * the 'T' mappings, which are ignored for U_FOLD_CASE_DEFAULT.
999 : */
1000 194 : if (strcmp(lang, "tr") == 0 || strcmp(lang, "az") == 0)
1001 6 : options = U_FOLD_CASE_EXCLUDE_SPECIAL_I;
1002 : }
1003 :
1004 200 : return options;
1005 : }
1006 :
1007 : /*
1008 : * strncoll_icu
1009 : *
1010 : * Convert the arguments from the database encoding to UChar strings, then
1011 : * call ucol_strcoll(). An argument length of -1 means that the string is
1012 : * NUL-terminated.
1013 : *
1014 : * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
1015 : * caller should call that instead.
1016 : */
1017 : static int
1018 0 : strncoll_icu(const char *arg1, ssize_t len1,
1019 : const char *arg2, ssize_t len2, pg_locale_t locale)
1020 : {
1021 : char sbuf[TEXTBUFLEN];
1022 0 : char *buf = sbuf;
1023 : int32_t ulen1;
1024 : int32_t ulen2;
1025 : size_t bufsize1;
1026 : size_t bufsize2;
1027 : UChar *uchar1,
1028 : *uchar2;
1029 : int result;
1030 :
1031 : /* if encoding is UTF8, use more efficient strncoll_icu_utf8 */
1032 : #ifdef HAVE_UCOL_STRCOLLUTF8
1033 : Assert(GetDatabaseEncoding() != PG_UTF8);
1034 : #endif
1035 :
1036 0 : init_icu_converter();
1037 :
1038 0 : ulen1 = uchar_length(icu_converter, arg1, len1);
1039 0 : ulen2 = uchar_length(icu_converter, arg2, len2);
1040 :
1041 0 : bufsize1 = (ulen1 + 1) * sizeof(UChar);
1042 0 : bufsize2 = (ulen2 + 1) * sizeof(UChar);
1043 :
1044 0 : if (bufsize1 + bufsize2 > TEXTBUFLEN)
1045 0 : buf = palloc(bufsize1 + bufsize2);
1046 :
1047 0 : uchar1 = (UChar *) buf;
1048 0 : uchar2 = (UChar *) (buf + bufsize1);
1049 :
1050 0 : ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
1051 0 : ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
1052 :
1053 0 : result = ucol_strcoll(locale->icu.ucol,
1054 : uchar1, ulen1,
1055 : uchar2, ulen2);
1056 :
1057 0 : if (buf != sbuf)
1058 0 : pfree(buf);
1059 :
1060 0 : return result;
1061 : }
1062 :
1063 : /* 'srclen' of -1 means the strings are NUL-terminated */
1064 : static size_t
1065 0 : strnxfrm_prefix_icu(char *dest, size_t destsize,
1066 : const char *src, ssize_t srclen,
1067 : pg_locale_t locale)
1068 : {
1069 : char sbuf[TEXTBUFLEN];
1070 0 : char *buf = sbuf;
1071 : UCharIterator iter;
1072 : uint32_t state[2];
1073 : UErrorCode status;
1074 0 : int32_t ulen = -1;
1075 0 : UChar *uchar = NULL;
1076 : size_t uchar_bsize;
1077 : Size result_bsize;
1078 :
1079 : /* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */
1080 : Assert(GetDatabaseEncoding() != PG_UTF8);
1081 :
1082 0 : init_icu_converter();
1083 :
1084 0 : ulen = uchar_length(icu_converter, src, srclen);
1085 :
1086 0 : uchar_bsize = (ulen + 1) * sizeof(UChar);
1087 :
1088 0 : if (uchar_bsize > TEXTBUFLEN)
1089 0 : buf = palloc(uchar_bsize);
1090 :
1091 0 : uchar = (UChar *) buf;
1092 :
1093 0 : ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
1094 :
1095 0 : uiter_setString(&iter, uchar, ulen);
1096 0 : state[0] = state[1] = 0; /* won't need that again */
1097 0 : status = U_ZERO_ERROR;
1098 0 : result_bsize = ucol_nextSortKeyPart(locale->icu.ucol,
1099 : &iter,
1100 : state,
1101 : (uint8_t *) dest,
1102 : destsize,
1103 : &status);
1104 0 : if (U_FAILURE(status))
1105 0 : ereport(ERROR,
1106 : (errmsg("sort key generation failed: %s",
1107 : u_errorName(status))));
1108 :
1109 0 : return result_bsize;
1110 : }
1111 :
1112 : static void
1113 5760 : init_icu_converter(void)
1114 : {
1115 : const char *icu_encoding_name;
1116 : UErrorCode status;
1117 : UConverter *conv;
1118 :
1119 5760 : if (icu_converter)
1120 5754 : return; /* already done */
1121 :
1122 6 : icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
1123 6 : if (!icu_encoding_name)
1124 0 : ereport(ERROR,
1125 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1126 : errmsg("encoding \"%s\" not supported by ICU",
1127 : pg_encoding_to_char(GetDatabaseEncoding()))));
1128 :
1129 6 : status = U_ZERO_ERROR;
1130 6 : conv = ucnv_open(icu_encoding_name, &status);
1131 6 : if (U_FAILURE(status))
1132 0 : ereport(ERROR,
1133 : (errmsg("could not open ICU converter for encoding \"%s\": %s",
1134 : icu_encoding_name, u_errorName(status))));
1135 :
1136 6 : icu_converter = conv;
1137 : }
1138 :
1139 : /*
1140 : * Find length, in UChars, of given string if converted to UChar string.
1141 : *
1142 : * A length of -1 indicates that the input string is NUL-terminated.
1143 : */
1144 : static size_t
1145 5760 : uchar_length(UConverter *converter, const char *str, int32_t len)
1146 : {
1147 5760 : UErrorCode status = U_ZERO_ERROR;
1148 : int32_t ulen;
1149 :
1150 5760 : ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);
1151 5760 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1152 0 : ereport(ERROR,
1153 : (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
1154 5760 : return ulen;
1155 : }
1156 :
1157 : /*
1158 : * Convert the given source string into a UChar string, stored in dest, and
1159 : * return the length (in UChars).
1160 : *
1161 : * A srclen of -1 indicates that the input string is NUL-terminated.
1162 : */
1163 : static int32_t
1164 5760 : uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
1165 : const char *src, int32_t srclen)
1166 : {
1167 5760 : UErrorCode status = U_ZERO_ERROR;
1168 : int32_t ulen;
1169 :
1170 5760 : status = U_ZERO_ERROR;
1171 5760 : ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
1172 5760 : if (U_FAILURE(status))
1173 0 : ereport(ERROR,
1174 : (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
1175 5760 : return ulen;
1176 : }
1177 :
1178 : /*
1179 : * Parse collation attributes from the given locale string and apply them to
1180 : * the open collator.
1181 : *
1182 : * First, the locale string is canonicalized to an ICU format locale ID such
1183 : * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
1184 : * the key-value arguments.
1185 : *
1186 : * Starting with ICU version 54, the attributes are processed automatically by
1187 : * ucol_open(), so this is only necessary for emulating this behavior on older
1188 : * versions.
1189 : */
1190 : pg_attribute_unused()
1191 : static void
1192 0 : icu_set_collation_attributes(UCollator *collator, const char *loc,
1193 : UErrorCode *status)
1194 : {
1195 : int32_t len;
1196 : char *icu_locale_id;
1197 : char *lower_str;
1198 : char *str;
1199 : char *token;
1200 :
1201 : /*
1202 : * The input locale may be a BCP 47 language tag, e.g.
1203 : * "und-u-kc-ks-level1", which expresses the same attributes in a
1204 : * different form. It will be converted to the equivalent ICU format
1205 : * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
1206 : * uloc_canonicalize().
1207 : */
1208 0 : *status = U_ZERO_ERROR;
1209 0 : len = uloc_canonicalize(loc, NULL, 0, status);
1210 0 : icu_locale_id = palloc(len + 1);
1211 0 : *status = U_ZERO_ERROR;
1212 0 : len = uloc_canonicalize(loc, icu_locale_id, len + 1, status);
1213 0 : if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
1214 0 : return;
1215 :
1216 0 : lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
1217 :
1218 0 : pfree(icu_locale_id);
1219 :
1220 0 : str = strchr(lower_str, '@');
1221 0 : if (!str)
1222 0 : return;
1223 0 : str++;
1224 :
1225 0 : while ((token = strsep(&str, ";")))
1226 : {
1227 0 : char *e = strchr(token, '=');
1228 :
1229 0 : if (e)
1230 : {
1231 : char *name;
1232 : char *value;
1233 : UColAttribute uattr;
1234 : UColAttributeValue uvalue;
1235 :
1236 0 : *status = U_ZERO_ERROR;
1237 :
1238 0 : *e = '\0';
1239 0 : name = token;
1240 0 : value = e + 1;
1241 :
1242 : /*
1243 : * See attribute name and value lists in ICU i18n/coll.cpp
1244 : */
1245 0 : if (strcmp(name, "colstrength") == 0)
1246 0 : uattr = UCOL_STRENGTH;
1247 0 : else if (strcmp(name, "colbackwards") == 0)
1248 0 : uattr = UCOL_FRENCH_COLLATION;
1249 0 : else if (strcmp(name, "colcaselevel") == 0)
1250 0 : uattr = UCOL_CASE_LEVEL;
1251 0 : else if (strcmp(name, "colcasefirst") == 0)
1252 0 : uattr = UCOL_CASE_FIRST;
1253 0 : else if (strcmp(name, "colalternate") == 0)
1254 0 : uattr = UCOL_ALTERNATE_HANDLING;
1255 0 : else if (strcmp(name, "colnormalization") == 0)
1256 0 : uattr = UCOL_NORMALIZATION_MODE;
1257 0 : else if (strcmp(name, "colnumeric") == 0)
1258 0 : uattr = UCOL_NUMERIC_COLLATION;
1259 : else
1260 : /* ignore if unknown */
1261 0 : continue;
1262 :
1263 0 : if (strcmp(value, "primary") == 0)
1264 0 : uvalue = UCOL_PRIMARY;
1265 0 : else if (strcmp(value, "secondary") == 0)
1266 0 : uvalue = UCOL_SECONDARY;
1267 0 : else if (strcmp(value, "tertiary") == 0)
1268 0 : uvalue = UCOL_TERTIARY;
1269 0 : else if (strcmp(value, "quaternary") == 0)
1270 0 : uvalue = UCOL_QUATERNARY;
1271 0 : else if (strcmp(value, "identical") == 0)
1272 0 : uvalue = UCOL_IDENTICAL;
1273 0 : else if (strcmp(value, "no") == 0)
1274 0 : uvalue = UCOL_OFF;
1275 0 : else if (strcmp(value, "yes") == 0)
1276 0 : uvalue = UCOL_ON;
1277 0 : else if (strcmp(value, "shifted") == 0)
1278 0 : uvalue = UCOL_SHIFTED;
1279 0 : else if (strcmp(value, "non-ignorable") == 0)
1280 0 : uvalue = UCOL_NON_IGNORABLE;
1281 0 : else if (strcmp(value, "lower") == 0)
1282 0 : uvalue = UCOL_LOWER_FIRST;
1283 0 : else if (strcmp(value, "upper") == 0)
1284 0 : uvalue = UCOL_UPPER_FIRST;
1285 : else
1286 : {
1287 0 : *status = U_ILLEGAL_ARGUMENT_ERROR;
1288 0 : break;
1289 : }
1290 :
1291 0 : ucol_setAttribute(collator, uattr, uvalue, status);
1292 : }
1293 : }
1294 :
1295 0 : pfree(lower_str);
1296 : }
1297 :
1298 : #endif /* USE_ICU */
|