Line data Source code
1 : /*-----------------------------------------------------------------------
2 : *
3 : * PostgreSQL locale utilities for ICU
4 : *
5 : * Portions Copyright (c) 2002-2026, PostgreSQL Global Development Group
6 : *
7 : * src/backend/utils/adt/pg_locale_icu.c
8 : *
9 : *-----------------------------------------------------------------------
10 : */
11 :
12 : #include "postgres.h"
13 :
14 : #ifdef USE_ICU
15 : #include <unicode/ucasemap.h>
16 : #include <unicode/ucnv.h>
17 : #include <unicode/ucol.h>
18 : #include <unicode/ustring.h>
19 :
20 : /*
21 : * ucol_strcollUTF8() was introduced in ICU 50, but it is buggy before ICU 53.
22 : * (see
23 : * <https://www.postgresql.org/message-id/flat/f1438ec6-22aa-4029-9a3b-26f79d330e72%40manitou-mail.org>)
24 : */
25 : #if U_ICU_VERSION_MAJOR_NUM >= 53
26 : #define HAVE_UCOL_STRCOLLUTF8 1
27 : #else
28 : #undef HAVE_UCOL_STRCOLLUTF8
29 : #endif
30 :
31 : #endif
32 :
33 : #include "access/htup_details.h"
34 : #include "catalog/pg_database.h"
35 : #include "catalog/pg_collation.h"
36 : #include "mb/pg_wchar.h"
37 : #include "miscadmin.h"
38 : #include "utils/builtins.h"
39 : #include "utils/formatting.h"
40 : #include "utils/memutils.h"
41 : #include "utils/pg_locale.h"
42 : #include "utils/syscache.h"
43 :
44 : /*
45 : * Size of stack buffer to use for string transformations, used to avoid heap
46 : * allocations in typical cases. This should be large enough that most strings
47 : * will fit, but small enough that we feel comfortable putting it on the
48 : * stack.
49 : */
50 : #define TEXTBUFLEN 1024
51 :
52 : extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
53 :
54 : #ifdef USE_ICU
55 :
56 : extern UCollator *pg_ucol_open(const char *loc_str);
57 : static UCaseMap *pg_ucasemap_open(const char *loc_str);
58 :
59 : static size_t strlower_icu(char *dest, size_t destsize, const char *src,
60 : ssize_t srclen, pg_locale_t locale);
61 : static size_t strtitle_icu(char *dest, size_t destsize, const char *src,
62 : ssize_t srclen, pg_locale_t locale);
63 : static size_t strupper_icu(char *dest, size_t destsize, const char *src,
64 : ssize_t srclen, pg_locale_t locale);
65 : static size_t strfold_icu(char *dest, size_t destsize, const char *src,
66 : ssize_t srclen, pg_locale_t locale);
67 : static size_t strlower_icu_utf8(char *dest, size_t destsize, const char *src,
68 : ssize_t srclen, pg_locale_t locale);
69 : static size_t strtitle_icu_utf8(char *dest, size_t destsize, const char *src,
70 : ssize_t srclen, pg_locale_t locale);
71 : static size_t strupper_icu_utf8(char *dest, size_t destsize, const char *src,
72 : ssize_t srclen, pg_locale_t locale);
73 : static size_t strfold_icu_utf8(char *dest, size_t destsize, const char *src,
74 : ssize_t srclen, pg_locale_t locale);
75 : static size_t downcase_ident_icu(char *dst, size_t dstsize, const char *src,
76 : ssize_t srclen, pg_locale_t locale);
77 : static int strncoll_icu(const char *arg1, ssize_t len1,
78 : const char *arg2, ssize_t len2,
79 : pg_locale_t locale);
80 : static size_t strnxfrm_icu(char *dest, size_t destsize,
81 : const char *src, ssize_t srclen,
82 : pg_locale_t locale);
83 : extern char *get_collation_actual_version_icu(const char *collcollate);
84 :
85 : typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
86 : const UChar *src, int32_t srcLength,
87 : const char *locale,
88 : UErrorCode *pErrorCode);
89 :
90 : /*
91 : * Converter object for converting between ICU's UChar strings and C strings
92 : * in database encoding. Since the database encoding doesn't change, we only
93 : * need one of these per session.
94 : */
95 : static UConverter *icu_converter = NULL;
96 :
97 : static UCollator *make_icu_collator(const char *iculocstr,
98 : const char *icurules);
99 : static int strncoll_icu(const char *arg1, ssize_t len1,
100 : const char *arg2, ssize_t len2,
101 : pg_locale_t locale);
102 : static size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
103 : const char *src, ssize_t srclen,
104 : pg_locale_t locale);
105 : #ifdef HAVE_UCOL_STRCOLLUTF8
106 : static int strncoll_icu_utf8(const char *arg1, ssize_t len1,
107 : const char *arg2, ssize_t len2,
108 : pg_locale_t locale);
109 : #endif
110 : static size_t strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
111 : const char *src, ssize_t srclen,
112 : pg_locale_t locale);
113 : static void init_icu_converter(void);
114 : static size_t uchar_length(UConverter *converter,
115 : const char *str, int32_t len);
116 : static int32_t uchar_convert(UConverter *converter,
117 : UChar *dest, int32_t destlen,
118 : const char *src, int32_t srclen);
119 : static int32_t icu_to_uchar(UChar **buff_uchar, const char *buff,
120 : size_t nbytes);
121 : static size_t icu_from_uchar(char *dest, size_t destsize,
122 : const UChar *buff_uchar, int32_t len_uchar);
123 : static void icu_set_collation_attributes(UCollator *collator, const char *loc,
124 : UErrorCode *status);
125 : static int32_t icu_convert_case(ICU_Convert_Func func, char *dest,
126 : size_t destsize, const char *src,
127 : ssize_t srclen, pg_locale_t locale);
128 : static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
129 : const UChar *src, int32_t srcLength,
130 : const char *locale,
131 : UErrorCode *pErrorCode);
132 : static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
133 : const UChar *src, int32_t srcLength,
134 : const char *locale,
135 : UErrorCode *pErrorCode);
136 : static int32_t foldcase_options(const char *locale);
137 :
138 : /*
139 : * XXX: many of the functions below rely on casts directly from pg_wchar to
140 : * UChar32, which is correct for UTF-8 and LATIN1, but not in general.
141 : */
142 :
143 : static pg_wchar
144 54 : toupper_icu(pg_wchar wc, pg_locale_t locale)
145 : {
146 54 : return u_toupper(wc);
147 : }
148 :
149 : static pg_wchar
150 54 : tolower_icu(pg_wchar wc, pg_locale_t locale)
151 : {
152 54 : return u_tolower(wc);
153 : }
154 :
155 : static const struct collate_methods collate_methods_icu = {
156 : .strncoll = strncoll_icu,
157 : .strnxfrm = strnxfrm_icu,
158 : .strnxfrm_prefix = strnxfrm_prefix_icu,
159 : .strxfrm_is_safe = true,
160 : };
161 :
162 : static const struct collate_methods collate_methods_icu_utf8 = {
163 : #ifdef HAVE_UCOL_STRCOLLUTF8
164 : .strncoll = strncoll_icu_utf8,
165 : #else
166 : .strncoll = strncoll_icu,
167 : #endif
168 : .strnxfrm = strnxfrm_icu,
169 : .strnxfrm_prefix = strnxfrm_prefix_icu_utf8,
170 : .strxfrm_is_safe = true,
171 : };
172 :
173 : static bool
174 6144 : wc_isdigit_icu(pg_wchar wc, pg_locale_t locale)
175 : {
176 6144 : return u_isdigit(wc);
177 : }
178 :
179 : static bool
180 6144 : wc_isalpha_icu(pg_wchar wc, pg_locale_t locale)
181 : {
182 6144 : return u_isalpha(wc);
183 : }
184 :
185 : static bool
186 6144 : wc_isalnum_icu(pg_wchar wc, pg_locale_t locale)
187 : {
188 6144 : return u_isalnum(wc);
189 : }
190 :
191 : static bool
192 6144 : wc_isupper_icu(pg_wchar wc, pg_locale_t locale)
193 : {
194 6144 : return u_isupper(wc);
195 : }
196 :
197 : static bool
198 6144 : wc_islower_icu(pg_wchar wc, pg_locale_t locale)
199 : {
200 6144 : return u_islower(wc);
201 : }
202 :
203 : static bool
204 6144 : wc_isgraph_icu(pg_wchar wc, pg_locale_t locale)
205 : {
206 6144 : return u_isgraph(wc);
207 : }
208 :
209 : static bool
210 6144 : wc_isprint_icu(pg_wchar wc, pg_locale_t locale)
211 : {
212 6144 : return u_isprint(wc);
213 : }
214 :
215 : static bool
216 6144 : wc_ispunct_icu(pg_wchar wc, pg_locale_t locale)
217 : {
218 6144 : return u_ispunct(wc);
219 : }
220 :
221 : static bool
222 6144 : wc_isspace_icu(pg_wchar wc, pg_locale_t locale)
223 : {
224 6144 : return u_isspace(wc);
225 : }
226 :
227 : static bool
228 0 : wc_isxdigit_icu(pg_wchar wc, pg_locale_t locale)
229 : {
230 0 : return u_isxdigit(wc);
231 : }
232 :
233 : static bool
234 63 : wc_iscased_icu(pg_wchar wc, pg_locale_t locale)
235 : {
236 63 : return u_hasBinaryProperty(wc, UCHAR_CASED);
237 : }
238 :
239 : static const struct ctype_methods ctype_methods_icu = {
240 : .strlower = strlower_icu,
241 : .strtitle = strtitle_icu,
242 : .strupper = strupper_icu,
243 : .strfold = strfold_icu,
244 : .downcase_ident = downcase_ident_icu,
245 : .wc_isdigit = wc_isdigit_icu,
246 : .wc_isalpha = wc_isalpha_icu,
247 : .wc_isalnum = wc_isalnum_icu,
248 : .wc_isupper = wc_isupper_icu,
249 : .wc_islower = wc_islower_icu,
250 : .wc_isgraph = wc_isgraph_icu,
251 : .wc_isprint = wc_isprint_icu,
252 : .wc_ispunct = wc_ispunct_icu,
253 : .wc_isspace = wc_isspace_icu,
254 : .wc_isxdigit = wc_isxdigit_icu,
255 : .wc_iscased = wc_iscased_icu,
256 : .wc_toupper = toupper_icu,
257 : .wc_tolower = tolower_icu,
258 : };
259 :
260 : static const struct ctype_methods ctype_methods_icu_utf8 = {
261 : .strlower = strlower_icu_utf8,
262 : .strtitle = strtitle_icu_utf8,
263 : .strupper = strupper_icu_utf8,
264 : .strfold = strfold_icu_utf8,
265 : /* uses plain ASCII semantics for historical reasons */
266 : .downcase_ident = NULL,
267 : .wc_isdigit = wc_isdigit_icu,
268 : .wc_isalpha = wc_isalpha_icu,
269 : .wc_isalnum = wc_isalnum_icu,
270 : .wc_isupper = wc_isupper_icu,
271 : .wc_islower = wc_islower_icu,
272 : .wc_isgraph = wc_isgraph_icu,
273 : .wc_isprint = wc_isprint_icu,
274 : .wc_ispunct = wc_ispunct_icu,
275 : .wc_isspace = wc_isspace_icu,
276 : .wc_isxdigit = wc_isxdigit_icu,
277 : .wc_iscased = wc_iscased_icu,
278 : .wc_toupper = toupper_icu,
279 : .wc_tolower = tolower_icu,
280 : };
281 :
282 : /*
283 : * ICU still depends on libc for compatibility with certain historical
284 : * behavior for single-byte encodings. See downcase_ident_icu().
285 : *
286 : * XXX: consider fixing by decoding the single byte into a code point, and
287 : * using u_tolower().
288 : */
289 : static locale_t
290 0 : make_libc_ctype_locale(const char *ctype)
291 : {
292 : locale_t loc;
293 :
294 : #ifndef WIN32
295 0 : loc = newlocale(LC_CTYPE_MASK, ctype, NULL);
296 : #else
297 : loc = _create_locale(LC_ALL, ctype);
298 : #endif
299 0 : if (!loc)
300 0 : report_newlocale_failure(ctype);
301 :
302 0 : return loc;
303 : }
304 : #endif
305 :
306 : pg_locale_t
307 105 : create_pg_locale_icu(Oid collid, MemoryContext context)
308 : {
309 : #ifdef USE_ICU
310 : bool deterministic;
311 : const char *iculocstr;
312 105 : const char *icurules = NULL;
313 : UCollator *collator;
314 105 : locale_t loc = (locale_t) 0;
315 : pg_locale_t result;
316 :
317 105 : if (collid == DEFAULT_COLLATION_OID)
318 : {
319 : HeapTuple tp;
320 : Datum datum;
321 : bool isnull;
322 :
323 13 : tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
324 13 : if (!HeapTupleIsValid(tp))
325 0 : elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
326 :
327 : /* default database collation is always deterministic */
328 13 : deterministic = true;
329 13 : datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
330 : Anum_pg_database_datlocale);
331 13 : iculocstr = TextDatumGetCString(datum);
332 13 : datum = SysCacheGetAttr(DATABASEOID, tp,
333 : Anum_pg_database_daticurules, &isnull);
334 13 : if (!isnull)
335 0 : icurules = TextDatumGetCString(datum);
336 :
337 : /* libc only needed for default locale and single-byte encoding */
338 13 : if (pg_database_encoding_max_length() == 1)
339 : {
340 : const char *ctype;
341 :
342 0 : datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
343 : Anum_pg_database_datctype);
344 0 : ctype = TextDatumGetCString(datum);
345 :
346 0 : loc = make_libc_ctype_locale(ctype);
347 : }
348 :
349 13 : ReleaseSysCache(tp);
350 : }
351 : else
352 : {
353 : Form_pg_collation collform;
354 : HeapTuple tp;
355 : Datum datum;
356 : bool isnull;
357 :
358 92 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
359 92 : if (!HeapTupleIsValid(tp))
360 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
361 92 : collform = (Form_pg_collation) GETSTRUCT(tp);
362 92 : deterministic = collform->collisdeterministic;
363 92 : datum = SysCacheGetAttrNotNull(COLLOID, tp,
364 : Anum_pg_collation_colllocale);
365 92 : iculocstr = TextDatumGetCString(datum);
366 92 : datum = SysCacheGetAttr(COLLOID, tp,
367 : Anum_pg_collation_collicurules, &isnull);
368 92 : if (!isnull)
369 6 : icurules = TextDatumGetCString(datum);
370 :
371 92 : ReleaseSysCache(tp);
372 : }
373 :
374 105 : collator = make_icu_collator(iculocstr, icurules);
375 :
376 100 : result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
377 100 : result->icu.locale = MemoryContextStrdup(context, iculocstr);
378 100 : result->icu.ucol = collator;
379 100 : result->icu.lt = loc;
380 100 : result->deterministic = deterministic;
381 100 : result->collate_is_c = false;
382 100 : result->ctype_is_c = false;
383 100 : if (GetDatabaseEncoding() == PG_UTF8)
384 : {
385 100 : result->icu.ucasemap = pg_ucasemap_open(iculocstr);
386 100 : result->collate = &collate_methods_icu_utf8;
387 100 : result->ctype = &ctype_methods_icu_utf8;
388 : }
389 : else
390 : {
391 0 : result->collate = &collate_methods_icu;
392 0 : result->ctype = &ctype_methods_icu;
393 : }
394 :
395 100 : return result;
396 : #else
397 : /* could get here if a collation was created by a build with ICU */
398 : ereport(ERROR,
399 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
400 : errmsg("ICU is not supported in this build")));
401 :
402 : return NULL;
403 : #endif
404 : }
405 :
406 : #ifdef USE_ICU
407 :
408 : /*
409 : * Check locale string and fix it if necessary. Returns a new palloc'd string.
410 : *
411 : * In ICU versions 54 and earlier, "und" is not a recognized spelling of the
412 : * root locale. If the first component of the locale is "und", replace with
413 : * "root" before opening.
414 : */
415 : static char *
416 43140 : fix_icu_locale_str(const char *loc_str)
417 : {
418 : /*
419 : * Must never open default collator, because it depends on the environment
420 : * and may change at any time. Should not happen, but check here to catch
421 : * bugs that might be hard to catch otherwise.
422 : *
423 : * NB: the default collator is not the same as the collator for the root
424 : * locale. The root locale may be specified as the empty string, "und", or
425 : * "root". The default collator is opened by passing NULL to ucol_open().
426 : */
427 43140 : if (loc_str == NULL)
428 0 : elog(ERROR, "opening default collator is not supported");
429 :
430 : if (U_ICU_VERSION_MAJOR_NUM < 55)
431 : {
432 : char lang[ULOC_LANG_CAPACITY];
433 : UErrorCode status = U_ZERO_ERROR;
434 :
435 : uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
436 : if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
437 : {
438 : ereport(ERROR,
439 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
440 : errmsg("could not get language from locale \"%s\": %s",
441 : loc_str, u_errorName(status))));
442 : }
443 :
444 : if (strcmp(lang, "und") == 0)
445 : {
446 : const char *remainder = loc_str + strlen("und");
447 : char *fixed_str;
448 :
449 : fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
450 : strcpy(fixed_str, "root");
451 : strcat(fixed_str, remainder);
452 :
453 : return fixed_str;
454 : }
455 : }
456 :
457 43140 : return pstrdup(loc_str);
458 : }
459 :
460 : /*
461 : * Wrapper around ucol_open() to handle API differences for older ICU
462 : * versions.
463 : *
464 : * Ensure that no path leaks a UCollator.
465 : */
466 : UCollator *
467 43040 : pg_ucol_open(const char *loc_str)
468 : {
469 : UCollator *collator;
470 : UErrorCode status;
471 : char *fixed_str;
472 :
473 43040 : fixed_str = fix_icu_locale_str(loc_str);
474 :
475 43040 : status = U_ZERO_ERROR;
476 43040 : collator = ucol_open(fixed_str, &status);
477 43040 : if (U_FAILURE(status))
478 6 : ereport(ERROR,
479 : /* use original string for error report */
480 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
481 : errmsg("could not open collator for locale \"%s\": %s",
482 : loc_str, u_errorName(status))));
483 :
484 : if (U_ICU_VERSION_MAJOR_NUM < 54)
485 : {
486 : status = U_ZERO_ERROR;
487 : icu_set_collation_attributes(collator, fixed_str, &status);
488 :
489 : /*
490 : * Pretend the error came from ucol_open(), for consistent error
491 : * message across ICU versions.
492 : */
493 : if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
494 : {
495 : ucol_close(collator);
496 : ereport(ERROR,
497 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
498 : errmsg("could not open collator for locale \"%s\": %s",
499 : loc_str, u_errorName(status))));
500 : }
501 : }
502 :
503 43034 : pfree(fixed_str);
504 :
505 43034 : return collator;
506 : }
507 :
508 : /*
509 : * Wrapper around ucasemap_open() to handle API differences for older ICU
510 : * versions.
511 : *
512 : * Additionally makes sure we get the right options for case folding.
513 : */
514 : static UCaseMap *
515 100 : pg_ucasemap_open(const char *loc_str)
516 : {
517 100 : UErrorCode status = U_ZERO_ERROR;
518 : UCaseMap *casemap;
519 : char *fixed_str;
520 :
521 100 : fixed_str = fix_icu_locale_str(loc_str);
522 :
523 100 : casemap = ucasemap_open(fixed_str, foldcase_options(fixed_str), &status);
524 100 : if (U_FAILURE(status))
525 : /* use original string for error report */
526 0 : ereport(ERROR,
527 : errcode(ERRCODE_INVALID_PARAMETER_VALUE),
528 : errmsg("could not open casemap for locale \"%s\": %s",
529 : loc_str, u_errorName(status)));
530 :
531 100 : pfree(fixed_str);
532 :
533 100 : return casemap;
534 : }
535 :
536 : /*
537 : * Create a UCollator with the given locale string and rules.
538 : *
539 : * Ensure that no path leaks a UCollator.
540 : */
541 : static UCollator *
542 105 : make_icu_collator(const char *iculocstr, const char *icurules)
543 : {
544 105 : if (!icurules)
545 : {
546 : /* simple case without rules */
547 99 : return pg_ucol_open(iculocstr);
548 : }
549 : else
550 : {
551 : UCollator *collator_std_rules;
552 : UCollator *collator_all_rules;
553 : const UChar *std_rules;
554 : UChar *my_rules;
555 : UChar *all_rules;
556 : int32_t length;
557 : int32_t total;
558 : UErrorCode status;
559 :
560 : /*
561 : * If rules are specified, we extract the rules of the standard
562 : * collation, add our own rules, and make a new collator with the
563 : * combined rules.
564 : */
565 6 : icu_to_uchar(&my_rules, icurules, strlen(icurules));
566 :
567 6 : collator_std_rules = pg_ucol_open(iculocstr);
568 :
569 6 : std_rules = ucol_getRules(collator_std_rules, &length);
570 :
571 6 : total = u_strlen(std_rules) + u_strlen(my_rules) + 1;
572 :
573 : /* avoid leaking collator on OOM */
574 6 : all_rules = palloc_extended(sizeof(UChar) * total, MCXT_ALLOC_NO_OOM);
575 6 : if (!all_rules)
576 : {
577 0 : ucol_close(collator_std_rules);
578 0 : ereport(ERROR,
579 : (errcode(ERRCODE_OUT_OF_MEMORY),
580 : errmsg("out of memory")));
581 : }
582 :
583 6 : u_strcpy(all_rules, std_rules);
584 6 : u_strcat(all_rules, my_rules);
585 :
586 6 : ucol_close(collator_std_rules);
587 :
588 6 : status = U_ZERO_ERROR;
589 6 : collator_all_rules = ucol_openRules(all_rules, u_strlen(all_rules),
590 : UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,
591 : NULL, &status);
592 6 : if (U_FAILURE(status))
593 : {
594 3 : ereport(ERROR,
595 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
596 : errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
597 : iculocstr, icurules, u_errorName(status))));
598 : }
599 :
600 3 : pfree(my_rules);
601 3 : pfree(all_rules);
602 3 : return collator_all_rules;
603 : }
604 : }
605 :
606 : static size_t
607 0 : strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
608 : pg_locale_t locale)
609 : {
610 0 : return icu_convert_case(u_strToLower, dest, destsize, src, srclen, locale);
611 : }
612 :
613 : static size_t
614 0 : strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
615 : pg_locale_t locale)
616 : {
617 0 : return icu_convert_case(u_strToTitle_default_BI, dest, destsize, src, srclen, locale);
618 : }
619 :
620 : static size_t
621 0 : strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
622 : pg_locale_t locale)
623 : {
624 0 : return icu_convert_case(u_strToUpper, dest, destsize, src, srclen, locale);
625 : }
626 :
627 : static size_t
628 0 : strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
629 : pg_locale_t locale)
630 : {
631 0 : return icu_convert_case(u_strFoldCase_default, dest, destsize, src, srclen, locale);
632 : }
633 :
634 : static size_t
635 264 : strlower_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
636 : pg_locale_t locale)
637 : {
638 264 : UErrorCode status = U_ZERO_ERROR;
639 : int32_t needed;
640 :
641 264 : needed = ucasemap_utf8ToLower(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
642 264 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
643 0 : ereport(ERROR,
644 : errmsg("case conversion failed: %s", u_errorName(status)));
645 264 : return needed;
646 : }
647 :
648 : static size_t
649 15 : strtitle_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
650 : pg_locale_t locale)
651 : {
652 15 : UErrorCode status = U_ZERO_ERROR;
653 : int32_t needed;
654 :
655 15 : needed = ucasemap_utf8ToTitle(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
656 15 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
657 0 : ereport(ERROR,
658 : errmsg("case conversion failed: %s", u_errorName(status)));
659 15 : return needed;
660 : }
661 :
662 : static size_t
663 27 : strupper_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
664 : pg_locale_t locale)
665 : {
666 27 : UErrorCode status = U_ZERO_ERROR;
667 : int32_t needed;
668 :
669 27 : needed = ucasemap_utf8ToUpper(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
670 27 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
671 0 : ereport(ERROR,
672 : errmsg("case conversion failed: %s", u_errorName(status)));
673 27 : return needed;
674 : }
675 :
676 : static size_t
677 6 : strfold_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
678 : pg_locale_t locale)
679 : {
680 6 : UErrorCode status = U_ZERO_ERROR;
681 : int32_t needed;
682 :
683 6 : needed = ucasemap_utf8FoldCase(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
684 6 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
685 0 : ereport(ERROR,
686 : errmsg("case conversion failed: %s", u_errorName(status)));
687 6 : return needed;
688 : }
689 :
690 : /*
691 : * For historical compatibility, behavior is not multibyte-aware.
692 : *
693 : * NB: uses libc tolower() for single-byte encodings (also for historical
694 : * compatibility), and therefore relies on the global LC_CTYPE setting.
695 : */
696 : static size_t
697 0 : downcase_ident_icu(char *dst, size_t dstsize, const char *src,
698 : ssize_t srclen, pg_locale_t locale)
699 : {
700 : int i;
701 : bool libc_lower;
702 0 : locale_t lt = locale->icu.lt;
703 :
704 0 : libc_lower = lt && (pg_database_encoding_max_length() == 1);
705 :
706 0 : for (i = 0; i < srclen && i < dstsize; i++)
707 : {
708 0 : unsigned char ch = (unsigned char) src[i];
709 :
710 0 : if (ch >= 'A' && ch <= 'Z')
711 0 : ch = pg_ascii_tolower(ch);
712 0 : else if (libc_lower && IS_HIGHBIT_SET(ch) && isupper_l(ch, lt))
713 0 : ch = tolower_l(ch, lt);
714 0 : dst[i] = (char) ch;
715 : }
716 :
717 0 : if (i < dstsize)
718 0 : dst[i] = '\0';
719 :
720 0 : return srclen;
721 : }
722 :
/*
 * strncoll_icu_utf8
 *
 * Compare two UTF-8 strings with ucol_strcollUTF8() using the locale's
 * collator.  Only compiled when the ICU version provides a usable
 * ucol_strcollUTF8().  An argument length of -1 means the string is
 * NUL-terminated.
 */
#ifdef HAVE_UCOL_STRCOLLUTF8
int
strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
				  pg_locale_t locale)
{
	UErrorCode	err = U_ZERO_ERROR;
	int			cmp;

	Assert(GetDatabaseEncoding() == PG_UTF8);

	cmp = ucol_strcollUTF8(locale->icu.ucol, arg1, len1, arg2, len2, &err);

	if (U_FAILURE(err))
	{
		ereport(ERROR,
				(errmsg("collation failed: %s", u_errorName(err))));
	}

	return cmp;
}
#endif
752 :
753 : /* 'srclen' of -1 means the strings are NUL-terminated */
754 : size_t
755 2874 : strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
756 : pg_locale_t locale)
757 : {
758 : char sbuf[TEXTBUFLEN];
759 2874 : char *buf = sbuf;
760 : UChar *uchar;
761 : int32_t ulen;
762 : size_t uchar_bsize;
763 : Size result_bsize;
764 :
765 2874 : init_icu_converter();
766 :
767 2874 : ulen = uchar_length(icu_converter, src, srclen);
768 :
769 2874 : uchar_bsize = (ulen + 1) * sizeof(UChar);
770 :
771 2874 : if (uchar_bsize > TEXTBUFLEN)
772 0 : buf = palloc(uchar_bsize);
773 :
774 2874 : uchar = (UChar *) buf;
775 :
776 2874 : ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
777 :
778 2874 : result_bsize = ucol_getSortKey(locale->icu.ucol,
779 : uchar, ulen,
780 : (uint8_t *) dest, destsize);
781 :
782 : /*
783 : * ucol_getSortKey() counts the nul-terminator in the result length, but
784 : * this function should not.
785 : */
786 : Assert(result_bsize > 0);
787 2874 : result_bsize--;
788 :
789 2874 : if (buf != sbuf)
790 0 : pfree(buf);
791 :
792 : /* if dest is defined, it should be nul-terminated */
793 : Assert(result_bsize >= destsize || dest[result_bsize] == '\0');
794 :
795 2874 : return result_bsize;
796 : }
797 :
798 : /* 'srclen' of -1 means the strings are NUL-terminated */
799 : size_t
800 834 : strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
801 : const char *src, ssize_t srclen,
802 : pg_locale_t locale)
803 : {
804 : size_t result;
805 : UCharIterator iter;
806 : uint32_t state[2];
807 : UErrorCode status;
808 :
809 : Assert(GetDatabaseEncoding() == PG_UTF8);
810 :
811 834 : uiter_setUTF8(&iter, src, srclen);
812 834 : state[0] = state[1] = 0; /* won't need that again */
813 834 : status = U_ZERO_ERROR;
814 834 : result = ucol_nextSortKeyPart(locale->icu.ucol,
815 : &iter,
816 : state,
817 : (uint8_t *) dest,
818 : destsize,
819 : &status);
820 834 : if (U_FAILURE(status))
821 0 : ereport(ERROR,
822 : (errmsg("sort key generation failed: %s",
823 : u_errorName(status))));
824 :
825 834 : return result;
826 : }
827 :
828 : char *
829 42862 : get_collation_actual_version_icu(const char *collcollate)
830 : {
831 : UCollator *collator;
832 : UVersionInfo versioninfo;
833 : char buf[U_MAX_VERSION_STRING_LENGTH];
834 :
835 42862 : collator = pg_ucol_open(collcollate);
836 :
837 42862 : ucol_getVersion(collator, versioninfo);
838 42862 : ucol_close(collator);
839 :
840 42862 : u_versionToString(versioninfo, buf);
841 42862 : return pstrdup(buf);
842 : }
843 :
844 : /*
845 : * Convert a string in the database encoding into a string of UChars.
846 : *
847 : * The source string at buff is of length nbytes
848 : * (it needn't be nul-terminated)
849 : *
850 : * *buff_uchar receives a pointer to the palloc'd result string, and
851 : * the function's result is the number of UChars generated.
852 : *
853 : * The result string is nul-terminated, though most callers rely on the
854 : * result length instead.
855 : */
856 : static int32_t
857 6 : icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
858 : {
859 : int32_t len_uchar;
860 :
861 6 : init_icu_converter();
862 :
863 6 : len_uchar = uchar_length(icu_converter, buff, nbytes);
864 :
865 6 : *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
866 6 : len_uchar = uchar_convert(icu_converter,
867 : *buff_uchar, len_uchar + 1, buff, nbytes);
868 :
869 6 : return len_uchar;
870 : }
871 :
872 : /*
873 : * Convert a string of UChars into the database encoding.
874 : *
875 : * The source string at buff_uchar is of length len_uchar
876 : * (it needn't be nul-terminated)
877 : *
878 : * *result receives a pointer to the palloc'd result string, and the
879 : * function's result is the number of bytes generated (not counting nul).
880 : *
881 : * The result string is nul-terminated.
882 : */
883 : static size_t
884 0 : icu_from_uchar(char *dest, size_t destsize, const UChar *buff_uchar, int32_t len_uchar)
885 : {
886 : UErrorCode status;
887 : int32_t len_result;
888 :
889 0 : init_icu_converter();
890 :
891 0 : status = U_ZERO_ERROR;
892 0 : len_result = ucnv_fromUChars(icu_converter, NULL, 0,
893 : buff_uchar, len_uchar, &status);
894 0 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
895 0 : ereport(ERROR,
896 : (errmsg("%s failed: %s", "ucnv_fromUChars",
897 : u_errorName(status))));
898 :
899 0 : if (len_result + 1 > destsize)
900 0 : return len_result;
901 :
902 0 : status = U_ZERO_ERROR;
903 0 : len_result = ucnv_fromUChars(icu_converter, dest, len_result + 1,
904 : buff_uchar, len_uchar, &status);
905 0 : if (U_FAILURE(status) ||
906 0 : status == U_STRING_NOT_TERMINATED_WARNING)
907 0 : ereport(ERROR,
908 : (errmsg("%s failed: %s", "ucnv_fromUChars",
909 : u_errorName(status))));
910 :
911 0 : return len_result;
912 : }
913 :
914 : static int32_t
915 0 : convert_case_uchar(ICU_Convert_Func func, pg_locale_t mylocale,
916 : UChar **buff_dest, UChar *buff_source, int32_t len_source)
917 : {
918 : UErrorCode status;
919 : int32_t len_dest;
920 :
921 0 : len_dest = len_source; /* try first with same length */
922 0 : *buff_dest = palloc(len_dest * sizeof(**buff_dest));
923 0 : status = U_ZERO_ERROR;
924 0 : len_dest = func(*buff_dest, len_dest, buff_source, len_source,
925 : mylocale->icu.locale, &status);
926 0 : if (status == U_BUFFER_OVERFLOW_ERROR)
927 : {
928 : /* try again with adjusted length */
929 0 : pfree(*buff_dest);
930 0 : *buff_dest = palloc(len_dest * sizeof(**buff_dest));
931 0 : status = U_ZERO_ERROR;
932 0 : len_dest = func(*buff_dest, len_dest, buff_source, len_source,
933 : mylocale->icu.locale, &status);
934 : }
935 0 : if (U_FAILURE(status))
936 0 : ereport(ERROR,
937 : (errmsg("case conversion failed: %s", u_errorName(status))));
938 0 : return len_dest;
939 : }
940 :
941 : static int32_t
942 0 : icu_convert_case(ICU_Convert_Func func, char *dest, size_t destsize,
943 : const char *src, ssize_t srclen, pg_locale_t locale)
944 : {
945 : int32_t len_uchar;
946 : int32_t len_conv;
947 : UChar *buff_uchar;
948 : UChar *buff_conv;
949 : size_t result_len;
950 :
951 0 : len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
952 0 : len_conv = convert_case_uchar(func, locale, &buff_conv,
953 : buff_uchar, len_uchar);
954 0 : result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
955 0 : pfree(buff_uchar);
956 0 : pfree(buff_conv);
957 :
958 0 : return result_len;
959 : }
960 :
961 : static int32_t
962 0 : u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
963 : const UChar *src, int32_t srcLength,
964 : const char *locale,
965 : UErrorCode *pErrorCode)
966 : {
967 0 : return u_strToTitle(dest, destCapacity, src, srcLength,
968 : NULL, locale, pErrorCode);
969 : }
970 :
971 : static int32_t
972 0 : u_strFoldCase_default(UChar *dest, int32_t destCapacity,
973 : const UChar *src, int32_t srcLength,
974 : const char *locale,
975 : UErrorCode *pErrorCode)
976 : {
977 0 : return u_strFoldCase(dest, destCapacity, src, srcLength,
978 0 : foldcase_options(locale), pErrorCode);
979 : }
980 :
981 : /*
982 : * Return the correct u_strFoldCase() options for the given locale.
983 : *
984 : * Unlike the ICU APIs for lowercasing, titlecasing, and uppercasing, case
985 : * folding does not accept a locale. Instead it just supports a single option
986 : * relevant to Turkic languages 'az' and 'tr'; check for those languages.
987 : */
988 : static int32_t
989 100 : foldcase_options(const char *locale)
990 : {
991 100 : uint32 options = U_FOLD_CASE_DEFAULT;
992 : char lang[3];
993 100 : UErrorCode status = U_ZERO_ERROR;
994 :
995 100 : uloc_getLanguage(locale, lang, 3, &status);
996 100 : if (U_SUCCESS(status))
997 : {
998 : /*
999 : * The option name is confusing, but it causes u_strFoldCase to use
1000 : * the 'T' mappings, which are ignored for U_FOLD_CASE_DEFAULT.
1001 : */
1002 97 : if (strcmp(lang, "tr") == 0 || strcmp(lang, "az") == 0)
1003 3 : options = U_FOLD_CASE_EXCLUDE_SPECIAL_I;
1004 : }
1005 :
1006 100 : return options;
1007 : }
1008 :
1009 : /*
1010 : * strncoll_icu
1011 : *
1012 : * Convert the arguments from the database encoding to UChar strings, then
1013 : * call ucol_strcoll(). An argument length of -1 means that the string is
1014 : * NUL-terminated.
1015 : *
1016 : * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
1017 : * caller should call that instead.
1018 : */
1019 : static int
1020 0 : strncoll_icu(const char *arg1, ssize_t len1,
1021 : const char *arg2, ssize_t len2, pg_locale_t locale)
1022 : {
1023 : char sbuf[TEXTBUFLEN];
1024 0 : char *buf = sbuf;
1025 : int32_t ulen1;
1026 : int32_t ulen2;
1027 : size_t bufsize1;
1028 : size_t bufsize2;
1029 : UChar *uchar1,
1030 : *uchar2;
1031 : int result;
1032 :
1033 : /* if encoding is UTF8, use more efficient strncoll_icu_utf8 */
1034 : #ifdef HAVE_UCOL_STRCOLLUTF8
1035 : Assert(GetDatabaseEncoding() != PG_UTF8);
1036 : #endif
1037 :
1038 0 : init_icu_converter();
1039 :
1040 0 : ulen1 = uchar_length(icu_converter, arg1, len1);
1041 0 : ulen2 = uchar_length(icu_converter, arg2, len2);
1042 :
1043 0 : bufsize1 = (ulen1 + 1) * sizeof(UChar);
1044 0 : bufsize2 = (ulen2 + 1) * sizeof(UChar);
1045 :
1046 0 : if (bufsize1 + bufsize2 > TEXTBUFLEN)
1047 0 : buf = palloc(bufsize1 + bufsize2);
1048 :
1049 0 : uchar1 = (UChar *) buf;
1050 0 : uchar2 = (UChar *) (buf + bufsize1);
1051 :
1052 0 : ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
1053 0 : ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
1054 :
1055 0 : result = ucol_strcoll(locale->icu.ucol,
1056 : uchar1, ulen1,
1057 : uchar2, ulen2);
1058 :
1059 0 : if (buf != sbuf)
1060 0 : pfree(buf);
1061 :
1062 0 : return result;
1063 : }
1064 :
1065 : /* 'srclen' of -1 means the strings are NUL-terminated */
1066 : static size_t
1067 0 : strnxfrm_prefix_icu(char *dest, size_t destsize,
1068 : const char *src, ssize_t srclen,
1069 : pg_locale_t locale)
1070 : {
1071 : char sbuf[TEXTBUFLEN];
1072 0 : char *buf = sbuf;
1073 : UCharIterator iter;
1074 : uint32_t state[2];
1075 : UErrorCode status;
1076 0 : int32_t ulen = -1;
1077 0 : UChar *uchar = NULL;
1078 : size_t uchar_bsize;
1079 : Size result_bsize;
1080 :
1081 : /* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */
1082 : Assert(GetDatabaseEncoding() != PG_UTF8);
1083 :
1084 0 : init_icu_converter();
1085 :
1086 0 : ulen = uchar_length(icu_converter, src, srclen);
1087 :
1088 0 : uchar_bsize = (ulen + 1) * sizeof(UChar);
1089 :
1090 0 : if (uchar_bsize > TEXTBUFLEN)
1091 0 : buf = palloc(uchar_bsize);
1092 :
1093 0 : uchar = (UChar *) buf;
1094 :
1095 0 : ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
1096 :
1097 0 : uiter_setString(&iter, uchar, ulen);
1098 0 : state[0] = state[1] = 0; /* won't need that again */
1099 0 : status = U_ZERO_ERROR;
1100 0 : result_bsize = ucol_nextSortKeyPart(locale->icu.ucol,
1101 : &iter,
1102 : state,
1103 : (uint8_t *) dest,
1104 : destsize,
1105 : &status);
1106 0 : if (U_FAILURE(status))
1107 0 : ereport(ERROR,
1108 : (errmsg("sort key generation failed: %s",
1109 : u_errorName(status))));
1110 :
1111 0 : if (buf != sbuf)
1112 0 : pfree(buf);
1113 :
1114 0 : return result_bsize;
1115 : }
1116 :
1117 : static void
1118 2880 : init_icu_converter(void)
1119 : {
1120 : const char *icu_encoding_name;
1121 : UErrorCode status;
1122 : UConverter *conv;
1123 :
1124 2880 : if (icu_converter)
1125 2877 : return; /* already done */
1126 :
1127 3 : icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
1128 3 : if (!icu_encoding_name)
1129 0 : ereport(ERROR,
1130 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1131 : errmsg("encoding \"%s\" not supported by ICU",
1132 : pg_encoding_to_char(GetDatabaseEncoding()))));
1133 :
1134 3 : status = U_ZERO_ERROR;
1135 3 : conv = ucnv_open(icu_encoding_name, &status);
1136 3 : if (U_FAILURE(status))
1137 0 : ereport(ERROR,
1138 : (errmsg("could not open ICU converter for encoding \"%s\": %s",
1139 : icu_encoding_name, u_errorName(status))));
1140 :
1141 3 : icu_converter = conv;
1142 : }
1143 :
1144 : /*
1145 : * Find length, in UChars, of given string if converted to UChar string.
1146 : *
1147 : * A length of -1 indicates that the input string is NUL-terminated.
1148 : */
1149 : static size_t
1150 2880 : uchar_length(UConverter *converter, const char *str, int32_t len)
1151 : {
1152 2880 : UErrorCode status = U_ZERO_ERROR;
1153 : int32_t ulen;
1154 :
1155 2880 : ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);
1156 2880 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1157 0 : ereport(ERROR,
1158 : (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
1159 2880 : return ulen;
1160 : }
1161 :
1162 : /*
1163 : * Convert the given source string into a UChar string, stored in dest, and
1164 : * return the length (in UChars).
1165 : *
1166 : * A srclen of -1 indicates that the input string is NUL-terminated.
1167 : */
1168 : static int32_t
1169 2880 : uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
1170 : const char *src, int32_t srclen)
1171 : {
1172 2880 : UErrorCode status = U_ZERO_ERROR;
1173 : int32_t ulen;
1174 :
1175 2880 : status = U_ZERO_ERROR;
1176 2880 : ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
1177 2880 : if (U_FAILURE(status))
1178 0 : ereport(ERROR,
1179 : (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
1180 2880 : return ulen;
1181 : }
1182 :
1183 : /*
1184 : * Parse collation attributes from the given locale string and apply them to
1185 : * the open collator.
1186 : *
1187 : * First, the locale string is canonicalized to an ICU format locale ID such
1188 : * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
1189 : * the key-value arguments.
1190 : *
1191 : * Starting with ICU version 54, the attributes are processed automatically by
1192 : * ucol_open(), so this is only necessary for emulating this behavior on older
1193 : * versions.
1194 : */
1195 : pg_attribute_unused()
1196 : static void
1197 0 : icu_set_collation_attributes(UCollator *collator, const char *loc,
1198 : UErrorCode *status)
1199 : {
1200 : int32_t len;
1201 : char *icu_locale_id;
1202 : char *lower_str;
1203 : char *str;
1204 : char *token;
1205 :
1206 : /*
1207 : * The input locale may be a BCP 47 language tag, e.g.
1208 : * "und-u-kc-ks-level1", which expresses the same attributes in a
1209 : * different form. It will be converted to the equivalent ICU format
1210 : * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
1211 : * uloc_canonicalize().
1212 : */
1213 0 : *status = U_ZERO_ERROR;
1214 0 : len = uloc_canonicalize(loc, NULL, 0, status);
1215 0 : icu_locale_id = palloc(len + 1);
1216 0 : *status = U_ZERO_ERROR;
1217 0 : len = uloc_canonicalize(loc, icu_locale_id, len + 1, status);
1218 0 : if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
1219 0 : return;
1220 :
1221 0 : lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
1222 :
1223 0 : pfree(icu_locale_id);
1224 :
1225 0 : str = strchr(lower_str, '@');
1226 0 : if (!str)
1227 0 : return;
1228 0 : str++;
1229 :
1230 0 : while ((token = strsep(&str, ";")))
1231 : {
1232 0 : char *e = strchr(token, '=');
1233 :
1234 0 : if (e)
1235 : {
1236 : char *name;
1237 : char *value;
1238 : UColAttribute uattr;
1239 : UColAttributeValue uvalue;
1240 :
1241 0 : *status = U_ZERO_ERROR;
1242 :
1243 0 : *e = '\0';
1244 0 : name = token;
1245 0 : value = e + 1;
1246 :
1247 : /*
1248 : * See attribute name and value lists in ICU i18n/coll.cpp
1249 : */
1250 0 : if (strcmp(name, "colstrength") == 0)
1251 0 : uattr = UCOL_STRENGTH;
1252 0 : else if (strcmp(name, "colbackwards") == 0)
1253 0 : uattr = UCOL_FRENCH_COLLATION;
1254 0 : else if (strcmp(name, "colcaselevel") == 0)
1255 0 : uattr = UCOL_CASE_LEVEL;
1256 0 : else if (strcmp(name, "colcasefirst") == 0)
1257 0 : uattr = UCOL_CASE_FIRST;
1258 0 : else if (strcmp(name, "colalternate") == 0)
1259 0 : uattr = UCOL_ALTERNATE_HANDLING;
1260 0 : else if (strcmp(name, "colnormalization") == 0)
1261 0 : uattr = UCOL_NORMALIZATION_MODE;
1262 0 : else if (strcmp(name, "colnumeric") == 0)
1263 0 : uattr = UCOL_NUMERIC_COLLATION;
1264 : else
1265 : /* ignore if unknown */
1266 0 : continue;
1267 :
1268 0 : if (strcmp(value, "primary") == 0)
1269 0 : uvalue = UCOL_PRIMARY;
1270 0 : else if (strcmp(value, "secondary") == 0)
1271 0 : uvalue = UCOL_SECONDARY;
1272 0 : else if (strcmp(value, "tertiary") == 0)
1273 0 : uvalue = UCOL_TERTIARY;
1274 0 : else if (strcmp(value, "quaternary") == 0)
1275 0 : uvalue = UCOL_QUATERNARY;
1276 0 : else if (strcmp(value, "identical") == 0)
1277 0 : uvalue = UCOL_IDENTICAL;
1278 0 : else if (strcmp(value, "no") == 0)
1279 0 : uvalue = UCOL_OFF;
1280 0 : else if (strcmp(value, "yes") == 0)
1281 0 : uvalue = UCOL_ON;
1282 0 : else if (strcmp(value, "shifted") == 0)
1283 0 : uvalue = UCOL_SHIFTED;
1284 0 : else if (strcmp(value, "non-ignorable") == 0)
1285 0 : uvalue = UCOL_NON_IGNORABLE;
1286 0 : else if (strcmp(value, "lower") == 0)
1287 0 : uvalue = UCOL_LOWER_FIRST;
1288 0 : else if (strcmp(value, "upper") == 0)
1289 0 : uvalue = UCOL_UPPER_FIRST;
1290 : else
1291 : {
1292 0 : *status = U_ILLEGAL_ARGUMENT_ERROR;
1293 0 : break;
1294 : }
1295 :
1296 0 : ucol_setAttribute(collator, uattr, uvalue, status);
1297 : }
1298 : }
1299 :
1300 0 : pfree(lower_str);
1301 : }
1302 :
1303 : #endif /* USE_ICU */
|