Line data Source code
1 : /*-----------------------------------------------------------------------
2 : *
3 : * PostgreSQL locale utilities
4 : *
5 : * Portions Copyright (c) 2002-2023, PostgreSQL Global Development Group
6 : *
7 : * src/backend/utils/adt/pg_locale.c
8 : *
9 : *-----------------------------------------------------------------------
10 : */
11 :
12 : /*----------
13 : * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14 : * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15 : * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16 : * toupper(), etc. are always in the same fixed locale.
17 : *
18 : * LC_MESSAGES is settable at run time and will take effect
19 : * immediately.
20 : *
21 : * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
22 : * settable at run-time. However, we don't actually set those locale
23 : * categories permanently. This would have bizarre effects like no
24 : * longer accepting standard floating-point literals in some locales.
25 : * Instead, we only set these locale categories briefly when needed,
26 : * cache the required information obtained from localeconv() or
27 : * strftime(), and then set the locale categories back to "C".
28 : * The cached information is only used by the formatting functions
29 : * (to_char, etc.) and the money type. For the user, this should all be
30 : * transparent.
31 : *
32 : * !!! NOW HEAR THIS !!!
33 : *
34 : * We've been bitten repeatedly by this bug, so let's try to keep it in
35 : * mind in future: on some platforms, the locale functions return pointers
36 : * to static data that will be overwritten by any later locale function.
37 : * Thus, for example, the obvious-looking sequence
38 : * save = setlocale(category, NULL);
39 : * if (!setlocale(category, value))
40 : * fail = true;
41 : * setlocale(category, save);
42 : * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
43 : * will change the memory save is pointing at. To do this sort of thing
44 : * safely, you *must* pstrdup what setlocale returns the first time.
45 : *
46 : * The POSIX locale standard is available here:
47 : *
48 : * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
49 : *----------
50 : */
51 :
52 :
53 : #include "postgres.h"
54 :
55 : #include <time.h>
56 :
57 : #include "access/htup_details.h"
58 : #include "catalog/pg_collation.h"
59 : #include "catalog/pg_control.h"
60 : #include "mb/pg_wchar.h"
61 : #include "miscadmin.h"
62 : #include "utils/builtins.h"
63 : #include "utils/formatting.h"
64 : #include "utils/guc_hooks.h"
65 : #include "utils/hsearch.h"
66 : #include "utils/lsyscache.h"
67 : #include "utils/memutils.h"
68 : #include "utils/pg_locale.h"
69 : #include "utils/syscache.h"
70 :
71 : #ifdef USE_ICU
72 : #include <unicode/ucnv.h>
73 : #include <unicode/ustring.h>
74 : #endif
75 :
76 : #ifdef __GLIBC__
77 : #include <gnu/libc-version.h>
78 : #endif
79 :
80 : #ifdef WIN32
81 : #include <shlwapi.h>
82 : #endif
83 :
84 : /*
85 : * This should be large enough that most strings will fit, but small enough
86 : * that we feel comfortable putting it on the stack
87 : */
88 : #define TEXTBUFLEN 1024
89 :
90 : #define MAX_L10N_DATA 80
91 :
92 :
93 : /* GUC settings */
94 : char *locale_messages;
95 : char *locale_monetary;
96 : char *locale_numeric;
97 : char *locale_time;
98 :
99 : int icu_validation_level = WARNING;
100 :
101 : /*
102 : * lc_time localization cache.
103 : *
104 : * We use only the first 7 or 12 entries of these arrays. The last array
105 : * element is left as NULL for the convenience of outside code that wants
106 : * to sequentially scan these arrays.
107 : */
108 : char *localized_abbrev_days[7 + 1];
109 : char *localized_full_days[7 + 1];
110 : char *localized_abbrev_months[12 + 1];
111 : char *localized_full_months[12 + 1];
112 :
113 : /* is the databases's LC_CTYPE the C locale? */
114 : bool database_ctype_is_c = false;
115 :
116 : /* indicates whether locale information cache is valid */
117 : static bool CurrentLocaleConvValid = false;
118 : static bool CurrentLCTimeValid = false;
119 :
120 : /* Cache for collation-related knowledge */
121 :
122 : typedef struct
123 : {
124 : Oid collid; /* hash key: pg_collation OID */
125 : bool collate_is_c; /* is collation's LC_COLLATE C? */
126 : bool ctype_is_c; /* is collation's LC_CTYPE C? */
127 : bool flags_valid; /* true if above flags are valid */
128 : pg_locale_t locale; /* locale_t struct, or 0 if not valid */
129 : } collation_cache_entry;
130 :
131 : static HTAB *collation_cache = NULL;
132 :
133 :
#if defined(WIN32) && defined(LC_MESSAGES)
/* Used by pg_perm_setlocale() when setting LC_MESSAGES on Windows */
static char *IsoLocaleName(const char *);
#endif

#ifdef USE_ICU
/*
 * Converter object for translating between ICU's UChar strings and C
 * strings in the database encoding.  The database encoding never changes
 * within a session, so a single converter per session is enough.
 */
static UConverter *icu_converter = NULL;

/* Forward declarations of the ICU helper routines */
static UCollator *pg_ucol_open(const char *loc_str);
static void init_icu_converter(void);
static size_t uchar_length(UConverter *converter,
						   const char *str, int32_t len);
static int32_t uchar_convert(UConverter *converter,
							 UChar *dest, int32_t destlen,
							 const char *src, int32_t srclen);
static void icu_set_collation_attributes(UCollator *collator, const char *loc,
										 UErrorCode *status);
#endif
156 :
157 : /*
158 : * pg_perm_setlocale
159 : *
160 : * This wraps the libc function setlocale(), with two additions. First, when
161 : * changing LC_CTYPE, update gettext's encoding for the current message
162 : * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
163 : * not on Windows. Second, if the operation is successful, the corresponding
164 : * LC_XXX environment variable is set to match. By setting the environment
165 : * variable, we ensure that any subsequent use of setlocale(..., "") will
166 : * preserve the settings made through this routine. Of course, LC_ALL must
167 : * also be unset to fully ensure that, but that has to be done elsewhere after
168 : * all the individual LC_XXX variables have been set correctly. (Thank you
169 : * Perl for making this kluge necessary.)
170 : */
171 : char *
172 89286 : pg_perm_setlocale(int category, const char *locale)
173 : {
174 : char *result;
175 : const char *envvar;
176 :
177 : #ifndef WIN32
178 89286 : result = setlocale(category, locale);
179 : #else
180 :
181 : /*
182 : * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
183 : * the given value is good and set it in the environment variables. We
184 : * must ignore attempts to set to "", which means "keep using the old
185 : * environment value".
186 : */
187 : #ifdef LC_MESSAGES
188 : if (category == LC_MESSAGES)
189 : {
190 : result = (char *) locale;
191 : if (locale == NULL || locale[0] == '\0')
192 : return result;
193 : }
194 : else
195 : #endif
196 : result = setlocale(category, locale);
197 : #endif /* WIN32 */
198 :
199 89286 : if (result == NULL)
200 0 : return result; /* fall out immediately on failure */
201 :
202 : /*
203 : * Use the right encoding in translated messages. Under ENABLE_NLS, let
204 : * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
205 : * format strings are ASCII, but database-encoding strings may enter the
206 : * message via %s. This makes the overall message encoding equal to the
207 : * database encoding.
208 : */
209 89286 : if (category == LC_CTYPE)
210 : {
211 : static char save_lc_ctype[LOCALE_NAME_BUFLEN];
212 :
213 : /* copy setlocale() return value before callee invokes it again */
214 26580 : strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
215 26580 : result = save_lc_ctype;
216 :
217 : #ifdef ENABLE_NLS
218 26580 : SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
219 : #else
220 : SetMessageEncoding(GetDatabaseEncoding());
221 : #endif
222 : }
223 :
224 89286 : switch (category)
225 : {
226 26580 : case LC_COLLATE:
227 26580 : envvar = "LC_COLLATE";
228 26580 : break;
229 26580 : case LC_CTYPE:
230 26580 : envvar = "LC_CTYPE";
231 26580 : break;
232 : #ifdef LC_MESSAGES
233 20448 : case LC_MESSAGES:
234 20448 : envvar = "LC_MESSAGES";
235 : #ifdef WIN32
236 : result = IsoLocaleName(locale);
237 : if (result == NULL)
238 : result = (char *) locale;
239 : elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
240 : #endif /* WIN32 */
241 20448 : break;
242 : #endif /* LC_MESSAGES */
243 5226 : case LC_MONETARY:
244 5226 : envvar = "LC_MONETARY";
245 5226 : break;
246 5226 : case LC_NUMERIC:
247 5226 : envvar = "LC_NUMERIC";
248 5226 : break;
249 5226 : case LC_TIME:
250 5226 : envvar = "LC_TIME";
251 5226 : break;
252 0 : default:
253 0 : elog(FATAL, "unrecognized LC category: %d", category);
254 : return NULL; /* keep compiler quiet */
255 : }
256 :
257 89286 : if (setenv(envvar, result, 1) != 0)
258 0 : return NULL;
259 :
260 89286 : return result;
261 : }
262 :
263 :
264 : /*
265 : * Is the locale name valid for the locale category?
266 : *
267 : * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
268 : * canonical name is stored there. This is especially useful for figuring out
269 : * what locale name "" means (ie, the server environment value). (Actually,
270 : * it seems that on most implementations that's the only thing it's good for;
271 : * we could wish that setlocale gave back a canonically spelled version of
272 : * the locale name, but typically it doesn't.)
273 : */
274 : bool
275 67206 : check_locale(int category, const char *locale, char **canonname)
276 : {
277 : char *save;
278 : char *res;
279 :
280 67206 : if (canonname)
281 3198 : *canonname = NULL; /* in case of failure */
282 :
283 67206 : save = setlocale(category, NULL);
284 67206 : if (!save)
285 0 : return false; /* won't happen, we hope */
286 :
287 : /* save may be pointing at a modifiable scratch variable, see above. */
288 67206 : save = pstrdup(save);
289 :
290 : /* set the locale with setlocale, to see if it accepts it. */
291 67206 : res = setlocale(category, locale);
292 :
293 : /* save canonical name if requested. */
294 67206 : if (res && canonname)
295 3194 : *canonname = pstrdup(res);
296 :
297 : /* restore old value. */
298 67206 : if (!setlocale(category, save))
299 0 : elog(WARNING, "failed to restore old locale \"%s\"", save);
300 67206 : pfree(save);
301 :
302 67206 : return (res != NULL);
303 : }
304 :
305 :
306 : /*
307 : * GUC check/assign hooks
308 : *
309 : * For most locale categories, the assign hook doesn't actually set the locale
310 : * permanently, just reset flags so that the next use will cache the
311 : * appropriate values. (See explanation at the top of this file.)
312 : *
313 : * Note: we accept value = "" as selecting the postmaster's environment
314 : * value, whatever it was (so long as the environment setting is legal).
315 : * This will have been locked down by an earlier call to pg_perm_setlocale.
316 : */
317 : bool
318 18300 : check_locale_monetary(char **newval, void **extra, GucSource source)
319 : {
320 18300 : return check_locale(LC_MONETARY, *newval, NULL);
321 : }
322 :
323 : void
324 18120 : assign_locale_monetary(const char *newval, void *extra)
325 : {
326 18120 : CurrentLocaleConvValid = false;
327 18120 : }
328 :
329 : bool
330 18306 : check_locale_numeric(char **newval, void **extra, GucSource source)
331 : {
332 18306 : return check_locale(LC_NUMERIC, *newval, NULL);
333 : }
334 :
335 : void
336 18132 : assign_locale_numeric(const char *newval, void *extra)
337 : {
338 18132 : CurrentLocaleConvValid = false;
339 18132 : }
340 :
341 : bool
342 18300 : check_locale_time(char **newval, void **extra, GucSource source)
343 : {
344 18300 : return check_locale(LC_TIME, *newval, NULL);
345 : }
346 :
347 : void
348 18120 : assign_locale_time(const char *newval, void *extra)
349 : {
350 18120 : CurrentLCTimeValid = false;
351 18120 : }
352 :
353 : /*
354 : * We allow LC_MESSAGES to actually be set globally.
355 : *
356 : * Note: we normally disallow value = "" because it wouldn't have consistent
357 : * semantics (it'd effectively just use the previous value). However, this
358 : * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
359 : * not even if the attempted setting fails due to invalid environment value.
360 : * The idea there is just to accept the environment setting *if possible*
361 : * during startup, until we can read the proper value from postgresql.conf.
362 : */
363 : bool
364 15396 : check_locale_messages(char **newval, void **extra, GucSource source)
365 : {
366 15396 : if (**newval == '\0')
367 : {
368 6294 : if (source == PGC_S_DEFAULT)
369 6294 : return true;
370 : else
371 0 : return false;
372 : }
373 :
374 : /*
375 : * LC_MESSAGES category does not exist everywhere, but accept it anyway
376 : *
377 : * On Windows, we can't even check the value, so accept blindly
378 : */
379 : #if defined(LC_MESSAGES) && !defined(WIN32)
380 9102 : return check_locale(LC_MESSAGES, *newval, NULL);
381 : #else
382 : return true;
383 : #endif
384 : }
385 :
/*
 * GUC assign hook for lc_messages.
 *
 * Unlike the other run-time categories, this one is installed in libc
 * right away via pg_perm_setlocale().
 */
void
assign_locale_messages(const char *newval, void *extra)
{
	/*
	 * The LC_MESSAGES category does not exist on every platform; where it
	 * is missing the new value is accepted and nothing is done.  A failure
	 * to install the value is deliberately ignored.
	 */
#ifdef LC_MESSAGES
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
397 :
398 :
/*
 * Release the malloc'd strings hanging off a struct lconv (but not the
 * struct itself).  The set of fields must stay in sync with
 * struct_lconv_is_valid().
 *
 * It's important that this not throw elog(ERROR).  Fields that are NULL are
 * harmless, since free(NULL) is a no-op.  The pointers are not reset.
 */
static void
free_struct_lconv(struct lconv *s)
{
	char	   *owned[] = {
		s->decimal_point,
		s->thousands_sep,
		s->grouping,
		s->int_curr_symbol,
		s->currency_symbol,
		s->mon_decimal_point,
		s->mon_thousands_sep,
		s->mon_grouping,
		s->positive_sign,
		s->negative_sign
	};
	size_t		nowned = sizeof(owned) / sizeof(owned[0]);
	size_t		k;

	for (k = 0; k < nowned; k++)
		free(owned[k]);
}
417 :
/*
 * Report whether every string field of a struct lconv that we care about
 * is non-NULL.  The field list must match free_struct_lconv().
 *
 * Returns true only if none of the checked pointers is NULL.
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
447 :
448 :
449 : /*
450 : * Convert the strdup'd string at *str from the specified encoding to the
451 : * database encoding.
452 : */
453 : static void
454 816 : db_encoding_convert(int encoding, char **str)
455 : {
456 : char *pstr;
457 : char *mstr;
458 :
459 : /* convert the string to the database encoding */
460 816 : pstr = pg_any_to_server(*str, strlen(*str), encoding);
461 816 : if (pstr == *str)
462 816 : return; /* no conversion happened */
463 :
464 : /* need it malloc'd not palloc'd */
465 0 : mstr = strdup(pstr);
466 0 : if (mstr == NULL)
467 0 : ereport(ERROR,
468 : (errcode(ERRCODE_OUT_OF_MEMORY),
469 : errmsg("out of memory")));
470 :
471 : /* replace old string */
472 0 : free(*str);
473 0 : *str = mstr;
474 :
475 0 : pfree(pstr);
476 : }
477 :
478 :
479 : /*
480 : * Return the POSIX lconv struct (contains number/money formatting
481 : * information) with locale information for all categories.
482 : */
483 : struct lconv *
484 15936 : PGLC_localeconv(void)
485 : {
486 : static struct lconv CurrentLocaleConv;
487 : static bool CurrentLocaleConvAllocated = false;
488 : struct lconv *extlconv;
489 : struct lconv worklconv;
490 : char *save_lc_monetary;
491 : char *save_lc_numeric;
492 : #ifdef WIN32
493 : char *save_lc_ctype;
494 : #endif
495 :
496 : /* Did we do it already? */
497 15936 : if (CurrentLocaleConvValid)
498 15834 : return &CurrentLocaleConv;
499 :
500 : /* Free any already-allocated storage */
501 102 : if (CurrentLocaleConvAllocated)
502 : {
503 6 : free_struct_lconv(&CurrentLocaleConv);
504 6 : CurrentLocaleConvAllocated = false;
505 : }
506 :
507 : /*
508 : * This is tricky because we really don't want to risk throwing error
509 : * while the locale is set to other than our usual settings. Therefore,
510 : * the process is: collect the usual settings, set locale to special
511 : * setting, copy relevant data into worklconv using strdup(), restore
512 : * normal settings, convert data to desired encoding, and finally stash
513 : * the collected data in CurrentLocaleConv. This makes it safe if we
514 : * throw an error during encoding conversion or run out of memory anywhere
515 : * in the process. All data pointed to by struct lconv members is
516 : * allocated with strdup, to avoid premature elog(ERROR) and to allow
517 : * using a single cleanup routine.
518 : */
519 102 : memset(&worklconv, 0, sizeof(worklconv));
520 :
521 : /* Save prevailing values of monetary and numeric locales */
522 102 : save_lc_monetary = setlocale(LC_MONETARY, NULL);
523 102 : if (!save_lc_monetary)
524 0 : elog(ERROR, "setlocale(NULL) failed");
525 102 : save_lc_monetary = pstrdup(save_lc_monetary);
526 :
527 102 : save_lc_numeric = setlocale(LC_NUMERIC, NULL);
528 102 : if (!save_lc_numeric)
529 0 : elog(ERROR, "setlocale(NULL) failed");
530 102 : save_lc_numeric = pstrdup(save_lc_numeric);
531 :
532 : #ifdef WIN32
533 :
534 : /*
535 : * The POSIX standard explicitly says that it is undefined what happens if
536 : * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
537 : * that implied by LC_CTYPE. In practice, all Unix-ish platforms seem to
538 : * believe that localeconv() should return strings that are encoded in the
539 : * codeset implied by the LC_MONETARY or LC_NUMERIC locale name. Hence,
540 : * once we have successfully collected the localeconv() results, we will
541 : * convert them from that codeset to the desired server encoding.
542 : *
543 : * Windows, of course, resolutely does things its own way; on that
544 : * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
545 : * results. Hence, we must temporarily set that category as well.
546 : */
547 :
548 : /* Save prevailing value of ctype locale */
549 : save_lc_ctype = setlocale(LC_CTYPE, NULL);
550 : if (!save_lc_ctype)
551 : elog(ERROR, "setlocale(NULL) failed");
552 : save_lc_ctype = pstrdup(save_lc_ctype);
553 :
554 : /* Here begins the critical section where we must not throw error */
555 :
556 : /* use numeric to set the ctype */
557 : setlocale(LC_CTYPE, locale_numeric);
558 : #endif
559 :
560 : /* Get formatting information for numeric */
561 102 : setlocale(LC_NUMERIC, locale_numeric);
562 102 : extlconv = localeconv();
563 :
564 : /* Must copy data now in case setlocale() overwrites it */
565 102 : worklconv.decimal_point = strdup(extlconv->decimal_point);
566 102 : worklconv.thousands_sep = strdup(extlconv->thousands_sep);
567 102 : worklconv.grouping = strdup(extlconv->grouping);
568 :
569 : #ifdef WIN32
570 : /* use monetary to set the ctype */
571 : setlocale(LC_CTYPE, locale_monetary);
572 : #endif
573 :
574 : /* Get formatting information for monetary */
575 102 : setlocale(LC_MONETARY, locale_monetary);
576 102 : extlconv = localeconv();
577 :
578 : /* Must copy data now in case setlocale() overwrites it */
579 102 : worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
580 102 : worklconv.currency_symbol = strdup(extlconv->currency_symbol);
581 102 : worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
582 102 : worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
583 102 : worklconv.mon_grouping = strdup(extlconv->mon_grouping);
584 102 : worklconv.positive_sign = strdup(extlconv->positive_sign);
585 102 : worklconv.negative_sign = strdup(extlconv->negative_sign);
586 : /* Copy scalar fields as well */
587 102 : worklconv.int_frac_digits = extlconv->int_frac_digits;
588 102 : worklconv.frac_digits = extlconv->frac_digits;
589 102 : worklconv.p_cs_precedes = extlconv->p_cs_precedes;
590 102 : worklconv.p_sep_by_space = extlconv->p_sep_by_space;
591 102 : worklconv.n_cs_precedes = extlconv->n_cs_precedes;
592 102 : worklconv.n_sep_by_space = extlconv->n_sep_by_space;
593 102 : worklconv.p_sign_posn = extlconv->p_sign_posn;
594 102 : worklconv.n_sign_posn = extlconv->n_sign_posn;
595 :
596 : /*
597 : * Restore the prevailing locale settings; failure to do so is fatal.
598 : * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
599 : * but proceeding with the wrong value of LC_CTYPE would certainly be bad
600 : * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
601 : * are almost certainly "C", there's really no reason that restoring those
602 : * should fail.
603 : */
604 : #ifdef WIN32
605 : if (!setlocale(LC_CTYPE, save_lc_ctype))
606 : elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
607 : #endif
608 102 : if (!setlocale(LC_MONETARY, save_lc_monetary))
609 0 : elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
610 102 : if (!setlocale(LC_NUMERIC, save_lc_numeric))
611 0 : elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);
612 :
613 : /*
614 : * At this point we've done our best to clean up, and can call functions
615 : * that might possibly throw errors with a clean conscience. But let's
616 : * make sure we don't leak any already-strdup'd fields in worklconv.
617 : */
618 102 : PG_TRY();
619 : {
620 : int encoding;
621 :
622 : /* Release the pstrdup'd locale names */
623 102 : pfree(save_lc_monetary);
624 102 : pfree(save_lc_numeric);
625 : #ifdef WIN32
626 : pfree(save_lc_ctype);
627 : #endif
628 :
629 : /* If any of the preceding strdup calls failed, complain now. */
630 102 : if (!struct_lconv_is_valid(&worklconv))
631 0 : ereport(ERROR,
632 : (errcode(ERRCODE_OUT_OF_MEMORY),
633 : errmsg("out of memory")));
634 :
635 : /*
636 : * Now we must perform encoding conversion from whatever's associated
637 : * with the locales into the database encoding. If we can't identify
638 : * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
639 : * use PG_SQL_ASCII, which will result in just validating that the
640 : * strings are OK in the database encoding.
641 : */
642 102 : encoding = pg_get_encoding_from_locale(locale_numeric, true);
643 102 : if (encoding < 0)
644 0 : encoding = PG_SQL_ASCII;
645 :
646 102 : db_encoding_convert(encoding, &worklconv.decimal_point);
647 102 : db_encoding_convert(encoding, &worklconv.thousands_sep);
648 : /* grouping is not text and does not require conversion */
649 :
650 102 : encoding = pg_get_encoding_from_locale(locale_monetary, true);
651 102 : if (encoding < 0)
652 0 : encoding = PG_SQL_ASCII;
653 :
654 102 : db_encoding_convert(encoding, &worklconv.int_curr_symbol);
655 102 : db_encoding_convert(encoding, &worklconv.currency_symbol);
656 102 : db_encoding_convert(encoding, &worklconv.mon_decimal_point);
657 102 : db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
658 : /* mon_grouping is not text and does not require conversion */
659 102 : db_encoding_convert(encoding, &worklconv.positive_sign);
660 102 : db_encoding_convert(encoding, &worklconv.negative_sign);
661 : }
662 0 : PG_CATCH();
663 : {
664 0 : free_struct_lconv(&worklconv);
665 0 : PG_RE_THROW();
666 : }
667 102 : PG_END_TRY();
668 :
669 : /*
670 : * Everything is good, so save the results.
671 : */
672 102 : CurrentLocaleConv = worklconv;
673 102 : CurrentLocaleConvAllocated = true;
674 102 : CurrentLocaleConvValid = true;
675 102 : return &CurrentLocaleConv;
676 : }
677 :
#ifdef WIN32
/*
 * On Windows, strftime() returns its output in encoding CP_ACP (the default
 * operating system codepage for the computer), which is likely different
 * from SERVER_ENCODING.  This is especially important in Japanese versions
 * of Windows which will use SJIS encoding, which we don't support as a
 * server encoding.
 *
 * So, instead of using strftime(), use wcsftime() to return the value in
 * wide characters (internally UTF16) and then convert to UTF8, which we
 * know how to handle directly.
 *
 * Note that this only affects the calls to strftime() in this file, which are
 * used to get the locale-aware strings.  Other parts of the backend use
 * pg_strftime(), which isn't locale-aware and does not need to be replaced.
 *
 * Returns the number of bytes stored in dst (not counting the terminating
 * NUL), or 0 if wcsftime() failed, in which case dst's contents are
 * unspecified.
 *
 * NOTE(review): the elog(ERROR) calls below are reachable while the caller,
 * cache_locale_time(), has libc's locale temporarily switched -- confirm
 * that this is acceptable.
 */
static size_t
strftime_win32(char *dst, size_t dstlen,
			   const char *format, const struct tm *tm)
{
	wchar_t		wide_format[8]; /* formats used below need 3 chars */
	wchar_t		wide_out[MAX_L10N_DATA];
	size_t		count;

	/*
	 * Widen the format string.  Only plain-ASCII formats are used in this
	 * file, so it is safe to claim that they are UTF8.
	 */
	count = MultiByteToWideChar(CP_UTF8, 0, format, -1,
								wide_format, lengthof(wide_format));
	if (count == 0)
		elog(ERROR, "could not convert format string from UTF-8: error code %lu",
			 GetLastError());

	count = wcsftime(wide_out, MAX_L10N_DATA, wide_format, tm);
	if (count == 0)
	{
		/*
		 * wcsftime failed, possibly because the result would not fit in
		 * MAX_L10N_DATA.  Return 0 with the contents of dst unspecified.
		 */
		return 0;
	}

	/* Narrow the result to UTF8, leaving room for the terminator */
	count = WideCharToMultiByte(CP_UTF8, 0, wide_out, count, dst, dstlen - 1,
								NULL, NULL);
	if (count == 0)
		elog(ERROR, "could not convert string to UTF-8: error code %lu",
			 GetLastError());

	dst[count] = '\0';

	return count;
}

/* redefine strftime() */
#define strftime(a,b,c,d) strftime_win32(a,b,c,d)
#endif							/* WIN32 */
736 :
737 : /*
738 : * Subroutine for cache_locale_time().
739 : * Convert the given string from encoding "encoding" to the database
740 : * encoding, and store the result at *dst, replacing any previous value.
741 : */
742 : static void
743 1672 : cache_single_string(char **dst, const char *src, int encoding)
744 : {
745 : char *ptr;
746 : char *olddst;
747 :
748 : /* Convert the string to the database encoding, or validate it's OK */
749 1672 : ptr = pg_any_to_server(src, strlen(src), encoding);
750 :
751 : /* Store the string in long-lived storage, replacing any previous value */
752 1672 : olddst = *dst;
753 1672 : *dst = MemoryContextStrdup(TopMemoryContext, ptr);
754 1672 : if (olddst)
755 0 : pfree(olddst);
756 :
757 : /* Might as well clean up any palloc'd conversion result, too */
758 1672 : if (ptr != src)
759 0 : pfree(ptr);
760 1672 : }
761 :
762 : /*
763 : * Update the lc_time localization cache variables if needed.
764 : */
765 : void
766 18512 : cache_locale_time(void)
767 : {
768 : char buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
769 : char *bufptr;
770 : time_t timenow;
771 : struct tm *timeinfo;
772 18512 : bool strftimefail = false;
773 : int encoding;
774 : int i;
775 : char *save_lc_time;
776 : #ifdef WIN32
777 : char *save_lc_ctype;
778 : #endif
779 :
780 : /* did we do this already? */
781 18512 : if (CurrentLCTimeValid)
782 18468 : return;
783 :
784 44 : elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
785 :
786 : /*
787 : * As in PGLC_localeconv(), it's critical that we not throw error while
788 : * libc's locale settings have nondefault values. Hence, we just call
789 : * strftime() within the critical section, and then convert and save its
790 : * results afterwards.
791 : */
792 :
793 : /* Save prevailing value of time locale */
794 44 : save_lc_time = setlocale(LC_TIME, NULL);
795 44 : if (!save_lc_time)
796 0 : elog(ERROR, "setlocale(NULL) failed");
797 44 : save_lc_time = pstrdup(save_lc_time);
798 :
799 : #ifdef WIN32
800 :
801 : /*
802 : * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
803 : * must set it here. This code looks the same as what PGLC_localeconv()
804 : * does, but the underlying reason is different: this does NOT determine
805 : * the encoding we'll get back from strftime_win32().
806 : */
807 :
808 : /* Save prevailing value of ctype locale */
809 : save_lc_ctype = setlocale(LC_CTYPE, NULL);
810 : if (!save_lc_ctype)
811 : elog(ERROR, "setlocale(NULL) failed");
812 : save_lc_ctype = pstrdup(save_lc_ctype);
813 :
814 : /* use lc_time to set the ctype */
815 : setlocale(LC_CTYPE, locale_time);
816 : #endif
817 :
818 44 : setlocale(LC_TIME, locale_time);
819 :
820 : /* We use times close to current time as data for strftime(). */
821 44 : timenow = time(NULL);
822 44 : timeinfo = localtime(&timenow);
823 :
824 : /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
825 44 : bufptr = buf;
826 :
827 : /*
828 : * MAX_L10N_DATA is sufficient buffer space for every known locale, and
829 : * POSIX defines no strftime() errors. (Buffer space exhaustion is not an
830 : * error.) An implementation might report errors (e.g. ENOMEM) by
831 : * returning 0 (or, less plausibly, a negative value) and setting errno.
832 : * Report errno just in case the implementation did that, but clear it in
833 : * advance of the calls so we don't emit a stale, unrelated errno.
834 : */
835 44 : errno = 0;
836 :
837 : /* localized days */
838 352 : for (i = 0; i < 7; i++)
839 : {
840 308 : timeinfo->tm_wday = i;
841 308 : if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)
842 0 : strftimefail = true;
843 308 : bufptr += MAX_L10N_DATA;
844 308 : if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)
845 0 : strftimefail = true;
846 308 : bufptr += MAX_L10N_DATA;
847 : }
848 :
849 : /* localized months */
850 572 : for (i = 0; i < 12; i++)
851 : {
852 528 : timeinfo->tm_mon = i;
853 528 : timeinfo->tm_mday = 1; /* make sure we don't have invalid date */
854 528 : if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)
855 0 : strftimefail = true;
856 528 : bufptr += MAX_L10N_DATA;
857 528 : if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)
858 0 : strftimefail = true;
859 528 : bufptr += MAX_L10N_DATA;
860 : }
861 :
862 : /*
863 : * Restore the prevailing locale settings; as in PGLC_localeconv(),
864 : * failure to do so is fatal.
865 : */
866 : #ifdef WIN32
867 : if (!setlocale(LC_CTYPE, save_lc_ctype))
868 : elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
869 : #endif
870 44 : if (!setlocale(LC_TIME, save_lc_time))
871 0 : elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);
872 :
873 : /*
874 : * At this point we've done our best to clean up, and can throw errors, or
875 : * call functions that might throw errors, with a clean conscience.
876 : */
877 44 : if (strftimefail)
878 0 : elog(ERROR, "strftime() failed: %m");
879 :
880 : /* Release the pstrdup'd locale names */
881 44 : pfree(save_lc_time);
882 : #ifdef WIN32
883 : pfree(save_lc_ctype);
884 : #endif
885 :
886 : #ifndef WIN32
887 :
888 : /*
889 : * As in PGLC_localeconv(), we must convert strftime()'s output from the
890 : * encoding implied by LC_TIME to the database encoding. If we can't
891 : * identify the LC_TIME encoding, just perform encoding validation.
892 : */
893 44 : encoding = pg_get_encoding_from_locale(locale_time, true);
894 44 : if (encoding < 0)
895 0 : encoding = PG_SQL_ASCII;
896 :
897 : #else
898 :
899 : /*
900 : * On Windows, strftime_win32() always returns UTF8 data, so convert from
901 : * that if necessary.
902 : */
903 : encoding = PG_UTF8;
904 :
905 : #endif /* WIN32 */
906 :
907 44 : bufptr = buf;
908 :
909 : /* localized days */
910 352 : for (i = 0; i < 7; i++)
911 : {
912 308 : cache_single_string(&localized_abbrev_days[i], bufptr, encoding);
913 308 : bufptr += MAX_L10N_DATA;
914 308 : cache_single_string(&localized_full_days[i], bufptr, encoding);
915 308 : bufptr += MAX_L10N_DATA;
916 : }
917 44 : localized_abbrev_days[7] = NULL;
918 44 : localized_full_days[7] = NULL;
919 :
920 : /* localized months */
921 572 : for (i = 0; i < 12; i++)
922 : {
923 528 : cache_single_string(&localized_abbrev_months[i], bufptr, encoding);
924 528 : bufptr += MAX_L10N_DATA;
925 528 : cache_single_string(&localized_full_months[i], bufptr, encoding);
926 528 : bufptr += MAX_L10N_DATA;
927 : }
928 44 : localized_abbrev_months[12] = NULL;
929 44 : localized_full_months[12] = NULL;
930 :
931 44 : CurrentLCTimeValid = true;
932 : }
933 :
934 :
935 : #if defined(WIN32) && defined(LC_MESSAGES)
936 : /*
937 : * Convert a Windows setlocale() argument to a Unix-style one.
938 : *
939 : * Regardless of platform, we install message catalogs under a Unix-style
940 : * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
941 : * following that style will elicit localized interface strings.
942 : *
943 : * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
944 : * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
945 : * case-insensitive. setlocale() returns the fully-qualified form; for
946 : * example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
947 : * setlocale() and _create_locale() select a "locale identifier"[1] and store
948 : * it in an undocumented _locale_t field. From that LCID, we can retrieve the
949 : * ISO 639 language and the ISO 3166 country. Character encoding does not
950 : * matter, because the server and client encodings govern that.
951 : *
952 : * Windows Vista introduced the "locale name" concept[2], closely following
953 : * RFC 4646. Locale identifiers are now deprecated. Starting with Visual
954 : * Studio 2012, setlocale() accepts locale names in addition to the strings it
955 : * accepted historically. It does not standardize them; setlocale("Th-tH")
956 : * returns "Th-tH". setlocale(category, "") still returns a traditional
957 : * string. Furthermore, msvcr110.dll changed the undocumented _locale_t
958 : * content to carry locale names instead of locale identifiers.
959 : *
960 : * Visual Studio 2015 should still be able to do the same as Visual Studio
961 : * 2012, but the declaration of locale_name is missing from _locale_t, which
962 : * makes code relying on it fail to compile. Hence we instead fall back to
963 : * enumerating all system locales with EnumSystemLocalesEx() to find the
964 : * required locale name. If the input argument is already Unix-style, we can
965 : * get the ISO locale name directly by using GetLocaleInfoEx() with LCType
966 : * LOCALE_SNAME.
967 : *
968 : * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol in
969 : * releases before Windows 8. IsoLocaleName() always fails in a MinGW-built
970 : * postgres.exe, so only Unix-style values of the lc_messages GUC can elicit
971 : * localized messages. In particular, every lc_messages setting that initdb
972 : * can select automatically will yield only C-locale messages. XXX This could
973 : * be fixed by running the fully-qualified locale name through a lookup table.
974 : *
975 : * This function returns a pointer to a static buffer bearing the converted
976 : * name or NULL if conversion fails.
977 : *
978 : * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
979 : * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
980 : */
981 :
982 : #if defined(_MSC_VER)
983 :
984 : /*
985 : * Callback function for EnumSystemLocalesEx() in get_iso_localename().
986 : *
987 : * This function enumerates all system locales, searching for one that matches
988 : * an input with the format: <Language>[_<Country>], e.g.
989 : * English[_United States]
990 : *
991 : * The input is a three wchar_t array as an LPARAM. The first element is the
992 : * locale_name we want to match, the second element is an allocated buffer
993 : * where the Unix-style locale is copied if a match is found, and the third
994 : * element is the search status, 1 if a match was found, 0 otherwise.
995 : */
996 : static BOOL CALLBACK
997 : search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
998 : {
999 : wchar_t test_locale[LOCALE_NAME_MAX_LENGTH];
1000 : wchar_t **argv;
1001 :
1002 : (void) (dwFlags);
1003 :
1004 : argv = (wchar_t **) lparam;
1005 : *argv[2] = (wchar_t) 0;
1006 :
1007 : memset(test_locale, 0, sizeof(test_locale));
1008 :
1009 : /* Get the name of the <Language> in English */
1010 : if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
1011 : test_locale, LOCALE_NAME_MAX_LENGTH))
1012 : {
1013 : /*
1014 : * If the enumerated locale does not have a hyphen ("en") OR the
1015 : * locale_name input does not have an underscore ("English"), we only
1016 : * need to compare the <Language> tags.
1017 : */
1018 : if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
1019 : {
1020 : if (_wcsicmp(argv[0], test_locale) == 0)
1021 : {
1022 : wcscpy(argv[1], pStr);
1023 : *argv[2] = (wchar_t) 1;
1024 : return FALSE;
1025 : }
1026 : }
1027 :
1028 : /*
1029 : * We have to compare a full <Language>_<Country> tag, so we append
1030 : * the underscore and name of the country/region in English, e.g.
1031 : * "English_United States".
1032 : */
1033 : else
1034 : {
1035 : size_t len;
1036 :
1037 : wcscat(test_locale, L"_");
1038 : len = wcslen(test_locale);
1039 : if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
1040 : test_locale + len,
1041 : LOCALE_NAME_MAX_LENGTH - len))
1042 : {
1043 : if (_wcsicmp(argv[0], test_locale) == 0)
1044 : {
1045 : wcscpy(argv[1], pStr);
1046 : *argv[2] = (wchar_t) 1;
1047 : return FALSE;
1048 : }
1049 : }
1050 : }
1051 : }
1052 :
1053 : return TRUE;
1054 : }
1055 :
1056 : /*
1057 : * This function converts a Windows locale name to an ISO formatted version
1058 : * for Visual Studio 2015 or greater.
1059 : *
1060 : * Returns NULL, if no valid conversion was found.
1061 : */
1062 : static char *
1063 : get_iso_localename(const char *winlocname)
1064 : {
1065 : wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH];
1066 : wchar_t buffer[LOCALE_NAME_MAX_LENGTH];
1067 : static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1068 : char *period;
1069 : int len;
1070 : int ret_val;
1071 :
1072 : /*
1073 : * Valid locales have the following syntax:
1074 : * <Language>[_<Country>[.<CodePage>]]
1075 : *
1076 : * GetLocaleInfoEx can only take locale name without code-page and for the
1077 : * purpose of this API the code-page doesn't matter.
1078 : */
1079 : period = strchr(winlocname, '.');
1080 : if (period != NULL)
1081 : len = period - winlocname;
1082 : else
1083 : len = pg_mbstrlen(winlocname);
1084 :
1085 : memset(wc_locale_name, 0, sizeof(wc_locale_name));
1086 : memset(buffer, 0, sizeof(buffer));
1087 : MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
1088 : LOCALE_NAME_MAX_LENGTH);
1089 :
1090 : /*
1091 : * If the lc_messages is already a Unix-style string, we have a direct
1092 : * match with LOCALE_SNAME, e.g. en-US, en_US.
1093 : */
1094 : ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
1095 : LOCALE_NAME_MAX_LENGTH);
1096 : if (!ret_val)
1097 : {
1098 : /*
1099 : * Search for a locale in the system that matches language and country
1100 : * name.
1101 : */
1102 : wchar_t *argv[3];
1103 :
1104 : argv[0] = wc_locale_name;
1105 : argv[1] = buffer;
1106 : argv[2] = (wchar_t *) &ret_val;
1107 : EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
1108 : NULL);
1109 : }
1110 :
1111 : if (ret_val)
1112 : {
1113 : size_t rc;
1114 : char *hyphen;
1115 :
1116 : /* Locale names use only ASCII, any conversion locale suffices. */
1117 : rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
1118 : if (rc == -1 || rc == sizeof(iso_lc_messages))
1119 : return NULL;
1120 :
1121 : /*
1122 : * Since the message catalogs sit on a case-insensitive filesystem, we
1123 : * need not standardize letter case here. So long as we do not ship
1124 : * message catalogs for which it would matter, we also need not
1125 : * translate the script/variant portion, e.g. uz-Cyrl-UZ to
1126 : * uz_UZ@cyrillic. Simply replace the hyphen with an underscore.
1127 : */
1128 : hyphen = strchr(iso_lc_messages, '-');
1129 : if (hyphen)
1130 : *hyphen = '_';
1131 : return iso_lc_messages;
1132 : }
1133 :
1134 : return NULL;
1135 : }
1136 :
1137 : static char *
1138 : IsoLocaleName(const char *winlocname)
1139 : {
1140 : static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1141 :
1142 : if (pg_strcasecmp("c", winlocname) == 0 ||
1143 : pg_strcasecmp("posix", winlocname) == 0)
1144 : {
1145 : strcpy(iso_lc_messages, "C");
1146 : return iso_lc_messages;
1147 : }
1148 : else
1149 : return get_iso_localename(winlocname);
1150 : }
1151 :
1152 : #else /* !defined(_MSC_VER) */
1153 :
/*
 * Non-MSVC (MinGW) stub: conversion is not supported, so always report
 * failure by returning NULL.
 */
static char *
IsoLocaleName(const char *winlocname)
{
	(void) winlocname;			/* argument intentionally unused */

	return NULL;				/* Not supported on MinGW */
}
1159 :
1160 : #endif /* defined(_MSC_VER) */
1161 :
1162 : #endif /* WIN32 && LC_MESSAGES */
1163 :
1164 :
1165 : /*
1166 : * Cache mechanism for collation information.
1167 : *
1168 : * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1169 : * (or POSIX), so we can optimize a few code paths in various places.
1170 : * For the built-in C and POSIX collations, we can know that without even
1171 : * doing a cache lookup, but we want to support aliases for C/POSIX too.
1172 : * For the "default" collation, there are separate static cache variables,
1173 : * since consulting the pg_collation catalog doesn't tell us what we need.
1174 : *
1175 : * Also, if a pg_locale_t has been requested for a collation, we cache that
1176 : * for the life of a backend.
1177 : *
1178 : * Note that some code relies on the flags not reporting false negatives
1179 : * (that is, saying it's not C when it is). For example, char2wchar()
1180 : * could fail if the locale is C, so str_tolower() shouldn't call it
1181 : * in that case.
1182 : *
1183 : * Note that we currently lack any way to flush the cache. Since we don't
1184 : * support ALTER COLLATION, this is OK. The worst case is that someone
1185 : * drops a collation, and a useless cache entry hangs around in existing
1186 : * backends.
1187 : */
1188 :
1189 : static collation_cache_entry *
1190 37952 : lookup_collation_cache(Oid collation, bool set_flags)
1191 : {
1192 : collation_cache_entry *cache_entry;
1193 : bool found;
1194 :
1195 : Assert(OidIsValid(collation));
1196 : Assert(collation != DEFAULT_COLLATION_OID);
1197 :
1198 37952 : if (collation_cache == NULL)
1199 : {
1200 : /* First time through, initialize the hash table */
1201 : HASHCTL ctl;
1202 :
1203 40 : ctl.keysize = sizeof(Oid);
1204 40 : ctl.entrysize = sizeof(collation_cache_entry);
1205 40 : collation_cache = hash_create("Collation cache", 100, &ctl,
1206 : HASH_ELEM | HASH_BLOBS);
1207 : }
1208 :
1209 37952 : cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
1210 37952 : if (!found)
1211 : {
1212 : /*
1213 : * Make sure cache entry is marked invalid, in case we fail before
1214 : * setting things.
1215 : */
1216 236 : cache_entry->flags_valid = false;
1217 236 : cache_entry->locale = 0;
1218 : }
1219 :
1220 37952 : if (set_flags && !cache_entry->flags_valid)
1221 : {
1222 : /* Attempt to set the flags */
1223 : HeapTuple tp;
1224 : Form_pg_collation collform;
1225 :
1226 236 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1227 236 : if (!HeapTupleIsValid(tp))
1228 0 : elog(ERROR, "cache lookup failed for collation %u", collation);
1229 236 : collform = (Form_pg_collation) GETSTRUCT(tp);
1230 :
1231 236 : if (collform->collprovider == COLLPROVIDER_LIBC)
1232 : {
1233 : Datum datum;
1234 : const char *collcollate;
1235 : const char *collctype;
1236 :
1237 52 : datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
1238 52 : collcollate = TextDatumGetCString(datum);
1239 52 : datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
1240 52 : collctype = TextDatumGetCString(datum);
1241 :
1242 72 : cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
1243 20 : (strcmp(collcollate, "POSIX") == 0));
1244 72 : cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
1245 20 : (strcmp(collctype, "POSIX") == 0));
1246 : }
1247 : else
1248 : {
1249 184 : cache_entry->collate_is_c = false;
1250 184 : cache_entry->ctype_is_c = false;
1251 : }
1252 :
1253 236 : cache_entry->flags_valid = true;
1254 :
1255 236 : ReleaseSysCache(tp);
1256 : }
1257 :
1258 37952 : return cache_entry;
1259 : }
1260 :
1261 :
1262 : /*
1263 : * Detect whether collation's LC_COLLATE property is C
1264 : */
1265 : bool
1266 18416008 : lc_collate_is_c(Oid collation)
1267 : {
1268 : /*
1269 : * If we're asked about "collation 0", return false, so that the code will
1270 : * go into the non-C path and report that the collation is bogus.
1271 : */
1272 18416008 : if (!OidIsValid(collation))
1273 0 : return false;
1274 :
1275 : /*
1276 : * If we're asked about the default collation, we have to inquire of the C
1277 : * library. Cache the result so we only have to compute it once.
1278 : */
1279 18416008 : if (collation == DEFAULT_COLLATION_OID)
1280 : {
1281 : static int result = -1;
1282 : char *localeptr;
1283 :
1284 12615784 : if (default_locale.provider == COLLPROVIDER_ICU)
1285 12596404 : return false;
1286 :
1287 19380 : if (result >= 0)
1288 19350 : return (bool) result;
1289 30 : localeptr = setlocale(LC_COLLATE, NULL);
1290 30 : if (!localeptr)
1291 0 : elog(ERROR, "invalid LC_COLLATE setting");
1292 :
1293 30 : if (strcmp(localeptr, "C") == 0)
1294 16 : result = true;
1295 14 : else if (strcmp(localeptr, "POSIX") == 0)
1296 0 : result = true;
1297 : else
1298 14 : result = false;
1299 30 : return (bool) result;
1300 : }
1301 :
1302 : /*
1303 : * If we're asked about the built-in C/POSIX collations, we know that.
1304 : */
1305 5800224 : if (collation == C_COLLATION_OID ||
1306 : collation == POSIX_COLLATION_OID)
1307 5783110 : return true;
1308 :
1309 : /*
1310 : * Otherwise, we have to consult pg_collation, but we cache that.
1311 : */
1312 17114 : return (lookup_collation_cache(collation, true))->collate_is_c;
1313 : }
1314 :
1315 : /*
1316 : * Detect whether collation's LC_CTYPE property is C
1317 : */
1318 : bool
1319 5928250 : lc_ctype_is_c(Oid collation)
1320 : {
1321 : /*
1322 : * If we're asked about "collation 0", return false, so that the code will
1323 : * go into the non-C path and report that the collation is bogus.
1324 : */
1325 5928250 : if (!OidIsValid(collation))
1326 0 : return false;
1327 :
1328 : /*
1329 : * If we're asked about the default collation, we have to inquire of the C
1330 : * library. Cache the result so we only have to compute it once.
1331 : */
1332 5928250 : if (collation == DEFAULT_COLLATION_OID)
1333 : {
1334 : static int result = -1;
1335 : char *localeptr;
1336 :
1337 3132498 : if (default_locale.provider == COLLPROVIDER_ICU)
1338 3132368 : return false;
1339 :
1340 130 : if (result >= 0)
1341 114 : return (bool) result;
1342 16 : localeptr = setlocale(LC_CTYPE, NULL);
1343 16 : if (!localeptr)
1344 0 : elog(ERROR, "invalid LC_CTYPE setting");
1345 :
1346 16 : if (strcmp(localeptr, "C") == 0)
1347 10 : result = true;
1348 6 : else if (strcmp(localeptr, "POSIX") == 0)
1349 0 : result = true;
1350 : else
1351 6 : result = false;
1352 16 : return (bool) result;
1353 : }
1354 :
1355 : /*
1356 : * If we're asked about the built-in C/POSIX collations, we know that.
1357 : */
1358 2795752 : if (collation == C_COLLATION_OID ||
1359 : collation == POSIX_COLLATION_OID)
1360 2793804 : return true;
1361 :
1362 : /*
1363 : * Otherwise, we have to consult pg_collation, but we cache that.
1364 : */
1365 1948 : return (lookup_collation_cache(collation, true))->ctype_is_c;
1366 : }
1367 :
/*
 * Locale data for the default collation.  In this file its provider field
 * is consulted by lc_collate_is_c(), lc_ctype_is_c() and
 * pg_newlocale_from_collation(); the latter returns a pointer to this struct
 * when the provider is ICU.  NOTE(review): it is filled in outside this
 * section of the file -- confirm where.
 */
struct pg_locale_struct default_locale;
1369 :
1370 : void
1371 21404 : make_icu_collator(const char *iculocstr,
1372 : const char *icurules,
1373 : struct pg_locale_struct *resultp)
1374 : {
1375 : #ifdef USE_ICU
1376 : UCollator *collator;
1377 :
1378 21404 : collator = pg_ucol_open(iculocstr);
1379 :
1380 : /*
1381 : * If rules are specified, we extract the rules of the standard collation,
1382 : * add our own rules, and make a new collator with the combined rules.
1383 : */
1384 21400 : if (icurules)
1385 : {
1386 : const UChar *default_rules;
1387 : UChar *agg_rules;
1388 : UChar *my_rules;
1389 : UErrorCode status;
1390 : int32_t length;
1391 :
1392 12 : default_rules = ucol_getRules(collator, &length);
1393 12 : icu_to_uchar(&my_rules, icurules, strlen(icurules));
1394 :
1395 12 : agg_rules = palloc_array(UChar, u_strlen(default_rules) + u_strlen(my_rules) + 1);
1396 12 : u_strcpy(agg_rules, default_rules);
1397 12 : u_strcat(agg_rules, my_rules);
1398 :
1399 12 : ucol_close(collator);
1400 :
1401 12 : status = U_ZERO_ERROR;
1402 12 : collator = ucol_openRules(agg_rules, u_strlen(agg_rules),
1403 : UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &status);
1404 12 : if (U_FAILURE(status))
1405 6 : ereport(ERROR,
1406 : (errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
1407 : iculocstr, icurules, u_errorName(status))));
1408 : }
1409 :
1410 : /* We will leak this string if the caller errors later :-( */
1411 21394 : resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, iculocstr);
1412 21394 : resultp->info.icu.ucol = collator;
1413 : #else /* not USE_ICU */
1414 : /* could get here if a collation was created by a build with ICU */
1415 : ereport(ERROR,
1416 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1417 : errmsg("ICU is not supported in this build")));
1418 : #endif /* not USE_ICU */
1419 21394 : }
1420 :
1421 :
1422 : /* simple subroutine for reporting errors from newlocale() */
1423 : #ifdef HAVE_LOCALE_T
1424 : static void
1425 0 : report_newlocale_failure(const char *localename)
1426 : {
1427 : int save_errno;
1428 :
1429 : /*
1430 : * Windows doesn't provide any useful error indication from
1431 : * _create_locale(), and BSD-derived platforms don't seem to feel they
1432 : * need to set errno either (even though POSIX is pretty clear that
1433 : * newlocale should do so). So, if errno hasn't been set, assume ENOENT
1434 : * is what to report.
1435 : */
1436 0 : if (errno == 0)
1437 0 : errno = ENOENT;
1438 :
1439 : /*
1440 : * ENOENT means "no such locale", not "no such file", so clarify that
1441 : * errno with an errdetail message.
1442 : */
1443 0 : save_errno = errno; /* auxiliary funcs might change errno */
1444 0 : ereport(ERROR,
1445 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1446 : errmsg("could not create locale \"%s\": %m",
1447 : localename),
1448 : (save_errno == ENOENT ?
1449 : errdetail("The operating system could not find any locale data for the locale name \"%s\".",
1450 : localename) : 0)));
1451 : }
1452 : #endif /* HAVE_LOCALE_T */
1453 :
1454 : bool
1455 10748630 : pg_locale_deterministic(pg_locale_t locale)
1456 : {
1457 : /* default locale must always be deterministic */
1458 10748630 : if (locale == NULL)
1459 586802 : return true;
1460 : else
1461 10161828 : return locale->deterministic;
1462 : }
1463 :
1464 : /*
1465 : * Create a locale_t from a collation OID. Results are cached for the
1466 : * lifetime of the backend. Thus, do not free the result with freelocale().
1467 : *
1468 : * As a special optimization, the default/database collation returns 0.
1469 : * Callers should then revert to the non-locale_t-enabled code path.
1470 : * Also, callers should avoid calling this before going down a C/POSIX
1471 : * fastpath, because such a fastpath should work even on platforms without
1472 : * locale_t support in the C library.
1473 : *
1474 : * For simplicity, we always generate COLLATE + CTYPE even though we
1475 : * might only need one of them. Since this is called only once per session,
1476 : * it shouldn't cost much.
1477 : */
1478 : pg_locale_t
1479 15755414 : pg_newlocale_from_collation(Oid collid)
1480 : {
1481 : collation_cache_entry *cache_entry;
1482 :
1483 : /* Callers must pass a valid OID */
1484 : Assert(OidIsValid(collid));
1485 :
1486 15755414 : if (collid == DEFAULT_COLLATION_OID)
1487 : {
1488 15736524 : if (default_locale.provider == COLLPROVIDER_ICU)
1489 15723866 : return &default_locale;
1490 : else
1491 12658 : return (pg_locale_t) 0;
1492 : }
1493 :
1494 18890 : cache_entry = lookup_collation_cache(collid, false);
1495 :
1496 18890 : if (cache_entry->locale == 0)
1497 : {
1498 : /* We haven't computed this yet in this session, so do it */
1499 : HeapTuple tp;
1500 : Form_pg_collation collform;
1501 : struct pg_locale_struct result;
1502 : pg_locale_t resultp;
1503 : Datum datum;
1504 : bool isnull;
1505 :
1506 184 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1507 184 : if (!HeapTupleIsValid(tp))
1508 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
1509 184 : collform = (Form_pg_collation) GETSTRUCT(tp);
1510 :
1511 : /* We'll fill in the result struct locally before allocating memory */
1512 184 : memset(&result, 0, sizeof(result));
1513 184 : result.provider = collform->collprovider;
1514 184 : result.deterministic = collform->collisdeterministic;
1515 :
1516 184 : if (collform->collprovider == COLLPROVIDER_LIBC)
1517 : {
1518 : #ifdef HAVE_LOCALE_T
1519 : const char *collcollate;
1520 : const char *collctype pg_attribute_unused();
1521 : locale_t loc;
1522 :
1523 0 : datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
1524 0 : collcollate = TextDatumGetCString(datum);
1525 0 : datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
1526 0 : collctype = TextDatumGetCString(datum);
1527 :
1528 0 : if (strcmp(collcollate, collctype) == 0)
1529 : {
1530 : /* Normal case where they're the same */
1531 0 : errno = 0;
1532 : #ifndef WIN32
1533 0 : loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
1534 : NULL);
1535 : #else
1536 : loc = _create_locale(LC_ALL, collcollate);
1537 : #endif
1538 0 : if (!loc)
1539 0 : report_newlocale_failure(collcollate);
1540 : }
1541 : else
1542 : {
1543 : #ifndef WIN32
1544 : /* We need two newlocale() steps */
1545 : locale_t loc1;
1546 :
1547 0 : errno = 0;
1548 0 : loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
1549 0 : if (!loc1)
1550 0 : report_newlocale_failure(collcollate);
1551 0 : errno = 0;
1552 0 : loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
1553 0 : if (!loc)
1554 0 : report_newlocale_failure(collctype);
1555 : #else
1556 :
1557 : /*
1558 : * XXX The _create_locale() API doesn't appear to support
1559 : * this. Could perhaps be worked around by changing
1560 : * pg_locale_t to contain two separate fields.
1561 : */
1562 : ereport(ERROR,
1563 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1564 : errmsg("collations with different collate and ctype values are not supported on this platform")));
1565 : #endif
1566 : }
1567 :
1568 0 : result.info.lt = loc;
1569 : #else /* not HAVE_LOCALE_T */
1570 : /* platform that doesn't support locale_t */
1571 : ereport(ERROR,
1572 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1573 : errmsg("collation provider LIBC is not supported on this platform")));
1574 : #endif /* not HAVE_LOCALE_T */
1575 : }
1576 184 : else if (collform->collprovider == COLLPROVIDER_ICU)
1577 : {
1578 : const char *iculocstr;
1579 : const char *icurules;
1580 :
1581 184 : datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colliculocale);
1582 184 : iculocstr = TextDatumGetCString(datum);
1583 :
1584 184 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
1585 184 : if (!isnull)
1586 12 : icurules = TextDatumGetCString(datum);
1587 : else
1588 172 : icurules = NULL;
1589 :
1590 184 : make_icu_collator(iculocstr, icurules, &result);
1591 : }
1592 :
1593 178 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1594 : &isnull);
1595 178 : if (!isnull)
1596 : {
1597 : char *actual_versionstr;
1598 : char *collversionstr;
1599 :
1600 178 : collversionstr = TextDatumGetCString(datum);
1601 :
1602 178 : datum = SysCacheGetAttrNotNull(COLLOID, tp, collform->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate);
1603 :
1604 178 : actual_versionstr = get_collation_actual_version(collform->collprovider,
1605 178 : TextDatumGetCString(datum));
1606 178 : if (!actual_versionstr)
1607 : {
1608 : /*
1609 : * This could happen when specifying a version in CREATE
1610 : * COLLATION but the provider does not support versioning, or
1611 : * manually creating a mess in the catalogs.
1612 : */
1613 0 : ereport(ERROR,
1614 : (errmsg("collation \"%s\" has no actual version, but a version was recorded",
1615 : NameStr(collform->collname))));
1616 : }
1617 :
1618 178 : if (strcmp(actual_versionstr, collversionstr) != 0)
1619 0 : ereport(WARNING,
1620 : (errmsg("collation \"%s\" has version mismatch",
1621 : NameStr(collform->collname)),
1622 : errdetail("The collation in the database was created using version %s, "
1623 : "but the operating system provides version %s.",
1624 : collversionstr, actual_versionstr),
1625 : errhint("Rebuild all objects affected by this collation and run "
1626 : "ALTER COLLATION %s REFRESH VERSION, "
1627 : "or build PostgreSQL with the right library version.",
1628 : quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1629 : NameStr(collform->collname)))));
1630 : }
1631 :
1632 178 : ReleaseSysCache(tp);
1633 :
1634 : /* We'll keep the pg_locale_t structures in TopMemoryContext */
1635 178 : resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp));
1636 178 : *resultp = result;
1637 :
1638 178 : cache_entry->locale = resultp;
1639 : }
1640 :
1641 18884 : return cache_entry->locale;
1642 : }
1643 :
1644 : /*
1645 : * Get provider-specific collation version string for the given collation from
1646 : * the operating system/library.
1647 : */
1648 : char *
1649 497654 : get_collation_actual_version(char collprovider, const char *collcollate)
1650 : {
1651 497654 : char *collversion = NULL;
1652 :
1653 : #ifdef USE_ICU
1654 497654 : if (collprovider == COLLPROVIDER_ICU)
1655 : {
1656 : UCollator *collator;
1657 : UVersionInfo versioninfo;
1658 : char buf[U_MAX_VERSION_STRING_LENGTH];
1659 :
1660 495040 : collator = pg_ucol_open(collcollate);
1661 :
1662 495040 : ucol_getVersion(collator, versioninfo);
1663 495040 : ucol_close(collator);
1664 :
1665 495040 : u_versionToString(versioninfo, buf);
1666 495040 : collversion = pstrdup(buf);
1667 : }
1668 : else
1669 : #endif
1670 5228 : if (collprovider == COLLPROVIDER_LIBC &&
1671 5162 : pg_strcasecmp("C", collcollate) != 0 &&
1672 3892 : pg_strncasecmp("C.", collcollate, 2) != 0 &&
1673 1344 : pg_strcasecmp("POSIX", collcollate) != 0)
1674 : {
1675 : #if defined(__GLIBC__)
1676 : /* Use the glibc version because we don't have anything better. */
1677 1320 : collversion = pstrdup(gnu_get_libc_version());
1678 : #elif defined(LC_VERSION_MASK)
1679 : locale_t loc;
1680 :
1681 : /* Look up FreeBSD collation version. */
1682 : loc = newlocale(LC_COLLATE, collcollate, NULL);
1683 : if (loc)
1684 : {
1685 : collversion =
1686 : pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
1687 : freelocale(loc);
1688 : }
1689 : else
1690 : ereport(ERROR,
1691 : (errmsg("could not load locale \"%s\"", collcollate)));
1692 : #elif defined(WIN32)
1693 : /*
1694 : * If we are targeting Windows Vista and above, we can ask for a name
1695 : * given a collation name (earlier versions required a location code
1696 : * that we don't have).
1697 : */
1698 : NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
1699 : WCHAR wide_collcollate[LOCALE_NAME_MAX_LENGTH];
1700 :
1701 : MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
1702 : LOCALE_NAME_MAX_LENGTH);
1703 : if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
1704 : {
1705 : /*
1706 : * GetNLSVersionEx() wants a language tag such as "en-US", not a
1707 : * locale name like "English_United States.1252". Until those
1708 : * values can be prevented from entering the system, or 100%
1709 : * reliably converted to the more useful tag format, tolerate the
1710 : * resulting error and report that we have no version data.
1711 : */
1712 : if (GetLastError() == ERROR_INVALID_PARAMETER)
1713 : return NULL;
1714 :
1715 : ereport(ERROR,
1716 : (errmsg("could not get collation version for locale \"%s\": error code %lu",
1717 : collcollate,
1718 : GetLastError())));
1719 : }
1720 : collversion = psprintf("%lu.%lu,%lu.%lu",
1721 : (version.dwNLSVersion >> 8) & 0xFFFF,
1722 : version.dwNLSVersion & 0xFF,
1723 : (version.dwDefinedVersion >> 8) & 0xFFFF,
1724 : version.dwDefinedVersion & 0xFF);
1725 : #endif
1726 : }
1727 :
1728 497654 : return collversion;
1729 : }
1730 :
1731 : /*
1732 : * pg_strncoll_libc_win32_utf8
1733 : *
1734 : * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
1735 : * invoke wcscoll() or wcscoll_l().
1736 : */
1737 : #ifdef WIN32
1738 : static int
1739 : pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
1740 : size_t len2, pg_locale_t locale)
1741 : {
1742 : char sbuf[TEXTBUFLEN];
1743 : char *buf = sbuf;
1744 : char *a1p,
1745 : *a2p;
1746 : int a1len = len1 * 2 + 2;
1747 : int a2len = len2 * 2 + 2;
1748 : int r;
1749 : int result;
1750 :
1751 : Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1752 : Assert(GetDatabaseEncoding() == PG_UTF8);
1753 : #ifndef WIN32
1754 : Assert(false);
1755 : #endif
1756 :
1757 : if (a1len + a2len > TEXTBUFLEN)
1758 : buf = palloc(a1len + a2len);
1759 :
1760 : a1p = buf;
1761 : a2p = buf + a1len;
1762 :
1763 : /* API does not work for zero-length input */
1764 : if (len1 == 0)
1765 : r = 0;
1766 : else
1767 : {
1768 : r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1769 : (LPWSTR) a1p, a1len / 2);
1770 : if (!r)
1771 : ereport(ERROR,
1772 : (errmsg("could not convert string to UTF-16: error code %lu",
1773 : GetLastError())));
1774 : }
1775 : ((LPWSTR) a1p)[r] = 0;
1776 :
1777 : if (len2 == 0)
1778 : r = 0;
1779 : else
1780 : {
1781 : r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1782 : (LPWSTR) a2p, a2len / 2);
1783 : if (!r)
1784 : ereport(ERROR,
1785 : (errmsg("could not convert string to UTF-16: error code %lu",
1786 : GetLastError())));
1787 : }
1788 : ((LPWSTR) a2p)[r] = 0;
1789 :
1790 : errno = 0;
1791 : #ifdef HAVE_LOCALE_T
1792 : if (locale)
1793 : result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
1794 : else
1795 : #endif
1796 : result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1797 : if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */
1798 : ereport(ERROR,
1799 : (errmsg("could not compare Unicode strings: %m")));
1800 :
1801 : if (buf != sbuf)
1802 : pfree(buf);
1803 :
1804 : return result;
1805 : }
1806 : #endif /* WIN32 */
1807 :
1808 : /*
1809 : * pg_strcoll_libc
1810 : *
1811 : * Call strcoll(), strcoll_l(), wcscoll(), or wcscoll_l() as appropriate for
1812 : * the given locale, platform, and database encoding. If the locale is NULL,
1813 : * use the database collation.
1814 : *
1815 : * Arguments must be encoded in the database encoding and nul-terminated.
1816 : */
1817 : static int
1818 0 : pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
1819 : {
1820 : int result;
1821 :
1822 : Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1823 : #ifdef WIN32
1824 : if (GetDatabaseEncoding() == PG_UTF8)
1825 : {
1826 : size_t len1 = strlen(arg1);
1827 : size_t len2 = strlen(arg2);
1828 :
1829 : result = pg_strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale);
1830 : }
1831 : else
1832 : #endif /* WIN32 */
1833 0 : if (locale)
1834 : {
1835 : #ifdef HAVE_LOCALE_T
1836 0 : result = strcoll_l(arg1, arg2, locale->info.lt);
1837 : #else
1838 : /* shouldn't happen */
1839 : elog(ERROR, "unsupported collprovider: %c", locale->provider);
1840 : #endif
1841 : }
1842 : else
1843 0 : result = strcoll(arg1, arg2);
1844 :
1845 0 : return result;
1846 : }
1847 :
1848 : /*
1849 : * pg_strncoll_libc
1850 : *
1851 : * Nul-terminate the arguments and call pg_strcoll_libc().
1852 : */
1853 : static int
1854 0 : pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2,
1855 : pg_locale_t locale)
1856 : {
1857 : char sbuf[TEXTBUFLEN];
1858 0 : char *buf = sbuf;
1859 0 : size_t bufsize1 = len1 + 1;
1860 0 : size_t bufsize2 = len2 + 1;
1861 : char *arg1n;
1862 : char *arg2n;
1863 : int result;
1864 :
1865 : Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1866 :
1867 : #ifdef WIN32
1868 : /* check for this case before doing the work for nul-termination */
1869 : if (GetDatabaseEncoding() == PG_UTF8)
1870 : return pg_strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale);
1871 : #endif /* WIN32 */
1872 :
1873 0 : if (bufsize1 + bufsize2 > TEXTBUFLEN)
1874 0 : buf = palloc(bufsize1 + bufsize2);
1875 :
1876 0 : arg1n = buf;
1877 0 : arg2n = buf + bufsize1;
1878 :
1879 : /* nul-terminate arguments */
1880 0 : memcpy(arg1n, arg1, len1);
1881 0 : arg1n[len1] = '\0';
1882 0 : memcpy(arg2n, arg2, len2);
1883 0 : arg2n[len2] = '\0';
1884 :
1885 0 : result = pg_strcoll_libc(arg1n, arg2n, locale);
1886 :
1887 0 : if (buf != sbuf)
1888 0 : pfree(buf);
1889 :
1890 0 : return result;
1891 : }
1892 :
1893 : #ifdef USE_ICU
1894 :
1895 : /*
1896 : * pg_strncoll_icu_no_utf8
1897 : *
1898 : * Convert the arguments from the database encoding to UChar strings, then
1899 : * call ucol_strcoll(). An argument length of -1 means that the string is
1900 : * NUL-terminated.
1901 : *
1902 : * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
1903 : * caller should call that instead.
1904 : */
1905 : static int
1906 0 : pg_strncoll_icu_no_utf8(const char *arg1, int32_t len1,
1907 : const char *arg2, int32_t len2, pg_locale_t locale)
1908 : {
1909 : char sbuf[TEXTBUFLEN];
1910 0 : char *buf = sbuf;
1911 : int32_t ulen1;
1912 : int32_t ulen2;
1913 : size_t bufsize1;
1914 : size_t bufsize2;
1915 : UChar *uchar1,
1916 : *uchar2;
1917 : int result;
1918 :
1919 : Assert(locale->provider == COLLPROVIDER_ICU);
1920 : #ifdef HAVE_UCOL_STRCOLLUTF8
1921 : Assert(GetDatabaseEncoding() != PG_UTF8);
1922 : #endif
1923 :
1924 0 : init_icu_converter();
1925 :
1926 0 : ulen1 = uchar_length(icu_converter, arg1, len1);
1927 0 : ulen2 = uchar_length(icu_converter, arg2, len2);
1928 :
1929 0 : bufsize1 = (ulen1 + 1) * sizeof(UChar);
1930 0 : bufsize2 = (ulen2 + 1) * sizeof(UChar);
1931 :
1932 0 : if (bufsize1 + bufsize2 > TEXTBUFLEN)
1933 0 : buf = palloc(bufsize1 + bufsize2);
1934 :
1935 0 : uchar1 = (UChar *) buf;
1936 0 : uchar2 = (UChar *) (buf + bufsize1);
1937 :
1938 0 : ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
1939 0 : ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
1940 :
1941 0 : result = ucol_strcoll(locale->info.icu.ucol,
1942 : uchar1, ulen1,
1943 : uchar2, ulen2);
1944 :
1945 0 : if (buf != sbuf)
1946 0 : pfree(buf);
1947 :
1948 0 : return result;
1949 : }
1950 :
1951 : /*
1952 : * pg_strncoll_icu
1953 : *
1954 : * Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given
1955 : * database encoding. An argument length of -1 means the string is
1956 : * NUL-terminated.
1957 : *
1958 : * Arguments must be encoded in the database encoding.
1959 : */
1960 : static int
1961 24046218 : pg_strncoll_icu(const char *arg1, int32_t len1, const char *arg2, int32_t len2,
1962 : pg_locale_t locale)
1963 : {
1964 : int result;
1965 :
1966 : Assert(locale->provider == COLLPROVIDER_ICU);
1967 :
1968 : #ifdef HAVE_UCOL_STRCOLLUTF8
1969 24046218 : if (GetDatabaseEncoding() == PG_UTF8)
1970 : {
1971 : UErrorCode status;
1972 :
1973 24046218 : status = U_ZERO_ERROR;
1974 24046218 : result = ucol_strcollUTF8(locale->info.icu.ucol,
1975 : arg1, len1,
1976 : arg2, len2,
1977 : &status);
1978 24046218 : if (U_FAILURE(status))
1979 0 : ereport(ERROR,
1980 : (errmsg("collation failed: %s", u_errorName(status))));
1981 : }
1982 : else
1983 : #endif
1984 : {
1985 0 : result = pg_strncoll_icu_no_utf8(arg1, len1, arg2, len2, locale);
1986 : }
1987 :
1988 24046218 : return result;
1989 : }
1990 :
1991 : #endif /* USE_ICU */
1992 :
1993 : /*
1994 : * pg_strcoll
1995 : *
1996 : * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll(), strcoll_l(), wcscoll(),
1997 : * or wcscoll_l() as appropriate for the given locale, platform, and database
1998 : * encoding. If the locale is not specified, use the database collation.
1999 : *
2000 : * Arguments must be encoded in the database encoding and nul-terminated.
2001 : *
2002 : * The caller is responsible for breaking ties if the collation is
2003 : * deterministic; this maintains consistency with pg_strxfrm(), which cannot
2004 : * easily account for deterministic collations.
2005 : */
2006 : int
2007 21416978 : pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
2008 : {
2009 : int result;
2010 :
2011 21416978 : if (!locale || locale->provider == COLLPROVIDER_LIBC)
2012 0 : result = pg_strcoll_libc(arg1, arg2, locale);
2013 : #ifdef USE_ICU
2014 21416978 : else if (locale->provider == COLLPROVIDER_ICU)
2015 21416978 : result = pg_strncoll_icu(arg1, -1, arg2, -1, locale);
2016 : #endif
2017 : else
2018 : /* shouldn't happen */
2019 0 : elog(ERROR, "unsupported collprovider: %c", locale->provider);
2020 :
2021 21416978 : return result;
2022 : }
2023 :
2024 : /*
2025 : * pg_strncoll
2026 : *
2027 : * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll(), strcoll_l(), wcscoll(),
2028 : * or wcscoll_l() as appropriate for the given locale, platform, and database
2029 : * encoding. If the locale is not specified, use the database collation.
2030 : *
2031 : * Arguments must be encoded in the database encoding.
2032 : *
2033 : * This function may need to nul-terminate the arguments for libc functions;
2034 : * so if the caller already has nul-terminated strings, it should call
2035 : * pg_strcoll() instead.
2036 : *
2037 : * The caller is responsible for breaking ties if the collation is
2038 : * deterministic; this maintains consistency with pg_strnxfrm(), which cannot
2039 : * easily account for deterministic collations.
2040 : */
2041 : int
2042 2629240 : pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2,
2043 : pg_locale_t locale)
2044 : {
2045 : int result;
2046 :
2047 2629240 : if (!locale || locale->provider == COLLPROVIDER_LIBC)
2048 0 : result = pg_strncoll_libc(arg1, len1, arg2, len2, locale);
2049 : #ifdef USE_ICU
2050 2629240 : else if (locale->provider == COLLPROVIDER_ICU)
2051 2629240 : result = pg_strncoll_icu(arg1, len1, arg2, len2, locale);
2052 : #endif
2053 : else
2054 : /* shouldn't happen */
2055 0 : elog(ERROR, "unsupported collprovider: %c", locale->provider);
2056 :
2057 2629240 : return result;
2058 : }
2059 :
2060 :
2061 : static size_t
2062 0 : pg_strxfrm_libc(char *dest, const char *src, size_t destsize,
2063 : pg_locale_t locale)
2064 : {
2065 : Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
2066 :
2067 : #ifdef TRUST_STRXFRM
2068 : #ifdef HAVE_LOCALE_T
2069 : if (locale)
2070 : return strxfrm_l(dest, src, destsize, locale->info.lt);
2071 : else
2072 : #endif
2073 : return strxfrm(dest, src, destsize);
2074 : #else
2075 : /* shouldn't happen */
2076 0 : elog(ERROR, "unsupported collprovider: %c", locale->provider);
2077 : return 0; /* keep compiler quiet */
2078 : #endif
2079 : }
2080 :
2081 : static size_t
2082 0 : pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize,
2083 : pg_locale_t locale)
2084 : {
2085 : char sbuf[TEXTBUFLEN];
2086 0 : char *buf = sbuf;
2087 0 : size_t bufsize = srclen + 1;
2088 : size_t result;
2089 :
2090 : Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
2091 :
2092 0 : if (bufsize > TEXTBUFLEN)
2093 0 : buf = palloc(bufsize);
2094 :
2095 : /* nul-terminate arguments */
2096 0 : memcpy(buf, src, srclen);
2097 0 : buf[srclen] = '\0';
2098 :
2099 0 : result = pg_strxfrm_libc(dest, buf, destsize, locale);
2100 :
2101 0 : if (buf != sbuf)
2102 0 : pfree(buf);
2103 :
2104 : /* if dest is defined, it should be nul-terminated */
2105 : Assert(result >= destsize || dest[result] == '\0');
2106 :
2107 0 : return result;
2108 : }
2109 :
2110 : #ifdef USE_ICU
2111 :
2112 : /* 'srclen' of -1 means the strings are NUL-terminated */
2113 : static size_t
2114 996 : pg_strnxfrm_icu(char *dest, const char *src, int32_t srclen, int32_t destsize,
2115 : pg_locale_t locale)
2116 : {
2117 : char sbuf[TEXTBUFLEN];
2118 996 : char *buf = sbuf;
2119 : UChar *uchar;
2120 : int32_t ulen;
2121 : size_t uchar_bsize;
2122 : Size result_bsize;
2123 :
2124 : Assert(locale->provider == COLLPROVIDER_ICU);
2125 :
2126 996 : init_icu_converter();
2127 :
2128 996 : ulen = uchar_length(icu_converter, src, srclen);
2129 :
2130 996 : uchar_bsize = (ulen + 1) * sizeof(UChar);
2131 :
2132 996 : if (uchar_bsize > TEXTBUFLEN)
2133 0 : buf = palloc(uchar_bsize);
2134 :
2135 996 : uchar = (UChar *) buf;
2136 :
2137 996 : ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
2138 :
2139 996 : result_bsize = ucol_getSortKey(locale->info.icu.ucol,
2140 : uchar, ulen,
2141 : (uint8_t *) dest, destsize);
2142 :
2143 : /*
2144 : * ucol_getSortKey() counts the nul-terminator in the result length, but
2145 : * this function should not.
2146 : */
2147 : Assert(result_bsize > 0);
2148 996 : result_bsize--;
2149 :
2150 996 : if (buf != sbuf)
2151 0 : pfree(buf);
2152 :
2153 : /* if dest is defined, it should be nul-terminated */
2154 : Assert(result_bsize >= destsize || dest[result_bsize] == '\0');
2155 :
2156 996 : return result_bsize;
2157 : }
2158 :
2159 : /* 'srclen' of -1 means the strings are NUL-terminated */
2160 : static size_t
2161 0 : pg_strnxfrm_prefix_icu_no_utf8(char *dest, const char *src, int32_t srclen,
2162 : int32_t destsize, pg_locale_t locale)
2163 : {
2164 : char sbuf[TEXTBUFLEN];
2165 0 : char *buf = sbuf;
2166 : UCharIterator iter;
2167 : uint32_t state[2];
2168 : UErrorCode status;
2169 0 : int32_t ulen = -1;
2170 0 : UChar *uchar = NULL;
2171 : size_t uchar_bsize;
2172 : Size result_bsize;
2173 :
2174 : Assert(locale->provider == COLLPROVIDER_ICU);
2175 : Assert(GetDatabaseEncoding() != PG_UTF8);
2176 :
2177 0 : init_icu_converter();
2178 :
2179 0 : ulen = uchar_length(icu_converter, src, srclen);
2180 :
2181 0 : uchar_bsize = (ulen + 1) * sizeof(UChar);
2182 :
2183 0 : if (uchar_bsize > TEXTBUFLEN)
2184 0 : buf = palloc(uchar_bsize);
2185 :
2186 0 : uchar = (UChar *) buf;
2187 :
2188 0 : ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
2189 :
2190 0 : uiter_setString(&iter, uchar, ulen);
2191 0 : state[0] = state[1] = 0; /* won't need that again */
2192 0 : status = U_ZERO_ERROR;
2193 0 : result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol,
2194 : &iter,
2195 : state,
2196 : (uint8_t *) dest,
2197 : destsize,
2198 : &status);
2199 0 : if (U_FAILURE(status))
2200 0 : ereport(ERROR,
2201 : (errmsg("sort key generation failed: %s",
2202 : u_errorName(status))));
2203 :
2204 0 : return result_bsize;
2205 : }
2206 :
2207 : /* 'srclen' of -1 means the strings are NUL-terminated */
2208 : static size_t
2209 333522 : pg_strnxfrm_prefix_icu(char *dest, const char *src, int32_t srclen,
2210 : int32_t destsize, pg_locale_t locale)
2211 : {
2212 : size_t result;
2213 :
2214 : Assert(locale->provider == COLLPROVIDER_ICU);
2215 :
2216 333522 : if (GetDatabaseEncoding() == PG_UTF8)
2217 : {
2218 : UCharIterator iter;
2219 : uint32_t state[2];
2220 : UErrorCode status;
2221 :
2222 333522 : uiter_setUTF8(&iter, src, srclen);
2223 333522 : state[0] = state[1] = 0; /* won't need that again */
2224 333522 : status = U_ZERO_ERROR;
2225 333522 : result = ucol_nextSortKeyPart(locale->info.icu.ucol,
2226 : &iter,
2227 : state,
2228 : (uint8_t *) dest,
2229 : destsize,
2230 : &status);
2231 333522 : if (U_FAILURE(status))
2232 0 : ereport(ERROR,
2233 : (errmsg("sort key generation failed: %s",
2234 : u_errorName(status))));
2235 : }
2236 : else
2237 0 : result = pg_strnxfrm_prefix_icu_no_utf8(dest, src, srclen, destsize,
2238 : locale);
2239 :
2240 333522 : return result;
2241 : }
2242 :
2243 : #endif
2244 :
2245 : /*
2246 : * Return true if the collation provider supports pg_strxfrm() and
2247 : * pg_strnxfrm(); otherwise false.
2248 : *
2249 : * Unfortunately, it seems that strxfrm() for non-C collations is broken on
2250 : * many common platforms; testing of multiple versions of glibc reveals that,
2251 : * for many locales, strcoll() and strxfrm() do not return consistent
2252 : * results. While no other libc other than Cygwin has so far been shown to
2253 : * have a problem, we take the conservative course of action for right now and
2254 : * disable this categorically. (Users who are certain this isn't a problem on
2255 : * their system can define TRUST_STRXFRM.)
2256 : *
2257 : * No similar problem is known for the ICU provider.
2258 : */
2259 : bool
2260 51310 : pg_strxfrm_enabled(pg_locale_t locale)
2261 : {
2262 51310 : if (!locale || locale->provider == COLLPROVIDER_LIBC)
2263 : #ifdef TRUST_STRXFRM
2264 : return true;
2265 : #else
2266 0 : return false;
2267 : #endif
2268 51310 : else if (locale->provider == COLLPROVIDER_ICU)
2269 51310 : return true;
2270 : else
2271 : /* shouldn't happen */
2272 0 : elog(ERROR, "unsupported collprovider: %c", locale->provider);
2273 :
2274 : return false; /* keep compiler quiet */
2275 : }
2276 :
2277 : /*
2278 : * pg_strxfrm
2279 : *
2280 : * Transforms 'src' to a nul-terminated string stored in 'dest' such that
2281 : * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on
2282 : * untransformed strings.
2283 : *
2284 : * The provided 'src' must be nul-terminated. If 'destsize' is zero, 'dest'
2285 : * may be NULL.
2286 : *
2287 : * Returns the number of bytes needed to store the transformed string,
2288 : * excluding the terminating nul byte. If the value returned is 'destsize' or
2289 : * greater, the resulting contents of 'dest' are undefined.
2290 : */
2291 : size_t
2292 0 : pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
2293 : {
2294 0 : size_t result = 0; /* keep compiler quiet */
2295 :
2296 0 : if (!locale || locale->provider == COLLPROVIDER_LIBC)
2297 0 : result = pg_strxfrm_libc(dest, src, destsize, locale);
2298 : #ifdef USE_ICU
2299 0 : else if (locale->provider == COLLPROVIDER_ICU)
2300 0 : result = pg_strnxfrm_icu(dest, src, -1, destsize, locale);
2301 : #endif
2302 : else
2303 : /* shouldn't happen */
2304 0 : elog(ERROR, "unsupported collprovider: %c", locale->provider);
2305 :
2306 0 : return result;
2307 : }
2308 :
2309 : /*
2310 : * pg_strnxfrm
2311 : *
2312 : * Transforms 'src' to a nul-terminated string stored in 'dest' such that
2313 : * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on
2314 : * untransformed strings.
2315 : *
2316 : * 'src' does not need to be nul-terminated. If 'destsize' is zero, 'dest' may
2317 : * be NULL.
2318 : *
2319 : * Returns the number of bytes needed to store the transformed string,
2320 : * excluding the terminating nul byte. If the value returned is 'destsize' or
2321 : * greater, the resulting contents of 'dest' are undefined.
2322 : *
2323 : * This function may need to nul-terminate the argument for libc functions;
2324 : * so if the caller already has a nul-terminated string, it should call
2325 : * pg_strxfrm() instead.
2326 : */
2327 : size_t
2328 996 : pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen,
2329 : pg_locale_t locale)
2330 : {
2331 996 : size_t result = 0; /* keep compiler quiet */
2332 :
2333 996 : if (!locale || locale->provider == COLLPROVIDER_LIBC)
2334 0 : result = pg_strnxfrm_libc(dest, src, srclen, destsize, locale);
2335 : #ifdef USE_ICU
2336 996 : else if (locale->provider == COLLPROVIDER_ICU)
2337 996 : result = pg_strnxfrm_icu(dest, src, srclen, destsize, locale);
2338 : #endif
2339 : else
2340 : /* shouldn't happen */
2341 0 : elog(ERROR, "unsupported collprovider: %c", locale->provider);
2342 :
2343 996 : return result;
2344 : }
2345 :
2346 : /*
2347 : * Return true if the collation provider supports pg_strxfrm_prefix() and
2348 : * pg_strnxfrm_prefix(); otherwise false.
2349 : */
2350 : bool
2351 333522 : pg_strxfrm_prefix_enabled(pg_locale_t locale)
2352 : {
2353 333522 : if (!locale || locale->provider == COLLPROVIDER_LIBC)
2354 0 : return false;
2355 333522 : else if (locale->provider == COLLPROVIDER_ICU)
2356 333522 : return true;
2357 : else
2358 : /* shouldn't happen */
2359 0 : elog(ERROR, "unsupported collprovider: %c", locale->provider);
2360 :
2361 : return false; /* keep compiler quiet */
2362 : }
2363 :
2364 : /*
2365 : * pg_strxfrm_prefix
2366 : *
2367 : * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary
2368 : * memcmp() on the byte sequence is equivalent to pg_strcoll() on
2369 : * untransformed strings. The result is not nul-terminated.
2370 : *
2371 : * The provided 'src' must be nul-terminated.
2372 : *
2373 : * If destsize is not large enough to hold the resulting byte sequence, stores
2374 : * only the first destsize bytes in 'dest'. Returns the number of bytes
2375 : * actually copied to 'dest'.
2376 : */
2377 : size_t
2378 333522 : pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
2379 : pg_locale_t locale)
2380 : {
2381 333522 : size_t result = 0; /* keep compiler quiet */
2382 :
2383 333522 : if (!locale || locale->provider == COLLPROVIDER_LIBC)
2384 0 : elog(ERROR, "collprovider '%c' does not support pg_strxfrm_prefix()",
2385 : locale->provider);
2386 : #ifdef USE_ICU
2387 333522 : else if (locale->provider == COLLPROVIDER_ICU)
2388 333522 : result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
2389 : #endif
2390 : else
2391 : /* shouldn't happen */
2392 0 : elog(ERROR, "unsupported collprovider: %c", locale->provider);
2393 :
2394 333522 : return result;
2395 : }
2396 :
2397 : /*
2398 : * pg_strnxfrm_prefix
2399 : *
2400 : * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary
2401 : * memcmp() on the byte sequence is equivalent to pg_strcoll() on
2402 : * untransformed strings. The result is not nul-terminated.
2403 : *
2404 : * The provided 'src' must be nul-terminated.
2405 : *
2406 : * If destsize is not large enough to hold the resulting byte sequence, stores
2407 : * only the first destsize bytes in 'dest'. Returns the number of bytes
2408 : * actually copied to 'dest'.
2409 : *
2410 : * This function may need to nul-terminate the argument for libc functions;
2411 : * so if the caller already has a nul-terminated string, it should call
2412 : * pg_strxfrm_prefix() instead.
2413 : */
2414 : size_t
2415 0 : pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
2416 : size_t srclen, pg_locale_t locale)
2417 : {
2418 0 : size_t result = 0; /* keep compiler quiet */
2419 :
2420 0 : if (!locale || locale->provider == COLLPROVIDER_LIBC)
2421 0 : elog(ERROR, "collprovider '%c' does not support pg_strnxfrm_prefix()",
2422 : locale->provider);
2423 : #ifdef USE_ICU
2424 0 : else if (locale->provider == COLLPROVIDER_ICU)
2425 0 : result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
2426 : #endif
2427 : else
2428 : /* shouldn't happen */
2429 0 : elog(ERROR, "unsupported collprovider: %c", locale->provider);
2430 :
2431 0 : return result;
2432 : }
2433 :
2434 : #ifdef USE_ICU
2435 :
2436 : /*
2437 : * Wrapper around ucol_open() to handle API differences for older ICU
2438 : * versions.
2439 : */
2440 : static UCollator *
2441 518122 : pg_ucol_open(const char *loc_str)
2442 : {
2443 : UCollator *collator;
2444 : UErrorCode status;
2445 518122 : const char *orig_str = loc_str;
2446 518122 : char *fixed_str = NULL;
2447 :
2448 : /*
2449 : * Must never open default collator, because it depends on the environment
2450 : * and may change at any time. Should not happen, but check here to catch
2451 : * bugs that might be hard to catch otherwise.
2452 : *
2453 : * NB: the default collator is not the same as the collator for the root
2454 : * locale. The root locale may be specified as the empty string, "und", or
2455 : * "root". The default collator is opened by passing NULL to ucol_open().
2456 : */
2457 518122 : if (loc_str == NULL)
2458 0 : elog(ERROR, "opening default collator is not supported");
2459 :
2460 : /*
2461 : * In ICU versions 54 and earlier, "und" is not a recognized spelling of
2462 : * the root locale. If the first component of the locale is "und", replace
2463 : * with "root" before opening.
2464 : */
2465 : if (U_ICU_VERSION_MAJOR_NUM < 55)
2466 : {
2467 : char lang[ULOC_LANG_CAPACITY];
2468 :
2469 : status = U_ZERO_ERROR;
2470 : uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
2471 : if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
2472 : {
2473 : ereport(ERROR,
2474 : (errmsg("could not get language from locale \"%s\": %s",
2475 : loc_str, u_errorName(status))));
2476 : }
2477 :
2478 : if (strcmp(lang, "und") == 0)
2479 : {
2480 : const char *remainder = loc_str + strlen("und");
2481 :
2482 : fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
2483 : strcpy(fixed_str, "root");
2484 : strcat(fixed_str, remainder);
2485 :
2486 : loc_str = fixed_str;
2487 : }
2488 : }
2489 :
2490 518122 : status = U_ZERO_ERROR;
2491 518122 : collator = ucol_open(loc_str, &status);
2492 518122 : if (U_FAILURE(status))
2493 12 : ereport(ERROR,
2494 : /* use original string for error report */
2495 : (errmsg("could not open collator for locale \"%s\": %s",
2496 : orig_str, u_errorName(status))));
2497 :
2498 : if (U_ICU_VERSION_MAJOR_NUM < 54)
2499 : {
2500 : status = U_ZERO_ERROR;
2501 : icu_set_collation_attributes(collator, loc_str, &status);
2502 :
2503 : /*
2504 : * Pretend the error came from ucol_open(), for consistent error
2505 : * message across ICU versions.
2506 : */
2507 : if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
2508 : {
2509 : ucol_close(collator);
2510 : ereport(ERROR,
2511 : (errmsg("could not open collator for locale \"%s\": %s",
2512 : orig_str, u_errorName(status))));
2513 : }
2514 : }
2515 :
2516 518110 : if (fixed_str != NULL)
2517 0 : pfree(fixed_str);
2518 :
2519 518110 : return collator;
2520 : }
2521 :
2522 : static void
2523 2355312 : init_icu_converter(void)
2524 : {
2525 : const char *icu_encoding_name;
2526 : UErrorCode status;
2527 : UConverter *conv;
2528 :
2529 2355312 : if (icu_converter)
2530 2355208 : return; /* already done */
2531 :
2532 104 : icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
2533 104 : if (!icu_encoding_name)
2534 0 : ereport(ERROR,
2535 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2536 : errmsg("encoding \"%s\" not supported by ICU",
2537 : pg_encoding_to_char(GetDatabaseEncoding()))));
2538 :
2539 104 : status = U_ZERO_ERROR;
2540 104 : conv = ucnv_open(icu_encoding_name, &status);
2541 104 : if (U_FAILURE(status))
2542 0 : ereport(ERROR,
2543 : (errmsg("could not open ICU converter for encoding \"%s\": %s",
2544 : icu_encoding_name, u_errorName(status))));
2545 :
2546 104 : icu_converter = conv;
2547 : }
2548 :
2549 : /*
2550 : * Find length, in UChars, of given string if converted to UChar string.
2551 : */
2552 : static size_t
2553 1178160 : uchar_length(UConverter *converter, const char *str, int32_t len)
2554 : {
2555 1178160 : UErrorCode status = U_ZERO_ERROR;
2556 : int32_t ulen;
2557 :
2558 1178160 : ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);
2559 1178160 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
2560 0 : ereport(ERROR,
2561 : (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
2562 1178160 : return ulen;
2563 : }
2564 :
2565 : /*
2566 : * Convert the given source string into a UChar string, stored in dest, and
2567 : * return the length (in UChars).
2568 : */
2569 : static int32_t
2570 1178160 : uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
2571 : const char *src, int32_t srclen)
2572 : {
2573 1178160 : UErrorCode status = U_ZERO_ERROR;
2574 : int32_t ulen;
2575 :
2576 1178160 : status = U_ZERO_ERROR;
2577 1178160 : ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
2578 1178160 : if (U_FAILURE(status))
2579 0 : ereport(ERROR,
2580 : (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
2581 1178160 : return ulen;
2582 : }
2583 :
2584 : /*
2585 : * Convert a string in the database encoding into a string of UChars.
2586 : *
2587 : * The source string at buff is of length nbytes
2588 : * (it needn't be nul-terminated)
2589 : *
2590 : * *buff_uchar receives a pointer to the palloc'd result string, and
2591 : * the function's result is the number of UChars generated.
2592 : *
2593 : * The result string is nul-terminated, though most callers rely on the
2594 : * result length instead.
2595 : */
2596 : int32_t
2597 1177164 : icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
2598 : {
2599 : int32_t len_uchar;
2600 :
2601 1177164 : init_icu_converter();
2602 :
2603 1177164 : len_uchar = uchar_length(icu_converter, buff, nbytes);
2604 :
2605 1177164 : *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
2606 1177164 : len_uchar = uchar_convert(icu_converter,
2607 : *buff_uchar, len_uchar + 1, buff, nbytes);
2608 :
2609 1177164 : return len_uchar;
2610 : }
2611 :
2612 : /*
2613 : * Convert a string of UChars into the database encoding.
2614 : *
2615 : * The source string at buff_uchar is of length len_uchar
2616 : * (it needn't be nul-terminated)
2617 : *
2618 : * *result receives a pointer to the palloc'd result string, and the
2619 : * function's result is the number of bytes generated (not counting nul).
2620 : *
2621 : * The result string is nul-terminated.
2622 : */
2623 : int32_t
2624 1177152 : icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
2625 : {
2626 : UErrorCode status;
2627 : int32_t len_result;
2628 :
2629 1177152 : init_icu_converter();
2630 :
2631 1177152 : status = U_ZERO_ERROR;
2632 1177152 : len_result = ucnv_fromUChars(icu_converter, NULL, 0,
2633 : buff_uchar, len_uchar, &status);
2634 1177152 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
2635 0 : ereport(ERROR,
2636 : (errmsg("%s failed: %s", "ucnv_fromUChars",
2637 : u_errorName(status))));
2638 :
2639 1177152 : *result = palloc(len_result + 1);
2640 :
2641 1177152 : status = U_ZERO_ERROR;
2642 1177152 : len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
2643 : buff_uchar, len_uchar, &status);
2644 1177152 : if (U_FAILURE(status) ||
2645 1177152 : status == U_STRING_NOT_TERMINATED_WARNING)
2646 0 : ereport(ERROR,
2647 : (errmsg("%s failed: %s", "ucnv_fromUChars",
2648 : u_errorName(status))));
2649 :
2650 1177152 : return len_result;
2651 : }
2652 :
2653 : /*
2654 : * Parse collation attributes from the given locale string and apply them to
2655 : * the open collator.
2656 : *
2657 : * First, the locale string is canonicalized to an ICU format locale ID such
2658 : * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
2659 : * the key-value arguments.
2660 : *
2661 : * Starting with ICU version 54, the attributes are processed automatically by
2662 : * ucol_open(), so this is only necessary for emulating this behavior on older
2663 : * versions.
2664 : */
2665 : pg_attribute_unused()
2666 : static void
2667 0 : icu_set_collation_attributes(UCollator *collator, const char *loc,
2668 : UErrorCode *status)
2669 : {
2670 : int32_t len;
2671 : char *icu_locale_id;
2672 : char *lower_str;
2673 : char *str;
2674 :
2675 : /*
2676 : * The input locale may be a BCP 47 language tag, e.g.
2677 : * "und-u-kc-ks-level1", which expresses the same attributes in a
2678 : * different form. It will be converted to the equivalent ICU format
2679 : * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
2680 : * uloc_canonicalize().
2681 : */
2682 0 : *status = U_ZERO_ERROR;
2683 0 : len = uloc_canonicalize(loc, NULL, 0, status);
2684 0 : icu_locale_id = palloc(len + 1);
2685 0 : *status = U_ZERO_ERROR;
2686 0 : len = uloc_canonicalize(loc, icu_locale_id, len + 1, status);
2687 0 : if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING)
2688 0 : return;
2689 :
2690 0 : lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
2691 :
2692 0 : pfree(icu_locale_id);
2693 :
2694 0 : str = strchr(lower_str, '@');
2695 0 : if (!str)
2696 0 : return;
2697 0 : str++;
2698 :
2699 0 : for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
2700 : {
2701 0 : char *e = strchr(token, '=');
2702 :
2703 0 : if (e)
2704 : {
2705 : char *name;
2706 : char *value;
2707 : UColAttribute uattr;
2708 : UColAttributeValue uvalue;
2709 :
2710 0 : *status = U_ZERO_ERROR;
2711 :
2712 0 : *e = '\0';
2713 0 : name = token;
2714 0 : value = e + 1;
2715 :
2716 : /*
2717 : * See attribute name and value lists in ICU i18n/coll.cpp
2718 : */
2719 0 : if (strcmp(name, "colstrength") == 0)
2720 0 : uattr = UCOL_STRENGTH;
2721 0 : else if (strcmp(name, "colbackwards") == 0)
2722 0 : uattr = UCOL_FRENCH_COLLATION;
2723 0 : else if (strcmp(name, "colcaselevel") == 0)
2724 0 : uattr = UCOL_CASE_LEVEL;
2725 0 : else if (strcmp(name, "colcasefirst") == 0)
2726 0 : uattr = UCOL_CASE_FIRST;
2727 0 : else if (strcmp(name, "colalternate") == 0)
2728 0 : uattr = UCOL_ALTERNATE_HANDLING;
2729 0 : else if (strcmp(name, "colnormalization") == 0)
2730 0 : uattr = UCOL_NORMALIZATION_MODE;
2731 0 : else if (strcmp(name, "colnumeric") == 0)
2732 0 : uattr = UCOL_NUMERIC_COLLATION;
2733 : else
2734 : /* ignore if unknown */
2735 0 : continue;
2736 :
2737 0 : if (strcmp(value, "primary") == 0)
2738 0 : uvalue = UCOL_PRIMARY;
2739 0 : else if (strcmp(value, "secondary") == 0)
2740 0 : uvalue = UCOL_SECONDARY;
2741 0 : else if (strcmp(value, "tertiary") == 0)
2742 0 : uvalue = UCOL_TERTIARY;
2743 0 : else if (strcmp(value, "quaternary") == 0)
2744 0 : uvalue = UCOL_QUATERNARY;
2745 0 : else if (strcmp(value, "identical") == 0)
2746 0 : uvalue = UCOL_IDENTICAL;
2747 0 : else if (strcmp(value, "no") == 0)
2748 0 : uvalue = UCOL_OFF;
2749 0 : else if (strcmp(value, "yes") == 0)
2750 0 : uvalue = UCOL_ON;
2751 0 : else if (strcmp(value, "shifted") == 0)
2752 0 : uvalue = UCOL_SHIFTED;
2753 0 : else if (strcmp(value, "non-ignorable") == 0)
2754 0 : uvalue = UCOL_NON_IGNORABLE;
2755 0 : else if (strcmp(value, "lower") == 0)
2756 0 : uvalue = UCOL_LOWER_FIRST;
2757 0 : else if (strcmp(value, "upper") == 0)
2758 0 : uvalue = UCOL_UPPER_FIRST;
2759 : else
2760 : {
2761 0 : *status = U_ILLEGAL_ARGUMENT_ERROR;
2762 0 : break;
2763 : }
2764 :
2765 0 : ucol_setAttribute(collator, uattr, uvalue, status);
2766 : }
2767 : }
2768 :
2769 0 : pfree(lower_str);
2770 : }
2771 : #endif
2772 :
2773 : /*
2774 : * Return the BCP47 language tag representation of the requested locale.
2775 : *
2776 : * This function should be called before passing the string to ucol_open(),
2777 : * because conversion to a language tag also performs "level 2
2778 : * canonicalization". In addition to producing a consistent format, level 2
2779 : * canonicalization is able to more accurately interpret different input
2780 : * locale string formats, such as POSIX and .NET IDs.
2781 : */
2782 : char *
2783 471536 : icu_language_tag(const char *loc_str, int elevel)
2784 : {
2785 : #ifdef USE_ICU
2786 : UErrorCode status;
2787 : char lang[ULOC_LANG_CAPACITY];
2788 : char *langtag;
2789 471536 : size_t buflen = 32; /* arbitrary starting buffer size */
2790 471536 : const bool strict = true;
2791 :
2792 471536 : status = U_ZERO_ERROR;
2793 471536 : uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
2794 471536 : if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
2795 : {
2796 0 : if (elevel > 0)
2797 0 : ereport(elevel,
2798 : (errmsg("could not get language from locale \"%s\": %s",
2799 : loc_str, u_errorName(status))));
2800 0 : return NULL;
2801 : }
2802 :
2803 : /* C/POSIX locales aren't handled by uloc_getLanguageTag() */
2804 471536 : if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
2805 4 : return pstrdup("en-US-u-va-posix");
2806 :
2807 : /*
2808 : * A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
2809 : * RFC5646 section 4.4). Additionally, in older ICU versions,
2810 : * uloc_toLanguageTag() doesn't always return the ultimate length on the
2811 : * first call, necessitating a loop.
2812 : */
2813 471532 : langtag = palloc(buflen);
2814 : while (true)
2815 : {
2816 471532 : status = U_ZERO_ERROR;
2817 471532 : uloc_toLanguageTag(loc_str, langtag, buflen, strict, &status);
2818 :
2819 : /* try again if the buffer is not large enough */
2820 471532 : if ((status == U_BUFFER_OVERFLOW_ERROR ||
2821 471532 : status == U_STRING_NOT_TERMINATED_WARNING) &&
2822 : buflen < MaxAllocSize)
2823 : {
2824 0 : buflen = Min(buflen * 2, MaxAllocSize);
2825 0 : langtag = repalloc(langtag, buflen);
2826 0 : continue;
2827 : }
2828 :
2829 471532 : break;
2830 : }
2831 :
2832 471532 : if (U_FAILURE(status))
2833 : {
2834 12 : pfree(langtag);
2835 :
2836 12 : if (elevel > 0)
2837 12 : ereport(elevel,
2838 : (errmsg("could not convert locale name \"%s\" to language tag: %s",
2839 : loc_str, u_errorName(status))));
2840 6 : return NULL;
2841 : }
2842 :
2843 471520 : return langtag;
2844 : #else /* not USE_ICU */
2845 : ereport(ERROR,
2846 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2847 : errmsg("ICU is not supported in this build")));
2848 : return NULL; /* keep compiler quiet */
2849 : #endif /* not USE_ICU */
2850 : }
2851 :
2852 : /*
2853 : * Perform best-effort check that the locale is a valid one.
2854 : */
2855 : void
2856 1684 : icu_validate_locale(const char *loc_str)
2857 : {
2858 : #ifdef USE_ICU
2859 : UCollator *collator;
2860 : UErrorCode status;
2861 : char lang[ULOC_LANG_CAPACITY];
2862 1684 : bool found = false;
2863 1684 : int elevel = icu_validation_level;
2864 :
2865 : /* no validation */
2866 1684 : if (elevel < 0)
2867 0 : return;
2868 :
2869 : /* downgrade to WARNING during pg_upgrade */
2870 1684 : if (IsBinaryUpgrade && elevel > WARNING)
2871 0 : elevel = WARNING;
2872 :
2873 : /* validate that we can extract the language */
2874 1684 : status = U_ZERO_ERROR;
2875 1684 : uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
2876 1684 : if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
2877 : {
2878 0 : ereport(elevel,
2879 : (errmsg("could not get language from ICU locale \"%s\": %s",
2880 : loc_str, u_errorName(status)),
2881 : errhint("To disable ICU locale validation, set parameter icu_validation_level to DISABLED.")));
2882 0 : return;
2883 : }
2884 :
2885 : /* check for special language name */
2886 1684 : if (strcmp(lang, "") == 0 ||
2887 1582 : strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
2888 1582 : strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
2889 102 : found = true;
2890 :
2891 : /* search for matching language within ICU */
2892 220926 : for (int32_t i = 0; !found && i < uloc_countAvailable(); i++)
2893 : {
2894 219242 : const char *otherloc = uloc_getAvailable(i);
2895 : char otherlang[ULOC_LANG_CAPACITY];
2896 :
2897 219242 : status = U_ZERO_ERROR;
2898 219242 : uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status);
2899 219242 : if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
2900 0 : continue;
2901 :
2902 219242 : if (strcmp(lang, otherlang) == 0)
2903 1570 : found = true;
2904 : }
2905 :
2906 1684 : if (!found)
2907 12 : ereport(elevel,
2908 : (errmsg("ICU locale \"%s\" has unknown language \"%s\"",
2909 : loc_str, lang),
2910 : errhint("To disable ICU locale validation, set parameter icu_validation_level to DISABLED.")));
2911 :
2912 : /* check that it can be opened */
2913 1678 : collator = pg_ucol_open(loc_str);
2914 1670 : ucol_close(collator);
2915 : #else /* not USE_ICU */
2916 : /* could get here if a collation was created by a build with ICU */
2917 : ereport(ERROR,
2918 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2919 : errmsg("ICU is not supported in this build")));
2920 : #endif /* not USE_ICU */
2921 : }
2922 :
2923 : /*
2924 : * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
2925 : * Therefore we keep them here rather than with the mbutils code.
2926 : */
2927 :
2928 : /*
2929 : * wchar2char --- convert wide characters to multibyte format
2930 : *
2931 : * This has the same API as the standard wcstombs_l() function; in particular,
2932 : * tolen is the maximum number of bytes to store at *to, and *from must be
2933 : * zero-terminated. The output will be zero-terminated iff there is room.
2934 : */
2935 : size_t
2936 271980 : wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
2937 : {
2938 : size_t result;
2939 :
2940 : Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
2941 :
2942 271980 : if (tolen == 0)
2943 0 : return 0;
2944 :
2945 : #ifdef WIN32
2946 :
2947 : /*
2948 : * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
2949 : * for some reason mbstowcs and wcstombs won't do this for us, so we use
2950 : * MultiByteToWideChar().
2951 : */
2952 : if (GetDatabaseEncoding() == PG_UTF8)
2953 : {
2954 : result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
2955 : NULL, NULL);
2956 : /* A zero return is failure */
2957 : if (result <= 0)
2958 : result = -1;
2959 : else
2960 : {
2961 : Assert(result <= tolen);
2962 : /* Microsoft counts the zero terminator in the result */
2963 : result--;
2964 : }
2965 : }
2966 : else
2967 : #endif /* WIN32 */
2968 271980 : if (locale == (pg_locale_t) 0)
2969 : {
2970 : /* Use wcstombs directly for the default locale */
2971 271980 : result = wcstombs(to, from, tolen);
2972 : }
2973 : else
2974 : {
2975 : #ifdef HAVE_LOCALE_T
2976 : #ifdef HAVE_WCSTOMBS_L
2977 : /* Use wcstombs_l for nondefault locales */
2978 : result = wcstombs_l(to, from, tolen, locale->info.lt);
2979 : #else /* !HAVE_WCSTOMBS_L */
2980 : /* We have to temporarily set the locale as current ... ugh */
2981 0 : locale_t save_locale = uselocale(locale->info.lt);
2982 :
2983 0 : result = wcstombs(to, from, tolen);
2984 :
2985 0 : uselocale(save_locale);
2986 : #endif /* HAVE_WCSTOMBS_L */
2987 : #else /* !HAVE_LOCALE_T */
2988 : /* Can't have locale != 0 without HAVE_LOCALE_T */
2989 : elog(ERROR, "wcstombs_l is not available");
2990 : result = 0; /* keep compiler quiet */
2991 : #endif /* HAVE_LOCALE_T */
2992 : }
2993 :
2994 271980 : return result;
2995 : }
2996 :
2997 : /*
2998 : * char2wchar --- convert multibyte characters to wide characters
2999 : *
3000 : * This has almost the API of mbstowcs_l(), except that *from need not be
3001 : * null-terminated; instead, the number of input bytes is specified as
3002 : * fromlen. Also, we ereport() rather than returning -1 for invalid
3003 : * input encoding. tolen is the maximum number of wchar_t's to store at *to.
3004 : * The output will be zero-terminated iff there is room.
3005 : */
3006 : size_t
3007 275136 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
3008 : pg_locale_t locale)
3009 : {
3010 : size_t result;
3011 :
3012 : Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
3013 :
3014 275136 : if (tolen == 0)
3015 0 : return 0;
3016 :
3017 : #ifdef WIN32
3018 : /* See WIN32 "Unicode" comment above */
3019 : if (GetDatabaseEncoding() == PG_UTF8)
3020 : {
3021 : /* Win32 API does not work for zero-length input */
3022 : if (fromlen == 0)
3023 : result = 0;
3024 : else
3025 : {
3026 : result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
3027 : /* A zero return is failure */
3028 : if (result == 0)
3029 : result = -1;
3030 : }
3031 :
3032 : if (result != -1)
3033 : {
3034 : Assert(result < tolen);
3035 : /* Append trailing null wchar (MultiByteToWideChar() does not) */
3036 : to[result] = 0;
3037 : }
3038 : }
3039 : else
3040 : #endif /* WIN32 */
3041 : {
3042 : /* mbstowcs requires ending '\0' */
3043 275136 : char *str = pnstrdup(from, fromlen);
3044 :
3045 275136 : if (locale == (pg_locale_t) 0)
3046 : {
3047 : /* Use mbstowcs directly for the default locale */
3048 275136 : result = mbstowcs(to, str, tolen);
3049 : }
3050 : else
3051 : {
3052 : #ifdef HAVE_LOCALE_T
3053 : #ifdef HAVE_MBSTOWCS_L
3054 : /* Use mbstowcs_l for nondefault locales */
3055 : result = mbstowcs_l(to, str, tolen, locale->info.lt);
3056 : #else /* !HAVE_MBSTOWCS_L */
3057 : /* We have to temporarily set the locale as current ... ugh */
3058 0 : locale_t save_locale = uselocale(locale->info.lt);
3059 :
3060 0 : result = mbstowcs(to, str, tolen);
3061 :
3062 0 : uselocale(save_locale);
3063 : #endif /* HAVE_MBSTOWCS_L */
3064 : #else /* !HAVE_LOCALE_T */
3065 : /* Can't have locale != 0 without HAVE_LOCALE_T */
3066 : elog(ERROR, "mbstowcs_l is not available");
3067 : result = 0; /* keep compiler quiet */
3068 : #endif /* HAVE_LOCALE_T */
3069 : }
3070 :
3071 275136 : pfree(str);
3072 : }
3073 :
3074 275136 : if (result == -1)
3075 : {
3076 : /*
3077 : * Invalid multibyte character encountered. We try to give a useful
3078 : * error message by letting pg_verifymbstr check the string. But it's
3079 : * possible that the string is OK to us, and not OK to mbstowcs ---
3080 : * this suggests that the LC_CTYPE locale is different from the
3081 : * database encoding. Give a generic error message if pg_verifymbstr
3082 : * can't find anything wrong.
3083 : */
3084 0 : pg_verifymbstr(from, fromlen, false); /* might not return */
3085 : /* but if it does ... */
3086 0 : ereport(ERROR,
3087 : (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
3088 : errmsg("invalid multibyte character for locale"),
3089 : errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
3090 : }
3091 :
3092 275136 : return result;
3093 : }
|