Line data Source code
1 : /*-----------------------------------------------------------------------
2 : *
3 : * PostgreSQL locale utilities
4 : *
5 : * Portions Copyright (c) 2002-2022, PostgreSQL Global Development Group
6 : *
7 : * src/backend/utils/adt/pg_locale.c
8 : *
9 : *-----------------------------------------------------------------------
10 : */
11 :
12 : /*----------
13 : * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14 : * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15 : * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16 : * toupper(), etc. are always in the same fixed locale.
17 : *
18 : * LC_MESSAGES is settable at run time and will take effect
19 : * immediately.
20 : *
21 : * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
22 : * settable at run-time. However, we don't actually set those locale
23 : * categories permanently. This would have bizarre effects like no
24 : * longer accepting standard floating-point literals in some locales.
25 : * Instead, we only set these locale categories briefly when needed,
26 : * cache the required information obtained from localeconv() or
27 : * strftime(), and then set the locale categories back to "C".
28 : * The cached information is only used by the formatting functions
29 : * (to_char, etc.) and the money type. For the user, this should all be
30 : * transparent.
31 : *
32 : * !!! NOW HEAR THIS !!!
33 : *
34 : * We've been bitten repeatedly by this bug, so let's try to keep it in
35 : * mind in future: on some platforms, the locale functions return pointers
36 : * to static data that will be overwritten by any later locale function.
37 : * Thus, for example, the obvious-looking sequence
38 : * save = setlocale(category, NULL);
39 : * if (!setlocale(category, value))
40 : * fail = true;
41 : * setlocale(category, save);
42 : * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
43 : * will change the memory save is pointing at. To do this sort of thing
44 : * safely, you *must* pstrdup what setlocale returns the first time.
45 : *
46 : * The POSIX locale standard is available here:
47 : *
48 : * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
49 : *----------
50 : */
51 :
52 :
53 : #include "postgres.h"
54 :
55 : #include <time.h>
56 :
57 : #include "access/htup_details.h"
58 : #include "catalog/pg_collation.h"
59 : #include "catalog/pg_control.h"
60 : #include "mb/pg_wchar.h"
61 : #include "utils/builtins.h"
62 : #include "utils/formatting.h"
63 : #include "utils/hsearch.h"
64 : #include "utils/lsyscache.h"
65 : #include "utils/memutils.h"
66 : #include "utils/pg_locale.h"
67 : #include "utils/syscache.h"
68 :
69 : #ifdef USE_ICU
70 : #include <unicode/ucnv.h>
71 : #endif
72 :
73 : #ifdef __GLIBC__
74 : #include <gnu/libc-version.h>
75 : #endif
76 :
77 : #ifdef WIN32
78 : #include <shlwapi.h>
79 : #endif
80 :
/*
 * Size, in bytes, of each per-string slot used when collecting strftime()
 * output in cache_locale_time(); also the element count of the wide-character
 * work buffer in strftime_win32().
 */
#define MAX_L10N_DATA 80


/*
 * GUC settings.
 *
 * locale_monetary and locale_numeric are read by PGLC_localeconv(), and
 * locale_time by cache_locale_time(), at the moment the corresponding cache
 * is rebuilt.
 */
char	   *locale_messages;
char	   *locale_monetary;
char	   *locale_numeric;
char	   *locale_time;

/*
 * lc_time localization cache.
 *
 * We use only the first 7 or 12 entries of these arrays.  The last array
 * element is left as NULL for the convenience of outside code that wants
 * to sequentially scan these arrays.
 *
 * The entries are filled in by cache_locale_time().
 */
char	   *localized_abbrev_days[7 + 1];
char	   *localized_full_days[7 + 1];
char	   *localized_abbrev_months[12 + 1];
char	   *localized_full_months[12 + 1];

/*
 * Indicates whether the locale information caches are valid.  The GUC assign
 * hooks below reset these to false; PGLC_localeconv() and cache_locale_time()
 * set them back to true after a successful rebuild.
 */
static bool CurrentLocaleConvValid = false;
static bool CurrentLCTimeValid = false;
105 :
/* Cache for collation-related knowledge */

/*
 * One entry per collation, keyed by its pg_collation OID.  The two *_is_c
 * flags are meaningful only while flags_valid is true.
 */
typedef struct
{
	Oid			collid;			/* hash key: pg_collation OID */
	bool		collate_is_c;	/* is collation's LC_COLLATE C? */
	bool		ctype_is_c;		/* is collation's LC_CTYPE C? */
	bool		flags_valid;	/* true if above flags are valid */
	pg_locale_t locale;			/* locale_t struct, or 0 if not valid */
} collation_cache_entry;

/* The hash table itself; starts out NULL (not yet created). */
static HTAB *collation_cache = NULL;


/* Forward declarations of file-local helpers that are conditionally compiled */
#if defined(WIN32) && defined(LC_MESSAGES)
static char *IsoLocaleName(const char *);	/* MSVC specific */
#endif

#ifdef USE_ICU
static void icu_set_collation_attributes(UCollator *collator, const char *loc);
#endif
127 :
/*
 * pg_perm_setlocale
 *
 * This wraps the libc function setlocale(), with two additions.  First, when
 * changing LC_CTYPE, update gettext's encoding for the current message
 * domain.  GNU gettext automatically tracks LC_CTYPE on most platforms, but
 * not on Windows.  Second, if the operation is successful, the corresponding
 * LC_XXX environment variable is set to match.  By setting the environment
 * variable, we ensure that any subsequent use of setlocale(..., "") will
 * preserve the settings made through this routine.  Of course, LC_ALL must
 * also be unset to fully ensure that, but that has to be done elsewhere after
 * all the individual LC_XXX variables have been set correctly.  (Thank you
 * Perl for making this kluge necessary.)
 *
 * Returns the locale name that was put into effect, or NULL if setlocale()
 * rejected the request or the environment variable could not be set.  Note
 * that in the setenv() failure case the libc locale has already been changed.
 * For LC_CTYPE the returned string is a copy held in a static buffer inside
 * this function, so it is overwritten by the next LC_CTYPE call.  An
 * unrecognized category is reported with elog(FATAL).
 */
char *
pg_perm_setlocale(int category, const char *locale)
{
	char	   *result;
	const char *envvar;

#ifndef WIN32
	result = setlocale(category, locale);
#else

	/*
	 * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
	 * the given value is good and set it in the environment variables.  We
	 * must ignore attempts to set to "", which means "keep using the old
	 * environment value".
	 */
#ifdef LC_MESSAGES
	if (category == LC_MESSAGES)
	{
		result = (char *) locale;
		/* NULL or "" means there is nothing to install: return it as-is */
		if (locale == NULL || locale[0] == '\0')
			return result;
	}
	else
#endif
		result = setlocale(category, locale);
#endif							/* WIN32 */

	if (result == NULL)
		return result;			/* fall out immediately on failure */

	/*
	 * Use the right encoding in translated messages.  Under ENABLE_NLS, let
	 * pg_bind_textdomain_codeset() figure it out.  Under !ENABLE_NLS, message
	 * format strings are ASCII, but database-encoding strings may enter the
	 * message via %s.  This makes the overall message encoding equal to the
	 * database encoding.
	 */
	if (category == LC_CTYPE)
	{
		static char save_lc_ctype[LOCALE_NAME_BUFLEN];

		/* copy setlocale() return value before callee invokes it again */
		strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
		result = save_lc_ctype;

#ifdef ENABLE_NLS
		SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
#else
		SetMessageEncoding(GetDatabaseEncoding());
#endif
	}

	/* Map the category onto the name of its environment variable */
	switch (category)
	{
		case LC_COLLATE:
			envvar = "LC_COLLATE";
			break;
		case LC_CTYPE:
			envvar = "LC_CTYPE";
			break;
#ifdef LC_MESSAGES
		case LC_MESSAGES:
			envvar = "LC_MESSAGES";
#ifdef WIN32
			/*
			 * Export the Unix-style name if IsoLocaleName() can produce one;
			 * otherwise fall back to the name we were given.
			 */
			result = IsoLocaleName(locale);
			if (result == NULL)
				result = (char *) locale;
			elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
#endif							/* WIN32 */
			break;
#endif							/* LC_MESSAGES */
		case LC_MONETARY:
			envvar = "LC_MONETARY";
			break;
		case LC_NUMERIC:
			envvar = "LC_NUMERIC";
			break;
		case LC_TIME:
			envvar = "LC_TIME";
			break;
		default:
			elog(FATAL, "unrecognized LC category: %d", category);
			return NULL;		/* keep compiler quiet */
	}

	/* Export the setting; the final argument 1 means overwrite any old value */
	if (setenv(envvar, result, 1) != 0)
		return NULL;

	return result;
}
233 :
234 :
235 : /*
236 : * Is the locale name valid for the locale category?
237 : *
238 : * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
239 : * canonical name is stored there. This is especially useful for figuring out
240 : * what locale name "" means (ie, the server environment value). (Actually,
241 : * it seems that on most implementations that's the only thing it's good for;
242 : * we could wish that setlocale gave back a canonically spelled version of
243 : * the locale name, but typically it doesn't.)
244 : */
245 : bool
246 65624 : check_locale(int category, const char *locale, char **canonname)
247 : {
248 : char *save;
249 : char *res;
250 :
251 65624 : if (canonname)
252 2778 : *canonname = NULL; /* in case of failure */
253 :
254 65624 : save = setlocale(category, NULL);
255 65624 : if (!save)
256 0 : return false; /* won't happen, we hope */
257 :
258 : /* save may be pointing at a modifiable scratch variable, see above. */
259 65624 : save = pstrdup(save);
260 :
261 : /* set the locale with setlocale, to see if it accepts it. */
262 65624 : res = setlocale(category, locale);
263 :
264 : /* save canonical name if requested. */
265 65624 : if (res && canonname)
266 2774 : *canonname = pstrdup(res);
267 :
268 : /* restore old value. */
269 65624 : if (!setlocale(category, save))
270 0 : elog(WARNING, "failed to restore old locale \"%s\"", save);
271 65624 : pfree(save);
272 :
273 65624 : return (res != NULL);
274 : }
275 :
276 :
277 : /*
278 : * GUC check/assign hooks
279 : *
280 : * For most locale categories, the assign hook doesn't actually set the locale
281 : * permanently, just reset flags so that the next use will cache the
282 : * appropriate values. (See explanation at the top of this file.)
283 : *
284 : * Note: we accept value = "" as selecting the postmaster's environment
285 : * value, whatever it was (so long as the environment setting is legal).
286 : * This will have been locked down by an earlier call to pg_perm_setlocale.
287 : */
288 : bool
289 17148 : check_locale_monetary(char **newval, void **extra, GucSource source)
290 : {
291 17148 : return check_locale(LC_MONETARY, *newval, NULL);
292 : }
293 :
/*
 * GUC assign hook for the monetary locale.  Nothing is changed in libc here;
 * we merely mark the cached lconv data stale so that PGLC_localeconv()
 * recomputes it the next time it is called.
 */
void
assign_locale_monetary(const char *newval, void *extra)
{
	CurrentLocaleConvValid = false;
}
299 :
300 : bool
301 17154 : check_locale_numeric(char **newval, void **extra, GucSource source)
302 : {
303 17154 : return check_locale(LC_NUMERIC, *newval, NULL);
304 : }
305 :
/*
 * GUC assign hook for the numeric locale.  Shares its cache with the monetary
 * setting: invalidating the flag makes PGLC_localeconv() rebuild both the
 * numeric and the monetary fields on its next call.
 */
void
assign_locale_numeric(const char *newval, void *extra)
{
	CurrentLocaleConvValid = false;
}
311 :
312 : bool
313 17148 : check_locale_time(char **newval, void **extra, GucSource source)
314 : {
315 17148 : return check_locale(LC_TIME, *newval, NULL);
316 : }
317 :
/*
 * GUC assign hook for the time locale.  Only invalidates the cached day and
 * month names; cache_locale_time() refills them on its next call.
 */
void
assign_locale_time(const char *newval, void *extra)
{
	CurrentLCTimeValid = false;
}
323 :
324 : /*
325 : * We allow LC_MESSAGES to actually be set globally.
326 : *
327 : * Note: we normally disallow value = "" because it wouldn't have consistent
328 : * semantics (it'd effectively just use the previous value). However, this
329 : * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
330 : * not even if the attempted setting fails due to invalid environment value.
331 : * The idea there is just to accept the environment setting *if possible*
332 : * during startup, until we can read the proper value from postgresql.conf.
333 : */
334 : bool
335 17172 : check_locale_messages(char **newval, void **extra, GucSource source)
336 : {
337 17172 : if (**newval == '\0')
338 : {
339 5776 : if (source == PGC_S_DEFAULT)
340 5776 : return true;
341 : else
342 0 : return false;
343 : }
344 :
345 : /*
346 : * LC_MESSAGES category does not exist everywhere, but accept it anyway
347 : *
348 : * On Windows, we can't even check the value, so accept blindly
349 : */
350 : #if defined(LC_MESSAGES) && !defined(WIN32)
351 11396 : return check_locale(LC_MESSAGES, *newval, NULL);
352 : #else
353 : return true;
354 : #endif
355 : }
356 :
/*
 * GUC assign hook for the messages locale.  Where LC_MESSAGES exists, install
 * the new value for real via pg_perm_setlocale(); elsewhere this is a no-op.
 */
void
assign_locale_messages(const char *newval, void *extra)
{
	/*
	 * LC_MESSAGES category does not exist everywhere, but accept it anyway.
	 * We ignore failure, as per comment above.
	 */
#ifdef LC_MESSAGES
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
368 :
369 :
/*
 * Release the malloc'd strings hanging off a struct lconv.  The struct itself
 * is not freed, and its pointer members are left as they were.
 *
 * This must never throw elog(ERROR): it is used on cleanup paths.  Members
 * that are NULL are harmless, because free(NULL) is defined to do nothing.
 *
 * The set of members handled here must stay in step with
 * struct_lconv_is_valid().
 */
static void
free_struct_lconv(struct lconv *s)
{
	/* members obtained under LC_NUMERIC */
	free(s->decimal_point);
	free(s->thousands_sep);
	free(s->grouping);

	/* members obtained under LC_MONETARY */
	free(s->int_curr_symbol);
	free(s->currency_symbol);
	free(s->mon_decimal_point);
	free(s->mon_thousands_sep);
	free(s->mon_grouping);
	free(s->positive_sign);
	free(s->negative_sign);
}
398 :
/*
 * Report whether every string member of a struct lconv that we care about
 * has been filled in (is non-NULL).  Returns false as soon as one is found
 * missing.
 *
 * The set of members examined here must stay in step with
 * free_struct_lconv().
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
428 :
429 :
430 : /*
431 : * Convert the strdup'd string at *str from the specified encoding to the
432 : * database encoding.
433 : */
434 : static void
435 816 : db_encoding_convert(int encoding, char **str)
436 : {
437 : char *pstr;
438 : char *mstr;
439 :
440 : /* convert the string to the database encoding */
441 816 : pstr = pg_any_to_server(*str, strlen(*str), encoding);
442 816 : if (pstr == *str)
443 816 : return; /* no conversion happened */
444 :
445 : /* need it malloc'd not palloc'd */
446 0 : mstr = strdup(pstr);
447 0 : if (mstr == NULL)
448 0 : ereport(ERROR,
449 : (errcode(ERRCODE_OUT_OF_MEMORY),
450 : errmsg("out of memory")));
451 :
452 : /* replace old string */
453 0 : free(*str);
454 0 : *str = mstr;
455 :
456 0 : pfree(pstr);
457 : }
458 :
459 :
/*
 * Return the POSIX lconv struct (contains number/money formatting
 * information) with locale information for all categories.
 *
 * The numeric fields are collected under the locale named by locale_numeric
 * and the monetary fields under locale_monetary; all string fields are then
 * converted to the database encoding.
 *
 * The result points at a static struct owned by this function, so callers
 * must not free or modify it.  It is reused on later calls until
 * CurrentLocaleConvValid is reset, at which point the next call frees the old
 * strings and rebuilds the contents.
 *
 * Errors are reported with elog/ereport; failure to restore the previous
 * locale settings is FATAL.
 */
struct lconv *
PGLC_localeconv(void)
{
	static struct lconv CurrentLocaleConv;
	static bool CurrentLocaleConvAllocated = false;
	struct lconv *extlconv;
	struct lconv worklconv;
	char	   *save_lc_monetary;
	char	   *save_lc_numeric;
#ifdef WIN32
	char	   *save_lc_ctype;
#endif

	/* Did we do it already? */
	if (CurrentLocaleConvValid)
		return &CurrentLocaleConv;

	/* Free any already-allocated storage */
	if (CurrentLocaleConvAllocated)
	{
		free_struct_lconv(&CurrentLocaleConv);
		CurrentLocaleConvAllocated = false;
	}

	/*
	 * This is tricky because we really don't want to risk throwing error
	 * while the locale is set to other than our usual settings.  Therefore,
	 * the process is: collect the usual settings, set locale to special
	 * setting, copy relevant data into worklconv using strdup(), restore
	 * normal settings, convert data to desired encoding, and finally stash
	 * the collected data in CurrentLocaleConv.  This makes it safe if we
	 * throw an error during encoding conversion or run out of memory anywhere
	 * in the process.  All data pointed to by struct lconv members is
	 * allocated with strdup, to avoid premature elog(ERROR) and to allow
	 * using a single cleanup routine.
	 */
	memset(&worklconv, 0, sizeof(worklconv));

	/* Save prevailing values of monetary and numeric locales */
	save_lc_monetary = setlocale(LC_MONETARY, NULL);
	if (!save_lc_monetary)
		elog(ERROR, "setlocale(NULL) failed");
	/* must copy: a later setlocale() call may overwrite the returned string */
	save_lc_monetary = pstrdup(save_lc_monetary);

	save_lc_numeric = setlocale(LC_NUMERIC, NULL);
	if (!save_lc_numeric)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_numeric = pstrdup(save_lc_numeric);

#ifdef WIN32

	/*
	 * The POSIX standard explicitly says that it is undefined what happens if
	 * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
	 * that implied by LC_CTYPE.  In practice, all Unix-ish platforms seem to
	 * believe that localeconv() should return strings that are encoded in the
	 * codeset implied by the LC_MONETARY or LC_NUMERIC locale name.  Hence,
	 * once we have successfully collected the localeconv() results, we will
	 * convert them from that codeset to the desired server encoding.
	 *
	 * Windows, of course, resolutely does things its own way; on that
	 * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
	 * results.  Hence, we must temporarily set that category as well.
	 */

	/* Save prevailing value of ctype locale */
	save_lc_ctype = setlocale(LC_CTYPE, NULL);
	if (!save_lc_ctype)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_ctype = pstrdup(save_lc_ctype);

	/* Here begins the critical section where we must not throw error */

	/* use numeric to set the ctype */
	setlocale(LC_CTYPE, locale_numeric);
#endif

	/* Get formatting information for numeric */
	setlocale(LC_NUMERIC, locale_numeric);
	extlconv = localeconv();

	/*
	 * Must copy data now in case setlocale() overwrites it.  A failed
	 * strdup() just leaves NULL in worklconv; that is detected after the
	 * locale has been restored.
	 */
	worklconv.decimal_point = strdup(extlconv->decimal_point);
	worklconv.thousands_sep = strdup(extlconv->thousands_sep);
	worklconv.grouping = strdup(extlconv->grouping);

#ifdef WIN32
	/* use monetary to set the ctype */
	setlocale(LC_CTYPE, locale_monetary);
#endif

	/* Get formatting information for monetary */
	setlocale(LC_MONETARY, locale_monetary);
	extlconv = localeconv();

	/* Must copy data now in case setlocale() overwrites it */
	worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
	worklconv.currency_symbol = strdup(extlconv->currency_symbol);
	worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
	worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
	worklconv.mon_grouping = strdup(extlconv->mon_grouping);
	worklconv.positive_sign = strdup(extlconv->positive_sign);
	worklconv.negative_sign = strdup(extlconv->negative_sign);
	/* Copy scalar fields as well */
	worklconv.int_frac_digits = extlconv->int_frac_digits;
	worklconv.frac_digits = extlconv->frac_digits;
	worklconv.p_cs_precedes = extlconv->p_cs_precedes;
	worklconv.p_sep_by_space = extlconv->p_sep_by_space;
	worklconv.n_cs_precedes = extlconv->n_cs_precedes;
	worklconv.n_sep_by_space = extlconv->n_sep_by_space;
	worklconv.p_sign_posn = extlconv->p_sign_posn;
	worklconv.n_sign_posn = extlconv->n_sign_posn;

	/*
	 * Restore the prevailing locale settings; failure to do so is fatal.
	 * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
	 * but proceeding with the wrong value of LC_CTYPE would certainly be bad
	 * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
	 * are almost certainly "C", there's really no reason that restoring those
	 * should fail.
	 */
#ifdef WIN32
	if (!setlocale(LC_CTYPE, save_lc_ctype))
		elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
#endif
	if (!setlocale(LC_MONETARY, save_lc_monetary))
		elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
	if (!setlocale(LC_NUMERIC, save_lc_numeric))
		elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);

	/*
	 * At this point we've done our best to clean up, and can call functions
	 * that might possibly throw errors with a clean conscience.  But let's
	 * make sure we don't leak any already-strdup'd fields in worklconv.
	 */
	PG_TRY();
	{
		int			encoding;

		/* Release the pstrdup'd locale names */
		pfree(save_lc_monetary);
		pfree(save_lc_numeric);
#ifdef WIN32
		pfree(save_lc_ctype);
#endif

		/* If any of the preceding strdup calls failed, complain now. */
		if (!struct_lconv_is_valid(&worklconv))
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));

		/*
		 * Now we must perform encoding conversion from whatever's associated
		 * with the locales into the database encoding.  If we can't identify
		 * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
		 * use PG_SQL_ASCII, which will result in just validating that the
		 * strings are OK in the database encoding.
		 */
		encoding = pg_get_encoding_from_locale(locale_numeric, true);
		if (encoding < 0)
			encoding = PG_SQL_ASCII;

		db_encoding_convert(encoding, &worklconv.decimal_point);
		db_encoding_convert(encoding, &worklconv.thousands_sep);
		/* grouping is not text and does not require conversion */

		encoding = pg_get_encoding_from_locale(locale_monetary, true);
		if (encoding < 0)
			encoding = PG_SQL_ASCII;

		db_encoding_convert(encoding, &worklconv.int_curr_symbol);
		db_encoding_convert(encoding, &worklconv.currency_symbol);
		db_encoding_convert(encoding, &worklconv.mon_decimal_point);
		db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
		/* mon_grouping is not text and does not require conversion */
		db_encoding_convert(encoding, &worklconv.positive_sign);
		db_encoding_convert(encoding, &worklconv.negative_sign);
	}
	PG_CATCH();
	{
		/* worklconv is not yet owned by the static cache, so free it here */
		free_struct_lconv(&worklconv);
		PG_RE_THROW();
	}
	PG_END_TRY();

	/*
	 * Everything is good, so save the results.
	 */
	CurrentLocaleConv = worklconv;
	CurrentLocaleConvAllocated = true;
	CurrentLocaleConvValid = true;
	return &CurrentLocaleConv;
}
658 :
#ifdef WIN32
/*
 * On Windows, strftime() returns its output in encoding CP_ACP (the default
 * operating system codepage for the computer), which is likely different
 * from SERVER_ENCODING.  This is especially important in Japanese versions
 * of Windows which will use SJIS encoding, which we don't support as a
 * server encoding.
 *
 * So, instead of using strftime(), use wcsftime() to return the value in
 * wide characters (internally UTF16) and then convert to UTF8, which we
 * know how to handle directly.
 *
 * Note that this only affects the calls to strftime() in this file, which are
 * used to get the locale-aware strings.  Other parts of the backend use
 * pg_strftime(), which isn't locale-aware and does not need to be replaced.
 *
 * Arguments mirror strftime(): dst/dstlen describe the output buffer, format
 * is the (ASCII) format string, tm the broken-down time.  Returns the number
 * of bytes stored in dst, not counting the terminating NUL, or 0 if
 * wcsftime() produced nothing.  Failure of either character-set conversion
 * is reported with elog(ERROR).
 *
 * NOTE(review): cache_locale_time() invokes this (through the strftime macro
 * below) while LC_TIME and LC_CTYPE are temporarily switched, a section in
 * which it says errors must not be thrown.  The elog(ERROR) calls here could
 * in principle fire inside that section -- confirm whether that is
 * acceptable.
 */
static size_t
strftime_win32(char *dst, size_t dstlen,
			   const char *format, const struct tm *tm)
{
	size_t		len;
	wchar_t		wformat[8];		/* formats used below need 3 chars */
	wchar_t		wbuf[MAX_L10N_DATA];

	/*
	 * Get a wchar_t version of the format string.  We only actually use
	 * plain-ASCII formats in this file, so we can say that they're UTF8.
	 */
	len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
							  wformat, lengthof(wformat));
	if (len == 0)
		elog(ERROR, "could not convert format string from UTF-8: error code %lu",
			 GetLastError());

	len = wcsftime(wbuf, MAX_L10N_DATA, wformat, tm);
	if (len == 0)
	{
		/*
		 * wcsftime failed, possibly because the result would not fit in
		 * MAX_L10N_DATA.  Return 0 with the contents of dst unspecified.
		 */
		return 0;
	}

	/* Convert to UTF-8, keeping one byte of dst in reserve for the NUL */
	len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
							  NULL, NULL);
	if (len == 0)
		elog(ERROR, "could not convert string to UTF-8: error code %lu",
			 GetLastError());

	dst[len] = '\0';

	return len;
}

/* redefine strftime() so that later code in this file uses the wrapper */
#define strftime(a,b,c,d) strftime_win32(a,b,c,d)
#endif							/* WIN32 */
717 :
718 : /*
719 : * Subroutine for cache_locale_time().
720 : * Convert the given string from encoding "encoding" to the database
721 : * encoding, and store the result at *dst, replacing any previous value.
722 : */
723 : static void
724 1672 : cache_single_string(char **dst, const char *src, int encoding)
725 : {
726 : char *ptr;
727 : char *olddst;
728 :
729 : /* Convert the string to the database encoding, or validate it's OK */
730 1672 : ptr = pg_any_to_server(src, strlen(src), encoding);
731 :
732 : /* Store the string in long-lived storage, replacing any previous value */
733 1672 : olddst = *dst;
734 1672 : *dst = MemoryContextStrdup(TopMemoryContext, ptr);
735 1672 : if (olddst)
736 0 : pfree(olddst);
737 :
738 : /* Might as well clean up any palloc'd conversion result, too */
739 1672 : if (ptr != src)
740 0 : pfree(ptr);
741 1672 : }
742 :
/*
 * Update the lc_time localization cache variables if needed.
 *
 * When CurrentLCTimeValid is false, this refills localized_abbrev_days,
 * localized_full_days, localized_abbrev_months and localized_full_months
 * with the day and month names that strftime() produces under the locale
 * named by locale_time, converted to the database encoding.  When the flag
 * is already true the function returns immediately.
 *
 * Errors are reported with elog; failure to restore the previous locale
 * settings is FATAL.
 */
void
cache_locale_time(void)
{
	/* one MAX_L10N_DATA-sized slot per name: 2 * 7 day + 2 * 12 month names */
	char		buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
	char	   *bufptr;
	time_t		timenow;
	struct tm  *timeinfo;
	bool		strftimefail = false;
	int			encoding;
	int			i;
	char	   *save_lc_time;
#ifdef WIN32
	char	   *save_lc_ctype;
#endif

	/* did we do this already? */
	if (CurrentLCTimeValid)
		return;

	elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);

	/*
	 * As in PGLC_localeconv(), it's critical that we not throw error while
	 * libc's locale settings have nondefault values.  Hence, we just call
	 * strftime() within the critical section, and then convert and save its
	 * results afterwards.
	 */

	/* Save prevailing value of time locale */
	save_lc_time = setlocale(LC_TIME, NULL);
	if (!save_lc_time)
		elog(ERROR, "setlocale(NULL) failed");
	/* must copy: a later setlocale() call may overwrite the returned string */
	save_lc_time = pstrdup(save_lc_time);

#ifdef WIN32

	/*
	 * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
	 * must set it here.  This code looks the same as what PGLC_localeconv()
	 * does, but the underlying reason is different: this does NOT determine
	 * the encoding we'll get back from strftime_win32().
	 */

	/* Save prevailing value of ctype locale */
	save_lc_ctype = setlocale(LC_CTYPE, NULL);
	if (!save_lc_ctype)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_ctype = pstrdup(save_lc_ctype);

	/* use lc_time to set the ctype */
	setlocale(LC_CTYPE, locale_time);
#endif

	setlocale(LC_TIME, locale_time);

	/* We use times close to current time as data for strftime(). */
	timenow = time(NULL);
	timeinfo = localtime(&timenow);

	/* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
	bufptr = buf;

	/*
	 * MAX_L10N_DATA is sufficient buffer space for every known locale, and
	 * POSIX defines no strftime() errors.  (Buffer space exhaustion is not an
	 * error.)  An implementation might report errors (e.g. ENOMEM) by
	 * returning 0 (or, less plausibly, a negative value) and setting errno.
	 * Report errno just in case the implementation did that, but clear it in
	 * advance of the calls so we don't emit a stale, unrelated errno.
	 */
	errno = 0;

	/*
	 * localized days: slots are filled in the order abbreviated name, full
	 * name for each weekday in turn.  Failures are only recorded here; they
	 * are reported after the locale has been restored.
	 */
	for (i = 0; i < 7; i++)
	{
		timeinfo->tm_wday = i;
		if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
		if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
	}

	/* localized months: same interleaving, abbreviated then full name */
	for (i = 0; i < 12; i++)
	{
		timeinfo->tm_mon = i;
		timeinfo->tm_mday = 1;	/* make sure we don't have invalid date */
		if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
		if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
	}

	/*
	 * Restore the prevailing locale settings; as in PGLC_localeconv(),
	 * failure to do so is fatal.
	 */
#ifdef WIN32
	if (!setlocale(LC_CTYPE, save_lc_ctype))
		elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
#endif
	if (!setlocale(LC_TIME, save_lc_time))
		elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);

	/*
	 * At this point we've done our best to clean up, and can throw errors, or
	 * call functions that might throw errors, with a clean conscience.
	 */
	if (strftimefail)
		elog(ERROR, "strftime() failed: %m");

	/* Release the pstrdup'd locale names */
	pfree(save_lc_time);
#ifdef WIN32
	pfree(save_lc_ctype);
#endif

#ifndef WIN32

	/*
	 * As in PGLC_localeconv(), we must convert strftime()'s output from the
	 * encoding implied by LC_TIME to the database encoding.  If we can't
	 * identify the LC_TIME encoding, just perform encoding validation.
	 */
	encoding = pg_get_encoding_from_locale(locale_time, true);
	if (encoding < 0)
		encoding = PG_SQL_ASCII;

#else

	/*
	 * On Windows, strftime_win32() always returns UTF8 data, so convert from
	 * that if necessary.
	 */
	encoding = PG_UTF8;

#endif							/* WIN32 */

	/* Second pass: walk buf[] again in the same order it was filled */
	bufptr = buf;

	/* localized days */
	for (i = 0; i < 7; i++)
	{
		cache_single_string(&localized_abbrev_days[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
		cache_single_string(&localized_full_days[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
	}
	/* keep the documented NULL terminator in the extra array slot */
	localized_abbrev_days[7] = NULL;
	localized_full_days[7] = NULL;

	/* localized months */
	for (i = 0; i < 12; i++)
	{
		cache_single_string(&localized_abbrev_months[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
		cache_single_string(&localized_full_months[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
	}
	localized_abbrev_months[12] = NULL;
	localized_full_months[12] = NULL;

	/* Only now, with every entry stored, is the cache marked usable */
	CurrentLCTimeValid = true;
}
914 :
915 :
916 : #if defined(WIN32) && defined(LC_MESSAGES)
917 : /*
918 : * Convert a Windows setlocale() argument to a Unix-style one.
919 : *
920 : * Regardless of platform, we install message catalogs under a Unix-style
921 : * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
922 : * following that style will elicit localized interface strings.
923 : *
924 : * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
925 : * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
926 : * case-insensitive. setlocale() returns the fully-qualified form; for
927 : * example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
928 : * setlocale() and _create_locale() select a "locale identifier"[1] and store
929 : * it in an undocumented _locale_t field. From that LCID, we can retrieve the
930 : * ISO 639 language and the ISO 3166 country. Character encoding does not
931 : * matter, because the server and client encodings govern that.
932 : *
933 : * Windows Vista introduced the "locale name" concept[2], closely following
934 : * RFC 4646. Locale identifiers are now deprecated. Starting with Visual
935 : * Studio 2012, setlocale() accepts locale names in addition to the strings it
936 : * accepted historically. It does not standardize them; setlocale("Th-tH")
937 : * returns "Th-tH". setlocale(category, "") still returns a traditional
938 : * string. Furthermore, msvcr110.dll changed the undocumented _locale_t
939 : * content to carry locale names instead of locale identifiers.
940 : *
941 : * Visual Studio 2015 should still be able to do the same as Visual Studio
942 : * 2012, but the declaration of locale_name is missing in _locale_t, causing
943 : * this code compilation to fail, hence this falls back instead on to
944 : * enumerating all system locales by using EnumSystemLocalesEx to find the
945 : * required locale name. If the input argument is in Unix-style then we can
946 : * get ISO Locale name directly by using GetLocaleInfoEx() with LCType as
947 : * LOCALE_SNAME.
948 : *
949 : * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol in
950 : * releases before Windows 8. IsoLocaleName() always fails in a MinGW-built
951 : * postgres.exe, so only Unix-style values of the lc_messages GUC can elicit
952 : * localized messages. In particular, every lc_messages setting that initdb
953 : * can select automatically will yield only C-locale messages. XXX This could
954 : * be fixed by running the fully-qualified locale name through a lookup table.
955 : *
956 : * This function returns a pointer to a static buffer bearing the converted
957 : * name or NULL if conversion fails.
958 : *
959 : * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
960 : * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
961 : */
962 :
963 : #if _MSC_VER >= 1900
964 : /*
965 : * Callback function for EnumSystemLocalesEx() in get_iso_localename().
966 : *
967 : * This function enumerates all system locales, searching for one that matches
968 : * an input with the format: <Language>[_<Country>], e.g.
969 : * English[_United States]
970 : *
971 : * The input is a three wchar_t array as an LPARAM. The first element is the
972 : * locale_name we want to match, the second element is an allocated buffer
973 : * where the Unix-style locale is copied if a match is found, and the third
974 : * element is the search status, 1 if a match was found, 0 otherwise.
975 : */
976 : static BOOL CALLBACK
977 : search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
978 : {
979 : wchar_t test_locale[LOCALE_NAME_MAX_LENGTH];
980 : wchar_t **argv;
981 :
982 : (void) (dwFlags);
983 :
984 : argv = (wchar_t **) lparam;
985 : *argv[2] = (wchar_t) 0;
986 :
987 : memset(test_locale, 0, sizeof(test_locale));
988 :
989 : /* Get the name of the <Language> in English */
990 : if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
991 : test_locale, LOCALE_NAME_MAX_LENGTH))
992 : {
993 : /*
994 : * If the enumerated locale does not have a hyphen ("en") OR the
995 : * lc_message input does not have an underscore ("English"), we only
996 : * need to compare the <Language> tags.
997 : */
998 : if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
999 : {
1000 : if (_wcsicmp(argv[0], test_locale) == 0)
1001 : {
1002 : wcscpy(argv[1], pStr);
1003 : *argv[2] = (wchar_t) 1;
1004 : return FALSE;
1005 : }
1006 : }
1007 :
1008 : /*
1009 : * We have to compare a full <Language>_<Country> tag, so we append
1010 : * the underscore and name of the country/region in English, e.g.
1011 : * "English_United States".
1012 : */
1013 : else
1014 : {
1015 : size_t len;
1016 :
1017 : wcscat(test_locale, L"_");
1018 : len = wcslen(test_locale);
1019 : if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
1020 : test_locale + len,
1021 : LOCALE_NAME_MAX_LENGTH - len))
1022 : {
1023 : if (_wcsicmp(argv[0], test_locale) == 0)
1024 : {
1025 : wcscpy(argv[1], pStr);
1026 : *argv[2] = (wchar_t) 1;
1027 : return FALSE;
1028 : }
1029 : }
1030 : }
1031 : }
1032 :
1033 : return TRUE;
1034 : }
1035 :
1036 : /*
1037 : * This function converts a Windows locale name to an ISO formatted version
1038 : * for Visual Studio 2015 or greater.
1039 : *
1040 : * Returns NULL, if no valid conversion was found.
1041 : */
1042 : static char *
1043 : get_iso_localename(const char *winlocname)
1044 : {
1045 : wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH];
1046 : wchar_t buffer[LOCALE_NAME_MAX_LENGTH];
1047 : static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1048 : char *period;
1049 : int len;
1050 : int ret_val;
1051 :
1052 : /*
1053 : * Valid locales have the following syntax:
1054 : * <Language>[_<Country>[.<CodePage>]]
1055 : *
1056 : * GetLocaleInfoEx can only take locale name without code-page and for the
1057 : * purpose of this API the code-page doesn't matter.
1058 : */
1059 : period = strchr(winlocname, '.');
1060 : if (period != NULL)
1061 : len = period - winlocname;
1062 : else
1063 : len = pg_mbstrlen(winlocname);
1064 :
1065 : memset(wc_locale_name, 0, sizeof(wc_locale_name));
1066 : memset(buffer, 0, sizeof(buffer));
1067 : MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
1068 : LOCALE_NAME_MAX_LENGTH);
1069 :
1070 : /*
1071 : * If the lc_messages is already a Unix-style string, we have a direct
1072 : * match with LOCALE_SNAME, e.g. en-US, en_US.
1073 : */
1074 : ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
1075 : LOCALE_NAME_MAX_LENGTH);
1076 : if (!ret_val)
1077 : {
1078 : /*
1079 : * Search for a locale in the system that matches language and country
1080 : * name.
1081 : */
1082 : wchar_t *argv[3];
1083 :
1084 : argv[0] = wc_locale_name;
1085 : argv[1] = buffer;
1086 : argv[2] = (wchar_t *) &ret_val;
1087 : EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
1088 : NULL);
1089 : }
1090 :
1091 : if (ret_val)
1092 : {
1093 : size_t rc;
1094 : char *hyphen;
1095 :
1096 : /* Locale names use only ASCII, any conversion locale suffices. */
1097 : rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
1098 : if (rc == -1 || rc == sizeof(iso_lc_messages))
1099 : return NULL;
1100 :
1101 : /*
1102 : * Simply replace the hyphen with an underscore. See comments in
1103 : * IsoLocaleName.
1104 : */
1105 : hyphen = strchr(iso_lc_messages, '-');
1106 : if (hyphen)
1107 : *hyphen = '_';
1108 : return iso_lc_messages;
1109 : }
1110 :
1111 : return NULL;
1112 : }
1113 : #endif /* _MSC_VER >= 1900 */
1114 :
static char *
IsoLocaleName(const char *winlocname)
{
#if defined(_MSC_VER)
	static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];

	/* "C" and "POSIX", in any letter case, both map to plain "C". */
	if (pg_strcasecmp("c", winlocname) == 0 ||
		pg_strcasecmp("posix", winlocname) == 0)
	{
		strcpy(iso_lc_messages, "C");
		return iso_lc_messages;
	}

#if (_MSC_VER >= 1900)			/* Visual Studio 2015 or later */
	return get_iso_localename(winlocname);
#else
	{
		_locale_t	loct = _create_locale(LC_CTYPE, winlocname);

		if (loct != NULL)
		{
			size_t		rc;
			char	   *hyphen;

			/* Locale names use only ASCII, any conversion locale suffices. */
			rc = wchar2char(iso_lc_messages, loct->locinfo->locale_name[LC_CTYPE],
							sizeof(iso_lc_messages), NULL);
			_free_locale(loct);
			if (rc == -1 || rc == sizeof(iso_lc_messages))
				return NULL;

			/*
			 * The message catalogs live on a case-insensitive filesystem, so
			 * letter case need not be standardized here.  As long as we ship
			 * no message catalogs for which it would matter, the
			 * script/variant portion need not be translated either (e.g.
			 * uz-Cyrl-UZ to uz_UZ@cyrillic).  Replacing the hyphen with an
			 * underscore is all that is done.
			 *
			 * Note that the locale name can be less-specific than the value
			 * we would derive under earlier Visual Studio releases.  For
			 * example, French_France.1252 yields just "fr".  This does not
			 * affect any of the country-specific message catalogs available
			 * as of this writing (pt_BR, zh_CN, zh_TW).
			 */
			hyphen = strchr(iso_lc_messages, '-');
			if (hyphen)
				*hyphen = '_';
			return iso_lc_messages;
		}
	}
#endif							/* Visual Studio 2015 or later */
#endif							/* defined(_MSC_VER) */
	return NULL;				/* Not supported on this version of msvc/mingw */
}
1171 : #endif /* WIN32 && LC_MESSAGES */
1172 :
1173 :
1174 : /*
1175 : * Detect aging strxfrm() implementations that, in a subset of locales, write
1176 : * past the specified buffer length. Affected users must update OS packages
1177 : * before using PostgreSQL 9.5 or later.
1178 : *
1179 : * Assume that the bug can come and go from one postmaster startup to another
1180 : * due to physical replication among diverse machines. Assume that the bug's
1181 : * presence will not change during the life of a particular postmaster. Given
1182 : * those assumptions, call this no less than once per postmaster startup per
1183 : * LC_COLLATE setting used. No known-affected system offers strxfrm_l(), so
1184 : * there is no need to consider pg_collation locales.
1185 : */
1186 : void
1187 22582 : check_strxfrm_bug(void)
1188 : {
1189 : char buf[32];
1190 22582 : const int canary = 0x7F;
1191 22582 : bool ok = true;
1192 :
1193 : /*
1194 : * Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10
1195 : * 05/08 returns 18 and modifies 10 bytes. It respects limits above or
1196 : * below that range.
1197 : *
1198 : * The bug is present in Solaris 8 as well; it is absent in Solaris 10
1199 : * 01/13 and Solaris 11.2. Affected locales include is_IS.ISO8859-1,
1200 : * en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R. Unaffected locales
1201 : * include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C.
1202 : */
1203 22582 : buf[7] = canary;
1204 22582 : (void) strxfrm(buf, "ab", 7);
1205 22582 : if (buf[7] != canary)
1206 0 : ok = false;
1207 :
1208 : /*
1209 : * illumos bug #1594 was present in the source tree from 2010-10-11 to
1210 : * 2012-02-01. Given an ASCII string of any length and length limit 1,
1211 : * affected systems ignore the length limit and modify a number of bytes
1212 : * one less than the return value. The problem inputs for this bug do not
1213 : * overlap those for the Solaris bug, hence a distinct test.
1214 : *
1215 : * Affected systems include smartos-20110926T021612Z. Affected locales
1216 : * include en_US.ISO8859-1 and en_US.UTF-8. Unaffected locales include C.
1217 : */
1218 22582 : buf[1] = canary;
1219 22582 : (void) strxfrm(buf, "a", 1);
1220 22582 : if (buf[1] != canary)
1221 0 : ok = false;
1222 :
1223 22582 : if (!ok)
1224 0 : ereport(ERROR,
1225 : (errcode(ERRCODE_SYSTEM_ERROR),
1226 : errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length",
1227 : setlocale(LC_COLLATE, NULL)),
1228 : errhint("Apply system library package updates.")));
1229 22582 : }
1230 :
1231 :
1232 : /*
1233 : * Cache mechanism for collation information.
1234 : *
1235 : * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1236 : * (or POSIX), so we can optimize a few code paths in various places.
1237 : * For the built-in C and POSIX collations, we can know that without even
1238 : * doing a cache lookup, but we want to support aliases for C/POSIX too.
1239 : * For the "default" collation, there are separate static cache variables,
1240 : * since consulting the pg_collation catalog doesn't tell us what we need.
1241 : *
1242 : * Also, if a pg_locale_t has been requested for a collation, we cache that
1243 : * for the life of a backend.
1244 : *
1245 : * Note that some code relies on the flags not reporting false negatives
1246 : * (that is, saying it's not C when it is). For example, char2wchar()
1247 : * could fail if the locale is C, so str_tolower() shouldn't call it
1248 : * in that case.
1249 : *
1250 : * Note that we currently lack any way to flush the cache. Since we don't
1251 : * support ALTER COLLATION, this is OK. The worst case is that someone
1252 : * drops a collation, and a useless cache entry hangs around in existing
1253 : * backends.
1254 : */
1255 :
1256 : static collation_cache_entry *
1257 44 : lookup_collation_cache(Oid collation, bool set_flags)
1258 : {
1259 : collation_cache_entry *cache_entry;
1260 : bool found;
1261 :
1262 : Assert(OidIsValid(collation));
1263 : Assert(collation != DEFAULT_COLLATION_OID);
1264 :
1265 44 : if (collation_cache == NULL)
1266 : {
1267 : /* First time through, initialize the hash table */
1268 : HASHCTL ctl;
1269 :
1270 16 : ctl.keysize = sizeof(Oid);
1271 16 : ctl.entrysize = sizeof(collation_cache_entry);
1272 16 : collation_cache = hash_create("Collation cache", 100, &ctl,
1273 : HASH_ELEM | HASH_BLOBS);
1274 : }
1275 :
1276 44 : cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
1277 44 : if (!found)
1278 : {
1279 : /*
1280 : * Make sure cache entry is marked invalid, in case we fail before
1281 : * setting things.
1282 : */
1283 22 : cache_entry->flags_valid = false;
1284 22 : cache_entry->locale = 0;
1285 : }
1286 :
1287 44 : if (set_flags && !cache_entry->flags_valid)
1288 : {
1289 : /* Attempt to set the flags */
1290 : HeapTuple tp;
1291 : Form_pg_collation collform;
1292 :
1293 22 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1294 22 : if (!HeapTupleIsValid(tp))
1295 0 : elog(ERROR, "cache lookup failed for collation %u", collation);
1296 22 : collform = (Form_pg_collation) GETSTRUCT(tp);
1297 :
1298 22 : if (collform->collprovider == COLLPROVIDER_LIBC)
1299 : {
1300 : Datum datum;
1301 : bool isnull;
1302 : const char *collcollate;
1303 : const char *collctype;
1304 :
1305 22 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull);
1306 : Assert(!isnull);
1307 22 : collcollate = TextDatumGetCString(datum);
1308 22 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull);
1309 : Assert(!isnull);
1310 22 : collctype = TextDatumGetCString(datum);
1311 :
1312 28 : cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
1313 6 : (strcmp(collcollate, "POSIX") == 0));
1314 28 : cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
1315 6 : (strcmp(collctype, "POSIX") == 0));
1316 : }
1317 : else
1318 : {
1319 0 : cache_entry->collate_is_c = false;
1320 0 : cache_entry->ctype_is_c = false;
1321 : }
1322 :
1323 22 : cache_entry->flags_valid = true;
1324 :
1325 22 : ReleaseSysCache(tp);
1326 : }
1327 :
1328 44 : return cache_entry;
1329 : }
1330 :
1331 :
1332 : /*
1333 : * Detect whether collation's LC_COLLATE property is C
1334 : */
1335 : bool
1336 17824644 : lc_collate_is_c(Oid collation)
1337 : {
1338 : /*
1339 : * If we're asked about "collation 0", return false, so that the code will
1340 : * go into the non-C path and report that the collation is bogus.
1341 : */
1342 17824644 : if (!OidIsValid(collation))
1343 0 : return false;
1344 :
1345 : /*
1346 : * If we're asked about the default collation, we have to inquire of the C
1347 : * library. Cache the result so we only have to compute it once.
1348 : */
1349 17824644 : if (collation == DEFAULT_COLLATION_OID)
1350 : {
1351 : static int result = -1;
1352 : char *localeptr;
1353 :
1354 12086504 : if (default_locale.provider == COLLPROVIDER_ICU)
1355 0 : return false;
1356 :
1357 12086504 : if (result >= 0)
1358 12082656 : return (bool) result;
1359 3848 : localeptr = setlocale(LC_COLLATE, NULL);
1360 3848 : if (!localeptr)
1361 0 : elog(ERROR, "invalid LC_COLLATE setting");
1362 :
1363 3848 : if (strcmp(localeptr, "C") == 0)
1364 62 : result = true;
1365 3786 : else if (strcmp(localeptr, "POSIX") == 0)
1366 0 : result = true;
1367 : else
1368 3786 : result = false;
1369 3848 : return (bool) result;
1370 : }
1371 :
1372 : /*
1373 : * If we're asked about the built-in C/POSIX collations, we know that.
1374 : */
1375 5738140 : if (collation == C_COLLATION_OID ||
1376 : collation == POSIX_COLLATION_OID)
1377 5738118 : return true;
1378 :
1379 : /*
1380 : * Otherwise, we have to consult pg_collation, but we cache that.
1381 : */
1382 22 : return (lookup_collation_cache(collation, true))->collate_is_c;
1383 : }
1384 :
1385 : /*
1386 : * Detect whether collation's LC_CTYPE property is C
1387 : */
1388 : bool
1389 5472714 : lc_ctype_is_c(Oid collation)
1390 : {
1391 : /*
1392 : * If we're asked about "collation 0", return false, so that the code will
1393 : * go into the non-C path and report that the collation is bogus.
1394 : */
1395 5472714 : if (!OidIsValid(collation))
1396 0 : return false;
1397 :
1398 : /*
1399 : * If we're asked about the default collation, we have to inquire of the C
1400 : * library. Cache the result so we only have to compute it once.
1401 : */
1402 5472714 : if (collation == DEFAULT_COLLATION_OID)
1403 : {
1404 : static int result = -1;
1405 : char *localeptr;
1406 :
1407 3030762 : if (default_locale.provider == COLLPROVIDER_ICU)
1408 0 : return false;
1409 :
1410 3030762 : if (result >= 0)
1411 3029596 : return (bool) result;
1412 1166 : localeptr = setlocale(LC_CTYPE, NULL);
1413 1166 : if (!localeptr)
1414 0 : elog(ERROR, "invalid LC_CTYPE setting");
1415 :
1416 1166 : if (strcmp(localeptr, "C") == 0)
1417 24 : result = true;
1418 1142 : else if (strcmp(localeptr, "POSIX") == 0)
1419 0 : result = true;
1420 : else
1421 1142 : result = false;
1422 1166 : return (bool) result;
1423 : }
1424 :
1425 : /*
1426 : * If we're asked about the built-in C/POSIX collations, we know that.
1427 : */
1428 2441952 : if (collation == C_COLLATION_OID ||
1429 : collation == POSIX_COLLATION_OID)
1430 2441930 : return true;
1431 :
1432 : /*
1433 : * Otherwise, we have to consult pg_collation, but we cache that.
1434 : */
1435 22 : return (lookup_collation_cache(collation, true))->ctype_is_c;
1436 : }
1437 :
1438 : struct pg_locale_struct default_locale;
1439 :
1440 : void
1441 0 : make_icu_collator(const char *iculocstr,
1442 : struct pg_locale_struct *resultp)
1443 : {
1444 : #ifdef USE_ICU
1445 : UCollator *collator;
1446 : UErrorCode status;
1447 :
1448 : status = U_ZERO_ERROR;
1449 : collator = ucol_open(iculocstr, &status);
1450 : if (U_FAILURE(status))
1451 : ereport(ERROR,
1452 : (errmsg("could not open collator for locale \"%s\": %s",
1453 : iculocstr, u_errorName(status))));
1454 :
1455 : if (U_ICU_VERSION_MAJOR_NUM < 54)
1456 : icu_set_collation_attributes(collator, iculocstr);
1457 :
1458 : /* We will leak this string if the caller errors later :-( */
1459 : resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, iculocstr);
1460 : resultp->info.icu.ucol = collator;
1461 : #else /* not USE_ICU */
1462 : /* could get here if a collation was created by a build with ICU */
1463 0 : ereport(ERROR,
1464 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1465 : errmsg("ICU is not supported in this build")));
1466 : #endif /* not USE_ICU */
1467 : }
1468 :
1469 :
1470 : /* simple subroutine for reporting errors from newlocale() */
1471 : #ifdef HAVE_LOCALE_T
1472 : static void
1473 0 : report_newlocale_failure(const char *localename)
1474 : {
1475 : int save_errno;
1476 :
1477 : /*
1478 : * Windows doesn't provide any useful error indication from
1479 : * _create_locale(), and BSD-derived platforms don't seem to feel they
1480 : * need to set errno either (even though POSIX is pretty clear that
1481 : * newlocale should do so). So, if errno hasn't been set, assume ENOENT
1482 : * is what to report.
1483 : */
1484 0 : if (errno == 0)
1485 0 : errno = ENOENT;
1486 :
1487 : /*
1488 : * ENOENT means "no such locale", not "no such file", so clarify that
1489 : * errno with an errdetail message.
1490 : */
1491 0 : save_errno = errno; /* auxiliary funcs might change errno */
1492 0 : ereport(ERROR,
1493 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1494 : errmsg("could not create locale \"%s\": %m",
1495 : localename),
1496 : (save_errno == ENOENT ?
1497 : errdetail("The operating system could not find any locale data for the locale name \"%s\".",
1498 : localename) : 0)));
1499 : }
1500 : #endif /* HAVE_LOCALE_T */
1501 :
1502 :
1503 : /*
1504 : * Create a locale_t from a collation OID. Results are cached for the
1505 : * lifetime of the backend. Thus, do not free the result with freelocale().
1506 : *
1507 : * As a special optimization, the default/database collation returns 0.
1508 : * Callers should then revert to the non-locale_t-enabled code path.
1509 : * Also, callers should avoid calling this before going down a C/POSIX
1510 : * fastpath, because such a fastpath should work even on platforms without
1511 : * locale_t support in the C library.
1512 : *
1513 : * For simplicity, we always generate COLLATE + CTYPE even though we
1514 : * might only need one of them. Since this is called only once per session,
1515 : * it shouldn't cost much.
1516 : */
1517 : pg_locale_t
1518 14787358 : pg_newlocale_from_collation(Oid collid)
1519 : {
1520 : collation_cache_entry *cache_entry;
1521 :
1522 : /* Callers must pass a valid OID */
1523 : Assert(OidIsValid(collid));
1524 :
1525 14787358 : if (collid == DEFAULT_COLLATION_OID)
1526 : {
1527 14787358 : if (default_locale.provider == COLLPROVIDER_ICU)
1528 0 : return &default_locale;
1529 : else
1530 14787358 : return (pg_locale_t) 0;
1531 : }
1532 :
1533 0 : cache_entry = lookup_collation_cache(collid, false);
1534 :
1535 0 : if (cache_entry->locale == 0)
1536 : {
1537 : /* We haven't computed this yet in this session, so do it */
1538 : HeapTuple tp;
1539 : Form_pg_collation collform;
1540 : struct pg_locale_struct result;
1541 : pg_locale_t resultp;
1542 : Datum datum;
1543 : bool isnull;
1544 :
1545 0 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1546 0 : if (!HeapTupleIsValid(tp))
1547 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
1548 0 : collform = (Form_pg_collation) GETSTRUCT(tp);
1549 :
1550 : /* We'll fill in the result struct locally before allocating memory */
1551 0 : memset(&result, 0, sizeof(result));
1552 0 : result.provider = collform->collprovider;
1553 0 : result.deterministic = collform->collisdeterministic;
1554 :
1555 0 : if (collform->collprovider == COLLPROVIDER_LIBC)
1556 : {
1557 : #ifdef HAVE_LOCALE_T
1558 : const char *collcollate;
1559 : const char *collctype pg_attribute_unused();
1560 : locale_t loc;
1561 :
1562 0 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull);
1563 : Assert(!isnull);
1564 0 : collcollate = TextDatumGetCString(datum);
1565 0 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull);
1566 : Assert(!isnull);
1567 0 : collctype = TextDatumGetCString(datum);
1568 :
1569 0 : if (strcmp(collcollate, collctype) == 0)
1570 : {
1571 : /* Normal case where they're the same */
1572 0 : errno = 0;
1573 : #ifndef WIN32
1574 0 : loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
1575 : NULL);
1576 : #else
1577 : loc = _create_locale(LC_ALL, collcollate);
1578 : #endif
1579 0 : if (!loc)
1580 0 : report_newlocale_failure(collcollate);
1581 : }
1582 : else
1583 : {
1584 : #ifndef WIN32
1585 : /* We need two newlocale() steps */
1586 : locale_t loc1;
1587 :
1588 0 : errno = 0;
1589 0 : loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
1590 0 : if (!loc1)
1591 0 : report_newlocale_failure(collcollate);
1592 0 : errno = 0;
1593 0 : loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
1594 0 : if (!loc)
1595 0 : report_newlocale_failure(collctype);
1596 : #else
1597 :
1598 : /*
1599 : * XXX The _create_locale() API doesn't appear to support
1600 : * this. Could perhaps be worked around by changing
1601 : * pg_locale_t to contain two separate fields.
1602 : */
1603 : ereport(ERROR,
1604 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1605 : errmsg("collations with different collate and ctype values are not supported on this platform")));
1606 : #endif
1607 : }
1608 :
1609 0 : result.info.lt = loc;
1610 : #else /* not HAVE_LOCALE_T */
1611 : /* platform that doesn't support locale_t */
1612 : ereport(ERROR,
1613 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1614 : errmsg("collation provider LIBC is not supported on this platform")));
1615 : #endif /* not HAVE_LOCALE_T */
1616 : }
1617 0 : else if (collform->collprovider == COLLPROVIDER_ICU)
1618 : {
1619 : const char *iculocstr;
1620 :
1621 0 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colliculocale, &isnull);
1622 : Assert(!isnull);
1623 0 : iculocstr = TextDatumGetCString(datum);
1624 0 : make_icu_collator(iculocstr, &result);
1625 : }
1626 :
1627 0 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1628 : &isnull);
1629 0 : if (!isnull)
1630 : {
1631 : char *actual_versionstr;
1632 : char *collversionstr;
1633 :
1634 0 : collversionstr = TextDatumGetCString(datum);
1635 :
1636 0 : datum = SysCacheGetAttr(COLLOID, tp, collform->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate, &isnull);
1637 : Assert(!isnull);
1638 :
1639 0 : actual_versionstr = get_collation_actual_version(collform->collprovider,
1640 0 : TextDatumGetCString(datum));
1641 0 : if (!actual_versionstr)
1642 : {
1643 : /*
1644 : * This could happen when specifying a version in CREATE
1645 : * COLLATION but the provider does not support versioning, or
1646 : * manually creating a mess in the catalogs.
1647 : */
1648 0 : ereport(ERROR,
1649 : (errmsg("collation \"%s\" has no actual version, but a version was recorded",
1650 : NameStr(collform->collname))));
1651 : }
1652 :
1653 0 : if (strcmp(actual_versionstr, collversionstr) != 0)
1654 0 : ereport(WARNING,
1655 : (errmsg("collation \"%s\" has version mismatch",
1656 : NameStr(collform->collname)),
1657 : errdetail("The collation in the database was created using version %s, "
1658 : "but the operating system provides version %s.",
1659 : collversionstr, actual_versionstr),
1660 : errhint("Rebuild all objects affected by this collation and run "
1661 : "ALTER COLLATION %s REFRESH VERSION, "
1662 : "or build PostgreSQL with the right library version.",
1663 : quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1664 : NameStr(collform->collname)))));
1665 : }
1666 :
1667 0 : ReleaseSysCache(tp);
1668 :
1669 : /* We'll keep the pg_locale_t structures in TopMemoryContext */
1670 0 : resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp));
1671 0 : *resultp = result;
1672 :
1673 0 : cache_entry->locale = resultp;
1674 : }
1675 :
1676 0 : return cache_entry->locale;
1677 : }
1678 :
1679 : /*
1680 : * Get provider-specific collation version string for the given collation from
1681 : * the operating system/library.
1682 : */
1683 : char *
1684 20748 : get_collation_actual_version(char collprovider, const char *collcollate)
1685 : {
1686 20748 : char *collversion = NULL;
1687 :
1688 : #ifdef USE_ICU
1689 : if (collprovider == COLLPROVIDER_ICU)
1690 : {
1691 : UCollator *collator;
1692 : UErrorCode status;
1693 : UVersionInfo versioninfo;
1694 : char buf[U_MAX_VERSION_STRING_LENGTH];
1695 :
1696 : status = U_ZERO_ERROR;
1697 : collator = ucol_open(collcollate, &status);
1698 : if (U_FAILURE(status))
1699 : ereport(ERROR,
1700 : (errmsg("could not open collator for locale \"%s\": %s",
1701 : collcollate, u_errorName(status))));
1702 : ucol_getVersion(collator, versioninfo);
1703 : ucol_close(collator);
1704 :
1705 : u_versionToString(versioninfo, buf);
1706 : collversion = pstrdup(buf);
1707 : }
1708 : else
1709 : #endif
1710 41496 : if (collprovider == COLLPROVIDER_LIBC &&
1711 41360 : pg_strcasecmp("C", collcollate) != 0 &&
1712 40160 : pg_strncasecmp("C.", collcollate, 2) != 0 &&
1713 19548 : pg_strcasecmp("POSIX", collcollate) != 0)
1714 : {
1715 : #if defined(__GLIBC__)
1716 : /* Use the glibc version because we don't have anything better. */
1717 19542 : collversion = pstrdup(gnu_get_libc_version());
1718 : #elif defined(LC_VERSION_MASK)
1719 : locale_t loc;
1720 :
1721 : /* Look up FreeBSD collation version. */
1722 : loc = newlocale(LC_COLLATE, collcollate, NULL);
1723 : if (loc)
1724 : {
1725 : collversion =
1726 : pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
1727 : freelocale(loc);
1728 : }
1729 : else
1730 : ereport(ERROR,
1731 : (errmsg("could not load locale \"%s\"", collcollate)));
1732 : #elif defined(WIN32) && _WIN32_WINNT >= 0x0600
1733 : /*
1734 : * If we are targeting Windows Vista and above, we can ask for a name
1735 : * given a collation name (earlier versions required a location code
1736 : * that we don't have).
1737 : */
1738 : NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
1739 : WCHAR wide_collcollate[LOCALE_NAME_MAX_LENGTH];
1740 :
1741 : MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
1742 : LOCALE_NAME_MAX_LENGTH);
1743 : if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
1744 : {
1745 : /*
1746 : * GetNLSVersionEx() wants a language tag such as "en-US", not a
1747 : * locale name like "English_United States.1252". Until those
1748 : * values can be prevented from entering the system, or 100%
1749 : * reliably converted to the more useful tag format, tolerate the
1750 : * resulting error and report that we have no version data.
1751 : */
1752 : if (GetLastError() == ERROR_INVALID_PARAMETER)
1753 : return NULL;
1754 :
1755 : ereport(ERROR,
1756 : (errmsg("could not get collation version for locale \"%s\": error code %lu",
1757 : collcollate,
1758 : GetLastError())));
1759 : }
1760 : collversion = psprintf("%d.%d,%d.%d",
1761 : (version.dwNLSVersion >> 8) & 0xFFFF,
1762 : version.dwNLSVersion & 0xFF,
1763 : (version.dwDefinedVersion >> 8) & 0xFFFF,
1764 : version.dwDefinedVersion & 0xFF);
1765 : #endif
1766 : }
1767 :
1768 20748 : return collversion;
1769 : }
1770 :
1771 :
1772 : #ifdef USE_ICU
1773 : /*
1774 : * Converter object for converting between ICU's UChar strings and C strings
1775 : * in database encoding. Since the database encoding doesn't change, we only
1776 : * need one of these per session.
1777 : */
1778 : static UConverter *icu_converter = NULL;
1779 :
1780 : static void
1781 : init_icu_converter(void)
1782 : {
1783 : const char *icu_encoding_name;
1784 : UErrorCode status;
1785 : UConverter *conv;
1786 :
1787 : if (icu_converter)
1788 : return; /* already done */
1789 :
1790 : icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
1791 : if (!icu_encoding_name)
1792 : ereport(ERROR,
1793 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1794 : errmsg("encoding \"%s\" not supported by ICU",
1795 : pg_encoding_to_char(GetDatabaseEncoding()))));
1796 :
1797 : status = U_ZERO_ERROR;
1798 : conv = ucnv_open(icu_encoding_name, &status);
1799 : if (U_FAILURE(status))
1800 : ereport(ERROR,
1801 : (errmsg("could not open ICU converter for encoding \"%s\": %s",
1802 : icu_encoding_name, u_errorName(status))));
1803 :
1804 : icu_converter = conv;
1805 : }
1806 :
1807 : /*
1808 : * Convert a string in the database encoding into a string of UChars.
1809 : *
1810 : * The source string at buff is of length nbytes
1811 : * (it needn't be nul-terminated)
1812 : *
1813 : * *buff_uchar receives a pointer to the palloc'd result string, and
1814 : * the function's result is the number of UChars generated.
1815 : *
1816 : * The result string is nul-terminated, though most callers rely on the
1817 : * result length instead.
1818 : */
1819 : int32_t
1820 : icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
1821 : {
1822 : UErrorCode status;
1823 : int32_t len_uchar;
1824 :
1825 : init_icu_converter();
1826 :
1827 : status = U_ZERO_ERROR;
1828 : len_uchar = ucnv_toUChars(icu_converter, NULL, 0,
1829 : buff, nbytes, &status);
1830 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1831 : ereport(ERROR,
1832 : (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
1833 :
1834 : *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
1835 :
1836 : status = U_ZERO_ERROR;
1837 : len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar + 1,
1838 : buff, nbytes, &status);
1839 : if (U_FAILURE(status))
1840 : ereport(ERROR,
1841 : (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
1842 :
1843 : return len_uchar;
1844 : }
1845 :
1846 : /*
1847 : * Convert a string of UChars into the database encoding.
1848 : *
1849 : * The source string at buff_uchar is of length len_uchar
1850 : * (it needn't be nul-terminated)
1851 : *
1852 : * *result receives a pointer to the palloc'd result string, and the
1853 : * function's result is the number of bytes generated (not counting nul).
1854 : *
1855 : * The result string is nul-terminated.
1856 : */
1857 : int32_t
1858 : icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
1859 : {
1860 : UErrorCode status;
1861 : int32_t len_result;
1862 :
1863 : init_icu_converter();
1864 :
1865 : status = U_ZERO_ERROR;
1866 : len_result = ucnv_fromUChars(icu_converter, NULL, 0,
1867 : buff_uchar, len_uchar, &status);
1868 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1869 : ereport(ERROR,
1870 : (errmsg("%s failed: %s", "ucnv_fromUChars",
1871 : u_errorName(status))));
1872 :
1873 : *result = palloc(len_result + 1);
1874 :
1875 : status = U_ZERO_ERROR;
1876 : len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
1877 : buff_uchar, len_uchar, &status);
1878 : if (U_FAILURE(status))
1879 : ereport(ERROR,
1880 : (errmsg("%s failed: %s", "ucnv_fromUChars",
1881 : u_errorName(status))));
1882 :
1883 : return len_result;
1884 : }
1885 :
1886 : /*
1887 : * Parse collation attributes and apply them to the open collator. This takes
1888 : * a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
1889 : * applies the key-value arguments.
1890 : *
1891 : * Starting with ICU version 54, the attributes are processed automatically by
1892 : * ucol_open(), so this is only necessary for emulating this behavior on older
1893 : * versions.
1894 : */
1895 : pg_attribute_unused()
1896 : static void
1897 : icu_set_collation_attributes(UCollator *collator, const char *loc)
1898 : {
1899 : char *str = asc_tolower(loc, strlen(loc));
1900 :
1901 : str = strchr(str, '@');
1902 : if (!str)
1903 : return;
1904 : str++;
1905 :
1906 : for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
1907 : {
1908 : char *e = strchr(token, '=');
1909 :
1910 : if (e)
1911 : {
1912 : char *name;
1913 : char *value;
1914 : UColAttribute uattr;
1915 : UColAttributeValue uvalue;
1916 : UErrorCode status;
1917 :
1918 : status = U_ZERO_ERROR;
1919 :
1920 : *e = '\0';
1921 : name = token;
1922 : value = e + 1;
1923 :
1924 : /*
1925 : * See attribute name and value lists in ICU i18n/coll.cpp
1926 : */
1927 : if (strcmp(name, "colstrength") == 0)
1928 : uattr = UCOL_STRENGTH;
1929 : else if (strcmp(name, "colbackwards") == 0)
1930 : uattr = UCOL_FRENCH_COLLATION;
1931 : else if (strcmp(name, "colcaselevel") == 0)
1932 : uattr = UCOL_CASE_LEVEL;
1933 : else if (strcmp(name, "colcasefirst") == 0)
1934 : uattr = UCOL_CASE_FIRST;
1935 : else if (strcmp(name, "colalternate") == 0)
1936 : uattr = UCOL_ALTERNATE_HANDLING;
1937 : else if (strcmp(name, "colnormalization") == 0)
1938 : uattr = UCOL_NORMALIZATION_MODE;
1939 : else if (strcmp(name, "colnumeric") == 0)
1940 : uattr = UCOL_NUMERIC_COLLATION;
1941 : else
1942 : /* ignore if unknown */
1943 : continue;
1944 :
1945 : if (strcmp(value, "primary") == 0)
1946 : uvalue = UCOL_PRIMARY;
1947 : else if (strcmp(value, "secondary") == 0)
1948 : uvalue = UCOL_SECONDARY;
1949 : else if (strcmp(value, "tertiary") == 0)
1950 : uvalue = UCOL_TERTIARY;
1951 : else if (strcmp(value, "quaternary") == 0)
1952 : uvalue = UCOL_QUATERNARY;
1953 : else if (strcmp(value, "identical") == 0)
1954 : uvalue = UCOL_IDENTICAL;
1955 : else if (strcmp(value, "no") == 0)
1956 : uvalue = UCOL_OFF;
1957 : else if (strcmp(value, "yes") == 0)
1958 : uvalue = UCOL_ON;
1959 : else if (strcmp(value, "shifted") == 0)
1960 : uvalue = UCOL_SHIFTED;
1961 : else if (strcmp(value, "non-ignorable") == 0)
1962 : uvalue = UCOL_NON_IGNORABLE;
1963 : else if (strcmp(value, "lower") == 0)
1964 : uvalue = UCOL_LOWER_FIRST;
1965 : else if (strcmp(value, "upper") == 0)
1966 : uvalue = UCOL_UPPER_FIRST;
1967 : else
1968 : status = U_ILLEGAL_ARGUMENT_ERROR;
1969 :
1970 : if (status == U_ZERO_ERROR)
1971 : ucol_setAttribute(collator, uattr, uvalue, &status);
1972 :
1973 : /*
1974 : * Pretend the error came from ucol_open(), for consistent error
1975 : * message across ICU versions.
1976 : */
1977 : if (U_FAILURE(status))
1978 : ereport(ERROR,
1979 : (errmsg("could not open collator for locale \"%s\": %s",
1980 : loc, u_errorName(status))));
1981 : }
1982 : }
1983 : }
1984 :
1985 : #endif /* USE_ICU */
1986 :
1987 : /*
1988 : * Check if the given locale ID is valid, and ereport(ERROR) if it isn't.
1989 : */
1990 : void
1991 0 : check_icu_locale(const char *icu_locale)
1992 : {
1993 : #ifdef USE_ICU
1994 : UCollator *collator;
1995 : UErrorCode status;
1996 :
1997 : status = U_ZERO_ERROR;
1998 : collator = ucol_open(icu_locale, &status);
1999 : if (U_FAILURE(status))
2000 : ereport(ERROR,
2001 : (errmsg("could not open collator for locale \"%s\": %s",
2002 : icu_locale, u_errorName(status))));
2003 :
2004 : if (U_ICU_VERSION_MAJOR_NUM < 54)
2005 : icu_set_collation_attributes(collator, icu_locale);
2006 : ucol_close(collator);
2007 : #else
2008 0 : ereport(ERROR,
2009 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2010 : errmsg("ICU is not supported in this build")));
2011 : #endif
2012 : }
2013 :
2014 : /*
2015 : * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
2016 : * Therefore we keep them here rather than with the mbutils code.
2017 : */
2018 :
2019 : /*
2020 : * wchar2char --- convert wide characters to multibyte format
2021 : *
2022 : * This has the same API as the standard wcstombs_l() function; in particular,
2023 : * tolen is the maximum number of bytes to store at *to, and *from must be
2024 : * zero-terminated. The output will be zero-terminated iff there is room.
2025 : */
2026 : size_t
2027 1243514 : wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
2028 : {
2029 : size_t result;
2030 :
2031 : Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
2032 :
2033 1243514 : if (tolen == 0)
2034 0 : return 0;
2035 :
2036 : #ifdef WIN32
2037 :
2038 : /*
2039 : * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
2040 : * for some reason mbstowcs and wcstombs won't do this for us, so we use
2041 : * MultiByteToWideChar().
2042 : */
2043 : if (GetDatabaseEncoding() == PG_UTF8)
2044 : {
2045 : result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
2046 : NULL, NULL);
2047 : /* A zero return is failure */
2048 : if (result <= 0)
2049 : result = -1;
2050 : else
2051 : {
2052 : Assert(result <= tolen);
2053 : /* Microsoft counts the zero terminator in the result */
2054 : result--;
2055 : }
2056 : }
2057 : else
2058 : #endif /* WIN32 */
2059 1243514 : if (locale == (pg_locale_t) 0)
2060 : {
2061 : /* Use wcstombs directly for the default locale */
2062 1243514 : result = wcstombs(to, from, tolen);
2063 : }
2064 : else
2065 : {
2066 : #ifdef HAVE_LOCALE_T
2067 : #ifdef HAVE_WCSTOMBS_L
2068 : /* Use wcstombs_l for nondefault locales */
2069 : result = wcstombs_l(to, from, tolen, locale->info.lt);
2070 : #else /* !HAVE_WCSTOMBS_L */
2071 : /* We have to temporarily set the locale as current ... ugh */
2072 0 : locale_t save_locale = uselocale(locale->info.lt);
2073 :
2074 0 : result = wcstombs(to, from, tolen);
2075 :
2076 0 : uselocale(save_locale);
2077 : #endif /* HAVE_WCSTOMBS_L */
2078 : #else /* !HAVE_LOCALE_T */
2079 : /* Can't have locale != 0 without HAVE_LOCALE_T */
2080 : elog(ERROR, "wcstombs_l is not available");
2081 : result = 0; /* keep compiler quiet */
2082 : #endif /* HAVE_LOCALE_T */
2083 : }
2084 :
2085 1243514 : return result;
2086 : }
2087 :
2088 : /*
2089 : * char2wchar --- convert multibyte characters to wide characters
2090 : *
2091 : * This has almost the API of mbstowcs_l(), except that *from need not be
2092 : * null-terminated; instead, the number of input bytes is specified as
2093 : * fromlen. Also, we ereport() rather than returning -1 for invalid
2094 : * input encoding. tolen is the maximum number of wchar_t's to store at *to.
2095 : * The output will be zero-terminated iff there is room.
2096 : */
2097 : size_t
2098 1248086 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
2099 : pg_locale_t locale)
2100 : {
2101 : size_t result;
2102 :
2103 : Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
2104 :
2105 1248086 : if (tolen == 0)
2106 0 : return 0;
2107 :
2108 : #ifdef WIN32
2109 : /* See WIN32 "Unicode" comment above */
2110 : if (GetDatabaseEncoding() == PG_UTF8)
2111 : {
2112 : /* Win32 API does not work for zero-length input */
2113 : if (fromlen == 0)
2114 : result = 0;
2115 : else
2116 : {
2117 : result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
2118 : /* A zero return is failure */
2119 : if (result == 0)
2120 : result = -1;
2121 : }
2122 :
2123 : if (result != -1)
2124 : {
2125 : Assert(result < tolen);
2126 : /* Append trailing null wchar (MultiByteToWideChar() does not) */
2127 : to[result] = 0;
2128 : }
2129 : }
2130 : else
2131 : #endif /* WIN32 */
2132 : {
2133 : /* mbstowcs requires ending '\0' */
2134 1248086 : char *str = pnstrdup(from, fromlen);
2135 :
2136 1248086 : if (locale == (pg_locale_t) 0)
2137 : {
2138 : /* Use mbstowcs directly for the default locale */
2139 1248086 : result = mbstowcs(to, str, tolen);
2140 : }
2141 : else
2142 : {
2143 : #ifdef HAVE_LOCALE_T
2144 : #ifdef HAVE_MBSTOWCS_L
2145 : /* Use mbstowcs_l for nondefault locales */
2146 : result = mbstowcs_l(to, str, tolen, locale->info.lt);
2147 : #else /* !HAVE_MBSTOWCS_L */
2148 : /* We have to temporarily set the locale as current ... ugh */
2149 0 : locale_t save_locale = uselocale(locale->info.lt);
2150 :
2151 0 : result = mbstowcs(to, str, tolen);
2152 :
2153 0 : uselocale(save_locale);
2154 : #endif /* HAVE_MBSTOWCS_L */
2155 : #else /* !HAVE_LOCALE_T */
2156 : /* Can't have locale != 0 without HAVE_LOCALE_T */
2157 : elog(ERROR, "mbstowcs_l is not available");
2158 : result = 0; /* keep compiler quiet */
2159 : #endif /* HAVE_LOCALE_T */
2160 : }
2161 :
2162 1248086 : pfree(str);
2163 : }
2164 :
2165 1248086 : if (result == -1)
2166 : {
2167 : /*
2168 : * Invalid multibyte character encountered. We try to give a useful
2169 : * error message by letting pg_verifymbstr check the string. But it's
2170 : * possible that the string is OK to us, and not OK to mbstowcs ---
2171 : * this suggests that the LC_CTYPE locale is different from the
2172 : * database encoding. Give a generic error message if pg_verifymbstr
2173 : * can't find anything wrong.
2174 : */
2175 0 : pg_verifymbstr(from, fromlen, false); /* might not return */
2176 : /* but if it does ... */
2177 0 : ereport(ERROR,
2178 : (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
2179 : errmsg("invalid multibyte character for locale"),
2180 : errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
2181 : }
2182 :
2183 1248086 : return result;
2184 : }
|