Line data Source code
1 : /*-----------------------------------------------------------------------
2 : *
3 : * PostgreSQL locale utilities
4 : *
5 : * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
6 : *
7 : * src/backend/utils/adt/pg_locale.c
8 : *
9 : *-----------------------------------------------------------------------
10 : */
11 :
12 : /*----------
13 : * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14 : * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15 : * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16 : * toupper(), etc. are always in the same fixed locale.
17 : *
18 : * LC_MESSAGES is settable at run time and will take effect
19 : * immediately.
20 : *
21 : * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
22 : * settable at run-time. However, we don't actually set those locale
23 : * categories permanently. This would have bizarre effects like no
24 : * longer accepting standard floating-point literals in some locales.
25 : * Instead, we only set these locale categories briefly when needed,
26 : * cache the required information obtained from localeconv() or
27 : * strftime(), and then set the locale categories back to "C".
28 : * The cached information is only used by the formatting functions
29 : * (to_char, etc.) and the money type. For the user, this should all be
30 : * transparent.
31 : *
32 : * !!! NOW HEAR THIS !!!
33 : *
34 : * We've been bitten repeatedly by this bug, so let's try to keep it in
35 : * mind in future: on some platforms, the locale functions return pointers
36 : * to static data that will be overwritten by any later locale function.
37 : * Thus, for example, the obvious-looking sequence
38 : * save = setlocale(category, NULL);
39 : * if (!setlocale(category, value))
40 : * fail = true;
41 : * setlocale(category, save);
42 : * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
43 : * will change the memory save is pointing at. To do this sort of thing
44 : * safely, you *must* pstrdup what setlocale returns the first time.
45 : *
46 : * The POSIX locale standard is available here:
47 : *
48 : * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
49 : *----------
50 : */
51 :
52 :
53 : #include "postgres.h"
54 :
55 : #include <time.h>
56 :
57 : #include "access/htup_details.h"
58 : #include "catalog/pg_collation.h"
59 : #include "catalog/pg_database.h"
60 : #include "common/hashfn.h"
61 : #include "common/string.h"
62 : #include "mb/pg_wchar.h"
63 : #include "miscadmin.h"
64 : #include "utils/builtins.h"
65 : #include "utils/formatting.h"
66 : #include "utils/guc_hooks.h"
67 : #include "utils/lsyscache.h"
68 : #include "utils/memutils.h"
69 : #include "utils/pg_locale.h"
70 : #include "utils/syscache.h"
71 :
72 : #ifdef WIN32
73 : #include <shlwapi.h>
74 : #endif
75 :
76 : /* Error triggered for locale-sensitive subroutines */
77 : #define PGLOCALE_SUPPORT_ERROR(provider) \
78 : elog(ERROR, "unsupported collprovider for %s: %c", __func__, provider)
79 :
80 : /*
81 : * This should be large enough that most strings will fit, but small enough
82 : * that we feel comfortable putting it on the stack
83 : */
84 : #define TEXTBUFLEN 1024
85 :
86 : #define MAX_L10N_DATA 80
87 :
88 : /* pg_locale_builtin.c */
89 : extern pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context);
90 : extern char *get_collation_actual_version_builtin(const char *collcollate);
91 :
92 : /* pg_locale_icu.c */
93 : #ifdef USE_ICU
94 : extern UCollator *pg_ucol_open(const char *loc_str);
95 : extern char *get_collation_actual_version_icu(const char *collcollate);
96 : #endif
97 : extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
98 :
99 : /* pg_locale_libc.c */
100 : extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
101 : extern char *get_collation_actual_version_libc(const char *collcollate);
102 :
103 : extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
104 : ssize_t srclen, pg_locale_t locale);
105 : extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
106 : ssize_t srclen, pg_locale_t locale);
107 : extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
108 : ssize_t srclen, pg_locale_t locale);
109 :
110 : extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
111 : ssize_t srclen, pg_locale_t locale);
112 : extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
113 : ssize_t srclen, pg_locale_t locale);
114 : extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
115 : ssize_t srclen, pg_locale_t locale);
116 :
117 : extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
118 : ssize_t srclen, pg_locale_t locale);
119 : extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
120 : ssize_t srclen, pg_locale_t locale);
121 : extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
122 : ssize_t srclen, pg_locale_t locale);
123 :
124 : /* GUC settings */
125 : char *locale_messages;
126 : char *locale_monetary;
127 : char *locale_numeric;
128 : char *locale_time;
129 :
130 : int icu_validation_level = WARNING;
131 :
132 : /*
133 : * lc_time localization cache.
134 : *
135 : * We use only the first 7 or 12 entries of these arrays. The last array
136 : * element is left as NULL for the convenience of outside code that wants
137 : * to sequentially scan these arrays.
138 : */
139 : char *localized_abbrev_days[7 + 1];
140 : char *localized_full_days[7 + 1];
141 : char *localized_abbrev_months[12 + 1];
142 : char *localized_full_months[12 + 1];
143 :
144 : /* is the database's LC_CTYPE the C locale? */
145 : bool database_ctype_is_c = false;
146 :
147 : static pg_locale_t default_locale = NULL;
148 :
149 : /* indicates whether locale information cache is valid */
150 : static bool CurrentLocaleConvValid = false;
151 : static bool CurrentLCTimeValid = false;
152 :
153 : /* Cache for collation-related knowledge */
154 :
155 : typedef struct
156 : {
157 : Oid collid; /* hash key (SH_KEY): pg_collation OID */
158 : pg_locale_t locale; /* pg_locale_t for this collation, or NULL if not valid */
159 :
160 : /* fields required by lib/simplehash.h: stored hash value (see SH_STORE_HASH / SH_GET_HASH) and entry status */
161 : uint32 hash;
162 : char status;
163 : } collation_cache_entry;
164 :
165 : #define SH_PREFIX collation_cache
166 : #define SH_ELEMENT_TYPE collation_cache_entry
167 : #define SH_KEY_TYPE Oid
168 : #define SH_KEY collid
169 : #define SH_HASH_KEY(tb, key) murmurhash32((uint32) key)
170 : #define SH_EQUAL(tb, a, b) (a == b)
171 : #define SH_GET_HASH(tb, a) a->hash
172 : #define SH_SCOPE static inline
173 : #define SH_STORE_HASH
174 : #define SH_DECLARE
175 : #define SH_DEFINE
176 : #include "lib/simplehash.h"
177 :
178 : static MemoryContext CollationCacheContext = NULL;
179 : static collation_cache_hash *CollationCache = NULL;
180 :
181 : /*
182 : * The collation cache is often accessed repeatedly for the same collation, so
183 : * remember the last one used.
184 : */
185 : static Oid last_collation_cache_oid = InvalidOid;
186 : static pg_locale_t last_collation_cache_locale = NULL;
187 :
188 : #if defined(WIN32) && defined(LC_MESSAGES)
189 : static char *IsoLocaleName(const char *);
190 : #endif
191 :
192 : /*
193 : * pg_perm_setlocale
194 : *
195 : * This wraps the libc function setlocale(), with two additions. First, when
196 : * changing LC_CTYPE, update gettext's encoding for the current message
197 : * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
198 : * not on Windows. Second, if the operation is successful, the corresponding
199 : * LC_XXX environment variable is set to match. By setting the environment
200 : * variable, we ensure that any subsequent use of setlocale(..., "") will
201 : * preserve the settings made through this routine. Of course, LC_ALL must
202 : * also be unset to fully ensure that, but that has to be done elsewhere after
203 : * all the individual LC_XXX variables have been set correctly. (Thank you
204 : * Perl for making this kluge necessary.)
205 : */
206 : char *
207 91992 : pg_perm_setlocale(int category, const char *locale)
208 : {
209 : char *result;
210 : const char *envvar;
211 :
212 : #ifndef WIN32
213 91992 : result = setlocale(category, locale);
214 : #else
215 :
216 : /*
217 : * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
218 : * the given value is good and set it in the environment variables. We
219 : * must ignore attempts to set to "", which means "keep using the old
220 : * environment value".
221 : */
222 : #ifdef LC_MESSAGES
223 : if (category == LC_MESSAGES)
224 : {
225 : result = (char *) locale;
226 : if (locale == NULL || locale[0] == '\0')
227 : return result;
228 : }
229 : else
230 : #endif
231 : result = setlocale(category, locale);
232 : #endif /* WIN32 */
233 :
234 91992 : if (result == NULL)
235 0 : return result; /* fall out immediately on failure */
236 :
237 : /*
238 : * Use the right encoding in translated messages. Under ENABLE_NLS, let
239 : * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
240 : * format strings are ASCII, but database-encoding strings may enter the
241 : * message via %s. This makes the overall message encoding equal to the
242 : * database encoding.
243 : */
244 91992 : if (category == LC_CTYPE)
245 : {
246 : static char save_lc_ctype[LOCALE_NAME_BUFLEN];
247 :
248 : /* copy setlocale() return value before callee invokes it again */
249 31066 : strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
250 31066 : result = save_lc_ctype;
251 :
252 : #ifdef ENABLE_NLS
253 31066 : SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
254 : #else
255 : SetMessageEncoding(GetDatabaseEncoding());
256 : #endif
257 : }
258 :
259 91992 : switch (category)
260 : {
261 31066 : case LC_COLLATE:
262 31066 : envvar = "LC_COLLATE";
263 31066 : break;
264 31066 : case LC_CTYPE:
265 31066 : envvar = "LC_CTYPE";
266 31066 : break;
267 : #ifdef LC_MESSAGES
268 19936 : case LC_MESSAGES:
269 19936 : envvar = "LC_MESSAGES";
270 : #ifdef WIN32
271 : result = IsoLocaleName(locale);
272 : if (result == NULL)
273 : result = (char *) locale;
274 : elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
275 : #endif /* WIN32 */
276 19936 : break;
277 : #endif /* LC_MESSAGES */
278 3308 : case LC_MONETARY:
279 3308 : envvar = "LC_MONETARY";
280 3308 : break;
281 3308 : case LC_NUMERIC:
282 3308 : envvar = "LC_NUMERIC";
283 3308 : break;
284 3308 : case LC_TIME:
285 3308 : envvar = "LC_TIME";
286 3308 : break;
287 0 : default:
288 0 : elog(FATAL, "unrecognized LC category: %d", category);
289 : return NULL; /* keep compiler quiet */
290 : }
291 :
292 91992 : if (setenv(envvar, result, 1) != 0)
293 0 : return NULL;
294 :
295 91992 : return result;
296 : }
297 :
298 :
299 : /*
300 : * Is the locale name valid for the locale category?
301 : *
302 : * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
303 : * canonical name is stored there. This is especially useful for figuring out
304 : * what locale name "" means (ie, the server environment value). (Actually,
305 : * it seems that on most implementations that's the only thing it's good for;
306 : * we could wish that setlocale gave back a canonically spelled version of
307 : * the locale name, but typically it doesn't.)
308 : */
309 : bool
310 63868 : check_locale(int category, const char *locale, char **canonname)
311 : {
312 : char *save;
313 : char *res;
314 :
315 : /* Don't let Windows' non-ASCII locale names in. */
316 63868 : if (!pg_is_ascii(locale))
317 : {
318 0 : ereport(WARNING,
319 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
320 : errmsg("locale name \"%s\" contains non-ASCII characters",
321 : locale)));
322 0 : return false;
323 : }
324 :
325 63868 : if (canonname)
326 1350 : *canonname = NULL; /* in case of failure */
327 :
328 63868 : save = setlocale(category, NULL);
329 63868 : if (!save)
330 0 : return false; /* won't happen, we hope */
331 :
332 : /* save may be pointing at a modifiable scratch variable, see above. */
333 63868 : save = pstrdup(save);
334 :
335 : /* set the locale with setlocale, to see if it accepts it. */
336 63868 : res = setlocale(category, locale);
337 :
338 : /* save canonical name if requested. */
339 63868 : if (res && canonname)
340 1346 : *canonname = pstrdup(res);
341 :
342 : /* restore old value. */
343 63868 : if (!setlocale(category, save))
344 0 : elog(WARNING, "failed to restore old locale \"%s\"", save);
345 63868 : pfree(save);
346 :
347 : /* Don't let Windows' non-ASCII locale names out. */
348 63868 : if (canonname && *canonname && !pg_is_ascii(*canonname))
349 : {
350 0 : ereport(WARNING,
351 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
352 : errmsg("locale name \"%s\" contains non-ASCII characters",
353 : *canonname)));
354 0 : pfree(*canonname);
355 0 : *canonname = NULL;
356 0 : return false;
357 : }
358 :
359 63868 : return (res != NULL);
360 : }
361 :
362 :
363 : /*
364 : * GUC check/assign hooks
365 : *
366 : * For most locale categories, the assign hook doesn't actually set the locale
367 : * permanently, just reset flags so that the next use will cache the
368 : * appropriate values. (See explanation at the top of this file.)
369 : *
370 : * Note: we accept value = "" as selecting the postmaster's environment
371 : * value, whatever it was (so long as the environment setting is legal).
372 : * This will have been locked down by an earlier call to pg_perm_setlocale.
373 : */
374 : bool
375 16794 : check_locale_monetary(char **newval, void **extra, GucSource source)
376 : {
377 16794 : return check_locale(LC_MONETARY, *newval, NULL);
378 : }
379 :
380 : void
381 16600 : assign_locale_monetary(const char *newval, void *extra)
382 : {
383 16600 : CurrentLocaleConvValid = false;
384 16600 : }
385 :
386 : bool
387 16800 : check_locale_numeric(char **newval, void **extra, GucSource source)
388 : {
389 16800 : return check_locale(LC_NUMERIC, *newval, NULL);
390 : }
391 :
392 : void
393 16612 : assign_locale_numeric(const char *newval, void *extra)
394 : {
395 16612 : CurrentLocaleConvValid = false;
396 16612 : }
397 :
398 : bool
399 16794 : check_locale_time(char **newval, void **extra, GucSource source)
400 : {
401 16794 : return check_locale(LC_TIME, *newval, NULL);
402 : }
403 :
404 : void
405 16600 : assign_locale_time(const char *newval, void *extra)
406 : {
407 16600 : CurrentLCTimeValid = false;
408 16600 : }
409 :
410 : /*
411 : * We allow LC_MESSAGES to actually be set globally.
412 : *
413 : * Note: we normally disallow value = "" because it wouldn't have consistent
414 : * semantics (it'd effectively just use the previous value). However, this
415 : * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
416 : * not even if the attempted setting fails due to invalid environment value.
417 : * The idea there is just to accept the environment setting *if possible*
418 : * during startup, until we can read the proper value from postgresql.conf.
419 : */
420 : bool
421 16824 : check_locale_messages(char **newval, void **extra, GucSource source)
422 : {
423 16824 : if (**newval == '\0')
424 : {
425 4694 : if (source == PGC_S_DEFAULT)
426 4694 : return true;
427 : else
428 0 : return false;
429 : }
430 :
431 : /*
432 : * LC_MESSAGES category does not exist everywhere, but accept it anyway
433 : *
434 : * On Windows, we can't even check the value, so accept blindly
435 : */
436 : #if defined(LC_MESSAGES) && !defined(WIN32)
437 12130 : return check_locale(LC_MESSAGES, *newval, NULL);
438 : #else
439 : return true;
440 : #endif
441 : }
442 :
/*
 * GUC assign hook for the messages locale: install the new value for the
 * whole process through pg_perm_setlocale().
 */
void
assign_locale_messages(const char *newval, void *extra)
{
	(void) extra;

	/*
	 * Platforms without an LC_MESSAGES category accept the setting but have
	 * nothing to apply.  Where the category exists, a failure to apply the
	 * value is deliberately ignored.
	 */
#ifdef LC_MESSAGES
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
454 :
455 :
/*
 * Release the malloc'd strings referenced by a struct lconv; the struct
 * itself is left alone and its pointer members are not reset.
 *
 * This must never throw elog(ERROR).  The set of members handled here has to
 * stay in step with struct_lconv_is_valid().
 */
static void
free_struct_lconv(struct lconv *s)
{
	/* Every string member we own, in the order they are to be released */
	char	   *owned[] = {
		s->decimal_point,
		s->thousands_sep,
		s->grouping,
		s->int_curr_symbol,
		s->currency_symbol,
		s->mon_decimal_point,
		s->mon_thousands_sep,
		s->mon_grouping,
		s->positive_sign,
		s->negative_sign
	};
	size_t		i;

	for (i = 0; i < sizeof(owned) / sizeof(owned[0]); i++)
		free(owned[i]);
}
474 :
/*
 * Report whether every string member of a struct lconv that we care about is
 * non-NULL.  The members tested must be the same ones that
 * free_struct_lconv() releases.
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
504 :
505 :
506 : /*
507 : * Convert the strdup'd string at *str from the specified encoding to the
508 : * database encoding.
509 : */
510 : static void
511 448 : db_encoding_convert(int encoding, char **str)
512 : {
513 : char *pstr;
514 : char *mstr;
515 :
516 : /* convert the string to the database encoding */
517 448 : pstr = pg_any_to_server(*str, strlen(*str), encoding);
518 448 : if (pstr == *str)
519 448 : return; /* no conversion happened */
520 :
521 : /* need it malloc'd not palloc'd */
522 0 : mstr = strdup(pstr);
523 0 : if (mstr == NULL)
524 0 : ereport(ERROR,
525 : (errcode(ERRCODE_OUT_OF_MEMORY),
526 : errmsg("out of memory")));
527 :
528 : /* replace old string */
529 0 : free(*str);
530 0 : *str = mstr;
531 :
532 0 : pfree(pstr);
533 : }
534 :
535 :
536 : /*
537 : * Return the POSIX lconv struct (contains number/money formatting
538 : * information) with locale information for all categories.
539 : */
540 : struct lconv *
541 2966 : PGLC_localeconv(void)
542 : {
543 : static struct lconv CurrentLocaleConv;
544 : static bool CurrentLocaleConvAllocated = false;
545 : struct lconv *extlconv;
546 : struct lconv worklconv;
547 : char *save_lc_monetary;
548 : char *save_lc_numeric;
549 : #ifdef WIN32
550 : char *save_lc_ctype;
551 : #endif
552 :
553 : /* Did we do it already? */
554 2966 : if (CurrentLocaleConvValid)
555 2910 : return &CurrentLocaleConv;
556 :
557 : /* Free any already-allocated storage */
558 56 : if (CurrentLocaleConvAllocated)
559 : {
560 6 : free_struct_lconv(&CurrentLocaleConv);
561 6 : CurrentLocaleConvAllocated = false;
562 : }
563 :
564 : /*
565 : * This is tricky because we really don't want to risk throwing error
566 : * while the locale is set to other than our usual settings. Therefore,
567 : * the process is: collect the usual settings, set locale to special
568 : * setting, copy relevant data into worklconv using strdup(), restore
569 : * normal settings, convert data to desired encoding, and finally stash
570 : * the collected data in CurrentLocaleConv. This makes it safe if we
571 : * throw an error during encoding conversion or run out of memory anywhere
572 : * in the process. All data pointed to by struct lconv members is
573 : * allocated with strdup, to avoid premature elog(ERROR) and to allow
574 : * using a single cleanup routine.
575 : */
576 56 : memset(&worklconv, 0, sizeof(worklconv));
577 :
578 : /* Save prevailing values of monetary and numeric locales */
579 56 : save_lc_monetary = setlocale(LC_MONETARY, NULL);
580 56 : if (!save_lc_monetary)
581 0 : elog(ERROR, "setlocale(NULL) failed");
582 56 : save_lc_monetary = pstrdup(save_lc_monetary);
583 :
584 56 : save_lc_numeric = setlocale(LC_NUMERIC, NULL);
585 56 : if (!save_lc_numeric)
586 0 : elog(ERROR, "setlocale(NULL) failed");
587 56 : save_lc_numeric = pstrdup(save_lc_numeric);
588 :
589 : #ifdef WIN32
590 :
591 : /*
592 : * The POSIX standard explicitly says that it is undefined what happens if
593 : * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
594 : * that implied by LC_CTYPE. In practice, all Unix-ish platforms seem to
595 : * believe that localeconv() should return strings that are encoded in the
596 : * codeset implied by the LC_MONETARY or LC_NUMERIC locale name. Hence,
597 : * once we have successfully collected the localeconv() results, we will
598 : * convert them from that codeset to the desired server encoding.
599 : *
600 : * Windows, of course, resolutely does things its own way; on that
601 : * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
602 : * results. Hence, we must temporarily set that category as well.
603 : */
604 :
605 : /* Save prevailing value of ctype locale */
606 : save_lc_ctype = setlocale(LC_CTYPE, NULL);
607 : if (!save_lc_ctype)
608 : elog(ERROR, "setlocale(NULL) failed");
609 : save_lc_ctype = pstrdup(save_lc_ctype);
610 :
611 : /* Here begins the critical section where we must not throw error */
612 :
613 : /* use numeric to set the ctype */
614 : setlocale(LC_CTYPE, locale_numeric);
615 : #endif
616 :
617 : /* Get formatting information for numeric */
618 56 : setlocale(LC_NUMERIC, locale_numeric);
619 56 : extlconv = localeconv();
620 :
621 : /* Must copy data now in case setlocale() overwrites it */
622 56 : worklconv.decimal_point = strdup(extlconv->decimal_point);
623 56 : worklconv.thousands_sep = strdup(extlconv->thousands_sep);
624 56 : worklconv.grouping = strdup(extlconv->grouping);
625 :
626 : #ifdef WIN32
627 : /* use monetary to set the ctype */
628 : setlocale(LC_CTYPE, locale_monetary);
629 : #endif
630 :
631 : /* Get formatting information for monetary */
632 56 : setlocale(LC_MONETARY, locale_monetary);
633 56 : extlconv = localeconv();
634 :
635 : /* Must copy data now in case setlocale() overwrites it */
636 56 : worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
637 56 : worklconv.currency_symbol = strdup(extlconv->currency_symbol);
638 56 : worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
639 56 : worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
640 56 : worklconv.mon_grouping = strdup(extlconv->mon_grouping);
641 56 : worklconv.positive_sign = strdup(extlconv->positive_sign);
642 56 : worklconv.negative_sign = strdup(extlconv->negative_sign);
643 : /* Copy scalar fields as well */
644 56 : worklconv.int_frac_digits = extlconv->int_frac_digits;
645 56 : worklconv.frac_digits = extlconv->frac_digits;
646 56 : worklconv.p_cs_precedes = extlconv->p_cs_precedes;
647 56 : worklconv.p_sep_by_space = extlconv->p_sep_by_space;
648 56 : worklconv.n_cs_precedes = extlconv->n_cs_precedes;
649 56 : worklconv.n_sep_by_space = extlconv->n_sep_by_space;
650 56 : worklconv.p_sign_posn = extlconv->p_sign_posn;
651 56 : worklconv.n_sign_posn = extlconv->n_sign_posn;
652 :
653 : /*
654 : * Restore the prevailing locale settings; failure to do so is fatal.
655 : * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
656 : * but proceeding with the wrong value of LC_CTYPE would certainly be bad
657 : * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
658 : * are almost certainly "C", there's really no reason that restoring those
659 : * should fail.
660 : */
661 : #ifdef WIN32
662 : if (!setlocale(LC_CTYPE, save_lc_ctype))
663 : elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
664 : #endif
665 56 : if (!setlocale(LC_MONETARY, save_lc_monetary))
666 0 : elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
667 56 : if (!setlocale(LC_NUMERIC, save_lc_numeric))
668 0 : elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);
669 :
670 : /*
671 : * At this point we've done our best to clean up, and can call functions
672 : * that might possibly throw errors with a clean conscience. But let's
673 : * make sure we don't leak any already-strdup'd fields in worklconv.
674 : */
675 56 : PG_TRY();
676 : {
677 : int encoding;
678 :
679 : /* Release the pstrdup'd locale names */
680 56 : pfree(save_lc_monetary);
681 56 : pfree(save_lc_numeric);
682 : #ifdef WIN32
683 : pfree(save_lc_ctype);
684 : #endif
685 :
686 : /* If any of the preceding strdup calls failed, complain now. */
687 56 : if (!struct_lconv_is_valid(&worklconv))
688 0 : ereport(ERROR,
689 : (errcode(ERRCODE_OUT_OF_MEMORY),
690 : errmsg("out of memory")));
691 :
692 : /*
693 : * Now we must perform encoding conversion from whatever's associated
694 : * with the locales into the database encoding. If we can't identify
695 : * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
696 : * use PG_SQL_ASCII, which will result in just validating that the
697 : * strings are OK in the database encoding.
698 : */
699 56 : encoding = pg_get_encoding_from_locale(locale_numeric, true);
700 56 : if (encoding < 0)
701 0 : encoding = PG_SQL_ASCII;
702 :
703 56 : db_encoding_convert(encoding, &worklconv.decimal_point);
704 56 : db_encoding_convert(encoding, &worklconv.thousands_sep);
705 : /* grouping is not text and does not require conversion */
706 :
707 56 : encoding = pg_get_encoding_from_locale(locale_monetary, true);
708 56 : if (encoding < 0)
709 0 : encoding = PG_SQL_ASCII;
710 :
711 56 : db_encoding_convert(encoding, &worklconv.int_curr_symbol);
712 56 : db_encoding_convert(encoding, &worklconv.currency_symbol);
713 56 : db_encoding_convert(encoding, &worklconv.mon_decimal_point);
714 56 : db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
715 : /* mon_grouping is not text and does not require conversion */
716 56 : db_encoding_convert(encoding, &worklconv.positive_sign);
717 56 : db_encoding_convert(encoding, &worklconv.negative_sign);
718 : }
719 0 : PG_CATCH();
720 : {
721 0 : free_struct_lconv(&worklconv);
722 0 : PG_RE_THROW();
723 : }
724 56 : PG_END_TRY();
725 :
726 : /*
727 : * Everything is good, so save the results.
728 : */
729 56 : CurrentLocaleConv = worklconv;
730 56 : CurrentLocaleConvAllocated = true;
731 56 : CurrentLocaleConvValid = true;
732 56 : return &CurrentLocaleConv;
733 : }
734 :
735 : #ifdef WIN32
736 : /*
737 : * On Windows, strftime() returns its output in encoding CP_ACP (the default
738 : * operating system codepage for the computer), which is likely different
739 : * from SERVER_ENCODING. This is especially important in Japanese versions
740 : * of Windows which will use SJIS encoding, which we don't support as a
741 : * server encoding.
742 : *
743 : * So, instead of using strftime(), use wcsftime() to return the value in
744 : * wide characters (internally UTF16) and then convert to UTF8, which we
745 : * know how to handle directly.
746 : *
747 : * Note that this only affects the calls to strftime() in this file, which are
748 : * used to get the locale-aware strings. Other parts of the backend use
749 : * pg_strftime(), which isn't locale-aware and does not need to be replaced.
750 : */
751 : static size_t
752 : strftime_win32(char *dst, size_t dstlen,
753 : const char *format, const struct tm *tm)
754 : {
755 : size_t len;
756 : wchar_t wformat[8]; /* formats used below need 3 chars */
757 : wchar_t wbuf[MAX_L10N_DATA];
758 :
759 : /*
760 : * Get a wchar_t version of the format string. We only actually use
761 : * plain-ASCII formats in this file, so we can say that they're UTF8.
762 : */
763 : len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
764 : wformat, lengthof(wformat));
765 : if (len == 0)
766 : elog(ERROR, "could not convert format string from UTF-8: error code %lu",
767 : GetLastError());
768 :
769 : len = wcsftime(wbuf, MAX_L10N_DATA, wformat, tm);
770 : if (len == 0)
771 : {
772 : /*
773 : * wcsftime failed, possibly because the result would not fit in
774 : * MAX_L10N_DATA. Return 0 with the contents of dst unspecified.
775 : */
776 : return 0;
777 : }
778 :
779 : len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
780 : NULL, NULL);
781 : if (len == 0)
782 : elog(ERROR, "could not convert string to UTF-8: error code %lu",
783 : GetLastError());
784 :
785 : dst[len] = '\0';
786 :
787 : return len;
788 : }
789 :
790 : /* redefine strftime() */
791 : #define strftime(a,b,c,d) strftime_win32(a,b,c,d)
792 : #endif /* WIN32 */
793 :
794 : /*
795 : * Subroutine for cache_locale_time().
796 : * Convert the given string from encoding "encoding" to the database
797 : * encoding, and store the result at *dst, replacing any previous value.
798 : */
799 : static void
800 1748 : cache_single_string(char **dst, const char *src, int encoding)
801 : {
802 : char *ptr;
803 : char *olddst;
804 :
805 : /* Convert the string to the database encoding, or validate it's OK */
806 1748 : ptr = pg_any_to_server(src, strlen(src), encoding);
807 :
808 : /* Store the string in long-lived storage, replacing any previous value */
809 1748 : olddst = *dst;
810 1748 : *dst = MemoryContextStrdup(TopMemoryContext, ptr);
811 1748 : if (olddst)
812 0 : pfree(olddst);
813 :
814 : /* Might as well clean up any palloc'd conversion result, too */
815 1748 : if (ptr != src)
816 0 : pfree(ptr);
817 1748 : }
818 :
819 : /*
820 : * Update the lc_time localization cache variables if needed.
821 : */
822 : void
823 49546 : cache_locale_time(void)
824 : {
825 : char buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
826 : char *bufptr;
827 : time_t timenow;
828 : struct tm *timeinfo;
829 : struct tm timeinfobuf;
830 49546 : bool strftimefail = false;
831 : int encoding;
832 : int i;
833 : char *save_lc_time;
834 : #ifdef WIN32
835 : char *save_lc_ctype;
836 : #endif
837 :
838 : /* did we do this already? */
839 49546 : if (CurrentLCTimeValid)
840 49500 : return;
841 :
842 46 : elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
843 :
844 : /*
845 : * As in PGLC_localeconv(), it's critical that we not throw error while
846 : * libc's locale settings have nondefault values. Hence, we just call
847 : * strftime() within the critical section, and then convert and save its
848 : * results afterwards.
849 : */
850 :
851 : /* Save prevailing value of time locale */
852 46 : save_lc_time = setlocale(LC_TIME, NULL);
853 46 : if (!save_lc_time)
854 0 : elog(ERROR, "setlocale(NULL) failed");
855 46 : save_lc_time = pstrdup(save_lc_time);
856 :
857 : #ifdef WIN32
858 :
859 : /*
860 : * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
861 : * must set it here. This code looks the same as what PGLC_localeconv()
862 : * does, but the underlying reason is different: this does NOT determine
863 : * the encoding we'll get back from strftime_win32().
864 : */
865 :
866 : /* Save prevailing value of ctype locale */
867 : save_lc_ctype = setlocale(LC_CTYPE, NULL);
868 : if (!save_lc_ctype)
869 : elog(ERROR, "setlocale(NULL) failed");
870 : save_lc_ctype = pstrdup(save_lc_ctype);
871 :
872 : /* use lc_time to set the ctype */
873 : setlocale(LC_CTYPE, locale_time);
874 : #endif
875 :
876 46 : setlocale(LC_TIME, locale_time);
877 :
878 : /* We use times close to current time as data for strftime(). */
879 46 : timenow = time(NULL);
880 46 : timeinfo = gmtime_r(&timenow, &timeinfobuf);
881 :
882 : /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
883 46 : bufptr = buf;
884 :
885 : /*
886 : * MAX_L10N_DATA is sufficient buffer space for every known locale, and
887 : * POSIX defines no strftime() errors. (Buffer space exhaustion is not an
888 : * error.) An implementation might report errors (e.g. ENOMEM) by
889 : * returning 0 (or, less plausibly, a negative value) and setting errno.
890 : * Report errno just in case the implementation did that, but clear it in
891 : * advance of the calls so we don't emit a stale, unrelated errno.
892 : */
893 46 : errno = 0;
894 :
895 : /* localized days */
896 368 : for (i = 0; i < 7; i++)
897 : {
898 322 : timeinfo->tm_wday = i;
899 322 : if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)
900 0 : strftimefail = true;
901 322 : bufptr += MAX_L10N_DATA;
902 322 : if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)
903 0 : strftimefail = true;
904 322 : bufptr += MAX_L10N_DATA;
905 : }
906 :
907 : /* localized months */
908 598 : for (i = 0; i < 12; i++)
909 : {
910 552 : timeinfo->tm_mon = i;
911 552 : timeinfo->tm_mday = 1; /* make sure we don't have invalid date */
912 552 : if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)
913 0 : strftimefail = true;
914 552 : bufptr += MAX_L10N_DATA;
915 552 : if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)
916 0 : strftimefail = true;
917 552 : bufptr += MAX_L10N_DATA;
918 : }
919 :
920 : /*
921 : * Restore the prevailing locale settings; as in PGLC_localeconv(),
922 : * failure to do so is fatal.
923 : */
924 : #ifdef WIN32
925 : if (!setlocale(LC_CTYPE, save_lc_ctype))
926 : elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
927 : #endif
928 46 : if (!setlocale(LC_TIME, save_lc_time))
929 0 : elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);
930 :
931 : /*
932 : * At this point we've done our best to clean up, and can throw errors, or
933 : * call functions that might throw errors, with a clean conscience.
934 : */
935 46 : if (strftimefail)
936 0 : elog(ERROR, "strftime() failed: %m");
937 :
938 : /* Release the pstrdup'd locale names */
939 46 : pfree(save_lc_time);
940 : #ifdef WIN32
941 : pfree(save_lc_ctype);
942 : #endif
943 :
944 : #ifndef WIN32
945 :
946 : /*
947 : * As in PGLC_localeconv(), we must convert strftime()'s output from the
948 : * encoding implied by LC_TIME to the database encoding. If we can't
949 : * identify the LC_TIME encoding, just perform encoding validation.
950 : */
951 46 : encoding = pg_get_encoding_from_locale(locale_time, true);
952 46 : if (encoding < 0)
953 0 : encoding = PG_SQL_ASCII;
954 :
955 : #else
956 :
957 : /*
958 : * On Windows, strftime_win32() always returns UTF8 data, so convert from
959 : * that if necessary.
960 : */
961 : encoding = PG_UTF8;
962 :
963 : #endif /* WIN32 */
964 :
965 46 : bufptr = buf;
966 :
967 : /* localized days */
968 368 : for (i = 0; i < 7; i++)
969 : {
970 322 : cache_single_string(&localized_abbrev_days[i], bufptr, encoding);
971 322 : bufptr += MAX_L10N_DATA;
972 322 : cache_single_string(&localized_full_days[i], bufptr, encoding);
973 322 : bufptr += MAX_L10N_DATA;
974 : }
975 46 : localized_abbrev_days[7] = NULL;
976 46 : localized_full_days[7] = NULL;
977 :
978 : /* localized months */
979 598 : for (i = 0; i < 12; i++)
980 : {
981 552 : cache_single_string(&localized_abbrev_months[i], bufptr, encoding);
982 552 : bufptr += MAX_L10N_DATA;
983 552 : cache_single_string(&localized_full_months[i], bufptr, encoding);
984 552 : bufptr += MAX_L10N_DATA;
985 : }
986 46 : localized_abbrev_months[12] = NULL;
987 46 : localized_full_months[12] = NULL;
988 :
989 46 : CurrentLCTimeValid = true;
990 : }
991 :
992 :
993 : #if defined(WIN32) && defined(LC_MESSAGES)
994 : /*
995 : * Convert a Windows setlocale() argument to a Unix-style one.
996 : *
997 : * Regardless of platform, we install message catalogs under a Unix-style
998 : * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
999 : * following that style will elicit localized interface strings.
1000 : *
1001 : * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
1002 : * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
1003 : * case-insensitive. setlocale() returns the fully-qualified form; for
1004 : * example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
1005 : * setlocale() and _create_locale() select a "locale identifier"[1] and store
1006 : * it in an undocumented _locale_t field. From that LCID, we can retrieve the
1007 : * ISO 639 language and the ISO 3166 country. Character encoding does not
1008 : * matter, because the server and client encodings govern that.
1009 : *
1010 : * Windows Vista introduced the "locale name" concept[2], closely following
1011 : * RFC 4646. Locale identifiers are now deprecated. Starting with Visual
1012 : * Studio 2012, setlocale() accepts locale names in addition to the strings it
1013 : * accepted historically. It does not standardize them; setlocale("Th-tH")
1014 : * returns "Th-tH". setlocale(category, "") still returns a traditional
1015 : * string. Furthermore, msvcr110.dll changed the undocumented _locale_t
1016 : * content to carry locale names instead of locale identifiers.
1017 : *
1018 : * Visual Studio 2015 should still be able to do the same as Visual Studio
1019 : * 2012, but the declaration of locale_name is missing in _locale_t, causing
1020 : * this code compilation to fail, hence this falls back instead on to
1021 : * enumerating all system locales by using EnumSystemLocalesEx to find the
1022 : * required locale name. If the input argument is in Unix-style then we can
1023 : * get ISO Locale name directly by using GetLocaleInfoEx() with LCType as
1024 : * LOCALE_SNAME.
1025 : *
1026 : * This function returns a pointer to a static buffer bearing the converted
1027 : * name or NULL if conversion fails.
1028 : *
1029 : * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
1030 : * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
1031 : */
1032 :
1033 : /*
1034 : * Callback function for EnumSystemLocalesEx() in get_iso_localename().
1035 : *
1036 : * This function enumerates all system locales, searching for one that matches
1037 : * an input with the format: <Language>[_<Country>], e.g.
1038 : * English[_United States]
1039 : *
1040 : * The input is a three wchar_t array as an LPARAM. The first element is the
1041 : * locale_name we want to match, the second element is an allocated buffer
1042 : * where the Unix-style locale is copied if a match is found, and the third
1043 : * element is the search status, 1 if a match was found, 0 otherwise.
1044 : */
1045 : static BOOL CALLBACK
1046 : search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
1047 : {
1048 : wchar_t test_locale[LOCALE_NAME_MAX_LENGTH];
1049 : wchar_t **argv;
1050 :
1051 : (void) (dwFlags);
1052 :
1053 : argv = (wchar_t **) lparam;
1054 : *argv[2] = (wchar_t) 0;
1055 :
1056 : memset(test_locale, 0, sizeof(test_locale));
1057 :
1058 : /* Get the name of the <Language> in English */
1059 : if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
1060 : test_locale, LOCALE_NAME_MAX_LENGTH))
1061 : {
1062 : /*
1063 : * If the enumerated locale does not have a hyphen ("en") OR the
1064 : * locale_name input does not have an underscore ("English"), we only
1065 : * need to compare the <Language> tags.
1066 : */
1067 : if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
1068 : {
1069 : if (_wcsicmp(argv[0], test_locale) == 0)
1070 : {
1071 : wcscpy(argv[1], pStr);
1072 : *argv[2] = (wchar_t) 1;
1073 : return FALSE;
1074 : }
1075 : }
1076 :
1077 : /*
1078 : * We have to compare a full <Language>_<Country> tag, so we append
1079 : * the underscore and name of the country/region in English, e.g.
1080 : * "English_United States".
1081 : */
1082 : else
1083 : {
1084 : size_t len;
1085 :
1086 : wcscat(test_locale, L"_");
1087 : len = wcslen(test_locale);
1088 : if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
1089 : test_locale + len,
1090 : LOCALE_NAME_MAX_LENGTH - len))
1091 : {
1092 : if (_wcsicmp(argv[0], test_locale) == 0)
1093 : {
1094 : wcscpy(argv[1], pStr);
1095 : *argv[2] = (wchar_t) 1;
1096 : return FALSE;
1097 : }
1098 : }
1099 : }
1100 : }
1101 :
1102 : return TRUE;
1103 : }
1104 :
1105 : /*
1106 : * This function converts a Windows locale name to an ISO formatted version
1107 : * for Visual Studio 2015 or greater.
1108 : *
1109 : * Returns NULL, if no valid conversion was found.
1110 : */
1111 : static char *
1112 : get_iso_localename(const char *winlocname)
1113 : {
1114 : wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH];
1115 : wchar_t buffer[LOCALE_NAME_MAX_LENGTH];
1116 : static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1117 : char *period;
1118 : int len;
1119 : int ret_val;
1120 :
1121 : /*
1122 : * Valid locales have the following syntax:
1123 : * <Language>[_<Country>[.<CodePage>]]
1124 : *
1125 : * GetLocaleInfoEx can only take locale name without code-page and for the
1126 : * purpose of this API the code-page doesn't matter.
1127 : */
1128 : period = strchr(winlocname, '.');
1129 : if (period != NULL)
1130 : len = period - winlocname;
1131 : else
1132 : len = pg_mbstrlen(winlocname);
1133 :
1134 : memset(wc_locale_name, 0, sizeof(wc_locale_name));
1135 : memset(buffer, 0, sizeof(buffer));
1136 : MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
1137 : LOCALE_NAME_MAX_LENGTH);
1138 :
1139 : /*
1140 : * If the lc_messages is already a Unix-style string, we have a direct
1141 : * match with LOCALE_SNAME, e.g. en-US, en_US.
1142 : */
1143 : ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
1144 : LOCALE_NAME_MAX_LENGTH);
1145 : if (!ret_val)
1146 : {
1147 : /*
1148 : * Search for a locale in the system that matches language and country
1149 : * name.
1150 : */
1151 : wchar_t *argv[3];
1152 :
1153 : argv[0] = wc_locale_name;
1154 : argv[1] = buffer;
1155 : argv[2] = (wchar_t *) &ret_val;
1156 : EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
1157 : NULL);
1158 : }
1159 :
1160 : if (ret_val)
1161 : {
1162 : size_t rc;
1163 : char *hyphen;
1164 :
1165 : /* Locale names use only ASCII, any conversion locale suffices. */
1166 : rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
1167 : if (rc == -1 || rc == sizeof(iso_lc_messages))
1168 : return NULL;
1169 :
1170 : /*
1171 : * Since the message catalogs sit on a case-insensitive filesystem, we
1172 : * need not standardize letter case here. So long as we do not ship
1173 : * message catalogs for which it would matter, we also need not
1174 : * translate the script/variant portion, e.g. uz-Cyrl-UZ to
1175 : * uz_UZ@cyrillic. Simply replace the hyphen with an underscore.
1176 : */
1177 : hyphen = strchr(iso_lc_messages, '-');
1178 : if (hyphen)
1179 : *hyphen = '_';
1180 : return iso_lc_messages;
1181 : }
1182 :
1183 : return NULL;
1184 : }
1185 :
1186 : static char *
1187 : IsoLocaleName(const char *winlocname)
1188 : {
1189 : static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1190 :
1191 : if (pg_strcasecmp("c", winlocname) == 0 ||
1192 : pg_strcasecmp("posix", winlocname) == 0)
1193 : {
1194 : strcpy(iso_lc_messages, "C");
1195 : return iso_lc_messages;
1196 : }
1197 : else
1198 : return get_iso_localename(winlocname);
1199 : }
1200 :
1201 : #endif /* WIN32 && LC_MESSAGES */
1202 :
1203 : /*
1204 : * Create a new pg_locale_t struct for the given collation oid.
1205 : */
1206 : static pg_locale_t
1207 3544 : create_pg_locale(Oid collid, MemoryContext context)
1208 : {
1209 : HeapTuple tp;
1210 : Form_pg_collation collform;
1211 : pg_locale_t result;
1212 : Datum datum;
1213 : bool isnull;
1214 :
1215 3544 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1216 3544 : if (!HeapTupleIsValid(tp))
1217 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
1218 3544 : collform = (Form_pg_collation) GETSTRUCT(tp);
1219 :
1220 3544 : if (collform->collprovider == COLLPROVIDER_BUILTIN)
1221 52 : result = create_pg_locale_builtin(collid, context);
1222 3492 : else if (collform->collprovider == COLLPROVIDER_ICU)
1223 184 : result = create_pg_locale_icu(collid, context);
1224 3308 : else if (collform->collprovider == COLLPROVIDER_LIBC)
1225 3308 : result = create_pg_locale_libc(collid, context);
1226 : else
1227 : /* shouldn't happen */
1228 0 : PGLOCALE_SUPPORT_ERROR(collform->collprovider);
1229 :
1230 3538 : result->is_default = false;
1231 :
1232 : Assert((result->collate_is_c && result->collate == NULL) ||
1233 : (!result->collate_is_c && result->collate != NULL));
1234 :
1235 3538 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1236 : &isnull);
1237 3538 : if (!isnull)
1238 : {
1239 : char *actual_versionstr;
1240 : char *collversionstr;
1241 :
1242 230 : collversionstr = TextDatumGetCString(datum);
1243 :
1244 230 : if (collform->collprovider == COLLPROVIDER_LIBC)
1245 0 : datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
1246 : else
1247 230 : datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
1248 :
1249 230 : actual_versionstr = get_collation_actual_version(collform->collprovider,
1250 230 : TextDatumGetCString(datum));
1251 230 : if (!actual_versionstr)
1252 : {
1253 : /*
1254 : * This could happen when specifying a version in CREATE COLLATION
1255 : * but the provider does not support versioning, or manually
1256 : * creating a mess in the catalogs.
1257 : */
1258 0 : ereport(ERROR,
1259 : (errmsg("collation \"%s\" has no actual version, but a version was recorded",
1260 : NameStr(collform->collname))));
1261 : }
1262 :
1263 230 : if (strcmp(actual_versionstr, collversionstr) != 0)
1264 0 : ereport(WARNING,
1265 : (errmsg("collation \"%s\" has version mismatch",
1266 : NameStr(collform->collname)),
1267 : errdetail("The collation in the database was created using version %s, "
1268 : "but the operating system provides version %s.",
1269 : collversionstr, actual_versionstr),
1270 : errhint("Rebuild all objects affected by this collation and run "
1271 : "ALTER COLLATION %s REFRESH VERSION, "
1272 : "or build PostgreSQL with the right library version.",
1273 : quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1274 : NameStr(collform->collname)))));
1275 : }
1276 :
1277 3538 : ReleaseSysCache(tp);
1278 :
1279 3538 : return result;
1280 : }
1281 :
1282 : /*
1283 : * Initialize default_locale with database locale settings.
1284 : */
1285 : void
1286 27760 : init_database_collation(void)
1287 : {
1288 : HeapTuple tup;
1289 : Form_pg_database dbform;
1290 : pg_locale_t result;
1291 :
1292 : Assert(default_locale == NULL);
1293 :
1294 : /* Fetch our pg_database row normally, via syscache */
1295 27760 : tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
1296 27760 : if (!HeapTupleIsValid(tup))
1297 0 : elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
1298 27760 : dbform = (Form_pg_database) GETSTRUCT(tup);
1299 :
1300 27760 : if (dbform->datlocprovider == COLLPROVIDER_BUILTIN)
1301 1688 : result = create_pg_locale_builtin(DEFAULT_COLLATION_OID,
1302 : TopMemoryContext);
1303 26072 : else if (dbform->datlocprovider == COLLPROVIDER_ICU)
1304 26 : result = create_pg_locale_icu(DEFAULT_COLLATION_OID,
1305 : TopMemoryContext);
1306 26046 : else if (dbform->datlocprovider == COLLPROVIDER_LIBC)
1307 26046 : result = create_pg_locale_libc(DEFAULT_COLLATION_OID,
1308 : TopMemoryContext);
1309 : else
1310 : /* shouldn't happen */
1311 0 : PGLOCALE_SUPPORT_ERROR(dbform->datlocprovider);
1312 :
1313 27756 : result->is_default = true;
1314 27756 : ReleaseSysCache(tup);
1315 :
1316 27756 : default_locale = result;
1317 27756 : }
1318 :
1319 : /*
1320 : * Create a pg_locale_t from a collation OID. Results are cached for the
1321 : * lifetime of the backend. Thus, do not free the result with freelocale().
1322 : *
1323 : * For simplicity, we always generate COLLATE + CTYPE even though we
1324 : * might only need one of them. Since this is called only once per session,
1325 : * it shouldn't cost much.
1326 : */
1327 : pg_locale_t
1328 24226190 : pg_newlocale_from_collation(Oid collid)
1329 : {
1330 : collation_cache_entry *cache_entry;
1331 : bool found;
1332 :
1333 24226190 : if (collid == DEFAULT_COLLATION_OID)
1334 20940928 : return default_locale;
1335 :
1336 3285262 : if (!OidIsValid(collid))
1337 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
1338 :
1339 3285262 : if (last_collation_cache_oid == collid)
1340 3279862 : return last_collation_cache_locale;
1341 :
1342 5400 : if (CollationCache == NULL)
1343 : {
1344 3236 : CollationCacheContext = AllocSetContextCreate(TopMemoryContext,
1345 : "collation cache",
1346 : ALLOCSET_DEFAULT_SIZES);
1347 3236 : CollationCache = collation_cache_create(CollationCacheContext,
1348 : 16, NULL);
1349 : }
1350 :
1351 5400 : cache_entry = collation_cache_insert(CollationCache, collid, &found);
1352 5400 : if (!found)
1353 : {
1354 : /*
1355 : * Make sure cache entry is marked invalid, in case we fail before
1356 : * setting things.
1357 : */
1358 3544 : cache_entry->locale = 0;
1359 : }
1360 :
1361 5400 : if (cache_entry->locale == 0)
1362 : {
1363 3544 : cache_entry->locale = create_pg_locale(collid, CollationCacheContext);
1364 : }
1365 :
1366 5394 : last_collation_cache_oid = collid;
1367 5394 : last_collation_cache_locale = cache_entry->locale;
1368 :
1369 5394 : return cache_entry->locale;
1370 : }
1371 :
1372 : /*
1373 : * Get provider-specific collation version string for the given collation from
1374 : * the operating system/library.
1375 : */
1376 : char *
1377 94992 : get_collation_actual_version(char collprovider, const char *collcollate)
1378 : {
1379 94992 : char *collversion = NULL;
1380 :
1381 94992 : if (collprovider == COLLPROVIDER_BUILTIN)
1382 1806 : collversion = get_collation_actual_version_builtin(collcollate);
1383 : #ifdef USE_ICU
1384 93186 : else if (collprovider == COLLPROVIDER_ICU)
1385 67792 : collversion = get_collation_actual_version_icu(collcollate);
1386 : #endif
1387 25394 : else if (collprovider == COLLPROVIDER_LIBC)
1388 25394 : collversion = get_collation_actual_version_libc(collcollate);
1389 :
1390 94992 : return collversion;
1391 : }
1392 :
1393 : size_t
1394 435042 : pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
1395 : pg_locale_t locale)
1396 : {
1397 435042 : if (locale->provider == COLLPROVIDER_BUILTIN)
1398 11922 : return strlower_builtin(dst, dstsize, src, srclen, locale);
1399 : #ifdef USE_ICU
1400 423120 : else if (locale->provider == COLLPROVIDER_ICU)
1401 516 : return strlower_icu(dst, dstsize, src, srclen, locale);
1402 : #endif
1403 422604 : else if (locale->provider == COLLPROVIDER_LIBC)
1404 422604 : return strlower_libc(dst, dstsize, src, srclen, locale);
1405 : else
1406 : /* shouldn't happen */
1407 0 : PGLOCALE_SUPPORT_ERROR(locale->provider);
1408 :
1409 : return 0; /* keep compiler quiet */
1410 : }
1411 :
1412 : size_t
1413 208 : pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
1414 : pg_locale_t locale)
1415 : {
1416 208 : if (locale->provider == COLLPROVIDER_BUILTIN)
1417 170 : return strtitle_builtin(dst, dstsize, src, srclen, locale);
1418 : #ifdef USE_ICU
1419 38 : else if (locale->provider == COLLPROVIDER_ICU)
1420 30 : return strtitle_icu(dst, dstsize, src, srclen, locale);
1421 : #endif
1422 8 : else if (locale->provider == COLLPROVIDER_LIBC)
1423 8 : return strtitle_libc(dst, dstsize, src, srclen, locale);
1424 : else
1425 : /* shouldn't happen */
1426 0 : PGLOCALE_SUPPORT_ERROR(locale->provider);
1427 :
1428 : return 0; /* keep compiler quiet */
1429 : }
1430 :
1431 : size_t
1432 1034122 : pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
1433 : pg_locale_t locale)
1434 : {
1435 1034122 : if (locale->provider == COLLPROVIDER_BUILTIN)
1436 316858 : return strupper_builtin(dst, dstsize, src, srclen, locale);
1437 : #ifdef USE_ICU
1438 717264 : else if (locale->provider == COLLPROVIDER_ICU)
1439 54 : return strupper_icu(dst, dstsize, src, srclen, locale);
1440 : #endif
1441 717210 : else if (locale->provider == COLLPROVIDER_LIBC)
1442 717210 : return strupper_libc(dst, dstsize, src, srclen, locale);
1443 : else
1444 : /* shouldn't happen */
1445 0 : PGLOCALE_SUPPORT_ERROR(locale->provider);
1446 :
1447 : return 0; /* keep compiler quiet */
1448 : }
1449 :
1450 : /*
1451 : * pg_strcoll
1452 : *
1453 : * Like pg_strncoll for NUL-terminated input strings.
1454 : */
1455 : int
1456 23978946 : pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
1457 : {
1458 23978946 : return locale->collate->strncoll(arg1, -1, arg2, -1, locale);
1459 : }
1460 :
1461 : /*
1462 : * pg_strncoll
1463 : *
1464 : * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll_l() or wcscoll_l() as
1465 : * appropriate for the given locale, platform, and database encoding. If the
1466 : * locale is not specified, use the database collation.
1467 : *
1468 : * The input strings must be encoded in the database encoding. If an input
1469 : * string is NUL-terminated, its length may be specified as -1.
1470 : *
1471 : * The caller is responsible for breaking ties if the collation is
1472 : * deterministic; this maintains consistency with pg_strnxfrm(), which cannot
1473 : * easily account for deterministic collations.
1474 : */
1475 : int
1476 2072644 : pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
1477 : pg_locale_t locale)
1478 : {
1479 2072644 : return locale->collate->strncoll(arg1, len1, arg2, len2, locale);
1480 : }
1481 :
1482 : /*
1483 : * Return true if the collation provider supports pg_strxfrm() and
1484 : * pg_strnxfrm(); otherwise false.
1485 : *
1486 : *
1487 : * No similar problem is known for the ICU provider.
1488 : */
1489 : bool
1490 42294 : pg_strxfrm_enabled(pg_locale_t locale)
1491 : {
1492 : /*
1493 : * locale->collate->strnxfrm is still a required method, even if it may
1494 : * have the wrong behavior, because the planner uses it for estimates in
1495 : * some cases.
1496 : */
1497 42294 : return locale->collate->strxfrm_is_safe;
1498 : }
1499 :
1500 : /*
1501 : * pg_strxfrm
1502 : *
1503 : * Like pg_strnxfrm for a NUL-terminated input string.
1504 : */
1505 : size_t
1506 144 : pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
1507 : {
1508 144 : return locale->collate->strnxfrm(dest, destsize, src, -1, locale);
1509 : }
1510 :
1511 : /*
1512 : * pg_strnxfrm
1513 : *
1514 : * Transforms 'src' to a nul-terminated string stored in 'dest' such that
1515 : * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on
1516 : * untransformed strings.
1517 : *
1518 : * The input string must be encoded in the database encoding. If the input
1519 : * string is NUL-terminated, its length may be specified as -1. If 'destsize'
1520 : * is zero, 'dest' may be NULL.
1521 : *
1522 : * Not all providers support pg_strnxfrm() safely. The caller should check
1523 : * pg_strxfrm_enabled() first, otherwise this function may return wrong
1524 : * results or an error.
1525 : *
1526 : * Returns the number of bytes needed (or more) to store the transformed
1527 : * string, excluding the terminating nul byte. If the value returned is
1528 : * 'destsize' or greater, the resulting contents of 'dest' are undefined.
1529 : */
1530 : size_t
1531 10020 : pg_strnxfrm(char *dest, size_t destsize, const char *src, ssize_t srclen,
1532 : pg_locale_t locale)
1533 : {
1534 10020 : return locale->collate->strnxfrm(dest, destsize, src, srclen, locale);
1535 : }
1536 :
1537 : /*
1538 : * Return true if the collation provider supports pg_strxfrm_prefix() and
1539 : * pg_strnxfrm_prefix(); otherwise false.
1540 : */
1541 : bool
1542 1656 : pg_strxfrm_prefix_enabled(pg_locale_t locale)
1543 : {
1544 1656 : return (locale->collate->strnxfrm_prefix != NULL);
1545 : }
1546 :
1547 : /*
1548 : * pg_strxfrm_prefix
1549 : *
1550 : * Like pg_strnxfrm_prefix for a NUL-terminated input string.
1551 : */
1552 : size_t
1553 1656 : pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
1554 : pg_locale_t locale)
1555 : {
1556 1656 : return locale->collate->strnxfrm_prefix(dest, destsize, src, -1, locale);
1557 : }
1558 :
1559 : /*
1560 : * pg_strnxfrm_prefix
1561 : *
1562 : * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary
1563 : * memcmp() on the byte sequence is equivalent to pg_strncoll() on
1564 : * untransformed strings. The result is not nul-terminated.
1565 : *
1566 : * The input string must be encoded in the database encoding. If the input
1567 : * string is NUL-terminated, its length may be specified as -1.
1568 : *
1569 : * Not all providers support pg_strnxfrm_prefix() safely. The caller should
1570 : * check pg_strxfrm_prefix_enabled() first, otherwise this function may return
1571 : * wrong results or an error.
1572 : *
1573 : * If destsize is not large enough to hold the resulting byte sequence, stores
1574 : * only the first destsize bytes in 'dest'. Returns the number of bytes
1575 : * actually copied to 'dest'.
1576 : */
1577 : size_t
1578 0 : pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
1579 : ssize_t srclen, pg_locale_t locale)
1580 : {
1581 0 : return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale);
1582 : }
1583 :
1584 : /*
1585 : * Return required encoding ID for the given locale, or -1 if any encoding is
1586 : * valid for the locale.
1587 : */
1588 : int
1589 1862 : builtin_locale_encoding(const char *locale)
1590 : {
1591 1862 : if (strcmp(locale, "C") == 0)
1592 64 : return -1;
1593 1798 : else if (strcmp(locale, "C.UTF-8") == 0)
1594 1768 : return PG_UTF8;
1595 30 : else if (strcmp(locale, "PG_UNICODE_FAST") == 0)
1596 30 : return PG_UTF8;
1597 :
1598 :
1599 0 : ereport(ERROR,
1600 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1601 : errmsg("invalid locale name \"%s\" for builtin provider",
1602 : locale)));
1603 :
1604 : return 0; /* keep compiler quiet */
1605 : }
1606 :
1607 :
1608 : /*
1609 : * Validate the locale and encoding combination, and return the canonical form
1610 : * of the locale name.
1611 : */
1612 : const char *
1613 1846 : builtin_validate_locale(int encoding, const char *locale)
1614 : {
1615 1846 : const char *canonical_name = NULL;
1616 : int required_encoding;
1617 :
1618 1846 : if (strcmp(locale, "C") == 0)
1619 52 : canonical_name = "C";
1620 1794 : else if (strcmp(locale, "C.UTF-8") == 0 || strcmp(locale, "C.UTF8") == 0)
1621 1754 : canonical_name = "C.UTF-8";
1622 40 : else if (strcmp(locale, "PG_UNICODE_FAST") == 0)
1623 22 : canonical_name = "PG_UNICODE_FAST";
1624 :
1625 1846 : if (!canonical_name)
1626 18 : ereport(ERROR,
1627 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1628 : errmsg("invalid locale name \"%s\" for builtin provider",
1629 : locale)));
1630 :
1631 1828 : required_encoding = builtin_locale_encoding(canonical_name);
1632 1828 : if (required_encoding >= 0 && encoding != required_encoding)
1633 2 : ereport(ERROR,
1634 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1635 : errmsg("encoding \"%s\" does not match locale \"%s\"",
1636 : pg_encoding_to_char(encoding), locale)));
1637 :
1638 1826 : return canonical_name;
1639 : }
1640 :
1641 :
1642 :
1643 : /*
1644 : * Return the BCP47 language tag representation of the requested locale.
1645 : *
1646 : * This function should be called before passing the string to ucol_open(),
1647 : * because conversion to a language tag also performs "level 2
1648 : * canonicalization". In addition to producing a consistent format, level 2
1649 : * canonicalization is able to more accurately interpret different input
1650 : * locale string formats, such as POSIX and .NET IDs.
1651 : */
1652 : char *
1653 67496 : icu_language_tag(const char *loc_str, int elevel)
1654 : {
1655 : #ifdef USE_ICU
1656 : UErrorCode status;
1657 : char *langtag;
1658 67496 : size_t buflen = 32; /* arbitrary starting buffer size */
1659 67496 : const bool strict = true;
1660 :
1661 : /*
1662 : * A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
1663 : * RFC5646 section 4.4). Additionally, in older ICU versions,
1664 : * uloc_toLanguageTag() doesn't always return the ultimate length on the
1665 : * first call, necessitating a loop.
1666 : */
1667 67496 : langtag = palloc(buflen);
1668 : while (true)
1669 : {
1670 67496 : status = U_ZERO_ERROR;
1671 67496 : uloc_toLanguageTag(loc_str, langtag, buflen, strict, &status);
1672 :
1673 : /* try again if the buffer is not large enough */
1674 67496 : if ((status == U_BUFFER_OVERFLOW_ERROR ||
1675 67496 : status == U_STRING_NOT_TERMINATED_WARNING) &&
1676 : buflen < MaxAllocSize)
1677 : {
1678 0 : buflen = Min(buflen * 2, MaxAllocSize);
1679 0 : langtag = repalloc(langtag, buflen);
1680 0 : continue;
1681 : }
1682 :
1683 67496 : break;
1684 : }
1685 :
1686 67496 : if (U_FAILURE(status))
1687 : {
1688 18 : pfree(langtag);
1689 :
1690 18 : if (elevel > 0)
1691 14 : ereport(elevel,
1692 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1693 : errmsg("could not convert locale name \"%s\" to language tag: %s",
1694 : loc_str, u_errorName(status))));
1695 12 : return NULL;
1696 : }
1697 :
1698 67478 : return langtag;
1699 : #else /* not USE_ICU */
1700 : ereport(ERROR,
1701 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1702 : errmsg("ICU is not supported in this build")));
1703 : return NULL; /* keep compiler quiet */
1704 : #endif /* not USE_ICU */
1705 : }
1706 :
1707 : /*
1708 : * Perform best-effort check that the locale is a valid one.
1709 : */
1710 : void
1711 166 : icu_validate_locale(const char *loc_str)
1712 : {
1713 : #ifdef USE_ICU
1714 : UCollator *collator;
1715 : UErrorCode status;
1716 : char lang[ULOC_LANG_CAPACITY];
1717 166 : bool found = false;
1718 166 : int elevel = icu_validation_level;
1719 :
1720 : /* no validation */
1721 166 : if (elevel < 0)
1722 12 : return;
1723 :
1724 : /* downgrade to WARNING during pg_upgrade */
1725 154 : if (IsBinaryUpgrade && elevel > WARNING)
1726 0 : elevel = WARNING;
1727 :
1728 : /* validate that we can extract the language */
1729 154 : status = U_ZERO_ERROR;
1730 154 : uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
1731 154 : if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
1732 : {
1733 0 : ereport(elevel,
1734 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1735 : errmsg("could not get language from ICU locale \"%s\": %s",
1736 : loc_str, u_errorName(status)),
1737 : errhint("To disable ICU locale validation, set the parameter \"%s\" to \"%s\".",
1738 : "icu_validation_level", "disabled")));
1739 0 : return;
1740 : }
1741 :
1742 : /* check for special language name */
1743 154 : if (strcmp(lang, "") == 0 ||
1744 46 : strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
1745 108 : found = true;
1746 :
1747 : /* search for matching language within ICU */
1748 15138 : for (int32_t i = 0; !found && i < uloc_countAvailable(); i++)
1749 : {
1750 14984 : const char *otherloc = uloc_getAvailable(i);
1751 : char otherlang[ULOC_LANG_CAPACITY];
1752 :
1753 14984 : status = U_ZERO_ERROR;
1754 14984 : uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status);
1755 14984 : if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
1756 0 : continue;
1757 :
1758 14984 : if (strcmp(lang, otherlang) == 0)
1759 32 : found = true;
1760 : }
1761 :
1762 154 : if (!found)
1763 14 : ereport(elevel,
1764 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1765 : errmsg("ICU locale \"%s\" has unknown language \"%s\"",
1766 : loc_str, lang),
1767 : errhint("To disable ICU locale validation, set the parameter \"%s\" to \"%s\".",
1768 : "icu_validation_level", "disabled")));
1769 :
1770 : /* check that it can be opened */
1771 148 : collator = pg_ucol_open(loc_str);
1772 140 : ucol_close(collator);
1773 : #else /* not USE_ICU */
1774 : /* could get here if a collation was created by a build with ICU */
1775 : ereport(ERROR,
1776 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1777 : errmsg("ICU is not supported in this build")));
1778 : #endif /* not USE_ICU */
1779 : }
|