Line data Source code
1 : /*-----------------------------------------------------------------------
2 : *
3 : * PostgreSQL locale utilities for libc
4 : *
5 : * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
6 : *
7 : * src/backend/utils/adt/pg_locale_libc.c
8 : *
9 : *-----------------------------------------------------------------------
10 : */
11 :
12 : #include "postgres.h"
13 :
14 : #include <limits.h>
15 : #include <wctype.h>
16 :
17 : #include "access/htup_details.h"
18 : #include "catalog/pg_database.h"
19 : #include "catalog/pg_collation.h"
20 : #include "mb/pg_wchar.h"
21 : #include "miscadmin.h"
22 : #include "utils/builtins.h"
23 : #include "utils/formatting.h"
24 : #include "utils/memutils.h"
25 : #include "utils/pg_locale.h"
26 : #include "utils/syscache.h"
27 :
28 : #ifdef __GLIBC__
29 : #include <gnu/libc-version.h>
30 : #endif
31 :
32 : #ifdef WIN32
33 : #include <shlwapi.h>
34 : #endif
35 :
36 : /*
37 : * Size of stack buffer to use for string transformations, used to avoid heap
38 : * allocations in typical cases. This should be large enough that most strings
39 : * will fit, but small enough that we feel comfortable putting it on the
40 : * stack.
41 : */
42 : #define TEXTBUFLEN 1024
43 :
44 : extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
45 :
46 : extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
47 : ssize_t srclen, pg_locale_t locale);
48 : extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
49 : ssize_t srclen, pg_locale_t locale);
50 : extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
51 : ssize_t srclen, pg_locale_t locale);
52 :
53 : static int strncoll_libc(const char *arg1, ssize_t len1,
54 : const char *arg2, ssize_t len2,
55 : pg_locale_t locale);
56 : static size_t strnxfrm_libc(char *dest, size_t destsize,
57 : const char *src, ssize_t srclen,
58 : pg_locale_t locale);
59 : extern char *get_collation_actual_version_libc(const char *collcollate);
60 : static locale_t make_libc_collator(const char *collate,
61 : const char *ctype);
62 : static void report_newlocale_failure(const char *localename);
63 :
64 : #ifdef WIN32
65 : static int strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
66 : const char *arg2, ssize_t len2,
67 : pg_locale_t locale);
68 : #endif
69 :
70 : static size_t strlower_libc_sb(char *dest, size_t destsize,
71 : const char *src, ssize_t srclen,
72 : pg_locale_t locale);
73 : static size_t strlower_libc_mb(char *dest, size_t destsize,
74 : const char *src, ssize_t srclen,
75 : pg_locale_t locale);
76 : static size_t strtitle_libc_sb(char *dest, size_t destsize,
77 : const char *src, ssize_t srclen,
78 : pg_locale_t locale);
79 : static size_t strtitle_libc_mb(char *dest, size_t destsize,
80 : const char *src, ssize_t srclen,
81 : pg_locale_t locale);
82 : static size_t strupper_libc_sb(char *dest, size_t destsize,
83 : const char *src, ssize_t srclen,
84 : pg_locale_t locale);
85 : static size_t strupper_libc_mb(char *dest, size_t destsize,
86 : const char *src, ssize_t srclen,
87 : pg_locale_t locale);
88 :
89 : static const struct collate_methods collate_methods_libc = {
90 : .strncoll = strncoll_libc,
91 : .strnxfrm = strnxfrm_libc,
92 : .strnxfrm_prefix = NULL,
93 :
94 : /*
95 : * Unfortunately, it seems that strxfrm() for non-C collations is broken
96 : * on many common platforms; testing of multiple versions of glibc reveals
97 : * that, for many locales, strcoll() and strxfrm() do not return
98 : * consistent results. While no other libc other than Cygwin has so far
99 : * been shown to have a problem, we take the conservative course of action
100 : * for right now and disable this categorically. (Users who are certain
101 : * this isn't a problem on their system can define TRUST_STRXFRM.)
102 : */
103 : #ifdef TRUST_STRXFRM
104 : .strxfrm_is_safe = true,
105 : #else
106 : .strxfrm_is_safe = false,
107 : #endif
108 : };
109 :
#ifdef WIN32
/*
 * Collation method table for Windows builds.  It differs from
 * collate_methods_libc only in the strncoll entry, which points at
 * strncoll_libc_win32_utf8.  strxfrm_is_safe is true only when
 * TRUST_STRXFRM is defined.
 */
static const struct collate_methods collate_methods_libc_win32_utf8 = {
	.strncoll = strncoll_libc_win32_utf8,
	.strnxfrm = strnxfrm_libc,
	.strnxfrm_prefix = NULL,
#ifdef TRUST_STRXFRM
	.strxfrm_is_safe = true,
#else
	.strxfrm_is_safe = false,
#endif
};
#endif							/* WIN32 */
122 :
123 : size_t
124 422604 : strlower_libc(char *dst, size_t dstsize, const char *src,
125 : ssize_t srclen, pg_locale_t locale)
126 : {
127 422604 : if (pg_database_encoding_max_length() > 1)
128 422604 : return strlower_libc_mb(dst, dstsize, src, srclen, locale);
129 : else
130 0 : return strlower_libc_sb(dst, dstsize, src, srclen, locale);
131 : }
132 :
133 : size_t
134 8 : strtitle_libc(char *dst, size_t dstsize, const char *src,
135 : ssize_t srclen, pg_locale_t locale)
136 : {
137 8 : if (pg_database_encoding_max_length() > 1)
138 8 : return strtitle_libc_mb(dst, dstsize, src, srclen, locale);
139 : else
140 0 : return strtitle_libc_sb(dst, dstsize, src, srclen, locale);
141 : }
142 :
143 : size_t
144 717210 : strupper_libc(char *dst, size_t dstsize, const char *src,
145 : ssize_t srclen, pg_locale_t locale)
146 : {
147 717210 : if (pg_database_encoding_max_length() > 1)
148 717210 : return strupper_libc_mb(dst, dstsize, src, srclen, locale);
149 : else
150 0 : return strupper_libc_sb(dst, dstsize, src, srclen, locale);
151 : }
152 :
153 : static size_t
154 0 : strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
155 : pg_locale_t locale)
156 : {
157 0 : if (srclen < 0)
158 0 : srclen = strlen(src);
159 :
160 0 : if (srclen + 1 <= destsize)
161 : {
162 0 : locale_t loc = locale->info.lt;
163 : char *p;
164 :
165 0 : if (srclen + 1 > destsize)
166 0 : return srclen;
167 :
168 0 : memcpy(dest, src, srclen);
169 0 : dest[srclen] = '\0';
170 :
171 : /*
172 : * Note: we assume that tolower_l() will not be so broken as to need
173 : * an isupper_l() guard test. When using the default collation, we
174 : * apply the traditional Postgres behavior that forces ASCII-style
175 : * treatment of I/i, but in non-default collations you get exactly
176 : * what the collation says.
177 : */
178 0 : for (p = dest; *p; p++)
179 : {
180 0 : if (locale->is_default)
181 0 : *p = pg_tolower((unsigned char) *p);
182 : else
183 0 : *p = tolower_l((unsigned char) *p, loc);
184 : }
185 : }
186 :
187 0 : return srclen;
188 : }
189 :
190 : static size_t
191 422604 : strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
192 : pg_locale_t locale)
193 : {
194 422604 : locale_t loc = locale->info.lt;
195 : size_t result_size;
196 : wchar_t *workspace;
197 : char *result;
198 : size_t curr_char;
199 : size_t max_size;
200 :
201 422604 : if (srclen < 0)
202 0 : srclen = strlen(src);
203 :
204 : /* Overflow paranoia */
205 422604 : if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
206 0 : ereport(ERROR,
207 : (errcode(ERRCODE_OUT_OF_MEMORY),
208 : errmsg("out of memory")));
209 :
210 : /* Output workspace cannot have more codes than input bytes */
211 422604 : workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
212 :
213 422604 : char2wchar(workspace, srclen + 1, src, srclen, locale);
214 :
215 3636600 : for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
216 3213996 : workspace[curr_char] = towlower_l(workspace[curr_char], loc);
217 :
218 : /*
219 : * Make result large enough; case change might change number of bytes
220 : */
221 422604 : max_size = curr_char * pg_database_encoding_max_length();
222 422604 : result = palloc(max_size + 1);
223 :
224 422604 : result_size = wchar2char(result, workspace, max_size + 1, locale);
225 :
226 422604 : if (result_size + 1 > destsize)
227 0 : return result_size;
228 :
229 422604 : memcpy(dest, result, result_size);
230 422604 : dest[result_size] = '\0';
231 :
232 422604 : pfree(workspace);
233 422604 : pfree(result);
234 :
235 422604 : return result_size;
236 : }
237 :
238 : static size_t
239 0 : strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
240 : pg_locale_t locale)
241 : {
242 0 : if (srclen < 0)
243 0 : srclen = strlen(src);
244 :
245 0 : if (srclen + 1 <= destsize)
246 : {
247 0 : locale_t loc = locale->info.lt;
248 0 : int wasalnum = false;
249 : char *p;
250 :
251 0 : memcpy(dest, src, srclen);
252 0 : dest[srclen] = '\0';
253 :
254 : /*
255 : * Note: we assume that toupper_l()/tolower_l() will not be so broken
256 : * as to need guard tests. When using the default collation, we apply
257 : * the traditional Postgres behavior that forces ASCII-style treatment
258 : * of I/i, but in non-default collations you get exactly what the
259 : * collation says.
260 : */
261 0 : for (p = dest; *p; p++)
262 : {
263 0 : if (locale->is_default)
264 : {
265 0 : if (wasalnum)
266 0 : *p = pg_tolower((unsigned char) *p);
267 : else
268 0 : *p = pg_toupper((unsigned char) *p);
269 : }
270 : else
271 : {
272 0 : if (wasalnum)
273 0 : *p = tolower_l((unsigned char) *p, loc);
274 : else
275 0 : *p = toupper_l((unsigned char) *p, loc);
276 : }
277 0 : wasalnum = isalnum_l((unsigned char) *p, loc);
278 : }
279 : }
280 :
281 0 : return srclen;
282 : }
283 :
284 : static size_t
285 8 : strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
286 : pg_locale_t locale)
287 : {
288 8 : locale_t loc = locale->info.lt;
289 8 : int wasalnum = false;
290 : size_t result_size;
291 : wchar_t *workspace;
292 : char *result;
293 : size_t curr_char;
294 : size_t max_size;
295 :
296 8 : if (srclen < 0)
297 0 : srclen = strlen(src);
298 :
299 : /* Overflow paranoia */
300 8 : if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
301 0 : ereport(ERROR,
302 : (errcode(ERRCODE_OUT_OF_MEMORY),
303 : errmsg("out of memory")));
304 :
305 : /* Output workspace cannot have more codes than input bytes */
306 8 : workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
307 :
308 8 : char2wchar(workspace, srclen + 1, src, srclen, locale);
309 :
310 80 : for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
311 : {
312 72 : if (wasalnum)
313 56 : workspace[curr_char] = towlower_l(workspace[curr_char], loc);
314 : else
315 16 : workspace[curr_char] = towupper_l(workspace[curr_char], loc);
316 72 : wasalnum = iswalnum_l(workspace[curr_char], loc);
317 : }
318 :
319 : /*
320 : * Make result large enough; case change might change number of bytes
321 : */
322 8 : max_size = curr_char * pg_database_encoding_max_length();
323 8 : result = palloc(max_size + 1);
324 :
325 8 : result_size = wchar2char(result, workspace, max_size + 1, locale);
326 :
327 8 : if (result_size + 1 > destsize)
328 0 : return result_size;
329 :
330 8 : memcpy(dest, result, result_size);
331 8 : dest[result_size] = '\0';
332 :
333 8 : pfree(workspace);
334 8 : pfree(result);
335 :
336 8 : return result_size;
337 : }
338 :
339 : static size_t
340 0 : strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
341 : pg_locale_t locale)
342 : {
343 0 : if (srclen < 0)
344 0 : srclen = strlen(src);
345 :
346 0 : if (srclen + 1 <= destsize)
347 : {
348 0 : locale_t loc = locale->info.lt;
349 : char *p;
350 :
351 0 : memcpy(dest, src, srclen);
352 0 : dest[srclen] = '\0';
353 :
354 : /*
355 : * Note: we assume that toupper_l() will not be so broken as to need
356 : * an islower_l() guard test. When using the default collation, we
357 : * apply the traditional Postgres behavior that forces ASCII-style
358 : * treatment of I/i, but in non-default collations you get exactly
359 : * what the collation says.
360 : */
361 0 : for (p = dest; *p; p++)
362 : {
363 0 : if (locale->is_default)
364 0 : *p = pg_toupper((unsigned char) *p);
365 : else
366 0 : *p = toupper_l((unsigned char) *p, loc);
367 : }
368 : }
369 :
370 0 : return srclen;
371 : }
372 :
373 : static size_t
374 717210 : strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
375 : pg_locale_t locale)
376 : {
377 717210 : locale_t loc = locale->info.lt;
378 : size_t result_size;
379 : wchar_t *workspace;
380 : char *result;
381 : size_t curr_char;
382 : size_t max_size;
383 :
384 717210 : if (srclen < 0)
385 0 : srclen = strlen(src);
386 :
387 : /* Overflow paranoia */
388 717210 : if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
389 0 : ereport(ERROR,
390 : (errcode(ERRCODE_OUT_OF_MEMORY),
391 : errmsg("out of memory")));
392 :
393 : /* Output workspace cannot have more codes than input bytes */
394 717210 : workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
395 :
396 717210 : char2wchar(workspace, srclen + 1, src, srclen, locale);
397 :
398 2353118 : for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
399 1635908 : workspace[curr_char] = towupper_l(workspace[curr_char], loc);
400 :
401 : /*
402 : * Make result large enough; case change might change number of bytes
403 : */
404 717210 : max_size = curr_char * pg_database_encoding_max_length();
405 717210 : result = palloc(max_size + 1);
406 :
407 717210 : result_size = wchar2char(result, workspace, max_size + 1, locale);
408 :
409 717210 : if (result_size + 1 > destsize)
410 0 : return result_size;
411 :
412 717210 : memcpy(dest, result, result_size);
413 717210 : dest[result_size] = '\0';
414 :
415 717210 : pfree(workspace);
416 717210 : pfree(result);
417 :
418 717210 : return result_size;
419 : }
420 :
421 : pg_locale_t
422 29360 : create_pg_locale_libc(Oid collid, MemoryContext context)
423 : {
424 : const char *collate;
425 : const char *ctype;
426 : locale_t loc;
427 : pg_locale_t result;
428 :
429 29360 : if (collid == DEFAULT_COLLATION_OID)
430 : {
431 : HeapTuple tp;
432 : Datum datum;
433 :
434 26052 : tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
435 26052 : if (!HeapTupleIsValid(tp))
436 0 : elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
437 26052 : datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
438 : Anum_pg_database_datcollate);
439 26052 : collate = TextDatumGetCString(datum);
440 26052 : datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
441 : Anum_pg_database_datctype);
442 26052 : ctype = TextDatumGetCString(datum);
443 :
444 26052 : ReleaseSysCache(tp);
445 : }
446 : else
447 : {
448 : HeapTuple tp;
449 : Datum datum;
450 :
451 3308 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
452 3308 : if (!HeapTupleIsValid(tp))
453 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
454 :
455 3308 : datum = SysCacheGetAttrNotNull(COLLOID, tp,
456 : Anum_pg_collation_collcollate);
457 3308 : collate = TextDatumGetCString(datum);
458 3308 : datum = SysCacheGetAttrNotNull(COLLOID, tp,
459 : Anum_pg_collation_collctype);
460 3308 : ctype = TextDatumGetCString(datum);
461 :
462 3308 : ReleaseSysCache(tp);
463 : }
464 :
465 :
466 29360 : loc = make_libc_collator(collate, ctype);
467 :
468 29360 : result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
469 29360 : result->provider = COLLPROVIDER_LIBC;
470 29360 : result->deterministic = true;
471 54386 : result->collate_is_c = (strcmp(collate, "C") == 0) ||
472 25026 : (strcmp(collate, "POSIX") == 0);
473 54386 : result->ctype_is_c = (strcmp(ctype, "C") == 0) ||
474 25026 : (strcmp(ctype, "POSIX") == 0);
475 29360 : result->info.lt = loc;
476 29360 : if (!result->collate_is_c)
477 : {
478 : #ifdef WIN32
479 : if (GetDatabaseEncoding() == PG_UTF8)
480 : result->collate = &collate_methods_libc_win32_utf8;
481 : else
482 : #endif
483 24962 : result->collate = &collate_methods_libc;
484 : }
485 :
486 29360 : return result;
487 : }
488 :
489 : /*
490 : * Create a locale_t with the given collation and ctype.
491 : *
492 : * The "C" and "POSIX" locales are not actually handled by libc, so return
493 : * NULL.
494 : *
495 : * Ensure that no path leaks a locale_t.
496 : */
497 : static locale_t
498 29360 : make_libc_collator(const char *collate, const char *ctype)
499 : {
500 29360 : locale_t loc = 0;
501 :
502 29360 : if (strcmp(collate, ctype) == 0)
503 : {
504 29360 : if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
505 : {
506 : /* Normal case where they're the same */
507 24962 : errno = 0;
508 : #ifndef WIN32
509 24962 : loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
510 : NULL);
511 : #else
512 : loc = _create_locale(LC_ALL, collate);
513 : #endif
514 24962 : if (!loc)
515 0 : report_newlocale_failure(collate);
516 : }
517 : }
518 : else
519 : {
520 : #ifndef WIN32
521 : /* We need two newlocale() steps */
522 0 : locale_t loc1 = 0;
523 :
524 0 : if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
525 : {
526 0 : errno = 0;
527 0 : loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
528 0 : if (!loc1)
529 0 : report_newlocale_failure(collate);
530 : }
531 :
532 0 : if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
533 : {
534 0 : errno = 0;
535 0 : loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
536 0 : if (!loc)
537 : {
538 0 : if (loc1)
539 0 : freelocale(loc1);
540 0 : report_newlocale_failure(ctype);
541 : }
542 : }
543 : else
544 0 : loc = loc1;
545 : #else
546 :
547 : /*
548 : * XXX The _create_locale() API doesn't appear to support this. Could
549 : * perhaps be worked around by changing pg_locale_t to contain two
550 : * separate fields.
551 : */
552 : ereport(ERROR,
553 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
554 : errmsg("collations with different collate and ctype values are not supported on this platform")));
555 : #endif
556 : }
557 :
558 29360 : return loc;
559 : }
560 :
561 : /*
562 : * strncoll_libc
563 : *
564 : * NUL-terminate arguments, if necessary, and pass to strcoll_l().
565 : *
566 : * An input string length of -1 means that it's already NUL-terminated.
567 : */
568 : int
569 26028704 : strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
570 : pg_locale_t locale)
571 : {
572 : char sbuf[TEXTBUFLEN];
573 26028704 : char *buf = sbuf;
574 26028704 : size_t bufsize1 = (len1 == -1) ? 0 : len1 + 1;
575 26028704 : size_t bufsize2 = (len2 == -1) ? 0 : len2 + 1;
576 : const char *arg1n;
577 : const char *arg2n;
578 : int result;
579 :
580 : Assert(locale->provider == COLLPROVIDER_LIBC);
581 :
582 26028704 : if (bufsize1 + bufsize2 > TEXTBUFLEN)
583 360 : buf = palloc(bufsize1 + bufsize2);
584 :
585 : /* nul-terminate arguments if necessary */
586 26028704 : if (len1 == -1)
587 : {
588 23977224 : arg1n = arg1;
589 : }
590 : else
591 : {
592 2051480 : char *buf1 = buf;
593 :
594 2051480 : memcpy(buf1, arg1, len1);
595 2051480 : buf1[len1] = '\0';
596 2051480 : arg1n = buf1;
597 : }
598 :
599 26028704 : if (len2 == -1)
600 : {
601 23977224 : arg2n = arg2;
602 : }
603 : else
604 : {
605 2051480 : char *buf2 = buf + bufsize1;
606 :
607 2051480 : memcpy(buf2, arg2, len2);
608 2051480 : buf2[len2] = '\0';
609 2051480 : arg2n = buf2;
610 : }
611 :
612 26028704 : result = strcoll_l(arg1n, arg2n, locale->info.lt);
613 :
614 26028704 : if (buf != sbuf)
615 360 : pfree(buf);
616 :
617 26028704 : return result;
618 : }
619 :
620 : /*
621 : * strnxfrm_libc
622 : *
623 : * NUL-terminate src, if necessary, and pass to strxfrm_l().
624 : *
625 : * A source length of -1 means that it's already NUL-terminated.
626 : */
627 : size_t
628 144 : strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
629 : pg_locale_t locale)
630 : {
631 : char sbuf[TEXTBUFLEN];
632 144 : char *buf = sbuf;
633 144 : size_t bufsize = srclen + 1;
634 : size_t result;
635 :
636 : Assert(locale->provider == COLLPROVIDER_LIBC);
637 :
638 144 : if (srclen == -1)
639 144 : return strxfrm_l(dest, src, destsize, locale->info.lt);
640 :
641 0 : if (bufsize > TEXTBUFLEN)
642 0 : buf = palloc(bufsize);
643 :
644 : /* nul-terminate argument */
645 0 : memcpy(buf, src, srclen);
646 0 : buf[srclen] = '\0';
647 :
648 0 : result = strxfrm_l(dest, buf, destsize, locale->info.lt);
649 :
650 0 : if (buf != sbuf)
651 0 : pfree(buf);
652 :
653 : /* if dest is defined, it should be nul-terminated */
654 : Assert(result >= destsize || dest[result] == '\0');
655 :
656 0 : return result;
657 : }
658 :
/*
 * get_collation_actual_version_libc
 *
 * Return a palloc'd version string for the named libc collation, or NULL
 * when no version is available.
 *
 * "C", "POSIX", and any name starting with "C." (all compared
 * case-insensitively) always yield NULL.  Otherwise the source depends on
 * the platform: the glibc version string, the FreeBSD-style querylocale()
 * version where LC_VERSION_MASK exists, or GetNLSVersionEx() on Windows.  On
 * any other platform the result stays NULL.
 */
char *
get_collation_actual_version_libc(const char *collcollate)
{
	char	   *collversion = NULL;
	bool		is_c_locale;

	is_c_locale = pg_strcasecmp("C", collcollate) == 0 ||
		pg_strncasecmp("C.", collcollate, 2) == 0 ||
		pg_strcasecmp("POSIX", collcollate) == 0;

	if (!is_c_locale)
	{
#if defined(__GLIBC__)
		/* Use the glibc version because we don't have anything better. */
		collversion = pstrdup(gnu_get_libc_version());
#elif defined(LC_VERSION_MASK)
		locale_t	loc;

		/* Look up FreeBSD collation version. */
		loc = newlocale(LC_COLLATE_MASK, collcollate, NULL);
		if (!loc)
			ereport(ERROR,
					(errmsg("could not load locale \"%s\"", collcollate)));
		else
		{
			collversion =
				pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
			freelocale(loc);
		}
#elif defined(WIN32)

		/*
		 * If we are targeting Windows Vista and above, we can ask for a name
		 * given a collation name (earlier versions required a location code
		 * that we don't have).
		 */
		NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
		WCHAR		wide_collcollate[LOCALE_NAME_MAX_LENGTH];

		MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
							LOCALE_NAME_MAX_LENGTH);
		if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
		{
			/*
			 * GetNLSVersionEx() wants a language tag such as "en-US", not a
			 * locale name like "English_United States.1252".  Until those
			 * values can be prevented from entering the system, or 100%
			 * reliably converted to the more useful tag format, tolerate the
			 * resulting error and report that we have no version data.
			 */
			if (GetLastError() == ERROR_INVALID_PARAMETER)
				return NULL;

			ereport(ERROR,
					(errmsg("could not get collation version for locale \"%s\": error code %lu",
							collcollate,
							GetLastError())));
		}
		collversion = psprintf("%lu.%lu,%lu.%lu",
							   (version.dwNLSVersion >> 8) & 0xFFFF,
							   version.dwNLSVersion & 0xFF,
							   (version.dwDefinedVersion >> 8) & 0xFFFF,
							   version.dwDefinedVersion & 0xFF);
#endif
	}

	return collversion;
}
723 :
/*
 * strncoll_libc_win32_utf8
 *
 * Win32 does not have UTF-8.  Convert the UTF-8 arguments to wide characters
 * with MultiByteToWideChar() and compare them with wcscoll_l().
 *
 * An input string length of -1 means that it's NUL-terminated.
 */
#ifdef WIN32
static int
strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
						 ssize_t len2, pg_locale_t locale)
{
	char		stackbuf[TEXTBUFLEN];
	char	   *buf = stackbuf;
	LPWSTR		wide1;
	LPWSTR		wide2;
	int			widebytes1;
	int			widebytes2;
	int			nconv;
	int			result;

	Assert(locale->provider == COLLPROVIDER_LIBC);
	Assert(GetDatabaseEncoding() == PG_UTF8);

	if (len1 == -1)
		len1 = strlen(arg1);
	if (len2 == -1)
		len2 = strlen(arg2);

	/* reserve two bytes per input byte, plus two for the terminator */
	widebytes1 = len1 * 2 + 2;
	widebytes2 = len2 * 2 + 2;

	if (widebytes1 + widebytes2 > TEXTBUFLEN)
		buf = palloc(widebytes1 + widebytes2);

	wide1 = (LPWSTR) buf;
	wide2 = (LPWSTR) (buf + widebytes1);

	/* API does not work for zero-length input */
	if (len1 != 0)
	{
		nconv = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
									wide1, widebytes1 / 2);
		if (!nconv)
			ereport(ERROR,
					(errmsg("could not convert string to UTF-16: error code %lu",
							GetLastError())));
	}
	else
		nconv = 0;
	wide1[nconv] = 0;

	if (len2 != 0)
	{
		nconv = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
									wide2, widebytes2 / 2);
		if (!nconv)
			ereport(ERROR,
					(errmsg("could not convert string to UTF-16: error code %lu",
							GetLastError())));
	}
	else
		nconv = 0;
	wide2[nconv] = 0;

	errno = 0;
	result = wcscoll_l(wide1, wide2, locale->info.lt);
	if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw headers */
		ereport(ERROR,
				(errmsg("could not compare Unicode strings: %m")));

	if (buf != stackbuf)
		pfree(buf);

	return result;
}
#endif							/* WIN32 */
802 :
803 : /* simple subroutine for reporting errors from newlocale() */
804 : static void
805 0 : report_newlocale_failure(const char *localename)
806 : {
807 : int save_errno;
808 :
809 : /*
810 : * Windows doesn't provide any useful error indication from
811 : * _create_locale(), and BSD-derived platforms don't seem to feel they
812 : * need to set errno either (even though POSIX is pretty clear that
813 : * newlocale should do so). So, if errno hasn't been set, assume ENOENT
814 : * is what to report.
815 : */
816 0 : if (errno == 0)
817 0 : errno = ENOENT;
818 :
819 : /*
820 : * ENOENT means "no such locale", not "no such file", so clarify that
821 : * errno with an errdetail message.
822 : */
823 0 : save_errno = errno; /* auxiliary funcs might change errno */
824 0 : ereport(ERROR,
825 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
826 : errmsg("could not create locale \"%s\": %m",
827 : localename),
828 : (save_errno == ENOENT ?
829 : errdetail("The operating system could not find any locale data for the locale name \"%s\".",
830 : localename) : 0)));
831 : }
832 :
833 : /*
834 : * POSIX doesn't define _l-variants of these functions, but several systems
835 : * have them. We provide our own replacements here.
836 : */
837 : #ifndef HAVE_MBSTOWCS_L
838 : static size_t
839 1139822 : mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
840 : {
841 : #ifdef WIN32
842 : return _mbstowcs_l(dest, src, n, loc);
843 : #else
844 : size_t result;
845 1139822 : locale_t save_locale = uselocale(loc);
846 :
847 1139822 : result = mbstowcs(dest, src, n);
848 1139822 : uselocale(save_locale);
849 1139822 : return result;
850 : #endif
851 : }
852 : #endif
853 : #ifndef HAVE_WCSTOMBS_L
854 : static size_t
855 1139822 : wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
856 : {
857 : #ifdef WIN32
858 : return _wcstombs_l(dest, src, n, loc);
859 : #else
860 : size_t result;
861 1139822 : locale_t save_locale = uselocale(loc);
862 :
863 1139822 : result = wcstombs(dest, src, n);
864 1139822 : uselocale(save_locale);
865 1139822 : return result;
866 : #endif
867 : }
868 : #endif
869 :
870 : /*
871 : * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
872 : * Therefore we keep them here rather than with the mbutils code.
873 : */
874 :
875 : /*
876 : * wchar2char --- convert wide characters to multibyte format
877 : *
878 : * This has the same API as the standard wcstombs_l() function; in particular,
879 : * tolen is the maximum number of bytes to store at *to, and *from must be
880 : * zero-terminated. The output will be zero-terminated iff there is room.
881 : */
882 : size_t
883 1139822 : wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
884 : {
885 : size_t result;
886 :
887 1139822 : if (tolen == 0)
888 0 : return 0;
889 :
890 : #ifdef WIN32
891 :
892 : /*
893 : * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
894 : * for some reason mbstowcs and wcstombs won't do this for us, so we use
895 : * MultiByteToWideChar().
896 : */
897 : if (GetDatabaseEncoding() == PG_UTF8)
898 : {
899 : result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
900 : NULL, NULL);
901 : /* A zero return is failure */
902 : if (result <= 0)
903 : result = -1;
904 : else
905 : {
906 : Assert(result <= tolen);
907 : /* Microsoft counts the zero terminator in the result */
908 : result--;
909 : }
910 : }
911 : else
912 : #endif /* WIN32 */
913 1139822 : if (locale == (pg_locale_t) 0)
914 : {
915 : /* Use wcstombs directly for the default locale */
916 0 : result = wcstombs(to, from, tolen);
917 : }
918 : else
919 : {
920 : /* Use wcstombs_l for nondefault locales */
921 1139822 : result = wcstombs_l(to, from, tolen, locale->info.lt);
922 : }
923 :
924 1139822 : return result;
925 : }
926 :
927 : /*
928 : * char2wchar --- convert multibyte characters to wide characters
929 : *
930 : * This has almost the API of mbstowcs_l(), except that *from need not be
931 : * null-terminated; instead, the number of input bytes is specified as
932 : * fromlen. Also, we ereport() rather than returning -1 for invalid
933 : * input encoding. tolen is the maximum number of wchar_t's to store at *to.
934 : * The output will be zero-terminated iff there is room.
935 : */
936 : size_t
937 1142994 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
938 : pg_locale_t locale)
939 : {
940 : size_t result;
941 :
942 1142994 : if (tolen == 0)
943 0 : return 0;
944 :
945 : #ifdef WIN32
946 : /* See WIN32 "Unicode" comment above */
947 : if (GetDatabaseEncoding() == PG_UTF8)
948 : {
949 : /* Win32 API does not work for zero-length input */
950 : if (fromlen == 0)
951 : result = 0;
952 : else
953 : {
954 : result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
955 : /* A zero return is failure */
956 : if (result == 0)
957 : result = -1;
958 : }
959 :
960 : if (result != -1)
961 : {
962 : Assert(result < tolen);
963 : /* Append trailing null wchar (MultiByteToWideChar() does not) */
964 : to[result] = 0;
965 : }
966 : }
967 : else
968 : #endif /* WIN32 */
969 : {
970 : /* mbstowcs requires ending '\0' */
971 1142994 : char *str = pnstrdup(from, fromlen);
972 :
973 1142994 : if (locale == (pg_locale_t) 0)
974 : {
975 : /* Use mbstowcs directly for the default locale */
976 3172 : result = mbstowcs(to, str, tolen);
977 : }
978 : else
979 : {
980 : /* Use mbstowcs_l for nondefault locales */
981 1139822 : result = mbstowcs_l(to, str, tolen, locale->info.lt);
982 : }
983 :
984 1142994 : pfree(str);
985 : }
986 :
987 1142994 : if (result == -1)
988 : {
989 : /*
990 : * Invalid multibyte character encountered. We try to give a useful
991 : * error message by letting pg_verifymbstr check the string. But it's
992 : * possible that the string is OK to us, and not OK to mbstowcs ---
993 : * this suggests that the LC_CTYPE locale is different from the
994 : * database encoding. Give a generic error message if pg_verifymbstr
995 : * can't find anything wrong.
996 : */
997 0 : pg_verifymbstr(from, fromlen, false); /* might not return */
998 : /* but if it does ... */
999 0 : ereport(ERROR,
1000 : (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1001 : errmsg("invalid multibyte character for locale"),
1002 : errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
1003 : }
1004 :
1005 1142994 : return result;
1006 : }
|