Line data Source code
1 : /*-----------------------------------------------------------------------
2 : *
3 : * PostgreSQL locale utilities for libc
4 : *
5 : * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
6 : *
7 : * src/backend/utils/adt/pg_locale_libc.c
8 : *
9 : *-----------------------------------------------------------------------
10 : */
11 :
12 : #include "postgres.h"
13 :
14 : #include <limits.h>
15 : #include <wctype.h>
16 :
17 : #include "access/htup_details.h"
18 : #include "catalog/pg_database.h"
19 : #include "catalog/pg_collation.h"
20 : #include "mb/pg_wchar.h"
21 : #include "miscadmin.h"
22 : #include "utils/builtins.h"
23 : #include "utils/formatting.h"
24 : #include "utils/memutils.h"
25 : #include "utils/pg_locale.h"
26 : #include "utils/syscache.h"
27 :
28 : #ifdef __GLIBC__
29 : #include <gnu/libc-version.h>
30 : #endif
31 :
32 : #ifdef WIN32
33 : #include <shlwapi.h>
34 : #endif
35 :
36 : /*
37 : * For the libc provider, to provide as much functionality as possible on a
38 : * variety of platforms without going so far as to implement everything from
39 : * scratch, we use several implementation strategies depending on the
40 : * situation:
41 : *
42 : * 1. In C/POSIX collations, we use hard-wired code. We can't depend on
43 : * the <ctype.h> functions since those will obey LC_CTYPE. Note that these
44 : * collations don't give a fig about multibyte characters.
45 : *
46 : * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
47 : * This assumes that every platform uses Unicode codepoints directly
48 : * as the wchar_t representation of Unicode. On some platforms
49 : * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
50 : *
51 : * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
52 : * values up to 255, and punt for values above that. This is 100% correct
53 : * only in single-byte encodings such as LATINn. However, non-Unicode
54 : * multibyte encodings are mostly Far Eastern character sets for which the
55 : * properties being tested here aren't very relevant for higher code values
56 : * anyway. The difficulty with using the <wctype.h> functions with
57 : * non-Unicode multibyte encodings is that we can have no certainty that
58 : * the platform's wchar_t representation matches what we do in pg_wchar
59 : * conversions.
60 : *
61 : * As a special case, in the "default" collation, (2) and (3) force ASCII
62 : * letters to follow ASCII upcase/downcase rules, while in a non-default
63 : * collation we just let the library functions do what they will. The case
64 : * where this matters is treatment of I/i in Turkish, and the behavior is
65 : * meant to match the upper()/lower() SQL functions.
66 : *
67 : * We store the active collation setting in static variables. In principle
68 : * it could be passed down to here via the regex library's "struct vars" data
69 : * structure; but that would require somewhat invasive changes in the regex
70 : * library, and right now there's no real benefit to be gained from that.
71 : *
72 : * NB: the coding here assumes pg_wchar is an unsigned type.
73 : */
74 :
75 : /*
76 : * Size of stack buffer to use for string transformations, used to avoid heap
77 : * allocations in typical cases. This should be large enough that most strings
78 : * will fit, but small enough that we feel comfortable putting it on the
79 : * stack.
80 : */
81 : #define TEXTBUFLEN 1024
82 :
83 : extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
84 :
85 : static int strncoll_libc(const char *arg1, ssize_t len1,
86 : const char *arg2, ssize_t len2,
87 : pg_locale_t locale);
88 : static size_t strnxfrm_libc(char *dest, size_t destsize,
89 : const char *src, ssize_t srclen,
90 : pg_locale_t locale);
91 : extern char *get_collation_actual_version_libc(const char *collcollate);
92 : static locale_t make_libc_collator(const char *collate,
93 : const char *ctype);
94 :
95 : #ifdef WIN32
96 : static int strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
97 : const char *arg2, ssize_t len2,
98 : pg_locale_t locale);
99 : #endif
100 :
101 : static size_t char2wchar(wchar_t *to, size_t tolen, const char *from,
102 : size_t fromlen, locale_t loc);
103 :
104 : static size_t strlower_libc_sb(char *dest, size_t destsize,
105 : const char *src, ssize_t srclen,
106 : pg_locale_t locale);
107 : static size_t strlower_libc_mb(char *dest, size_t destsize,
108 : const char *src, ssize_t srclen,
109 : pg_locale_t locale);
110 : static size_t strtitle_libc_sb(char *dest, size_t destsize,
111 : const char *src, ssize_t srclen,
112 : pg_locale_t locale);
113 : static size_t strtitle_libc_mb(char *dest, size_t destsize,
114 : const char *src, ssize_t srclen,
115 : pg_locale_t locale);
116 : static size_t strupper_libc_sb(char *dest, size_t destsize,
117 : const char *src, ssize_t srclen,
118 : pg_locale_t locale);
119 : static size_t strupper_libc_mb(char *dest, size_t destsize,
120 : const char *src, ssize_t srclen,
121 : pg_locale_t locale);
122 :
123 : static bool
124 0 : wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
125 : {
126 0 : return isdigit_l((unsigned char) wc, locale->lt);
127 : }
128 :
129 : static bool
130 0 : wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
131 : {
132 0 : return isalpha_l((unsigned char) wc, locale->lt);
133 : }
134 :
135 : static bool
136 0 : wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
137 : {
138 0 : return isalnum_l((unsigned char) wc, locale->lt);
139 : }
140 :
141 : static bool
142 0 : wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
143 : {
144 0 : return isupper_l((unsigned char) wc, locale->lt);
145 : }
146 :
147 : static bool
148 0 : wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
149 : {
150 0 : return islower_l((unsigned char) wc, locale->lt);
151 : }
152 :
153 : static bool
154 0 : wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
155 : {
156 0 : return isgraph_l((unsigned char) wc, locale->lt);
157 : }
158 :
159 : static bool
160 0 : wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
161 : {
162 0 : return isprint_l((unsigned char) wc, locale->lt);
163 : }
164 :
165 : static bool
166 0 : wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
167 : {
168 0 : return ispunct_l((unsigned char) wc, locale->lt);
169 : }
170 :
171 : static bool
172 0 : wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
173 : {
174 0 : return isspace_l((unsigned char) wc, locale->lt);
175 : }
176 :
177 : static bool
178 0 : wc_isxdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
179 : {
180 : #ifndef WIN32
181 0 : return isxdigit_l((unsigned char) wc, locale->lt);
182 : #else
183 : return _isxdigit_l((unsigned char) wc, locale->lt);
184 : #endif
185 : }
186 :
187 : static bool
188 131608 : wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
189 : {
190 131608 : return iswdigit_l((wint_t) wc, locale->lt);
191 : }
192 :
193 : static bool
194 81148 : wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
195 : {
196 81148 : return iswalpha_l((wint_t) wc, locale->lt);
197 : }
198 :
199 : static bool
200 2845676 : wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
201 : {
202 2845676 : return iswalnum_l((wint_t) wc, locale->lt);
203 : }
204 :
205 : static bool
206 4112 : wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
207 : {
208 4112 : return iswupper_l((wint_t) wc, locale->lt);
209 : }
210 :
211 : static bool
212 4102 : wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
213 : {
214 4102 : return iswlower_l((wint_t) wc, locale->lt);
215 : }
216 :
217 : static bool
218 4102 : wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
219 : {
220 4102 : return iswgraph_l((wint_t) wc, locale->lt);
221 : }
222 :
223 : static bool
224 4102 : wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
225 : {
226 4102 : return iswprint_l((wint_t) wc, locale->lt);
227 : }
228 :
229 : static bool
230 4102 : wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
231 : {
232 4102 : return iswpunct_l((wint_t) wc, locale->lt);
233 : }
234 :
235 : static bool
236 48152 : wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
237 : {
238 48152 : return iswspace_l((wint_t) wc, locale->lt);
239 : }
240 :
241 : static bool
242 12 : wc_isxdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
243 : {
244 : #ifndef WIN32
245 12 : return iswxdigit_l((wint_t) wc, locale->lt);
246 : #else
247 : return _iswxdigit_l((wint_t) wc, locale->lt);
248 : #endif
249 : }
250 :
251 : static char
252 0 : char_tolower_libc(unsigned char ch, pg_locale_t locale)
253 : {
254 : Assert(pg_database_encoding_max_length() == 1);
255 0 : return tolower_l(ch, locale->lt);
256 : }
257 :
258 : static bool
259 0 : char_is_cased_libc(char ch, pg_locale_t locale)
260 : {
261 0 : bool is_multibyte = pg_database_encoding_max_length() > 1;
262 :
263 0 : if (is_multibyte && IS_HIGHBIT_SET(ch))
264 0 : return true;
265 : else
266 0 : return isalpha_l((unsigned char) ch, locale->lt);
267 : }
268 :
269 : static pg_wchar
270 0 : toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
271 : {
272 : Assert(GetDatabaseEncoding() != PG_UTF8);
273 :
274 : /* force C behavior for ASCII characters, per comments above */
275 0 : if (locale->is_default && wc <= (pg_wchar) 127)
276 0 : return pg_ascii_toupper((unsigned char) wc);
277 0 : if (wc <= (pg_wchar) UCHAR_MAX)
278 0 : return toupper_l((unsigned char) wc, locale->lt);
279 : else
280 0 : return wc;
281 : }
282 :
283 : static pg_wchar
284 9088 : toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
285 : {
286 : Assert(GetDatabaseEncoding() == PG_UTF8);
287 :
288 : /* force C behavior for ASCII characters, per comments above */
289 9088 : if (locale->is_default && wc <= (pg_wchar) 127)
290 892 : return pg_ascii_toupper((unsigned char) wc);
291 : if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
292 8196 : return towupper_l((wint_t) wc, locale->lt);
293 : else
294 : return wc;
295 : }
296 :
297 : static pg_wchar
298 0 : tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
299 : {
300 : Assert(GetDatabaseEncoding() != PG_UTF8);
301 :
302 : /* force C behavior for ASCII characters, per comments above */
303 0 : if (locale->is_default && wc <= (pg_wchar) 127)
304 0 : return pg_ascii_tolower((unsigned char) wc);
305 0 : if (wc <= (pg_wchar) UCHAR_MAX)
306 0 : return tolower_l((unsigned char) wc, locale->lt);
307 : else
308 0 : return wc;
309 : }
310 :
311 : static pg_wchar
312 9092 : tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
313 : {
314 : Assert(GetDatabaseEncoding() == PG_UTF8);
315 :
316 : /* force C behavior for ASCII characters, per comments above */
317 9092 : if (locale->is_default && wc <= (pg_wchar) 127)
318 896 : return pg_ascii_tolower((unsigned char) wc);
319 : if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
320 8196 : return towlower_l((wint_t) wc, locale->lt);
321 : else
322 : return wc;
323 : }
324 :
325 : static const struct ctype_methods ctype_methods_libc_sb = {
326 : .strlower = strlower_libc_sb,
327 : .strtitle = strtitle_libc_sb,
328 : .strupper = strupper_libc_sb,
329 : /* in libc, casefolding is the same as lowercasing */
330 : .strfold = strlower_libc_sb,
331 : .wc_isdigit = wc_isdigit_libc_sb,
332 : .wc_isalpha = wc_isalpha_libc_sb,
333 : .wc_isalnum = wc_isalnum_libc_sb,
334 : .wc_isupper = wc_isupper_libc_sb,
335 : .wc_islower = wc_islower_libc_sb,
336 : .wc_isgraph = wc_isgraph_libc_sb,
337 : .wc_isprint = wc_isprint_libc_sb,
338 : .wc_ispunct = wc_ispunct_libc_sb,
339 : .wc_isspace = wc_isspace_libc_sb,
340 : .wc_isxdigit = wc_isxdigit_libc_sb,
341 : .char_is_cased = char_is_cased_libc,
342 : .char_tolower = char_tolower_libc,
343 : .wc_toupper = toupper_libc_sb,
344 : .wc_tolower = tolower_libc_sb,
345 : };
346 :
347 : /*
348 : * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
349 : * single-byte semantics for pattern matching.
350 : */
351 : static const struct ctype_methods ctype_methods_libc_other_mb = {
352 : .strlower = strlower_libc_mb,
353 : .strtitle = strtitle_libc_mb,
354 : .strupper = strupper_libc_mb,
355 : /* in libc, casefolding is the same as lowercasing */
356 : .strfold = strlower_libc_mb,
357 : .wc_isdigit = wc_isdigit_libc_sb,
358 : .wc_isalpha = wc_isalpha_libc_sb,
359 : .wc_isalnum = wc_isalnum_libc_sb,
360 : .wc_isupper = wc_isupper_libc_sb,
361 : .wc_islower = wc_islower_libc_sb,
362 : .wc_isgraph = wc_isgraph_libc_sb,
363 : .wc_isprint = wc_isprint_libc_sb,
364 : .wc_ispunct = wc_ispunct_libc_sb,
365 : .wc_isspace = wc_isspace_libc_sb,
366 : .wc_isxdigit = wc_isxdigit_libc_sb,
367 : .char_is_cased = char_is_cased_libc,
368 : .char_tolower = char_tolower_libc,
369 : .wc_toupper = toupper_libc_sb,
370 : .wc_tolower = tolower_libc_sb,
371 : };
372 :
373 : static const struct ctype_methods ctype_methods_libc_utf8 = {
374 : .strlower = strlower_libc_mb,
375 : .strtitle = strtitle_libc_mb,
376 : .strupper = strupper_libc_mb,
377 : /* in libc, casefolding is the same as lowercasing */
378 : .strfold = strlower_libc_mb,
379 : .wc_isdigit = wc_isdigit_libc_mb,
380 : .wc_isalpha = wc_isalpha_libc_mb,
381 : .wc_isalnum = wc_isalnum_libc_mb,
382 : .wc_isupper = wc_isupper_libc_mb,
383 : .wc_islower = wc_islower_libc_mb,
384 : .wc_isgraph = wc_isgraph_libc_mb,
385 : .wc_isprint = wc_isprint_libc_mb,
386 : .wc_ispunct = wc_ispunct_libc_mb,
387 : .wc_isspace = wc_isspace_libc_mb,
388 : .wc_isxdigit = wc_isxdigit_libc_mb,
389 : .char_is_cased = char_is_cased_libc,
390 : .char_tolower = char_tolower_libc,
391 : .wc_toupper = toupper_libc_mb,
392 : .wc_tolower = tolower_libc_mb,
393 : };
394 :
395 : static const struct collate_methods collate_methods_libc = {
396 : .strncoll = strncoll_libc,
397 : .strnxfrm = strnxfrm_libc,
398 : .strnxfrm_prefix = NULL,
399 :
400 : /*
401 : * Unfortunately, it seems that strxfrm() for non-C collations is broken
402 : * on many common platforms; testing of multiple versions of glibc reveals
403 : * that, for many locales, strcoll() and strxfrm() do not return
404 : * consistent results. While no other libc other than Cygwin has so far
405 : * been shown to have a problem, we take the conservative course of action
406 : * for right now and disable this categorically. (Users who are certain
407 : * this isn't a problem on their system can define TRUST_STRXFRM.)
408 : */
409 : #ifdef TRUST_STRXFRM
410 : .strxfrm_is_safe = true,
411 : #else
412 : .strxfrm_is_safe = false,
413 : #endif
414 : };
415 :
#ifdef WIN32
/*
 * Collation method table for UTF8 databases on Windows: identical to
 * collate_methods_libc except that comparison goes through
 * strncoll_libc_win32_utf8().
 */
static const struct collate_methods collate_methods_libc_win32_utf8 = {
	.strnxfrm_prefix = NULL,
	.strnxfrm = strnxfrm_libc,
	.strncoll = strncoll_libc_win32_utf8,
#ifndef TRUST_STRXFRM
	.strxfrm_is_safe = false,
#else
	.strxfrm_is_safe = true,
#endif
};
#endif
428 :
429 : static size_t
430 0 : strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
431 : pg_locale_t locale)
432 : {
433 0 : if (srclen < 0)
434 0 : srclen = strlen(src);
435 :
436 0 : if (srclen + 1 <= destsize)
437 : {
438 0 : locale_t loc = locale->lt;
439 : char *p;
440 :
441 0 : memcpy(dest, src, srclen);
442 0 : dest[srclen] = '\0';
443 :
444 : /*
445 : * Note: we assume that tolower_l() will not be so broken as to need
446 : * an isupper_l() guard test. When using the default collation, we
447 : * apply the traditional Postgres behavior that forces ASCII-style
448 : * treatment of I/i, but in non-default collations you get exactly
449 : * what the collation says.
450 : */
451 0 : for (p = dest; *p; p++)
452 : {
453 0 : if (locale->is_default)
454 : {
455 0 : if (*p >= 'A' && *p <= 'Z')
456 0 : *p += 'a' - 'A';
457 0 : else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
458 0 : *p = tolower_l((unsigned char) *p, loc);
459 : }
460 : else
461 0 : *p = tolower_l((unsigned char) *p, loc);
462 : }
463 : }
464 :
465 0 : return srclen;
466 : }
467 :
468 : static size_t
469 425022 : strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
470 : pg_locale_t locale)
471 : {
472 425022 : locale_t loc = locale->lt;
473 : size_t result_size;
474 : wchar_t *workspace;
475 : char *result;
476 : size_t curr_char;
477 : size_t max_size;
478 :
479 425022 : if (srclen < 0)
480 0 : srclen = strlen(src);
481 :
482 : /* Overflow paranoia */
483 425022 : if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
484 0 : ereport(ERROR,
485 : (errcode(ERRCODE_OUT_OF_MEMORY),
486 : errmsg("out of memory")));
487 :
488 : /* Output workspace cannot have more codes than input bytes */
489 425022 : workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
490 :
491 425022 : char2wchar(workspace, srclen + 1, src, srclen, loc);
492 :
493 3669786 : for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
494 3244764 : workspace[curr_char] = towlower_l(workspace[curr_char], loc);
495 :
496 : /*
497 : * Make result large enough; case change might change number of bytes
498 : */
499 425022 : max_size = curr_char * pg_database_encoding_max_length();
500 425022 : result = palloc(max_size + 1);
501 :
502 425022 : result_size = wchar2char(result, workspace, max_size + 1, loc);
503 :
504 425022 : if (result_size + 1 > destsize)
505 0 : return result_size;
506 :
507 425022 : memcpy(dest, result, result_size);
508 425022 : dest[result_size] = '\0';
509 :
510 425022 : pfree(workspace);
511 425022 : pfree(result);
512 :
513 425022 : return result_size;
514 : }
515 :
516 : static size_t
517 0 : strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
518 : pg_locale_t locale)
519 : {
520 0 : if (srclen < 0)
521 0 : srclen = strlen(src);
522 :
523 0 : if (srclen + 1 <= destsize)
524 : {
525 0 : locale_t loc = locale->lt;
526 0 : int wasalnum = false;
527 : char *p;
528 :
529 0 : memcpy(dest, src, srclen);
530 0 : dest[srclen] = '\0';
531 :
532 : /*
533 : * Note: we assume that toupper_l()/tolower_l() will not be so broken
534 : * as to need guard tests. When using the default collation, we apply
535 : * the traditional Postgres behavior that forces ASCII-style treatment
536 : * of I/i, but in non-default collations you get exactly what the
537 : * collation says.
538 : */
539 0 : for (p = dest; *p; p++)
540 : {
541 0 : if (locale->is_default)
542 : {
543 0 : if (wasalnum)
544 : {
545 0 : if (*p >= 'A' && *p <= 'Z')
546 0 : *p += 'a' - 'A';
547 0 : else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
548 0 : *p = tolower_l((unsigned char) *p, loc);
549 : }
550 : else
551 : {
552 0 : if (*p >= 'a' && *p <= 'z')
553 0 : *p -= 'a' - 'A';
554 0 : else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
555 0 : *p = toupper_l((unsigned char) *p, loc);
556 : }
557 : }
558 : else
559 : {
560 0 : if (wasalnum)
561 0 : *p = tolower_l((unsigned char) *p, loc);
562 : else
563 0 : *p = toupper_l((unsigned char) *p, loc);
564 : }
565 0 : wasalnum = isalnum_l((unsigned char) *p, loc);
566 : }
567 : }
568 :
569 0 : return srclen;
570 : }
571 :
572 : static size_t
573 8 : strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
574 : pg_locale_t locale)
575 : {
576 8 : locale_t loc = locale->lt;
577 8 : int wasalnum = false;
578 : size_t result_size;
579 : wchar_t *workspace;
580 : char *result;
581 : size_t curr_char;
582 : size_t max_size;
583 :
584 8 : if (srclen < 0)
585 0 : srclen = strlen(src);
586 :
587 : /* Overflow paranoia */
588 8 : if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
589 0 : ereport(ERROR,
590 : (errcode(ERRCODE_OUT_OF_MEMORY),
591 : errmsg("out of memory")));
592 :
593 : /* Output workspace cannot have more codes than input bytes */
594 8 : workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
595 :
596 8 : char2wchar(workspace, srclen + 1, src, srclen, loc);
597 :
598 80 : for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
599 : {
600 72 : if (wasalnum)
601 56 : workspace[curr_char] = towlower_l(workspace[curr_char], loc);
602 : else
603 16 : workspace[curr_char] = towupper_l(workspace[curr_char], loc);
604 72 : wasalnum = iswalnum_l(workspace[curr_char], loc);
605 : }
606 :
607 : /*
608 : * Make result large enough; case change might change number of bytes
609 : */
610 8 : max_size = curr_char * pg_database_encoding_max_length();
611 8 : result = palloc(max_size + 1);
612 :
613 8 : result_size = wchar2char(result, workspace, max_size + 1, loc);
614 :
615 8 : if (result_size + 1 > destsize)
616 0 : return result_size;
617 :
618 8 : memcpy(dest, result, result_size);
619 8 : dest[result_size] = '\0';
620 :
621 8 : pfree(workspace);
622 8 : pfree(result);
623 :
624 8 : return result_size;
625 : }
626 :
627 : static size_t
628 0 : strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
629 : pg_locale_t locale)
630 : {
631 0 : if (srclen < 0)
632 0 : srclen = strlen(src);
633 :
634 0 : if (srclen + 1 <= destsize)
635 : {
636 0 : locale_t loc = locale->lt;
637 : char *p;
638 :
639 0 : memcpy(dest, src, srclen);
640 0 : dest[srclen] = '\0';
641 :
642 : /*
643 : * Note: we assume that toupper_l() will not be so broken as to need
644 : * an islower_l() guard test. When using the default collation, we
645 : * apply the traditional Postgres behavior that forces ASCII-style
646 : * treatment of I/i, but in non-default collations you get exactly
647 : * what the collation says.
648 : */
649 0 : for (p = dest; *p; p++)
650 : {
651 0 : if (locale->is_default)
652 : {
653 0 : if (*p >= 'a' && *p <= 'z')
654 0 : *p -= 'a' - 'A';
655 0 : else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
656 0 : *p = toupper_l((unsigned char) *p, loc);
657 : }
658 : else
659 0 : *p = toupper_l((unsigned char) *p, loc);
660 : }
661 : }
662 :
663 0 : return srclen;
664 : }
665 :
666 : static size_t
667 719980 : strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
668 : pg_locale_t locale)
669 : {
670 719980 : locale_t loc = locale->lt;
671 : size_t result_size;
672 : wchar_t *workspace;
673 : char *result;
674 : size_t curr_char;
675 : size_t max_size;
676 :
677 719980 : if (srclen < 0)
678 0 : srclen = strlen(src);
679 :
680 : /* Overflow paranoia */
681 719980 : if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
682 0 : ereport(ERROR,
683 : (errcode(ERRCODE_OUT_OF_MEMORY),
684 : errmsg("out of memory")));
685 :
686 : /* Output workspace cannot have more codes than input bytes */
687 719980 : workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
688 :
689 719980 : char2wchar(workspace, srclen + 1, src, srclen, loc);
690 :
691 2372434 : for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
692 1652454 : workspace[curr_char] = towupper_l(workspace[curr_char], loc);
693 :
694 : /*
695 : * Make result large enough; case change might change number of bytes
696 : */
697 719980 : max_size = curr_char * pg_database_encoding_max_length();
698 719980 : result = palloc(max_size + 1);
699 :
700 719980 : result_size = wchar2char(result, workspace, max_size + 1, loc);
701 :
702 719980 : if (result_size + 1 > destsize)
703 0 : return result_size;
704 :
705 719980 : memcpy(dest, result, result_size);
706 719980 : dest[result_size] = '\0';
707 :
708 719980 : pfree(workspace);
709 719980 : pfree(result);
710 :
711 719980 : return result_size;
712 : }
713 :
714 : pg_locale_t
715 31914 : create_pg_locale_libc(Oid collid, MemoryContext context)
716 : {
717 : const char *collate;
718 : const char *ctype;
719 : locale_t loc;
720 : pg_locale_t result;
721 :
722 31914 : if (collid == DEFAULT_COLLATION_OID)
723 : {
724 : HeapTuple tp;
725 : Datum datum;
726 :
727 31824 : tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
728 31824 : if (!HeapTupleIsValid(tp))
729 0 : elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
730 31824 : datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
731 : Anum_pg_database_datcollate);
732 31824 : collate = TextDatumGetCString(datum);
733 31824 : datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
734 : Anum_pg_database_datctype);
735 31824 : ctype = TextDatumGetCString(datum);
736 :
737 31824 : ReleaseSysCache(tp);
738 : }
739 : else
740 : {
741 : HeapTuple tp;
742 : Datum datum;
743 :
744 90 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
745 90 : if (!HeapTupleIsValid(tp))
746 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
747 :
748 90 : datum = SysCacheGetAttrNotNull(COLLOID, tp,
749 : Anum_pg_collation_collcollate);
750 90 : collate = TextDatumGetCString(datum);
751 90 : datum = SysCacheGetAttrNotNull(COLLOID, tp,
752 : Anum_pg_collation_collctype);
753 90 : ctype = TextDatumGetCString(datum);
754 :
755 90 : ReleaseSysCache(tp);
756 : }
757 :
758 :
759 31914 : loc = make_libc_collator(collate, ctype);
760 :
761 31914 : result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
762 31914 : result->deterministic = true;
763 62696 : result->collate_is_c = (strcmp(collate, "C") == 0) ||
764 30782 : (strcmp(collate, "POSIX") == 0);
765 62696 : result->ctype_is_c = (strcmp(ctype, "C") == 0) ||
766 30782 : (strcmp(ctype, "POSIX") == 0);
767 31914 : result->lt = loc;
768 31914 : if (!result->collate_is_c)
769 : {
770 : #ifdef WIN32
771 : if (GetDatabaseEncoding() == PG_UTF8)
772 : result->collate = &collate_methods_libc_win32_utf8;
773 : else
774 : #endif
775 30718 : result->collate = &collate_methods_libc;
776 : }
777 31914 : if (!result->ctype_is_c)
778 : {
779 30718 : if (GetDatabaseEncoding() == PG_UTF8)
780 30654 : result->ctype = &ctype_methods_libc_utf8;
781 64 : else if (pg_database_encoding_max_length() > 1)
782 0 : result->ctype = &ctype_methods_libc_other_mb;
783 : else
784 64 : result->ctype = &ctype_methods_libc_sb;
785 : }
786 :
787 31914 : return result;
788 : }
789 :
790 : /*
791 : * Create a locale_t with the given collation and ctype.
792 : *
793 : * The "C" and "POSIX" locales are not actually handled by libc, so return
794 : * NULL.
795 : *
796 : * Ensure that no path leaks a locale_t.
797 : */
798 : static locale_t
799 31914 : make_libc_collator(const char *collate, const char *ctype)
800 : {
801 31914 : locale_t loc = 0;
802 :
803 31914 : if (strcmp(collate, ctype) == 0)
804 : {
805 31914 : if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
806 : {
807 : /* Normal case where they're the same */
808 30718 : errno = 0;
809 : #ifndef WIN32
810 30718 : loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
811 : NULL);
812 : #else
813 : loc = _create_locale(LC_ALL, collate);
814 : #endif
815 30718 : if (!loc)
816 0 : report_newlocale_failure(collate);
817 : }
818 : }
819 : else
820 : {
821 : #ifndef WIN32
822 : /* We need two newlocale() steps */
823 0 : locale_t loc1 = 0;
824 :
825 0 : if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
826 : {
827 0 : errno = 0;
828 0 : loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
829 0 : if (!loc1)
830 0 : report_newlocale_failure(collate);
831 : }
832 :
833 0 : if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
834 : {
835 0 : errno = 0;
836 0 : loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
837 0 : if (!loc)
838 : {
839 0 : if (loc1)
840 0 : freelocale(loc1);
841 0 : report_newlocale_failure(ctype);
842 : }
843 : }
844 : else
845 0 : loc = loc1;
846 : #else
847 :
848 : /*
849 : * XXX The _create_locale() API doesn't appear to support this. Could
850 : * perhaps be worked around by changing pg_locale_t to contain two
851 : * separate fields.
852 : */
853 : ereport(ERROR,
854 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
855 : errmsg("collations with different collate and ctype values are not supported on this platform")));
856 : #endif
857 : }
858 :
859 31914 : return loc;
860 : }
861 :
862 : /*
863 : * strncoll_libc
864 : *
865 : * NUL-terminate arguments, if necessary, and pass to strcoll_l().
866 : *
867 : * An input string length of -1 means that it's already NUL-terminated.
868 : */
869 : int
870 30180656 : strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
871 : pg_locale_t locale)
872 : {
873 : char sbuf[TEXTBUFLEN];
874 30180656 : char *buf = sbuf;
875 30180656 : size_t bufsize1 = (len1 == -1) ? 0 : len1 + 1;
876 30180656 : size_t bufsize2 = (len2 == -1) ? 0 : len2 + 1;
877 : const char *arg1n;
878 : const char *arg2n;
879 : int result;
880 :
881 30180656 : if (bufsize1 + bufsize2 > TEXTBUFLEN)
882 568 : buf = palloc(bufsize1 + bufsize2);
883 :
884 : /* nul-terminate arguments if necessary */
885 30180656 : if (len1 == -1)
886 : {
887 25754254 : arg1n = arg1;
888 : }
889 : else
890 : {
891 4426402 : char *buf1 = buf;
892 :
893 4426402 : memcpy(buf1, arg1, len1);
894 4426402 : buf1[len1] = '\0';
895 4426402 : arg1n = buf1;
896 : }
897 :
898 30180656 : if (len2 == -1)
899 : {
900 25754254 : arg2n = arg2;
901 : }
902 : else
903 : {
904 4426402 : char *buf2 = buf + bufsize1;
905 :
906 4426402 : memcpy(buf2, arg2, len2);
907 4426402 : buf2[len2] = '\0';
908 4426402 : arg2n = buf2;
909 : }
910 :
911 30180656 : result = strcoll_l(arg1n, arg2n, locale->lt);
912 :
913 30180656 : if (buf != sbuf)
914 568 : pfree(buf);
915 :
916 30180656 : return result;
917 : }
918 :
919 : /*
920 : * strnxfrm_libc
921 : *
922 : * NUL-terminate src, if necessary, and pass to strxfrm_l().
923 : *
924 : * A source length of -1 means that it's already NUL-terminated.
925 : */
926 : size_t
927 144 : strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
928 : pg_locale_t locale)
929 : {
930 : char sbuf[TEXTBUFLEN];
931 144 : char *buf = sbuf;
932 144 : size_t bufsize = srclen + 1;
933 : size_t result;
934 :
935 144 : if (srclen == -1)
936 144 : return strxfrm_l(dest, src, destsize, locale->lt);
937 :
938 0 : if (bufsize > TEXTBUFLEN)
939 0 : buf = palloc(bufsize);
940 :
941 : /* nul-terminate argument */
942 0 : memcpy(buf, src, srclen);
943 0 : buf[srclen] = '\0';
944 :
945 0 : result = strxfrm_l(dest, buf, destsize, locale->lt);
946 :
947 0 : if (buf != sbuf)
948 0 : pfree(buf);
949 :
950 : /* if dest is defined, it should be nul-terminated */
951 : Assert(result >= destsize || dest[result] == '\0');
952 :
953 0 : return result;
954 : }
955 :
/*
 * get_collation_actual_version_libc
 *
 * Return a palloc'd version string for the named libc collation, or NULL
 * when no version is reported.  Names equal to "C" or "POSIX", or starting
 * with "C." (all compared case-insensitively), never get a version.
 *
 * The version source is chosen at compile time: the glibc version string,
 * querylocale() where LC_VERSION_MASK exists, or GetNLSVersionEx() on
 * Windows.  On any other platform the result is NULL.
 */
char *
get_collation_actual_version_libc(const char *collcollate)
{
	char	   *collversion = NULL;
	bool		is_c_like;

	is_c_like = pg_strcasecmp("C", collcollate) == 0 ||
		pg_strncasecmp("C.", collcollate, 2) == 0 ||
		pg_strcasecmp("POSIX", collcollate) == 0;

	if (!is_c_like)
	{
#if defined(__GLIBC__)
		/* Use the glibc version because we don't have anything better. */
		collversion = pstrdup(gnu_get_libc_version());
#elif defined(LC_VERSION_MASK)
		/* Look up FreeBSD collation version. */
		locale_t	loc = newlocale(LC_COLLATE_MASK, collcollate, NULL);

		if (!loc)
			ereport(ERROR,
					(errmsg("could not load locale \"%s\"", collcollate)));

		collversion =
			pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
		freelocale(loc);
#elif defined(WIN32)

		/*
		 * If we are targeting Windows Vista and above, we can ask for a name
		 * given a collation name (earlier versions required a location code
		 * that we don't have).
		 */
		NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
		WCHAR		wide_collcollate[LOCALE_NAME_MAX_LENGTH];

		MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
							LOCALE_NAME_MAX_LENGTH);
		if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
		{
			/*
			 * GetNLSVersionEx() wants a language tag such as "en-US", not a
			 * locale name like "English_United States.1252".  Until those
			 * values can be prevented from entering the system, or 100%
			 * reliably converted to the more useful tag format, tolerate the
			 * resulting error and report that we have no version data.
			 */
			if (GetLastError() == ERROR_INVALID_PARAMETER)
				return NULL;

			ereport(ERROR,
					(errmsg("could not get collation version for locale \"%s\": error code %lu",
							collcollate,
							GetLastError())));
		}
		collversion = psprintf("%lu.%lu,%lu.%lu",
							   (version.dwNLSVersion >> 8) & 0xFFFF,
							   version.dwNLSVersion & 0xFF,
							   (version.dwDefinedVersion >> 8) & 0xFFFF,
							   version.dwDefinedVersion & 0xFF);
#endif
	}

	return collversion;
}
1020 :
/*
 * strncoll_libc_win32_utf8
 *
 * Compare two UTF-8 strings under a libc locale on Windows.
 *
 * wcscoll_l() works on wide characters, so both arguments are first
 * converted from UTF-8 to UTF-16 with MultiByteToWideChar() and the
 * resulting wide strings are compared.
 *
 * arg1/len1 and arg2/len2 are the strings and their lengths in bytes; an
 * input string length of -1 means that it's NUL-terminated.  locale->lt is
 * the locale handed to wcscoll_l().
 *
 * Returns the wcscoll_l() result.  Raises an ERROR if either conversion or
 * the comparison itself fails.
 */
#ifdef WIN32
static int
strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
						 ssize_t len2, pg_locale_t locale)
{
	char		sbuf[TEXTBUFLEN];
	char	   *buf = sbuf;
	char	   *a1p,
			   *a2p;
	size_t		a1len;
	size_t		a2len;
	int			r;
	int			result;

	Assert(GetDatabaseEncoding() == PG_UTF8);

	if (len1 == -1)
		len1 = strlen(arg1);
	if (len2 == -1)
		len2 = strlen(arg2);

	/*
	 * Reserve one 2-byte wide character per input byte, plus a terminator.
	 * The sizes are computed in size_t rather than int so that two large
	 * inputs cannot overflow the sum; a wrapped total would wrongly select
	 * the small stack buffer below.  An oversized total is instead passed to
	 * palloc() intact.
	 */
	a1len = (size_t) len1 * 2 + 2;
	a2len = (size_t) len2 * 2 + 2;

	if (a1len + a2len > TEXTBUFLEN)
		buf = palloc(a1len + a2len);

	/* The two wide strings are stored back to back in the work buffer. */
	a1p = buf;
	a2p = buf + a1len;

	/* API does not work for zero-length input */
	if (len1 == 0)
		r = 0;
	else
	{
		r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
								(LPWSTR) a1p, (int) (a1len / 2));
		if (!r)
			ereport(ERROR,
					(errmsg("could not convert string to UTF-16: error code %lu",
							GetLastError())));
	}
	/* MultiByteToWideChar() was given no terminator, so add one ourselves */
	((LPWSTR) a1p)[r] = 0;

	if (len2 == 0)
		r = 0;
	else
	{
		r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
								(LPWSTR) a2p, (int) (a2len / 2));
		if (!r)
			ereport(ERROR,
					(errmsg("could not convert string to UTF-16: error code %lu",
							GetLastError())));
	}
	((LPWSTR) a2p)[r] = 0;

	/* Clear errno so that %m below reflects only what wcscoll_l() set. */
	errno = 0;
	result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->lt);
	if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw headers */
		ereport(ERROR,
				(errmsg("could not compare Unicode strings: %m")));

	if (buf != sbuf)
		pfree(buf);

	return result;
}
#endif							/* WIN32 */
1098 :
1099 : /* simple subroutine for reporting errors from newlocale() */
1100 : void
1101 0 : report_newlocale_failure(const char *localename)
1102 : {
1103 : int save_errno;
1104 :
1105 : /*
1106 : * Windows doesn't provide any useful error indication from
1107 : * _create_locale(), and BSD-derived platforms don't seem to feel they
1108 : * need to set errno either (even though POSIX is pretty clear that
1109 : * newlocale should do so). So, if errno hasn't been set, assume ENOENT
1110 : * is what to report.
1111 : */
1112 0 : if (errno == 0)
1113 0 : errno = ENOENT;
1114 :
1115 : /*
1116 : * ENOENT means "no such locale", not "no such file", so clarify that
1117 : * errno with an errdetail message.
1118 : */
1119 0 : save_errno = errno; /* auxiliary funcs might change errno */
1120 0 : ereport(ERROR,
1121 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1122 : errmsg("could not create locale \"%s\": %m",
1123 : localename),
1124 : (save_errno == ENOENT ?
1125 : errdetail("The operating system could not find any locale data for the locale name \"%s\".",
1126 : localename) : 0)));
1127 : }
1128 :
1129 : /*
1130 : * POSIX doesn't define _l-variants of these functions, but several systems
1131 : * have them. We provide our own replacements here.
1132 : */
#ifndef HAVE_MBSTOWCS_L
/*
 * Replacement for mbstowcs_l() on platforms that lack it: convert the
 * NUL-terminated multibyte string src into at most n wide characters at
 * dest, interpreting src according to loc.  Returns whatever the underlying
 * conversion routine returns.
 */
static size_t
mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
{
#ifdef WIN32
	/* Windows has the same facility under a different name. */
	return _mbstowcs_l(dest, src, n, loc);
#else
	locale_t	prev_locale;
	size_t		nconverted;

	/* Switch to the requested locale just for the duration of the call. */
	prev_locale = uselocale(loc);
	nconverted = mbstowcs(dest, src, n);
	uselocale(prev_locale);

	return nconverted;
#endif
}
#endif
#ifndef HAVE_WCSTOMBS_L
/*
 * Replacement for wcstombs_l() on platforms that lack it: convert the
 * zero-terminated wide string src into at most n bytes at dest, encoding
 * the output according to loc.  Returns whatever the underlying conversion
 * routine returns.
 */
static size_t
wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
{
#ifdef WIN32
	/* Windows has the same facility under a different name. */
	return _wcstombs_l(dest, src, n, loc);
#else
	locale_t	prev_locale;
	size_t		nconverted;

	/* Switch to the requested locale just for the duration of the call. */
	prev_locale = uselocale(loc);
	nconverted = wcstombs(dest, src, n);
	uselocale(prev_locale);

	return nconverted;
#endif
}
#endif
1165 :
1166 : /*
1167 : * These functions convert from/to libc's wchar_t, *not* pg_wchar.
1168 : * Therefore we keep them here rather than with the mbutils code.
1169 : */
1170 :
1171 : /*
1172 : * wchar2char --- convert wide characters to multibyte format
1173 : *
1174 : * This has the same API as the standard wcstombs_l() function; in particular,
1175 : * tolen is the maximum number of bytes to store at *to, and *from must be
1176 : * zero-terminated. The output will be zero-terminated iff there is room.
1177 : */
1178 : size_t
1179 1145010 : wchar2char(char *to, const wchar_t *from, size_t tolen, locale_t loc)
1180 : {
1181 : size_t result;
1182 :
1183 1145010 : if (tolen == 0)
1184 0 : return 0;
1185 :
1186 : #ifdef WIN32
1187 :
1188 : /*
1189 : * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
1190 : * for some reason mbstowcs and wcstombs won't do this for us, so we use
1191 : * MultiByteToWideChar().
1192 : */
1193 : if (GetDatabaseEncoding() == PG_UTF8)
1194 : {
1195 : result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
1196 : NULL, NULL);
1197 : /* A zero return is failure */
1198 : if (result <= 0)
1199 : result = -1;
1200 : else
1201 : {
1202 : Assert(result <= tolen);
1203 : /* Microsoft counts the zero terminator in the result */
1204 : result--;
1205 : }
1206 : }
1207 : else
1208 : #endif /* WIN32 */
1209 1145010 : if (loc == (locale_t) 0)
1210 : {
1211 : /* Use wcstombs directly for the default locale */
1212 0 : result = wcstombs(to, from, tolen);
1213 : }
1214 : else
1215 : {
1216 : /* Use wcstombs_l for nondefault locales */
1217 1145010 : result = wcstombs_l(to, from, tolen, loc);
1218 : }
1219 :
1220 1145010 : return result;
1221 : }
1222 :
1223 : /*
1224 : * char2wchar --- convert multibyte characters to wide characters
1225 : *
1226 : * This has almost the API of mbstowcs_l(), except that *from need not be
1227 : * null-terminated; instead, the number of input bytes is specified as
1228 : * fromlen. Also, we ereport() rather than returning -1 for invalid
1229 : * input encoding. tolen is the maximum number of wchar_t's to store at *to.
1230 : * The output will be zero-terminated iff there is room.
1231 : */
1232 : static size_t
1233 1145010 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
1234 : locale_t loc)
1235 : {
1236 : size_t result;
1237 :
1238 1145010 : if (tolen == 0)
1239 0 : return 0;
1240 :
1241 : #ifdef WIN32
1242 : /* See WIN32 "Unicode" comment above */
1243 : if (GetDatabaseEncoding() == PG_UTF8)
1244 : {
1245 : /* Win32 API does not work for zero-length input */
1246 : if (fromlen == 0)
1247 : result = 0;
1248 : else
1249 : {
1250 : result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
1251 : /* A zero return is failure */
1252 : if (result == 0)
1253 : result = -1;
1254 : }
1255 :
1256 : if (result != -1)
1257 : {
1258 : Assert(result < tolen);
1259 : /* Append trailing null wchar (MultiByteToWideChar() does not) */
1260 : to[result] = 0;
1261 : }
1262 : }
1263 : else
1264 : #endif /* WIN32 */
1265 : {
1266 : /* mbstowcs requires ending '\0' */
1267 1145010 : char *str = pnstrdup(from, fromlen);
1268 :
1269 1145010 : if (loc == (locale_t) 0)
1270 : {
1271 : /* Use mbstowcs directly for the default locale */
1272 0 : result = mbstowcs(to, str, tolen);
1273 : }
1274 : else
1275 : {
1276 : /* Use mbstowcs_l for nondefault locales */
1277 1145010 : result = mbstowcs_l(to, str, tolen, loc);
1278 : }
1279 :
1280 1145010 : pfree(str);
1281 : }
1282 :
1283 1145010 : if (result == -1)
1284 : {
1285 : /*
1286 : * Invalid multibyte character encountered. We try to give a useful
1287 : * error message by letting pg_verifymbstr check the string. But it's
1288 : * possible that the string is OK to us, and not OK to mbstowcs ---
1289 : * this suggests that the LC_CTYPE locale is different from the
1290 : * database encoding. Give a generic error message if pg_verifymbstr
1291 : * can't find anything wrong.
1292 : */
1293 0 : pg_verifymbstr(from, fromlen, false); /* might not return */
1294 : /* but if it does ... */
1295 0 : ereport(ERROR,
1296 : (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1297 : errmsg("invalid multibyte character for locale"),
1298 : errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
1299 : }
1300 :
1301 1145010 : return result;
1302 : }
|