LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_libc.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 58.5 % 313 183
Test Date: 2026-05-20 05:16:37 Functions: 57.8 % 45 26
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-----------------------------------------------------------------------
       2              :  *
       3              :  * PostgreSQL locale utilities for libc
       4              :  *
       5              :  * Portions Copyright (c) 2002-2026, PostgreSQL Global Development Group
       6              :  *
       7              :  * src/backend/utils/adt/pg_locale_libc.c
       8              :  *
       9              :  *-----------------------------------------------------------------------
      10              :  */
      11              : 
      12              : #include "postgres.h"
      13              : 
      14              : #include <limits.h>
      15              : #include <wctype.h>
      16              : 
      17              : #include "access/htup_details.h"
      18              : #include "catalog/pg_database.h"
      19              : #include "catalog/pg_collation.h"
      20              : #include "mb/pg_wchar.h"
      21              : #include "miscadmin.h"
      22              : #include "utils/builtins.h"
      23              : #include "utils/formatting.h"
      24              : #include "utils/memutils.h"
      25              : #include "utils/pg_locale.h"
      26              : #include "utils/syscache.h"
      27              : 
      28              : #ifdef __GLIBC__
      29              : #include <gnu/libc-version.h>
      30              : #endif
      31              : 
      32              : #ifdef WIN32
      33              : #include <shlwapi.h>
      34              : #endif
      35              : 
      36              : /*
      37              :  * For the libc provider, to provide as much functionality as possible on a
      38              :  * variety of platforms without going so far as to implement everything from
      39              :  * scratch, we use several implementation strategies depending on the
      40              :  * situation:
      41              :  *
      42              :  * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
      43              :  * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
      44              :  * collations don't give a fig about multibyte characters.
      45              :  *
      46              :  * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
      47              :  * This assumes that every platform uses Unicode codepoints directly
      48              :  * as the wchar_t representation of Unicode.  On some platforms
      49              :  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
      50              :  *
      51              :  * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
      52              :  * values up to 255, and punt for values above that.  This is 100% correct
      53              :  * only in single-byte encodings such as LATINn.  However, non-Unicode
      54              :  * multibyte encodings are mostly Far Eastern character sets for which the
      55              :  * properties being tested here aren't very relevant for higher code values
      56              :  * anyway.  The difficulty with using the <wctype.h> functions with
      57              :  * non-Unicode multibyte encodings is that we can have no certainty that
      58              :  * the platform's wchar_t representation matches what we do in pg_wchar
      59              :  * conversions.
      60              :  *
      61              :  * As a special case, in the "default" collation, (2) and (3) force ASCII
      62              :  * letters to follow ASCII upcase/downcase rules, while in a non-default
      63              :  * collation we just let the library functions do what they will.  The case
      64              :  * where this matters is treatment of I/i in Turkish, and the behavior is
      65              :  * meant to match the upper()/lower() SQL functions.
      66              :  *
      67              :  * We store the active collation setting in static variables.  In principle
      68              :  * it could be passed down to here via the regex library's "struct vars" data
      69              :  * structure; but that would require somewhat invasive changes in the regex
      70              :  * library, and right now there's no real benefit to be gained from that.
      71              :  *
      72              :  * NB: the coding here assumes pg_wchar is an unsigned type.
      73              :  */
      74              : 
      75              : /*
      76              :  * Size of stack buffer to use for string transformations, used to avoid heap
      77              :  * allocations in typical cases. This should be large enough that most strings
      78              :  * will fit, but small enough that we feel comfortable putting it on the
      79              :  * stack.
      80              :  */
      81              : #define     TEXTBUFLEN          1024
      82              : 
      83              : extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
      84              : 
      85              : static int  strncoll_libc(const char *arg1, size_t len1,
      86              :                           const char *arg2, size_t len2,
      87              :                           pg_locale_t locale);
      88              : static int  strcoll_libc(const char *arg1, const char *arg2,
      89              :                          pg_locale_t locale);
      90              : static size_t strnxfrm_libc(char *dest, size_t destsize,
      91              :                             const char *src, size_t srclen,
      92              :                             pg_locale_t locale);
      93              : static size_t strxfrm_libc(char *dest, size_t destsize,
      94              :                            const char *src, pg_locale_t locale);
      95              : extern char *get_collation_actual_version_libc(const char *collcollate);
      96              : static locale_t make_libc_collator(const char *collate,
      97              :                                    const char *ctype);
      98              : 
      99              : #ifdef WIN32
     100              : static int  strncoll_libc_win32_utf8(const char *arg1, size_t len1,
     101              :                                      const char *arg2, size_t len2,
     102              :                                      pg_locale_t locale);
     103              : static int  strcoll_libc_win32_utf8(const char *arg1, const char *arg2,
     104              :                                     pg_locale_t locale);
     105              : #endif
     106              : 
     107              : static size_t char2wchar(wchar_t *to, size_t tolen, const char *from,
     108              :                          size_t fromlen, locale_t loc);
     109              : 
     110              : static size_t strlower_libc_sb(char *dest, size_t destsize,
     111              :                                const char *src, size_t srclen,
     112              :                                pg_locale_t locale);
     113              : static size_t strlower_libc_mb(char *dest, size_t destsize,
     114              :                                const char *src, size_t srclen,
     115              :                                pg_locale_t locale);
     116              : static size_t strtitle_libc_sb(char *dest, size_t destsize,
     117              :                                const char *src, size_t srclen,
     118              :                                pg_locale_t locale);
     119              : static size_t strtitle_libc_mb(char *dest, size_t destsize,
     120              :                                const char *src, size_t srclen,
     121              :                                pg_locale_t locale);
     122              : static size_t strupper_libc_sb(char *dest, size_t destsize,
     123              :                                const char *src, size_t srclen,
     124              :                                pg_locale_t locale);
     125              : static size_t strupper_libc_mb(char *dest, size_t destsize,
     126              :                                const char *src, size_t srclen,
     127              :                                pg_locale_t locale);
     128              : 
     129              : static bool
     130            0 : wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
     131              : {
     132            0 :     return isdigit_l((unsigned char) wc, locale->lt);
     133              : }
     134              : 
     135              : static bool
     136            0 : wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
     137              : {
     138            0 :     return isalpha_l((unsigned char) wc, locale->lt);
     139              : }
     140              : 
     141              : static bool
     142            0 : wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
     143              : {
     144            0 :     return isalnum_l((unsigned char) wc, locale->lt);
     145              : }
     146              : 
     147              : static bool
     148            0 : wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
     149              : {
     150            0 :     return isupper_l((unsigned char) wc, locale->lt);
     151              : }
     152              : 
     153              : static bool
     154            0 : wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
     155              : {
     156            0 :     return islower_l((unsigned char) wc, locale->lt);
     157              : }
     158              : 
     159              : static bool
     160            0 : wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
     161              : {
     162            0 :     return isgraph_l((unsigned char) wc, locale->lt);
     163              : }
     164              : 
     165              : static bool
     166            0 : wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
     167              : {
     168            0 :     return isprint_l((unsigned char) wc, locale->lt);
     169              : }
     170              : 
     171              : static bool
     172            0 : wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
     173              : {
     174            0 :     return ispunct_l((unsigned char) wc, locale->lt);
     175              : }
     176              : 
     177              : static bool
     178            0 : wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
     179              : {
     180            0 :     return isspace_l((unsigned char) wc, locale->lt);
     181              : }
     182              : 
     183              : static bool
     184            0 : wc_isxdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
     185              : {
     186              : #ifndef WIN32
     187            0 :     return isxdigit_l((unsigned char) wc, locale->lt);
     188              : #else
     189              :     return _isxdigit_l((unsigned char) wc, locale->lt);
     190              : #endif
     191              : }
     192              : 
     193              : static bool
     194            0 : wc_iscased_libc_sb(pg_wchar wc, pg_locale_t locale)
     195              : {
     196            0 :     return isupper_l((unsigned char) wc, locale->lt) ||
     197            0 :         islower_l((unsigned char) wc, locale->lt);
     198              : }
     199              : 
     200              : static bool
     201       100063 : wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
     202              : {
     203       100063 :     return iswdigit_l((wint_t) wc, locale->lt);
     204              : }
     205              : 
     206              : static bool
     207        73895 : wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
     208              : {
     209        73895 :     return iswalpha_l((wint_t) wc, locale->lt);
     210              : }
     211              : 
     212              : static bool
     213      1597379 : wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
     214              : {
     215      1597379 :     return iswalnum_l((wint_t) wc, locale->lt);
     216              : }
     217              : 
     218              : static bool
     219         2056 : wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
     220              : {
     221         2056 :     return iswupper_l((wint_t) wc, locale->lt);
     222              : }
     223              : 
     224              : static bool
     225         2051 : wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
     226              : {
     227         2051 :     return iswlower_l((wint_t) wc, locale->lt);
     228              : }
     229              : 
     230              : static bool
     231         2051 : wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
     232              : {
     233         2051 :     return iswgraph_l((wint_t) wc, locale->lt);
     234              : }
     235              : 
     236              : static bool
     237         2051 : wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
     238              : {
     239         2051 :     return iswprint_l((wint_t) wc, locale->lt);
     240              : }
     241              : 
     242              : static bool
     243         2051 : wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
     244              : {
     245         2051 :     return iswpunct_l((wint_t) wc, locale->lt);
     246              : }
     247              : 
     248              : static bool
     249        34486 : wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
     250              : {
     251        34486 :     return iswspace_l((wint_t) wc, locale->lt);
     252              : }
     253              : 
     254              : static bool
     255            9 : wc_isxdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
     256              : {
     257              : #ifndef WIN32
     258            9 :     return iswxdigit_l((wint_t) wc, locale->lt);
     259              : #else
     260              :     return _iswxdigit_l((wint_t) wc, locale->lt);
     261              : #endif
     262              : }
     263              : 
     264              : static bool
     265            0 : wc_iscased_libc_mb(pg_wchar wc, pg_locale_t locale)
     266              : {
     267            0 :     return iswupper_l((wint_t) wc, locale->lt) ||
     268            0 :         iswlower_l((wint_t) wc, locale->lt);
     269              : }
     270              : 
     271              : static pg_wchar
     272            0 : toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
     273              : {
     274              :     Assert(GetDatabaseEncoding() != PG_UTF8);
     275              : 
     276              :     /* force C behavior for ASCII characters, per comments above */
     277            0 :     if (locale->is_default && wc <= (pg_wchar) 127)
     278            0 :         return pg_ascii_toupper((unsigned char) wc);
     279            0 :     if (wc <= (pg_wchar) UCHAR_MAX)
     280            0 :         return toupper_l((unsigned char) wc, locale->lt);
     281              :     else
     282            0 :         return wc;
     283              : }
     284              : 
     285              : static pg_wchar
     286         4679 : toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
     287              : {
     288              :     Assert(GetDatabaseEncoding() == PG_UTF8);
     289              : 
     290              :     /* force C behavior for ASCII characters, per comments above */
     291         4679 :     if (locale->is_default && wc <= (pg_wchar) 127)
     292          581 :         return pg_ascii_toupper((unsigned char) wc);
     293              :     if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
     294         4098 :         return towupper_l((wint_t) wc, locale->lt);
     295              :     else
     296              :         return wc;
     297              : }
     298              : 
     299              : static pg_wchar
     300            0 : tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
     301              : {
     302              :     Assert(GetDatabaseEncoding() != PG_UTF8);
     303              : 
     304              :     /* force C behavior for ASCII characters, per comments above */
     305            0 :     if (locale->is_default && wc <= (pg_wchar) 127)
     306            0 :         return pg_ascii_tolower((unsigned char) wc);
     307            0 :     if (wc <= (pg_wchar) UCHAR_MAX)
     308            0 :         return tolower_l((unsigned char) wc, locale->lt);
     309              :     else
     310            0 :         return wc;
     311              : }
     312              : 
     313              : static pg_wchar
     314         4681 : tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
     315              : {
     316              :     Assert(GetDatabaseEncoding() == PG_UTF8);
     317              : 
     318              :     /* force C behavior for ASCII characters, per comments above */
     319         4681 :     if (locale->is_default && wc <= (pg_wchar) 127)
     320          583 :         return pg_ascii_tolower((unsigned char) wc);
     321              :     if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
     322         4098 :         return towlower_l((wint_t) wc, locale->lt);
     323              :     else
     324              :         return wc;
     325              : }
     326              : 
     327              : /*
     328              :  * Characters A..Z always downcase to a..z, even in the Turkish
     329              :  * locale. Characters beyond 127 use tolower().
     330              :  */
     331              : static size_t
     332        13247 : downcase_ident_libc_sb(char *dst, size_t dstsize, const char *src,
     333              :                        size_t srclen, pg_locale_t locale)
     334              : {
     335        13247 :     locale_t    loc = locale->lt;
     336              :     int         i;
     337              : 
     338       128853 :     for (i = 0; i < srclen && i < dstsize; i++)
     339              :     {
     340       115606 :         unsigned char ch = (unsigned char) src[i];
     341              : 
     342       115606 :         if (ch >= 'A' && ch <= 'Z')
     343         6728 :             ch = pg_ascii_tolower(ch);
     344       108878 :         else if (IS_HIGHBIT_SET(ch) && isupper_l(ch, loc))
     345            0 :             ch = tolower_l(ch, loc);
     346       115606 :         dst[i] = (char) ch;
     347              :     }
     348              : 
     349        13247 :     if (i < dstsize)
     350        13247 :         dst[i] = '\0';
     351              : 
     352        13247 :     return srclen;
     353              : }
     354              : 
     355              : static const struct ctype_methods ctype_methods_libc_sb = {
     356              :     .strlower = strlower_libc_sb,
     357              :     .strtitle = strtitle_libc_sb,
     358              :     .strupper = strupper_libc_sb,
     359              :     /* in libc, casefolding is the same as lowercasing */
     360              :     .strfold = strlower_libc_sb,
     361              :     .downcase_ident = downcase_ident_libc_sb,
     362              :     .wc_isdigit = wc_isdigit_libc_sb,
     363              :     .wc_isalpha = wc_isalpha_libc_sb,
     364              :     .wc_isalnum = wc_isalnum_libc_sb,
     365              :     .wc_isupper = wc_isupper_libc_sb,
     366              :     .wc_islower = wc_islower_libc_sb,
     367              :     .wc_isgraph = wc_isgraph_libc_sb,
     368              :     .wc_isprint = wc_isprint_libc_sb,
     369              :     .wc_ispunct = wc_ispunct_libc_sb,
     370              :     .wc_isspace = wc_isspace_libc_sb,
     371              :     .wc_isxdigit = wc_isxdigit_libc_sb,
     372              :     .wc_iscased = wc_iscased_libc_sb,
     373              :     .wc_toupper = toupper_libc_sb,
     374              :     .wc_tolower = tolower_libc_sb,
     375              : };
     376              : 
     377              : /*
     378              :  * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
     379              :  * single-byte semantics for pattern matching.
     380              :  */
     381              : static const struct ctype_methods ctype_methods_libc_other_mb = {
     382              :     .strlower = strlower_libc_mb,
     383              :     .strtitle = strtitle_libc_mb,
     384              :     .strupper = strupper_libc_mb,
     385              :     /* in libc, casefolding is the same as lowercasing */
     386              :     .strfold = strlower_libc_mb,
     387              :     /* uses plain ASCII semantics for historical reasons */
     388              :     .downcase_ident = NULL,
     389              :     .wc_isdigit = wc_isdigit_libc_sb,
     390              :     .wc_isalpha = wc_isalpha_libc_sb,
     391              :     .wc_isalnum = wc_isalnum_libc_sb,
     392              :     .wc_isupper = wc_isupper_libc_sb,
     393              :     .wc_islower = wc_islower_libc_sb,
     394              :     .wc_isgraph = wc_isgraph_libc_sb,
     395              :     .wc_isprint = wc_isprint_libc_sb,
     396              :     .wc_ispunct = wc_ispunct_libc_sb,
     397              :     .wc_isspace = wc_isspace_libc_sb,
     398              :     .wc_isxdigit = wc_isxdigit_libc_sb,
     399              :     .wc_iscased = wc_iscased_libc_sb,
     400              :     .wc_toupper = toupper_libc_sb,
     401              :     .wc_tolower = tolower_libc_sb,
     402              : };
     403              : 
     404              : static const struct ctype_methods ctype_methods_libc_utf8 = {
     405              :     .strlower = strlower_libc_mb,
     406              :     .strtitle = strtitle_libc_mb,
     407              :     .strupper = strupper_libc_mb,
     408              :     /* in libc, casefolding is the same as lowercasing */
     409              :     .strfold = strlower_libc_mb,
     410              :     /* uses plain ASCII semantics for historical reasons */
     411              :     .downcase_ident = NULL,
     412              :     .wc_isdigit = wc_isdigit_libc_mb,
     413              :     .wc_isalpha = wc_isalpha_libc_mb,
     414              :     .wc_isalnum = wc_isalnum_libc_mb,
     415              :     .wc_isupper = wc_isupper_libc_mb,
     416              :     .wc_islower = wc_islower_libc_mb,
     417              :     .wc_isgraph = wc_isgraph_libc_mb,
     418              :     .wc_isprint = wc_isprint_libc_mb,
     419              :     .wc_ispunct = wc_ispunct_libc_mb,
     420              :     .wc_isspace = wc_isspace_libc_mb,
     421              :     .wc_isxdigit = wc_isxdigit_libc_mb,
     422              :     .wc_iscased = wc_iscased_libc_mb,
     423              :     .wc_toupper = toupper_libc_mb,
     424              :     .wc_tolower = tolower_libc_mb,
     425              : };
     426              : 
     427              : static const struct collate_methods collate_methods_libc = {
     428              :     .strncoll = strncoll_libc,
     429              :     .strcoll = strcoll_libc,
     430              :     .strnxfrm = strnxfrm_libc,
     431              :     .strxfrm = strxfrm_libc,
     432              :     .strnxfrm_prefix = NULL,
     433              :     .strxfrm_prefix = NULL,
     434              : 
     435              :     /*
     436              :      * Unfortunately, it seems that strxfrm() for non-C collations is broken
     437              :      * on many common platforms; testing of multiple versions of glibc reveals
     438              :      * that, for many locales, strcoll() and strxfrm() do not return
     439              :      * consistent results. While no other libc other than Cygwin has so far
     440              :      * been shown to have a problem, we take the conservative course of action
     441              :      * for right now and disable this categorically.  (Users who are certain
     442              :      * this isn't a problem on their system can define TRUST_STRXFRM.)
     443              :      */
     444              : #ifdef TRUST_STRXFRM
     445              :     .strxfrm_is_safe = true,
     446              : #else
     447              :     .strxfrm_is_safe = false,
     448              : #endif
     449              : };
     450              : 
     451              : #ifdef WIN32
     452              : static const struct collate_methods collate_methods_libc_win32_utf8 = {
     453              :     .strncoll = strncoll_libc_win32_utf8,
     454              :     .strcoll = strcoll_libc_win32_utf8,
     455              :     .strnxfrm = strnxfrm_libc,
     456              :     .strxfrm = strxfrm_libc,
     457              :     .strnxfrm_prefix = NULL,
     458              : #ifdef TRUST_STRXFRM
     459              :     .strxfrm_is_safe = true,
     460              : #else
     461              :     .strxfrm_is_safe = false,
     462              : #endif
     463              : };
     464              : #endif
     465              : 
     466              : static size_t
     467            0 : strlower_libc_sb(char *dest, size_t destsize, const char *src, size_t srclen,
     468              :                  pg_locale_t locale)
     469              : {
     470            0 :     if (srclen + 1 <= destsize)
     471              :     {
     472            0 :         locale_t    loc = locale->lt;
     473              :         char       *p;
     474              : 
     475            0 :         memcpy(dest, src, srclen);
     476            0 :         dest[srclen] = '\0';
     477              : 
     478              :         /*
     479              :          * Note: we assume that tolower_l() will not be so broken as to need
     480              :          * an isupper_l() guard test.  When using the default collation, we
     481              :          * apply the traditional Postgres behavior that forces ASCII-style
     482              :          * treatment of I/i, but in non-default collations you get exactly
     483              :          * what the collation says.
     484              :          */
     485            0 :         for (p = dest; *p; p++)
     486              :         {
     487            0 :             if (locale->is_default)
     488              :             {
     489            0 :                 if (*p >= 'A' && *p <= 'Z')
     490            0 :                     *p += 'a' - 'A';
     491            0 :                 else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
     492            0 :                     *p = tolower_l((unsigned char) *p, loc);
     493              :             }
     494              :             else
     495            0 :                 *p = tolower_l((unsigned char) *p, loc);
     496              :         }
     497              :     }
     498              : 
     499            0 :     return srclen;
     500              : }
     501              : 
     502              : static size_t
     503       521267 : strlower_libc_mb(char *dest, size_t destsize, const char *src, size_t srclen,
     504              :                  pg_locale_t locale)
     505              : {
     506       521267 :     locale_t    loc = locale->lt;
     507              :     size_t      result_size;
     508              :     wchar_t    *workspace;
     509              :     char       *result;
     510              :     size_t      curr_char;
     511              :     size_t      max_size;
     512              : 
     513              :     /* Overflow paranoia */
     514       521267 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     515            0 :         ereport(ERROR,
     516              :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     517              :                  errmsg("out of memory")));
     518              : 
     519              :     /* Output workspace cannot have more codes than input bytes */
     520       521267 :     workspace = palloc_array(wchar_t, srclen + 1);
     521              : 
     522       521267 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     523              : 
     524      2497120 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     525      1975853 :         workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     526              : 
     527              :     /*
     528              :      * Make result large enough; case change might change number of bytes
     529              :      */
     530       521267 :     max_size = curr_char * pg_database_encoding_max_length();
     531       521267 :     result = palloc(max_size + 1);
     532              : 
     533       521267 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     534              : 
     535       521267 :     if (destsize >= result_size + 1)
     536              :     {
     537       521267 :         memcpy(dest, result, result_size);
     538       521267 :         dest[result_size] = '\0';
     539              :     }
     540              : 
     541       521267 :     pfree(workspace);
     542       521267 :     pfree(result);
     543              : 
     544       521267 :     return result_size;
     545              : }
     546              : 
                        /*
                         * strtitle_libc_sb
                         *
                         * Title-case conversion that works one byte at a time.  The first byte,
                         * and every byte following a non-alphanumeric byte, is upper-cased; every
                         * byte following an alphanumeric byte is lower-cased.
                         *
                         * Nothing is written to dest unless srclen + 1 <= destsize.  The return
                         * value is srclen in either case.
                         *
                         * The conversion loop stops at the first zero byte in dest, so bytes after
                         * an embedded NUL in src are copied but not converted.
                         */
     547              : static size_t
     548            0 : strtitle_libc_sb(char *dest, size_t destsize, const char *src, size_t srclen,
     549              :                  pg_locale_t locale)
     550              : {
     551            0 :     if (srclen + 1 <= destsize)
     552              :     {
     553            0 :         locale_t    loc = locale->lt;
     554            0 :         int         wasalnum = false;
     555              :         char       *p;
     556              : 
     557            0 :         memcpy(dest, src, srclen);
     558            0 :         dest[srclen] = '\0';
     559              : 
     560              :         /*
     561              :          * Note: we assume that toupper_l()/tolower_l() will not be so broken
     562              :          * as to need guard tests.  When using the default collation, we apply
     563              :          * the traditional Postgres behavior that forces ASCII-style treatment
     564              :          * of I/i, but in non-default collations you get exactly what the
     565              :          * collation says.
     566              :          */
     567            0 :         for (p = dest; *p; p++)
     568              :         {
     569            0 :             if (locale->is_default)
     570              :             {
     571            0 :                 if (wasalnum)
     572              :                 {
     573            0 :                     if (*p >= 'A' && *p <= 'Z')
     574            0 :                         *p += 'a' - 'A';
                                            /*
                                             * NOTE(review): isupper_l() here, and islower_l() in
                                             * the branch below, receive *p without the (unsigned
                                             * char) cast that the tolower_l()/toupper_l() calls
                                             * use.  Where plain char is signed, a high-bit byte
                                             * is passed as a negative value -- confirm whether
                                             * the cast should be added.
                                             */
     575            0 :                     else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
     576            0 :                         *p = tolower_l((unsigned char) *p, loc);
     577              :                 }
     578              :                 else
     579              :                 {
     580            0 :                     if (*p >= 'a' && *p <= 'z')
     581            0 :                         *p -= 'a' - 'A';
     582            0 :                     else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
     583            0 :                         *p = toupper_l((unsigned char) *p, loc);
     584              :                 }
     585              :             }
     586              :             else
     587              :             {
     588            0 :                 if (wasalnum)
     589            0 :                     *p = tolower_l((unsigned char) *p, loc);
     590              :                 else
     591            0 :                     *p = toupper_l((unsigned char) *p, loc);
     592              :             }
                                    /* classify the converted byte; it decides the next byte's case */
     593            0 :             wasalnum = isalnum_l((unsigned char) *p, loc);
     594              :         }
     595              :     }
     596              : 
     597            0 :     return srclen;
     598              : }
     599              : 
                        /*
                         * strtitle_libc_mb
                         *
                         * Title-case conversion that goes through wchar_t.  src is converted with
                         * char2wchar(), each wide character is upper-cased when the previous
                         * (already converted) character was not alphanumeric and lower-cased
                         * otherwise, and the result is converted back with wchar2char().
                         *
                         * The converted string is copied to dest, NUL-terminated, only when
                         * destsize >= result_size + 1.  The return value is the result_size
                         * reported by wchar2char() in either case.
                         *
                         * Reports ERROR "out of memory" when srclen + 1 exceeds
                         * INT_MAX / sizeof(wchar_t).
                         */
     600              : static size_t
     601           18 : strtitle_libc_mb(char *dest, size_t destsize, const char *src, size_t srclen,
     602              :                  pg_locale_t locale)
     603              : {
     604           18 :     locale_t    loc = locale->lt;
     605           18 :     int         wasalnum = false;
     606              :     size_t      result_size;
     607              :     wchar_t    *workspace;
     608              :     char       *result;
     609              :     size_t      curr_char;
     610              :     size_t      max_size;
     611              : 
     612              :     /* Overflow paranoia */
     613           18 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     614            0 :         ereport(ERROR,
     615              :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     616              :                  errmsg("out of memory")));
     617              : 
     618              :     /* Output workspace cannot have more codes than input bytes */
     619           18 :     workspace = palloc_array(wchar_t, srclen + 1);
     620              : 
                            /*
                             * NOTE(review): the return value of char2wchar() is not used; the
                             * loop below relies on workspace being zero-terminated -- confirm
                             * char2wchar() guarantees that.
                             */
     621           18 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     622              : 
     623          165 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     624              :     {
     625          147 :         if (wasalnum)
     626          111 :             workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     627              :         else
     628           36 :             workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     629          147 :         wasalnum = iswalnum_l(workspace[curr_char], loc);
     630              :     }
     631              : 
     632              :     /*
     633              :      * Make result large enough; case change might change number of bytes
     634              :      */
                            /* curr_char is now the number of wide characters before the terminator */
     635           18 :     max_size = curr_char * pg_database_encoding_max_length();
     636           18 :     result = palloc(max_size + 1);
     637              : 
     638           18 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     639              : 
     640           18 :     if (destsize >= result_size + 1)
     641              :     {
     642           18 :         memcpy(dest, result, result_size);
     643           18 :         dest[result_size] = '\0';
     644              :     }
     645              : 
     646           18 :     pfree(workspace);
     647           18 :     pfree(result);
     648              : 
     649           18 :     return result_size;
     650              : }
     651              : 
                        /*
                         * strupper_libc_sb
                         *
                         * Upper-case conversion that works one byte at a time.
                         *
                         * Nothing is written to dest unless srclen + 1 <= destsize.  The return
                         * value is srclen in either case.
                         *
                         * The conversion loop stops at the first zero byte in dest, so bytes after
                         * an embedded NUL in src are copied but not converted.
                         */
     652              : static size_t
     653            0 : strupper_libc_sb(char *dest, size_t destsize, const char *src, size_t srclen,
     654              :                  pg_locale_t locale)
     655              : {
     656            0 :     if (srclen + 1 <= destsize)
     657              :     {
     658            0 :         locale_t    loc = locale->lt;
     659              :         char       *p;
     660              : 
     661            0 :         memcpy(dest, src, srclen);
     662            0 :         dest[srclen] = '\0';
     663              : 
     664              :         /*
     665              :          * Note: we assume that toupper_l() will not be so broken as to need
     666              :          * an islower_l() guard test.  When using the default collation, we
     667              :          * apply the traditional Postgres behavior that forces ASCII-style
     668              :          * treatment of I/i, but in non-default collations you get exactly
     669              :          * what the collation says.
     670              :          */
     671            0 :         for (p = dest; *p; p++)
     672              :         {
     673            0 :             if (locale->is_default)
     674              :             {
     675            0 :                 if (*p >= 'a' && *p <= 'z')
     676            0 :                     *p -= 'a' - 'A';
                                        /*
                                         * NOTE(review): islower_l() receives *p without the
                                         * (unsigned char) cast that the toupper_l() calls use.
                                         * Where plain char is signed, a high-bit byte is passed
                                         * as a negative value -- confirm whether the cast should
                                         * be added.
                                         */
     677            0 :                 else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
     678            0 :                     *p = toupper_l((unsigned char) *p, loc);
     679              :             }
     680              :             else
     681            0 :                 *p = toupper_l((unsigned char) *p, loc);
     682              :         }
     683              :     }
     684              : 
     685            0 :     return srclen;
     686              : }
     687              : 
                        /*
                         * strupper_libc_mb
                         *
                         * Upper-case conversion that goes through wchar_t.  src is converted with
                         * char2wchar(), every wide character is passed through towupper_l(), and
                         * the result is converted back with wchar2char().
                         *
                         * The converted string is copied to dest, NUL-terminated, only when
                         * destsize >= result_size + 1.  The return value is the result_size
                         * reported by wchar2char() in either case.
                         *
                         * Reports ERROR "out of memory" when srclen + 1 exceeds
                         * INT_MAX / sizeof(wchar_t).
                         */
     688              : static size_t
     689       521850 : strupper_libc_mb(char *dest, size_t destsize, const char *src, size_t srclen,
     690              :                  pg_locale_t locale)
     691              : {
     692       521850 :     locale_t    loc = locale->lt;
     693              :     size_t      result_size;
     694              :     wchar_t    *workspace;
     695              :     char       *result;
     696              :     size_t      curr_char;
     697              :     size_t      max_size;
     698              : 
     699              :     /* Overflow paranoia */
     700       521850 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     701            0 :         ereport(ERROR,
     702              :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     703              :                  errmsg("out of memory")));
     704              : 
     705              :     /* Output workspace cannot have more codes than input bytes */
     706       521850 :     workspace = palloc_array(wchar_t, srclen + 1);
     707              : 
                            /*
                             * NOTE(review): the return value of char2wchar() is not used; the
                             * loop below relies on workspace being zero-terminated -- confirm
                             * char2wchar() guarantees that.
                             */
     708       521850 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     709              : 
     710      1652339 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     711      1130489 :         workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     712              : 
     713              :     /*
     714              :      * Make result large enough; case change might change number of bytes
     715              :      */
                            /* curr_char is now the number of wide characters before the terminator */
     716       521850 :     max_size = curr_char * pg_database_encoding_max_length();
     717       521850 :     result = palloc(max_size + 1);
     718              : 
     719       521850 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     720              : 
     721       521850 :     if (destsize >= result_size + 1)
     722              :     {
     723       521850 :         memcpy(dest, result, result_size);
     724       521850 :         dest[result_size] = '\0';
     725              :     }
     726              : 
     727       521850 :     pfree(workspace);
     728       521850 :     pfree(result);
     729              : 
     730       521850 :     return result_size;
     731              : }
     732              : 
                        /*
                         * create_pg_locale_libc
                         *
                         * Build a pg_locale_t for collation collid, allocated in context with
                         * MemoryContextAllocZero().
                         *
                         * The collate and ctype names are read from the pg_database row of
                         * MyDatabaseId when collid is DEFAULT_COLLATION_OID, and from the
                         * pg_collation row of collid otherwise.  A failed cache lookup is reported
                         * with elog(ERROR).
                         *
                         * The result is always marked deterministic.  collate_is_c and ctype_is_c
                         * are set when the respective name is exactly "C" or "POSIX"; the collate
                         * and ctype method pointers are only assigned when the respective flag is
                         * false.
                         */
     733              : pg_locale_t
     734        17640 : create_pg_locale_libc(Oid collid, MemoryContext context)
     735              : {
     736              :     const char *collate;
     737              :     const char *ctype;
     738              :     locale_t    loc;
     739              :     pg_locale_t result;
     740              : 
     741        17640 :     if (collid == DEFAULT_COLLATION_OID)
     742              :     {
     743              :         HeapTuple   tp;
     744              :         Datum       datum;
     745              : 
     746        17583 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     747        17583 :         if (!HeapTupleIsValid(tp))
     748            0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     749        17583 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     750              :                                        Anum_pg_database_datcollate);
     751        17583 :         collate = TextDatumGetCString(datum);
     752        17583 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     753              :                                        Anum_pg_database_datctype);
     754        17583 :         ctype = TextDatumGetCString(datum);
     755              : 
     756        17583 :         ReleaseSysCache(tp);
     757              :     }
     758              :     else
     759              :     {
     760              :         HeapTuple   tp;
     761              :         Datum       datum;
     762              : 
     763           57 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     764           57 :         if (!HeapTupleIsValid(tp))
     765            0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     766              : 
     767           57 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     768              :                                        Anum_pg_collation_collcollate);
     769           57 :         collate = TextDatumGetCString(datum);
     770           57 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     771              :                                        Anum_pg_collation_collctype);
     772           57 :         ctype = TextDatumGetCString(datum);
     773              : 
     774           57 :         ReleaseSysCache(tp);
     775              :     }
     776              : 
     777              : 
                            /*
                             * NOTE(review): the collate and ctype strings are not freed in this
                             * function -- presumably acceptable for the caller's memory context;
                             * confirm.
                             */
     778        17640 :     loc = make_libc_collator(collate, ctype);
     779              : 
     780        17640 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     781        17640 :     result->deterministic = true;
     782        34714 :     result->collate_is_c = (strcmp(collate, "C") == 0) ||
     783        17074 :         (strcmp(collate, "POSIX") == 0);
     784        34714 :     result->ctype_is_c = (strcmp(ctype, "C") == 0) ||
     785        17074 :         (strcmp(ctype, "POSIX") == 0);
     786        17640 :     result->lt = loc;
     787        17640 :     if (!result->collate_is_c)
     788              :     {
     789              : #ifdef WIN32
     790              :         if (GetDatabaseEncoding() == PG_UTF8)
     791              :             result->collate = &collate_methods_libc_win32_utf8;
     792              :         else
     793              : #endif
     794        17034 :             result->collate = &collate_methods_libc;
     795              :     }
     796        17640 :     if (!result->ctype_is_c)
     797              :     {
                                /*
                                 * UTF-8 has its own method table; other encodings are split on
                                 * whether the encoding's maximum character length exceeds one.
                                 */
     798        17034 :         if (GetDatabaseEncoding() == PG_UTF8)
     799        17002 :             result->ctype = &ctype_methods_libc_utf8;
     800           32 :         else if (pg_database_encoding_max_length() > 1)
     801            0 :             result->ctype = &ctype_methods_libc_other_mb;
     802              :         else
     803           32 :             result->ctype = &ctype_methods_libc_sb;
     804              :     }
     805              : 
     806        17640 :     return result;
     807              : }
     808              : 
     809              : /*
     810              :  * Create a locale_t with the given collation and ctype.
     811              :  *
     812              :  * The "C" and "POSIX" locales are not actually handled by libc, so return
     813              :  * NULL.
     814              :  *
     815              :  * Ensure that no path leaks a locale_t.
     816              :  */
                        /*
                         * When collate and ctype are the same name, a single newlocale() call (or
                         * _create_locale() on Windows) covers both categories.  When they differ,
                         * non-Windows builds create a collate-only locale first and pass it as the
                         * base of a second newlocale() call for ctype; Windows builds report
                         * ERRCODE_FEATURE_NOT_SUPPORTED instead.
                         *
                         * errno is cleared before each newlocale() call because
                         * report_newlocale_failure() treats errno == 0 as "errno was not set".
                         */
     817              : static locale_t
     818        17640 : make_libc_collator(const char *collate, const char *ctype)
     819              : {
     820        17640 :     locale_t    loc = 0;
     821              : 
     822        17640 :     if (strcmp(collate, ctype) == 0)
     823              :     {
     824        17640 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     825              :         {
     826              :             /* Normal case where they're the same */
     827        17034 :             errno = 0;
     828              : #ifndef WIN32
     829        17034 :             loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
     830              :                             NULL);
     831              : #else
     832              :             loc = _create_locale(LC_ALL, collate);
     833              : #endif
     834        17034 :             if (!loc)
     835            0 :                 report_newlocale_failure(collate);
     836              :         }
     837              :     }
     838              :     else
     839              :     {
     840              : #ifndef WIN32
     841              :         /* We need two newlocale() steps */
     842            0 :         locale_t    loc1 = 0;
     843              : 
     844            0 :         if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
     845              :         {
     846            0 :             errno = 0;
     847            0 :             loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
     848            0 :             if (!loc1)
     849            0 :                 report_newlocale_failure(collate);
     850              :         }
     851              : 
     852            0 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     853              :         {
     854            0 :             errno = 0;
                                    /*
                                     * NOTE(review): on success loc1 is not freed here; presumably
                                     * newlocale() takes over the base object -- confirm against
                                     * the newlocale() specification.
                                     */
     855            0 :             loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
     856            0 :             if (!loc)
     857              :             {
                                        /* the base locale is still ours on failure; release it */
     858            0 :                 if (loc1)
     859            0 :                     freelocale(loc1);
     860            0 :                 report_newlocale_failure(ctype);
     861              :             }
     862              :         }
     863              :         else
     864            0 :             loc = loc1;
     865              : #else
     866              : 
     867              :         /*
     868              :          * XXX The _create_locale() API doesn't appear to support this. Could
     869              :          * perhaps be worked around by changing pg_locale_t to contain two
     870              :          * separate fields.
     871              :          */
     872              :         ereport(ERROR,
     873              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     874              :                  errmsg("collations with different collate and ctype values are not supported on this platform")));
     875              : #endif
     876              :     }
     877              : 
     878        17640 :     return loc;
     879              : }
     880              : 
     881              : /*
     882              :  * strncoll_libc
     883              :  *
     884              :  * NUL-terminate arguments and pass to strcoll_l().
     885              :  */
                        /*
                         * Both arguments are copied, each with its own terminator, into a single
                         * buffer: the on-stack sbuf when len1 + 1 + len2 + 1 fits in TEXTBUFLEN,
                         * otherwise a palloc'd buffer that is freed before returning.  Returns
                         * the strcoll_l() result for locale->lt.
                         */
     886              : static int
     887      2773313 : strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2,
     888              :               pg_locale_t locale)
     889              : {
     890              :     char        sbuf[TEXTBUFLEN];
     891      2773313 :     char       *buf = sbuf;
     892      2773313 :     size_t      bufsize1 = len1 + 1;
     893      2773313 :     size_t      bufsize2 = len2 + 1;
     894              :     char       *buf1;
     895              :     char       *buf2;
     896              :     const char *arg1n;
     897              :     const char *arg2n;
     898              :     int         result;
     899              : 
     900      2773313 :     if (bufsize1 + bufsize2 > TEXTBUFLEN)
     901          318 :         buf = palloc(bufsize1 + bufsize2);
     902              : 
                            /* the second copy starts right after the first copy's terminator */
     903      2773313 :     buf1 = buf;
     904      2773313 :     buf2 = buf + bufsize1;
     905              : 
     906      2773313 :     memcpy(buf1, arg1, len1);
     907      2773313 :     buf1[len1] = '\0';
     908      2773313 :     arg1n = buf1;
     909              : 
     910      2773313 :     memcpy(buf2, arg2, len2);
     911      2773313 :     buf2[len2] = '\0';
     912      2773313 :     arg2n = buf2;
     913              : 
     914      2773313 :     result = strcoll_l(arg1n, arg2n, locale->lt);
     915              : 
     916      2773313 :     if (buf != sbuf)
     917          318 :         pfree(buf);
     918              : 
     919      2773313 :     return result;
     920              : }
     921              : 
     922              : /*
     923              :  * strcoll_libc
     924              :  */
                        /*
                         * Pass both arguments straight to strcoll_l() with locale->lt.  No copy
                         * is made, so the arguments must already be NUL-terminated.
                         */
     925              : static int
     926     14471845 : strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
     927              : {
     928     14471845 :     return strcoll_l(arg1, arg2, locale->lt);
     929              : }
     930              : 
     931              : /*
     932              :  * strnxfrm_libc
     933              :  *
     934              :  * NUL-terminate src and pass to strxfrm_l().
     935              :  */
                        /*
                         * The terminated copy of src lives in the on-stack sbuf when srclen + 1
                         * fits in TEXTBUFLEN, otherwise in a palloc'd buffer that is freed before
                         * returning.  Returns the strxfrm_l() result unchanged; the Assert below
                         * only inspects dest when that result is smaller than destsize.
                         */
     936              : static size_t
     937            0 : strnxfrm_libc(char *dest, size_t destsize, const char *src, size_t srclen,
     938              :               pg_locale_t locale)
     939              : {
     940              :     char        sbuf[TEXTBUFLEN];
     941            0 :     char       *buf = sbuf;
     942            0 :     size_t      bufsize = srclen + 1;
     943              :     size_t      result;
     944              : 
     945            0 :     if (bufsize > TEXTBUFLEN)
     946            0 :         buf = palloc(bufsize);
     947              : 
     948              :     /* nul-terminate argument */
     949            0 :     memcpy(buf, src, srclen);
     950            0 :     buf[srclen] = '\0';
     951              : 
     952            0 :     result = strxfrm_l(dest, buf, destsize, locale->lt);
     953              : 
     954            0 :     if (buf != sbuf)
     955            0 :         pfree(buf);
     956              : 
     957              :     /* if dest is defined, it should be nul-terminated */
     958              :     Assert(result >= destsize || dest[result] == '\0');
     959              : 
     960            0 :     return result;
     961              : }
     962              : 
     963              : /*
     964              :  * strxfrm_libc
     965              :  */
                        /*
                         * Pass src straight to strxfrm_l() with locale->lt and return its result.
                         * No copy is made, so src must already be NUL-terminated.  Note that
                         * strxfrm_l() takes (dest, src, destsize), a different argument order from
                         * this function's own signature.
                         */
     966              : static size_t
     967          132 : strxfrm_libc(char *dest, size_t destsize, const char *src, pg_locale_t locale)
     968              : {
     969          132 :     return strxfrm_l(dest, src, destsize, locale->lt);
     970              : }
     971              : 
                        /*
                         * get_collation_actual_version_libc
                         *
                         * Return a version string for the libc collation named collcollate, or
                         * NULL when none is available.
                         *
                         * NULL is returned for "C", "POSIX" and any name starting with "C."
                         * (compared case-insensitively), and on builds where none of the
                         * platform branches below is compiled in.  Otherwise:
                         *   - glibc: a copy of gnu_get_libc_version();
                         *   - platforms defining LC_VERSION_MASK: a copy of the querylocale()
                         *     result, with ERROR if the locale cannot be loaded;
                         *   - Windows: a string built from GetNLSVersionEx() data, NULL if that
                         *     call fails with ERROR_INVALID_PARAMETER, ERROR on other failures.
                         */
     972              : char *
     973        17462 : get_collation_actual_version_libc(const char *collcollate)
     974              : {
     975        17462 :     char       *collversion = NULL;
     976              : 
     977        34831 :     if (pg_strcasecmp("C", collcollate) != 0 &&
     978        34628 :         pg_strncasecmp("C.", collcollate, 2) != 0 &&
     979        17259 :         pg_strcasecmp("POSIX", collcollate) != 0)
     980              :     {
     981              : #if defined(__GLIBC__)
     982              :         /* Use the glibc version because we don't have anything better. */
     983        17245 :         collversion = pstrdup(gnu_get_libc_version());
     984              : #elif defined(LC_VERSION_MASK)
     985              :         locale_t    loc;
     986              : 
     987              :         /* Look up FreeBSD collation version. */
     988              :         loc = newlocale(LC_COLLATE_MASK, collcollate, NULL);
     989              :         if (loc)
     990              :         {
                                    /* copy the string before the locale it came from is freed */
     991              :             collversion =
     992              :                 pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
     993              :             freelocale(loc);
     994              :         }
     995              :         else
     996              :             ereport(ERROR,
     997              :                     (errmsg("could not load locale \"%s\"", collcollate)));
     998              : #elif defined(WIN32)
     999              :         /*
    1000              :          * If we are targeting Windows Vista and above, we can ask for a name
    1001              :          * given a collation name (earlier versions required a location code
    1002              :          * that we don't have).
    1003              :          */
    1004              :         NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
    1005              :         WCHAR       wide_collcollate[LOCALE_NAME_MAX_LENGTH];
    1006              : 
                                /*
                                 * NOTE(review): the return value of MultiByteToWideChar() is not
                                 * checked here -- confirm a failed conversion is acceptable.
                                 */
    1007              :         MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
    1008              :                             LOCALE_NAME_MAX_LENGTH);
    1009              :         if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
    1010              :         {
    1011              :             /*
    1012              :              * GetNLSVersionEx() wants a language tag such as "en-US", not a
    1013              :              * locale name like "English_United States.1252".  Until those
    1014              :              * values can be prevented from entering the system, or 100%
    1015              :              * reliably converted to the more useful tag format, tolerate the
    1016              :              * resulting error and report that we have no version data.
    1017              :              */
    1018              :             if (GetLastError() == ERROR_INVALID_PARAMETER)
    1019              :                 return NULL;
    1020              : 
    1021              :             ereport(ERROR,
    1022              :                     (errmsg("could not get collation version for locale \"%s\": error code %lu",
    1023              :                             collcollate,
    1024              :                             GetLastError())));
    1025              :         }
    1026              :         collversion = psprintf("%lu.%lu,%lu.%lu",
    1027              :                                (version.dwNLSVersion >> 8) & 0xFFFF,
    1028              :                                version.dwNLSVersion & 0xFF,
    1029              :                                (version.dwDefinedVersion >> 8) & 0xFFFF,
    1030              :                                version.dwDefinedVersion & 0xFF);
    1031              : #endif
    1032              :     }
    1033              : 
    1034        17462 :     return collversion;
    1035              : }
    1036              : 
    1037              : /*
    1038              :  * strncoll_libc_win32_utf8
    1039              :  *
    1040              :  * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
    1041              :  * invoke wcscoll_l().
    1042              :  */
    1043              : #ifdef WIN32
                        /*
                         * Each argument gets 2 * len + 2 bytes of buffer, i.e. len + 1 two-byte
                         * units.  Both converted strings share one buffer: the on-stack sbuf when
                         * the combined size fits in TEXTBUFLEN, otherwise a palloc'd buffer that
                         * is freed before returning.
                         *
                         * Reports ERROR if either conversion fails or if wcscoll_l() returns
                         * 2147483647.
                         */
    1044              : static int
    1045              : strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
    1046              :                          size_t len2, pg_locale_t locale)
    1047              : {
    1048              :     char        sbuf[TEXTBUFLEN];
    1049              :     char       *buf = sbuf;
    1050              :     char       *a1p,
    1051              :                *a2p;
    1052              :     size_t      a1len,
    1053              :                 a2len,
    1054              :                 buflen;
    1055              :     int         r;
    1056              :     int         result;
    1057              : 
    1058              :     Assert(GetDatabaseEncoding() == PG_UTF8);
    1059              : 
    1060              :     /*
    1061              :      * In a 32-bit build, twice the input length can overflow size_t, so we
    1062              :      * must be careful.
    1063              :      */
    1064              :     a1len = add_size(add_size(len1, len1), 2);
    1065              :     a2len = add_size(add_size(len2, len2), 2);
    1066              :     buflen = add_size(a1len, a2len);
    1067              : 
    1068              :     if (buflen > TEXTBUFLEN)
    1069              :         buf = palloc(buflen);
    1070              : 
    1071              :     a1p = buf;
    1072              :     a2p = buf + a1len;
    1073              : 
    1074              :     /* API does not work for zero-length input */
    1075              :     if (len1 == 0)
    1076              :         r = 0;
    1077              :     else
    1078              :     {
    1079              :         r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
    1080              :                                 (LPWSTR) a1p, a1len / 2);
    1081              :         if (!r)
    1082              :             ereport(ERROR,
    1083              :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1084              :                             GetLastError())));
    1085              :     }
                            /*
                             * r is the number of wide characters written; terminate right after
                             * them.  Presumably r <= len1, leaving room for the terminator --
                             * TODO confirm.
                             */
    1086              :     ((LPWSTR) a1p)[r] = 0;
    1087              : 
    1088              :     if (len2 == 0)
    1089              :         r = 0;
    1090              :     else
    1091              :     {
    1092              :         r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
    1093              :                                 (LPWSTR) a2p, a2len / 2);
    1094              :         if (!r)
    1095              :             ereport(ERROR,
    1096              :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1097              :                             GetLastError())));
    1098              :     }
    1099              :     ((LPWSTR) a2p)[r] = 0;
    1100              : 
                            /* clear errno so the %m in the error message below reflects wcscoll_l() */
    1101              :     errno = 0;
    1102              :     result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->lt);
    1103              :     if (result == 2147483647)   /* _NLSCMPERROR; missing from mingw headers */
    1104              :         ereport(ERROR,
    1105              :                 (errmsg("could not compare Unicode strings: %m")));
    1106              : 
    1107              :     if (buf != sbuf)
    1108              :         pfree(buf);
    1109              : 
    1110              :     return result;
    1111              : }
    1112              : 
                        /*
                         * strcoll_libc_win32_utf8
                         *
                         * Variant for NUL-terminated arguments: measure both with strlen() and
                         * hand them to strncoll_libc_win32_utf8().
                         */
    1113              : static int
    1114              : strcoll_libc_win32_utf8(const char *arg1, const char *arg2,
    1115              :                         pg_locale_t locale)
    1116              : {
    1117              :     size_t      len1 = strlen(arg1);
    1118              :     size_t      len2 = strlen(arg2);
    1119              : 
    1120              :     return strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale);
    1121              : }
    1122              : #endif                          /* WIN32 */
    1123              : 
    1124              : /* simple subroutine for reporting errors from newlocale() */
                        /*
                         * Raises ERROR with ERRCODE_INVALID_PARAMETER_VALUE, naming localename.
                         * The caller's errno is used for the %m part of the message; when errno
                         * is 0 it is replaced by ENOENT first.  An errdetail is added only when
                         * errno is ENOENT.
                         */
    1125              : void
    1126            0 : report_newlocale_failure(const char *localename)
    1127              : {
    1128              :     int         save_errno;
    1129              : 
    1130              :     /*
    1131              :      * Windows doesn't provide any useful error indication from
    1132              :      * _create_locale(), and BSD-derived platforms don't seem to feel they
    1133              :      * need to set errno either (even though POSIX is pretty clear that
    1134              :      * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
    1135              :      * is what to report.
    1136              :      */
    1137            0 :     if (errno == 0)
    1138            0 :         errno = ENOENT;
    1139              : 
    1140              :     /*
    1141              :      * ENOENT means "no such locale", not "no such file", so clarify that
    1142              :      * errno with an errdetail message.
    1143              :      */
    1144            0 :     save_errno = errno;         /* auxiliary funcs might change errno */
    1145            0 :     ereport(ERROR,
    1146              :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1147              :              errmsg("could not create locale \"%s\": %m",
    1148              :                     localename),
    1149              :              (save_errno == ENOENT ?
    1150              :               errdetail("The operating system could not find any locale data for the locale name \"%s\".",
    1151              :                         localename) : 0)));
    1152              : }
    1153              : 
    1154              : /*
    1155              :  * POSIX doesn't define _l-variants of these functions, but several systems
    1156              :  * have them.  We provide our own replacements here.
    1157              :  */
    1158              : #ifndef HAVE_MBSTOWCS_L
                        /*
                         * Replacement mbstowcs_l(), compiled only when HAVE_MBSTOWCS_L is not
                         * defined.  On Windows it forwards to _mbstowcs_l().  Elsewhere it
                         * installs loc with uselocale(), calls mbstowcs(), and then reinstalls
                         * the locale that uselocale() returned.  The mbstowcs() result is
                         * returned unchanged.
                         */
    1159              : static size_t
    1160      1043135 : mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
    1161              : {
    1162              : #ifdef WIN32
    1163              :     return _mbstowcs_l(dest, src, n, loc);
    1164              : #else
    1165              :     size_t      result;
                            /* NOTE(review): a failed uselocale() is not detected here -- confirm */
    1166      1043135 :     locale_t    save_locale = uselocale(loc);
    1167              : 
    1168      1043135 :     result = mbstowcs(dest, src, n);
    1169      1043135 :     uselocale(save_locale);
    1170      1043135 :     return result;
    1171              : #endif
    1172              : }
    1173              : #endif
    1174              : #ifndef HAVE_WCSTOMBS_L
                        /*
                         * Replacement wcstombs_l(), compiled only when HAVE_WCSTOMBS_L is not
                         * defined.  On Windows it forwards to _wcstombs_l().  Elsewhere it
                         * installs loc with uselocale(), calls wcstombs(), and then reinstalls
                         * the locale that uselocale() returned.  The wcstombs() result is
                         * returned unchanged.
                         */
    1175              : static size_t
    1176      1043135 : wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
    1177              : {
    1178              : #ifdef WIN32
    1179              :     return _wcstombs_l(dest, src, n, loc);
    1180              : #else
    1181              :     size_t      result;
                            /* NOTE(review): a failed uselocale() is not detected here -- confirm */
    1182      1043135 :     locale_t    save_locale = uselocale(loc);
    1183              : 
    1184      1043135 :     result = wcstombs(dest, src, n);
    1185      1043135 :     uselocale(save_locale);
    1186      1043135 :     return result;
    1187              : #endif
    1188              : }
    1189              : #endif
    1190              : 
    1191              : /*
    1192              :  * These functions convert from/to libc's wchar_t, *not* pg_wchar.
    1193              :  * Therefore we keep them here rather than with the mbutils code.
    1194              :  */
    1195              : 
    1196              : /*
    1197              :  * wchar2char --- convert wide characters to multibyte format
    1198              :  *
    1199              :  * This has the same API as the standard wcstombs_l() function; in particular,
    1200              :  * tolen is the maximum number of bytes to store at *to, and *from must be
    1201              :  * zero-terminated.  The output will be zero-terminated iff there is room.
                      :  *
                      :  * A tolen of zero returns 0 immediately.  Otherwise, when loc is
                      :  * (locale_t) 0 the conversion is done by plain wcstombs(), and for any
                      :  * other loc by wcstombs_l(); the result of that call is returned unchanged.
                      :  * In WIN32 builds with a PG_UTF8 database encoding, WideCharToMultiByte()
                      :  * with CP_UTF8 is used instead: a zero return from it is turned into
                      :  * (size_t) -1, and any other return is decremented by one because it
                      :  * counts the zero terminator.
                      :  *
                      :  * No ereport() is issued in this function; whatever failure value the
                      :  * underlying conversion produced is handed back to the caller.
    1202              :  */
    1203              : size_t
    1204      1043135 : wchar2char(char *to, const wchar_t *from, size_t tolen, locale_t loc)
    1205              : {
    1206              :     size_t      result;
    1207              : 
    1208      1043135 :     if (tolen == 0)
    1209            0 :         return 0;
    1210              : 
    1211              : #ifdef WIN32
    1212              : 
    1213              :     /*
    1214              :      * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
    1215              :      * for some reason mbstowcs and wcstombs won't do this for us, so we use
    1216              :      * MultiByteToWideChar().
    1217              :      */
    1218              :     if (GetDatabaseEncoding() == PG_UTF8)
    1219              :     {
    1220              :         result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
    1221              :                                      NULL, NULL);
    1222              :         /* A zero return is failure */
    1223              :         if (result <= 0)    /* result is size_t, so only 0 can satisfy this */
    1224              :             result = -1;    /* stored as (size_t) -1 */
    1225              :         else
    1226              :         {
    1227              :             Assert(result <= tolen);
    1228              :             /* Microsoft counts the zero terminator in the result */
    1229              :             result--;
    1230              :         }
    1231              :     }
    1232              :     else
    1233              : #endif                          /* WIN32 */
    1234      1043135 :     if (loc == (locale_t) 0)    /* in WIN32 builds this "if" is the body of the "else" above */
    1235              :     {
    1236              :         /* Use wcstombs directly for the default locale */
    1237            0 :         result = wcstombs(to, from, tolen);
    1238              :     }
    1239              :     else
    1240              :     {
    1241              :         /* Use wcstombs_l for nondefault locales */
    1242      1043135 :         result = wcstombs_l(to, from, tolen, loc);
    1243              :     }
    1244              : 
    1245      1043135 :     return result;
    1246              : }
    1247              : 
    1248              : /*
    1249              :  * char2wchar --- convert multibyte characters to wide characters
    1250              :  *
    1251              :  * This has almost the API of mbstowcs_l(), except that *from need not be
    1252              :  * null-terminated; instead, the number of input bytes is specified as
    1253              :  * fromlen.  Also, we ereport() rather than returning -1 for invalid
    1254              :  * input encoding.  tolen is the maximum number of wchar_t's to store at *to.
    1255              :  * The output will be zero-terminated iff there is room.
                      :  *
                      :  * A tolen of zero returns 0 immediately.  Outside the WIN32/PG_UTF8 path,
                      :  * the first fromlen bytes of *from are copied into a zero-terminated buffer
                      :  * with pnstrdup(), converted with mbstowcs() when loc is (locale_t) 0 or
                      :  * with mbstowcs_l() otherwise, and the copy is then pfree'd.  In WIN32
                      :  * builds with a PG_UTF8 database encoding, MultiByteToWideChar() with
                      :  * CP_UTF8 does the conversion and the terminating zero is stored here.
                      :  *
                      :  * If either path yields (size_t) -1, pg_verifymbstr() is run on the
                      :  * original bytes and, should it return, a generic ERROR is raised; so a
                      :  * normal return always carries a result other than (size_t) -1.
    1256              :  */
    1257              : static size_t
    1258      1043135 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
    1259              :            locale_t loc)
    1260              : {
    1261              :     size_t      result;
    1262              : 
    1263      1043135 :     if (tolen == 0)
    1264            0 :         return 0;
    1265              : 
    1266              : #ifdef WIN32
    1267              :     /* See WIN32 "Unicode" comment above */
    1268              :     if (GetDatabaseEncoding() == PG_UTF8)
    1269              :     {
    1270              :         /* Win32 API does not work for zero-length input */
    1271              :         if (fromlen == 0)
    1272              :             result = 0;
    1273              :         else
    1274              :         {
    1275              :             result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);    /* tolen - 1 keeps a slot for the terminator stored below */
    1276              :             /* A zero return is failure */
    1277              :             if (result == 0)
    1278              :                 result = -1;    /* stored as (size_t) -1 */
    1279              :         }
    1280              : 
    1281              :         if (result != -1)
    1282              :         {
    1283              :             Assert(result < tolen);
    1284              :             /* Append trailing null wchar (MultiByteToWideChar() does not) */
    1285              :             to[result] = 0;
    1286              :         }
    1287              :     }
    1288              :     else
    1289              : #endif                          /* WIN32 */
    1290              :     {
    1291              :         /* mbstowcs requires ending '\0' */
    1292      1043135 :         char       *str = pnstrdup(from, fromlen);
    1293              : 
    1294      1043135 :         if (loc == (locale_t) 0)
    1295              :         {
    1296              :             /* Use mbstowcs directly for the default locale */
    1297            0 :             result = mbstowcs(to, str, tolen);
    1298              :         }
    1299              :         else
    1300              :         {
    1301              :             /* Use mbstowcs_l for nondefault locales */
    1302      1043135 :             result = mbstowcs_l(to, str, tolen, loc);
    1303              :         }
    1304              : 
    1305      1043135 :         pfree(str);     /* temporary copy is released before the result is examined */
    1306              :     }
    1307              : 
    1308      1043135 :     if (result == -1)   /* result is size_t; the -1 is compared as (size_t) -1 */
    1309              :     {
    1310              :         /*
    1311              :          * Invalid multibyte character encountered.  We try to give a useful
    1312              :          * error message by letting pg_verifymbstr check the string.  But it's
    1313              :          * possible that the string is OK to us, and not OK to mbstowcs ---
    1314              :          * this suggests that the LC_CTYPE locale is different from the
    1315              :          * database encoding.  Give a generic error message if pg_verifymbstr
    1316              :          * can't find anything wrong.
    1317              :          */
    1318            0 :         pg_verifymbstr(from, fromlen, false);   /* might not return */
    1319              :         /* but if it does ... */
    1320            0 :         ereport(ERROR,
    1321              :                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
    1322              :                  errmsg("invalid multibyte character for locale"),
    1323              :                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
    1324              :     }
    1325              : 
    1326      1043135 :     return result;
    1327              : }
        

Generated by: LCOV version 2.0-1