LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_libc.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 191 330 57.9 %
Date: 2025-12-23 14:18:26 Functions: 25 43 58.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-----------------------------------------------------------------------
       2             :  *
       3             :  * PostgreSQL locale utilities for libc
       4             :  *
       5             :  * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
       6             :  *
       7             :  * src/backend/utils/adt/pg_locale_libc.c
       8             :  *
       9             :  *-----------------------------------------------------------------------
      10             :  */
      11             : 
      12             : #include "postgres.h"
      13             : 
      14             : #include <limits.h>
      15             : #include <wctype.h>
      16             : 
      17             : #include "access/htup_details.h"
      18             : #include "catalog/pg_database.h"
      19             : #include "catalog/pg_collation.h"
      20             : #include "mb/pg_wchar.h"
      21             : #include "miscadmin.h"
      22             : #include "utils/builtins.h"
      23             : #include "utils/formatting.h"
      24             : #include "utils/memutils.h"
      25             : #include "utils/pg_locale.h"
      26             : #include "utils/syscache.h"
      27             : 
      28             : #ifdef __GLIBC__
      29             : #include <gnu/libc-version.h>
      30             : #endif
      31             : 
      32             : #ifdef WIN32
      33             : #include <shlwapi.h>
      34             : #endif
      35             : 
      36             : /*
      37             :  * For the libc provider, to provide as much functionality as possible on a
      38             :  * variety of platforms without going so far as to implement everything from
      39             :  * scratch, we use several implementation strategies depending on the
      40             :  * situation:
      41             :  *
      42             :  * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
      43             :  * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
      44             :  * collations don't give a fig about multibyte characters.
      45             :  *
      46             :  * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
      47             :  * This assumes that every platform uses Unicode codepoints directly
      48             :  * as the wchar_t representation of Unicode.  On some platforms
      49             :  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
      50             :  *
      51             :  * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
      52             :  * values up to 255, and punt for values above that.  This is 100% correct
      53             :  * only in single-byte encodings such as LATINn.  However, non-Unicode
      54             :  * multibyte encodings are mostly Far Eastern character sets for which the
      55             :  * properties being tested here aren't very relevant for higher code values
      56             :  * anyway.  The difficulty with using the <wctype.h> functions with
      57             :  * non-Unicode multibyte encodings is that we can have no certainty that
      58             :  * the platform's wchar_t representation matches what we do in pg_wchar
      59             :  * conversions.
      60             :  *
      61             :  * As a special case, in the "default" collation, (2) and (3) force ASCII
      62             :  * letters to follow ASCII upcase/downcase rules, while in a non-default
      63             :  * collation we just let the library functions do what they will.  The case
      64             :  * where this matters is treatment of I/i in Turkish, and the behavior is
      65             :  * meant to match the upper()/lower() SQL functions.
      66             :  *
      67             :  * We store the active collation setting in static variables.  In principle
      68             :  * it could be passed down to here via the regex library's "struct vars" data
      69             :  * structure; but that would require somewhat invasive changes in the regex
      70             :  * library, and right now there's no real benefit to be gained from that.
      71             :  *
      72             :  * NB: the coding here assumes pg_wchar is an unsigned type.
      73             :  */
      74             : 
      75             : /*
      76             :  * Size of stack buffer to use for string transformations, used to avoid heap
      77             :  * allocations in typical cases. This should be large enough that most strings
      78             :  * will fit, but small enough that we feel comfortable putting it on the
      79             :  * stack.
      80             :  */
      81             : #define     TEXTBUFLEN          1024
      82             : 
      83             : extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
      84             : 
      85             : static int  strncoll_libc(const char *arg1, ssize_t len1,
      86             :                           const char *arg2, ssize_t len2,
      87             :                           pg_locale_t locale);
      88             : static size_t strnxfrm_libc(char *dest, size_t destsize,
      89             :                             const char *src, ssize_t srclen,
      90             :                             pg_locale_t locale);
      91             : extern char *get_collation_actual_version_libc(const char *collcollate);
      92             : static locale_t make_libc_collator(const char *collate,
      93             :                                    const char *ctype);
      94             : 
      95             : #ifdef WIN32
      96             : static int  strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
      97             :                                      const char *arg2, ssize_t len2,
      98             :                                      pg_locale_t locale);
      99             : #endif
     100             : 
     101             : static size_t char2wchar(wchar_t *to, size_t tolen, const char *from,
     102             :                          size_t fromlen, locale_t loc);
     103             : 
     104             : static size_t strlower_libc_sb(char *dest, size_t destsize,
     105             :                                const char *src, ssize_t srclen,
     106             :                                pg_locale_t locale);
     107             : static size_t strlower_libc_mb(char *dest, size_t destsize,
     108             :                                const char *src, ssize_t srclen,
     109             :                                pg_locale_t locale);
     110             : static size_t strtitle_libc_sb(char *dest, size_t destsize,
     111             :                                const char *src, ssize_t srclen,
     112             :                                pg_locale_t locale);
     113             : static size_t strtitle_libc_mb(char *dest, size_t destsize,
     114             :                                const char *src, ssize_t srclen,
     115             :                                pg_locale_t locale);
     116             : static size_t strupper_libc_sb(char *dest, size_t destsize,
     117             :                                const char *src, ssize_t srclen,
     118             :                                pg_locale_t locale);
     119             : static size_t strupper_libc_mb(char *dest, size_t destsize,
     120             :                                const char *src, ssize_t srclen,
     121             :                                pg_locale_t locale);
     122             : 
     123             : static bool
     124           0 : wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
     125             : {
     126           0 :     return isdigit_l((unsigned char) wc, locale->lt);
     127             : }
     128             : 
     129             : static bool
     130           0 : wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
     131             : {
     132           0 :     return isalpha_l((unsigned char) wc, locale->lt);
     133             : }
     134             : 
     135             : static bool
     136           0 : wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
     137             : {
     138           0 :     return isalnum_l((unsigned char) wc, locale->lt);
     139             : }
     140             : 
     141             : static bool
     142           0 : wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
     143             : {
     144           0 :     return isupper_l((unsigned char) wc, locale->lt);
     145             : }
     146             : 
     147             : static bool
     148           0 : wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
     149             : {
     150           0 :     return islower_l((unsigned char) wc, locale->lt);
     151             : }
     152             : 
     153             : static bool
     154           0 : wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
     155             : {
     156           0 :     return isgraph_l((unsigned char) wc, locale->lt);
     157             : }
     158             : 
     159             : static bool
     160           0 : wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
     161             : {
     162           0 :     return isprint_l((unsigned char) wc, locale->lt);
     163             : }
     164             : 
     165             : static bool
     166           0 : wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
     167             : {
     168           0 :     return ispunct_l((unsigned char) wc, locale->lt);
     169             : }
     170             : 
     171             : static bool
     172           0 : wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
     173             : {
     174           0 :     return isspace_l((unsigned char) wc, locale->lt);
     175             : }
     176             : 
     177             : static bool
     178           0 : wc_isxdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
     179             : {
     180             : #ifndef WIN32
     181           0 :     return isxdigit_l((unsigned char) wc, locale->lt);
     182             : #else
     183             :     return _isxdigit_l((unsigned char) wc, locale->lt);
     184             : #endif
     185             : }
     186             : 
     187             : static bool
     188           0 : wc_iscased_libc_sb(pg_wchar wc, pg_locale_t locale)
     189             : {
     190           0 :     return isupper_l((unsigned char) wc, locale->lt) ||
     191           0 :         islower_l((unsigned char) wc, locale->lt);
     192             : }
     193             : 
     194             : static bool
     195      131608 : wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
     196             : {
     197      131608 :     return iswdigit_l((wint_t) wc, locale->lt);
     198             : }
     199             : 
     200             : static bool
     201       81148 : wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
     202             : {
     203       81148 :     return iswalpha_l((wint_t) wc, locale->lt);
     204             : }
     205             : 
     206             : static bool
     207     2845662 : wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
     208             : {
     209     2845662 :     return iswalnum_l((wint_t) wc, locale->lt);
     210             : }
     211             : 
     212             : static bool
     213        4112 : wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
     214             : {
     215        4112 :     return iswupper_l((wint_t) wc, locale->lt);
     216             : }
     217             : 
     218             : static bool
     219        4102 : wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
     220             : {
     221        4102 :     return iswlower_l((wint_t) wc, locale->lt);
     222             : }
     223             : 
     224             : static bool
     225        4102 : wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
     226             : {
     227        4102 :     return iswgraph_l((wint_t) wc, locale->lt);
     228             : }
     229             : 
     230             : static bool
     231        4102 : wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
     232             : {
     233        4102 :     return iswprint_l((wint_t) wc, locale->lt);
     234             : }
     235             : 
     236             : static bool
     237        4102 : wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
     238             : {
     239        4102 :     return iswpunct_l((wint_t) wc, locale->lt);
     240             : }
     241             : 
     242             : static bool
     243       48152 : wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
     244             : {
     245       48152 :     return iswspace_l((wint_t) wc, locale->lt);
     246             : }
     247             : 
     248             : static bool
     249          12 : wc_isxdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
     250             : {
     251             : #ifndef WIN32
     252          12 :     return iswxdigit_l((wint_t) wc, locale->lt);
     253             : #else
     254             :     return _iswxdigit_l((wint_t) wc, locale->lt);
     255             : #endif
     256             : }
     257             : 
     258             : static bool
     259           0 : wc_iscased_libc_mb(pg_wchar wc, pg_locale_t locale)
     260             : {
     261           0 :     return iswupper_l((wint_t) wc, locale->lt) ||
     262           0 :         iswlower_l((wint_t) wc, locale->lt);
     263             : }
     264             : 
     265             : static pg_wchar
     266           0 : toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
     267             : {
     268             :     Assert(GetDatabaseEncoding() != PG_UTF8);
     269             : 
     270             :     /* force C behavior for ASCII characters, per comments above */
     271           0 :     if (locale->is_default && wc <= (pg_wchar) 127)
     272           0 :         return pg_ascii_toupper((unsigned char) wc);
     273           0 :     if (wc <= (pg_wchar) UCHAR_MAX)
     274           0 :         return toupper_l((unsigned char) wc, locale->lt);
     275             :     else
     276           0 :         return wc;
     277             : }
     278             : 
     279             : static pg_wchar
     280        9088 : toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
     281             : {
     282             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     283             : 
     284             :     /* force C behavior for ASCII characters, per comments above */
     285        9088 :     if (locale->is_default && wc <= (pg_wchar) 127)
     286         892 :         return pg_ascii_toupper((unsigned char) wc);
     287             :     if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
     288        8196 :         return towupper_l((wint_t) wc, locale->lt);
     289             :     else
     290             :         return wc;
     291             : }
     292             : 
     293             : static pg_wchar
     294           0 : tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
     295             : {
     296             :     Assert(GetDatabaseEncoding() != PG_UTF8);
     297             : 
     298             :     /* force C behavior for ASCII characters, per comments above */
     299           0 :     if (locale->is_default && wc <= (pg_wchar) 127)
     300           0 :         return pg_ascii_tolower((unsigned char) wc);
     301           0 :     if (wc <= (pg_wchar) UCHAR_MAX)
     302           0 :         return tolower_l((unsigned char) wc, locale->lt);
     303             :     else
     304           0 :         return wc;
     305             : }
     306             : 
     307             : static pg_wchar
     308        9092 : tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
     309             : {
     310             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     311             : 
     312             :     /* force C behavior for ASCII characters, per comments above */
     313        9092 :     if (locale->is_default && wc <= (pg_wchar) 127)
     314         896 :         return pg_ascii_tolower((unsigned char) wc);
     315             :     if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
     316        8196 :         return towlower_l((wint_t) wc, locale->lt);
     317             :     else
     318             :         return wc;
     319             : }
     320             : 
     321             : /*
     322             :  * Characters A..Z always downcase to a..z, even in the Turkish
     323             :  * locale. Characters beyond 127 use tolower().
     324             :  */
     325             : static size_t
     326       25040 : downcase_ident_libc_sb(char *dst, size_t dstsize, const char *src,
     327             :                        ssize_t srclen, pg_locale_t locale)
     328             : {
     329       25040 :     locale_t    loc = locale->lt;
     330             :     int         i;
     331             : 
     332      244536 :     for (i = 0; i < srclen && i < dstsize; i++)
     333             :     {
     334      219496 :         unsigned char ch = (unsigned char) src[i];
     335             : 
     336      219496 :         if (ch >= 'A' && ch <= 'Z')
     337       13592 :             ch = pg_ascii_tolower(ch);
     338      205904 :         else if (IS_HIGHBIT_SET(ch) && isupper_l(ch, loc))
     339           0 :             ch = tolower_l(ch, loc);
     340      219496 :         dst[i] = (char) ch;
     341             :     }
     342             : 
     343       25040 :     if (i < dstsize)
     344       25040 :         dst[i] = '\0';
     345             : 
     346       25040 :     return srclen;
     347             : }
     348             : 
     349             : static const struct ctype_methods ctype_methods_libc_sb = {
     350             :     .strlower = strlower_libc_sb,
     351             :     .strtitle = strtitle_libc_sb,
     352             :     .strupper = strupper_libc_sb,
     353             :     /* in libc, casefolding is the same as lowercasing */
     354             :     .strfold = strlower_libc_sb,
     355             :     .downcase_ident = downcase_ident_libc_sb,
     356             :     .wc_isdigit = wc_isdigit_libc_sb,
     357             :     .wc_isalpha = wc_isalpha_libc_sb,
     358             :     .wc_isalnum = wc_isalnum_libc_sb,
     359             :     .wc_isupper = wc_isupper_libc_sb,
     360             :     .wc_islower = wc_islower_libc_sb,
     361             :     .wc_isgraph = wc_isgraph_libc_sb,
     362             :     .wc_isprint = wc_isprint_libc_sb,
     363             :     .wc_ispunct = wc_ispunct_libc_sb,
     364             :     .wc_isspace = wc_isspace_libc_sb,
     365             :     .wc_isxdigit = wc_isxdigit_libc_sb,
     366             :     .wc_iscased = wc_iscased_libc_sb,
     367             :     .wc_toupper = toupper_libc_sb,
     368             :     .wc_tolower = tolower_libc_sb,
     369             : };
     370             : 
     371             : /*
     372             :  * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
     373             :  * single-byte semantics for pattern matching.
     374             :  */
     375             : static const struct ctype_methods ctype_methods_libc_other_mb = {
     376             :     .strlower = strlower_libc_mb,
     377             :     .strtitle = strtitle_libc_mb,
     378             :     .strupper = strupper_libc_mb,
     379             :     /* in libc, casefolding is the same as lowercasing */
     380             :     .strfold = strlower_libc_mb,
     381             :     /* uses plain ASCII semantics for historical reasons */
     382             :     .downcase_ident = NULL,
     383             :     .wc_isdigit = wc_isdigit_libc_sb,
     384             :     .wc_isalpha = wc_isalpha_libc_sb,
     385             :     .wc_isalnum = wc_isalnum_libc_sb,
     386             :     .wc_isupper = wc_isupper_libc_sb,
     387             :     .wc_islower = wc_islower_libc_sb,
     388             :     .wc_isgraph = wc_isgraph_libc_sb,
     389             :     .wc_isprint = wc_isprint_libc_sb,
     390             :     .wc_ispunct = wc_ispunct_libc_sb,
     391             :     .wc_isspace = wc_isspace_libc_sb,
     392             :     .wc_isxdigit = wc_isxdigit_libc_sb,
     393             :     .wc_iscased = wc_iscased_libc_sb,
     394             :     .wc_toupper = toupper_libc_sb,
     395             :     .wc_tolower = tolower_libc_sb,
     396             : };
     397             : 
     398             : static const struct ctype_methods ctype_methods_libc_utf8 = {
     399             :     .strlower = strlower_libc_mb,
     400             :     .strtitle = strtitle_libc_mb,
     401             :     .strupper = strupper_libc_mb,
     402             :     /* in libc, casefolding is the same as lowercasing */
     403             :     .strfold = strlower_libc_mb,
     404             :     /* uses plain ASCII semantics for historical reasons */
     405             :     .downcase_ident = NULL,
     406             :     .wc_isdigit = wc_isdigit_libc_mb,
     407             :     .wc_isalpha = wc_isalpha_libc_mb,
     408             :     .wc_isalnum = wc_isalnum_libc_mb,
     409             :     .wc_isupper = wc_isupper_libc_mb,
     410             :     .wc_islower = wc_islower_libc_mb,
     411             :     .wc_isgraph = wc_isgraph_libc_mb,
     412             :     .wc_isprint = wc_isprint_libc_mb,
     413             :     .wc_ispunct = wc_ispunct_libc_mb,
     414             :     .wc_isspace = wc_isspace_libc_mb,
     415             :     .wc_isxdigit = wc_isxdigit_libc_mb,
     416             :     .wc_iscased = wc_iscased_libc_mb,
     417             :     .wc_toupper = toupper_libc_mb,
     418             :     .wc_tolower = tolower_libc_mb,
     419             : };
     420             : 
     421             : static const struct collate_methods collate_methods_libc = {
     422             :     .strncoll = strncoll_libc,
     423             :     .strnxfrm = strnxfrm_libc,
     424             :     .strnxfrm_prefix = NULL,
     425             : 
     426             :     /*
     427             :      * Unfortunately, it seems that strxfrm() for non-C collations is broken
     428             :      * on many common platforms; testing of multiple versions of glibc reveals
     429             :      * that, for many locales, strcoll() and strxfrm() do not return
     430             :      * consistent results. While no other libc other than Cygwin has so far
     431             :      * been shown to have a problem, we take the conservative course of action
     432             :      * for right now and disable this categorically.  (Users who are certain
     433             :      * this isn't a problem on their system can define TRUST_STRXFRM.)
     434             :      */
     435             : #ifdef TRUST_STRXFRM
     436             :     .strxfrm_is_safe = true,
     437             : #else
     438             :     .strxfrm_is_safe = false,
     439             : #endif
     440             : };
     441             : 
     442             : #ifdef WIN32
     443             : static const struct collate_methods collate_methods_libc_win32_utf8 = {
     444             :     .strncoll = strncoll_libc_win32_utf8,
     445             :     .strnxfrm = strnxfrm_libc,
     446             :     .strnxfrm_prefix = NULL,
     447             : #ifdef TRUST_STRXFRM
     448             :     .strxfrm_is_safe = true,
     449             : #else
     450             :     .strxfrm_is_safe = false,
     451             : #endif
     452             : };
     453             : #endif
     454             : 
     455             : static size_t
     456           0 : strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     457             :                  pg_locale_t locale)
     458             : {
     459           0 :     if (srclen < 0)
     460           0 :         srclen = strlen(src);
     461             : 
     462           0 :     if (srclen + 1 <= destsize)
     463             :     {
     464           0 :         locale_t    loc = locale->lt;
     465             :         char       *p;
     466             : 
     467           0 :         memcpy(dest, src, srclen);
     468           0 :         dest[srclen] = '\0';
     469             : 
     470             :         /*
     471             :          * Note: we assume that tolower_l() will not be so broken as to need
     472             :          * an isupper_l() guard test.  When using the default collation, we
     473             :          * apply the traditional Postgres behavior that forces ASCII-style
     474             :          * treatment of I/i, but in non-default collations you get exactly
     475             :          * what the collation says.
     476             :          */
     477           0 :         for (p = dest; *p; p++)
     478             :         {
     479           0 :             if (locale->is_default)
     480             :             {
     481           0 :                 if (*p >= 'A' && *p <= 'Z')
     482           0 :                     *p += 'a' - 'A';
     483           0 :                 else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
     484           0 :                     *p = tolower_l((unsigned char) *p, loc);
     485             :             }
     486             :             else
     487           0 :                 *p = tolower_l((unsigned char) *p, loc);
     488             :         }
     489             :     }
     490             : 
     491           0 :     return srclen;
     492             : }
     493             : 
     494             : static size_t
     495      866424 : strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     496             :                  pg_locale_t locale)
     497             : {
     498      866424 :     locale_t    loc = locale->lt;
     499             :     size_t      result_size;
     500             :     wchar_t    *workspace;
     501             :     char       *result;
     502             :     size_t      curr_char;
     503             :     size_t      max_size;
     504             : 
     505      866424 :     if (srclen < 0)
     506           0 :         srclen = strlen(src);
     507             : 
     508             :     /* Overflow paranoia */
     509      866424 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     510           0 :         ereport(ERROR,
     511             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     512             :                  errmsg("out of memory")));
     513             : 
     514             :     /* Output workspace cannot have more codes than input bytes */
     515      866424 :     workspace = palloc_array(wchar_t, srclen + 1);
     516             : 
     517      866424 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     518             : 
     519     4552628 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     520     3686204 :         workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     521             : 
     522             :     /*
     523             :      * Make result large enough; case change might change number of bytes
     524             :      */
     525      866424 :     max_size = curr_char * pg_database_encoding_max_length();
     526      866424 :     result = palloc(max_size + 1);
     527             : 
     528      866424 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     529             : 
     530      866424 :     if (result_size + 1 > destsize)
     531           0 :         return result_size;
     532             : 
     533      866424 :     memcpy(dest, result, result_size);
     534      866424 :     dest[result_size] = '\0';
     535             : 
     536      866424 :     pfree(workspace);
     537      866424 :     pfree(result);
     538             : 
     539      866424 :     return result_size;
     540             : }
     541             : 
     542             : static size_t
     543           0 : strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     544             :                  pg_locale_t locale)
     545             : {
     546           0 :     if (srclen < 0)
     547           0 :         srclen = strlen(src);
     548             : 
     549           0 :     if (srclen + 1 <= destsize)
     550             :     {
     551           0 :         locale_t    loc = locale->lt;
     552           0 :         int         wasalnum = false;
     553             :         char       *p;
     554             : 
     555           0 :         memcpy(dest, src, srclen);
     556           0 :         dest[srclen] = '\0';
     557             : 
     558             :         /*
     559             :          * Note: we assume that toupper_l()/tolower_l() will not be so broken
     560             :          * as to need guard tests.  When using the default collation, we apply
     561             :          * the traditional Postgres behavior that forces ASCII-style treatment
     562             :          * of I/i, but in non-default collations you get exactly what the
     563             :          * collation says.
     564             :          */
     565           0 :         for (p = dest; *p; p++)
     566             :         {
     567           0 :             if (locale->is_default)
     568             :             {
     569           0 :                 if (wasalnum)
     570             :                 {
     571           0 :                     if (*p >= 'A' && *p <= 'Z')
     572           0 :                         *p += 'a' - 'A';
     573           0 :                     else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
     574           0 :                         *p = tolower_l((unsigned char) *p, loc);
     575             :                 }
     576             :                 else
     577             :                 {
     578           0 :                     if (*p >= 'a' && *p <= 'z')
     579           0 :                         *p -= 'a' - 'A';
     580           0 :                     else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
     581           0 :                         *p = toupper_l((unsigned char) *p, loc);
     582             :                 }
     583             :             }
     584             :             else
     585             :             {
     586           0 :                 if (wasalnum)
     587           0 :                     *p = tolower_l((unsigned char) *p, loc);
     588             :                 else
     589           0 :                     *p = toupper_l((unsigned char) *p, loc);
     590             :             }
     591           0 :             wasalnum = isalnum_l((unsigned char) *p, loc);
     592             :         }
     593             :     }
     594             : 
     595           0 :     return srclen;
     596             : }
     597             : 
     598             : static size_t
     599           8 : strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     600             :                  pg_locale_t locale)
     601             : {
     602           8 :     locale_t    loc = locale->lt;
     603           8 :     int         wasalnum = false;
     604             :     size_t      result_size;
     605             :     wchar_t    *workspace;
     606             :     char       *result;
     607             :     size_t      curr_char;
     608             :     size_t      max_size;
     609             : 
     610           8 :     if (srclen < 0)
     611           0 :         srclen = strlen(src);
     612             : 
     613             :     /* Overflow paranoia */
     614           8 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     615           0 :         ereport(ERROR,
     616             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     617             :                  errmsg("out of memory")));
     618             : 
     619             :     /* Output workspace cannot have more codes than input bytes */
     620           8 :     workspace = palloc_array(wchar_t, srclen + 1);
     621             : 
     622           8 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     623             : 
     624          80 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     625             :     {
     626          72 :         if (wasalnum)
     627          56 :             workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     628             :         else
     629          16 :             workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     630          72 :         wasalnum = iswalnum_l(workspace[curr_char], loc);
     631             :     }
     632             : 
     633             :     /*
     634             :      * Make result large enough; case change might change number of bytes
     635             :      */
     636           8 :     max_size = curr_char * pg_database_encoding_max_length();
     637           8 :     result = palloc(max_size + 1);
     638             : 
     639           8 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     640             : 
     641           8 :     if (result_size + 1 > destsize)
     642           0 :         return result_size;
     643             : 
     644           8 :     memcpy(dest, result, result_size);
     645           8 :     dest[result_size] = '\0';
     646             : 
     647           8 :     pfree(workspace);
     648           8 :     pfree(result);
     649             : 
     650           8 :     return result_size;
     651             : }
     652             : 
     653             : static size_t
     654           0 : strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     655             :                  pg_locale_t locale)
     656             : {
     657           0 :     if (srclen < 0)
     658           0 :         srclen = strlen(src);
     659             : 
     660           0 :     if (srclen + 1 <= destsize)
     661             :     {
     662           0 :         locale_t    loc = locale->lt;
     663             :         char       *p;
     664             : 
     665           0 :         memcpy(dest, src, srclen);
     666           0 :         dest[srclen] = '\0';
     667             : 
     668             :         /*
     669             :          * Note: we assume that toupper_l() will not be so broken as to need
     670             :          * an islower_l() guard test.  When using the default collation, we
     671             :          * apply the traditional Postgres behavior that forces ASCII-style
     672             :          * treatment of I/i, but in non-default collations you get exactly
     673             :          * what the collation says.
     674             :          */
     675           0 :         for (p = dest; *p; p++)
     676             :         {
     677           0 :             if (locale->is_default)
     678             :             {
     679           0 :                 if (*p >= 'a' && *p <= 'z')
     680           0 :                     *p -= 'a' - 'A';
     681           0 :                 else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
     682           0 :                     *p = toupper_l((unsigned char) *p, loc);
     683             :             }
     684             :             else
     685           0 :                 *p = toupper_l((unsigned char) *p, loc);
     686             :         }
     687             :     }
     688             : 
     689           0 :     return srclen;
     690             : }
     691             : 
     692             : static size_t
     693      721134 : strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     694             :                  pg_locale_t locale)
     695             : {
     696      721134 :     locale_t    loc = locale->lt;
     697             :     size_t      result_size;
     698             :     wchar_t    *workspace;
     699             :     char       *result;
     700             :     size_t      curr_char;
     701             :     size_t      max_size;
     702             : 
     703      721134 :     if (srclen < 0)
     704           0 :         srclen = strlen(src);
     705             : 
     706             :     /* Overflow paranoia */
     707      721134 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     708           0 :         ereport(ERROR,
     709             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     710             :                  errmsg("out of memory")));
     711             : 
     712             :     /* Output workspace cannot have more codes than input bytes */
     713      721134 :     workspace = palloc_array(wchar_t, srclen + 1);
     714             : 
     715      721134 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     716             : 
     717     2380512 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     718     1659378 :         workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     719             : 
     720             :     /*
     721             :      * Make result large enough; case change might change number of bytes
     722             :      */
     723      721134 :     max_size = curr_char * pg_database_encoding_max_length();
     724      721134 :     result = palloc(max_size + 1);
     725             : 
     726      721134 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     727             : 
     728      721134 :     if (result_size + 1 > destsize)
     729           0 :         return result_size;
     730             : 
     731      721134 :     memcpy(dest, result, result_size);
     732      721134 :     dest[result_size] = '\0';
     733             : 
     734      721134 :     pfree(workspace);
     735      721134 :     pfree(result);
     736             : 
     737      721134 :     return result_size;
     738             : }
     739             : 
     740             : pg_locale_t
     741       31720 : create_pg_locale_libc(Oid collid, MemoryContext context)
     742             : {
     743             :     const char *collate;
     744             :     const char *ctype;
     745             :     locale_t    loc;
     746             :     pg_locale_t result;
     747             : 
     748       31720 :     if (collid == DEFAULT_COLLATION_OID)
     749             :     {
     750             :         HeapTuple   tp;
     751             :         Datum       datum;
     752             : 
     753       31630 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     754       31630 :         if (!HeapTupleIsValid(tp))
     755           0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     756       31630 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     757             :                                        Anum_pg_database_datcollate);
     758       31630 :         collate = TextDatumGetCString(datum);
     759       31630 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     760             :                                        Anum_pg_database_datctype);
     761       31630 :         ctype = TextDatumGetCString(datum);
     762             : 
     763       31630 :         ReleaseSysCache(tp);
     764             :     }
     765             :     else
     766             :     {
     767             :         HeapTuple   tp;
     768             :         Datum       datum;
     769             : 
     770          90 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     771          90 :         if (!HeapTupleIsValid(tp))
     772           0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     773             : 
     774          90 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     775             :                                        Anum_pg_collation_collcollate);
     776          90 :         collate = TextDatumGetCString(datum);
     777          90 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     778             :                                        Anum_pg_collation_collctype);
     779          90 :         ctype = TextDatumGetCString(datum);
     780             : 
     781          90 :         ReleaseSysCache(tp);
     782             :     }
     783             : 
     784             : 
     785       31720 :     loc = make_libc_collator(collate, ctype);
     786             : 
     787       31720 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     788       31720 :     result->deterministic = true;
     789       62304 :     result->collate_is_c = (strcmp(collate, "C") == 0) ||
     790       30584 :         (strcmp(collate, "POSIX") == 0);
     791       62304 :     result->ctype_is_c = (strcmp(ctype, "C") == 0) ||
     792       30584 :         (strcmp(ctype, "POSIX") == 0);
     793       31720 :     result->lt = loc;
     794       31720 :     if (!result->collate_is_c)
     795             :     {
     796             : #ifdef WIN32
     797             :         if (GetDatabaseEncoding() == PG_UTF8)
     798             :             result->collate = &collate_methods_libc_win32_utf8;
     799             :         else
     800             : #endif
     801       30520 :             result->collate = &collate_methods_libc;
     802             :     }
     803       31720 :     if (!result->ctype_is_c)
     804             :     {
     805       30520 :         if (GetDatabaseEncoding() == PG_UTF8)
     806       30456 :             result->ctype = &ctype_methods_libc_utf8;
     807          64 :         else if (pg_database_encoding_max_length() > 1)
     808           0 :             result->ctype = &ctype_methods_libc_other_mb;
     809             :         else
     810          64 :             result->ctype = &ctype_methods_libc_sb;
     811             :     }
     812             : 
     813       31720 :     return result;
     814             : }
     815             : 
     816             : /*
     817             :  * Create a locale_t with the given collation and ctype.
     818             :  *
     819             :  * The "C" and "POSIX" locales are not actually handled by libc, so return
     820             :  * NULL.
     821             :  *
     822             :  * Ensure that no path leaks a locale_t.
     823             :  */
     824             : static locale_t
     825       31720 : make_libc_collator(const char *collate, const char *ctype)
     826             : {
     827       31720 :     locale_t    loc = 0;
     828             : 
     829       31720 :     if (strcmp(collate, ctype) == 0)
     830             :     {
     831       31720 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     832             :         {
     833             :             /* Normal case where they're the same */
     834       30520 :             errno = 0;
     835             : #ifndef WIN32
     836       30520 :             loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
     837             :                             NULL);
     838             : #else
     839             :             loc = _create_locale(LC_ALL, collate);
     840             : #endif
     841       30520 :             if (!loc)
     842           0 :                 report_newlocale_failure(collate);
     843             :         }
     844             :     }
     845             :     else
     846             :     {
     847             : #ifndef WIN32
     848             :         /* We need two newlocale() steps */
     849           0 :         locale_t    loc1 = 0;
     850             : 
     851           0 :         if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
     852             :         {
     853           0 :             errno = 0;
     854           0 :             loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
     855           0 :             if (!loc1)
     856           0 :                 report_newlocale_failure(collate);
     857             :         }
     858             : 
     859           0 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     860             :         {
     861           0 :             errno = 0;
     862           0 :             loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
     863           0 :             if (!loc)
     864             :             {
     865           0 :                 if (loc1)
     866           0 :                     freelocale(loc1);
     867           0 :                 report_newlocale_failure(ctype);
     868             :             }
     869             :         }
     870             :         else
     871           0 :             loc = loc1;
     872             : #else
     873             : 
     874             :         /*
     875             :          * XXX The _create_locale() API doesn't appear to support this. Could
     876             :          * perhaps be worked around by changing pg_locale_t to contain two
     877             :          * separate fields.
     878             :          */
     879             :         ereport(ERROR,
     880             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     881             :                  errmsg("collations with different collate and ctype values are not supported on this platform")));
     882             : #endif
     883             :     }
     884             : 
     885       31720 :     return loc;
     886             : }
     887             : 
     888             : /*
     889             :  * strncoll_libc
     890             :  *
     891             :  * NUL-terminate arguments, if necessary, and pass to strcoll_l().
     892             :  *
     893             :  * An input string length of -1 means that it's already NUL-terminated.
     894             :  */
     895             : int
     896    29882876 : strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
     897             :               pg_locale_t locale)
     898             : {
     899             :     char        sbuf[TEXTBUFLEN];
     900    29882876 :     char       *buf = sbuf;
     901    29882876 :     size_t      bufsize1 = (len1 == -1) ? 0 : len1 + 1;
     902    29882876 :     size_t      bufsize2 = (len2 == -1) ? 0 : len2 + 1;
     903             :     const char *arg1n;
     904             :     const char *arg2n;
     905             :     int         result;
     906             : 
     907    29882876 :     if (bufsize1 + bufsize2 > TEXTBUFLEN)
     908         568 :         buf = palloc(bufsize1 + bufsize2);
     909             : 
     910             :     /* nul-terminate arguments if necessary */
     911    29882876 :     if (len1 == -1)
     912             :     {
     913    25452014 :         arg1n = arg1;
     914             :     }
     915             :     else
     916             :     {
     917     4430862 :         char       *buf1 = buf;
     918             : 
     919     4430862 :         memcpy(buf1, arg1, len1);
     920     4430862 :         buf1[len1] = '\0';
     921     4430862 :         arg1n = buf1;
     922             :     }
     923             : 
     924    29882876 :     if (len2 == -1)
     925             :     {
     926    25452014 :         arg2n = arg2;
     927             :     }
     928             :     else
     929             :     {
     930     4430862 :         char       *buf2 = buf + bufsize1;
     931             : 
     932     4430862 :         memcpy(buf2, arg2, len2);
     933     4430862 :         buf2[len2] = '\0';
     934     4430862 :         arg2n = buf2;
     935             :     }
     936             : 
     937    29882876 :     result = strcoll_l(arg1n, arg2n, locale->lt);
     938             : 
     939    29882876 :     if (buf != sbuf)
     940         568 :         pfree(buf);
     941             : 
     942    29882876 :     return result;
     943             : }
     944             : 
     945             : /*
     946             :  * strnxfrm_libc
     947             :  *
     948             :  * NUL-terminate src, if necessary, and pass to strxfrm_l().
     949             :  *
     950             :  * A source length of -1 means that it's already NUL-terminated.
     951             :  */
     952             : size_t
     953         144 : strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
     954             :               pg_locale_t locale)
     955             : {
     956             :     char        sbuf[TEXTBUFLEN];
     957         144 :     char       *buf = sbuf;
     958         144 :     size_t      bufsize = srclen + 1;
     959             :     size_t      result;
     960             : 
     961         144 :     if (srclen == -1)
     962         144 :         return strxfrm_l(dest, src, destsize, locale->lt);
     963             : 
     964           0 :     if (bufsize > TEXTBUFLEN)
     965           0 :         buf = palloc(bufsize);
     966             : 
     967             :     /* nul-terminate argument */
     968           0 :     memcpy(buf, src, srclen);
     969           0 :     buf[srclen] = '\0';
     970             : 
     971           0 :     result = strxfrm_l(dest, buf, destsize, locale->lt);
     972             : 
     973           0 :     if (buf != sbuf)
     974           0 :         pfree(buf);
     975             : 
     976             :     /* if dest is defined, it should be nul-terminated */
     977             :     Assert(result >= destsize || dest[result] == '\0');
     978             : 
     979           0 :     return result;
     980             : }
     981             : 
     982             : char *
     983       31054 : get_collation_actual_version_libc(const char *collcollate)
     984             : {
     985       31054 :     char       *collversion = NULL;
     986             : 
     987       61932 :     if (pg_strcasecmp("C", collcollate) != 0 &&
     988       61560 :         pg_strncasecmp("C.", collcollate, 2) != 0 &&
     989       30682 :         pg_strcasecmp("POSIX", collcollate) != 0)
     990             :     {
     991             : #if defined(__GLIBC__)
     992             :         /* Use the glibc version because we don't have anything better. */
     993       30656 :         collversion = pstrdup(gnu_get_libc_version());
     994             : #elif defined(LC_VERSION_MASK)
     995             :         locale_t    loc;
     996             : 
     997             :         /* Look up FreeBSD collation version. */
     998             :         loc = newlocale(LC_COLLATE_MASK, collcollate, NULL);
     999             :         if (loc)
    1000             :         {
    1001             :             collversion =
    1002             :                 pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
    1003             :             freelocale(loc);
    1004             :         }
    1005             :         else
    1006             :             ereport(ERROR,
    1007             :                     (errmsg("could not load locale \"%s\"", collcollate)));
    1008             : #elif defined(WIN32)
    1009             :         /*
    1010             :          * If we are targeting Windows Vista and above, we can ask for a name
    1011             :          * given a collation name (earlier versions required a location code
    1012             :          * that we don't have).
    1013             :          */
    1014             :         NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
    1015             :         WCHAR       wide_collcollate[LOCALE_NAME_MAX_LENGTH];
    1016             : 
    1017             :         MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
    1018             :                             LOCALE_NAME_MAX_LENGTH);
    1019             :         if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
    1020             :         {
    1021             :             /*
    1022             :              * GetNLSVersionEx() wants a language tag such as "en-US", not a
    1023             :              * locale name like "English_United States.1252".  Until those
    1024             :              * values can be prevented from entering the system, or 100%
    1025             :              * reliably converted to the more useful tag format, tolerate the
    1026             :              * resulting error and report that we have no version data.
    1027             :              */
    1028             :             if (GetLastError() == ERROR_INVALID_PARAMETER)
    1029             :                 return NULL;
    1030             : 
    1031             :             ereport(ERROR,
    1032             :                     (errmsg("could not get collation version for locale \"%s\": error code %lu",
    1033             :                             collcollate,
    1034             :                             GetLastError())));
    1035             :         }
    1036             :         collversion = psprintf("%lu.%lu,%lu.%lu",
    1037             :                                (version.dwNLSVersion >> 8) & 0xFFFF,
    1038             :                                version.dwNLSVersion & 0xFF,
    1039             :                                (version.dwDefinedVersion >> 8) & 0xFFFF,
    1040             :                                version.dwDefinedVersion & 0xFF);
    1041             : #endif
    1042             :     }
    1043             : 
    1044       31054 :     return collversion;
    1045             : }
    1046             : 
    1047             : /*
    1048             :  * strncoll_libc_win32_utf8
    1049             :  *
    1050             :  * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
    1051             :  * invoke wcscoll_l().
    1052             :  *
    1053             :  * An input string length of -1 means that it's NUL-terminated.
    1054             :  */
    1055             : #ifdef WIN32
    1056             : static int
    1057             : strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
    1058             :                          ssize_t len2, pg_locale_t locale)
    1059             : {
    1060             :     char        sbuf[TEXTBUFLEN];
    1061             :     char       *buf = sbuf;
    1062             :     char       *a1p,
    1063             :                *a2p;
    1064             :     int         a1len;
    1065             :     int         a2len;
    1066             :     int         r;
    1067             :     int         result;
    1068             : 
    1069             :     Assert(GetDatabaseEncoding() == PG_UTF8);
    1070             : 
    1071             :     if (len1 == -1)
    1072             :         len1 = strlen(arg1);
    1073             :     if (len2 == -1)
    1074             :         len2 = strlen(arg2);
    1075             : 
    1076             :     a1len = len1 * 2 + 2;
    1077             :     a2len = len2 * 2 + 2;
    1078             : 
    1079             :     if (a1len + a2len > TEXTBUFLEN)
    1080             :         buf = palloc(a1len + a2len);
    1081             : 
    1082             :     a1p = buf;
    1083             :     a2p = buf + a1len;
    1084             : 
    1085             :     /* API does not work for zero-length input */
    1086             :     if (len1 == 0)
    1087             :         r = 0;
    1088             :     else
    1089             :     {
    1090             :         r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
    1091             :                                 (LPWSTR) a1p, a1len / 2);
    1092             :         if (!r)
    1093             :             ereport(ERROR,
    1094             :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1095             :                             GetLastError())));
    1096             :     }
    1097             :     ((LPWSTR) a1p)[r] = 0;
    1098             : 
    1099             :     if (len2 == 0)
    1100             :         r = 0;
    1101             :     else
    1102             :     {
    1103             :         r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
    1104             :                                 (LPWSTR) a2p, a2len / 2);
    1105             :         if (!r)
    1106             :             ereport(ERROR,
    1107             :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1108             :                             GetLastError())));
    1109             :     }
    1110             :     ((LPWSTR) a2p)[r] = 0;
    1111             : 
    1112             :     errno = 0;
    1113             :     result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->lt);
    1114             :     if (result == 2147483647)   /* _NLSCMPERROR; missing from mingw headers */
    1115             :         ereport(ERROR,
    1116             :                 (errmsg("could not compare Unicode strings: %m")));
    1117             : 
    1118             :     if (buf != sbuf)
    1119             :         pfree(buf);
    1120             : 
    1121             :     return result;
    1122             : }
    1123             : #endif                          /* WIN32 */
    1124             : 
    1125             : /* simple subroutine for reporting errors from newlocale() */
    1126             : void
    1127           0 : report_newlocale_failure(const char *localename)
    1128             : {
    1129             :     int         save_errno;
    1130             : 
    1131             :     /*
    1132             :      * Windows doesn't provide any useful error indication from
    1133             :      * _create_locale(), and BSD-derived platforms don't seem to feel they
    1134             :      * need to set errno either (even though POSIX is pretty clear that
    1135             :      * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
    1136             :      * is what to report.
    1137             :      */
    1138           0 :     if (errno == 0)
    1139           0 :         errno = ENOENT;
    1140             : 
    1141             :     /*
    1142             :      * ENOENT means "no such locale", not "no such file", so clarify that
    1143             :      * errno with an errdetail message.
    1144             :      */
    1145           0 :     save_errno = errno;         /* auxiliary funcs might change errno */
    1146           0 :     ereport(ERROR,
    1147             :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1148             :              errmsg("could not create locale \"%s\": %m",
    1149             :                     localename),
    1150             :              (save_errno == ENOENT ?
    1151             :               errdetail("The operating system could not find any locale data for the locale name \"%s\".",
    1152             :                         localename) : 0)));
    1153             : }
    1154             : 
    1155             : /*
    1156             :  * POSIX doesn't define _l-variants of these functions, but several systems
    1157             :  * have them.  We provide our own replacements here.
    1158             :  */
    1159             : #ifndef HAVE_MBSTOWCS_L
    1160             : static size_t
    1161     1587566 : mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
    1162             : {
    1163             : #ifdef WIN32
    1164             :     return _mbstowcs_l(dest, src, n, loc);
    1165             : #else
    1166             :     size_t      result;
    1167     1587566 :     locale_t    save_locale = uselocale(loc);
    1168             : 
    1169     1587566 :     result = mbstowcs(dest, src, n);
    1170     1587566 :     uselocale(save_locale);
    1171     1587566 :     return result;
    1172             : #endif
    1173             : }
    1174             : #endif
    1175             : #ifndef HAVE_WCSTOMBS_L
    1176             : static size_t
    1177     1587566 : wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
    1178             : {
    1179             : #ifdef WIN32
    1180             :     return _wcstombs_l(dest, src, n, loc);
    1181             : #else
    1182             :     size_t      result;
    1183     1587566 :     locale_t    save_locale = uselocale(loc);
    1184             : 
    1185     1587566 :     result = wcstombs(dest, src, n);
    1186     1587566 :     uselocale(save_locale);
    1187     1587566 :     return result;
    1188             : #endif
    1189             : }
    1190             : #endif
    1191             : 
    1192             : /*
    1193             :  * These functions convert from/to libc's wchar_t, *not* pg_wchar.
    1194             :  * Therefore we keep them here rather than with the mbutils code.
    1195             :  */
    1196             : 
    1197             : /*
    1198             :  * wchar2char --- convert wide characters to multibyte format
    1199             :  *
    1200             :  * This has the same API as the standard wcstombs_l() function; in particular,
    1201             :  * tolen is the maximum number of bytes to store at *to, and *from must be
    1202             :  * zero-terminated.  The output will be zero-terminated iff there is room.
                     :  *
                     :  * If tolen is zero, 0 is returned at once and no conversion is attempted.
                     :  * Otherwise the result is whatever the selected conversion routine returns:
                     :  * wcstombs() when loc is (locale_t) 0, wcstombs_l() for any other loc.
                     :  *
                     :  * On Windows with a UTF8 database encoding, WideCharToMultiByte() is used
                     :  * instead, regardless of loc.  In that case a failed conversion is reported
                     :  * as (size_t) -1, and on success the zero terminator that the Windows call
                     :  * includes in its count is subtracted before returning.
    1203             :  */
    1204             : size_t
    1205     1587566 : wchar2char(char *to, const wchar_t *from, size_t tolen, locale_t loc)
    1206             : {
    1207             :     size_t      result;
    1208             : 
    1209     1587566 :     if (tolen == 0)
    1210           0 :         return 0;
    1211             : 
    1212             : #ifdef WIN32
    1213             : 
    1214             :     /*
    1215             :      * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
    1216             :      * for some reason mbstowcs and wcstombs won't do this for us, so we use
    1217             :      * WideCharToMultiByte() here and MultiByteToWideChar() in char2wchar().
    1218             :      */
    1219             :     if (GetDatabaseEncoding() == PG_UTF8)
    1220             :     {
    1221             :         result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
    1222             :                                      NULL, NULL);
    1223             :         /* A zero return is failure; result is a size_t, so "<= 0" means "== 0" */
    1224             :         if (result <= 0)
    1225             :             result = -1;    /* stored as (size_t) -1, like wcstombs() on error */
    1226             :         else
    1227             :         {
    1228             :             Assert(result <= tolen);
    1229             :             /* Microsoft counts the zero terminator in the result */
    1230             :             result--;
    1231             :         }
    1232             :     }
    1233             :     else
    1234             : #endif                          /* WIN32 */
    1235     1587566 :     if (loc == (locale_t) 0)
    1236             :     {
    1237             :         /* Use wcstombs directly for the default locale */
    1238           0 :         result = wcstombs(to, from, tolen);
    1239             :     }
    1240             :     else
    1241             :     {
    1242             :         /* Use wcstombs_l for nondefault locales */
    1243     1587566 :         result = wcstombs_l(to, from, tolen, loc);
    1244             :     }
    1245             : 
    1246     1587566 :     return result;
    1247             : }
    1248             : 
    1249             : /*
    1250             :  * char2wchar --- convert multibyte characters to wide characters
    1251             :  *
    1252             :  * This has almost the API of mbstowcs_l(), except that *from need not be
    1253             :  * null-terminated; instead, the number of input bytes is specified as
    1254             :  * fromlen.  Also, we ereport() rather than returning -1 for invalid
    1255             :  * input encoding.  tolen is the maximum number of wchar_t's to store at *to.
    1256             :  * The output will be zero-terminated iff there is room.
                     :  *
                     :  * If tolen is zero, 0 is returned at once and no conversion is attempted.
                     :  * Otherwise the return value is the result of the conversion routine used:
                     :  * mbstowcs() when loc is (locale_t) 0, mbstowcs_l() for any other loc.
                     :  *
                     :  * On Windows with a UTF8 database encoding, MultiByteToWideChar() is used
                     :  * instead, regardless of loc.  That path converts into at most tolen - 1
                     :  * elements and then stores the terminator itself, and it treats zero-length
                     :  * input as a successful conversion of zero characters.
    1257             :  */
    1258             : static size_t
    1259     1587566 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
    1260             :            locale_t loc)
    1261             : {
    1262             :     size_t      result;
    1263             : 
    1264     1587566 :     if (tolen == 0)
    1265           0 :         return 0;
    1266             : 
    1267             : #ifdef WIN32
    1268             :     /* See WIN32 "Unicode" comment in wchar2char() */
    1269             :     if (GetDatabaseEncoding() == PG_UTF8)
    1270             :     {
    1271             :         /* Win32 API does not work for zero-length input */
    1272             :         if (fromlen == 0)
    1273             :             result = 0;
    1274             :         else
    1275             :         {
    1276             :             result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);  /* tolen - 1 keeps one slot free for the terminator */
    1277             :             /* A zero return is failure */
    1278             :             if (result == 0)
    1279             :                 result = -1;
    1280             :         }
    1281             : 
    1282             :         if (result != -1)
    1283             :         {
    1284             :             Assert(result < tolen);
    1285             :             /* Append trailing null wchar (MultiByteToWideChar() does not) */
    1286             :             to[result] = 0;
    1287             :         }
    1288             :     }
    1289             :     else
    1290             : #endif                          /* WIN32 */
    1291             :     {
    1292             :         /* mbstowcs requires ending '\0', so convert from a terminated copy */
    1293     1587566 :         char       *str = pnstrdup(from, fromlen);
    1294             : 
    1295     1587566 :         if (loc == (locale_t) 0)
    1296             :         {
    1297             :             /* Use mbstowcs directly for the default locale */
    1298           0 :             result = mbstowcs(to, str, tolen);
    1299             :         }
    1300             :         else
    1301             :         {
    1302             :             /* Use mbstowcs_l for nondefault locales */
    1303     1587566 :             result = mbstowcs_l(to, str, tolen, loc);
    1304             :         }
    1305             : 
    1306     1587566 :         pfree(str);     /* the copy is no longer needed once converted */
    1307             :     }
    1308             : 
    1309     1587566 :     if (result == -1)
    1310             :     {
    1311             :         /*
    1312             :          * Invalid multibyte character encountered.  We try to give a useful
    1313             :          * error message by letting pg_verifymbstr check the string.  But it's
    1314             :          * possible that the string is OK to us, and not OK to mbstowcs ---
    1315             :          * this suggests that the LC_CTYPE locale is different from the
    1316             :          * database encoding.  Give a generic error message if pg_verifymbstr
    1317             :          * can't find anything wrong.
    1318             :          */
    1319           0 :         pg_verifymbstr(from, fromlen, false);   /* might not return */
    1320             :         /* but if it does ... */
    1321           0 :         ereport(ERROR,
    1322             :                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
    1323             :                  errmsg("invalid multibyte character for locale"),
    1324             :                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
    1325             :     }
    1326             : 
    1327     1587566 :     return result;
    1328             : }

Generated by: LCOV version 1.16