LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_libc.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 179 305 58.7 %
Date: 2025-07-02 01:17:55 Functions: 23 40 57.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-----------------------------------------------------------------------
       2             :  *
       3             :  * PostgreSQL locale utilities for libc
       4             :  *
       5             :  * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
       6             :  *
       7             :  * src/backend/utils/adt/pg_locale_libc.c
       8             :  *
       9             :  *-----------------------------------------------------------------------
      10             :  */
      11             : 
      12             : #include "postgres.h"
      13             : 
      14             : #include <limits.h>
      15             : #include <wctype.h>
      16             : 
      17             : #include "access/htup_details.h"
      18             : #include "catalog/pg_database.h"
      19             : #include "catalog/pg_collation.h"
      20             : #include "mb/pg_wchar.h"
      21             : #include "miscadmin.h"
      22             : #include "utils/builtins.h"
      23             : #include "utils/formatting.h"
      24             : #include "utils/memutils.h"
      25             : #include "utils/pg_locale.h"
      26             : #include "utils/syscache.h"
      27             : 
      28             : #ifdef __GLIBC__
      29             : #include <gnu/libc-version.h>
      30             : #endif
      31             : 
      32             : #ifdef WIN32
      33             : #include <shlwapi.h>
      34             : #endif
      35             : 
      36             : /*
      37             :  * For the libc provider, to provide as much functionality as possible on a
      38             :  * variety of platforms without going so far as to implement everything from
      39             :  * scratch, we use several implementation strategies depending on the
      40             :  * situation:
      41             :  *
      42             :  * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
      43             :  * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
      44             :  * collations don't give a fig about multibyte characters.
      45             :  *
      46             :  * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
      47             :  * This assumes that every platform uses Unicode codepoints directly
      48             :  * as the wchar_t representation of Unicode.  (XXX: ICU makes this assumption
      49             :  * even for non-UTF8 encodings, which may be a problem.)  On some platforms
      50             :  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
      51             :  *
      52             :  * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
      53             :  * values up to 255, and punt for values above that.  This is 100% correct
      54             :  * only in single-byte encodings such as LATINn.  However, non-Unicode
      55             :  * multibyte encodings are mostly Far Eastern character sets for which the
      56             :  * properties being tested here aren't very relevant for higher code values
      57             :  * anyway.  The difficulty with using the <wctype.h> functions with
      58             :  * non-Unicode multibyte encodings is that we can have no certainty that
      59             :  * the platform's wchar_t representation matches what we do in pg_wchar
      60             :  * conversions.
      61             :  *
      62             :  * As a special case, in the "default" collation, (2) and (3) force ASCII
      63             :  * letters to follow ASCII upcase/downcase rules, while in a non-default
      64             :  * collation we just let the library functions do what they will.  The case
      65             :  * where this matters is treatment of I/i in Turkish, and the behavior is
      66             :  * meant to match the upper()/lower() SQL functions.
      67             :  *
      68             :  * We store the active collation setting in static variables.  In principle
      69             :  * it could be passed down to here via the regex library's "struct vars" data
      70             :  * structure; but that would require somewhat invasive changes in the regex
      71             :  * library, and right now there's no real benefit to be gained from that.
      72             :  *
      73             :  * NB: the coding here assumes pg_wchar is an unsigned type.
      74             :  */
      75             : 
      76             : /*
      77             :  * Size of stack buffer to use for string transformations, used to avoid heap
      78             :  * allocations in typical cases. This should be large enough that most strings
      79             :  * will fit, but small enough that we feel comfortable putting it on the
      80             :  * stack.
      81             :  */
      82             : #define     TEXTBUFLEN          1024
      83             : 
      84             : extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
      85             : 
      86             : static int  strncoll_libc(const char *arg1, ssize_t len1,
      87             :                           const char *arg2, ssize_t len2,
      88             :                           pg_locale_t locale);
      89             : static size_t strnxfrm_libc(char *dest, size_t destsize,
      90             :                             const char *src, ssize_t srclen,
      91             :                             pg_locale_t locale);
      92             : extern char *get_collation_actual_version_libc(const char *collcollate);
      93             : static locale_t make_libc_collator(const char *collate,
      94             :                                    const char *ctype);
      95             : 
      96             : #ifdef WIN32
      97             : static int  strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
      98             :                                      const char *arg2, ssize_t len2,
      99             :                                      pg_locale_t locale);
     100             : #endif
     101             : 
     102             : static size_t strlower_libc_sb(char *dest, size_t destsize,
     103             :                                const char *src, ssize_t srclen,
     104             :                                pg_locale_t locale);
     105             : static size_t strlower_libc_mb(char *dest, size_t destsize,
     106             :                                const char *src, ssize_t srclen,
     107             :                                pg_locale_t locale);
     108             : static size_t strtitle_libc_sb(char *dest, size_t destsize,
     109             :                                const char *src, ssize_t srclen,
     110             :                                pg_locale_t locale);
     111             : static size_t strtitle_libc_mb(char *dest, size_t destsize,
     112             :                                const char *src, ssize_t srclen,
     113             :                                pg_locale_t locale);
     114             : static size_t strupper_libc_sb(char *dest, size_t destsize,
     115             :                                const char *src, ssize_t srclen,
     116             :                                pg_locale_t locale);
     117             : static size_t strupper_libc_mb(char *dest, size_t destsize,
     118             :                                const char *src, ssize_t srclen,
     119             :                                pg_locale_t locale);
     120             : 
     121             : static bool
     122           0 : wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
     123             : {
     124           0 :     return isdigit_l((unsigned char) wc, locale->info.lt);
     125             : }
     126             : 
     127             : static bool
     128           0 : wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
     129             : {
     130           0 :     return isalpha_l((unsigned char) wc, locale->info.lt);
     131             : }
     132             : 
     133             : static bool
     134           0 : wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
     135             : {
     136           0 :     return isalnum_l((unsigned char) wc, locale->info.lt);
     137             : }
     138             : 
     139             : static bool
     140           0 : wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
     141             : {
     142           0 :     return isupper_l((unsigned char) wc, locale->info.lt);
     143             : }
     144             : 
     145             : static bool
     146           0 : wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
     147             : {
     148           0 :     return islower_l((unsigned char) wc, locale->info.lt);
     149             : }
     150             : 
     151             : static bool
     152           0 : wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
     153             : {
     154           0 :     return isgraph_l((unsigned char) wc, locale->info.lt);
     155             : }
     156             : 
     157             : static bool
     158           0 : wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
     159             : {
     160           0 :     return isprint_l((unsigned char) wc, locale->info.lt);
     161             : }
     162             : 
     163             : static bool
     164           0 : wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
     165             : {
     166           0 :     return ispunct_l((unsigned char) wc, locale->info.lt);
     167             : }
     168             : 
     169             : static bool
     170           0 : wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
     171             : {
     172           0 :     return isspace_l((unsigned char) wc, locale->info.lt);
     173             : }
     174             : 
     175             : static bool
     176      106840 : wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
     177             : {
     178      106840 :     return iswdigit_l((wint_t) wc, locale->info.lt);
     179             : }
     180             : 
     181             : static bool
     182       12544 : wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
     183             : {
     184       12544 :     return iswalpha_l((wint_t) wc, locale->info.lt);
     185             : }
     186             : 
     187             : static bool
     188       45076 : wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
     189             : {
     190       45076 :     return iswalnum_l((wint_t) wc, locale->info.lt);
     191             : }
     192             : 
     193             : static bool
     194        4112 : wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
     195             : {
     196        4112 :     return iswupper_l((wint_t) wc, locale->info.lt);
     197             : }
     198             : 
     199             : static bool
     200        4102 : wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
     201             : {
     202        4102 :     return iswlower_l((wint_t) wc, locale->info.lt);
     203             : }
     204             : 
     205             : static bool
     206        4102 : wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
     207             : {
     208        4102 :     return iswgraph_l((wint_t) wc, locale->info.lt);
     209             : }
     210             : 
     211             : static bool
     212        4102 : wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
     213             : {
     214        4102 :     return iswprint_l((wint_t) wc, locale->info.lt);
     215             : }
     216             : 
     217             : static bool
     218        4102 : wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
     219             : {
     220        4102 :     return iswpunct_l((wint_t) wc, locale->info.lt);
     221             : }
     222             : 
     223             : static bool
     224       47700 : wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
     225             : {
     226       47700 :     return iswspace_l((wint_t) wc, locale->info.lt);
     227             : }
     228             : 
     229             : static char
     230           0 : char_tolower_libc(unsigned char ch, pg_locale_t locale)
     231             : {
     232             :     Assert(pg_database_encoding_max_length() == 1);
     233           0 :     return tolower_l(ch, locale->info.lt);
     234             : }
     235             : 
     236             : static bool
     237           0 : char_is_cased_libc(char ch, pg_locale_t locale)
     238             : {
     239           0 :     bool        is_multibyte = pg_database_encoding_max_length() > 1;
     240             : 
     241           0 :     if (is_multibyte && IS_HIGHBIT_SET(ch))
     242           0 :         return true;
     243             :     else
     244           0 :         return isalpha_l((unsigned char) ch, locale->info.lt);
     245             : }
     246             : 
     247             : static pg_wchar
     248           0 : toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
     249             : {
     250             :     Assert(GetDatabaseEncoding() != PG_UTF8);
     251             : 
     252             :     /* force C behavior for ASCII characters, per comments above */
     253           0 :     if (locale->is_default && wc <= (pg_wchar) 127)
     254           0 :         return pg_ascii_toupper((unsigned char) wc);
     255           0 :     if (wc <= (pg_wchar) UCHAR_MAX)
     256           0 :         return toupper_l((unsigned char) wc, locale->info.lt);
     257             :     else
     258           0 :         return wc;
     259             : }
     260             : 
     261             : static pg_wchar
     262        9088 : toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
     263             : {
     264             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     265             : 
     266             :     /* force C behavior for ASCII characters, per comments above */
     267        9088 :     if (locale->is_default && wc <= (pg_wchar) 127)
     268         892 :         return pg_ascii_toupper((unsigned char) wc);
     269             :     if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
     270        8196 :         return towupper_l((wint_t) wc, locale->info.lt);
     271             :     else
     272             :         return wc;
     273             : }
     274             : 
     275             : static pg_wchar
     276           0 : tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
     277             : {
     278             :     Assert(GetDatabaseEncoding() != PG_UTF8);
     279             : 
     280             :     /* force C behavior for ASCII characters, per comments above */
     281           0 :     if (locale->is_default && wc <= (pg_wchar) 127)
     282           0 :         return pg_ascii_tolower((unsigned char) wc);
     283           0 :     if (wc <= (pg_wchar) UCHAR_MAX)
     284           0 :         return tolower_l((unsigned char) wc, locale->info.lt);
     285             :     else
     286           0 :         return wc;
     287             : }
     288             : 
     289             : static pg_wchar
     290        9092 : tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
     291             : {
     292             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     293             : 
     294             :     /* force C behavior for ASCII characters, per comments above */
     295        9092 :     if (locale->is_default && wc <= (pg_wchar) 127)
     296         896 :         return pg_ascii_tolower((unsigned char) wc);
     297             :     if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
     298        8196 :         return towlower_l((wint_t) wc, locale->info.lt);
     299             :     else
     300             :         return wc;
     301             : }
     302             : 
     303             : static const struct ctype_methods ctype_methods_libc_sb = {
     304             :     .strlower = strlower_libc_sb,
     305             :     .strtitle = strtitle_libc_sb,
     306             :     .strupper = strupper_libc_sb,
     307             :     .wc_isdigit = wc_isdigit_libc_sb,
     308             :     .wc_isalpha = wc_isalpha_libc_sb,
     309             :     .wc_isalnum = wc_isalnum_libc_sb,
     310             :     .wc_isupper = wc_isupper_libc_sb,
     311             :     .wc_islower = wc_islower_libc_sb,
     312             :     .wc_isgraph = wc_isgraph_libc_sb,
     313             :     .wc_isprint = wc_isprint_libc_sb,
     314             :     .wc_ispunct = wc_ispunct_libc_sb,
     315             :     .wc_isspace = wc_isspace_libc_sb,
     316             :     .char_is_cased = char_is_cased_libc,
     317             :     .char_tolower = char_tolower_libc,
     318             :     .wc_toupper = toupper_libc_sb,
     319             :     .wc_tolower = tolower_libc_sb,
     320             :     .max_chr = UCHAR_MAX,
     321             : };
     322             : 
     323             : /*
     324             :  * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
     325             :  * single-byte semantics for pattern matching.
     326             :  */
     327             : static const struct ctype_methods ctype_methods_libc_other_mb = {
     328             :     .strlower = strlower_libc_mb,
     329             :     .strtitle = strtitle_libc_mb,
     330             :     .strupper = strupper_libc_mb,
     331             :     .wc_isdigit = wc_isdigit_libc_sb,
     332             :     .wc_isalpha = wc_isalpha_libc_sb,
     333             :     .wc_isalnum = wc_isalnum_libc_sb,
     334             :     .wc_isupper = wc_isupper_libc_sb,
     335             :     .wc_islower = wc_islower_libc_sb,
     336             :     .wc_isgraph = wc_isgraph_libc_sb,
     337             :     .wc_isprint = wc_isprint_libc_sb,
     338             :     .wc_ispunct = wc_ispunct_libc_sb,
     339             :     .wc_isspace = wc_isspace_libc_sb,
     340             :     .char_is_cased = char_is_cased_libc,
     341             :     .char_tolower = char_tolower_libc,
     342             :     .wc_toupper = toupper_libc_sb,
     343             :     .wc_tolower = tolower_libc_sb,
     344             :     .max_chr = UCHAR_MAX,
     345             : };
     346             : 
     347             : static const struct ctype_methods ctype_methods_libc_utf8 = {
     348             :     .strlower = strlower_libc_mb,
     349             :     .strtitle = strtitle_libc_mb,
     350             :     .strupper = strupper_libc_mb,
     351             :     .wc_isdigit = wc_isdigit_libc_mb,
     352             :     .wc_isalpha = wc_isalpha_libc_mb,
     353             :     .wc_isalnum = wc_isalnum_libc_mb,
     354             :     .wc_isupper = wc_isupper_libc_mb,
     355             :     .wc_islower = wc_islower_libc_mb,
     356             :     .wc_isgraph = wc_isgraph_libc_mb,
     357             :     .wc_isprint = wc_isprint_libc_mb,
     358             :     .wc_ispunct = wc_ispunct_libc_mb,
     359             :     .wc_isspace = wc_isspace_libc_mb,
     360             :     .char_is_cased = char_is_cased_libc,
     361             :     .char_tolower = char_tolower_libc,
     362             :     .wc_toupper = toupper_libc_mb,
     363             :     .wc_tolower = tolower_libc_mb,
     364             : };
     365             : 
     366             : static const struct collate_methods collate_methods_libc = {
     367             :     .strncoll = strncoll_libc,
     368             :     .strnxfrm = strnxfrm_libc,
     369             :     .strnxfrm_prefix = NULL,
     370             : 
     371             :     /*
     372             :      * Unfortunately, it seems that strxfrm() for non-C collations is broken
     373             :      * on many common platforms; testing of multiple versions of glibc reveals
     374             :      * that, for many locales, strcoll() and strxfrm() do not return
     375             :      * consistent results. While no other libc other than Cygwin has so far
     376             :      * been shown to have a problem, we take the conservative course of action
     377             :      * for right now and disable this categorically.  (Users who are certain
     378             :      * this isn't a problem on their system can define TRUST_STRXFRM.)
     379             :      */
     380             : #ifdef TRUST_STRXFRM
     381             :     .strxfrm_is_safe = true,
     382             : #else
     383             :     .strxfrm_is_safe = false,
     384             : #endif
     385             : };
     386             : 
     387             : #ifdef WIN32
     388             : static const struct collate_methods collate_methods_libc_win32_utf8 = {
     389             :     .strncoll = strncoll_libc_win32_utf8,
     390             :     .strnxfrm = strnxfrm_libc,
     391             :     .strnxfrm_prefix = NULL,
     392             : #ifdef TRUST_STRXFRM
     393             :     .strxfrm_is_safe = true,
     394             : #else
     395             :     .strxfrm_is_safe = false,
     396             : #endif
     397             : };
     398             : #endif
     399             : 
     400             : static size_t
     401           0 : strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     402             :                  pg_locale_t locale)
     403             : {
     404           0 :     if (srclen < 0)
     405           0 :         srclen = strlen(src);
     406             : 
     407           0 :     if (srclen + 1 <= destsize)
     408             :     {
     409           0 :         locale_t    loc = locale->info.lt;
     410             :         char       *p;
     411             : 
     412           0 :         if (srclen + 1 > destsize)
     413           0 :             return srclen;
     414             : 
     415           0 :         memcpy(dest, src, srclen);
     416           0 :         dest[srclen] = '\0';
     417             : 
     418             :         /*
     419             :          * Note: we assume that tolower_l() will not be so broken as to need
     420             :          * an isupper_l() guard test.  When using the default collation, we
     421             :          * apply the traditional Postgres behavior that forces ASCII-style
     422             :          * treatment of I/i, but in non-default collations you get exactly
     423             :          * what the collation says.
     424             :          */
     425           0 :         for (p = dest; *p; p++)
     426             :         {
     427           0 :             if (locale->is_default)
     428           0 :                 *p = pg_tolower((unsigned char) *p);
     429             :             else
     430           0 :                 *p = tolower_l((unsigned char) *p, loc);
     431             :         }
     432             :     }
     433             : 
     434           0 :     return srclen;
     435             : }
     436             : 
     437             : static size_t
     438      422896 : strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     439             :                  pg_locale_t locale)
     440             : {
     441      422896 :     locale_t    loc = locale->info.lt;
     442             :     size_t      result_size;
     443             :     wchar_t    *workspace;
     444             :     char       *result;
     445             :     size_t      curr_char;
     446             :     size_t      max_size;
     447             : 
     448      422896 :     if (srclen < 0)
     449           0 :         srclen = strlen(src);
     450             : 
     451             :     /* Overflow paranoia */
     452      422896 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     453           0 :         ereport(ERROR,
     454             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     455             :                  errmsg("out of memory")));
     456             : 
     457             :     /* Output workspace cannot have more codes than input bytes */
     458      422896 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     459             : 
     460      422896 :     char2wchar(workspace, srclen + 1, src, srclen, locale);
     461             : 
     462     3643230 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     463     3220334 :         workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     464             : 
     465             :     /*
     466             :      * Make result large enough; case change might change number of bytes
     467             :      */
     468      422896 :     max_size = curr_char * pg_database_encoding_max_length();
     469      422896 :     result = palloc(max_size + 1);
     470             : 
     471      422896 :     result_size = wchar2char(result, workspace, max_size + 1, locale);
     472             : 
     473      422896 :     if (result_size + 1 > destsize)
     474           0 :         return result_size;
     475             : 
     476      422896 :     memcpy(dest, result, result_size);
     477      422896 :     dest[result_size] = '\0';
     478             : 
     479      422896 :     pfree(workspace);
     480      422896 :     pfree(result);
     481             : 
     482      422896 :     return result_size;
     483             : }
     484             : 
     485             : static size_t
     486           0 : strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     487             :                  pg_locale_t locale)
     488             : {
     489           0 :     if (srclen < 0)
     490           0 :         srclen = strlen(src);
     491             : 
     492           0 :     if (srclen + 1 <= destsize)
     493             :     {
     494           0 :         locale_t    loc = locale->info.lt;
     495           0 :         int         wasalnum = false;
     496             :         char       *p;
     497             : 
     498           0 :         memcpy(dest, src, srclen);
     499           0 :         dest[srclen] = '\0';
     500             : 
     501             :         /*
     502             :          * Note: we assume that toupper_l()/tolower_l() will not be so broken
     503             :          * as to need guard tests.  When using the default collation, we apply
     504             :          * the traditional Postgres behavior that forces ASCII-style treatment
     505             :          * of I/i, but in non-default collations you get exactly what the
     506             :          * collation says.
     507             :          */
     508           0 :         for (p = dest; *p; p++)
     509             :         {
     510           0 :             if (locale->is_default)
     511             :             {
     512           0 :                 if (wasalnum)
     513           0 :                     *p = pg_tolower((unsigned char) *p);
     514             :                 else
     515           0 :                     *p = pg_toupper((unsigned char) *p);
     516             :             }
     517             :             else
     518             :             {
     519           0 :                 if (wasalnum)
     520           0 :                     *p = tolower_l((unsigned char) *p, loc);
     521             :                 else
     522           0 :                     *p = toupper_l((unsigned char) *p, loc);
     523             :             }
     524           0 :             wasalnum = isalnum_l((unsigned char) *p, loc);
     525             :         }
     526             :     }
     527             : 
     528           0 :     return srclen;
     529             : }
     530             : 
     531             : static size_t
     532           8 : strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     533             :                  pg_locale_t locale)
     534             : {
     535           8 :     locale_t    loc = locale->info.lt;
     536           8 :     int         wasalnum = false;
     537             :     size_t      result_size;
     538             :     wchar_t    *workspace;
     539             :     char       *result;
     540             :     size_t      curr_char;
     541             :     size_t      max_size;
     542             : 
     543           8 :     if (srclen < 0)
     544           0 :         srclen = strlen(src);
     545             : 
     546             :     /* Overflow paranoia */
     547           8 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     548           0 :         ereport(ERROR,
     549             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     550             :                  errmsg("out of memory")));
     551             : 
     552             :     /* Output workspace cannot have more codes than input bytes */
     553           8 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     554             : 
     555           8 :     char2wchar(workspace, srclen + 1, src, srclen, locale);
     556             : 
     557          80 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     558             :     {
     559          72 :         if (wasalnum)
     560          56 :             workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     561             :         else
     562          16 :             workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     563          72 :         wasalnum = iswalnum_l(workspace[curr_char], loc);
     564             :     }
     565             : 
     566             :     /*
     567             :      * Make result large enough; case change might change number of bytes
     568             :      */
     569           8 :     max_size = curr_char * pg_database_encoding_max_length();
     570           8 :     result = palloc(max_size + 1);
     571             : 
     572           8 :     result_size = wchar2char(result, workspace, max_size + 1, locale);
     573             : 
     574           8 :     if (result_size + 1 > destsize)
     575           0 :         return result_size;
     576             : 
     577           8 :     memcpy(dest, result, result_size);
     578           8 :     dest[result_size] = '\0';
     579             : 
     580           8 :     pfree(workspace);
     581           8 :     pfree(result);
     582             : 
     583           8 :     return result_size;
     584             : }
     585             : 
     586             : static size_t
     587           0 : strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     588             :                  pg_locale_t locale)
     589             : {
     590           0 :     if (srclen < 0)
     591           0 :         srclen = strlen(src);
     592             : 
     593           0 :     if (srclen + 1 <= destsize)
     594             :     {
     595           0 :         locale_t    loc = locale->info.lt;
     596             :         char       *p;
     597             : 
     598           0 :         memcpy(dest, src, srclen);
     599           0 :         dest[srclen] = '\0';
     600             : 
     601             :         /*
     602             :          * Note: we assume that toupper_l() will not be so broken as to need
     603             :          * an islower_l() guard test.  When using the default collation, we
     604             :          * apply the traditional Postgres behavior that forces ASCII-style
     605             :          * treatment of I/i, but in non-default collations you get exactly
     606             :          * what the collation says.
     607             :          */
     608           0 :         for (p = dest; *p; p++)
     609             :         {
     610           0 :             if (locale->is_default)
     611           0 :                 *p = pg_toupper((unsigned char) *p);
     612             :             else
     613           0 :                 *p = toupper_l((unsigned char) *p, loc);
     614             :         }
     615             :     }
     616             : 
     617           0 :     return srclen;
     618             : }
     619             : 
     620             : static size_t
     621      717928 : strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     622             :                  pg_locale_t locale)
     623             : {
     624      717928 :     locale_t    loc = locale->info.lt;
     625             :     size_t      result_size;
     626             :     wchar_t    *workspace;
     627             :     char       *result;
     628             :     size_t      curr_char;
     629             :     size_t      max_size;
     630             : 
     631      717928 :     if (srclen < 0)
     632           0 :         srclen = strlen(src);
     633             : 
     634             :     /* Overflow paranoia */
     635      717928 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     636           0 :         ereport(ERROR,
     637             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     638             :                  errmsg("out of memory")));
     639             : 
     640             :     /* Output workspace cannot have more codes than input bytes */
     641      717928 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     642             : 
     643      717928 :     char2wchar(workspace, srclen + 1, src, srclen, locale);
     644             : 
     645     2358142 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     646     1640214 :         workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     647             : 
     648             :     /*
     649             :      * Make result large enough; case change might change number of bytes
     650             :      */
     651      717928 :     max_size = curr_char * pg_database_encoding_max_length();
     652      717928 :     result = palloc(max_size + 1);
     653             : 
     654      717928 :     result_size = wchar2char(result, workspace, max_size + 1, locale);
     655             : 
     656      717928 :     if (result_size + 1 > destsize)
     657           0 :         return result_size;
     658             : 
     659      717928 :     memcpy(dest, result, result_size);
     660      717928 :     dest[result_size] = '\0';
     661             : 
     662      717928 :     pfree(workspace);
     663      717928 :     pfree(result);
     664             : 
     665      717928 :     return result_size;
     666             : }
     667             : 
     668             : pg_locale_t
     669       33370 : create_pg_locale_libc(Oid collid, MemoryContext context)
     670             : {
     671             :     const char *collate;
     672             :     const char *ctype;
     673             :     locale_t    loc;
     674             :     pg_locale_t result;
     675             : 
     676       33370 :     if (collid == DEFAULT_COLLATION_OID)
     677             :     {
     678             :         HeapTuple   tp;
     679             :         Datum       datum;
     680             : 
     681       29340 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     682       29340 :         if (!HeapTupleIsValid(tp))
     683           0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     684       29340 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     685             :                                        Anum_pg_database_datcollate);
     686       29340 :         collate = TextDatumGetCString(datum);
     687       29340 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     688             :                                        Anum_pg_database_datctype);
     689       29340 :         ctype = TextDatumGetCString(datum);
     690             : 
     691       29340 :         ReleaseSysCache(tp);
     692             :     }
     693             :     else
     694             :     {
     695             :         HeapTuple   tp;
     696             :         Datum       datum;
     697             : 
     698        4030 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     699        4030 :         if (!HeapTupleIsValid(tp))
     700           0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     701             : 
     702        4030 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     703             :                                        Anum_pg_collation_collcollate);
     704        4030 :         collate = TextDatumGetCString(datum);
     705        4030 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     706             :                                        Anum_pg_collation_collctype);
     707        4030 :         ctype = TextDatumGetCString(datum);
     708             : 
     709        4030 :         ReleaseSysCache(tp);
     710             :     }
     711             : 
     712             : 
     713       33370 :     loc = make_libc_collator(collate, ctype);
     714             : 
     715       33370 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     716       33370 :     result->deterministic = true;
     717       61670 :     result->collate_is_c = (strcmp(collate, "C") == 0) ||
     718       28300 :         (strcmp(collate, "POSIX") == 0);
     719       61670 :     result->ctype_is_c = (strcmp(ctype, "C") == 0) ||
     720       28300 :         (strcmp(ctype, "POSIX") == 0);
     721       33370 :     result->info.lt = loc;
     722       33370 :     if (!result->collate_is_c)
     723             :     {
     724             : #ifdef WIN32
     725             :         if (GetDatabaseEncoding() == PG_UTF8)
     726             :             result->collate = &collate_methods_libc_win32_utf8;
     727             :         else
     728             : #endif
     729       28236 :             result->collate = &collate_methods_libc;
     730             :     }
     731       33370 :     if (!result->ctype_is_c)
     732             :     {
     733       28236 :         if (GetDatabaseEncoding() == PG_UTF8)
     734       28172 :             result->ctype = &ctype_methods_libc_utf8;
     735          64 :         else if (pg_database_encoding_max_length() > 1)
     736           0 :             result->ctype = &ctype_methods_libc_other_mb;
     737             :         else
     738          64 :             result->ctype = &ctype_methods_libc_sb;
     739             :     }
     740             : 
     741       33370 :     return result;
     742             : }
     743             : 
     744             : /*
     745             :  * Create a locale_t with the given collation and ctype.
     746             :  *
     747             :  * The "C" and "POSIX" locales are not actually handled by libc, so return
     748             :  * NULL.
     749             :  *
     750             :  * Ensure that no path leaks a locale_t.
     751             :  */
     752             : static locale_t
     753       33370 : make_libc_collator(const char *collate, const char *ctype)
     754             : {
     755       33370 :     locale_t    loc = 0;
     756             : 
     757       33370 :     if (strcmp(collate, ctype) == 0)
     758             :     {
     759       33370 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     760             :         {
     761             :             /* Normal case where they're the same */
     762       28236 :             errno = 0;
     763             : #ifndef WIN32
     764       28236 :             loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
     765             :                             NULL);
     766             : #else
     767             :             loc = _create_locale(LC_ALL, collate);
     768             : #endif
     769       28236 :             if (!loc)
     770           0 :                 report_newlocale_failure(collate);
     771             :         }
     772             :     }
     773             :     else
     774             :     {
     775             : #ifndef WIN32
     776             :         /* We need two newlocale() steps */
     777           0 :         locale_t    loc1 = 0;
     778             : 
     779           0 :         if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
     780             :         {
     781           0 :             errno = 0;
     782           0 :             loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
     783           0 :             if (!loc1)
     784           0 :                 report_newlocale_failure(collate);
     785             :         }
     786             : 
     787           0 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     788             :         {
     789           0 :             errno = 0;
     790           0 :             loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
     791           0 :             if (!loc)
     792             :             {
     793           0 :                 if (loc1)
     794           0 :                     freelocale(loc1);
     795           0 :                 report_newlocale_failure(ctype);
     796             :             }
     797             :         }
     798             :         else
     799           0 :             loc = loc1;
     800             : #else
     801             : 
     802             :         /*
     803             :          * XXX The _create_locale() API doesn't appear to support this. Could
     804             :          * perhaps be worked around by changing pg_locale_t to contain two
     805             :          * separate fields.
     806             :          */
     807             :         ereport(ERROR,
     808             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     809             :                  errmsg("collations with different collate and ctype values are not supported on this platform")));
     810             : #endif
     811             :     }
     812             : 
     813       33370 :     return loc;
     814             : }
     815             : 
     816             : /*
     817             :  * strncoll_libc
     818             :  *
     819             :  * NUL-terminate arguments, if necessary, and pass to strcoll_l().
     820             :  *
     821             :  * An input string length of -1 means that it's already NUL-terminated.
     822             :  */
     823             : int
     824    29417928 : strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
     825             :               pg_locale_t locale)
     826             : {
     827             :     char        sbuf[TEXTBUFLEN];
     828    29417928 :     char       *buf = sbuf;
     829    29417928 :     size_t      bufsize1 = (len1 == -1) ? 0 : len1 + 1;
     830    29417928 :     size_t      bufsize2 = (len2 == -1) ? 0 : len2 + 1;
     831             :     const char *arg1n;
     832             :     const char *arg2n;
     833             :     int         result;
     834             : 
     835    29417928 :     if (bufsize1 + bufsize2 > TEXTBUFLEN)
     836         568 :         buf = palloc(bufsize1 + bufsize2);
     837             : 
     838             :     /* nul-terminate arguments if necessary */
     839    29417928 :     if (len1 == -1)
     840             :     {
     841    24977486 :         arg1n = arg1;
     842             :     }
     843             :     else
     844             :     {
     845     4440442 :         char       *buf1 = buf;
     846             : 
     847     4440442 :         memcpy(buf1, arg1, len1);
     848     4440442 :         buf1[len1] = '\0';
     849     4440442 :         arg1n = buf1;
     850             :     }
     851             : 
     852    29417928 :     if (len2 == -1)
     853             :     {
     854    24977486 :         arg2n = arg2;
     855             :     }
     856             :     else
     857             :     {
     858     4440442 :         char       *buf2 = buf + bufsize1;
     859             : 
     860     4440442 :         memcpy(buf2, arg2, len2);
     861     4440442 :         buf2[len2] = '\0';
     862     4440442 :         arg2n = buf2;
     863             :     }
     864             : 
     865    29417928 :     result = strcoll_l(arg1n, arg2n, locale->info.lt);
     866             : 
     867    29417928 :     if (buf != sbuf)
     868         568 :         pfree(buf);
     869             : 
     870    29417928 :     return result;
     871             : }
     872             : 
     873             : /*
     874             :  * strnxfrm_libc
     875             :  *
     876             :  * NUL-terminate src, if necessary, and pass to strxfrm_l().
     877             :  *
     878             :  * A source length of -1 means that it's already NUL-terminated.
     879             :  */
     880             : size_t
     881         144 : strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
     882             :               pg_locale_t locale)
     883             : {
     884             :     char        sbuf[TEXTBUFLEN];
     885         144 :     char       *buf = sbuf;
     886         144 :     size_t      bufsize = srclen + 1;
     887             :     size_t      result;
     888             : 
     889         144 :     if (srclen == -1)
     890         144 :         return strxfrm_l(dest, src, destsize, locale->info.lt);
     891             : 
     892           0 :     if (bufsize > TEXTBUFLEN)
     893           0 :         buf = palloc(bufsize);
     894             : 
     895             :     /* nul-terminate argument */
     896           0 :     memcpy(buf, src, srclen);
     897           0 :     buf[srclen] = '\0';
     898             : 
     899           0 :     result = strxfrm_l(dest, buf, destsize, locale->info.lt);
     900             : 
     901           0 :     if (buf != sbuf)
     902           0 :         pfree(buf);
     903             : 
     904             :     /* if dest is defined, it should be nul-terminated */
     905             :     Assert(result >= destsize || dest[result] == '\0');
     906             : 
     907           0 :     return result;
     908             : }
     909             : 
     910             : char *
     911       28826 : get_collation_actual_version_libc(const char *collcollate)
     912             : {
     913       28826 :     char       *collversion = NULL;
     914             : 
     915       57472 :     if (pg_strcasecmp("C", collcollate) != 0 &&
     916       57100 :         pg_strncasecmp("C.", collcollate, 2) != 0 &&
     917       28454 :         pg_strcasecmp("POSIX", collcollate) != 0)
     918             :     {
     919             : #if defined(__GLIBC__)
     920             :         /* Use the glibc version because we don't have anything better. */
     921       28428 :         collversion = pstrdup(gnu_get_libc_version());
     922             : #elif defined(LC_VERSION_MASK)
     923             :         locale_t    loc;
     924             : 
     925             :         /* Look up FreeBSD collation version. */
     926             :         loc = newlocale(LC_COLLATE_MASK, collcollate, NULL);
     927             :         if (loc)
     928             :         {
     929             :             collversion =
     930             :                 pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
     931             :             freelocale(loc);
     932             :         }
     933             :         else
     934             :             ereport(ERROR,
     935             :                     (errmsg("could not load locale \"%s\"", collcollate)));
     936             : #elif defined(WIN32)
     937             :         /*
     938             :          * If we are targeting Windows Vista and above, we can ask for a name
     939             :          * given a collation name (earlier versions required a location code
     940             :          * that we don't have).
     941             :          */
     942             :         NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
     943             :         WCHAR       wide_collcollate[LOCALE_NAME_MAX_LENGTH];
     944             : 
     945             :         MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
     946             :                             LOCALE_NAME_MAX_LENGTH);
     947             :         if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
     948             :         {
     949             :             /*
     950             :              * GetNLSVersionEx() wants a language tag such as "en-US", not a
     951             :              * locale name like "English_United States.1252".  Until those
     952             :              * values can be prevented from entering the system, or 100%
     953             :              * reliably converted to the more useful tag format, tolerate the
     954             :              * resulting error and report that we have no version data.
     955             :              */
     956             :             if (GetLastError() == ERROR_INVALID_PARAMETER)
     957             :                 return NULL;
     958             : 
     959             :             ereport(ERROR,
     960             :                     (errmsg("could not get collation version for locale \"%s\": error code %lu",
     961             :                             collcollate,
     962             :                             GetLastError())));
     963             :         }
     964             :         collversion = psprintf("%lu.%lu,%lu.%lu",
     965             :                                (version.dwNLSVersion >> 8) & 0xFFFF,
     966             :                                version.dwNLSVersion & 0xFF,
     967             :                                (version.dwDefinedVersion >> 8) & 0xFFFF,
     968             :                                version.dwDefinedVersion & 0xFF);
     969             : #endif
     970             :     }
     971             : 
     972       28826 :     return collversion;
     973             : }
     974             : 
     975             : /*
     976             :  * strncoll_libc_win32_utf8
     977             :  *
     978             :  * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
     979             :  * invoke wcscoll_l().
     980             :  *
     981             :  * An input string length of -1 means that it's NUL-terminated.
     982             :  */
     983             : #ifdef WIN32
     984             : static int
     985             : strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
     986             :                          ssize_t len2, pg_locale_t locale)
     987             : {
     988             :     char        sbuf[TEXTBUFLEN];
     989             :     char       *buf = sbuf;
     990             :     char       *a1p,
     991             :                *a2p;
     992             :     int         a1len;
     993             :     int         a2len;
     994             :     int         r;
     995             :     int         result;
     996             : 
     997             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     998             : 
     999             :     if (len1 == -1)
    1000             :         len1 = strlen(arg1);
    1001             :     if (len2 == -1)
    1002             :         len2 = strlen(arg2);
    1003             : 
    1004             :     a1len = len1 * 2 + 2;
    1005             :     a2len = len2 * 2 + 2;
    1006             : 
    1007             :     if (a1len + a2len > TEXTBUFLEN)
    1008             :         buf = palloc(a1len + a2len);
    1009             : 
    1010             :     a1p = buf;
    1011             :     a2p = buf + a1len;
    1012             : 
    1013             :     /* API does not work for zero-length input */
    1014             :     if (len1 == 0)
    1015             :         r = 0;
    1016             :     else
    1017             :     {
    1018             :         r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
    1019             :                                 (LPWSTR) a1p, a1len / 2);
    1020             :         if (!r)
    1021             :             ereport(ERROR,
    1022             :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1023             :                             GetLastError())));
    1024             :     }
    1025             :     ((LPWSTR) a1p)[r] = 0;
    1026             : 
    1027             :     if (len2 == 0)
    1028             :         r = 0;
    1029             :     else
    1030             :     {
    1031             :         r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
    1032             :                                 (LPWSTR) a2p, a2len / 2);
    1033             :         if (!r)
    1034             :             ereport(ERROR,
    1035             :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1036             :                             GetLastError())));
    1037             :     }
    1038             :     ((LPWSTR) a2p)[r] = 0;
    1039             : 
    1040             :     errno = 0;
    1041             :     result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
    1042             :     if (result == 2147483647)   /* _NLSCMPERROR; missing from mingw headers */
    1043             :         ereport(ERROR,
    1044             :                 (errmsg("could not compare Unicode strings: %m")));
    1045             : 
    1046             :     if (buf != sbuf)
    1047             :         pfree(buf);
    1048             : 
    1049             :     return result;
    1050             : }
    1051             : #endif                          /* WIN32 */
    1052             : 
    1053             : /* simple subroutine for reporting errors from newlocale() */
    1054             : void
    1055           0 : report_newlocale_failure(const char *localename)
    1056             : {
    1057             :     int         save_errno;
    1058             : 
    1059             :     /*
    1060             :      * Windows doesn't provide any useful error indication from
    1061             :      * _create_locale(), and BSD-derived platforms don't seem to feel they
    1062             :      * need to set errno either (even though POSIX is pretty clear that
    1063             :      * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
    1064             :      * is what to report.
    1065             :      */
    1066           0 :     if (errno == 0)
    1067           0 :         errno = ENOENT;
    1068             : 
    1069             :     /*
    1070             :      * ENOENT means "no such locale", not "no such file", so clarify that
    1071             :      * errno with an errdetail message.
    1072             :      */
    1073           0 :     save_errno = errno;         /* auxiliary funcs might change errno */
    1074           0 :     ereport(ERROR,
    1075             :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1076             :              errmsg("could not create locale \"%s\": %m",
    1077             :                     localename),
    1078             :              (save_errno == ENOENT ?
    1079             :               errdetail("The operating system could not find any locale data for the locale name \"%s\".",
    1080             :                         localename) : 0)));
    1081             : }
    1082             : 
    1083             : /*
    1084             :  * POSIX doesn't define _l-variants of these functions, but several systems
    1085             :  * have them.  We provide our own replacements here.
    1086             :  */
    1087             : #ifndef HAVE_MBSTOWCS_L
    1088             : static size_t
    1089     1140832 : mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
    1090             : {
    1091             : #ifdef WIN32
    1092             :     return _mbstowcs_l(dest, src, n, loc);
    1093             : #else
    1094             :     size_t      result;
    1095     1140832 :     locale_t    save_locale = uselocale(loc);
    1096             : 
    1097     1140832 :     result = mbstowcs(dest, src, n);
    1098     1140832 :     uselocale(save_locale);
    1099     1140832 :     return result;
    1100             : #endif
    1101             : }
    1102             : #endif
    1103             : #ifndef HAVE_WCSTOMBS_L
    1104             : static size_t
    1105     1140832 : wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
    1106             : {
    1107             : #ifdef WIN32
    1108             :     return _wcstombs_l(dest, src, n, loc);
    1109             : #else
    1110             :     size_t      result;
    1111     1140832 :     locale_t    save_locale = uselocale(loc);
    1112             : 
    1113     1140832 :     result = wcstombs(dest, src, n);
    1114     1140832 :     uselocale(save_locale);
    1115     1140832 :     return result;
    1116             : #endif
    1117             : }
    1118             : #endif
    1119             : 
    1120             : /*
    1121             :  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
    1122             :  * Therefore we keep them here rather than with the mbutils code.
    1123             :  */
    1124             : 
    1125             : /*
    1126             :  * wchar2char --- convert wide characters to multibyte format
    1127             :  *
    1128             :  * This has the same API as the standard wcstombs_l() function; in particular,
    1129             :  * tolen is the maximum number of bytes to store at *to, and *from must be
    1130             :  * zero-terminated.  The output will be zero-terminated iff there is room.
    1131             :  */
    1132             : size_t
    1133     1140832 : wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
    1134             : {
    1135             :     size_t      result;
    1136             : 
    1137     1140832 :     if (tolen == 0)
    1138           0 :         return 0;
    1139             : 
    1140             : #ifdef WIN32
    1141             : 
    1142             :     /*
    1143             :      * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
    1144             :      * for some reason mbstowcs and wcstombs won't do this for us, so we use
    1145             :      * MultiByteToWideChar().
    1146             :      */
    1147             :     if (GetDatabaseEncoding() == PG_UTF8)
    1148             :     {
    1149             :         result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
    1150             :                                      NULL, NULL);
    1151             :         /* A zero return is failure */
    1152             :         if (result <= 0)
    1153             :             result = -1;
    1154             :         else
    1155             :         {
    1156             :             Assert(result <= tolen);
    1157             :             /* Microsoft counts the zero terminator in the result */
    1158             :             result--;
    1159             :         }
    1160             :     }
    1161             :     else
    1162             : #endif                          /* WIN32 */
    1163     1140832 :     if (locale == (pg_locale_t) 0)
    1164             :     {
    1165             :         /* Use wcstombs directly for the default locale */
    1166           0 :         result = wcstombs(to, from, tolen);
    1167             :     }
    1168             :     else
    1169             :     {
    1170             :         /* Use wcstombs_l for nondefault locales */
    1171     1140832 :         result = wcstombs_l(to, from, tolen, locale->info.lt);
    1172             :     }
    1173             : 
    1174     1140832 :     return result;
    1175             : }
    1176             : 
    1177             : /*
    1178             :  * char2wchar --- convert multibyte characters to wide characters
    1179             :  *
    1180             :  * This has almost the API of mbstowcs_l(), except that *from need not be
    1181             :  * null-terminated; instead, the number of input bytes is specified as
    1182             :  * fromlen.  Also, we ereport() rather than returning -1 for invalid
    1183             :  * input encoding.  tolen is the maximum number of wchar_t's to store at *to.
    1184             :  * The output will be zero-terminated iff there is room.
    1185             :  */
    1186             : size_t
    1187     1144004 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
    1188             :            pg_locale_t locale)
    1189             : {
    1190             :     size_t      result;     /* value returned to caller; -1 marks failure */
    1191             : 
    1192     1144004 :     if (tolen == 0)
    1193           0 :         return 0;           /* no room for any output, so convert nothing */
    1194             : 
    1195             : #ifdef WIN32
    1196             :     /* See WIN32 "Unicode" comment above */
    1197             :     if (GetDatabaseEncoding() == PG_UTF8)
    1198             :     {
    1199             :         /* Win32 API does not work for zero-length input */
    1200             :         if (fromlen == 0)
    1201             :             result = 0;
    1202             :         else
    1203             :         {
    1204             :             result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
    1205             :             /* A zero return is failure; record it as the -1 error marker */
    1206             :             if (result == 0)
    1207             :                 result = -1;
    1208             :         }
    1209             : 
    1210             :         if (result != -1)
    1211             :         {
    1212             :             Assert(result < tolen); /* NOTE(review): relies on tolen - 1 > 0 above; confirm tolen == 1 */
    1213             :             /* Append trailing null wchar (MultiByteToWideChar() does not) */
    1214             :             to[result] = 0;
    1215             :         }
    1216             :     }
    1217             :     else
    1218             : #endif                          /* WIN32 */
    1219             :     {
    1220             :         /* mbstowcs requires ending '\0', but *from has none; use a pnstrdup'd string */
    1221     1144004 :         char       *str = pnstrdup(from, fromlen);
    1222             : 
    1223     1144004 :         if (locale == (pg_locale_t) 0)
    1224             :         {
    1225             :             /* Use mbstowcs directly for the default locale */
    1226        3172 :             result = mbstowcs(to, str, tolen);
    1227             :         }
    1228             :         else
    1229             :         {
    1230             :             /* Use mbstowcs_l for nondefault locales */
    1231     1140832 :             result = mbstowcs_l(to, str, tolen, locale->info.lt);
    1232             :         }
    1233             : 
    1234     1144004 :         pfree(str);         /* the temporary string is no longer needed */
    1235             :     }
    1236             : 
    1237     1144004 :     if (result == -1)       /* failure marker from either branch above */
    1238             :     {
    1239             :         /*
    1240             :          * Invalid multibyte character encountered.  We try to give a useful
    1241             :          * error message by letting pg_verifymbstr check the string.  But it's
    1242             :          * possible that the string is OK to us, and not OK to mbstowcs ---
    1243             :          * this suggests that the LC_CTYPE locale is different from the
    1244             :          * database encoding.  Give a generic error message if pg_verifymbstr
    1245             :          * can't find anything wrong.
    1246             :          */
    1247           0 :         pg_verifymbstr(from, fromlen, false);   /* might not return */
    1248             :         /* but if it does ... */
    1249           0 :         ereport(ERROR,
    1250             :                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
    1251             :                  errmsg("invalid multibyte character for locale"),
    1252             :                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
    1253             :     }
    1254             : 
    1255     1144004 :     return result;
    1256             : }

Generated by: LCOV version 1.16