LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_libc.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 180 309 58.3 %
Date: 2025-10-23 03:17:20 Functions: 24 42 57.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-----------------------------------------------------------------------
       2             :  *
       3             :  * PostgreSQL locale utilities for libc
       4             :  *
       5             :  * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
       6             :  *
       7             :  * src/backend/utils/adt/pg_locale_libc.c
       8             :  *
       9             :  *-----------------------------------------------------------------------
      10             :  */
      11             : 
      12             : #include "postgres.h"
      13             : 
      14             : #include <limits.h>
      15             : #include <wctype.h>
      16             : 
      17             : #include "access/htup_details.h"
      18             : #include "catalog/pg_database.h"
      19             : #include "catalog/pg_collation.h"
      20             : #include "mb/pg_wchar.h"
      21             : #include "miscadmin.h"
      22             : #include "utils/builtins.h"
      23             : #include "utils/formatting.h"
      24             : #include "utils/memutils.h"
      25             : #include "utils/pg_locale.h"
      26             : #include "utils/syscache.h"
      27             : 
      28             : #ifdef __GLIBC__
      29             : #include <gnu/libc-version.h>
      30             : #endif
      31             : 
      32             : #ifdef WIN32
      33             : #include <shlwapi.h>
      34             : #endif
      35             : 
      36             : /*
      37             :  * For the libc provider, to provide as much functionality as possible on a
      38             :  * variety of platforms without going so far as to implement everything from
      39             :  * scratch, we use several implementation strategies depending on the
      40             :  * situation:
      41             :  *
      42             :  * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
      43             :  * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
      44             :  * collations don't give a fig about multibyte characters.
      45             :  *
      46             :  * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
      47             :  * This assumes that every platform uses Unicode codepoints directly
      48             :  * as the wchar_t representation of Unicode.  (XXX: ICU makes this assumption
      49             :  * even for non-UTF8 encodings, which may be a problem.)  On some platforms
      50             :  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
      51             :  *
      52             :  * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
      53             :  * values up to 255, and punt for values above that.  This is 100% correct
      54             :  * only in single-byte encodings such as LATINn.  However, non-Unicode
      55             :  * multibyte encodings are mostly Far Eastern character sets for which the
      56             :  * properties being tested here aren't very relevant for higher code values
      57             :  * anyway.  The difficulty with using the <wctype.h> functions with
      58             :  * non-Unicode multibyte encodings is that we can have no certainty that
      59             :  * the platform's wchar_t representation matches what we do in pg_wchar
      60             :  * conversions.
      61             :  *
      62             :  * As a special case, in the "default" collation, (2) and (3) force ASCII
      63             :  * letters to follow ASCII upcase/downcase rules, while in a non-default
      64             :  * collation we just let the library functions do what they will.  The case
      65             :  * where this matters is treatment of I/i in Turkish, and the behavior is
      66             :  * meant to match the upper()/lower() SQL functions.
      67             :  *
      68             :  * We store the active collation setting in static variables.  In principle
      69             :  * it could be passed down to here via the regex library's "struct vars" data
      70             :  * structure; but that would require somewhat invasive changes in the regex
      71             :  * library, and right now there's no real benefit to be gained from that.
      72             :  *
      73             :  * NB: the coding here assumes pg_wchar is an unsigned type.
      74             :  */
      75             : 
      76             : /*
      77             :  * Size of stack buffer to use for string transformations, used to avoid heap
      78             :  * allocations in typical cases. This should be large enough that most strings
      79             :  * will fit, but small enough that we feel comfortable putting it on the
      80             :  * stack.
      81             :  */
      82             : #define     TEXTBUFLEN          1024
      83             : 
      84             : extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
      85             : 
      86             : static int  strncoll_libc(const char *arg1, ssize_t len1,
      87             :                           const char *arg2, ssize_t len2,
      88             :                           pg_locale_t locale);
      89             : static size_t strnxfrm_libc(char *dest, size_t destsize,
      90             :                             const char *src, ssize_t srclen,
      91             :                             pg_locale_t locale);
      92             : extern char *get_collation_actual_version_libc(const char *collcollate);
      93             : static locale_t make_libc_collator(const char *collate,
      94             :                                    const char *ctype);
      95             : 
      96             : #ifdef WIN32
      97             : static int  strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
      98             :                                      const char *arg2, ssize_t len2,
      99             :                                      pg_locale_t locale);
     100             : #endif
     101             : 
     102             : static size_t char2wchar(wchar_t *to, size_t tolen, const char *from,
     103             :                          size_t fromlen, locale_t loc);
     104             : 
     105             : static size_t strlower_libc_sb(char *dest, size_t destsize,
     106             :                                const char *src, ssize_t srclen,
     107             :                                pg_locale_t locale);
     108             : static size_t strlower_libc_mb(char *dest, size_t destsize,
     109             :                                const char *src, ssize_t srclen,
     110             :                                pg_locale_t locale);
     111             : static size_t strtitle_libc_sb(char *dest, size_t destsize,
     112             :                                const char *src, ssize_t srclen,
     113             :                                pg_locale_t locale);
     114             : static size_t strtitle_libc_mb(char *dest, size_t destsize,
     115             :                                const char *src, ssize_t srclen,
     116             :                                pg_locale_t locale);
     117             : static size_t strupper_libc_sb(char *dest, size_t destsize,
     118             :                                const char *src, ssize_t srclen,
     119             :                                pg_locale_t locale);
     120             : static size_t strupper_libc_mb(char *dest, size_t destsize,
     121             :                                const char *src, ssize_t srclen,
     122             :                                pg_locale_t locale);
     123             : 
     124             : static bool
     125           0 : wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
     126             : {
     127           0 :     return isdigit_l((unsigned char) wc, locale->lt);
     128             : }
     129             : 
     130             : static bool
     131           0 : wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
     132             : {
     133           0 :     return isalpha_l((unsigned char) wc, locale->lt);
     134             : }
     135             : 
     136             : static bool
     137           0 : wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
     138             : {
     139           0 :     return isalnum_l((unsigned char) wc, locale->lt);
     140             : }
     141             : 
     142             : static bool
     143           0 : wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
     144             : {
     145           0 :     return isupper_l((unsigned char) wc, locale->lt);
     146             : }
     147             : 
     148             : static bool
     149           0 : wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
     150             : {
     151           0 :     return islower_l((unsigned char) wc, locale->lt);
     152             : }
     153             : 
     154             : static bool
     155           0 : wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
     156             : {
     157           0 :     return isgraph_l((unsigned char) wc, locale->lt);
     158             : }
     159             : 
     160             : static bool
     161           0 : wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
     162             : {
     163           0 :     return isprint_l((unsigned char) wc, locale->lt);
     164             : }
     165             : 
     166             : static bool
     167           0 : wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
     168             : {
     169           0 :     return ispunct_l((unsigned char) wc, locale->lt);
     170             : }
     171             : 
     172             : static bool
     173           0 : wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
     174             : {
     175           0 :     return isspace_l((unsigned char) wc, locale->lt);
     176             : }
     177             : 
     178             : static bool
     179           0 : wc_isxdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
     180             : {
     181             : #ifndef WIN32
     182           0 :     return isxdigit_l((unsigned char) wc, locale->lt);
     183             : #else
     184             :     return _isxdigit_l((unsigned char) wc, locale->lt);
     185             : #endif
     186             : }
     187             : 
     188             : static bool
     189      131608 : wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
     190             : {
     191      131608 :     return iswdigit_l((wint_t) wc, locale->lt);
     192             : }
     193             : 
     194             : static bool
     195       81148 : wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
     196             : {
     197       81148 :     return iswalpha_l((wint_t) wc, locale->lt);
     198             : }
     199             : 
     200             : static bool
     201     2845650 : wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
     202             : {
     203     2845650 :     return iswalnum_l((wint_t) wc, locale->lt);
     204             : }
     205             : 
     206             : static bool
     207        4112 : wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
     208             : {
     209        4112 :     return iswupper_l((wint_t) wc, locale->lt);
     210             : }
     211             : 
     212             : static bool
     213        4102 : wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
     214             : {
     215        4102 :     return iswlower_l((wint_t) wc, locale->lt);
     216             : }
     217             : 
     218             : static bool
     219        4102 : wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
     220             : {
     221        4102 :     return iswgraph_l((wint_t) wc, locale->lt);
     222             : }
     223             : 
     224             : static bool
     225        4102 : wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
     226             : {
     227        4102 :     return iswprint_l((wint_t) wc, locale->lt);
     228             : }
     229             : 
     230             : static bool
     231        4102 : wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
     232             : {
     233        4102 :     return iswpunct_l((wint_t) wc, locale->lt);
     234             : }
     235             : 
     236             : static bool
     237       48152 : wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
     238             : {
     239       48152 :     return iswspace_l((wint_t) wc, locale->lt);
     240             : }
     241             : 
     242             : static bool
     243          12 : wc_isxdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
     244             : {
     245             : #ifndef WIN32
     246          12 :     return iswxdigit_l((wint_t) wc, locale->lt);
     247             : #else
     248             :     return _iswxdigit_l((wint_t) wc, locale->lt);
     249             : #endif
     250             : }
     251             : 
     252             : static char
     253           0 : char_tolower_libc(unsigned char ch, pg_locale_t locale)
     254             : {
     255             :     Assert(pg_database_encoding_max_length() == 1);
     256           0 :     return tolower_l(ch, locale->lt);
     257             : }
     258             : 
     259             : static bool
     260           0 : char_is_cased_libc(char ch, pg_locale_t locale)
     261             : {
     262           0 :     bool        is_multibyte = pg_database_encoding_max_length() > 1;
     263             : 
     264           0 :     if (is_multibyte && IS_HIGHBIT_SET(ch))
     265           0 :         return true;
     266             :     else
     267           0 :         return isalpha_l((unsigned char) ch, locale->lt);
     268             : }
     269             : 
     270             : static pg_wchar
     271           0 : toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
     272             : {
     273             :     Assert(GetDatabaseEncoding() != PG_UTF8);
     274             : 
     275             :     /* force C behavior for ASCII characters, per comments above */
     276           0 :     if (locale->is_default && wc <= (pg_wchar) 127)
     277           0 :         return pg_ascii_toupper((unsigned char) wc);
     278           0 :     if (wc <= (pg_wchar) UCHAR_MAX)
     279           0 :         return toupper_l((unsigned char) wc, locale->lt);
     280             :     else
     281           0 :         return wc;
     282             : }
     283             : 
     284             : static pg_wchar
     285        9088 : toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
     286             : {
     287             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     288             : 
     289             :     /* force C behavior for ASCII characters, per comments above */
     290        9088 :     if (locale->is_default && wc <= (pg_wchar) 127)
     291         892 :         return pg_ascii_toupper((unsigned char) wc);
     292             :     if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
     293        8196 :         return towupper_l((wint_t) wc, locale->lt);
     294             :     else
     295             :         return wc;
     296             : }
     297             : 
     298             : static pg_wchar
     299           0 : tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
     300             : {
     301             :     Assert(GetDatabaseEncoding() != PG_UTF8);
     302             : 
     303             :     /* force C behavior for ASCII characters, per comments above */
     304           0 :     if (locale->is_default && wc <= (pg_wchar) 127)
     305           0 :         return pg_ascii_tolower((unsigned char) wc);
     306           0 :     if (wc <= (pg_wchar) UCHAR_MAX)
     307           0 :         return tolower_l((unsigned char) wc, locale->lt);
     308             :     else
     309           0 :         return wc;
     310             : }
     311             : 
     312             : static pg_wchar
     313        9092 : tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
     314             : {
     315             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     316             : 
     317             :     /* force C behavior for ASCII characters, per comments above */
     318        9092 :     if (locale->is_default && wc <= (pg_wchar) 127)
     319         896 :         return pg_ascii_tolower((unsigned char) wc);
     320             :     if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
     321        8196 :         return towlower_l((wint_t) wc, locale->lt);
     322             :     else
     323             :         return wc;
     324             : }
     325             : 
     326             : static const struct ctype_methods ctype_methods_libc_sb = {
     327             :     .strlower = strlower_libc_sb,
     328             :     .strtitle = strtitle_libc_sb,
     329             :     .strupper = strupper_libc_sb,
     330             :     .wc_isdigit = wc_isdigit_libc_sb,
     331             :     .wc_isalpha = wc_isalpha_libc_sb,
     332             :     .wc_isalnum = wc_isalnum_libc_sb,
     333             :     .wc_isupper = wc_isupper_libc_sb,
     334             :     .wc_islower = wc_islower_libc_sb,
     335             :     .wc_isgraph = wc_isgraph_libc_sb,
     336             :     .wc_isprint = wc_isprint_libc_sb,
     337             :     .wc_ispunct = wc_ispunct_libc_sb,
     338             :     .wc_isspace = wc_isspace_libc_sb,
     339             :     .wc_isxdigit = wc_isxdigit_libc_sb,
     340             :     .char_is_cased = char_is_cased_libc,
     341             :     .char_tolower = char_tolower_libc,
     342             :     .wc_toupper = toupper_libc_sb,
     343             :     .wc_tolower = tolower_libc_sb,
     344             :     .max_chr = UCHAR_MAX,
     345             : };
     346             : 
     347             : /*
     348             :  * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
     349             :  * single-byte semantics for pattern matching.
     350             :  */
     351             : static const struct ctype_methods ctype_methods_libc_other_mb = {
     352             :     .strlower = strlower_libc_mb,
     353             :     .strtitle = strtitle_libc_mb,
     354             :     .strupper = strupper_libc_mb,
     355             :     .wc_isdigit = wc_isdigit_libc_sb,
     356             :     .wc_isalpha = wc_isalpha_libc_sb,
     357             :     .wc_isalnum = wc_isalnum_libc_sb,
     358             :     .wc_isupper = wc_isupper_libc_sb,
     359             :     .wc_islower = wc_islower_libc_sb,
     360             :     .wc_isgraph = wc_isgraph_libc_sb,
     361             :     .wc_isprint = wc_isprint_libc_sb,
     362             :     .wc_ispunct = wc_ispunct_libc_sb,
     363             :     .wc_isspace = wc_isspace_libc_sb,
     364             :     .wc_isxdigit = wc_isxdigit_libc_sb,
     365             :     .char_is_cased = char_is_cased_libc,
     366             :     .char_tolower = char_tolower_libc,
     367             :     .wc_toupper = toupper_libc_sb,
     368             :     .wc_tolower = tolower_libc_sb,
     369             :     .max_chr = UCHAR_MAX,
     370             : };
     371             : 
     372             : static const struct ctype_methods ctype_methods_libc_utf8 = {
     373             :     .strlower = strlower_libc_mb,
     374             :     .strtitle = strtitle_libc_mb,
     375             :     .strupper = strupper_libc_mb,
     376             :     .wc_isdigit = wc_isdigit_libc_mb,
     377             :     .wc_isalpha = wc_isalpha_libc_mb,
     378             :     .wc_isalnum = wc_isalnum_libc_mb,
     379             :     .wc_isupper = wc_isupper_libc_mb,
     380             :     .wc_islower = wc_islower_libc_mb,
     381             :     .wc_isgraph = wc_isgraph_libc_mb,
     382             :     .wc_isprint = wc_isprint_libc_mb,
     383             :     .wc_ispunct = wc_ispunct_libc_mb,
     384             :     .wc_isspace = wc_isspace_libc_mb,
     385             :     .wc_isxdigit = wc_isxdigit_libc_mb,
     386             :     .char_is_cased = char_is_cased_libc,
     387             :     .char_tolower = char_tolower_libc,
     388             :     .wc_toupper = toupper_libc_mb,
     389             :     .wc_tolower = tolower_libc_mb,
     390             : };
     391             : 
     392             : static const struct collate_methods collate_methods_libc = {
     393             :     .strncoll = strncoll_libc,
     394             :     .strnxfrm = strnxfrm_libc,
     395             :     .strnxfrm_prefix = NULL,
     396             : 
     397             :     /*
     398             :      * Unfortunately, it seems that strxfrm() for non-C collations is broken
     399             :      * on many common platforms; testing of multiple versions of glibc reveals
     400             :      * that, for many locales, strcoll() and strxfrm() do not return
     401             :      * consistent results. While no other libc other than Cygwin has so far
     402             :      * been shown to have a problem, we take the conservative course of action
     403             :      * for right now and disable this categorically.  (Users who are certain
     404             :      * this isn't a problem on their system can define TRUST_STRXFRM.)
     405             :      */
     406             : #ifdef TRUST_STRXFRM
     407             :     .strxfrm_is_safe = true,
     408             : #else
     409             :     .strxfrm_is_safe = false,
     410             : #endif
     411             : };
     412             : 
     413             : #ifdef WIN32
     414             : static const struct collate_methods collate_methods_libc_win32_utf8 = {
     415             :     .strncoll = strncoll_libc_win32_utf8,
     416             :     .strnxfrm = strnxfrm_libc,
     417             :     .strnxfrm_prefix = NULL,
     418             : #ifdef TRUST_STRXFRM
     419             :     .strxfrm_is_safe = true,
     420             : #else
     421             :     .strxfrm_is_safe = false,
     422             : #endif
     423             : };
     424             : #endif
     425             : 
     426             : static size_t
     427           0 : strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     428             :                  pg_locale_t locale)
     429             : {
     430           0 :     if (srclen < 0)
     431           0 :         srclen = strlen(src);
     432             : 
     433           0 :     if (srclen + 1 <= destsize)
     434             :     {
     435           0 :         locale_t    loc = locale->lt;
     436             :         char       *p;
     437             : 
     438           0 :         if (srclen + 1 > destsize)
     439           0 :             return srclen;
     440             : 
     441           0 :         memcpy(dest, src, srclen);
     442           0 :         dest[srclen] = '\0';
     443             : 
     444             :         /*
     445             :          * Note: we assume that tolower_l() will not be so broken as to need
     446             :          * an isupper_l() guard test.  When using the default collation, we
     447             :          * apply the traditional Postgres behavior that forces ASCII-style
     448             :          * treatment of I/i, but in non-default collations you get exactly
     449             :          * what the collation says.
     450             :          */
     451           0 :         for (p = dest; *p; p++)
     452             :         {
     453           0 :             if (locale->is_default)
     454           0 :                 *p = pg_tolower((unsigned char) *p);
     455             :             else
     456           0 :                 *p = tolower_l((unsigned char) *p, loc);
     457             :         }
     458             :     }
     459             : 
     460           0 :     return srclen;
     461             : }
     462             : 
     463             : static size_t
     464      424990 : strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     465             :                  pg_locale_t locale)
     466             : {
     467      424990 :     locale_t    loc = locale->lt;
     468             :     size_t      result_size;
     469             :     wchar_t    *workspace;
     470             :     char       *result;
     471             :     size_t      curr_char;
     472             :     size_t      max_size;
     473             : 
     474      424990 :     if (srclen < 0)
     475           0 :         srclen = strlen(src);
     476             : 
     477             :     /* Overflow paranoia */
     478      424990 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     479           0 :         ereport(ERROR,
     480             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     481             :                  errmsg("out of memory")));
     482             : 
     483             :     /* Output workspace cannot have more codes than input bytes */
     484      424990 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     485             : 
     486      424990 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     487             : 
     488     3669304 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     489     3244314 :         workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     490             : 
     491             :     /*
     492             :      * Make result large enough; case change might change number of bytes
     493             :      */
     494      424990 :     max_size = curr_char * pg_database_encoding_max_length();
     495      424990 :     result = palloc(max_size + 1);
     496             : 
     497      424990 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     498             : 
     499      424990 :     if (result_size + 1 > destsize)
     500           0 :         return result_size;
     501             : 
     502      424990 :     memcpy(dest, result, result_size);
     503      424990 :     dest[result_size] = '\0';
     504             : 
     505      424990 :     pfree(workspace);
     506      424990 :     pfree(result);
     507             : 
     508      424990 :     return result_size;
     509             : }
     510             : 
     511             : static size_t
     512           0 : strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     513             :                  pg_locale_t locale)
     514             : {
     515           0 :     if (srclen < 0)
     516           0 :         srclen = strlen(src);
     517             : 
     518           0 :     if (srclen + 1 <= destsize)
     519             :     {
     520           0 :         locale_t    loc = locale->lt;
     521           0 :         int         wasalnum = false;
     522             :         char       *p;
     523             : 
     524           0 :         memcpy(dest, src, srclen);
     525           0 :         dest[srclen] = '\0';
     526             : 
     527             :         /*
     528             :          * Note: we assume that toupper_l()/tolower_l() will not be so broken
     529             :          * as to need guard tests.  When using the default collation, we apply
     530             :          * the traditional Postgres behavior that forces ASCII-style treatment
     531             :          * of I/i, but in non-default collations you get exactly what the
     532             :          * collation says.
     533             :          */
     534           0 :         for (p = dest; *p; p++)
     535             :         {
     536           0 :             if (locale->is_default)
     537             :             {
     538           0 :                 if (wasalnum)
     539           0 :                     *p = pg_tolower((unsigned char) *p);
     540             :                 else
     541           0 :                     *p = pg_toupper((unsigned char) *p);
     542             :             }
     543             :             else
     544             :             {
     545           0 :                 if (wasalnum)
     546           0 :                     *p = tolower_l((unsigned char) *p, loc);
     547             :                 else
     548           0 :                     *p = toupper_l((unsigned char) *p, loc);
     549             :             }
     550           0 :             wasalnum = isalnum_l((unsigned char) *p, loc);
     551             :         }
     552             :     }
     553             : 
     554           0 :     return srclen;
     555             : }
     556             : 
     557             : static size_t
     558           8 : strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     559             :                  pg_locale_t locale)
     560             : {
     561           8 :     locale_t    loc = locale->lt;
     562           8 :     int         wasalnum = false;
     563             :     size_t      result_size;
     564             :     wchar_t    *workspace;
     565             :     char       *result;
     566             :     size_t      curr_char;
     567             :     size_t      max_size;
     568             : 
     569           8 :     if (srclen < 0)
     570           0 :         srclen = strlen(src);
     571             : 
     572             :     /* Overflow paranoia */
     573           8 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     574           0 :         ereport(ERROR,
     575             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     576             :                  errmsg("out of memory")));
     577             : 
     578             :     /* Output workspace cannot have more codes than input bytes */
     579           8 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     580             : 
     581           8 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     582             : 
     583          80 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     584             :     {
     585          72 :         if (wasalnum)
     586          56 :             workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     587             :         else
     588          16 :             workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     589          72 :         wasalnum = iswalnum_l(workspace[curr_char], loc);
     590             :     }
     591             : 
     592             :     /*
     593             :      * Make result large enough; case change might change number of bytes
     594             :      */
     595           8 :     max_size = curr_char * pg_database_encoding_max_length();
     596           8 :     result = palloc(max_size + 1);
     597             : 
     598           8 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     599             : 
     600           8 :     if (result_size + 1 > destsize)
     601           0 :         return result_size;
     602             : 
     603           8 :     memcpy(dest, result, result_size);
     604           8 :     dest[result_size] = '\0';
     605             : 
     606           8 :     pfree(workspace);
     607           8 :     pfree(result);
     608             : 
     609           8 :     return result_size;
     610             : }
     611             : 
     612             : static size_t
     613           0 : strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     614             :                  pg_locale_t locale)
     615             : {
     616           0 :     if (srclen < 0)
     617           0 :         srclen = strlen(src);
     618             : 
     619           0 :     if (srclen + 1 <= destsize)
     620             :     {
     621           0 :         locale_t    loc = locale->lt;
     622             :         char       *p;
     623             : 
     624           0 :         memcpy(dest, src, srclen);
     625           0 :         dest[srclen] = '\0';
     626             : 
     627             :         /*
     628             :          * Note: we assume that toupper_l() will not be so broken as to need
     629             :          * an islower_l() guard test.  When using the default collation, we
     630             :          * apply the traditional Postgres behavior that forces ASCII-style
     631             :          * treatment of I/i, but in non-default collations you get exactly
     632             :          * what the collation says.
     633             :          */
     634           0 :         for (p = dest; *p; p++)
     635             :         {
     636           0 :             if (locale->is_default)
     637           0 :                 *p = pg_toupper((unsigned char) *p);
     638             :             else
     639           0 :                 *p = toupper_l((unsigned char) *p, loc);
     640             :         }
     641             :     }
     642             : 
     643           0 :     return srclen;
     644             : }
     645             : 
     646             : static size_t
     647      719232 : strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     648             :                  pg_locale_t locale)
     649             : {
     650      719232 :     locale_t    loc = locale->lt;
     651             :     size_t      result_size;
     652             :     wchar_t    *workspace;
     653             :     char       *result;
     654             :     size_t      curr_char;
     655             :     size_t      max_size;
     656             : 
     657      719232 :     if (srclen < 0)
     658           0 :         srclen = strlen(src);
     659             : 
     660             :     /* Overflow paranoia */
     661      719232 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     662           0 :         ereport(ERROR,
     663             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     664             :                  errmsg("out of memory")));
     665             : 
     666             :     /* Output workspace cannot have more codes than input bytes */
     667      719232 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     668             : 
     669      719232 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     670             : 
     671     2367270 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     672     1648038 :         workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     673             : 
     674             :     /*
     675             :      * Make result large enough; case change might change number of bytes
     676             :      */
     677      719232 :     max_size = curr_char * pg_database_encoding_max_length();
     678      719232 :     result = palloc(max_size + 1);
     679             : 
     680      719232 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     681             : 
     682      719232 :     if (result_size + 1 > destsize)
     683           0 :         return result_size;
     684             : 
     685      719232 :     memcpy(dest, result, result_size);
     686      719232 :     dest[result_size] = '\0';
     687             : 
     688      719232 :     pfree(workspace);
     689      719232 :     pfree(result);
     690             : 
     691      719232 :     return result_size;
     692             : }
     693             : 
     694             : pg_locale_t
     695       35304 : create_pg_locale_libc(Oid collid, MemoryContext context)
     696             : {
     697             :     const char *collate;
     698             :     const char *ctype;
     699             :     locale_t    loc;
     700             :     pg_locale_t result;
     701             : 
     702       35304 :     if (collid == DEFAULT_COLLATION_OID)
     703             :     {
     704             :         HeapTuple   tp;
     705             :         Datum       datum;
     706             : 
     707       31256 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     708       31256 :         if (!HeapTupleIsValid(tp))
     709           0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     710       31256 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     711             :                                        Anum_pg_database_datcollate);
     712       31256 :         collate = TextDatumGetCString(datum);
     713       31256 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     714             :                                        Anum_pg_database_datctype);
     715       31256 :         ctype = TextDatumGetCString(datum);
     716             : 
     717       31256 :         ReleaseSysCache(tp);
     718             :     }
     719             :     else
     720             :     {
     721             :         HeapTuple   tp;
     722             :         Datum       datum;
     723             : 
     724        4048 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     725        4048 :         if (!HeapTupleIsValid(tp))
     726           0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     727             : 
     728        4048 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     729             :                                        Anum_pg_collation_collcollate);
     730        4048 :         collate = TextDatumGetCString(datum);
     731        4048 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     732             :                                        Anum_pg_collation_collctype);
     733        4048 :         ctype = TextDatumGetCString(datum);
     734             : 
     735        4048 :         ReleaseSysCache(tp);
     736             :     }
     737             : 
     738             : 
     739       35304 :     loc = make_libc_collator(collate, ctype);
     740             : 
     741       35304 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     742       35304 :     result->deterministic = true;
     743       65520 :     result->collate_is_c = (strcmp(collate, "C") == 0) ||
     744       30216 :         (strcmp(collate, "POSIX") == 0);
     745       65520 :     result->ctype_is_c = (strcmp(ctype, "C") == 0) ||
     746       30216 :         (strcmp(ctype, "POSIX") == 0);
     747       35304 :     result->lt = loc;
     748       35304 :     if (!result->collate_is_c)
     749             :     {
     750             : #ifdef WIN32
     751             :         if (GetDatabaseEncoding() == PG_UTF8)
     752             :             result->collate = &collate_methods_libc_win32_utf8;
     753             :         else
     754             : #endif
     755       30152 :             result->collate = &collate_methods_libc;
     756             :     }
     757       35304 :     if (!result->ctype_is_c)
     758             :     {
     759       30152 :         if (GetDatabaseEncoding() == PG_UTF8)
     760       30088 :             result->ctype = &ctype_methods_libc_utf8;
     761          64 :         else if (pg_database_encoding_max_length() > 1)
     762           0 :             result->ctype = &ctype_methods_libc_other_mb;
     763             :         else
     764          64 :             result->ctype = &ctype_methods_libc_sb;
     765             :     }
     766             : 
     767       35304 :     return result;
     768             : }
     769             : 
     770             : /*
     771             :  * Create a locale_t with the given collation and ctype.
     772             :  *
     773             :  * The "C" and "POSIX" locales are not actually handled by libc, so return
     774             :  * NULL.
     775             :  *
     776             :  * Ensure that no path leaks a locale_t.
     777             :  */
     778             : static locale_t
     779       35304 : make_libc_collator(const char *collate, const char *ctype)
     780             : {
     781       35304 :     locale_t    loc = 0;
     782             : 
     783       35304 :     if (strcmp(collate, ctype) == 0)
     784             :     {
     785       35304 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     786             :         {
     787             :             /* Normal case where they're the same */
     788       30152 :             errno = 0;
     789             : #ifndef WIN32
     790       30152 :             loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
     791             :                             NULL);
     792             : #else
     793             :             loc = _create_locale(LC_ALL, collate);
     794             : #endif
     795       30152 :             if (!loc)
     796           0 :                 report_newlocale_failure(collate);
     797             :         }
     798             :     }
     799             :     else
     800             :     {
     801             : #ifndef WIN32
     802             :         /* We need two newlocale() steps */
     803           0 :         locale_t    loc1 = 0;
     804             : 
     805           0 :         if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
     806             :         {
     807           0 :             errno = 0;
     808           0 :             loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
     809           0 :             if (!loc1)
     810           0 :                 report_newlocale_failure(collate);
     811             :         }
     812             : 
     813           0 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     814             :         {
     815           0 :             errno = 0;
     816           0 :             loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
     817           0 :             if (!loc)
     818             :             {
     819           0 :                 if (loc1)
     820           0 :                     freelocale(loc1);
     821           0 :                 report_newlocale_failure(ctype);
     822             :             }
     823             :         }
     824             :         else
     825           0 :             loc = loc1;
     826             : #else
     827             : 
     828             :         /*
     829             :          * XXX The _create_locale() API doesn't appear to support this. Could
     830             :          * perhaps be worked around by changing pg_locale_t to contain two
     831             :          * separate fields.
     832             :          */
     833             :         ereport(ERROR,
     834             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     835             :                  errmsg("collations with different collate and ctype values are not supported on this platform")));
     836             : #endif
     837             :     }
     838             : 
     839       35304 :     return loc;
     840             : }
     841             : 
     842             : /*
     843             :  * strncoll_libc
     844             :  *
     845             :  * NUL-terminate arguments, if necessary, and pass to strcoll_l().
     846             :  *
     847             :  * An input string length of -1 means that it's already NUL-terminated.
     848             :  */
     849             : int
     850    30192280 : strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
     851             :               pg_locale_t locale)
     852             : {
     853             :     char        sbuf[TEXTBUFLEN];
     854    30192280 :     char       *buf = sbuf;
     855    30192280 :     size_t      bufsize1 = (len1 == -1) ? 0 : len1 + 1;
     856    30192280 :     size_t      bufsize2 = (len2 == -1) ? 0 : len2 + 1;
     857             :     const char *arg1n;
     858             :     const char *arg2n;
     859             :     int         result;
     860             : 
     861    30192280 :     if (bufsize1 + bufsize2 > TEXTBUFLEN)
     862         568 :         buf = palloc(bufsize1 + bufsize2);
     863             : 
     864             :     /* nul-terminate arguments if necessary */
     865    30192280 :     if (len1 == -1)
     866             :     {
     867    25768894 :         arg1n = arg1;
     868             :     }
     869             :     else
     870             :     {
     871     4423386 :         char       *buf1 = buf;
     872             : 
     873     4423386 :         memcpy(buf1, arg1, len1);
     874     4423386 :         buf1[len1] = '\0';
     875     4423386 :         arg1n = buf1;
     876             :     }
     877             : 
     878    30192280 :     if (len2 == -1)
     879             :     {
     880    25768894 :         arg2n = arg2;
     881             :     }
     882             :     else
     883             :     {
     884     4423386 :         char       *buf2 = buf + bufsize1;
     885             : 
     886     4423386 :         memcpy(buf2, arg2, len2);
     887     4423386 :         buf2[len2] = '\0';
     888     4423386 :         arg2n = buf2;
     889             :     }
     890             : 
     891    30192280 :     result = strcoll_l(arg1n, arg2n, locale->lt);
     892             : 
     893    30192280 :     if (buf != sbuf)
     894         568 :         pfree(buf);
     895             : 
     896    30192280 :     return result;
     897             : }
     898             : 
     899             : /*
     900             :  * strnxfrm_libc
     901             :  *
     902             :  * NUL-terminate src, if necessary, and pass to strxfrm_l().
     903             :  *
     904             :  * A source length of -1 means that it's already NUL-terminated.
     905             :  */
     906             : size_t
     907         144 : strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
     908             :               pg_locale_t locale)
     909             : {
     910             :     char        sbuf[TEXTBUFLEN];
     911         144 :     char       *buf = sbuf;
     912         144 :     size_t      bufsize = srclen + 1;
     913             :     size_t      result;
     914             : 
     915         144 :     if (srclen == -1)
     916         144 :         return strxfrm_l(dest, src, destsize, locale->lt);
     917             : 
     918           0 :     if (bufsize > TEXTBUFLEN)
     919           0 :         buf = palloc(bufsize);
     920             : 
     921             :     /* nul-terminate argument */
     922           0 :     memcpy(buf, src, srclen);
     923           0 :     buf[srclen] = '\0';
     924             : 
     925           0 :     result = strxfrm_l(dest, buf, destsize, locale->lt);
     926             : 
     927           0 :     if (buf != sbuf)
     928           0 :         pfree(buf);
     929             : 
     930             :     /* if dest is defined, it should be nul-terminated */
     931             :     Assert(result >= destsize || dest[result] == '\0');
     932             : 
     933           0 :     return result;
     934             : }
     935             : 
     936             : char *
     937       30500 : get_collation_actual_version_libc(const char *collcollate)
     938             : {
     939       30500 :     char       *collversion = NULL;
     940             : 
     941       60824 :     if (pg_strcasecmp("C", collcollate) != 0 &&
     942       60456 :         pg_strncasecmp("C.", collcollate, 2) != 0 &&
     943       30132 :         pg_strcasecmp("POSIX", collcollate) != 0)
     944             :     {
     945             : #if defined(__GLIBC__)
     946             :         /* Use the glibc version because we don't have anything better. */
     947       30106 :         collversion = pstrdup(gnu_get_libc_version());
     948             : #elif defined(LC_VERSION_MASK)
     949             :         locale_t    loc;
     950             : 
     951             :         /* Look up FreeBSD collation version. */
     952             :         loc = newlocale(LC_COLLATE_MASK, collcollate, NULL);
     953             :         if (loc)
     954             :         {
     955             :             collversion =
     956             :                 pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
     957             :             freelocale(loc);
     958             :         }
     959             :         else
     960             :             ereport(ERROR,
     961             :                     (errmsg("could not load locale \"%s\"", collcollate)));
     962             : #elif defined(WIN32)
     963             :         /*
     964             :          * If we are targeting Windows Vista and above, we can ask for a name
     965             :          * given a collation name (earlier versions required a location code
     966             :          * that we don't have).
     967             :          */
     968             :         NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
     969             :         WCHAR       wide_collcollate[LOCALE_NAME_MAX_LENGTH];
     970             : 
     971             :         MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
     972             :                             LOCALE_NAME_MAX_LENGTH);
     973             :         if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
     974             :         {
     975             :             /*
     976             :              * GetNLSVersionEx() wants a language tag such as "en-US", not a
     977             :              * locale name like "English_United States.1252".  Until those
     978             :              * values can be prevented from entering the system, or 100%
     979             :              * reliably converted to the more useful tag format, tolerate the
     980             :              * resulting error and report that we have no version data.
     981             :              */
     982             :             if (GetLastError() == ERROR_INVALID_PARAMETER)
     983             :                 return NULL;
     984             : 
     985             :             ereport(ERROR,
     986             :                     (errmsg("could not get collation version for locale \"%s\": error code %lu",
     987             :                             collcollate,
     988             :                             GetLastError())));
     989             :         }
     990             :         collversion = psprintf("%lu.%lu,%lu.%lu",
     991             :                                (version.dwNLSVersion >> 8) & 0xFFFF,
     992             :                                version.dwNLSVersion & 0xFF,
     993             :                                (version.dwDefinedVersion >> 8) & 0xFFFF,
     994             :                                version.dwDefinedVersion & 0xFF);
     995             : #endif
     996             :     }
     997             : 
     998       30500 :     return collversion;
     999             : }
    1000             : 
    1001             : /*
    1002             :  * strncoll_libc_win32_utf8
    1003             :  *
    1004             :  * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
    1005             :  * invoke wcscoll_l().
    1006             :  *
    1007             :  * An input string length of -1 means that it's NUL-terminated.
    1008             :  */
    1009             : #ifdef WIN32
    1010             : static int
    1011             : strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
    1012             :                          ssize_t len2, pg_locale_t locale)
    1013             : {
    1014             :     char        sbuf[TEXTBUFLEN];
    1015             :     char       *buf = sbuf;
    1016             :     char       *a1p,
    1017             :                *a2p;
    1018             :     int         a1len;
    1019             :     int         a2len;
    1020             :     int         r;
    1021             :     int         result;
    1022             : 
    1023             :     Assert(GetDatabaseEncoding() == PG_UTF8);
    1024             : 
    1025             :     if (len1 == -1)
    1026             :         len1 = strlen(arg1);
    1027             :     if (len2 == -1)
    1028             :         len2 = strlen(arg2);
    1029             : 
    1030             :     a1len = len1 * 2 + 2;
    1031             :     a2len = len2 * 2 + 2;
    1032             : 
    1033             :     if (a1len + a2len > TEXTBUFLEN)
    1034             :         buf = palloc(a1len + a2len);
    1035             : 
    1036             :     a1p = buf;
    1037             :     a2p = buf + a1len;
    1038             : 
    1039             :     /* API does not work for zero-length input */
    1040             :     if (len1 == 0)
    1041             :         r = 0;
    1042             :     else
    1043             :     {
    1044             :         r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
    1045             :                                 (LPWSTR) a1p, a1len / 2);
    1046             :         if (!r)
    1047             :             ereport(ERROR,
    1048             :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1049             :                             GetLastError())));
    1050             :     }
    1051             :     ((LPWSTR) a1p)[r] = 0;
    1052             : 
    1053             :     if (len2 == 0)
    1054             :         r = 0;
    1055             :     else
    1056             :     {
    1057             :         r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
    1058             :                                 (LPWSTR) a2p, a2len / 2);
    1059             :         if (!r)
    1060             :             ereport(ERROR,
    1061             :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1062             :                             GetLastError())));
    1063             :     }
    1064             :     ((LPWSTR) a2p)[r] = 0;
    1065             : 
    1066             :     errno = 0;
    1067             :     result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->lt);
    1068             :     if (result == 2147483647)   /* _NLSCMPERROR; missing from mingw headers */
    1069             :         ereport(ERROR,
    1070             :                 (errmsg("could not compare Unicode strings: %m")));
    1071             : 
    1072             :     if (buf != sbuf)
    1073             :         pfree(buf);
    1074             : 
    1075             :     return result;
    1076             : }
    1077             : #endif                          /* WIN32 */
    1078             : 
    1079             : /* simple subroutine for reporting errors from newlocale() */
    1080             : void
    1081           0 : report_newlocale_failure(const char *localename)
    1082             : {
    1083             :     int         save_errno;
    1084             : 
    1085             :     /*
    1086             :      * Windows doesn't provide any useful error indication from
    1087             :      * _create_locale(), and BSD-derived platforms don't seem to feel they
    1088             :      * need to set errno either (even though POSIX is pretty clear that
    1089             :      * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
    1090             :      * is what to report.
    1091             :      */
    1092           0 :     if (errno == 0)
    1093           0 :         errno = ENOENT;
    1094             : 
    1095             :     /*
    1096             :      * ENOENT means "no such locale", not "no such file", so clarify that
    1097             :      * errno with an errdetail message.
    1098             :      */
    1099           0 :     save_errno = errno;         /* auxiliary funcs might change errno */
    1100           0 :     ereport(ERROR,
    1101             :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1102             :              errmsg("could not create locale \"%s\": %m",
    1103             :                     localename),
    1104             :              (save_errno == ENOENT ?
    1105             :               errdetail("The operating system could not find any locale data for the locale name \"%s\".",
    1106             :                         localename) : 0)));
    1107             : }
    1108             : 
    1109             : /*
    1110             :  * POSIX doesn't define _l-variants of these functions, but several systems
    1111             :  * have them.  We provide our own replacements here.
    1112             :  */
    1113             : #ifndef HAVE_MBSTOWCS_L
    1114             : static size_t
    1115     1144230 : mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
    1116             : {
    1117             : #ifdef WIN32
    1118             :     return _mbstowcs_l(dest, src, n, loc);
    1119             : #else
    1120             :     size_t      result;
    1121     1144230 :     locale_t    save_locale = uselocale(loc);
    1122             : 
    1123     1144230 :     result = mbstowcs(dest, src, n);
    1124     1144230 :     uselocale(save_locale);
    1125     1144230 :     return result;
    1126             : #endif
    1127             : }
    1128             : #endif
    1129             : #ifndef HAVE_WCSTOMBS_L
    1130             : static size_t
    1131     1144230 : wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
    1132             : {
    1133             : #ifdef WIN32
    1134             :     return _wcstombs_l(dest, src, n, loc);
    1135             : #else
    1136             :     size_t      result;
    1137     1144230 :     locale_t    save_locale = uselocale(loc);
    1138             : 
    1139     1144230 :     result = wcstombs(dest, src, n);
    1140     1144230 :     uselocale(save_locale);
    1141     1144230 :     return result;
    1142             : #endif
    1143             : }
    1144             : #endif
    1145             : 
    1146             : /*
    1147             :  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
    1148             :  * Therefore we keep them here rather than with the mbutils code.
    1149             :  */
    1150             : 
    1151             : /*
    1152             :  * wchar2char --- convert wide characters to multibyte format
    1153             :  *
    1154             :  * This has the same API as the standard wcstombs_l() function; in particular,
    1155             :  * tolen is the maximum number of bytes to store at *to, and *from must be
    1156             :  * zero-terminated.  The output will be zero-terminated iff there is room.
    1157             :  */
    1158             : size_t
    1159     1144230 : wchar2char(char *to, const wchar_t *from, size_t tolen, locale_t loc)
    1160             : {
    1161             :     size_t      result;
    1162             : 
    1163     1144230 :     if (tolen == 0)
    1164           0 :         return 0;
    1165             : 
    1166             : #ifdef WIN32
    1167             : 
    1168             :     /*
    1169             :      * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
    1170             :      * for some reason mbstowcs and wcstombs won't do this for us, so we use
    1171             :      * MultiByteToWideChar().
    1172             :      */
    1173             :     if (GetDatabaseEncoding() == PG_UTF8)
    1174             :     {
    1175             :         result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
    1176             :                                      NULL, NULL);
    1177             :         /* A zero return is failure */
    1178             :         if (result <= 0)
    1179             :             result = -1;
    1180             :         else
    1181             :         {
    1182             :             Assert(result <= tolen);
    1183             :             /* Microsoft counts the zero terminator in the result */
    1184             :             result--;
    1185             :         }
    1186             :     }
    1187             :     else
    1188             : #endif                          /* WIN32 */
    1189     1144230 :     if (loc == (locale_t) 0)
    1190             :     {
    1191             :         /* Use wcstombs directly for the default locale */
    1192           0 :         result = wcstombs(to, from, tolen);
    1193             :     }
    1194             :     else
    1195             :     {
    1196             :         /* Use wcstombs_l for nondefault locales */
    1197     1144230 :         result = wcstombs_l(to, from, tolen, loc);
    1198             :     }
    1199             : 
    1200     1144230 :     return result;
    1201             : }
    1202             : 
    1203             : /*
    1204             :  * char2wchar --- convert multibyte characters to wide characters
    1205             :  *
    1206             :  * This has almost the API of mbstowcs_l(), except that *from need not be
    1207             :  * null-terminated; instead, the number of input bytes is specified as
    1208             :  * fromlen.  Also, we ereport() rather than returning -1 for invalid
    1209             :  * input encoding.  tolen is the maximum number of wchar_t's to store at *to.
    1210             :  * The output will be zero-terminated iff there is room.
                     :  *
                     :  * Parameters:
                     :  *   to      - output buffer for the converted wide characters
                     :  *   tolen   - capacity of *to, counted in wchar_t's; when it is 0 the
                     :  *             function returns 0 at once and never touches *to
                     :  *   from    - input bytes; only the first fromlen bytes are read
                     :  *   fromlen - number of input bytes
                     :  *   loc     - locale to convert under; (locale_t) 0 selects mbstowcs()
                     :  *             instead of mbstowcs_l().  This argument is not consulted
                     :  *             on the WIN32 UTF-8 path.
                     :  *
                     :  * Returns the conversion routine's result, i.e. the number of wide
                     :  * characters stored, not counting a terminator.  A conversion failure is
                     :  * never returned to the caller: pg_verifymbstr() is given the chance to
                     :  * report the bad input first, and otherwise a generic ERROR is raised.
                     :  *
                     :  * On WIN32 with a UTF-8 database encoding the conversion is done with
                     :  * MultiByteToWideChar(), and one output slot is held back (tolen - 1) so
                     :  * that the terminator can always be appended.  Everywhere else the input
                     :  * is first copied into a null-terminated temporary string, which is freed
                     :  * again before returning.
                     :  *
                     :  * NOTE(review): on the WIN32 UTF-8 path, tolen == 1 with fromlen > 0
                     :  * passes an output size of 0 to MultiByteToWideChar().  The Win32
                     :  * documentation describes that as a size query that writes nothing and
                     :  * returns the required length, which would then be used as the index
                     :  * for the terminator store.  Presumably callers always pass a buffer
                     :  * larger than that --- confirm.
    1211             :  */
    1212             : static size_t
    1213     1144230 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
    1214             :            locale_t loc)
    1215             : {
    1216             :     size_t      result;
    1217             : 
    1218     1144230 :     if (tolen == 0)
    1219           0 :         return 0;
    1220             : 
    1221             : #ifdef WIN32
    1222             :     /* See WIN32 "Unicode" comment above */
    1223             :     if (GetDatabaseEncoding() == PG_UTF8)
    1224             :     {
    1225             :         /* Win32 API does not work for zero-length input */
    1226             :         if (fromlen == 0)
    1227             :             result = 0;
    1228             :         else
    1229             :         {
    1230             :             result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
    1231             :             /* A zero return is failure; turn it into the same -1 value
                     :              * that the libc branch reports, so that the shared error
                     :              * handling at the bottom covers both branches. */
    1232             :             if (result == 0)
    1233             :                 result = -1;
    1234             :         }
    1235             : 
    1236             :         if (result != -1)
    1237             :         {
    1238             :             Assert(result < tolen);
    1239             :             /* Append trailing null wchar (MultiByteToWideChar() does not);
                     :              * the slot was reserved by passing tolen - 1 above. */
    1240             :             to[result] = 0;
    1241             :         }
    1242             :     }
    1243             :     else
    1244             : #endif                          /* WIN32 */
    1245             :     {
    1246             :         /* mbstowcs requires ending '\0', which *from is not guaranteed to
                     :          * have, so convert from a terminated copy of the first fromlen
                     :          * bytes.  The copy is released with pfree() below. */
    1247     1144230 :         char       *str = pnstrdup(from, fromlen);
    1248             : 
    1249     1144230 :         if (loc == (locale_t) 0)
    1250             :         {
    1251             :             /* Use mbstowcs directly for the default locale */
    1252           0 :             result = mbstowcs(to, str, tolen);
    1253             :         }
    1254             :         else
    1255             :         {
    1256             :             /* Use mbstowcs_l for nondefault locales */
    1257     1144230 :             result = mbstowcs_l(to, str, tolen, loc);
    1258             :         }
    1259             : 
    1260     1144230 :         pfree(str);
    1261             :     }
    1262             : 
    1263     1144230 :     if (result == -1)
    1264             :     {
    1265             :         /*
    1266             :          * Invalid multibyte character encountered.  We try to give a useful
    1267             :          * error message by letting pg_verifymbstr check the string.  But it's
    1268             :          * possible that the string is OK to us, and not OK to mbstowcs ---
    1269             :          * this suggests that the LC_CTYPE locale is different from the
    1270             :          * database encoding.  Give a generic error message if pg_verifymbstr
    1271             :          * can't find anything wrong.
                     :          *
                     :          * Note that result is a size_t, so the test above is really a
                     :          * comparison against (size_t) -1.
    1272             :          */
    1273           0 :         pg_verifymbstr(from, fromlen, false);   /* might not return */
    1274             :         /* but if it does ... */
    1275           0 :         ereport(ERROR,
    1276             :                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
    1277             :                  errmsg("invalid multibyte character for locale"),
    1278             :                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
    1279             :     }
    1280             : 
    1281     1144230 :     return result;
    1282             : }

Generated by: LCOV version 1.16