LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_libc.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 180 319 56.4 %
Date: 2025-12-03 05:18:44 Functions: 24 42 57.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-----------------------------------------------------------------------
       2             :  *
       3             :  * PostgreSQL locale utilities for libc
       4             :  *
       5             :  * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
       6             :  *
       7             :  * src/backend/utils/adt/pg_locale_libc.c
       8             :  *
       9             :  *-----------------------------------------------------------------------
      10             :  */
      11             : 
      12             : #include "postgres.h"
      13             : 
      14             : #include <limits.h>
      15             : #include <wctype.h>
      16             : 
      17             : #include "access/htup_details.h"
      18             : #include "catalog/pg_database.h"
      19             : #include "catalog/pg_collation.h"
      20             : #include "mb/pg_wchar.h"
      21             : #include "miscadmin.h"
      22             : #include "utils/builtins.h"
      23             : #include "utils/formatting.h"
      24             : #include "utils/memutils.h"
      25             : #include "utils/pg_locale.h"
      26             : #include "utils/syscache.h"
      27             : 
      28             : #ifdef __GLIBC__
      29             : #include <gnu/libc-version.h>
      30             : #endif
      31             : 
      32             : #ifdef WIN32
      33             : #include <shlwapi.h>
      34             : #endif
      35             : 
      36             : /*
      37             :  * For the libc provider, to provide as much functionality as possible on a
      38             :  * variety of platforms without going so far as to implement everything from
      39             :  * scratch, we use several implementation strategies depending on the
      40             :  * situation:
      41             :  *
      42             :  * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
      43             :  * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
      44             :  * collations don't give a fig about multibyte characters.
      45             :  *
      46             :  * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
      47             :  * This assumes that every platform uses Unicode codepoints directly
      48             :  * as the wchar_t representation of Unicode.  On some platforms
      49             :  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
      50             :  *
      51             :  * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
      52             :  * values up to 255, and punt for values above that.  This is 100% correct
      53             :  * only in single-byte encodings such as LATINn.  However, non-Unicode
      54             :  * multibyte encodings are mostly Far Eastern character sets for which the
      55             :  * properties being tested here aren't very relevant for higher code values
      56             :  * anyway.  The difficulty with using the <wctype.h> functions with
      57             :  * non-Unicode multibyte encodings is that we can have no certainty that
      58             :  * the platform's wchar_t representation matches what we do in pg_wchar
      59             :  * conversions.
      60             :  *
      61             :  * As a special case, in the "default" collation, (2) and (3) force ASCII
      62             :  * letters to follow ASCII upcase/downcase rules, while in a non-default
      63             :  * collation we just let the library functions do what they will.  The case
      64             :  * where this matters is treatment of I/i in Turkish, and the behavior is
      65             :  * meant to match the upper()/lower() SQL functions.
      66             :  *
      67             :  * We store the active collation setting in static variables.  In principle
      68             :  * it could be passed down to here via the regex library's "struct vars" data
      69             :  * structure; but that would require somewhat invasive changes in the regex
      70             :  * library, and right now there's no real benefit to be gained from that.
      71             :  *
      72             :  * NB: the coding here assumes pg_wchar is an unsigned type.
      73             :  */
      74             : 
      75             : /*
      76             :  * Size of stack buffer to use for string transformations, used to avoid heap
      77             :  * allocations in typical cases. This should be large enough that most strings
      78             :  * will fit, but small enough that we feel comfortable putting it on the
      79             :  * stack.
      80             :  */
      81             : #define     TEXTBUFLEN          1024
      82             : 
/* Constructor for libc-provider locale objects (not static: has external linkage). */
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);

/*
 * Collation callbacks.  strncoll_libc and strnxfrm_libc are installed in the
 * collate_methods tables defined further down in this file.
 */
static int  strncoll_libc(const char *arg1, ssize_t len1,
                          const char *arg2, ssize_t len2,
                          pg_locale_t locale);
static size_t strnxfrm_libc(char *dest, size_t destsize,
                            const char *src, ssize_t srclen,
                            pg_locale_t locale);
extern char *get_collation_actual_version_libc(const char *collcollate);
static locale_t make_libc_collator(const char *collate,
                                   const char *ctype);

/* Windows-only comparison routine, installed in collate_methods_libc_win32_utf8. */
#ifdef WIN32
static int  strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
                                     const char *arg2, ssize_t len2,
                                     pg_locale_t locale);
#endif

/* Multibyte-to-wchar_t conversion used by the *_libc_mb case-mapping routines. */
static size_t char2wchar(wchar_t *to, size_t tolen, const char *from,
                         size_t fromlen, locale_t loc);

/*
 * Case-mapping callbacks.  The _sb variants work byte-by-byte with <ctype.h>
 * functions; the _mb variants convert to wchar_t and use <wctype.h>
 * functions.  They are declared here because the ctype_methods tables that
 * reference them precede their definitions.
 */
static size_t strlower_libc_sb(char *dest, size_t destsize,
                               const char *src, ssize_t srclen,
                               pg_locale_t locale);
static size_t strlower_libc_mb(char *dest, size_t destsize,
                               const char *src, ssize_t srclen,
                               pg_locale_t locale);
static size_t strtitle_libc_sb(char *dest, size_t destsize,
                               const char *src, ssize_t srclen,
                               pg_locale_t locale);
static size_t strtitle_libc_mb(char *dest, size_t destsize,
                               const char *src, ssize_t srclen,
                               pg_locale_t locale);
static size_t strupper_libc_sb(char *dest, size_t destsize,
                               const char *src, ssize_t srclen,
                               pg_locale_t locale);
static size_t strupper_libc_mb(char *dest, size_t destsize,
                               const char *src, ssize_t srclen,
                               pg_locale_t locale);
     122             : 
     123             : static bool
     124           0 : wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
     125             : {
     126           0 :     return isdigit_l((unsigned char) wc, locale->lt);
     127             : }
     128             : 
     129             : static bool
     130           0 : wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
     131             : {
     132           0 :     return isalpha_l((unsigned char) wc, locale->lt);
     133             : }
     134             : 
     135             : static bool
     136           0 : wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
     137             : {
     138           0 :     return isalnum_l((unsigned char) wc, locale->lt);
     139             : }
     140             : 
     141             : static bool
     142           0 : wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
     143             : {
     144           0 :     return isupper_l((unsigned char) wc, locale->lt);
     145             : }
     146             : 
     147             : static bool
     148           0 : wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
     149             : {
     150           0 :     return islower_l((unsigned char) wc, locale->lt);
     151             : }
     152             : 
     153             : static bool
     154           0 : wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
     155             : {
     156           0 :     return isgraph_l((unsigned char) wc, locale->lt);
     157             : }
     158             : 
     159             : static bool
     160           0 : wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
     161             : {
     162           0 :     return isprint_l((unsigned char) wc, locale->lt);
     163             : }
     164             : 
     165             : static bool
     166           0 : wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
     167             : {
     168           0 :     return ispunct_l((unsigned char) wc, locale->lt);
     169             : }
     170             : 
     171             : static bool
     172           0 : wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
     173             : {
     174           0 :     return isspace_l((unsigned char) wc, locale->lt);
     175             : }
     176             : 
     177             : static bool
     178           0 : wc_isxdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
     179             : {
     180             : #ifndef WIN32
     181           0 :     return isxdigit_l((unsigned char) wc, locale->lt);
     182             : #else
     183             :     return _isxdigit_l((unsigned char) wc, locale->lt);
     184             : #endif
     185             : }
     186             : 
     187             : static bool
     188      131608 : wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
     189             : {
     190      131608 :     return iswdigit_l((wint_t) wc, locale->lt);
     191             : }
     192             : 
     193             : static bool
     194       81148 : wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
     195             : {
     196       81148 :     return iswalpha_l((wint_t) wc, locale->lt);
     197             : }
     198             : 
     199             : static bool
     200     2845676 : wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
     201             : {
     202     2845676 :     return iswalnum_l((wint_t) wc, locale->lt);
     203             : }
     204             : 
     205             : static bool
     206        4112 : wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
     207             : {
     208        4112 :     return iswupper_l((wint_t) wc, locale->lt);
     209             : }
     210             : 
     211             : static bool
     212        4102 : wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
     213             : {
     214        4102 :     return iswlower_l((wint_t) wc, locale->lt);
     215             : }
     216             : 
     217             : static bool
     218        4102 : wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
     219             : {
     220        4102 :     return iswgraph_l((wint_t) wc, locale->lt);
     221             : }
     222             : 
     223             : static bool
     224        4102 : wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
     225             : {
     226        4102 :     return iswprint_l((wint_t) wc, locale->lt);
     227             : }
     228             : 
     229             : static bool
     230        4102 : wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
     231             : {
     232        4102 :     return iswpunct_l((wint_t) wc, locale->lt);
     233             : }
     234             : 
     235             : static bool
     236       48152 : wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
     237             : {
     238       48152 :     return iswspace_l((wint_t) wc, locale->lt);
     239             : }
     240             : 
     241             : static bool
     242          12 : wc_isxdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
     243             : {
     244             : #ifndef WIN32
     245          12 :     return iswxdigit_l((wint_t) wc, locale->lt);
     246             : #else
     247             :     return _iswxdigit_l((wint_t) wc, locale->lt);
     248             : #endif
     249             : }
     250             : 
     251             : static char
     252           0 : char_tolower_libc(unsigned char ch, pg_locale_t locale)
     253             : {
     254             :     Assert(pg_database_encoding_max_length() == 1);
     255           0 :     return tolower_l(ch, locale->lt);
     256             : }
     257             : 
     258             : static bool
     259           0 : char_is_cased_libc(char ch, pg_locale_t locale)
     260             : {
     261           0 :     bool        is_multibyte = pg_database_encoding_max_length() > 1;
     262             : 
     263           0 :     if (is_multibyte && IS_HIGHBIT_SET(ch))
     264           0 :         return true;
     265             :     else
     266           0 :         return isalpha_l((unsigned char) ch, locale->lt);
     267             : }
     268             : 
     269             : static pg_wchar
     270           0 : toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
     271             : {
     272             :     Assert(GetDatabaseEncoding() != PG_UTF8);
     273             : 
     274             :     /* force C behavior for ASCII characters, per comments above */
     275           0 :     if (locale->is_default && wc <= (pg_wchar) 127)
     276           0 :         return pg_ascii_toupper((unsigned char) wc);
     277           0 :     if (wc <= (pg_wchar) UCHAR_MAX)
     278           0 :         return toupper_l((unsigned char) wc, locale->lt);
     279             :     else
     280           0 :         return wc;
     281             : }
     282             : 
     283             : static pg_wchar
     284        9088 : toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
     285             : {
     286             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     287             : 
     288             :     /* force C behavior for ASCII characters, per comments above */
     289        9088 :     if (locale->is_default && wc <= (pg_wchar) 127)
     290         892 :         return pg_ascii_toupper((unsigned char) wc);
     291             :     if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
     292        8196 :         return towupper_l((wint_t) wc, locale->lt);
     293             :     else
     294             :         return wc;
     295             : }
     296             : 
     297             : static pg_wchar
     298           0 : tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
     299             : {
     300             :     Assert(GetDatabaseEncoding() != PG_UTF8);
     301             : 
     302             :     /* force C behavior for ASCII characters, per comments above */
     303           0 :     if (locale->is_default && wc <= (pg_wchar) 127)
     304           0 :         return pg_ascii_tolower((unsigned char) wc);
     305           0 :     if (wc <= (pg_wchar) UCHAR_MAX)
     306           0 :         return tolower_l((unsigned char) wc, locale->lt);
     307             :     else
     308           0 :         return wc;
     309             : }
     310             : 
     311             : static pg_wchar
     312        9092 : tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
     313             : {
     314             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     315             : 
     316             :     /* force C behavior for ASCII characters, per comments above */
     317        9092 :     if (locale->is_default && wc <= (pg_wchar) 127)
     318         896 :         return pg_ascii_tolower((unsigned char) wc);
     319             :     if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
     320        8196 :         return towlower_l((wint_t) wc, locale->lt);
     321             :     else
     322             :         return wc;
     323             : }
     324             : 
     325             : static const struct ctype_methods ctype_methods_libc_sb = {
     326             :     .strlower = strlower_libc_sb,
     327             :     .strtitle = strtitle_libc_sb,
     328             :     .strupper = strupper_libc_sb,
     329             :     /* in libc, casefolding is the same as lowercasing */
     330             :     .strfold = strlower_libc_sb,
     331             :     .wc_isdigit = wc_isdigit_libc_sb,
     332             :     .wc_isalpha = wc_isalpha_libc_sb,
     333             :     .wc_isalnum = wc_isalnum_libc_sb,
     334             :     .wc_isupper = wc_isupper_libc_sb,
     335             :     .wc_islower = wc_islower_libc_sb,
     336             :     .wc_isgraph = wc_isgraph_libc_sb,
     337             :     .wc_isprint = wc_isprint_libc_sb,
     338             :     .wc_ispunct = wc_ispunct_libc_sb,
     339             :     .wc_isspace = wc_isspace_libc_sb,
     340             :     .wc_isxdigit = wc_isxdigit_libc_sb,
     341             :     .char_is_cased = char_is_cased_libc,
     342             :     .char_tolower = char_tolower_libc,
     343             :     .wc_toupper = toupper_libc_sb,
     344             :     .wc_tolower = tolower_libc_sb,
     345             : };
     346             : 
     347             : /*
     348             :  * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
     349             :  * single-byte semantics for pattern matching.
     350             :  */
     351             : static const struct ctype_methods ctype_methods_libc_other_mb = {
     352             :     .strlower = strlower_libc_mb,
     353             :     .strtitle = strtitle_libc_mb,
     354             :     .strupper = strupper_libc_mb,
     355             :     /* in libc, casefolding is the same as lowercasing */
     356             :     .strfold = strlower_libc_mb,
     357             :     .wc_isdigit = wc_isdigit_libc_sb,
     358             :     .wc_isalpha = wc_isalpha_libc_sb,
     359             :     .wc_isalnum = wc_isalnum_libc_sb,
     360             :     .wc_isupper = wc_isupper_libc_sb,
     361             :     .wc_islower = wc_islower_libc_sb,
     362             :     .wc_isgraph = wc_isgraph_libc_sb,
     363             :     .wc_isprint = wc_isprint_libc_sb,
     364             :     .wc_ispunct = wc_ispunct_libc_sb,
     365             :     .wc_isspace = wc_isspace_libc_sb,
     366             :     .wc_isxdigit = wc_isxdigit_libc_sb,
     367             :     .char_is_cased = char_is_cased_libc,
     368             :     .char_tolower = char_tolower_libc,
     369             :     .wc_toupper = toupper_libc_sb,
     370             :     .wc_tolower = tolower_libc_sb,
     371             : };
     372             : 
     373             : static const struct ctype_methods ctype_methods_libc_utf8 = {
     374             :     .strlower = strlower_libc_mb,
     375             :     .strtitle = strtitle_libc_mb,
     376             :     .strupper = strupper_libc_mb,
     377             :     /* in libc, casefolding is the same as lowercasing */
     378             :     .strfold = strlower_libc_mb,
     379             :     .wc_isdigit = wc_isdigit_libc_mb,
     380             :     .wc_isalpha = wc_isalpha_libc_mb,
     381             :     .wc_isalnum = wc_isalnum_libc_mb,
     382             :     .wc_isupper = wc_isupper_libc_mb,
     383             :     .wc_islower = wc_islower_libc_mb,
     384             :     .wc_isgraph = wc_isgraph_libc_mb,
     385             :     .wc_isprint = wc_isprint_libc_mb,
     386             :     .wc_ispunct = wc_ispunct_libc_mb,
     387             :     .wc_isspace = wc_isspace_libc_mb,
     388             :     .wc_isxdigit = wc_isxdigit_libc_mb,
     389             :     .char_is_cased = char_is_cased_libc,
     390             :     .char_tolower = char_tolower_libc,
     391             :     .wc_toupper = toupper_libc_mb,
     392             :     .wc_tolower = tolower_libc_mb,
     393             : };
     394             : 
     395             : static const struct collate_methods collate_methods_libc = {
     396             :     .strncoll = strncoll_libc,
     397             :     .strnxfrm = strnxfrm_libc,
     398             :     .strnxfrm_prefix = NULL,
     399             : 
     400             :     /*
     401             :      * Unfortunately, it seems that strxfrm() for non-C collations is broken
     402             :      * on many common platforms; testing of multiple versions of glibc reveals
     403             :      * that, for many locales, strcoll() and strxfrm() do not return
     404             :      * consistent results. While no other libc other than Cygwin has so far
     405             :      * been shown to have a problem, we take the conservative course of action
     406             :      * for right now and disable this categorically.  (Users who are certain
     407             :      * this isn't a problem on their system can define TRUST_STRXFRM.)
     408             :      */
     409             : #ifdef TRUST_STRXFRM
     410             :     .strxfrm_is_safe = true,
     411             : #else
     412             :     .strxfrm_is_safe = false,
     413             : #endif
     414             : };
     415             : 
     416             : #ifdef WIN32
     417             : static const struct collate_methods collate_methods_libc_win32_utf8 = {
     418             :     .strncoll = strncoll_libc_win32_utf8,
     419             :     .strnxfrm = strnxfrm_libc,
     420             :     .strnxfrm_prefix = NULL,
     421             : #ifdef TRUST_STRXFRM
     422             :     .strxfrm_is_safe = true,
     423             : #else
     424             :     .strxfrm_is_safe = false,
     425             : #endif
     426             : };
     427             : #endif
     428             : 
     429             : static size_t
     430           0 : strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     431             :                  pg_locale_t locale)
     432             : {
     433           0 :     if (srclen < 0)
     434           0 :         srclen = strlen(src);
     435             : 
     436           0 :     if (srclen + 1 <= destsize)
     437             :     {
     438           0 :         locale_t    loc = locale->lt;
     439             :         char       *p;
     440             : 
     441           0 :         memcpy(dest, src, srclen);
     442           0 :         dest[srclen] = '\0';
     443             : 
     444             :         /*
     445             :          * Note: we assume that tolower_l() will not be so broken as to need
     446             :          * an isupper_l() guard test.  When using the default collation, we
     447             :          * apply the traditional Postgres behavior that forces ASCII-style
     448             :          * treatment of I/i, but in non-default collations you get exactly
     449             :          * what the collation says.
     450             :          */
     451           0 :         for (p = dest; *p; p++)
     452             :         {
     453           0 :             if (locale->is_default)
     454             :             {
     455           0 :                 if (*p >= 'A' && *p <= 'Z')
     456           0 :                     *p += 'a' - 'A';
     457           0 :                 else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
     458           0 :                     *p = tolower_l((unsigned char) *p, loc);
     459             :             }
     460             :             else
     461           0 :                 *p = tolower_l((unsigned char) *p, loc);
     462             :         }
     463             :     }
     464             : 
     465           0 :     return srclen;
     466             : }
     467             : 
     468             : static size_t
     469      425022 : strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     470             :                  pg_locale_t locale)
     471             : {
     472      425022 :     locale_t    loc = locale->lt;
     473             :     size_t      result_size;
     474             :     wchar_t    *workspace;
     475             :     char       *result;
     476             :     size_t      curr_char;
     477             :     size_t      max_size;
     478             : 
     479      425022 :     if (srclen < 0)
     480           0 :         srclen = strlen(src);
     481             : 
     482             :     /* Overflow paranoia */
     483      425022 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     484           0 :         ereport(ERROR,
     485             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     486             :                  errmsg("out of memory")));
     487             : 
     488             :     /* Output workspace cannot have more codes than input bytes */
     489      425022 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     490             : 
     491      425022 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     492             : 
     493     3669786 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     494     3244764 :         workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     495             : 
     496             :     /*
     497             :      * Make result large enough; case change might change number of bytes
     498             :      */
     499      425022 :     max_size = curr_char * pg_database_encoding_max_length();
     500      425022 :     result = palloc(max_size + 1);
     501             : 
     502      425022 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     503             : 
     504      425022 :     if (result_size + 1 > destsize)
     505           0 :         return result_size;
     506             : 
     507      425022 :     memcpy(dest, result, result_size);
     508      425022 :     dest[result_size] = '\0';
     509             : 
     510      425022 :     pfree(workspace);
     511      425022 :     pfree(result);
     512             : 
     513      425022 :     return result_size;
     514             : }
     515             : 
     516             : static size_t
     517           0 : strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     518             :                  pg_locale_t locale)
     519             : {
     520           0 :     if (srclen < 0)
     521           0 :         srclen = strlen(src);
     522             : 
     523           0 :     if (srclen + 1 <= destsize)
     524             :     {
     525           0 :         locale_t    loc = locale->lt;
     526           0 :         int         wasalnum = false;
     527             :         char       *p;
     528             : 
     529           0 :         memcpy(dest, src, srclen);
     530           0 :         dest[srclen] = '\0';
     531             : 
     532             :         /*
     533             :          * Note: we assume that toupper_l()/tolower_l() will not be so broken
     534             :          * as to need guard tests.  When using the default collation, we apply
     535             :          * the traditional Postgres behavior that forces ASCII-style treatment
     536             :          * of I/i, but in non-default collations you get exactly what the
     537             :          * collation says.
     538             :          */
     539           0 :         for (p = dest; *p; p++)
     540             :         {
     541           0 :             if (locale->is_default)
     542             :             {
     543           0 :                 if (wasalnum)
     544             :                 {
     545           0 :                     if (*p >= 'A' && *p <= 'Z')
     546           0 :                         *p += 'a' - 'A';
     547           0 :                     else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
     548           0 :                         *p = tolower_l((unsigned char) *p, loc);
     549             :                 }
     550             :                 else
     551             :                 {
     552           0 :                     if (*p >= 'a' && *p <= 'z')
     553           0 :                         *p -= 'a' - 'A';
     554           0 :                     else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
     555           0 :                         *p = toupper_l((unsigned char) *p, loc);
     556             :                 }
     557             :             }
     558             :             else
     559             :             {
     560           0 :                 if (wasalnum)
     561           0 :                     *p = tolower_l((unsigned char) *p, loc);
     562             :                 else
     563           0 :                     *p = toupper_l((unsigned char) *p, loc);
     564             :             }
     565           0 :             wasalnum = isalnum_l((unsigned char) *p, loc);
     566             :         }
     567             :     }
     568             : 
     569           0 :     return srclen;
     570             : }
     571             : 
     572             : static size_t
     573           8 : strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     574             :                  pg_locale_t locale)
     575             : {
     576           8 :     locale_t    loc = locale->lt;
     577           8 :     int         wasalnum = false;
     578             :     size_t      result_size;
     579             :     wchar_t    *workspace;
     580             :     char       *result;
     581             :     size_t      curr_char;
     582             :     size_t      max_size;
     583             : 
     584           8 :     if (srclen < 0)
     585           0 :         srclen = strlen(src);
     586             : 
     587             :     /* Overflow paranoia */
     588           8 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     589           0 :         ereport(ERROR,
     590             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     591             :                  errmsg("out of memory")));
     592             : 
     593             :     /* Output workspace cannot have more codes than input bytes */
     594           8 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     595             : 
     596           8 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     597             : 
     598          80 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     599             :     {
     600          72 :         if (wasalnum)
     601          56 :             workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     602             :         else
     603          16 :             workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     604          72 :         wasalnum = iswalnum_l(workspace[curr_char], loc);
     605             :     }
     606             : 
     607             :     /*
     608             :      * Make result large enough; case change might change number of bytes
     609             :      */
     610           8 :     max_size = curr_char * pg_database_encoding_max_length();
     611           8 :     result = palloc(max_size + 1);
     612             : 
     613           8 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     614             : 
     615           8 :     if (result_size + 1 > destsize)
     616           0 :         return result_size;
     617             : 
     618           8 :     memcpy(dest, result, result_size);
     619           8 :     dest[result_size] = '\0';
     620             : 
     621           8 :     pfree(workspace);
     622           8 :     pfree(result);
     623             : 
     624           8 :     return result_size;
     625             : }
     626             : 
     627             : static size_t
     628           0 : strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     629             :                  pg_locale_t locale)
     630             : {
     631           0 :     if (srclen < 0)
     632           0 :         srclen = strlen(src);
     633             : 
     634           0 :     if (srclen + 1 <= destsize)
     635             :     {
     636           0 :         locale_t    loc = locale->lt;
     637             :         char       *p;
     638             : 
     639           0 :         memcpy(dest, src, srclen);
     640           0 :         dest[srclen] = '\0';
     641             : 
     642             :         /*
     643             :          * Note: we assume that toupper_l() will not be so broken as to need
     644             :          * an islower_l() guard test.  When using the default collation, we
     645             :          * apply the traditional Postgres behavior that forces ASCII-style
     646             :          * treatment of I/i, but in non-default collations you get exactly
     647             :          * what the collation says.
     648             :          */
     649           0 :         for (p = dest; *p; p++)
     650             :         {
     651           0 :             if (locale->is_default)
     652             :             {
     653           0 :                 if (*p >= 'a' && *p <= 'z')
     654           0 :                     *p -= 'a' - 'A';
     655           0 :                 else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
     656           0 :                     *p = toupper_l((unsigned char) *p, loc);
     657             :             }
     658             :             else
     659           0 :                 *p = toupper_l((unsigned char) *p, loc);
     660             :         }
     661             :     }
     662             : 
     663           0 :     return srclen;
     664             : }
     665             : 
     666             : static size_t
     667      719980 : strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     668             :                  pg_locale_t locale)
     669             : {
     670      719980 :     locale_t    loc = locale->lt;
     671             :     size_t      result_size;
     672             :     wchar_t    *workspace;
     673             :     char       *result;
     674             :     size_t      curr_char;
     675             :     size_t      max_size;
     676             : 
     677      719980 :     if (srclen < 0)
     678           0 :         srclen = strlen(src);
     679             : 
     680             :     /* Overflow paranoia */
     681      719980 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     682           0 :         ereport(ERROR,
     683             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     684             :                  errmsg("out of memory")));
     685             : 
     686             :     /* Output workspace cannot have more codes than input bytes */
     687      719980 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     688             : 
     689      719980 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     690             : 
     691     2372434 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     692     1652454 :         workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     693             : 
     694             :     /*
     695             :      * Make result large enough; case change might change number of bytes
     696             :      */
     697      719980 :     max_size = curr_char * pg_database_encoding_max_length();
     698      719980 :     result = palloc(max_size + 1);
     699             : 
     700      719980 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     701             : 
     702      719980 :     if (result_size + 1 > destsize)
     703           0 :         return result_size;
     704             : 
     705      719980 :     memcpy(dest, result, result_size);
     706      719980 :     dest[result_size] = '\0';
     707             : 
     708      719980 :     pfree(workspace);
     709      719980 :     pfree(result);
     710             : 
     711      719980 :     return result_size;
     712             : }
     713             : 
     714             : pg_locale_t
     715       31914 : create_pg_locale_libc(Oid collid, MemoryContext context)
     716             : {
     717             :     const char *collate;
     718             :     const char *ctype;
     719             :     locale_t    loc;
     720             :     pg_locale_t result;
     721             : 
     722       31914 :     if (collid == DEFAULT_COLLATION_OID)
     723             :     {
     724             :         HeapTuple   tp;
     725             :         Datum       datum;
     726             : 
     727       31824 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     728       31824 :         if (!HeapTupleIsValid(tp))
     729           0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     730       31824 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     731             :                                        Anum_pg_database_datcollate);
     732       31824 :         collate = TextDatumGetCString(datum);
     733       31824 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     734             :                                        Anum_pg_database_datctype);
     735       31824 :         ctype = TextDatumGetCString(datum);
     736             : 
     737       31824 :         ReleaseSysCache(tp);
     738             :     }
     739             :     else
     740             :     {
     741             :         HeapTuple   tp;
     742             :         Datum       datum;
     743             : 
     744          90 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     745          90 :         if (!HeapTupleIsValid(tp))
     746           0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     747             : 
     748          90 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     749             :                                        Anum_pg_collation_collcollate);
     750          90 :         collate = TextDatumGetCString(datum);
     751          90 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     752             :                                        Anum_pg_collation_collctype);
     753          90 :         ctype = TextDatumGetCString(datum);
     754             : 
     755          90 :         ReleaseSysCache(tp);
     756             :     }
     757             : 
     758             : 
     759       31914 :     loc = make_libc_collator(collate, ctype);
     760             : 
     761       31914 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     762       31914 :     result->deterministic = true;
     763       62696 :     result->collate_is_c = (strcmp(collate, "C") == 0) ||
     764       30782 :         (strcmp(collate, "POSIX") == 0);
     765       62696 :     result->ctype_is_c = (strcmp(ctype, "C") == 0) ||
     766       30782 :         (strcmp(ctype, "POSIX") == 0);
     767       31914 :     result->lt = loc;
     768       31914 :     if (!result->collate_is_c)
     769             :     {
     770             : #ifdef WIN32
     771             :         if (GetDatabaseEncoding() == PG_UTF8)
     772             :             result->collate = &collate_methods_libc_win32_utf8;
     773             :         else
     774             : #endif
     775       30718 :             result->collate = &collate_methods_libc;
     776             :     }
     777       31914 :     if (!result->ctype_is_c)
     778             :     {
     779       30718 :         if (GetDatabaseEncoding() == PG_UTF8)
     780       30654 :             result->ctype = &ctype_methods_libc_utf8;
     781          64 :         else if (pg_database_encoding_max_length() > 1)
     782           0 :             result->ctype = &ctype_methods_libc_other_mb;
     783             :         else
     784          64 :             result->ctype = &ctype_methods_libc_sb;
     785             :     }
     786             : 
     787       31914 :     return result;
     788             : }
     789             : 
     790             : /*
     791             :  * Create a locale_t with the given collation and ctype.
     792             :  *
     793             :  * The "C" and "POSIX" locales are not actually handled by libc, so return
     794             :  * NULL.
     795             :  *
     796             :  * Ensure that no path leaks a locale_t.
     797             :  */
     798             : static locale_t
     799       31914 : make_libc_collator(const char *collate, const char *ctype)
     800             : {
     801       31914 :     locale_t    loc = 0;
     802             : 
     803       31914 :     if (strcmp(collate, ctype) == 0)
     804             :     {
     805       31914 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     806             :         {
     807             :             /* Normal case where they're the same */
     808       30718 :             errno = 0;
     809             : #ifndef WIN32
     810       30718 :             loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
     811             :                             NULL);
     812             : #else
     813             :             loc = _create_locale(LC_ALL, collate);
     814             : #endif
     815       30718 :             if (!loc)
     816           0 :                 report_newlocale_failure(collate);
     817             :         }
     818             :     }
     819             :     else
     820             :     {
     821             : #ifndef WIN32
     822             :         /* We need two newlocale() steps */
     823           0 :         locale_t    loc1 = 0;
     824             : 
     825           0 :         if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
     826             :         {
     827           0 :             errno = 0;
     828           0 :             loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
     829           0 :             if (!loc1)
     830           0 :                 report_newlocale_failure(collate);
     831             :         }
     832             : 
     833           0 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     834             :         {
     835           0 :             errno = 0;
     836           0 :             loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
     837           0 :             if (!loc)
     838             :             {
     839           0 :                 if (loc1)
     840           0 :                     freelocale(loc1);
     841           0 :                 report_newlocale_failure(ctype);
     842             :             }
     843             :         }
     844             :         else
     845           0 :             loc = loc1;
     846             : #else
     847             : 
     848             :         /*
     849             :          * XXX The _create_locale() API doesn't appear to support this. Could
     850             :          * perhaps be worked around by changing pg_locale_t to contain two
     851             :          * separate fields.
     852             :          */
     853             :         ereport(ERROR,
     854             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     855             :                  errmsg("collations with different collate and ctype values are not supported on this platform")));
     856             : #endif
     857             :     }
     858             : 
     859       31914 :     return loc;
     860             : }
     861             : 
     862             : /*
     863             :  * strncoll_libc
     864             :  *
     865             :  * NUL-terminate arguments, if necessary, and pass to strcoll_l().
     866             :  *
     867             :  * An input string length of -1 means that it's already NUL-terminated.
     868             :  */
     869             : int
     870    30180656 : strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
     871             :               pg_locale_t locale)
     872             : {
     873             :     char        sbuf[TEXTBUFLEN];
     874    30180656 :     char       *buf = sbuf;
     875    30180656 :     size_t      bufsize1 = (len1 == -1) ? 0 : len1 + 1;
     876    30180656 :     size_t      bufsize2 = (len2 == -1) ? 0 : len2 + 1;
     877             :     const char *arg1n;
     878             :     const char *arg2n;
     879             :     int         result;
     880             : 
     881    30180656 :     if (bufsize1 + bufsize2 > TEXTBUFLEN)
     882         568 :         buf = palloc(bufsize1 + bufsize2);
     883             : 
     884             :     /* nul-terminate arguments if necessary */
     885    30180656 :     if (len1 == -1)
     886             :     {
     887    25754254 :         arg1n = arg1;
     888             :     }
     889             :     else
     890             :     {
     891     4426402 :         char       *buf1 = buf;
     892             : 
     893     4426402 :         memcpy(buf1, arg1, len1);
     894     4426402 :         buf1[len1] = '\0';
     895     4426402 :         arg1n = buf1;
     896             :     }
     897             : 
     898    30180656 :     if (len2 == -1)
     899             :     {
     900    25754254 :         arg2n = arg2;
     901             :     }
     902             :     else
     903             :     {
     904     4426402 :         char       *buf2 = buf + bufsize1;
     905             : 
     906     4426402 :         memcpy(buf2, arg2, len2);
     907     4426402 :         buf2[len2] = '\0';
     908     4426402 :         arg2n = buf2;
     909             :     }
     910             : 
     911    30180656 :     result = strcoll_l(arg1n, arg2n, locale->lt);
     912             : 
     913    30180656 :     if (buf != sbuf)
     914         568 :         pfree(buf);
     915             : 
     916    30180656 :     return result;
     917             : }
     918             : 
     919             : /*
     920             :  * strnxfrm_libc
     921             :  *
     922             :  * NUL-terminate src, if necessary, and pass to strxfrm_l().
     923             :  *
     924             :  * A source length of -1 means that it's already NUL-terminated.
     925             :  */
     926             : size_t
     927         144 : strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
     928             :               pg_locale_t locale)
     929             : {
     930             :     char        sbuf[TEXTBUFLEN];
     931         144 :     char       *buf = sbuf;
     932         144 :     size_t      bufsize = srclen + 1;
     933             :     size_t      result;
     934             : 
     935         144 :     if (srclen == -1)
     936         144 :         return strxfrm_l(dest, src, destsize, locale->lt);
     937             : 
     938           0 :     if (bufsize > TEXTBUFLEN)
     939           0 :         buf = palloc(bufsize);
     940             : 
     941             :     /* nul-terminate argument */
     942           0 :     memcpy(buf, src, srclen);
     943           0 :     buf[srclen] = '\0';
     944             : 
     945           0 :     result = strxfrm_l(dest, buf, destsize, locale->lt);
     946             : 
     947           0 :     if (buf != sbuf)
     948           0 :         pfree(buf);
     949             : 
     950             :     /* if dest is defined, it should be nul-terminated */
     951             :     Assert(result >= destsize || dest[result] == '\0');
     952             : 
     953           0 :     return result;
     954             : }
     955             : 
     956             : char *
     957       31046 : get_collation_actual_version_libc(const char *collcollate)
     958             : {
     959       31046 :     char       *collversion = NULL;
     960             : 
     961       61916 :     if (pg_strcasecmp("C", collcollate) != 0 &&
     962       61548 :         pg_strncasecmp("C.", collcollate, 2) != 0 &&
     963       30678 :         pg_strcasecmp("POSIX", collcollate) != 0)
     964             :     {
     965             : #if defined(__GLIBC__)
     966             :         /* Use the glibc version because we don't have anything better. */
     967       30652 :         collversion = pstrdup(gnu_get_libc_version());
     968             : #elif defined(LC_VERSION_MASK)
     969             :         locale_t    loc;
     970             : 
     971             :         /* Look up FreeBSD collation version. */
     972             :         loc = newlocale(LC_COLLATE_MASK, collcollate, NULL);
     973             :         if (loc)
     974             :         {
     975             :             collversion =
     976             :                 pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
     977             :             freelocale(loc);
     978             :         }
     979             :         else
     980             :             ereport(ERROR,
     981             :                     (errmsg("could not load locale \"%s\"", collcollate)));
     982             : #elif defined(WIN32)
     983             :         /*
     984             :          * If we are targeting Windows Vista and above, we can ask for a name
     985             :          * given a collation name (earlier versions required a location code
     986             :          * that we don't have).
     987             :          */
     988             :         NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
     989             :         WCHAR       wide_collcollate[LOCALE_NAME_MAX_LENGTH];
     990             : 
     991             :         MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
     992             :                             LOCALE_NAME_MAX_LENGTH);
     993             :         if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
     994             :         {
     995             :             /*
     996             :              * GetNLSVersionEx() wants a language tag such as "en-US", not a
     997             :              * locale name like "English_United States.1252".  Until those
     998             :              * values can be prevented from entering the system, or 100%
     999             :              * reliably converted to the more useful tag format, tolerate the
    1000             :              * resulting error and report that we have no version data.
    1001             :              */
    1002             :             if (GetLastError() == ERROR_INVALID_PARAMETER)
    1003             :                 return NULL;
    1004             : 
    1005             :             ereport(ERROR,
    1006             :                     (errmsg("could not get collation version for locale \"%s\": error code %lu",
    1007             :                             collcollate,
    1008             :                             GetLastError())));
    1009             :         }
    1010             :         collversion = psprintf("%lu.%lu,%lu.%lu",
    1011             :                                (version.dwNLSVersion >> 8) & 0xFFFF,
    1012             :                                version.dwNLSVersion & 0xFF,
    1013             :                                (version.dwDefinedVersion >> 8) & 0xFFFF,
    1014             :                                version.dwDefinedVersion & 0xFF);
    1015             : #endif
    1016             :     }
    1017             : 
    1018       31046 :     return collversion;
    1019             : }
    1020             : 
    1021             : /*
    1022             :  * strncoll_libc_win32_utf8
    1023             :  *
    1024             :  * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
    1025             :  * invoke wcscoll_l().
    1026             :  *
    1027             :  * An input string length of -1 means that it's NUL-terminated.
    1028             :  */
    1029             : #ifdef WIN32
    1030             : static int
    1031             : strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
    1032             :                          ssize_t len2, pg_locale_t locale)
    1033             : {
    1034             :     char        sbuf[TEXTBUFLEN];
    1035             :     char       *buf = sbuf;
    1036             :     char       *a1p,
    1037             :                *a2p;
    1038             :     int         a1len;
    1039             :     int         a2len;
    1040             :     int         r;
    1041             :     int         result;
    1042             : 
    1043             :     Assert(GetDatabaseEncoding() == PG_UTF8);
    1044             : 
    1045             :     if (len1 == -1)
    1046             :         len1 = strlen(arg1);
    1047             :     if (len2 == -1)
    1048             :         len2 = strlen(arg2);
    1049             : 
    1050             :     a1len = len1 * 2 + 2;
    1051             :     a2len = len2 * 2 + 2;
    1052             : 
    1053             :     if (a1len + a2len > TEXTBUFLEN)
    1054             :         buf = palloc(a1len + a2len);
    1055             : 
    1056             :     a1p = buf;
    1057             :     a2p = buf + a1len;
    1058             : 
    1059             :     /* API does not work for zero-length input */
    1060             :     if (len1 == 0)
    1061             :         r = 0;
    1062             :     else
    1063             :     {
    1064             :         r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
    1065             :                                 (LPWSTR) a1p, a1len / 2);
    1066             :         if (!r)
    1067             :             ereport(ERROR,
    1068             :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1069             :                             GetLastError())));
    1070             :     }
    1071             :     ((LPWSTR) a1p)[r] = 0;
    1072             : 
    1073             :     if (len2 == 0)
    1074             :         r = 0;
    1075             :     else
    1076             :     {
    1077             :         r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
    1078             :                                 (LPWSTR) a2p, a2len / 2);
    1079             :         if (!r)
    1080             :             ereport(ERROR,
    1081             :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1082             :                             GetLastError())));
    1083             :     }
    1084             :     ((LPWSTR) a2p)[r] = 0;
    1085             : 
    1086             :     errno = 0;
    1087             :     result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->lt);
    1088             :     if (result == 2147483647)   /* _NLSCMPERROR; missing from mingw headers */
    1089             :         ereport(ERROR,
    1090             :                 (errmsg("could not compare Unicode strings: %m")));
    1091             : 
    1092             :     if (buf != sbuf)
    1093             :         pfree(buf);
    1094             : 
    1095             :     return result;
    1096             : }
    1097             : #endif                          /* WIN32 */
    1098             : 
    1099             : /* simple subroutine for reporting errors from newlocale() */
    1100             : void
    1101           0 : report_newlocale_failure(const char *localename)
    1102             : {
    1103             :     int         save_errno;
    1104             : 
    1105             :     /*
    1106             :      * Windows doesn't provide any useful error indication from
    1107             :      * _create_locale(), and BSD-derived platforms don't seem to feel they
    1108             :      * need to set errno either (even though POSIX is pretty clear that
    1109             :      * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
    1110             :      * is what to report.
    1111             :      */
    1112           0 :     if (errno == 0)
    1113           0 :         errno = ENOENT;
    1114             : 
    1115             :     /*
    1116             :      * ENOENT means "no such locale", not "no such file", so clarify that
    1117             :      * errno with an errdetail message.
    1118             :      */
    1119           0 :     save_errno = errno;         /* auxiliary funcs might change errno */
    1120           0 :     ereport(ERROR,
    1121             :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1122             :              errmsg("could not create locale \"%s\": %m",
    1123             :                     localename),
    1124             :              (save_errno == ENOENT ?
    1125             :               errdetail("The operating system could not find any locale data for the locale name \"%s\".",
    1126             :                         localename) : 0)));
    1127             : }
    1128             : 
    1129             : /*
    1130             :  * POSIX doesn't define _l-variants of these functions, but several systems
    1131             :  * have them.  We provide our own replacements here.
    1132             :  */
    1133             : #ifndef HAVE_MBSTOWCS_L
    1134             : static size_t
    1135     1145010 : mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
    1136             : {
    1137             : #ifdef WIN32
    1138             :     return _mbstowcs_l(dest, src, n, loc);
    1139             : #else
    1140             :     size_t      result;
    1141     1145010 :     locale_t    save_locale = uselocale(loc);
    1142             : 
    1143     1145010 :     result = mbstowcs(dest, src, n);
    1144     1145010 :     uselocale(save_locale);
    1145     1145010 :     return result;
    1146             : #endif
    1147             : }
    1148             : #endif
    1149             : #ifndef HAVE_WCSTOMBS_L
    1150             : static size_t
    1151     1145010 : wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
    1152             : {
    1153             : #ifdef WIN32
    1154             :     return _wcstombs_l(dest, src, n, loc);
    1155             : #else
    1156             :     size_t      result;
    1157     1145010 :     locale_t    save_locale = uselocale(loc);
    1158             : 
    1159     1145010 :     result = wcstombs(dest, src, n);
    1160     1145010 :     uselocale(save_locale);
    1161     1145010 :     return result;
    1162             : #endif
    1163             : }
    1164             : #endif
    1165             : 
    1166             : /*
    1167             :  * These functions convert from/to libc's wchar_t, *not* pg_wchar.
    1168             :  * Therefore we keep them here rather than with the mbutils code.
    1169             :  */
    1170             : 
    1171             : /*
    1172             :  * wchar2char --- convert wide characters to multibyte format
    1173             :  *
    1174             :  * This has the same API as the standard wcstombs_l() function; in particular,
    1175             :  * tolen is the maximum number of bytes to store at *to, and *from must be
    1176             :  * zero-terminated.  The output will be zero-terminated iff there is room.
    1177             :  */
    1178             : size_t
    1179     1145010 : wchar2char(char *to, const wchar_t *from, size_t tolen, locale_t loc)
    1180             : {
    1181             :     size_t      result;
    1182             : 
    1183     1145010 :     if (tolen == 0)
    1184           0 :         return 0;       /* no room to store anything at *to */
    1185             : 
    1186             : #ifdef WIN32
    1187             : 
    1188             :     /*
    1189             :      * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
    1190             :      * for some reason mbstowcs and wcstombs won't do this for us, so for a
    1191             :      * UTF8 database we call WideCharToMultiByte() here instead of wcstombs.
    1192             :      */
    1193             :     if (GetDatabaseEncoding() == PG_UTF8)
    1194             :     {
    1195             :         result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
    1196             :                                      NULL, NULL);
    1197             :         /* A zero return is failure (result is unsigned, so this test matches only zero) */
    1198             :         if (result <= 0)
    1199             :             result = -1;
    1200             :         else
    1201             :         {
    1202             :             Assert(result <= tolen);
    1203             :             /* Microsoft counts the zero terminator in the result, so drop it from the count */
    1204             :             result--;
    1205             :         }
    1206             :     }
    1207             :     else                        /* any other encoding on Windows falls through to the libc calls below */
    1208             : #endif                          /* WIN32 */
    1209     1145010 :     if (loc == (locale_t) 0)
    1210             :     {
    1211             :         /* Use wcstombs directly for the default locale */
    1212           0 :         result = wcstombs(to, from, tolen);
    1213             :     }
    1214             :     else
    1215             :     {
    1216             :         /* Use wcstombs_l for nondefault locales */
    1217     1145010 :         result = wcstombs_l(to, from, tolen, loc);
    1218             :     }
    1219             : 
    1220     1145010 :     return result;      /* unlike char2wchar, a failed conversion is not reported via ereport() here */
    1221             : }
    1222             : 
    1223             : /*
    1224             :  * char2wchar --- convert multibyte characters to wide characters
    1225             :  *
    1226             :  * This has almost the API of mbstowcs_l(), except that *from need not be
    1227             :  * null-terminated (fromlen gives the number of input bytes), and we
    1228             :  * ereport() rather than returning -1 for invalid input encoding.  tolen is
    1229             :  * the maximum number of wchar_t's to store at *to; zero returns 0 at once.
    1230             :  * A loc of 0 uses the default locale.  Output is zero-terminated iff it fits.
    1231             :  */
    1232             : static size_t
    1233     1145010 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
    1234             :            locale_t loc)
    1235             : {
    1236             :     size_t      result;
    1237             : 
    1238     1145010 :     if (tolen == 0)
    1239           0 :         return 0;       /* no room to store anything at *to */
    1240             : 
    1241             : #ifdef WIN32
    1242             :     /* UTF8 on Windows: call MultiByteToWideChar(); see the WIN32 comment in wchar2char */
    1243             :     if (GetDatabaseEncoding() == PG_UTF8)
    1244             :     {
    1245             :         /* Win32 API does not work for zero-length input, so treat it as an empty result */
    1246             :         if (fromlen == 0)
    1247             :             result = 0;
    1248             :         else
    1249             :         {
    1250             :             result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
    1251             :             /* A zero return is failure (tolen - 1 above reserves a slot for the terminator) */
    1252             :             if (result == 0)
    1253             :                 result = -1;
    1254             :         }
    1255             : 
    1256             :         if (result != -1)
    1257             :         {
    1258             :             Assert(result < tolen);
    1259             :             /* Append trailing null wchar (MultiByteToWideChar() does not) */
    1260             :             to[result] = 0;
    1261             :         }
    1262             :     }
    1263             :     else                        /* any other encoding on Windows uses the libc calls below */
    1264             : #endif                          /* WIN32 */
    1265             :     {
    1266             :         /* mbstowcs requires ending '\0', so convert from a terminated copy (freed below) */
    1267     1145010 :         char       *str = pnstrdup(from, fromlen);
    1268             : 
    1269     1145010 :         if (loc == (locale_t) 0)
    1270             :         {
    1271             :             /* Use mbstowcs directly for the default locale */
    1272           0 :             result = mbstowcs(to, str, tolen);
    1273             :         }
    1274             :         else
    1275             :         {
    1276             :             /* Use mbstowcs_l for nondefault locales */
    1277     1145010 :             result = mbstowcs_l(to, str, tolen, loc);
    1278             :         }
    1279             : 
    1280     1145010 :         pfree(str);
    1281             :     }
    1282             : 
    1283     1145010 :     if (result == -1)
    1284             :     {
    1285             :         /*
    1286             :          * Invalid multibyte character encountered.  We try to give a useful
    1287             :          * error message by letting pg_verifymbstr check the string.  But it's
    1288             :          * possible that the string is OK to us, and not OK to mbstowcs ---
    1289             :          * this suggests that the LC_CTYPE locale is different from the
    1290             :          * database encoding.  Give a generic error message if pg_verifymbstr
    1291             :          * can't find anything wrong.
    1292             :          */
    1293           0 :         pg_verifymbstr(from, fromlen, false);   /* might not return */
    1294             :         /* but if it does ... */
    1295           0 :         ereport(ERROR,
    1296             :                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
    1297             :                  errmsg("invalid multibyte character for locale"),
    1298             :                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
    1299             :     }
    1300             : 
    1301     1145010 :     return result;
    1302             : }

Generated by: LCOV version 1.16