LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_libc.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 180 309 58.3 %
Date: 2025-11-13 00:18:11 Functions: 24 42 57.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-----------------------------------------------------------------------
       2             :  *
       3             :  * PostgreSQL locale utilities for libc
       4             :  *
       5             :  * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
       6             :  *
       7             :  * src/backend/utils/adt/pg_locale_libc.c
       8             :  *
       9             :  *-----------------------------------------------------------------------
      10             :  */
      11             : 
      12             : #include "postgres.h"
      13             : 
      14             : #include <limits.h>
      15             : #include <wctype.h>
      16             : 
      17             : #include "access/htup_details.h"
      18             : #include "catalog/pg_database.h"
      19             : #include "catalog/pg_collation.h"
      20             : #include "mb/pg_wchar.h"
      21             : #include "miscadmin.h"
      22             : #include "utils/builtins.h"
      23             : #include "utils/formatting.h"
      24             : #include "utils/memutils.h"
      25             : #include "utils/pg_locale.h"
      26             : #include "utils/syscache.h"
      27             : 
      28             : #ifdef __GLIBC__
      29             : #include <gnu/libc-version.h>
      30             : #endif
      31             : 
      32             : #ifdef WIN32
      33             : #include <shlwapi.h>
      34             : #endif
      35             : 
      36             : /*
      37             :  * For the libc provider, to provide as much functionality as possible on a
      38             :  * variety of platforms without going so far as to implement everything from
      39             :  * scratch, we use several implementation strategies depending on the
      40             :  * situation:
      41             :  *
      42             :  * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
      43             :  * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
      44             :  * collations don't give a fig about multibyte characters.
      45             :  *
      46             :  * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
      47             :  * This assumes that every platform uses Unicode codepoints directly
      48             :  * as the wchar_t representation of Unicode.  On some platforms
      49             :  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
      50             :  *
      51             :  * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
      52             :  * values up to 255, and punt for values above that.  This is 100% correct
      53             :  * only in single-byte encodings such as LATINn.  However, non-Unicode
      54             :  * multibyte encodings are mostly Far Eastern character sets for which the
      55             :  * properties being tested here aren't very relevant for higher code values
      56             :  * anyway.  The difficulty with using the <wctype.h> functions with
      57             :  * non-Unicode multibyte encodings is that we can have no certainty that
      58             :  * the platform's wchar_t representation matches what we do in pg_wchar
      59             :  * conversions.
      60             :  *
      61             :  * As a special case, in the "default" collation, (2) and (3) force ASCII
      62             :  * letters to follow ASCII upcase/downcase rules, while in a non-default
      63             :  * collation we just let the library functions do what they will.  The case
      64             :  * where this matters is treatment of I/i in Turkish, and the behavior is
      65             :  * meant to match the upper()/lower() SQL functions.
      66             :  *
      67             :  * We store the active collation setting in static variables.  In principle
      68             :  * it could be passed down to here via the regex library's "struct vars" data
      69             :  * structure; but that would require somewhat invasive changes in the regex
      70             :  * library, and right now there's no real benefit to be gained from that.
      71             :  *
      72             :  * NB: the coding here assumes pg_wchar is an unsigned type.
      73             :  */
      74             : 
      75             : /*
      76             :  * Size of stack buffer to use for string transformations, used to avoid heap
      77             :  * allocations in typical cases. This should be large enough that most strings
      78             :  * will fit, but small enough that we feel comfortable putting it on the
      79             :  * stack.
      80             :  */
      81             : #define     TEXTBUFLEN          1024
      82             : 
      83             : extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
      84             : 
      85             : static int  strncoll_libc(const char *arg1, ssize_t len1,
      86             :                           const char *arg2, ssize_t len2,
      87             :                           pg_locale_t locale);
      88             : static size_t strnxfrm_libc(char *dest, size_t destsize,
      89             :                             const char *src, ssize_t srclen,
      90             :                             pg_locale_t locale);
      91             : extern char *get_collation_actual_version_libc(const char *collcollate);
      92             : static locale_t make_libc_collator(const char *collate,
      93             :                                    const char *ctype);
      94             : 
      95             : #ifdef WIN32
      96             : static int  strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
      97             :                                      const char *arg2, ssize_t len2,
      98             :                                      pg_locale_t locale);
      99             : #endif
     100             : 
     101             : static size_t char2wchar(wchar_t *to, size_t tolen, const char *from,
     102             :                          size_t fromlen, locale_t loc);
     103             : 
     104             : static size_t strlower_libc_sb(char *dest, size_t destsize,
     105             :                                const char *src, ssize_t srclen,
     106             :                                pg_locale_t locale);
     107             : static size_t strlower_libc_mb(char *dest, size_t destsize,
     108             :                                const char *src, ssize_t srclen,
     109             :                                pg_locale_t locale);
     110             : static size_t strtitle_libc_sb(char *dest, size_t destsize,
     111             :                                const char *src, ssize_t srclen,
     112             :                                pg_locale_t locale);
     113             : static size_t strtitle_libc_mb(char *dest, size_t destsize,
     114             :                                const char *src, ssize_t srclen,
     115             :                                pg_locale_t locale);
     116             : static size_t strupper_libc_sb(char *dest, size_t destsize,
     117             :                                const char *src, ssize_t srclen,
     118             :                                pg_locale_t locale);
     119             : static size_t strupper_libc_mb(char *dest, size_t destsize,
     120             :                                const char *src, ssize_t srclen,
     121             :                                pg_locale_t locale);
     122             : 
     123             : static bool
     124           0 : wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
     125             : {
     126           0 :     return isdigit_l((unsigned char) wc, locale->lt);
     127             : }
     128             : 
     129             : static bool
     130           0 : wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
     131             : {
     132           0 :     return isalpha_l((unsigned char) wc, locale->lt);
     133             : }
     134             : 
     135             : static bool
     136           0 : wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
     137             : {
     138           0 :     return isalnum_l((unsigned char) wc, locale->lt);
     139             : }
     140             : 
     141             : static bool
     142           0 : wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
     143             : {
     144           0 :     return isupper_l((unsigned char) wc, locale->lt);
     145             : }
     146             : 
     147             : static bool
     148           0 : wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
     149             : {
     150           0 :     return islower_l((unsigned char) wc, locale->lt);
     151             : }
     152             : 
     153             : static bool
     154           0 : wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
     155             : {
     156           0 :     return isgraph_l((unsigned char) wc, locale->lt);
     157             : }
     158             : 
     159             : static bool
     160           0 : wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
     161             : {
     162           0 :     return isprint_l((unsigned char) wc, locale->lt);
     163             : }
     164             : 
     165             : static bool
     166           0 : wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
     167             : {
     168           0 :     return ispunct_l((unsigned char) wc, locale->lt);
     169             : }
     170             : 
     171             : static bool
     172           0 : wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
     173             : {
     174           0 :     return isspace_l((unsigned char) wc, locale->lt);
     175             : }
     176             : 
     177             : static bool
     178           0 : wc_isxdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
     179             : {
     180             : #ifndef WIN32
     181           0 :     return isxdigit_l((unsigned char) wc, locale->lt);
     182             : #else
     183             :     return _isxdigit_l((unsigned char) wc, locale->lt);
     184             : #endif
     185             : }
     186             : 
     187             : static bool
     188      131608 : wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
     189             : {
     190      131608 :     return iswdigit_l((wint_t) wc, locale->lt);
     191             : }
     192             : 
     193             : static bool
     194       81148 : wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
     195             : {
     196       81148 :     return iswalpha_l((wint_t) wc, locale->lt);
     197             : }
     198             : 
     199             : static bool
     200     2845670 : wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
     201             : {
     202     2845670 :     return iswalnum_l((wint_t) wc, locale->lt);
     203             : }
     204             : 
     205             : static bool
     206        4112 : wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
     207             : {
     208        4112 :     return iswupper_l((wint_t) wc, locale->lt);
     209             : }
     210             : 
     211             : static bool
     212        4102 : wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
     213             : {
     214        4102 :     return iswlower_l((wint_t) wc, locale->lt);
     215             : }
     216             : 
     217             : static bool
     218        4102 : wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
     219             : {
     220        4102 :     return iswgraph_l((wint_t) wc, locale->lt);
     221             : }
     222             : 
     223             : static bool
     224        4102 : wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
     225             : {
     226        4102 :     return iswprint_l((wint_t) wc, locale->lt);
     227             : }
     228             : 
     229             : static bool
     230        4102 : wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
     231             : {
     232        4102 :     return iswpunct_l((wint_t) wc, locale->lt);
     233             : }
     234             : 
     235             : static bool
     236       48152 : wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
     237             : {
     238       48152 :     return iswspace_l((wint_t) wc, locale->lt);
     239             : }
     240             : 
     241             : static bool
     242          12 : wc_isxdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
     243             : {
     244             : #ifndef WIN32
     245          12 :     return iswxdigit_l((wint_t) wc, locale->lt);
     246             : #else
     247             :     return _iswxdigit_l((wint_t) wc, locale->lt);
     248             : #endif
     249             : }
     250             : 
     251             : static char
     252           0 : char_tolower_libc(unsigned char ch, pg_locale_t locale)
     253             : {
     254             :     Assert(pg_database_encoding_max_length() == 1);
     255           0 :     return tolower_l(ch, locale->lt);
     256             : }
     257             : 
     258             : static bool
     259           0 : char_is_cased_libc(char ch, pg_locale_t locale)
     260             : {
     261           0 :     bool        is_multibyte = pg_database_encoding_max_length() > 1;
     262             : 
     263           0 :     if (is_multibyte && IS_HIGHBIT_SET(ch))
     264           0 :         return true;
     265             :     else
     266           0 :         return isalpha_l((unsigned char) ch, locale->lt);
     267             : }
     268             : 
     269             : static pg_wchar
     270           0 : toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
     271             : {
     272             :     Assert(GetDatabaseEncoding() != PG_UTF8);
     273             : 
     274             :     /* force C behavior for ASCII characters, per comments above */
     275           0 :     if (locale->is_default && wc <= (pg_wchar) 127)
     276           0 :         return pg_ascii_toupper((unsigned char) wc);
     277           0 :     if (wc <= (pg_wchar) UCHAR_MAX)
     278           0 :         return toupper_l((unsigned char) wc, locale->lt);
     279             :     else
     280           0 :         return wc;
     281             : }
     282             : 
     283             : static pg_wchar
     284        9088 : toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
     285             : {
     286             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     287             : 
     288             :     /* force C behavior for ASCII characters, per comments above */
     289        9088 :     if (locale->is_default && wc <= (pg_wchar) 127)
     290         892 :         return pg_ascii_toupper((unsigned char) wc);
     291             :     if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
     292        8196 :         return towupper_l((wint_t) wc, locale->lt);
     293             :     else
     294             :         return wc;
     295             : }
     296             : 
     297             : static pg_wchar
     298           0 : tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
     299             : {
     300             :     Assert(GetDatabaseEncoding() != PG_UTF8);
     301             : 
     302             :     /* force C behavior for ASCII characters, per comments above */
     303           0 :     if (locale->is_default && wc <= (pg_wchar) 127)
     304           0 :         return pg_ascii_tolower((unsigned char) wc);
     305           0 :     if (wc <= (pg_wchar) UCHAR_MAX)
     306           0 :         return tolower_l((unsigned char) wc, locale->lt);
     307             :     else
     308           0 :         return wc;
     309             : }
     310             : 
     311             : static pg_wchar
     312        9092 : tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
     313             : {
     314             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     315             : 
     316             :     /* force C behavior for ASCII characters, per comments above */
     317        9092 :     if (locale->is_default && wc <= (pg_wchar) 127)
     318         896 :         return pg_ascii_tolower((unsigned char) wc);
     319             :     if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
     320        8196 :         return towlower_l((wint_t) wc, locale->lt);
     321             :     else
     322             :         return wc;
     323             : }
     324             : 
     325             : static const struct ctype_methods ctype_methods_libc_sb = {
     326             :     .strlower = strlower_libc_sb,
     327             :     .strtitle = strtitle_libc_sb,
     328             :     .strupper = strupper_libc_sb,
     329             :     .wc_isdigit = wc_isdigit_libc_sb,
     330             :     .wc_isalpha = wc_isalpha_libc_sb,
     331             :     .wc_isalnum = wc_isalnum_libc_sb,
     332             :     .wc_isupper = wc_isupper_libc_sb,
     333             :     .wc_islower = wc_islower_libc_sb,
     334             :     .wc_isgraph = wc_isgraph_libc_sb,
     335             :     .wc_isprint = wc_isprint_libc_sb,
     336             :     .wc_ispunct = wc_ispunct_libc_sb,
     337             :     .wc_isspace = wc_isspace_libc_sb,
     338             :     .wc_isxdigit = wc_isxdigit_libc_sb,
     339             :     .char_is_cased = char_is_cased_libc,
     340             :     .char_tolower = char_tolower_libc,
     341             :     .wc_toupper = toupper_libc_sb,
     342             :     .wc_tolower = tolower_libc_sb,
     343             :     .max_chr = UCHAR_MAX,
     344             : };
     345             : 
     346             : /*
     347             :  * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
     348             :  * single-byte semantics for pattern matching.
     349             :  */
     350             : static const struct ctype_methods ctype_methods_libc_other_mb = {
     351             :     .strlower = strlower_libc_mb,
     352             :     .strtitle = strtitle_libc_mb,
     353             :     .strupper = strupper_libc_mb,
     354             :     .wc_isdigit = wc_isdigit_libc_sb,
     355             :     .wc_isalpha = wc_isalpha_libc_sb,
     356             :     .wc_isalnum = wc_isalnum_libc_sb,
     357             :     .wc_isupper = wc_isupper_libc_sb,
     358             :     .wc_islower = wc_islower_libc_sb,
     359             :     .wc_isgraph = wc_isgraph_libc_sb,
     360             :     .wc_isprint = wc_isprint_libc_sb,
     361             :     .wc_ispunct = wc_ispunct_libc_sb,
     362             :     .wc_isspace = wc_isspace_libc_sb,
     363             :     .wc_isxdigit = wc_isxdigit_libc_sb,
     364             :     .char_is_cased = char_is_cased_libc,
     365             :     .char_tolower = char_tolower_libc,
     366             :     .wc_toupper = toupper_libc_sb,
     367             :     .wc_tolower = tolower_libc_sb,
     368             :     .max_chr = UCHAR_MAX,
     369             : };
     370             : 
     371             : static const struct ctype_methods ctype_methods_libc_utf8 = {
     372             :     .strlower = strlower_libc_mb,
     373             :     .strtitle = strtitle_libc_mb,
     374             :     .strupper = strupper_libc_mb,
     375             :     .wc_isdigit = wc_isdigit_libc_mb,
     376             :     .wc_isalpha = wc_isalpha_libc_mb,
     377             :     .wc_isalnum = wc_isalnum_libc_mb,
     378             :     .wc_isupper = wc_isupper_libc_mb,
     379             :     .wc_islower = wc_islower_libc_mb,
     380             :     .wc_isgraph = wc_isgraph_libc_mb,
     381             :     .wc_isprint = wc_isprint_libc_mb,
     382             :     .wc_ispunct = wc_ispunct_libc_mb,
     383             :     .wc_isspace = wc_isspace_libc_mb,
     384             :     .wc_isxdigit = wc_isxdigit_libc_mb,
     385             :     .char_is_cased = char_is_cased_libc,
     386             :     .char_tolower = char_tolower_libc,
     387             :     .wc_toupper = toupper_libc_mb,
     388             :     .wc_tolower = tolower_libc_mb,
     389             : };
     390             : 
     391             : static const struct collate_methods collate_methods_libc = {
     392             :     .strncoll = strncoll_libc,
     393             :     .strnxfrm = strnxfrm_libc,
     394             :     .strnxfrm_prefix = NULL,
     395             : 
     396             :     /*
     397             :      * Unfortunately, it seems that strxfrm() for non-C collations is broken
     398             :      * on many common platforms; testing of multiple versions of glibc reveals
     399             :      * that, for many locales, strcoll() and strxfrm() do not return
     400             :      * consistent results. While no other libc other than Cygwin has so far
     401             :      * been shown to have a problem, we take the conservative course of action
     402             :      * for right now and disable this categorically.  (Users who are certain
     403             :      * this isn't a problem on their system can define TRUST_STRXFRM.)
     404             :      */
     405             : #ifdef TRUST_STRXFRM
     406             :     .strxfrm_is_safe = true,
     407             : #else
     408             :     .strxfrm_is_safe = false,
     409             : #endif
     410             : };
     411             : 
     412             : #ifdef WIN32
     413             : static const struct collate_methods collate_methods_libc_win32_utf8 = {
     414             :     .strncoll = strncoll_libc_win32_utf8,
     415             :     .strnxfrm = strnxfrm_libc,
     416             :     .strnxfrm_prefix = NULL,
     417             : #ifdef TRUST_STRXFRM
     418             :     .strxfrm_is_safe = true,
     419             : #else
     420             :     .strxfrm_is_safe = false,
     421             : #endif
     422             : };
     423             : #endif
     424             : 
     425             : static size_t
     426           0 : strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     427             :                  pg_locale_t locale)
     428             : {
     429           0 :     if (srclen < 0)
     430           0 :         srclen = strlen(src);
     431             : 
     432           0 :     if (srclen + 1 <= destsize)
     433             :     {
     434           0 :         locale_t    loc = locale->lt;
     435             :         char       *p;
     436             : 
     437           0 :         if (srclen + 1 > destsize)
     438           0 :             return srclen;
     439             : 
     440           0 :         memcpy(dest, src, srclen);
     441           0 :         dest[srclen] = '\0';
     442             : 
     443             :         /*
     444             :          * Note: we assume that tolower_l() will not be so broken as to need
     445             :          * an isupper_l() guard test.  When using the default collation, we
     446             :          * apply the traditional Postgres behavior that forces ASCII-style
     447             :          * treatment of I/i, but in non-default collations you get exactly
     448             :          * what the collation says.
     449             :          */
     450           0 :         for (p = dest; *p; p++)
     451             :         {
     452           0 :             if (locale->is_default)
     453           0 :                 *p = pg_tolower((unsigned char) *p);
     454             :             else
     455           0 :                 *p = tolower_l((unsigned char) *p, loc);
     456             :         }
     457             :     }
     458             : 
     459           0 :     return srclen;
     460             : }
     461             : 
     462             : static size_t
     463      424990 : strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     464             :                  pg_locale_t locale)
     465             : {
     466      424990 :     locale_t    loc = locale->lt;
     467             :     size_t      result_size;
     468             :     wchar_t    *workspace;
     469             :     char       *result;
     470             :     size_t      curr_char;
     471             :     size_t      max_size;
     472             : 
     473      424990 :     if (srclen < 0)
     474           0 :         srclen = strlen(src);
     475             : 
     476             :     /* Overflow paranoia */
     477      424990 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     478           0 :         ereport(ERROR,
     479             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     480             :                  errmsg("out of memory")));
     481             : 
     482             :     /* Output workspace cannot have more codes than input bytes */
     483      424990 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     484             : 
     485      424990 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     486             : 
     487     3669294 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     488     3244304 :         workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     489             : 
     490             :     /*
     491             :      * Make result large enough; case change might change number of bytes
     492             :      */
     493      424990 :     max_size = curr_char * pg_database_encoding_max_length();
     494      424990 :     result = palloc(max_size + 1);
     495             : 
     496      424990 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     497             : 
     498      424990 :     if (result_size + 1 > destsize)
     499           0 :         return result_size;
     500             : 
     501      424990 :     memcpy(dest, result, result_size);
     502      424990 :     dest[result_size] = '\0';
     503             : 
     504      424990 :     pfree(workspace);
     505      424990 :     pfree(result);
     506             : 
     507      424990 :     return result_size;
     508             : }
     509             : 
     510             : static size_t
     511           0 : strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     512             :                  pg_locale_t locale)
     513             : {
     514           0 :     if (srclen < 0)
     515           0 :         srclen = strlen(src);
     516             : 
     517           0 :     if (srclen + 1 <= destsize)
     518             :     {
     519           0 :         locale_t    loc = locale->lt;
     520           0 :         int         wasalnum = false;
     521             :         char       *p;
     522             : 
     523           0 :         memcpy(dest, src, srclen);
     524           0 :         dest[srclen] = '\0';
     525             : 
     526             :         /*
     527             :          * Note: we assume that toupper_l()/tolower_l() will not be so broken
     528             :          * as to need guard tests.  When using the default collation, we apply
     529             :          * the traditional Postgres behavior that forces ASCII-style treatment
     530             :          * of I/i, but in non-default collations you get exactly what the
     531             :          * collation says.
     532             :          */
     533           0 :         for (p = dest; *p; p++)
     534             :         {
     535           0 :             if (locale->is_default)
     536             :             {
     537           0 :                 if (wasalnum)
     538           0 :                     *p = pg_tolower((unsigned char) *p);
     539             :                 else
     540           0 :                     *p = pg_toupper((unsigned char) *p);
     541             :             }
     542             :             else
     543             :             {
     544           0 :                 if (wasalnum)
     545           0 :                     *p = tolower_l((unsigned char) *p, loc);
     546             :                 else
     547           0 :                     *p = toupper_l((unsigned char) *p, loc);
     548             :             }
     549           0 :             wasalnum = isalnum_l((unsigned char) *p, loc);
     550             :         }
     551             :     }
     552             : 
     553           0 :     return srclen;
     554             : }
     555             : 
     556             : static size_t
     557           8 : strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     558             :                  pg_locale_t locale)
     559             : {
     560           8 :     locale_t    loc = locale->lt;
     561           8 :     int         wasalnum = false;
     562             :     size_t      result_size;
     563             :     wchar_t    *workspace;
     564             :     char       *result;
     565             :     size_t      curr_char;
     566             :     size_t      max_size;
     567             : 
     568           8 :     if (srclen < 0)
     569           0 :         srclen = strlen(src);
     570             : 
     571             :     /* Overflow paranoia */
     572           8 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     573           0 :         ereport(ERROR,
     574             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     575             :                  errmsg("out of memory")));
     576             : 
     577             :     /* Output workspace cannot have more codes than input bytes */
     578           8 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     579             : 
     580           8 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     581             : 
     582          80 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     583             :     {
     584          72 :         if (wasalnum)
     585          56 :             workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     586             :         else
     587          16 :             workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     588          72 :         wasalnum = iswalnum_l(workspace[curr_char], loc);
     589             :     }
     590             : 
     591             :     /*
     592             :      * Make result large enough; case change might change number of bytes
     593             :      */
     594           8 :     max_size = curr_char * pg_database_encoding_max_length();
     595           8 :     result = palloc(max_size + 1);
     596             : 
     597           8 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     598             : 
     599           8 :     if (result_size + 1 > destsize)
     600           0 :         return result_size;
     601             : 
     602           8 :     memcpy(dest, result, result_size);
     603           8 :     dest[result_size] = '\0';
     604             : 
     605           8 :     pfree(workspace);
     606           8 :     pfree(result);
     607             : 
     608           8 :     return result_size;
     609             : }
     610             : 
     611             : static size_t
     612           0 : strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     613             :                  pg_locale_t locale)
     614             : {
     615           0 :     if (srclen < 0)
     616           0 :         srclen = strlen(src);
     617             : 
     618           0 :     if (srclen + 1 <= destsize)
     619             :     {
     620           0 :         locale_t    loc = locale->lt;
     621             :         char       *p;
     622             : 
     623           0 :         memcpy(dest, src, srclen);
     624           0 :         dest[srclen] = '\0';
     625             : 
     626             :         /*
     627             :          * Note: we assume that toupper_l() will not be so broken as to need
     628             :          * an islower_l() guard test.  When using the default collation, we
     629             :          * apply the traditional Postgres behavior that forces ASCII-style
     630             :          * treatment of I/i, but in non-default collations you get exactly
     631             :          * what the collation says.
     632             :          */
     633           0 :         for (p = dest; *p; p++)
     634             :         {
     635           0 :             if (locale->is_default)
     636           0 :                 *p = pg_toupper((unsigned char) *p);
     637             :             else
     638           0 :                 *p = toupper_l((unsigned char) *p, loc);
     639             :         }
     640             :     }
     641             : 
     642           0 :     return srclen;
     643             : }
     644             : 
     645             : static size_t
     646      719780 : strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     647             :                  pg_locale_t locale)
     648             : {
     649      719780 :     locale_t    loc = locale->lt;
     650             :     size_t      result_size;
     651             :     wchar_t    *workspace;
     652             :     char       *result;
     653             :     size_t      curr_char;
     654             :     size_t      max_size;
     655             : 
     656      719780 :     if (srclen < 0)
     657           0 :         srclen = strlen(src);
     658             : 
     659             :     /* Overflow paranoia */
     660      719780 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     661           0 :         ereport(ERROR,
     662             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     663             :                  errmsg("out of memory")));
     664             : 
     665             :     /* Output workspace cannot have more codes than input bytes */
     666      719780 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     667             : 
     668      719780 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     669             : 
     670     2371034 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     671     1651254 :         workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     672             : 
     673             :     /*
     674             :      * Make result large enough; case change might change number of bytes
     675             :      */
     676      719780 :     max_size = curr_char * pg_database_encoding_max_length();
     677      719780 :     result = palloc(max_size + 1);
     678             : 
     679      719780 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     680             : 
     681      719780 :     if (result_size + 1 > destsize)
     682           0 :         return result_size;
     683             : 
     684      719780 :     memcpy(dest, result, result_size);
     685      719780 :     dest[result_size] = '\0';
     686             : 
     687      719780 :     pfree(workspace);
     688      719780 :     pfree(result);
     689             : 
     690      719780 :     return result_size;
     691             : }
     692             : 
     693             : pg_locale_t
     694       31746 : create_pg_locale_libc(Oid collid, MemoryContext context)
     695             : {
     696             :     const char *collate;
     697             :     const char *ctype;
     698             :     locale_t    loc;
     699             :     pg_locale_t result;
     700             : 
     701       31746 :     if (collid == DEFAULT_COLLATION_OID)
     702             :     {
     703             :         HeapTuple   tp;
     704             :         Datum       datum;
     705             : 
     706       31656 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     707       31656 :         if (!HeapTupleIsValid(tp))
     708           0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     709       31656 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     710             :                                        Anum_pg_database_datcollate);
     711       31656 :         collate = TextDatumGetCString(datum);
     712       31656 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     713             :                                        Anum_pg_database_datctype);
     714       31656 :         ctype = TextDatumGetCString(datum);
     715             : 
     716       31656 :         ReleaseSysCache(tp);
     717             :     }
     718             :     else
     719             :     {
     720             :         HeapTuple   tp;
     721             :         Datum       datum;
     722             : 
     723          90 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     724          90 :         if (!HeapTupleIsValid(tp))
     725           0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     726             : 
     727          90 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     728             :                                        Anum_pg_collation_collcollate);
     729          90 :         collate = TextDatumGetCString(datum);
     730          90 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     731             :                                        Anum_pg_collation_collctype);
     732          90 :         ctype = TextDatumGetCString(datum);
     733             : 
     734          90 :         ReleaseSysCache(tp);
     735             :     }
     736             : 
     737             : 
     738       31746 :     loc = make_libc_collator(collate, ctype);
     739             : 
     740       31746 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     741       31746 :     result->deterministic = true;
     742       62358 :     result->collate_is_c = (strcmp(collate, "C") == 0) ||
     743       30612 :         (strcmp(collate, "POSIX") == 0);
     744       62358 :     result->ctype_is_c = (strcmp(ctype, "C") == 0) ||
     745       30612 :         (strcmp(ctype, "POSIX") == 0);
     746       31746 :     result->lt = loc;
     747       31746 :     if (!result->collate_is_c)
     748             :     {
     749             : #ifdef WIN32
     750             :         if (GetDatabaseEncoding() == PG_UTF8)
     751             :             result->collate = &collate_methods_libc_win32_utf8;
     752             :         else
     753             : #endif
     754       30548 :             result->collate = &collate_methods_libc;
     755             :     }
     756       31746 :     if (!result->ctype_is_c)
     757             :     {
     758       30548 :         if (GetDatabaseEncoding() == PG_UTF8)
     759       30484 :             result->ctype = &ctype_methods_libc_utf8;
     760          64 :         else if (pg_database_encoding_max_length() > 1)
     761           0 :             result->ctype = &ctype_methods_libc_other_mb;
     762             :         else
     763          64 :             result->ctype = &ctype_methods_libc_sb;
     764             :     }
     765             : 
     766       31746 :     return result;
     767             : }
     768             : 
     769             : /*
     770             :  * Create a locale_t with the given collation and ctype.
     771             :  *
     772             :  * The "C" and "POSIX" locales are not actually handled by libc, so return
     773             :  * NULL.
     774             :  *
     775             :  * Ensure that no path leaks a locale_t.
     776             :  */
     777             : static locale_t
     778       31746 : make_libc_collator(const char *collate, const char *ctype)
     779             : {
     780       31746 :     locale_t    loc = 0;
     781             : 
     782       31746 :     if (strcmp(collate, ctype) == 0)
     783             :     {
     784       31746 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     785             :         {
     786             :             /* Normal case where they're the same */
     787       30548 :             errno = 0;
     788             : #ifndef WIN32
     789       30548 :             loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
     790             :                             NULL);
     791             : #else
     792             :             loc = _create_locale(LC_ALL, collate);
     793             : #endif
     794       30548 :             if (!loc)
     795           0 :                 report_newlocale_failure(collate);
     796             :         }
     797             :     }
     798             :     else
     799             :     {
     800             : #ifndef WIN32
     801             :         /* We need two newlocale() steps */
     802           0 :         locale_t    loc1 = 0;
     803             : 
     804           0 :         if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
     805             :         {
     806           0 :             errno = 0;
     807           0 :             loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
     808           0 :             if (!loc1)
     809           0 :                 report_newlocale_failure(collate);
     810             :         }
     811             : 
     812           0 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     813             :         {
     814           0 :             errno = 0;
     815           0 :             loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
     816           0 :             if (!loc)
     817             :             {
     818           0 :                 if (loc1)
     819           0 :                     freelocale(loc1);
     820           0 :                 report_newlocale_failure(ctype);
     821             :             }
     822             :         }
     823             :         else
     824           0 :             loc = loc1;
     825             : #else
     826             : 
     827             :         /*
     828             :          * XXX The _create_locale() API doesn't appear to support this. Could
     829             :          * perhaps be worked around by changing pg_locale_t to contain two
     830             :          * separate fields.
     831             :          */
     832             :         ereport(ERROR,
     833             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     834             :                  errmsg("collations with different collate and ctype values are not supported on this platform")));
     835             : #endif
     836             :     }
     837             : 
     838       31746 :     return loc;
     839             : }
     840             : 
     841             : /*
     842             :  * strncoll_libc
     843             :  *
     844             :  * NUL-terminate arguments, if necessary, and pass to strcoll_l().
     845             :  *
     846             :  * An input string length of -1 means that it's already NUL-terminated.
     847             :  */
     848             : int
     849    29640266 : strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
     850             :               pg_locale_t locale)
     851             : {
     852             :     char        sbuf[TEXTBUFLEN];
     853    29640266 :     char       *buf = sbuf;
     854    29640266 :     size_t      bufsize1 = (len1 == -1) ? 0 : len1 + 1;
     855    29640266 :     size_t      bufsize2 = (len2 == -1) ? 0 : len2 + 1;
     856             :     const char *arg1n;
     857             :     const char *arg2n;
     858             :     int         result;
     859             : 
     860    29640266 :     if (bufsize1 + bufsize2 > TEXTBUFLEN)
     861         568 :         buf = palloc(bufsize1 + bufsize2);
     862             : 
     863             :     /* nul-terminate arguments if necessary */
     864    29640266 :     if (len1 == -1)
     865             :     {
     866    25232144 :         arg1n = arg1;
     867             :     }
     868             :     else
     869             :     {
     870     4408122 :         char       *buf1 = buf;
     871             : 
     872     4408122 :         memcpy(buf1, arg1, len1);
     873     4408122 :         buf1[len1] = '\0';
     874     4408122 :         arg1n = buf1;
     875             :     }
     876             : 
     877    29640266 :     if (len2 == -1)
     878             :     {
     879    25232144 :         arg2n = arg2;
     880             :     }
     881             :     else
     882             :     {
     883     4408122 :         char       *buf2 = buf + bufsize1;
     884             : 
     885     4408122 :         memcpy(buf2, arg2, len2);
     886     4408122 :         buf2[len2] = '\0';
     887     4408122 :         arg2n = buf2;
     888             :     }
     889             : 
     890    29640266 :     result = strcoll_l(arg1n, arg2n, locale->lt);
     891             : 
     892    29640266 :     if (buf != sbuf)
     893         568 :         pfree(buf);
     894             : 
     895    29640266 :     return result;
     896             : }
     897             : 
     898             : /*
     899             :  * strnxfrm_libc
     900             :  *
     901             :  * NUL-terminate src, if necessary, and pass to strxfrm_l().
     902             :  *
     903             :  * A source length of -1 means that it's already NUL-terminated.
     904             :  */
     905             : size_t
     906         144 : strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
     907             :               pg_locale_t locale)
     908             : {
     909             :     char        sbuf[TEXTBUFLEN];
     910         144 :     char       *buf = sbuf;
     911         144 :     size_t      bufsize = srclen + 1;
     912             :     size_t      result;
     913             : 
     914         144 :     if (srclen == -1)
     915         144 :         return strxfrm_l(dest, src, destsize, locale->lt);
     916             : 
     917           0 :     if (bufsize > TEXTBUFLEN)
     918           0 :         buf = palloc(bufsize);
     919             : 
     920             :     /* nul-terminate argument */
     921           0 :     memcpy(buf, src, srclen);
     922           0 :     buf[srclen] = '\0';
     923             : 
     924           0 :     result = strxfrm_l(dest, buf, destsize, locale->lt);
     925             : 
     926           0 :     if (buf != sbuf)
     927           0 :         pfree(buf);
     928             : 
     929             :     /* if dest is defined, it should be nul-terminated */
     930             :     Assert(result >= destsize || dest[result] == '\0');
     931             : 
     932           0 :     return result;
     933             : }
     934             : 
     935             : char *
     936       30876 : get_collation_actual_version_libc(const char *collcollate)
     937             : {
     938       30876 :     char       *collversion = NULL;
     939             : 
     940       61576 :     if (pg_strcasecmp("C", collcollate) != 0 &&
     941       61208 :         pg_strncasecmp("C.", collcollate, 2) != 0 &&
     942       30508 :         pg_strcasecmp("POSIX", collcollate) != 0)
     943             :     {
     944             : #if defined(__GLIBC__)
     945             :         /* Use the glibc version because we don't have anything better. */
     946       30482 :         collversion = pstrdup(gnu_get_libc_version());
     947             : #elif defined(LC_VERSION_MASK)
     948             :         locale_t    loc;
     949             : 
     950             :         /* Look up FreeBSD collation version. */
     951             :         loc = newlocale(LC_COLLATE_MASK, collcollate, NULL);
     952             :         if (loc)
     953             :         {
     954             :             collversion =
     955             :                 pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
     956             :             freelocale(loc);
     957             :         }
     958             :         else
     959             :             ereport(ERROR,
     960             :                     (errmsg("could not load locale \"%s\"", collcollate)));
     961             : #elif defined(WIN32)
     962             :         /*
     963             :          * If we are targeting Windows Vista and above, we can ask for a name
     964             :          * given a collation name (earlier versions required a location code
     965             :          * that we don't have).
     966             :          */
     967             :         NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
     968             :         WCHAR       wide_collcollate[LOCALE_NAME_MAX_LENGTH];
     969             : 
     970             :         MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
     971             :                             LOCALE_NAME_MAX_LENGTH);
     972             :         if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
     973             :         {
     974             :             /*
     975             :              * GetNLSVersionEx() wants a language tag such as "en-US", not a
     976             :              * locale name like "English_United States.1252".  Until those
     977             :              * values can be prevented from entering the system, or 100%
     978             :              * reliably converted to the more useful tag format, tolerate the
     979             :              * resulting error and report that we have no version data.
     980             :              */
     981             :             if (GetLastError() == ERROR_INVALID_PARAMETER)
     982             :                 return NULL;
     983             : 
     984             :             ereport(ERROR,
     985             :                     (errmsg("could not get collation version for locale \"%s\": error code %lu",
     986             :                             collcollate,
     987             :                             GetLastError())));
     988             :         }
     989             :         collversion = psprintf("%lu.%lu,%lu.%lu",
     990             :                                (version.dwNLSVersion >> 8) & 0xFFFF,
     991             :                                version.dwNLSVersion & 0xFF,
     992             :                                (version.dwDefinedVersion >> 8) & 0xFFFF,
     993             :                                version.dwDefinedVersion & 0xFF);
     994             : #endif
     995             :     }
     996             : 
     997       30876 :     return collversion;
     998             : }
     999             : 
    1000             : /*
    1001             :  * strncoll_libc_win32_utf8
    1002             :  *
    1003             :  * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
    1004             :  * invoke wcscoll_l().
    1005             :  *
    1006             :  * An input string length of -1 means that it's NUL-terminated.
    1007             :  */
    1008             : #ifdef WIN32
    1009             : static int
    1010             : strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
    1011             :                          ssize_t len2, pg_locale_t locale)
    1012             : {
    1013             :     char        sbuf[TEXTBUFLEN];
    1014             :     char       *buf = sbuf;
    1015             :     char       *a1p,
    1016             :                *a2p;
    1017             :     int         a1len;
    1018             :     int         a2len;
    1019             :     int         r;
    1020             :     int         result;
    1021             : 
    1022             :     Assert(GetDatabaseEncoding() == PG_UTF8);
    1023             : 
    1024             :     if (len1 == -1)
    1025             :         len1 = strlen(arg1);
    1026             :     if (len2 == -1)
    1027             :         len2 = strlen(arg2);
    1028             : 
    1029             :     a1len = len1 * 2 + 2;
    1030             :     a2len = len2 * 2 + 2;
    1031             : 
    1032             :     if (a1len + a2len > TEXTBUFLEN)
    1033             :         buf = palloc(a1len + a2len);
    1034             : 
    1035             :     a1p = buf;
    1036             :     a2p = buf + a1len;
    1037             : 
    1038             :     /* API does not work for zero-length input */
    1039             :     if (len1 == 0)
    1040             :         r = 0;
    1041             :     else
    1042             :     {
    1043             :         r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
    1044             :                                 (LPWSTR) a1p, a1len / 2);
    1045             :         if (!r)
    1046             :             ereport(ERROR,
    1047             :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1048             :                             GetLastError())));
    1049             :     }
    1050             :     ((LPWSTR) a1p)[r] = 0;
    1051             : 
    1052             :     if (len2 == 0)
    1053             :         r = 0;
    1054             :     else
    1055             :     {
    1056             :         r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
    1057             :                                 (LPWSTR) a2p, a2len / 2);
    1058             :         if (!r)
    1059             :             ereport(ERROR,
    1060             :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1061             :                             GetLastError())));
    1062             :     }
    1063             :     ((LPWSTR) a2p)[r] = 0;
    1064             : 
    1065             :     errno = 0;
    1066             :     result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->lt);
    1067             :     if (result == 2147483647)   /* _NLSCMPERROR; missing from mingw headers */
    1068             :         ereport(ERROR,
    1069             :                 (errmsg("could not compare Unicode strings: %m")));
    1070             : 
    1071             :     if (buf != sbuf)
    1072             :         pfree(buf);
    1073             : 
    1074             :     return result;
    1075             : }
    1076             : #endif                          /* WIN32 */
    1077             : 
    1078             : /* simple subroutine for reporting errors from newlocale() */
    1079             : void
    1080           0 : report_newlocale_failure(const char *localename)
    1081             : {
    1082             :     int         save_errno;
    1083             : 
    1084             :     /*
    1085             :      * Windows doesn't provide any useful error indication from
    1086             :      * _create_locale(), and BSD-derived platforms don't seem to feel they
    1087             :      * need to set errno either (even though POSIX is pretty clear that
    1088             :      * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
    1089             :      * is what to report.
    1090             :      */
    1091           0 :     if (errno == 0)
    1092           0 :         errno = ENOENT;
    1093             : 
    1094             :     /*
    1095             :      * ENOENT means "no such locale", not "no such file", so clarify that
    1096             :      * errno with an errdetail message.
    1097             :      */
    1098           0 :     save_errno = errno;         /* auxiliary funcs might change errno */
    1099           0 :     ereport(ERROR,
    1100             :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1101             :              errmsg("could not create locale \"%s\": %m",
    1102             :                     localename),
    1103             :              (save_errno == ENOENT ?
    1104             :               errdetail("The operating system could not find any locale data for the locale name \"%s\".",
    1105             :                         localename) : 0)));
    1106             : }
    1107             : 
    1108             : /*
    1109             :  * POSIX doesn't define _l-variants of these functions, but several systems
    1110             :  * have them.  We provide our own replacements here.
    1111             :  */
    1112             : #ifndef HAVE_MBSTOWCS_L
    1113             : static size_t
    1114     1144778 : mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
    1115             : {
    1116             : #ifdef WIN32
    1117             :     return _mbstowcs_l(dest, src, n, loc);
    1118             : #else
    1119             :     size_t      result;
    1120     1144778 :     locale_t    save_locale = uselocale(loc);
    1121             : 
    1122     1144778 :     result = mbstowcs(dest, src, n);
    1123     1144778 :     uselocale(save_locale);
    1124     1144778 :     return result;
    1125             : #endif
    1126             : }
    1127             : #endif
    1128             : #ifndef HAVE_WCSTOMBS_L
    1129             : static size_t
    1130     1144778 : wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
    1131             : {
    1132             : #ifdef WIN32
    1133             :     return _wcstombs_l(dest, src, n, loc);
    1134             : #else
    1135             :     size_t      result;
    1136     1144778 :     locale_t    save_locale = uselocale(loc);
    1137             : 
    1138     1144778 :     result = wcstombs(dest, src, n);
    1139     1144778 :     uselocale(save_locale);
    1140     1144778 :     return result;
    1141             : #endif
    1142             : }
    1143             : #endif
    1144             : 
    1145             : /*
    1146             :  * These functions convert from/to libc's wchar_t, *not* pg_wchar.
    1147             :  * Therefore we keep them here rather than with the mbutils code.
    1148             :  */
    1149             : 
    1150             : /*
    1151             :  * wchar2char --- convert wide characters to multibyte format
    1152             :  *
    1153             :  * This has the same API as the standard wcstombs_l() function; in particular,
    1154             :  * tolen is the maximum number of bytes to store at *to, and *from must be
    1155             :  * zero-terminated.  The output will be zero-terminated iff there is room.
    1156             :  */
    1157             : size_t
    1158     1144778 : wchar2char(char *to, const wchar_t *from, size_t tolen, locale_t loc)
    1159             : {
    1160             :     size_t      result;
    1161             : 
    1162     1144778 :     if (tolen == 0)
    1163           0 :         return 0;
    1164             : 
    1165             : #ifdef WIN32
    1166             : 
    1167             :     /*
    1168             :      * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
    1169             :      * for some reason mbstowcs and wcstombs won't do this for us, so we use
    1170             :      * MultiByteToWideChar().
    1171             :      */
    1172             :     if (GetDatabaseEncoding() == PG_UTF8)
    1173             :     {
    1174             :         result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
    1175             :                                      NULL, NULL);
    1176             :         /* A zero return is failure */
    1177             :         if (result <= 0)
    1178             :             result = -1;
    1179             :         else
    1180             :         {
    1181             :             Assert(result <= tolen);
    1182             :             /* Microsoft counts the zero terminator in the result */
    1183             :             result--;
    1184             :         }
    1185             :     }
    1186             :     else
    1187             : #endif                          /* WIN32 */
    1188     1144778 :     if (loc == (locale_t) 0)
    1189             :     {
    1190             :         /* Use wcstombs directly for the default locale */
    1191           0 :         result = wcstombs(to, from, tolen);
    1192             :     }
    1193             :     else
    1194             :     {
    1195             :         /* Use wcstombs_l for nondefault locales */
    1196     1144778 :         result = wcstombs_l(to, from, tolen, loc);
    1197             :     }
    1198             : 
    1199     1144778 :     return result;
    1200             : }
    1201             : 
    1202             : /*
    1203             :  * char2wchar --- convert multibyte characters to wide characters
    1204             :  *
    1205             :  * This has almost the API of mbstowcs_l(), except that *from need not be
    1206             :  * null-terminated; instead, the number of input bytes is specified as
    1207             :  * fromlen.  Also, we ereport() rather than returning -1 for invalid
    1208             :  * input encoding.  tolen is the maximum number of wchar_t's to store at *to.
    1209             :  * The output will be zero-terminated iff there is room.
    1210             :  */
    1211             : static size_t
    1212     1144778 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
    1213             :            locale_t loc)
    1214             : {
    1215             :     size_t      result;
    1216             : 
    1217     1144778 :     if (tolen == 0)
    1218           0 :         return 0;
    1219             : 
    1220             : #ifdef WIN32
    1221             :     /* See WIN32 "Unicode" comment above */
    1222             :     if (GetDatabaseEncoding() == PG_UTF8)
    1223             :     {
    1224             :         /* Win32 API does not work for zero-length input */
    1225             :         if (fromlen == 0)
    1226             :             result = 0;
    1227             :         else
    1228             :         {
    1229             :             result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
    1230             :             /* A zero return is failure */
    1231             :             if (result == 0)
    1232             :                 result = -1;
    1233             :         }
    1234             : 
    1235             :         if (result != -1)
    1236             :         {
    1237             :             Assert(result < tolen);
    1238             :             /* Append trailing null wchar (MultiByteToWideChar() does not) */
    1239             :             to[result] = 0;
    1240             :         }
    1241             :     }
    1242             :     else
    1243             : #endif                          /* WIN32 */
    1244             :     {
    1245             :         /* mbstowcs requires ending '\0' */
    1246     1144778 :         char       *str = pnstrdup(from, fromlen);
    1247             : 
    1248     1144778 :         if (loc == (locale_t) 0)
    1249             :         {
    1250             :             /* Use mbstowcs directly for the default locale */
    1251           0 :             result = mbstowcs(to, str, tolen);
    1252             :         }
    1253             :         else
    1254             :         {
    1255             :             /* Use mbstowcs_l for nondefault locales */
    1256     1144778 :             result = mbstowcs_l(to, str, tolen, loc);
    1257             :         }
    1258             : 
    1259     1144778 :         pfree(str);
    1260             :     }
    1261             : 
    1262     1144778 :     if (result == -1)
    1263             :     {
    1264             :         /*
    1265             :          * Invalid multibyte character encountered.  We try to give a useful
    1266             :          * error message by letting pg_verifymbstr check the string.  But it's
    1267             :          * possible that the string is OK to us, and not OK to mbstowcs ---
    1268             :          * this suggests that the LC_CTYPE locale is different from the
    1269             :          * database encoding.  Give a generic error message if pg_verifymbstr
    1270             :          * can't find anything wrong.
    1271             :          */
    1272           0 :         pg_verifymbstr(from, fromlen, false);   /* might not return */
    1273             :         /* but if it does ... */
    1274           0 :         ereport(ERROR,
    1275             :                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
    1276             :                  errmsg("invalid multibyte character for locale"),
    1277             :                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
    1278             :     }
    1279             : 
    1280     1144778 :     return result;
    1281             : }

Generated by: LCOV version 1.16