LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_libc.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 58.4 % 327 191
Test Date: 2026-02-17 17:20:33 Functions: 58.1 % 43 25
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-----------------------------------------------------------------------
       2              :  *
       3              :  * PostgreSQL locale utilities for libc
       4              :  *
       5              :  * Portions Copyright (c) 2002-2026, PostgreSQL Global Development Group
       6              :  *
       7              :  * src/backend/utils/adt/pg_locale_libc.c
       8              :  *
       9              :  *-----------------------------------------------------------------------
      10              :  */
      11              : 
      12              : #include "postgres.h"
      13              : 
      14              : #include <limits.h>
      15              : #include <wctype.h>
      16              : 
      17              : #include "access/htup_details.h"
      18              : #include "catalog/pg_database.h"
      19              : #include "catalog/pg_collation.h"
      20              : #include "mb/pg_wchar.h"
      21              : #include "miscadmin.h"
      22              : #include "utils/builtins.h"
      23              : #include "utils/formatting.h"
      24              : #include "utils/memutils.h"
      25              : #include "utils/pg_locale.h"
      26              : #include "utils/syscache.h"
      27              : 
      28              : #ifdef __GLIBC__
      29              : #include <gnu/libc-version.h>
      30              : #endif
      31              : 
      32              : #ifdef WIN32
      33              : #include <shlwapi.h>
      34              : #endif
      35              : 
      36              : /*
      37              :  * For the libc provider, to provide as much functionality as possible on a
      38              :  * variety of platforms without going so far as to implement everything from
      39              :  * scratch, we use several implementation strategies depending on the
      40              :  * situation:
      41              :  *
      42              :  * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
      43              :  * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
      44              :  * collations pay no attention to multibyte characters.
      45              :  *
      46              :  * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
      47              :  * This assumes that every platform uses Unicode codepoints directly
      48              :  * as the wchar_t representation of Unicode.  On some platforms
      49              :  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
      50              :  *
      51              :  * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
      52              :  * values up to 255, and punt for values above that.  This is 100% correct
      53              :  * only in single-byte encodings such as LATINn.  However, non-Unicode
      54              :  * multibyte encodings are mostly Far Eastern character sets for which the
      55              :  * properties being tested here aren't very relevant for higher code values
      56              :  * anyway.  The difficulty with using the <wctype.h> functions with
      57              :  * non-Unicode multibyte encodings is that we can have no certainty that
      58              :  * the platform's wchar_t representation matches what we do in pg_wchar
      59              :  * conversions.
      60              :  *
      61              :  * As a special case, in the "default" collation, (2) and (3) force ASCII
      62              :  * letters to follow ASCII upcase/downcase rules, while in a non-default
      63              :  * collation we just let the library functions do what they will.  The case
      64              :  * where this matters is treatment of I/i in Turkish, and the behavior is
      65              :  * meant to match the upper()/lower() SQL functions.
      66              :  *
      67              :  * We store the active collation setting in static variables.  In principle
      68              :  * it could be passed down to here via the regex library's "struct vars" data
      69              :  * structure; but that would require somewhat invasive changes in the regex
      70              :  * library, and right now there's no real benefit to be gained from that.
      71              :  *
      72              :  * NB: the coding here assumes pg_wchar is an unsigned type.
      73              :  */
      74              : 
      75              : /*
      76              :  * Size of stack buffer to use for string transformations, used to avoid heap
      77              :  * allocations in typical cases. This should be large enough that most strings
      78              :  * will fit, but small enough that we feel comfortable putting it on the
      79              :  * stack.
      80              :  */
      81              : #define     TEXTBUFLEN          1024
      82              : 
      83              : extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
      84              : 
      85              : static int  strncoll_libc(const char *arg1, ssize_t len1,
      86              :                           const char *arg2, ssize_t len2,
      87              :                           pg_locale_t locale);
      88              : static size_t strnxfrm_libc(char *dest, size_t destsize,
      89              :                             const char *src, ssize_t srclen,
      90              :                             pg_locale_t locale);
      91              : extern char *get_collation_actual_version_libc(const char *collcollate);
      92              : static locale_t make_libc_collator(const char *collate,
      93              :                                    const char *ctype);
      94              : 
      95              : #ifdef WIN32
      96              : static int  strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
      97              :                                      const char *arg2, ssize_t len2,
      98              :                                      pg_locale_t locale);
      99              : #endif
     100              : 
     101              : static size_t char2wchar(wchar_t *to, size_t tolen, const char *from,
     102              :                          size_t fromlen, locale_t loc);
     103              : 
     104              : static size_t strlower_libc_sb(char *dest, size_t destsize,
     105              :                                const char *src, ssize_t srclen,
     106              :                                pg_locale_t locale);
     107              : static size_t strlower_libc_mb(char *dest, size_t destsize,
     108              :                                const char *src, ssize_t srclen,
     109              :                                pg_locale_t locale);
     110              : static size_t strtitle_libc_sb(char *dest, size_t destsize,
     111              :                                const char *src, ssize_t srclen,
     112              :                                pg_locale_t locale);
     113              : static size_t strtitle_libc_mb(char *dest, size_t destsize,
     114              :                                const char *src, ssize_t srclen,
     115              :                                pg_locale_t locale);
     116              : static size_t strupper_libc_sb(char *dest, size_t destsize,
     117              :                                const char *src, ssize_t srclen,
     118              :                                pg_locale_t locale);
     119              : static size_t strupper_libc_mb(char *dest, size_t destsize,
     120              :                                const char *src, ssize_t srclen,
     121              :                                pg_locale_t locale);
     122              : 
     123              : static bool
     124            0 : wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
     125              : {
     126            0 :     return isdigit_l((unsigned char) wc, locale->lt);
     127              : }
     128              : 
     129              : static bool
     130            0 : wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
     131              : {
     132            0 :     return isalpha_l((unsigned char) wc, locale->lt);
     133              : }
     134              : 
     135              : static bool
     136            0 : wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
     137              : {
     138            0 :     return isalnum_l((unsigned char) wc, locale->lt);
     139              : }
     140              : 
     141              : static bool
     142            0 : wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
     143              : {
     144            0 :     return isupper_l((unsigned char) wc, locale->lt);
     145              : }
     146              : 
     147              : static bool
     148            0 : wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
     149              : {
     150            0 :     return islower_l((unsigned char) wc, locale->lt);
     151              : }
     152              : 
     153              : static bool
     154            0 : wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
     155              : {
     156            0 :     return isgraph_l((unsigned char) wc, locale->lt);
     157              : }
     158              : 
     159              : static bool
     160            0 : wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
     161              : {
     162            0 :     return isprint_l((unsigned char) wc, locale->lt);
     163              : }
     164              : 
     165              : static bool
     166            0 : wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
     167              : {
     168            0 :     return ispunct_l((unsigned char) wc, locale->lt);
     169              : }
     170              : 
     171              : static bool
     172            0 : wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
     173              : {
     174            0 :     return isspace_l((unsigned char) wc, locale->lt);
     175              : }
     176              : 
     177              : static bool
     178            0 : wc_isxdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
     179              : {
     180              : #ifndef WIN32
     181            0 :     return isxdigit_l((unsigned char) wc, locale->lt);
     182              : #else
     183              :     return _isxdigit_l((unsigned char) wc, locale->lt);
     184              : #endif
     185              : }
     186              : 
     187              : static bool
     188            0 : wc_iscased_libc_sb(pg_wchar wc, pg_locale_t locale)
     189              : {
     190            0 :     return isupper_l((unsigned char) wc, locale->lt) ||
     191            0 :         islower_l((unsigned char) wc, locale->lt);
     192              : }
     193              : 
     194              : static bool
     195        65804 : wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
     196              : {
     197        65804 :     return iswdigit_l((wint_t) wc, locale->lt);
     198              : }
     199              : 
     200              : static bool
     201        40574 : wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
     202              : {
     203        40574 :     return iswalpha_l((wint_t) wc, locale->lt);
     204              : }
     205              : 
     206              : static bool
     207      1423225 : wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
     208              : {
     209      1423225 :     return iswalnum_l((wint_t) wc, locale->lt);
     210              : }
     211              : 
     212              : static bool
     213         2056 : wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
     214              : {
     215         2056 :     return iswupper_l((wint_t) wc, locale->lt);
     216              : }
     217              : 
     218              : static bool
     219         2051 : wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
     220              : {
     221         2051 :     return iswlower_l((wint_t) wc, locale->lt);
     222              : }
     223              : 
     224              : static bool
     225         2051 : wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
     226              : {
     227         2051 :     return iswgraph_l((wint_t) wc, locale->lt);
     228              : }
     229              : 
     230              : static bool
     231         2051 : wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
     232              : {
     233         2051 :     return iswprint_l((wint_t) wc, locale->lt);
     234              : }
     235              : 
     236              : static bool
     237         2051 : wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
     238              : {
     239         2051 :     return iswpunct_l((wint_t) wc, locale->lt);
     240              : }
     241              : 
     242              : static bool
     243        24076 : wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
     244              : {
     245        24076 :     return iswspace_l((wint_t) wc, locale->lt);
     246              : }
     247              : 
     248              : static bool
     249            6 : wc_isxdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
     250              : {
     251              : #ifndef WIN32
     252            6 :     return iswxdigit_l((wint_t) wc, locale->lt);
     253              : #else
     254              :     return _iswxdigit_l((wint_t) wc, locale->lt);
     255              : #endif
     256              : }
     257              : 
     258              : static bool
     259            0 : wc_iscased_libc_mb(pg_wchar wc, pg_locale_t locale)
     260              : {
     261            0 :     return iswupper_l((wint_t) wc, locale->lt) ||
     262            0 :         iswlower_l((wint_t) wc, locale->lt);
     263              : }
     264              : 
     265              : static pg_wchar
     266            0 : toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
     267              : {
     268              :     Assert(GetDatabaseEncoding() != PG_UTF8);
     269              : 
     270              :     /* force C behavior for ASCII characters, per comments above */
     271            0 :     if (locale->is_default && wc <= (pg_wchar) 127)
     272            0 :         return pg_ascii_toupper((unsigned char) wc);
     273            0 :     if (wc <= (pg_wchar) UCHAR_MAX)
     274            0 :         return toupper_l((unsigned char) wc, locale->lt);
     275              :     else
     276            0 :         return wc;
     277              : }
     278              : 
     279              : static pg_wchar
     280         4544 : toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
     281              : {
     282              :     Assert(GetDatabaseEncoding() == PG_UTF8);
     283              : 
     284              :     /* force C behavior for ASCII characters, per comments above */
     285         4544 :     if (locale->is_default && wc <= (pg_wchar) 127)
     286          446 :         return pg_ascii_toupper((unsigned char) wc);
     287              :     if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
     288         4098 :         return towupper_l((wint_t) wc, locale->lt);
     289              :     else
     290              :         return wc;
     291              : }
     292              : 
     293              : static pg_wchar
     294            0 : tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
     295              : {
     296              :     Assert(GetDatabaseEncoding() != PG_UTF8);
     297              : 
     298              :     /* force C behavior for ASCII characters, per comments above */
     299            0 :     if (locale->is_default && wc <= (pg_wchar) 127)
     300            0 :         return pg_ascii_tolower((unsigned char) wc);
     301            0 :     if (wc <= (pg_wchar) UCHAR_MAX)
     302            0 :         return tolower_l((unsigned char) wc, locale->lt);
     303              :     else
     304            0 :         return wc;
     305              : }
     306              : 
     307              : static pg_wchar
     308         4546 : tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
     309              : {
     310              :     Assert(GetDatabaseEncoding() == PG_UTF8);
     311              : 
     312              :     /* force C behavior for ASCII characters, per comments above */
     313         4546 :     if (locale->is_default && wc <= (pg_wchar) 127)
     314          448 :         return pg_ascii_tolower((unsigned char) wc);
     315              :     if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
     316         4098 :         return towlower_l((wint_t) wc, locale->lt);
     317              :     else
     318              :         return wc;
     319              : }
     320              : 
     321              : /*
     322              :  * Characters A..Z always downcase to a..z, even in the Turkish locale.
     323              :  * Bytes beyond 127 that isupper_l() accepts are mapped with tolower_l().
     324              :  */
     325              : static size_t
     326        12669 : downcase_ident_libc_sb(char *dst, size_t dstsize, const char *src,
     327              :                        ssize_t srclen, pg_locale_t locale)
     328              : {
     329        12669 :     locale_t    loc = locale->lt;
     330              :     int         i;
     331              : 
     332       123340 :     for (i = 0; i < srclen && i < dstsize; i++)
     333              :     {
     334       110671 :         unsigned char ch = (unsigned char) src[i];
     335              : 
     336       110671 :         if (ch >= 'A' && ch <= 'Z')
     337         6798 :             ch = pg_ascii_tolower(ch);
     338       103873 :         else if (IS_HIGHBIT_SET(ch) && isupper_l(ch, loc))
     339            0 :             ch = tolower_l(ch, loc);
     340       110671 :         dst[i] = (char) ch;
     341              :     }
     342              : 
     343        12669 :     if (i < dstsize)
     344        12669 :         dst[i] = '\0';
     345              : 
     346        12669 :     return srclen;
     347              : }
     348              : 
     349              : static const struct ctype_methods ctype_methods_libc_sb = {
     350              :     .strlower = strlower_libc_sb,
     351              :     .strtitle = strtitle_libc_sb,
     352              :     .strupper = strupper_libc_sb,
     353              :     /* in libc, casefolding is the same as lowercasing */
     354              :     .strfold = strlower_libc_sb,
     355              :     .downcase_ident = downcase_ident_libc_sb,
     356              :     .wc_isdigit = wc_isdigit_libc_sb,
     357              :     .wc_isalpha = wc_isalpha_libc_sb,
     358              :     .wc_isalnum = wc_isalnum_libc_sb,
     359              :     .wc_isupper = wc_isupper_libc_sb,
     360              :     .wc_islower = wc_islower_libc_sb,
     361              :     .wc_isgraph = wc_isgraph_libc_sb,
     362              :     .wc_isprint = wc_isprint_libc_sb,
     363              :     .wc_ispunct = wc_ispunct_libc_sb,
     364              :     .wc_isspace = wc_isspace_libc_sb,
     365              :     .wc_isxdigit = wc_isxdigit_libc_sb,
     366              :     .wc_iscased = wc_iscased_libc_sb,
     367              :     .wc_toupper = toupper_libc_sb,
     368              :     .wc_tolower = tolower_libc_sb,
     369              : };
     370              : 
     371              : /*
     372              :  * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
     373              :  * single-byte semantics for pattern matching.
     374              :  */
     375              : static const struct ctype_methods ctype_methods_libc_other_mb = {
     376              :     .strlower = strlower_libc_mb,
     377              :     .strtitle = strtitle_libc_mb,
     378              :     .strupper = strupper_libc_mb,
     379              :     /* in libc, casefolding is the same as lowercasing */
     380              :     .strfold = strlower_libc_mb,
     381              :     /* uses plain ASCII semantics for historical reasons */
     382              :     .downcase_ident = NULL,
     383              :     .wc_isdigit = wc_isdigit_libc_sb,
     384              :     .wc_isalpha = wc_isalpha_libc_sb,
     385              :     .wc_isalnum = wc_isalnum_libc_sb,
     386              :     .wc_isupper = wc_isupper_libc_sb,
     387              :     .wc_islower = wc_islower_libc_sb,
     388              :     .wc_isgraph = wc_isgraph_libc_sb,
     389              :     .wc_isprint = wc_isprint_libc_sb,
     390              :     .wc_ispunct = wc_ispunct_libc_sb,
     391              :     .wc_isspace = wc_isspace_libc_sb,
     392              :     .wc_isxdigit = wc_isxdigit_libc_sb,
     393              :     .wc_iscased = wc_iscased_libc_sb,
     394              :     .wc_toupper = toupper_libc_sb,
     395              :     .wc_tolower = tolower_libc_sb,
     396              : };
     397              : 
     398              : static const struct ctype_methods ctype_methods_libc_utf8 = {
     399              :     .strlower = strlower_libc_mb,
     400              :     .strtitle = strtitle_libc_mb,
     401              :     .strupper = strupper_libc_mb,
     402              :     /* in libc, casefolding is the same as lowercasing */
     403              :     .strfold = strlower_libc_mb,
     404              :     /* uses plain ASCII semantics for historical reasons */
     405              :     .downcase_ident = NULL,
     406              :     .wc_isdigit = wc_isdigit_libc_mb,
     407              :     .wc_isalpha = wc_isalpha_libc_mb,
     408              :     .wc_isalnum = wc_isalnum_libc_mb,
     409              :     .wc_isupper = wc_isupper_libc_mb,
     410              :     .wc_islower = wc_islower_libc_mb,
     411              :     .wc_isgraph = wc_isgraph_libc_mb,
     412              :     .wc_isprint = wc_isprint_libc_mb,
     413              :     .wc_ispunct = wc_ispunct_libc_mb,
     414              :     .wc_isspace = wc_isspace_libc_mb,
     415              :     .wc_isxdigit = wc_isxdigit_libc_mb,
     416              :     .wc_iscased = wc_iscased_libc_mb,
     417              :     .wc_toupper = toupper_libc_mb,
     418              :     .wc_tolower = tolower_libc_mb,
     419              : };
     420              : 
     421              : static const struct collate_methods collate_methods_libc = {
     422              :     .strncoll = strncoll_libc,
     423              :     .strnxfrm = strnxfrm_libc,
     424              :     .strnxfrm_prefix = NULL,
     425              : 
     426              :     /*
     427              :      * Unfortunately, it seems that strxfrm() for non-C collations is broken
     428              :      * on many common platforms; testing of multiple versions of glibc reveals
     429              :      * that, for many locales, strcoll() and strxfrm() do not return
     430              :      * consistent results. Apart from glibc, only Cygwin's libc has so far
     431              :      * been shown to have a problem, but we take the conservative course of
     432              :      * action for now and disable this categorically.  (Users who are certain
     433              :      * this isn't a problem on their system can define TRUST_STRXFRM.)
     434              :      */
     435              : #ifdef TRUST_STRXFRM
     436              :     .strxfrm_is_safe = true,
     437              : #else
     438              :     .strxfrm_is_safe = false,
     439              : #endif
     440              : };
     441              : 
     442              : #ifdef WIN32
     443              : static const struct collate_methods collate_methods_libc_win32_utf8 = {
     444              :     .strncoll = strncoll_libc_win32_utf8,
     445              :     .strnxfrm = strnxfrm_libc,
     446              :     .strnxfrm_prefix = NULL,
     447              : #ifdef TRUST_STRXFRM
     448              :     .strxfrm_is_safe = true,
     449              : #else
     450              :     .strxfrm_is_safe = false,
     451              : #endif
     452              : };
     453              : #endif
     454              : 
     455              : static size_t
     456            0 : strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     457              :                  pg_locale_t locale)
     458              : {
     459            0 :     if (srclen < 0)
     460            0 :         srclen = strlen(src);
     461              : 
     462            0 :     if (srclen + 1 <= destsize)
     463              :     {
     464            0 :         locale_t    loc = locale->lt;
     465              :         char       *p;
     466              : 
     467            0 :         memcpy(dest, src, srclen);
     468            0 :         dest[srclen] = '\0';
     469              : 
     470              :         /*
     471              :          * Note: we assume that tolower_l() will not be so broken as to need
     472              :          * an isupper_l() guard test.  When using the default collation, we
     473              :          * apply the traditional Postgres behavior that forces ASCII-style
     474              :          * treatment of I/i, but in non-default collations you get exactly
     475              :          * what the collation says.
     476              :          */
     477            0 :         for (p = dest; *p; p++)
     478              :         {
     479            0 :             if (locale->is_default)
     480              :             {
     481            0 :                 if (*p >= 'A' && *p <= 'Z')
     482            0 :                     *p += 'a' - 'A';
     483            0 :                 else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
     484            0 :                     *p = tolower_l((unsigned char) *p, loc);
     485              :             }
     486              :             else
     487            0 :                 *p = tolower_l((unsigned char) *p, loc);
     488              :         }
     489              :     }
     490              : 
     491            0 :     return srclen;
     492              : }
     493              : 
     494              : static size_t
     495       432225 : strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     496              :                  pg_locale_t locale)
     497              : {
     498       432225 :     locale_t    loc = locale->lt;
     499              :     size_t      result_size;
     500              :     wchar_t    *workspace;
     501              :     char       *result;
     502              :     size_t      curr_char;
     503              :     size_t      max_size;
     504              : 
     505       432225 :     if (srclen < 0)
     506            0 :         srclen = strlen(src);
     507              : 
     508              :     /* Overflow paranoia */
     509       432225 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     510            0 :         ereport(ERROR,
     511              :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     512              :                  errmsg("out of memory")));
     513              : 
     514              :     /* Output workspace cannot have more codes than input bytes */
     515       432225 :     workspace = palloc_array(wchar_t, srclen + 1);
     516              : 
     517       432225 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     518              : 
     519      2275067 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     520      1842842 :         workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     521              : 
     522              :     /*
     523              :      * Make result large enough; case change might change number of bytes
     524              :      */
     525       432225 :     max_size = curr_char * pg_database_encoding_max_length();
     526       432225 :     result = palloc(max_size + 1);
     527              : 
     528       432225 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     529              : 
     530       432225 :     if (destsize >= result_size + 1)
     531              :     {
     532       432225 :         memcpy(dest, result, result_size);
     533       432225 :         dest[result_size] = '\0';
     534              :     }
     535              : 
     536       432225 :     pfree(workspace);
     537       432225 :     pfree(result);
     538              : 
     539       432225 :     return result_size;
     540              : }
     541              : 
     542              : static size_t
     543            0 : strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     544              :                  pg_locale_t locale)
     545              : {
     546            0 :     if (srclen < 0)
     547            0 :         srclen = strlen(src);
     548              : 
     549            0 :     if (srclen + 1 <= destsize)
     550              :     {
     551            0 :         locale_t    loc = locale->lt;
     552            0 :         int         wasalnum = false;
     553              :         char       *p;
     554              : 
     555            0 :         memcpy(dest, src, srclen);
     556            0 :         dest[srclen] = '\0';
     557              : 
     558              :         /*
     559              :          * Note: we assume that toupper_l()/tolower_l() will not be so broken
     560              :          * as to need guard tests.  When using the default collation, we apply
     561              :          * the traditional Postgres behavior that forces ASCII-style treatment
     562              :          * of I/i, but in non-default collations you get exactly what the
     563              :          * collation says.
     564              :          */
     565            0 :         for (p = dest; *p; p++)
     566              :         {
     567            0 :             if (locale->is_default)
     568              :             {
     569            0 :                 if (wasalnum)
     570              :                 {
     571            0 :                     if (*p >= 'A' && *p <= 'Z')
     572            0 :                         *p += 'a' - 'A';
     573            0 :                     else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc))
     574            0 :                         *p = tolower_l((unsigned char) *p, loc);
     575              :                 }
     576              :                 else
     577              :                 {
     578            0 :                     if (*p >= 'a' && *p <= 'z')
     579            0 :                         *p -= 'a' - 'A';
     580            0 :                     else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
     581            0 :                         *p = toupper_l((unsigned char) *p, loc);
     582              :                 }
     583              :             }
     584              :             else
     585              :             {
     586            0 :                 if (wasalnum)
     587            0 :                     *p = tolower_l((unsigned char) *p, loc);
     588              :                 else
     589            0 :                     *p = toupper_l((unsigned char) *p, loc);
     590              :             }
     591            0 :             wasalnum = isalnum_l((unsigned char) *p, loc);
     592              :         }
     593              :     }
     594              : 
     595            0 :     return srclen;
     596              : }
     597              : 
     598              : static size_t
     599            4 : strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     600              :                  pg_locale_t locale)
     601              : {
     602            4 :     locale_t    loc = locale->lt;
     603            4 :     int         wasalnum = false;
     604              :     size_t      result_size;
     605              :     wchar_t    *workspace;
     606              :     char       *result;
     607              :     size_t      curr_char;
     608              :     size_t      max_size;
     609              : 
     610            4 :     if (srclen < 0)
     611            0 :         srclen = strlen(src);
     612              : 
     613              :     /* Overflow paranoia */
     614            4 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     615            0 :         ereport(ERROR,
     616              :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     617              :                  errmsg("out of memory")));
     618              : 
     619              :     /* Output workspace cannot have more codes than input bytes */
     620            4 :     workspace = palloc_array(wchar_t, srclen + 1);
     621              : 
     622            4 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     623              : 
     624           40 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     625              :     {
     626           36 :         if (wasalnum)
     627           28 :             workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     628              :         else
     629            8 :             workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     630           36 :         wasalnum = iswalnum_l(workspace[curr_char], loc);
     631              :     }
     632              : 
     633              :     /*
     634              :      * Make result large enough; case change might change number of bytes
     635              :      */
     636            4 :     max_size = curr_char * pg_database_encoding_max_length();
     637            4 :     result = palloc(max_size + 1);
     638              : 
     639            4 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     640              : 
     641            4 :     if (destsize >= result_size + 1)
     642              :     {
     643            4 :         memcpy(dest, result, result_size);
     644            4 :         dest[result_size] = '\0';
     645              :     }
     646              : 
     647            4 :     pfree(workspace);
     648            4 :     pfree(result);
     649              : 
     650            4 :     return result_size;
     651              : }
     652              : 
     653              : static size_t
     654            0 : strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     655              :                  pg_locale_t locale)
     656              : {
     657            0 :     if (srclen < 0)
     658            0 :         srclen = strlen(src);
     659              : 
     660            0 :     if (srclen + 1 <= destsize)
     661              :     {
     662            0 :         locale_t    loc = locale->lt;
     663              :         char       *p;
     664              : 
     665            0 :         memcpy(dest, src, srclen);
     666            0 :         dest[srclen] = '\0';
     667              : 
     668              :         /*
     669              :          * Note: we assume that toupper_l() will not be so broken as to need
     670              :          * an islower_l() guard test.  When using the default collation, we
     671              :          * apply the traditional Postgres behavior that forces ASCII-style
     672              :          * treatment of I/i, but in non-default collations you get exactly
     673              :          * what the collation says.
     674              :          */
     675            0 :         for (p = dest; *p; p++)
     676              :         {
     677            0 :             if (locale->is_default)
     678              :             {
     679            0 :                 if (*p >= 'a' && *p <= 'z')
     680            0 :                     *p -= 'a' - 'A';
     681            0 :                 else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc))
     682            0 :                     *p = toupper_l((unsigned char) *p, loc);
     683              :             }
     684              :             else
     685            0 :                 *p = toupper_l((unsigned char) *p, loc);
     686              :         }
     687              :     }
     688              : 
     689            0 :     return srclen;
     690              : }
     691              : 
     692              : static size_t
     693       361182 : strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     694              :                  pg_locale_t locale)
     695              : {
     696       361182 :     locale_t    loc = locale->lt;
     697              :     size_t      result_size;
     698              :     wchar_t    *workspace;
     699              :     char       *result;
     700              :     size_t      curr_char;
     701              :     size_t      max_size;
     702              : 
     703       361182 :     if (srclen < 0)
     704            0 :         srclen = strlen(src);
     705              : 
     706              :     /* Overflow paranoia */
     707       361182 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     708            0 :         ereport(ERROR,
     709              :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     710              :                  errmsg("out of memory")));
     711              : 
     712              :     /* Output workspace cannot have more codes than input bytes */
     713       361182 :     workspace = palloc_array(wchar_t, srclen + 1);
     714              : 
     715       361182 :     char2wchar(workspace, srclen + 1, src, srclen, loc);
     716              : 
     717      1194356 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     718       833174 :         workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     719              : 
     720              :     /*
     721              :      * Make result large enough; case change might change number of bytes
     722              :      */
     723       361182 :     max_size = curr_char * pg_database_encoding_max_length();
     724       361182 :     result = palloc(max_size + 1);
     725              : 
     726       361182 :     result_size = wchar2char(result, workspace, max_size + 1, loc);
     727              : 
     728       361182 :     if (destsize >= result_size + 1)
     729              :     {
     730       361182 :         memcpy(dest, result, result_size);
     731       361182 :         dest[result_size] = '\0';
     732              :     }
     733              : 
     734       361182 :     pfree(workspace);
     735       361182 :     pfree(result);
     736              : 
     737       361182 :     return result_size;
     738              : }
     739              : 
     740              : pg_locale_t
     741        16144 : create_pg_locale_libc(Oid collid, MemoryContext context)
     742              : {
     743              :     const char *collate;
     744              :     const char *ctype;
     745              :     locale_t    loc;
     746              :     pg_locale_t result;
     747              : 
     748        16144 :     if (collid == DEFAULT_COLLATION_OID)
     749              :     {
     750              :         HeapTuple   tp;
     751              :         Datum       datum;
     752              : 
     753        16099 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     754        16099 :         if (!HeapTupleIsValid(tp))
     755            0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     756        16099 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     757              :                                        Anum_pg_database_datcollate);
     758        16099 :         collate = TextDatumGetCString(datum);
     759        16099 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     760              :                                        Anum_pg_database_datctype);
     761        16099 :         ctype = TextDatumGetCString(datum);
     762              : 
     763        16099 :         ReleaseSysCache(tp);
     764              :     }
     765              :     else
     766              :     {
     767              :         HeapTuple   tp;
     768              :         Datum       datum;
     769              : 
     770           45 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     771           45 :         if (!HeapTupleIsValid(tp))
     772            0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     773              : 
     774           45 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     775              :                                        Anum_pg_collation_collcollate);
     776           45 :         collate = TextDatumGetCString(datum);
     777           45 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     778              :                                        Anum_pg_collation_collctype);
     779           45 :         ctype = TextDatumGetCString(datum);
     780              : 
     781           45 :         ReleaseSysCache(tp);
     782              :     }
     783              : 
     784              : 
     785        16144 :     loc = make_libc_collator(collate, ctype);
     786              : 
     787        16144 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     788        16144 :     result->deterministic = true;
     789        31720 :     result->collate_is_c = (strcmp(collate, "C") == 0) ||
     790        15576 :         (strcmp(collate, "POSIX") == 0);
     791        31720 :     result->ctype_is_c = (strcmp(ctype, "C") == 0) ||
     792        15576 :         (strcmp(ctype, "POSIX") == 0);
     793        16144 :     result->lt = loc;
     794        16144 :     if (!result->collate_is_c)
     795              :     {
     796              : #ifdef WIN32
     797              :         if (GetDatabaseEncoding() == PG_UTF8)
     798              :             result->collate = &collate_methods_libc_win32_utf8;
     799              :         else
     800              : #endif
     801        15544 :             result->collate = &collate_methods_libc;
     802              :     }
     803        16144 :     if (!result->ctype_is_c)
     804              :     {
     805        15544 :         if (GetDatabaseEncoding() == PG_UTF8)
     806        15512 :             result->ctype = &ctype_methods_libc_utf8;
     807           32 :         else if (pg_database_encoding_max_length() > 1)
     808            0 :             result->ctype = &ctype_methods_libc_other_mb;
     809              :         else
     810           32 :             result->ctype = &ctype_methods_libc_sb;
     811              :     }
     812              : 
     813        16144 :     return result;
     814              : }
     815              : 
     816              : /*
     817              :  * Create a locale_t with the given collation and ctype.
     818              :  *
     819              :  * The "C" and "POSIX" locales are not actually handled by libc, so return
     820              :  * NULL.
     821              :  *
     822              :  * Ensure that no path leaks a locale_t.
     823              :  */
     824              : static locale_t
     825        16144 : make_libc_collator(const char *collate, const char *ctype)
     826              : {
     827        16144 :     locale_t    loc = 0;
     828              : 
     829        16144 :     if (strcmp(collate, ctype) == 0)
     830              :     {
     831        16144 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     832              :         {
     833              :             /* Normal case where they're the same */
     834        15544 :             errno = 0;
     835              : #ifndef WIN32
     836        15544 :             loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
     837              :                             NULL);
     838              : #else
     839              :             loc = _create_locale(LC_ALL, collate);
     840              : #endif
     841        15544 :             if (!loc)
     842            0 :                 report_newlocale_failure(collate);
     843              :         }
     844              :     }
     845              :     else
     846              :     {
     847              : #ifndef WIN32
     848              :         /* We need two newlocale() steps */
     849            0 :         locale_t    loc1 = 0;
     850              : 
     851            0 :         if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
     852              :         {
     853            0 :             errno = 0;
     854            0 :             loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
     855            0 :             if (!loc1)
     856            0 :                 report_newlocale_failure(collate);
     857              :         }
     858              : 
     859            0 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     860              :         {
     861            0 :             errno = 0;
     862            0 :             loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
     863            0 :             if (!loc)
     864              :             {
     865            0 :                 if (loc1)
     866            0 :                     freelocale(loc1);
     867            0 :                 report_newlocale_failure(ctype);
     868              :             }
     869              :         }
     870              :         else
     871            0 :             loc = loc1;
     872              : #else
     873              : 
     874              :         /*
     875              :          * XXX The _create_locale() API doesn't appear to support this. Could
     876              :          * perhaps be worked around by changing pg_locale_t to contain two
     877              :          * separate fields.
     878              :          */
     879              :         ereport(ERROR,
     880              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     881              :                  errmsg("collations with different collate and ctype values are not supported on this platform")));
     882              : #endif
     883              :     }
     884              : 
     885        16144 :     return loc;
     886              : }
     887              : 
     888              : /*
     889              :  * strncoll_libc
     890              :  *
     891              :  * NUL-terminate arguments, if necessary, and pass to strcoll_l().
     892              :  *
     893              :  * An input string length of -1 means that it's already NUL-terminated.
     894              :  */
     895              : int
     896     15424450 : strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
     897              :               pg_locale_t locale)
     898              : {
     899              :     char        sbuf[TEXTBUFLEN];
     900     15424450 :     char       *buf = sbuf;
     901     15424450 :     size_t      bufsize1 = (len1 == -1) ? 0 : len1 + 1;
     902     15424450 :     size_t      bufsize2 = (len2 == -1) ? 0 : len2 + 1;
     903              :     const char *arg1n;
     904              :     const char *arg2n;
     905              :     int         result;
     906              : 
     907     15424450 :     if (bufsize1 + bufsize2 > TEXTBUFLEN)
     908          284 :         buf = palloc(bufsize1 + bufsize2);
     909              : 
     910              :     /* nul-terminate arguments if necessary */
     911     15424450 :     if (len1 == -1)
     912              :     {
     913     13186621 :         arg1n = arg1;
     914              :     }
     915              :     else
     916              :     {
     917      2237829 :         char       *buf1 = buf;
     918              : 
     919      2237829 :         memcpy(buf1, arg1, len1);
     920      2237829 :         buf1[len1] = '\0';
     921      2237829 :         arg1n = buf1;
     922              :     }
     923              : 
     924     15424450 :     if (len2 == -1)
     925              :     {
     926     13186621 :         arg2n = arg2;
     927              :     }
     928              :     else
     929              :     {
     930      2237829 :         char       *buf2 = buf + bufsize1;
     931              : 
     932      2237829 :         memcpy(buf2, arg2, len2);
     933      2237829 :         buf2[len2] = '\0';
     934      2237829 :         arg2n = buf2;
     935              :     }
     936              : 
     937     15424450 :     result = strcoll_l(arg1n, arg2n, locale->lt);
     938              : 
     939     15424450 :     if (buf != sbuf)
     940          284 :         pfree(buf);
     941              : 
     942     15424450 :     return result;
     943              : }
     944              : 
     945              : /*
     946              :  * strnxfrm_libc
     947              :  *
     948              :  * NUL-terminate src, if necessary, and pass to strxfrm_l().
     949              :  *
     950              :  * A source length of -1 means that it's already NUL-terminated.
     951              :  */
     952              : size_t
     953           72 : strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
     954              :               pg_locale_t locale)
     955              : {
     956              :     char        sbuf[TEXTBUFLEN];
     957           72 :     char       *buf = sbuf;
     958           72 :     size_t      bufsize = srclen + 1;
     959              :     size_t      result;
     960              : 
     961           72 :     if (srclen == -1)
     962           72 :         return strxfrm_l(dest, src, destsize, locale->lt);
     963              : 
     964            0 :     if (bufsize > TEXTBUFLEN)
     965            0 :         buf = palloc(bufsize);
     966              : 
     967              :     /* nul-terminate argument */
     968            0 :     memcpy(buf, src, srclen);
     969            0 :     buf[srclen] = '\0';
     970              : 
     971            0 :     result = strxfrm_l(dest, buf, destsize, locale->lt);
     972              : 
     973            0 :     if (buf != sbuf)
     974            0 :         pfree(buf);
     975              : 
     976              :     /* if dest is defined, it should be nul-terminated */
     977              :     Assert(result >= destsize || dest[result] == '\0');
     978              : 
     979            0 :     return result;
     980              : }
     981              : 
     982              : char *
     983        15848 : get_collation_actual_version_libc(const char *collcollate)
     984              : {
     985        15848 :     char       *collversion = NULL;
     986              : 
     987        31608 :     if (pg_strcasecmp("C", collcollate) != 0 &&
     988        31422 :         pg_strncasecmp("C.", collcollate, 2) != 0 &&
     989        15662 :         pg_strcasecmp("POSIX", collcollate) != 0)
     990              :     {
     991              : #if defined(__GLIBC__)
     992              :         /* Use the glibc version because we don't have anything better. */
     993        15649 :         collversion = pstrdup(gnu_get_libc_version());
     994              : #elif defined(LC_VERSION_MASK)
     995              :         locale_t    loc;
     996              : 
     997              :         /* Look up FreeBSD collation version. */
     998              :         loc = newlocale(LC_COLLATE_MASK, collcollate, NULL);
     999              :         if (loc)
    1000              :         {
    1001              :             collversion =
    1002              :                 pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
    1003              :             freelocale(loc);
    1004              :         }
    1005              :         else
    1006              :             ereport(ERROR,
    1007              :                     (errmsg("could not load locale \"%s\"", collcollate)));
    1008              : #elif defined(WIN32)
    1009              :         /*
    1010              :          * If we are targeting Windows Vista and above, we can ask for a name
    1011              :          * given a collation name (earlier versions required a location code
    1012              :          * that we don't have).
    1013              :          */
    1014              :         NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
    1015              :         WCHAR       wide_collcollate[LOCALE_NAME_MAX_LENGTH];
    1016              : 
    1017              :         MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
    1018              :                             LOCALE_NAME_MAX_LENGTH);
    1019              :         if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
    1020              :         {
    1021              :             /*
    1022              :              * GetNLSVersionEx() wants a language tag such as "en-US", not a
    1023              :              * locale name like "English_United States.1252".  Until those
    1024              :              * values can be prevented from entering the system, or 100%
    1025              :              * reliably converted to the more useful tag format, tolerate the
    1026              :              * resulting error and report that we have no version data.
    1027              :              */
    1028              :             if (GetLastError() == ERROR_INVALID_PARAMETER)
    1029              :                 return NULL;
    1030              : 
    1031              :             ereport(ERROR,
    1032              :                     (errmsg("could not get collation version for locale \"%s\": error code %lu",
    1033              :                             collcollate,
    1034              :                             GetLastError())));
    1035              :         }
    1036              :         collversion = psprintf("%lu.%lu,%lu.%lu",
    1037              :                                (version.dwNLSVersion >> 8) & 0xFFFF,
    1038              :                                version.dwNLSVersion & 0xFF,
    1039              :                                (version.dwDefinedVersion >> 8) & 0xFFFF,
    1040              :                                version.dwDefinedVersion & 0xFF);
    1041              : #endif
    1042              :     }
    1043              : 
    1044        15848 :     return collversion;
    1045              : }
    1046              : 
    1047              : /*
    1048              :  * strncoll_libc_win32_utf8
    1049              :  *
    1050              :  * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
    1051              :  * invoke wcscoll_l().
    1052              :  *
    1053              :  * An input string length of -1 means that it's NUL-terminated.
    1054              :  */
    1055              : #ifdef WIN32
    1056              : static int
    1057              : strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
    1058              :                          ssize_t len2, pg_locale_t locale)
    1059              : {
    1060              :     char        sbuf[TEXTBUFLEN];
    1061              :     char       *buf = sbuf;
    1062              :     char       *a1p,
    1063              :                *a2p;
    1064              :     int         a1len;
    1065              :     int         a2len;
    1066              :     int         r;
    1067              :     int         result;
    1068              : 
    1069              :     Assert(GetDatabaseEncoding() == PG_UTF8);
    1070              : 
    1071              :     if (len1 == -1)
    1072              :         len1 = strlen(arg1);
    1073              :     if (len2 == -1)
    1074              :         len2 = strlen(arg2);
    1075              : 
    1076              :     a1len = len1 * 2 + 2;
    1077              :     a2len = len2 * 2 + 2;
    1078              : 
    1079              :     if (a1len + a2len > TEXTBUFLEN)
    1080              :         buf = palloc(a1len + a2len);
    1081              : 
    1082              :     a1p = buf;
    1083              :     a2p = buf + a1len;
    1084              : 
    1085              :     /* API does not work for zero-length input */
    1086              :     if (len1 == 0)
    1087              :         r = 0;
    1088              :     else
    1089              :     {
    1090              :         r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
    1091              :                                 (LPWSTR) a1p, a1len / 2);
    1092              :         if (!r)
    1093              :             ereport(ERROR,
    1094              :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1095              :                             GetLastError())));
    1096              :     }
    1097              :     ((LPWSTR) a1p)[r] = 0;
    1098              : 
    1099              :     if (len2 == 0)
    1100              :         r = 0;
    1101              :     else
    1102              :     {
    1103              :         r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
    1104              :                                 (LPWSTR) a2p, a2len / 2);
    1105              :         if (!r)
    1106              :             ereport(ERROR,
    1107              :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1108              :                             GetLastError())));
    1109              :     }
    1110              :     ((LPWSTR) a2p)[r] = 0;
    1111              : 
    1112              :     errno = 0;
    1113              :     result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->lt);
    1114              :     if (result == 2147483647)   /* _NLSCMPERROR; missing from mingw headers */
    1115              :         ereport(ERROR,
    1116              :                 (errmsg("could not compare Unicode strings: %m")));
    1117              : 
    1118              :     if (buf != sbuf)
    1119              :         pfree(buf);
    1120              : 
    1121              :     return result;
    1122              : }
    1123              : #endif                          /* WIN32 */
    1124              : 
    1125              : /* simple subroutine for reporting errors from newlocale() */
    1126              : void
    1127            0 : report_newlocale_failure(const char *localename)
    1128              : {
    1129              :     int         save_errno;
    1130              : 
    1131              :     /*
    1132              :      * Windows doesn't provide any useful error indication from
    1133              :      * _create_locale(), and BSD-derived platforms don't seem to feel they
    1134              :      * need to set errno either (even though POSIX is pretty clear that
    1135              :      * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
    1136              :      * is what to report.
    1137              :      */
    1138            0 :     if (errno == 0)
    1139            0 :         errno = ENOENT;
    1140              : 
    1141              :     /*
    1142              :      * ENOENT means "no such locale", not "no such file", so clarify that
    1143              :      * errno with an errdetail message.
    1144              :      */
    1145            0 :     save_errno = errno;         /* auxiliary funcs might change errno */
    1146            0 :     ereport(ERROR,
    1147              :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1148              :              errmsg("could not create locale \"%s\": %m",
    1149              :                     localename),
    1150              :              (save_errno == ENOENT ?
    1151              :               errdetail("The operating system could not find any locale data for the locale name \"%s\".",
    1152              :                         localename) : 0)));
    1153              : }
    1154              : 
    1155              : /*
    1156              :  * POSIX doesn't define _l-variants of these functions, but several systems
    1157              :  * have them.  We provide our own replacements here.
    1158              :  */
    1159              : #ifndef HAVE_MBSTOWCS_L
    1160              : static size_t
    1161       793411 : mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
    1162              : {
    1163              : #ifdef WIN32
    1164              :     return _mbstowcs_l(dest, src, n, loc);
    1165              : #else
    1166              :     size_t      result;
    1167       793411 :     locale_t    save_locale = uselocale(loc);
    1168              : 
    1169       793411 :     result = mbstowcs(dest, src, n);
    1170       793411 :     uselocale(save_locale);
    1171       793411 :     return result;
    1172              : #endif
    1173              : }
    1174              : #endif
    1175              : #ifndef HAVE_WCSTOMBS_L
    1176              : static size_t
    1177       793411 : wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
    1178              : {
    1179              : #ifdef WIN32
    1180              :     return _wcstombs_l(dest, src, n, loc);
    1181              : #else
    1182              :     size_t      result;
    1183       793411 :     locale_t    save_locale = uselocale(loc);
    1184              : 
    1185       793411 :     result = wcstombs(dest, src, n);
    1186       793411 :     uselocale(save_locale);
    1187       793411 :     return result;
    1188              : #endif
    1189              : }
    1190              : #endif
    1191              : 
    1192              : /*
    1193              :  * These functions convert from/to libc's wchar_t, *not* pg_wchar.
    1194              :  * Therefore we keep them here rather than with the mbutils code.
    1195              :  */
    1196              : 
    1197              : /*
    1198              :  * wchar2char --- convert wide characters to multibyte format
    1199              :  *
    1200              :  * This has the same API as the standard wcstombs_l() function; in particular,
    1201              :  * tolen is the maximum number of bytes to store at *to, and *from must be
    1202              :  * zero-terminated.  The output will be zero-terminated iff there is room.
    1203              :  */
    1204              : size_t
    1205       793411 : wchar2char(char *to, const wchar_t *from, size_t tolen, locale_t loc)
    1206              : {
    1207              :     size_t      result;
    1208              : 
    1209       793411 :     if (tolen == 0)
    1210            0 :         return 0;
    1211              : 
    1212              : #ifdef WIN32
    1213              : 
    1214              :     /*
    1215              :      * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
    1216              :      * for some reason mbstowcs and wcstombs won't do this for us, so we use
    1217              :      * MultiByteToWideChar().
    1218              :      */
    1219              :     if (GetDatabaseEncoding() == PG_UTF8)
    1220              :     {
    1221              :         result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
    1222              :                                      NULL, NULL);
    1223              :         /* A zero return is failure */
    1224              :         if (result <= 0)
    1225              :             result = -1;
    1226              :         else
    1227              :         {
    1228              :             Assert(result <= tolen);
    1229              :             /* Microsoft counts the zero terminator in the result */
    1230              :             result--;
    1231              :         }
    1232              :     }
    1233              :     else
    1234              : #endif                          /* WIN32 */
    1235       793411 :     if (loc == (locale_t) 0)
    1236              :     {
    1237              :         /* Use wcstombs directly for the default locale */
    1238            0 :         result = wcstombs(to, from, tolen);
    1239              :     }
    1240              :     else
    1241              :     {
    1242              :         /* Use wcstombs_l for nondefault locales */
    1243       793411 :         result = wcstombs_l(to, from, tolen, loc);
    1244              :     }
    1245              : 
    1246       793411 :     return result;
    1247              : }
    1248              : 
    1249              : /*
    1250              :  * char2wchar --- convert multibyte characters to wide characters
    1251              :  *
    1252              :  * This has almost the API of mbstowcs_l(), except that *from need not be
    1253              :  * null-terminated; instead, the number of input bytes is specified as
    1254              :  * fromlen.  Also, we ereport() rather than returning -1 for invalid
    1255              :  * input encoding.  tolen is the maximum number of wchar_t's to store at *to.
    1256              :  * The output will be zero-terminated iff there is room.
    1257              :  */
    1258              : static size_t
    1259       793411 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
    1260              :            locale_t loc)
    1261              : {
    1262              :     size_t      result;
    1263              : 
    1264       793411 :     if (tolen == 0)
    1265            0 :         return 0;
    1266              : 
    1267              : #ifdef WIN32
    1268              :     /* See WIN32 "Unicode" comment above */
    1269              :     if (GetDatabaseEncoding() == PG_UTF8)
    1270              :     {
    1271              :         /* Win32 API does not work for zero-length input */
    1272              :         if (fromlen == 0)
    1273              :             result = 0;
    1274              :         else
    1275              :         {
    1276              :             result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
    1277              :             /* A zero return is failure */
    1278              :             if (result == 0)
    1279              :                 result = -1;
    1280              :         }
    1281              : 
    1282              :         if (result != -1)
    1283              :         {
    1284              :             Assert(result < tolen);
    1285              :             /* Append trailing null wchar (MultiByteToWideChar() does not) */
    1286              :             to[result] = 0;
    1287              :         }
    1288              :     }
    1289              :     else
    1290              : #endif                          /* WIN32 */
    1291              :     {
    1292              :         /* mbstowcs requires ending '\0' */
    1293       793411 :         char       *str = pnstrdup(from, fromlen);
    1294              : 
    1295       793411 :         if (loc == (locale_t) 0)
    1296              :         {
    1297              :             /* Use mbstowcs directly for the default locale */
    1298            0 :             result = mbstowcs(to, str, tolen);
    1299              :         }
    1300              :         else
    1301              :         {
    1302              :             /* Use mbstowcs_l for nondefault locales */
    1303       793411 :             result = mbstowcs_l(to, str, tolen, loc);
    1304              :         }
    1305              : 
    1306       793411 :         pfree(str);
    1307              :     }
    1308              : 
    1309       793411 :     if (result == -1)
    1310              :     {
    1311              :         /*
    1312              :          * Invalid multibyte character encountered.  We try to give a useful
    1313              :          * error message by letting pg_verifymbstr check the string.  But it's
    1314              :          * possible that the string is OK to us, and not OK to mbstowcs ---
    1315              :          * this suggests that the LC_CTYPE locale is different from the
    1316              :          * database encoding.  Give a generic error message if pg_verifymbstr
    1317              :          * can't find anything wrong.
    1318              :          */
    1319            0 :         pg_verifymbstr(from, fromlen, false);   /* might not return */
    1320              :         /* but if it does ... */
    1321            0 :         ereport(ERROR,
    1322              :                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
    1323              :                  errmsg("invalid multibyte character for locale"),
    1324              :                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
    1325              :     }
    1326              : 
    1327       793411 :     return result;
    1328              : }
        

Generated by: LCOV version 2.0-1