LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_libc.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 158 249 63.5 %
Date: 2025-01-18 04:15:08 Functions: 15 19 78.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-----------------------------------------------------------------------
       2             :  *
       3             :  * PostgreSQL locale utilities for libc
       4             :  *
       5             :  * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
       6             :  *
       7             :  * src/backend/utils/adt/pg_locale_libc.c
       8             :  *
       9             :  *-----------------------------------------------------------------------
      10             :  */
      11             : 
      12             : #include "postgres.h"
      13             : 
      14             : #include <limits.h>
      15             : #include <wctype.h>
      16             : 
      17             : #include "access/htup_details.h"
      18             : #include "catalog/pg_database.h"
      19             : #include "catalog/pg_collation.h"
      20             : #include "mb/pg_wchar.h"
      21             : #include "miscadmin.h"
      22             : #include "utils/builtins.h"
      23             : #include "utils/formatting.h"
      24             : #include "utils/memutils.h"
      25             : #include "utils/pg_locale.h"
      26             : #include "utils/syscache.h"
      27             : 
      28             : #ifdef __GLIBC__
      29             : #include <gnu/libc-version.h>
      30             : #endif
      31             : 
      32             : #ifdef WIN32
      33             : #include <shlwapi.h>
      34             : #endif
      35             : 
      36             : /*
      37             :  * Size of stack buffer to use for string transformations, used to avoid heap
      38             :  * allocations in typical cases. This should be large enough that most strings
      39             :  * will fit, but small enough that we feel comfortable putting it on the
      40             :  * stack.
      41             :  */
      42             : #define     TEXTBUFLEN          1024
      43             : 
      44             : extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
      45             : 
      46             : extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
      47             :                             ssize_t srclen, pg_locale_t locale);
      48             : extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
      49             :                             ssize_t srclen, pg_locale_t locale);
      50             : extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
      51             :                             ssize_t srclen, pg_locale_t locale);
      52             : 
      53             : static int  strncoll_libc(const char *arg1, ssize_t len1,
      54             :                           const char *arg2, ssize_t len2,
      55             :                           pg_locale_t locale);
      56             : static size_t strnxfrm_libc(char *dest, size_t destsize,
      57             :                             const char *src, ssize_t srclen,
      58             :                             pg_locale_t locale);
      59             : extern char *get_collation_actual_version_libc(const char *collcollate);
      60             : static locale_t make_libc_collator(const char *collate,
      61             :                                    const char *ctype);
      62             : static void report_newlocale_failure(const char *localename);
      63             : 
      64             : #ifdef WIN32
      65             : static int  strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
      66             :                                      const char *arg2, ssize_t len2,
      67             :                                      pg_locale_t locale);
      68             : #endif
      69             : 
      70             : static size_t strlower_libc_sb(char *dest, size_t destsize,
      71             :                                const char *src, ssize_t srclen,
      72             :                                pg_locale_t locale);
      73             : static size_t strlower_libc_mb(char *dest, size_t destsize,
      74             :                                const char *src, ssize_t srclen,
      75             :                                pg_locale_t locale);
      76             : static size_t strtitle_libc_sb(char *dest, size_t destsize,
      77             :                                const char *src, ssize_t srclen,
      78             :                                pg_locale_t locale);
      79             : static size_t strtitle_libc_mb(char *dest, size_t destsize,
      80             :                                const char *src, ssize_t srclen,
      81             :                                pg_locale_t locale);
      82             : static size_t strupper_libc_sb(char *dest, size_t destsize,
      83             :                                const char *src, ssize_t srclen,
      84             :                                pg_locale_t locale);
      85             : static size_t strupper_libc_mb(char *dest, size_t destsize,
      86             :                                const char *src, ssize_t srclen,
      87             :                                pg_locale_t locale);
      88             : 
      89             : static const struct collate_methods collate_methods_libc = {
      90             :     .strncoll = strncoll_libc,
      91             :     .strnxfrm = strnxfrm_libc,
      92             :     .strnxfrm_prefix = NULL,
      93             : 
      94             :     /*
      95             :      * Unfortunately, it seems that strxfrm() for non-C collations is broken
      96             :      * on many common platforms; testing of multiple versions of glibc reveals
      97             :      * that, for many locales, strcoll() and strxfrm() do not return
      98             :      * consistent results. While no other libc other than Cygwin has so far
      99             :      * been shown to have a problem, we take the conservative course of action
     100             :      * for right now and disable this categorically.  (Users who are certain
     101             :      * this isn't a problem on their system can define TRUST_STRXFRM.)
     102             :      */
     103             : #ifdef TRUST_STRXFRM
     104             :     .strxfrm_is_safe = true,
     105             : #else
     106             :     .strxfrm_is_safe = false,
     107             : #endif
     108             : };
     109             : 
     110             : #ifdef WIN32
     111             : static const struct collate_methods collate_methods_libc_win32_utf8 = {
     112             :     .strncoll = strncoll_libc_win32_utf8,
     113             :     .strnxfrm = strnxfrm_libc,
     114             :     .strnxfrm_prefix = NULL,
     115             : #ifdef TRUST_STRXFRM
     116             :     .strxfrm_is_safe = true,
     117             : #else
     118             :     .strxfrm_is_safe = false,
     119             : #endif
     120             : };
     121             : #endif
     122             : 
     123             : size_t
     124      422604 : strlower_libc(char *dst, size_t dstsize, const char *src,
     125             :               ssize_t srclen, pg_locale_t locale)
     126             : {
     127      422604 :     if (pg_database_encoding_max_length() > 1)
     128      422604 :         return strlower_libc_mb(dst, dstsize, src, srclen, locale);
     129             :     else
     130           0 :         return strlower_libc_sb(dst, dstsize, src, srclen, locale);
     131             : }
     132             : 
     133             : size_t
     134           8 : strtitle_libc(char *dst, size_t dstsize, const char *src,
     135             :               ssize_t srclen, pg_locale_t locale)
     136             : {
     137           8 :     if (pg_database_encoding_max_length() > 1)
     138           8 :         return strtitle_libc_mb(dst, dstsize, src, srclen, locale);
     139             :     else
     140           0 :         return strtitle_libc_sb(dst, dstsize, src, srclen, locale);
     141             : }
     142             : 
     143             : size_t
     144      717210 : strupper_libc(char *dst, size_t dstsize, const char *src,
     145             :               ssize_t srclen, pg_locale_t locale)
     146             : {
     147      717210 :     if (pg_database_encoding_max_length() > 1)
     148      717210 :         return strupper_libc_mb(dst, dstsize, src, srclen, locale);
     149             :     else
     150           0 :         return strupper_libc_sb(dst, dstsize, src, srclen, locale);
     151             : }
     152             : 
     153             : static size_t
     154           0 : strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     155             :                  pg_locale_t locale)
     156             : {
     157           0 :     if (srclen < 0)
     158           0 :         srclen = strlen(src);
     159             : 
     160           0 :     if (srclen + 1 <= destsize)
     161             :     {
     162           0 :         locale_t    loc = locale->info.lt;
     163             :         char       *p;
     164             : 
     165           0 :         if (srclen + 1 > destsize)
     166           0 :             return srclen;
     167             : 
     168           0 :         memcpy(dest, src, srclen);
     169           0 :         dest[srclen] = '\0';
     170             : 
     171             :         /*
     172             :          * Note: we assume that tolower_l() will not be so broken as to need
     173             :          * an isupper_l() guard test.  When using the default collation, we
     174             :          * apply the traditional Postgres behavior that forces ASCII-style
     175             :          * treatment of I/i, but in non-default collations you get exactly
     176             :          * what the collation says.
     177             :          */
     178           0 :         for (p = dest; *p; p++)
     179             :         {
     180           0 :             if (locale->is_default)
     181           0 :                 *p = pg_tolower((unsigned char) *p);
     182             :             else
     183           0 :                 *p = tolower_l((unsigned char) *p, loc);
     184             :         }
     185             :     }
     186             : 
     187           0 :     return srclen;
     188             : }
     189             : 
     190             : static size_t
     191      422604 : strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     192             :                  pg_locale_t locale)
     193             : {
     194      422604 :     locale_t    loc = locale->info.lt;
     195             :     size_t      result_size;
     196             :     wchar_t    *workspace;
     197             :     char       *result;
     198             :     size_t      curr_char;
     199             :     size_t      max_size;
     200             : 
     201      422604 :     if (srclen < 0)
     202           0 :         srclen = strlen(src);
     203             : 
     204             :     /* Overflow paranoia */
     205      422604 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     206           0 :         ereport(ERROR,
     207             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     208             :                  errmsg("out of memory")));
     209             : 
     210             :     /* Output workspace cannot have more codes than input bytes */
     211      422604 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     212             : 
     213      422604 :     char2wchar(workspace, srclen + 1, src, srclen, locale);
     214             : 
     215     3636600 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     216     3213996 :         workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     217             : 
     218             :     /*
     219             :      * Make result large enough; case change might change number of bytes
     220             :      */
     221      422604 :     max_size = curr_char * pg_database_encoding_max_length();
     222      422604 :     result = palloc(max_size + 1);
     223             : 
     224      422604 :     result_size = wchar2char(result, workspace, max_size + 1, locale);
     225             : 
     226      422604 :     if (result_size + 1 > destsize)
     227           0 :         return result_size;
     228             : 
     229      422604 :     memcpy(dest, result, result_size);
     230      422604 :     dest[result_size] = '\0';
     231             : 
     232      422604 :     pfree(workspace);
     233      422604 :     pfree(result);
     234             : 
     235      422604 :     return result_size;
     236             : }
     237             : 
     238             : static size_t
     239           0 : strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     240             :                  pg_locale_t locale)
     241             : {
     242           0 :     if (srclen < 0)
     243           0 :         srclen = strlen(src);
     244             : 
     245           0 :     if (srclen + 1 <= destsize)
     246             :     {
     247           0 :         locale_t    loc = locale->info.lt;
     248           0 :         int         wasalnum = false;
     249             :         char       *p;
     250             : 
     251           0 :         memcpy(dest, src, srclen);
     252           0 :         dest[srclen] = '\0';
     253             : 
     254             :         /*
     255             :          * Note: we assume that toupper_l()/tolower_l() will not be so broken
     256             :          * as to need guard tests.  When using the default collation, we apply
     257             :          * the traditional Postgres behavior that forces ASCII-style treatment
     258             :          * of I/i, but in non-default collations you get exactly what the
     259             :          * collation says.
     260             :          */
     261           0 :         for (p = dest; *p; p++)
     262             :         {
     263           0 :             if (locale->is_default)
     264             :             {
     265           0 :                 if (wasalnum)
     266           0 :                     *p = pg_tolower((unsigned char) *p);
     267             :                 else
     268           0 :                     *p = pg_toupper((unsigned char) *p);
     269             :             }
     270             :             else
     271             :             {
     272           0 :                 if (wasalnum)
     273           0 :                     *p = tolower_l((unsigned char) *p, loc);
     274             :                 else
     275           0 :                     *p = toupper_l((unsigned char) *p, loc);
     276             :             }
     277           0 :             wasalnum = isalnum_l((unsigned char) *p, loc);
     278             :         }
     279             :     }
     280             : 
     281           0 :     return srclen;
     282             : }
     283             : 
     284             : static size_t
     285           8 : strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     286             :                  pg_locale_t locale)
     287             : {
     288           8 :     locale_t    loc = locale->info.lt;
     289           8 :     int         wasalnum = false;
     290             :     size_t      result_size;
     291             :     wchar_t    *workspace;
     292             :     char       *result;
     293             :     size_t      curr_char;
     294             :     size_t      max_size;
     295             : 
     296           8 :     if (srclen < 0)
     297           0 :         srclen = strlen(src);
     298             : 
     299             :     /* Overflow paranoia */
     300           8 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     301           0 :         ereport(ERROR,
     302             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     303             :                  errmsg("out of memory")));
     304             : 
     305             :     /* Output workspace cannot have more codes than input bytes */
     306           8 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     307             : 
     308           8 :     char2wchar(workspace, srclen + 1, src, srclen, locale);
     309             : 
     310          80 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     311             :     {
     312          72 :         if (wasalnum)
     313          56 :             workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     314             :         else
     315          16 :             workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     316          72 :         wasalnum = iswalnum_l(workspace[curr_char], loc);
     317             :     }
     318             : 
     319             :     /*
     320             :      * Make result large enough; case change might change number of bytes
     321             :      */
     322           8 :     max_size = curr_char * pg_database_encoding_max_length();
     323           8 :     result = palloc(max_size + 1);
     324             : 
     325           8 :     result_size = wchar2char(result, workspace, max_size + 1, locale);
     326             : 
     327           8 :     if (result_size + 1 > destsize)
     328           0 :         return result_size;
     329             : 
     330           8 :     memcpy(dest, result, result_size);
     331           8 :     dest[result_size] = '\0';
     332             : 
     333           8 :     pfree(workspace);
     334           8 :     pfree(result);
     335             : 
     336           8 :     return result_size;
     337             : }
     338             : 
     339             : static size_t
     340           0 : strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     341             :                  pg_locale_t locale)
     342             : {
     343           0 :     if (srclen < 0)
     344           0 :         srclen = strlen(src);
     345             : 
     346           0 :     if (srclen + 1 <= destsize)
     347             :     {
     348           0 :         locale_t    loc = locale->info.lt;
     349             :         char       *p;
     350             : 
     351           0 :         memcpy(dest, src, srclen);
     352           0 :         dest[srclen] = '\0';
     353             : 
     354             :         /*
     355             :          * Note: we assume that toupper_l() will not be so broken as to need
     356             :          * an islower_l() guard test.  When using the default collation, we
     357             :          * apply the traditional Postgres behavior that forces ASCII-style
     358             :          * treatment of I/i, but in non-default collations you get exactly
     359             :          * what the collation says.
     360             :          */
     361           0 :         for (p = dest; *p; p++)
     362             :         {
     363           0 :             if (locale->is_default)
     364           0 :                 *p = pg_toupper((unsigned char) *p);
     365             :             else
     366           0 :                 *p = toupper_l((unsigned char) *p, loc);
     367             :         }
     368             :     }
     369             : 
     370           0 :     return srclen;
     371             : }
     372             : 
     373             : static size_t
     374      717210 : strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     375             :                  pg_locale_t locale)
     376             : {
     377      717210 :     locale_t    loc = locale->info.lt;
     378             :     size_t      result_size;
     379             :     wchar_t    *workspace;
     380             :     char       *result;
     381             :     size_t      curr_char;
     382             :     size_t      max_size;
     383             : 
     384      717210 :     if (srclen < 0)
     385           0 :         srclen = strlen(src);
     386             : 
     387             :     /* Overflow paranoia */
     388      717210 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     389           0 :         ereport(ERROR,
     390             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     391             :                  errmsg("out of memory")));
     392             : 
     393             :     /* Output workspace cannot have more codes than input bytes */
     394      717210 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     395             : 
     396      717210 :     char2wchar(workspace, srclen + 1, src, srclen, locale);
     397             : 
     398     2353118 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     399     1635908 :         workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     400             : 
     401             :     /*
     402             :      * Make result large enough; case change might change number of bytes
     403             :      */
     404      717210 :     max_size = curr_char * pg_database_encoding_max_length();
     405      717210 :     result = palloc(max_size + 1);
     406             : 
     407      717210 :     result_size = wchar2char(result, workspace, max_size + 1, locale);
     408             : 
     409      717210 :     if (result_size + 1 > destsize)
     410           0 :         return result_size;
     411             : 
     412      717210 :     memcpy(dest, result, result_size);
     413      717210 :     dest[result_size] = '\0';
     414             : 
     415      717210 :     pfree(workspace);
     416      717210 :     pfree(result);
     417             : 
     418      717210 :     return result_size;
     419             : }
     420             : 
     421             : pg_locale_t
     422       29360 : create_pg_locale_libc(Oid collid, MemoryContext context)
     423             : {
     424             :     const char *collate;
     425             :     const char *ctype;
     426             :     locale_t    loc;
     427             :     pg_locale_t result;
     428             : 
     429       29360 :     if (collid == DEFAULT_COLLATION_OID)
     430             :     {
     431             :         HeapTuple   tp;
     432             :         Datum       datum;
     433             : 
     434       26052 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     435       26052 :         if (!HeapTupleIsValid(tp))
     436           0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     437       26052 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     438             :                                        Anum_pg_database_datcollate);
     439       26052 :         collate = TextDatumGetCString(datum);
     440       26052 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     441             :                                        Anum_pg_database_datctype);
     442       26052 :         ctype = TextDatumGetCString(datum);
     443             : 
     444       26052 :         ReleaseSysCache(tp);
     445             :     }
     446             :     else
     447             :     {
     448             :         HeapTuple   tp;
     449             :         Datum       datum;
     450             : 
     451        3308 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     452        3308 :         if (!HeapTupleIsValid(tp))
     453           0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     454             : 
     455        3308 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     456             :                                        Anum_pg_collation_collcollate);
     457        3308 :         collate = TextDatumGetCString(datum);
     458        3308 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     459             :                                        Anum_pg_collation_collctype);
     460        3308 :         ctype = TextDatumGetCString(datum);
     461             : 
     462        3308 :         ReleaseSysCache(tp);
     463             :     }
     464             : 
     465             : 
     466       29360 :     loc = make_libc_collator(collate, ctype);
     467             : 
     468       29360 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     469       29360 :     result->provider = COLLPROVIDER_LIBC;
     470       29360 :     result->deterministic = true;
     471       54386 :     result->collate_is_c = (strcmp(collate, "C") == 0) ||
     472       25026 :         (strcmp(collate, "POSIX") == 0);
     473       54386 :     result->ctype_is_c = (strcmp(ctype, "C") == 0) ||
     474       25026 :         (strcmp(ctype, "POSIX") == 0);
     475       29360 :     result->info.lt = loc;
     476       29360 :     if (!result->collate_is_c)
     477             :     {
     478             : #ifdef WIN32
     479             :         if (GetDatabaseEncoding() == PG_UTF8)
     480             :             result->collate = &collate_methods_libc_win32_utf8;
     481             :         else
     482             : #endif
     483       24962 :             result->collate = &collate_methods_libc;
     484             :     }
     485             : 
     486       29360 :     return result;
     487             : }
     488             : 
     489             : /*
     490             :  * Create a locale_t with the given collation and ctype.
     491             :  *
     492             :  * The "C" and "POSIX" locales are not actually handled by libc, so return
     493             :  * NULL.
     494             :  *
     495             :  * Ensure that no path leaks a locale_t.
     496             :  */
     497             : static locale_t
     498       29360 : make_libc_collator(const char *collate, const char *ctype)
     499             : {
     500       29360 :     locale_t    loc = 0;
     501             : 
     502       29360 :     if (strcmp(collate, ctype) == 0)
     503             :     {
     504       29360 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     505             :         {
     506             :             /* Normal case where they're the same */
     507       24962 :             errno = 0;
     508             : #ifndef WIN32
     509       24962 :             loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
     510             :                             NULL);
     511             : #else
     512             :             loc = _create_locale(LC_ALL, collate);
     513             : #endif
     514       24962 :             if (!loc)
     515           0 :                 report_newlocale_failure(collate);
     516             :         }
     517             :     }
     518             :     else
     519             :     {
     520             : #ifndef WIN32
     521             :         /* We need two newlocale() steps */
     522           0 :         locale_t    loc1 = 0;
     523             : 
     524           0 :         if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
     525             :         {
     526           0 :             errno = 0;
     527           0 :             loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
     528           0 :             if (!loc1)
     529           0 :                 report_newlocale_failure(collate);
     530             :         }
     531             : 
     532           0 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     533             :         {
     534           0 :             errno = 0;
     535           0 :             loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
     536           0 :             if (!loc)
     537             :             {
     538           0 :                 if (loc1)
     539           0 :                     freelocale(loc1);
     540           0 :                 report_newlocale_failure(ctype);
     541             :             }
     542             :         }
     543             :         else
     544           0 :             loc = loc1;
     545             : #else
     546             : 
     547             :         /*
     548             :          * XXX The _create_locale() API doesn't appear to support this. Could
     549             :          * perhaps be worked around by changing pg_locale_t to contain two
     550             :          * separate fields.
     551             :          */
     552             :         ereport(ERROR,
     553             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     554             :                  errmsg("collations with different collate and ctype values are not supported on this platform")));
     555             : #endif
     556             :     }
     557             : 
     558       29360 :     return loc;
     559             : }
     560             : 
     561             : /*
     562             :  * strncoll_libc
     563             :  *
     564             :  * NUL-terminate arguments, if necessary, and pass to strcoll_l().
     565             :  *
     566             :  * An input string length of -1 means that it's already NUL-terminated.
     567             :  */
     568             : int
     569    26028704 : strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
     570             :               pg_locale_t locale)
     571             : {
     572             :     char        sbuf[TEXTBUFLEN];
     573    26028704 :     char       *buf = sbuf;
     574    26028704 :     size_t      bufsize1 = (len1 == -1) ? 0 : len1 + 1;
     575    26028704 :     size_t      bufsize2 = (len2 == -1) ? 0 : len2 + 1;
     576             :     const char *arg1n;
     577             :     const char *arg2n;
     578             :     int         result;
     579             : 
     580             :     Assert(locale->provider == COLLPROVIDER_LIBC);
     581             : 
     582    26028704 :     if (bufsize1 + bufsize2 > TEXTBUFLEN)
     583         360 :         buf = palloc(bufsize1 + bufsize2);
     584             : 
     585             :     /* nul-terminate arguments if necessary */
     586    26028704 :     if (len1 == -1)
     587             :     {
     588    23977224 :         arg1n = arg1;
     589             :     }
     590             :     else
     591             :     {
     592     2051480 :         char       *buf1 = buf;
     593             : 
     594     2051480 :         memcpy(buf1, arg1, len1);
     595     2051480 :         buf1[len1] = '\0';
     596     2051480 :         arg1n = buf1;
     597             :     }
     598             : 
     599    26028704 :     if (len2 == -1)
     600             :     {
     601    23977224 :         arg2n = arg2;
     602             :     }
     603             :     else
     604             :     {
     605     2051480 :         char       *buf2 = buf + bufsize1;
     606             : 
     607     2051480 :         memcpy(buf2, arg2, len2);
     608     2051480 :         buf2[len2] = '\0';
     609     2051480 :         arg2n = buf2;
     610             :     }
     611             : 
     612    26028704 :     result = strcoll_l(arg1n, arg2n, locale->info.lt);
     613             : 
     614    26028704 :     if (buf != sbuf)
     615         360 :         pfree(buf);
     616             : 
     617    26028704 :     return result;
     618             : }
     619             : 
     620             : /*
     621             :  * strnxfrm_libc
     622             :  *
     623             :  * NUL-terminate src, if necessary, and pass to strxfrm_l().
     624             :  *
     625             :  * A source length of -1 means that it's already NUL-terminated.
     626             :  */
     627             : size_t
     628         144 : strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
     629             :               pg_locale_t locale)
     630             : {
     631             :     char        sbuf[TEXTBUFLEN];
     632         144 :     char       *buf = sbuf;
     633         144 :     size_t      bufsize = srclen + 1;
     634             :     size_t      result;
     635             : 
     636             :     Assert(locale->provider == COLLPROVIDER_LIBC);
     637             : 
     638         144 :     if (srclen == -1)
     639         144 :         return strxfrm_l(dest, src, destsize, locale->info.lt);
     640             : 
     641           0 :     if (bufsize > TEXTBUFLEN)
     642           0 :         buf = palloc(bufsize);
     643             : 
     644             :     /* nul-terminate argument */
     645           0 :     memcpy(buf, src, srclen);
     646           0 :     buf[srclen] = '\0';
     647             : 
     648           0 :     result = strxfrm_l(dest, buf, destsize, locale->info.lt);
     649             : 
     650           0 :     if (buf != sbuf)
     651           0 :         pfree(buf);
     652             : 
     653             :     /* if dest is defined, it should be nul-terminated */
     654             :     Assert(result >= destsize || dest[result] == '\0');
     655             : 
     656           0 :     return result;
     657             : }
     658             : 
     659             : char *
     660       25400 : get_collation_actual_version_libc(const char *collcollate)
     661             : {
     662       25400 :     char       *collversion = NULL;
     663             : 
     664       50620 :     if (pg_strcasecmp("C", collcollate) != 0 &&
     665       50268 :         pg_strncasecmp("C.", collcollate, 2) != 0 &&
     666       25048 :         pg_strcasecmp("POSIX", collcollate) != 0)
     667             :     {
     668             : #if defined(__GLIBC__)
     669             :         /* Use the glibc version because we don't have anything better. */
     670       25022 :         collversion = pstrdup(gnu_get_libc_version());
     671             : #elif defined(LC_VERSION_MASK)
     672             :         locale_t    loc;
     673             : 
     674             :         /* Look up FreeBSD collation version. */
     675             :         loc = newlocale(LC_COLLATE_MASK, collcollate, NULL);
     676             :         if (loc)
     677             :         {
     678             :             collversion =
     679             :                 pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
     680             :             freelocale(loc);
     681             :         }
     682             :         else
     683             :             ereport(ERROR,
     684             :                     (errmsg("could not load locale \"%s\"", collcollate)));
     685             : #elif defined(WIN32)
     686             :         /*
     687             :          * If we are targeting Windows Vista and above, we can ask for a name
     688             :          * given a collation name (earlier versions required a location code
     689             :          * that we don't have).
     690             :          */
     691             :         NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
     692             :         WCHAR       wide_collcollate[LOCALE_NAME_MAX_LENGTH];
     693             : 
     694             :         MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
     695             :                             LOCALE_NAME_MAX_LENGTH);
     696             :         if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
     697             :         {
     698             :             /*
     699             :              * GetNLSVersionEx() wants a language tag such as "en-US", not a
     700             :              * locale name like "English_United States.1252".  Until those
     701             :              * values can be prevented from entering the system, or 100%
     702             :              * reliably converted to the more useful tag format, tolerate the
     703             :              * resulting error and report that we have no version data.
     704             :              */
     705             :             if (GetLastError() == ERROR_INVALID_PARAMETER)
     706             :                 return NULL;
     707             : 
     708             :             ereport(ERROR,
     709             :                     (errmsg("could not get collation version for locale \"%s\": error code %lu",
     710             :                             collcollate,
     711             :                             GetLastError())));
     712             :         }
     713             :         collversion = psprintf("%lu.%lu,%lu.%lu",
     714             :                                (version.dwNLSVersion >> 8) & 0xFFFF,
     715             :                                version.dwNLSVersion & 0xFF,
     716             :                                (version.dwDefinedVersion >> 8) & 0xFFFF,
     717             :                                version.dwDefinedVersion & 0xFF);
     718             : #endif
     719             :     }
     720             : 
     721       25400 :     return collversion;
     722             : }
     723             : 
     724             : /*
     725             :  * strncoll_libc_win32_utf8
     726             :  *
     727             :  * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
     728             :  * invoke wcscoll_l().
     729             :  *
     730             :  * An input string length of -1 means that it's NUL-terminated.
     731             :  */
     732             : #ifdef WIN32
     733             : static int
     734             : strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
     735             :                          ssize_t len2, pg_locale_t locale)
     736             : {
     737             :     char        sbuf[TEXTBUFLEN];
     738             :     char       *buf = sbuf;
     739             :     char       *a1p,
     740             :                *a2p;
     741             :     int         a1len;
     742             :     int         a2len;
     743             :     int         r;
     744             :     int         result;
     745             : 
     746             :     Assert(locale->provider == COLLPROVIDER_LIBC);
     747             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     748             : 
     749             :     if (len1 == -1)
     750             :         len1 = strlen(arg1);
     751             :     if (len2 == -1)
     752             :         len2 = strlen(arg2);
     753             : 
     754             :     a1len = len1 * 2 + 2;
     755             :     a2len = len2 * 2 + 2;
     756             : 
     757             :     if (a1len + a2len > TEXTBUFLEN)
     758             :         buf = palloc(a1len + a2len);
     759             : 
     760             :     a1p = buf;
     761             :     a2p = buf + a1len;
     762             : 
     763             :     /* API does not work for zero-length input */
     764             :     if (len1 == 0)
     765             :         r = 0;
     766             :     else
     767             :     {
     768             :         r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
     769             :                                 (LPWSTR) a1p, a1len / 2);
     770             :         if (!r)
     771             :             ereport(ERROR,
     772             :                     (errmsg("could not convert string to UTF-16: error code %lu",
     773             :                             GetLastError())));
     774             :     }
     775             :     ((LPWSTR) a1p)[r] = 0;
     776             : 
     777             :     if (len2 == 0)
     778             :         r = 0;
     779             :     else
     780             :     {
     781             :         r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
     782             :                                 (LPWSTR) a2p, a2len / 2);
     783             :         if (!r)
     784             :             ereport(ERROR,
     785             :                     (errmsg("could not convert string to UTF-16: error code %lu",
     786             :                             GetLastError())));
     787             :     }
     788             :     ((LPWSTR) a2p)[r] = 0;
     789             : 
     790             :     errno = 0;
     791             :     result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
     792             :     if (result == 2147483647)   /* _NLSCMPERROR; missing from mingw headers */
     793             :         ereport(ERROR,
     794             :                 (errmsg("could not compare Unicode strings: %m")));
     795             : 
     796             :     if (buf != sbuf)
     797             :         pfree(buf);
     798             : 
     799             :     return result;
     800             : }
     801             : #endif                          /* WIN32 */
     802             : 
     803             : /* simple subroutine for reporting errors from newlocale() */
     804             : static void
     805           0 : report_newlocale_failure(const char *localename)
     806             : {
     807             :     int         save_errno;
     808             : 
     809             :     /*
     810             :      * Windows doesn't provide any useful error indication from
     811             :      * _create_locale(), and BSD-derived platforms don't seem to feel they
     812             :      * need to set errno either (even though POSIX is pretty clear that
     813             :      * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
     814             :      * is what to report.
     815             :      */
     816           0 :     if (errno == 0)
     817           0 :         errno = ENOENT;
     818             : 
     819             :     /*
     820             :      * ENOENT means "no such locale", not "no such file", so clarify that
     821             :      * errno with an errdetail message.
     822             :      */
     823           0 :     save_errno = errno;         /* auxiliary funcs might change errno */
     824           0 :     ereport(ERROR,
     825             :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     826             :              errmsg("could not create locale \"%s\": %m",
     827             :                     localename),
     828             :              (save_errno == ENOENT ?
     829             :               errdetail("The operating system could not find any locale data for the locale name \"%s\".",
     830             :                         localename) : 0)));
     831             : }
     832             : 
     833             : /*
     834             :  * POSIX doesn't define _l-variants of these functions, but several systems
     835             :  * have them.  We provide our own replacements here.
     836             :  */
     837             : #ifndef HAVE_MBSTOWCS_L
     838             : static size_t
     839     1139822 : mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
     840             : {
     841             : #ifdef WIN32
     842             :     return _mbstowcs_l(dest, src, n, loc);
     843             : #else
     844             :     size_t      result;
     845     1139822 :     locale_t    save_locale = uselocale(loc);
     846             : 
     847     1139822 :     result = mbstowcs(dest, src, n);
     848     1139822 :     uselocale(save_locale);
     849     1139822 :     return result;
     850             : #endif
     851             : }
     852             : #endif
     853             : #ifndef HAVE_WCSTOMBS_L
     854             : static size_t
     855     1139822 : wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
     856             : {
     857             : #ifdef WIN32
     858             :     return _wcstombs_l(dest, src, n, loc);
     859             : #else
     860             :     size_t      result;
     861     1139822 :     locale_t    save_locale = uselocale(loc);
     862             : 
     863     1139822 :     result = wcstombs(dest, src, n);
     864     1139822 :     uselocale(save_locale);
     865     1139822 :     return result;
     866             : #endif
     867             : }
     868             : #endif
     869             : 
     870             : /*
     871             :  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
     872             :  * Therefore we keep them here rather than with the mbutils code.
     873             :  */
     874             : 
     875             : /*
     876             :  * wchar2char --- convert wide characters to multibyte format
     877             :  *
     878             :  * This has the same API as the standard wcstombs_l() function; in particular,
     879             :  * tolen is the maximum number of bytes to store at *to, and *from must be
     880             :  * zero-terminated.  The output will be zero-terminated iff there is room.
     881             :  */
     882             : size_t
     883     1139822 : wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
     884             : {
     885             :     size_t      result;
     886             : 
     887     1139822 :     if (tolen == 0)
     888           0 :         return 0;
     889             : 
     890             : #ifdef WIN32
     891             : 
     892             :     /*
     893             :      * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
     894             :      * for some reason mbstowcs and wcstombs won't do this for us, so we use
     895             :      * MultiByteToWideChar().
     896             :      */
     897             :     if (GetDatabaseEncoding() == PG_UTF8)
     898             :     {
     899             :         result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
     900             :                                      NULL, NULL);
     901             :         /* A zero return is failure */
     902             :         if (result <= 0)
     903             :             result = -1;
     904             :         else
     905             :         {
     906             :             Assert(result <= tolen);
     907             :             /* Microsoft counts the zero terminator in the result */
     908             :             result--;
     909             :         }
     910             :     }
     911             :     else
     912             : #endif                          /* WIN32 */
     913     1139822 :     if (locale == (pg_locale_t) 0)
     914             :     {
     915             :         /* Use wcstombs directly for the default locale */
     916           0 :         result = wcstombs(to, from, tolen);
     917             :     }
     918             :     else
     919             :     {
     920             :         /* Use wcstombs_l for nondefault locales */
     921     1139822 :         result = wcstombs_l(to, from, tolen, locale->info.lt);
     922             :     }
     923             : 
     924     1139822 :     return result;
     925             : }
     926             : 
     927             : /*
     928             :  * char2wchar --- convert multibyte characters to wide characters
     929             :  *
     930             :  * This has almost the API of mbstowcs_l(), except that *from need not be
     931             :  * null-terminated; instead, the number of input bytes is specified as
     932             :  * fromlen.  Also, we ereport() rather than returning -1 for invalid
     933             :  * input encoding.  tolen is the maximum number of wchar_t's to store at *to.
     934             :  * The output will be zero-terminated iff there is room.
     935             :  */
     936             : size_t
     937     1142994 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
     938             :            pg_locale_t locale)
     939             : {
     940             :     size_t      result;
     941             : 
     942     1142994 :     if (tolen == 0)
     943           0 :         return 0;
     944             : 
     945             : #ifdef WIN32
     946             :     /* See WIN32 "Unicode" comment above */
     947             :     if (GetDatabaseEncoding() == PG_UTF8)
     948             :     {
     949             :         /* Win32 API does not work for zero-length input */
     950             :         if (fromlen == 0)
     951             :             result = 0;
     952             :         else
     953             :         {
     954             :             result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
     955             :             /* A zero return is failure */
     956             :             if (result == 0)
     957             :                 result = -1;
     958             :         }
     959             : 
     960             :         if (result != -1)
     961             :         {
     962             :             Assert(result < tolen);
     963             :             /* Append trailing null wchar (MultiByteToWideChar() does not) */
     964             :             to[result] = 0;
     965             :         }
     966             :     }
     967             :     else
     968             : #endif                          /* WIN32 */
     969             :     {
     970             :         /* mbstowcs requires ending '\0' */
     971     1142994 :         char       *str = pnstrdup(from, fromlen);
     972             : 
     973     1142994 :         if (locale == (pg_locale_t) 0)
     974             :         {
     975             :             /* Use mbstowcs directly for the default locale */
     976        3172 :             result = mbstowcs(to, str, tolen);
     977             :         }
     978             :         else
     979             :         {
     980             :             /* Use mbstowcs_l for nondefault locales */
     981     1139822 :             result = mbstowcs_l(to, str, tolen, locale->info.lt);
     982             :         }
     983             : 
     984     1142994 :         pfree(str);
     985             :     }
     986             : 
     987     1142994 :     if (result == -1)
     988             :     {
     989             :         /*
     990             :          * Invalid multibyte character encountered.  We try to give a useful
     991             :          * error message by letting pg_verifymbstr check the string.  But it's
     992             :          * possible that the string is OK to us, and not OK to mbstowcs ---
     993             :          * this suggests that the LC_CTYPE locale is different from the
     994             :          * database encoding.  Give a generic error message if pg_verifymbstr
     995             :          * can't find anything wrong.
     996             :          */
     997           0 :         pg_verifymbstr(from, fromlen, false);   /* might not return */
     998             :         /* but if it does ... */
     999           0 :         ereport(ERROR,
    1000             :                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
    1001             :                  errmsg("invalid multibyte character for locale"),
    1002             :                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
    1003             :     }
    1004             : 
    1005     1142994 :     return result;
    1006             : }

Generated by: LCOV version 1.14