LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_libc.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 158 249 63.5 %
Date: 2025-04-01 15:15:16 Functions: 15 19 78.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-----------------------------------------------------------------------
       2             :  *
       3             :  * PostgreSQL locale utilities for libc
       4             :  *
       5             :  * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
       6             :  *
       7             :  * src/backend/utils/adt/pg_locale_libc.c
       8             :  *
       9             :  *-----------------------------------------------------------------------
      10             :  */
      11             : 
      12             : #include "postgres.h"
      13             : 
      14             : #include <limits.h>
      15             : #include <wctype.h>
      16             : 
      17             : #include "access/htup_details.h"
      18             : #include "catalog/pg_database.h"
      19             : #include "catalog/pg_collation.h"
      20             : #include "mb/pg_wchar.h"
      21             : #include "miscadmin.h"
      22             : #include "utils/builtins.h"
      23             : #include "utils/formatting.h"
      24             : #include "utils/memutils.h"
      25             : #include "utils/pg_locale.h"
      26             : #include "utils/syscache.h"
      27             : 
      28             : #ifdef __GLIBC__
      29             : #include <gnu/libc-version.h>
      30             : #endif
      31             : 
      32             : #ifdef WIN32
      33             : #include <shlwapi.h>
      34             : #endif
      35             : 
      36             : /*
      37             :  * Size of stack buffer to use for string transformations, used to avoid heap
      38             :  * allocations in typical cases. This should be large enough that most strings
      39             :  * will fit, but small enough that we feel comfortable putting it on the
      40             :  * stack.
      41             :  */
      42             : #define     TEXTBUFLEN          1024
      43             : 
      44             : extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
      45             : 
      46             : extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
      47             :                             ssize_t srclen, pg_locale_t locale);
      48             : extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
      49             :                             ssize_t srclen, pg_locale_t locale);
      50             : extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
      51             :                             ssize_t srclen, pg_locale_t locale);
      52             : 
      53             : static int  strncoll_libc(const char *arg1, ssize_t len1,
      54             :                           const char *arg2, ssize_t len2,
      55             :                           pg_locale_t locale);
      56             : static size_t strnxfrm_libc(char *dest, size_t destsize,
      57             :                             const char *src, ssize_t srclen,
      58             :                             pg_locale_t locale);
      59             : extern char *get_collation_actual_version_libc(const char *collcollate);
      60             : static locale_t make_libc_collator(const char *collate,
      61             :                                    const char *ctype);
      62             : 
      63             : #ifdef WIN32
      64             : static int  strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
      65             :                                      const char *arg2, ssize_t len2,
      66             :                                      pg_locale_t locale);
      67             : #endif
      68             : 
      69             : static size_t strlower_libc_sb(char *dest, size_t destsize,
      70             :                                const char *src, ssize_t srclen,
      71             :                                pg_locale_t locale);
      72             : static size_t strlower_libc_mb(char *dest, size_t destsize,
      73             :                                const char *src, ssize_t srclen,
      74             :                                pg_locale_t locale);
      75             : static size_t strtitle_libc_sb(char *dest, size_t destsize,
      76             :                                const char *src, ssize_t srclen,
      77             :                                pg_locale_t locale);
      78             : static size_t strtitle_libc_mb(char *dest, size_t destsize,
      79             :                                const char *src, ssize_t srclen,
      80             :                                pg_locale_t locale);
      81             : static size_t strupper_libc_sb(char *dest, size_t destsize,
      82             :                                const char *src, ssize_t srclen,
      83             :                                pg_locale_t locale);
      84             : static size_t strupper_libc_mb(char *dest, size_t destsize,
      85             :                                const char *src, ssize_t srclen,
      86             :                                pg_locale_t locale);
      87             : 
      88             : static const struct collate_methods collate_methods_libc = {
      89             :     .strncoll = strncoll_libc,
      90             :     .strnxfrm = strnxfrm_libc,
      91             :     .strnxfrm_prefix = NULL,
      92             : 
      93             :     /*
      94             :      * Unfortunately, it seems that strxfrm() for non-C collations is broken
      95             :      * on many common platforms; testing of multiple versions of glibc reveals
      96             :      * that, for many locales, strcoll() and strxfrm() do not return
      97             :      * consistent results. While no other libc other than Cygwin has so far
      98             :      * been shown to have a problem, we take the conservative course of action
      99             :      * for right now and disable this categorically.  (Users who are certain
     100             :      * this isn't a problem on their system can define TRUST_STRXFRM.)
     101             :      */
     102             : #ifdef TRUST_STRXFRM
     103             :     .strxfrm_is_safe = true,
     104             : #else
     105             :     .strxfrm_is_safe = false,
     106             : #endif
     107             : };
     108             : 
     109             : #ifdef WIN32
     110             : static const struct collate_methods collate_methods_libc_win32_utf8 = {
     111             :     .strncoll = strncoll_libc_win32_utf8,
     112             :     .strnxfrm = strnxfrm_libc,
     113             :     .strnxfrm_prefix = NULL,
     114             : #ifdef TRUST_STRXFRM
     115             :     .strxfrm_is_safe = true,
     116             : #else
     117             :     .strxfrm_is_safe = false,
     118             : #endif
     119             : };
     120             : #endif
     121             : 
     122             : size_t
     123      422860 : strlower_libc(char *dst, size_t dstsize, const char *src,
     124             :               ssize_t srclen, pg_locale_t locale)
     125             : {
     126      422860 :     if (pg_database_encoding_max_length() > 1)
     127      422860 :         return strlower_libc_mb(dst, dstsize, src, srclen, locale);
     128             :     else
     129           0 :         return strlower_libc_sb(dst, dstsize, src, srclen, locale);
     130             : }
     131             : 
     132             : size_t
     133           8 : strtitle_libc(char *dst, size_t dstsize, const char *src,
     134             :               ssize_t srclen, pg_locale_t locale)
     135             : {
     136           8 :     if (pg_database_encoding_max_length() > 1)
     137           8 :         return strtitle_libc_mb(dst, dstsize, src, srclen, locale);
     138             :     else
     139           0 :         return strtitle_libc_sb(dst, dstsize, src, srclen, locale);
     140             : }
     141             : 
     142             : size_t
     143      717164 : strupper_libc(char *dst, size_t dstsize, const char *src,
     144             :               ssize_t srclen, pg_locale_t locale)
     145             : {
     146      717164 :     if (pg_database_encoding_max_length() > 1)
     147      717164 :         return strupper_libc_mb(dst, dstsize, src, srclen, locale);
     148             :     else
     149           0 :         return strupper_libc_sb(dst, dstsize, src, srclen, locale);
     150             : }
     151             : 
     152             : static size_t
     153           0 : strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     154             :                  pg_locale_t locale)
     155             : {
     156           0 :     if (srclen < 0)
     157           0 :         srclen = strlen(src);
     158             : 
     159           0 :     if (srclen + 1 <= destsize)
     160             :     {
     161           0 :         locale_t    loc = locale->info.lt;
     162             :         char       *p;
     163             : 
     164           0 :         if (srclen + 1 > destsize)
     165           0 :             return srclen;
     166             : 
     167           0 :         memcpy(dest, src, srclen);
     168           0 :         dest[srclen] = '\0';
     169             : 
     170             :         /*
     171             :          * Note: we assume that tolower_l() will not be so broken as to need
     172             :          * an isupper_l() guard test.  When using the default collation, we
     173             :          * apply the traditional Postgres behavior that forces ASCII-style
     174             :          * treatment of I/i, but in non-default collations you get exactly
     175             :          * what the collation says.
     176             :          */
     177           0 :         for (p = dest; *p; p++)
     178             :         {
     179           0 :             if (locale->is_default)
     180           0 :                 *p = pg_tolower((unsigned char) *p);
     181             :             else
     182           0 :                 *p = tolower_l((unsigned char) *p, loc);
     183             :         }
     184             :     }
     185             : 
     186           0 :     return srclen;
     187             : }
     188             : 
     189             : static size_t
     190      422860 : strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     191             :                  pg_locale_t locale)
     192             : {
     193      422860 :     locale_t    loc = locale->info.lt;
     194             :     size_t      result_size;
     195             :     wchar_t    *workspace;
     196             :     char       *result;
     197             :     size_t      curr_char;
     198             :     size_t      max_size;
     199             : 
     200      422860 :     if (srclen < 0)
     201           0 :         srclen = strlen(src);
     202             : 
     203             :     /* Overflow paranoia */
     204      422860 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     205           0 :         ereport(ERROR,
     206             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     207             :                  errmsg("out of memory")));
     208             : 
     209             :     /* Output workspace cannot have more codes than input bytes */
     210      422860 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     211             : 
     212      422860 :     char2wchar(workspace, srclen + 1, src, srclen, locale);
     213             : 
     214     3642632 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     215     3219772 :         workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     216             : 
     217             :     /*
     218             :      * Make result large enough; case change might change number of bytes
     219             :      */
     220      422860 :     max_size = curr_char * pg_database_encoding_max_length();
     221      422860 :     result = palloc(max_size + 1);
     222             : 
     223      422860 :     result_size = wchar2char(result, workspace, max_size + 1, locale);
     224             : 
     225      422860 :     if (result_size + 1 > destsize)
     226           0 :         return result_size;
     227             : 
     228      422860 :     memcpy(dest, result, result_size);
     229      422860 :     dest[result_size] = '\0';
     230             : 
     231      422860 :     pfree(workspace);
     232      422860 :     pfree(result);
     233             : 
     234      422860 :     return result_size;
     235             : }
     236             : 
     237             : static size_t
     238           0 : strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     239             :                  pg_locale_t locale)
     240             : {
     241           0 :     if (srclen < 0)
     242           0 :         srclen = strlen(src);
     243             : 
     244           0 :     if (srclen + 1 <= destsize)
     245             :     {
     246           0 :         locale_t    loc = locale->info.lt;
     247           0 :         int         wasalnum = false;
     248             :         char       *p;
     249             : 
     250           0 :         memcpy(dest, src, srclen);
     251           0 :         dest[srclen] = '\0';
     252             : 
     253             :         /*
     254             :          * Note: we assume that toupper_l()/tolower_l() will not be so broken
     255             :          * as to need guard tests.  When using the default collation, we apply
     256             :          * the traditional Postgres behavior that forces ASCII-style treatment
     257             :          * of I/i, but in non-default collations you get exactly what the
     258             :          * collation says.
     259             :          */
     260           0 :         for (p = dest; *p; p++)
     261             :         {
     262           0 :             if (locale->is_default)
     263             :             {
     264           0 :                 if (wasalnum)
     265           0 :                     *p = pg_tolower((unsigned char) *p);
     266             :                 else
     267           0 :                     *p = pg_toupper((unsigned char) *p);
     268             :             }
     269             :             else
     270             :             {
     271           0 :                 if (wasalnum)
     272           0 :                     *p = tolower_l((unsigned char) *p, loc);
     273             :                 else
     274           0 :                     *p = toupper_l((unsigned char) *p, loc);
     275             :             }
     276           0 :             wasalnum = isalnum_l((unsigned char) *p, loc);
     277             :         }
     278             :     }
     279             : 
     280           0 :     return srclen;
     281             : }
     282             : 
     283             : static size_t
     284           8 : strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     285             :                  pg_locale_t locale)
     286             : {
     287           8 :     locale_t    loc = locale->info.lt;
     288           8 :     int         wasalnum = false;
     289             :     size_t      result_size;
     290             :     wchar_t    *workspace;
     291             :     char       *result;
     292             :     size_t      curr_char;
     293             :     size_t      max_size;
     294             : 
     295           8 :     if (srclen < 0)
     296           0 :         srclen = strlen(src);
     297             : 
     298             :     /* Overflow paranoia */
     299           8 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     300           0 :         ereport(ERROR,
     301             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     302             :                  errmsg("out of memory")));
     303             : 
     304             :     /* Output workspace cannot have more codes than input bytes */
     305           8 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     306             : 
     307           8 :     char2wchar(workspace, srclen + 1, src, srclen, locale);
     308             : 
     309          80 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     310             :     {
     311          72 :         if (wasalnum)
     312          56 :             workspace[curr_char] = towlower_l(workspace[curr_char], loc);
     313             :         else
     314          16 :             workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     315          72 :         wasalnum = iswalnum_l(workspace[curr_char], loc);
     316             :     }
     317             : 
     318             :     /*
     319             :      * Make result large enough; case change might change number of bytes
     320             :      */
     321           8 :     max_size = curr_char * pg_database_encoding_max_length();
     322           8 :     result = palloc(max_size + 1);
     323             : 
     324           8 :     result_size = wchar2char(result, workspace, max_size + 1, locale);
     325             : 
     326           8 :     if (result_size + 1 > destsize)
     327           0 :         return result_size;
     328             : 
     329           8 :     memcpy(dest, result, result_size);
     330           8 :     dest[result_size] = '\0';
     331             : 
     332           8 :     pfree(workspace);
     333           8 :     pfree(result);
     334             : 
     335           8 :     return result_size;
     336             : }
     337             : 
     338             : static size_t
     339           0 : strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     340             :                  pg_locale_t locale)
     341             : {
     342           0 :     if (srclen < 0)
     343           0 :         srclen = strlen(src);
     344             : 
     345           0 :     if (srclen + 1 <= destsize)
     346             :     {
     347           0 :         locale_t    loc = locale->info.lt;
     348             :         char       *p;
     349             : 
     350           0 :         memcpy(dest, src, srclen);
     351           0 :         dest[srclen] = '\0';
     352             : 
     353             :         /*
     354             :          * Note: we assume that toupper_l() will not be so broken as to need
     355             :          * an islower_l() guard test.  When using the default collation, we
     356             :          * apply the traditional Postgres behavior that forces ASCII-style
     357             :          * treatment of I/i, but in non-default collations you get exactly
     358             :          * what the collation says.
     359             :          */
     360           0 :         for (p = dest; *p; p++)
     361             :         {
     362           0 :             if (locale->is_default)
     363           0 :                 *p = pg_toupper((unsigned char) *p);
     364             :             else
     365           0 :                 *p = toupper_l((unsigned char) *p, loc);
     366             :         }
     367             :     }
     368             : 
     369           0 :     return srclen;
     370             : }
     371             : 
     372             : static size_t
     373      717164 : strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
     374             :                  pg_locale_t locale)
     375             : {
     376      717164 :     locale_t    loc = locale->info.lt;
     377             :     size_t      result_size;
     378             :     wchar_t    *workspace;
     379             :     char       *result;
     380             :     size_t      curr_char;
     381             :     size_t      max_size;
     382             : 
     383      717164 :     if (srclen < 0)
     384           0 :         srclen = strlen(src);
     385             : 
     386             :     /* Overflow paranoia */
     387      717164 :     if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
     388           0 :         ereport(ERROR,
     389             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     390             :                  errmsg("out of memory")));
     391             : 
     392             :     /* Output workspace cannot have more codes than input bytes */
     393      717164 :     workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
     394             : 
     395      717164 :     char2wchar(workspace, srclen + 1, src, srclen, locale);
     396             : 
     397     2352796 :     for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
     398     1635632 :         workspace[curr_char] = towupper_l(workspace[curr_char], loc);
     399             : 
     400             :     /*
     401             :      * Make result large enough; case change might change number of bytes
     402             :      */
     403      717164 :     max_size = curr_char * pg_database_encoding_max_length();
     404      717164 :     result = palloc(max_size + 1);
     405             : 
     406      717164 :     result_size = wchar2char(result, workspace, max_size + 1, locale);
     407             : 
     408      717164 :     if (result_size + 1 > destsize)
     409           0 :         return result_size;
     410             : 
     411      717164 :     memcpy(dest, result, result_size);
     412      717164 :     dest[result_size] = '\0';
     413             : 
     414      717164 :     pfree(workspace);
     415      717164 :     pfree(result);
     416             : 
     417      717164 :     return result_size;
     418             : }
     419             : 
     420             : pg_locale_t
     421       33890 : create_pg_locale_libc(Oid collid, MemoryContext context)
     422             : {
     423             :     const char *collate;
     424             :     const char *ctype;
     425             :     locale_t    loc;
     426             :     pg_locale_t result;
     427             : 
     428       33890 :     if (collid == DEFAULT_COLLATION_OID)
     429             :     {
     430             :         HeapTuple   tp;
     431             :         Datum       datum;
     432             : 
     433       30140 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     434       30140 :         if (!HeapTupleIsValid(tp))
     435           0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     436       30140 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     437             :                                        Anum_pg_database_datcollate);
     438       30140 :         collate = TextDatumGetCString(datum);
     439       30140 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     440             :                                        Anum_pg_database_datctype);
     441       30140 :         ctype = TextDatumGetCString(datum);
     442             : 
     443       30140 :         ReleaseSysCache(tp);
     444             :     }
     445             :     else
     446             :     {
     447             :         HeapTuple   tp;
     448             :         Datum       datum;
     449             : 
     450        3750 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     451        3750 :         if (!HeapTupleIsValid(tp))
     452           0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     453             : 
     454        3750 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     455             :                                        Anum_pg_collation_collcollate);
     456        3750 :         collate = TextDatumGetCString(datum);
     457        3750 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     458             :                                        Anum_pg_collation_collctype);
     459        3750 :         ctype = TextDatumGetCString(datum);
     460             : 
     461        3750 :         ReleaseSysCache(tp);
     462             :     }
     463             : 
     464             : 
     465       33890 :     loc = make_libc_collator(collate, ctype);
     466             : 
     467       33890 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     468       33890 :     result->provider = COLLPROVIDER_LIBC;
     469       33890 :     result->deterministic = true;
     470       62998 :     result->collate_is_c = (strcmp(collate, "C") == 0) ||
     471       29108 :         (strcmp(collate, "POSIX") == 0);
     472       62998 :     result->ctype_is_c = (strcmp(ctype, "C") == 0) ||
     473       29108 :         (strcmp(ctype, "POSIX") == 0);
     474       33890 :     result->info.lt = loc;
     475       33890 :     if (!result->collate_is_c)
     476             :     {
     477             : #ifdef WIN32
     478             :         if (GetDatabaseEncoding() == PG_UTF8)
     479             :             result->collate = &collate_methods_libc_win32_utf8;
     480             :         else
     481             : #endif
     482       29044 :             result->collate = &collate_methods_libc;
     483             :     }
     484             : 
     485       33890 :     return result;
     486             : }
     487             : 
     488             : /*
     489             :  * Create a locale_t with the given collation and ctype.
     490             :  *
     491             :  * The "C" and "POSIX" locales are not actually handled by libc, so return
     492             :  * NULL.
     493             :  *
     494             :  * Ensure that no path leaks a locale_t.
     495             :  */
     496             : static locale_t
     497       33890 : make_libc_collator(const char *collate, const char *ctype)
     498             : {
     499       33890 :     locale_t    loc = 0;
     500             : 
     501       33890 :     if (strcmp(collate, ctype) == 0)
     502             :     {
     503       33890 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     504             :         {
     505             :             /* Normal case where they're the same */
     506       29044 :             errno = 0;
     507             : #ifndef WIN32
     508       29044 :             loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
     509             :                             NULL);
     510             : #else
     511             :             loc = _create_locale(LC_ALL, collate);
     512             : #endif
     513       29044 :             if (!loc)
     514           0 :                 report_newlocale_failure(collate);
     515             :         }
     516             :     }
     517             :     else
     518             :     {
     519             : #ifndef WIN32
     520             :         /* We need two newlocale() steps */
     521           0 :         locale_t    loc1 = 0;
     522             : 
     523           0 :         if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
     524             :         {
     525           0 :             errno = 0;
     526           0 :             loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
     527           0 :             if (!loc1)
     528           0 :                 report_newlocale_failure(collate);
     529             :         }
     530             : 
     531           0 :         if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
     532             :         {
     533           0 :             errno = 0;
     534           0 :             loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
     535           0 :             if (!loc)
     536             :             {
     537           0 :                 if (loc1)
     538           0 :                     freelocale(loc1);
     539           0 :                 report_newlocale_failure(ctype);
     540             :             }
     541             :         }
     542             :         else
     543           0 :             loc = loc1;
     544             : #else
     545             : 
     546             :         /*
     547             :          * XXX The _create_locale() API doesn't appear to support this. Could
     548             :          * perhaps be worked around by changing pg_locale_t to contain two
     549             :          * separate fields.
     550             :          */
     551             :         ereport(ERROR,
     552             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     553             :                  errmsg("collations with different collate and ctype values are not supported on this platform")));
     554             : #endif
     555             :     }
     556             : 
     557       33890 :     return loc;
     558             : }
     559             : 
     560             : /*
     561             :  * strncoll_libc
     562             :  *
     563             :  * Compare two strings with strcoll_l() under locale->info.lt.  An argument
     564             :  * with an explicit length is copied to a scratch buffer and NUL-terminated;
     565             :  * a length of -1 means it is already NUL-terminated and is used in place.
     566             :  */
     567             : int
     568    29624396 : strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
     569             :               pg_locale_t locale)
     570             : {
     571             :     char        sbuf[TEXTBUFLEN];   /* stack scratch buffer */
     572    29624396 :     char       *buf = sbuf;
     573    29624396 :     size_t      bufsize1 = (len1 == -1) ? 0 : len1 + 1; /* +1 for the NUL */
     574    29624396 :     size_t      bufsize2 = (len2 == -1) ? 0 : len2 + 1; /* 0 = no copy needed */
     575             :     const char *arg1n;
     576             :     const char *arg2n;
     577             :     int         result;
     578             : 
     579             :     Assert(locale->provider == COLLPROVIDER_LIBC);
     580             : 
     581    29624396 :     if (bufsize1 + bufsize2 > TEXTBUFLEN)   /* both copies share one buffer */
     582         360 :         buf = palloc(bufsize1 + bufsize2);
     583             : 
     584             :     /* nul-terminate arguments if necessary */
     585    29624396 :     if (len1 == -1)
     586             :     {
     587    25177110 :         arg1n = arg1;
     588             :     }
     589             :     else
     590             :     {
     591     4447286 :         char       *buf1 = buf; /* first copy sits at the start of buf */
     592             : 
     593     4447286 :         memcpy(buf1, arg1, len1);
     594     4447286 :         buf1[len1] = '\0';
     595     4447286 :         arg1n = buf1;
     596             :     }
     597             : 
     598    29624396 :     if (len2 == -1)
     599             :     {
     600    25177110 :         arg2n = arg2;
     601             :     }
     602             :     else
     603             :     {
     604     4447286 :         char       *buf2 = buf + bufsize1;  /* second copy follows the first */
     605             : 
     606     4447286 :         memcpy(buf2, arg2, len2);
     607     4447286 :         buf2[len2] = '\0';
     608     4447286 :         arg2n = buf2;
     609             :     }
     610             : 
     611    29624396 :     result = strcoll_l(arg1n, arg2n, locale->info.lt);
     612             : 
     613    29624396 :     if (buf != sbuf)    /* free only if the buffer came from palloc() */
     614         360 :         pfree(buf);
     615             : 
     616    29624396 :     return result;
     617             : }
     618             : 
     619             : /*
     620             :  * strnxfrm_libc
     621             :  *
     622             :  * Transform src with strxfrm_l() under locale->info.lt, writing to dest
     623             :  * (destsize bytes) and returning strxfrm_l()'s result.  src is copied and
     624             :  * NUL-terminated first unless srclen is -1, meaning it already is.
     625             :  */
     626             : size_t
     627         144 : strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
     628             :               pg_locale_t locale)
     629             : {
     630             :     char        sbuf[TEXTBUFLEN];   /* stack scratch buffer */
     631         144 :     char       *buf = sbuf;
     632         144 :     size_t      bufsize = srclen + 1;   /* only used when srclen != -1 */
     633             :     size_t      result;
     634             : 
     635             :     Assert(locale->provider == COLLPROVIDER_LIBC);
     636             : 
     637         144 :     if (srclen == -1)   /* already NUL-terminated: no copy needed */
     638         144 :         return strxfrm_l(dest, src, destsize, locale->info.lt);
     639             : 
     640           0 :     if (bufsize > TEXTBUFLEN)   /* too big for the stack buffer */
     641           0 :         buf = palloc(bufsize);
     642             : 
     643             :     /* nul-terminate argument */
     644           0 :     memcpy(buf, src, srclen);
     645           0 :     buf[srclen] = '\0';
     646             : 
     647           0 :     result = strxfrm_l(dest, buf, destsize, locale->info.lt);
     648             : 
     649           0 :     if (buf != sbuf)    /* free only if the buffer came from palloc() */
     650           0 :         pfree(buf);
     651             : 
     652             :     /* if dest is defined, it should be nul-terminated */
     653             :     Assert(result >= destsize || dest[result] == '\0');
     654             : 
     655           0 :     return result;
     656             : }
     657             : /* Return the libc collation version string for collcollate (from pstrdup/psprintf), or NULL if none is available. */
     658             : char *
     659       29468 : get_collation_actual_version_libc(const char *collcollate)
     660             : {
     661       29468 :     char       *collversion = NULL; /* stays NULL if no branch below sets it */
     662             : 
     663       58756 :     if (pg_strcasecmp("C", collcollate) != 0 &&
     664       58404 :         pg_strncasecmp("C.", collcollate, 2) != 0 &&
     665       29116 :         pg_strcasecmp("POSIX", collcollate) != 0)   /* no version for C, C.*, POSIX */
     666             :     {
     667             : #if defined(__GLIBC__)
     668             :         /* Use the glibc version because we don't have anything better. */
     669       29090 :         collversion = pstrdup(gnu_get_libc_version());
     670             : #elif defined(LC_VERSION_MASK)
     671             :         locale_t    loc;
     672             : 
     673             :         /* Look up FreeBSD collation version. */
     674             :         loc = newlocale(LC_COLLATE_MASK, collcollate, NULL);    /* temporary locale object, freed below */
     675             :         if (loc)
     676             :         {
     677             :             collversion =
     678             :                 pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
     679             :             freelocale(loc);
     680             :         }
     681             :         else
     682             :             ereport(ERROR,
     683             :                     (errmsg("could not load locale \"%s\"", collcollate)));
     684             : #elif defined(WIN32)
     685             :         /*
     686             :          * If we are targeting Windows Vista and above, we can ask for a version
     687             :          * given a locale name (earlier versions required a location code
     688             :          * that we don't have).
     689             :          */
     690             :         NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};  /* first member = struct size, rest zeroed */
     691             :         WCHAR       wide_collcollate[LOCALE_NAME_MAX_LENGTH];
     692             : 
     693             :         MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
     694             :                             LOCALE_NAME_MAX_LENGTH);
     695             :         if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
     696             :         {
     697             :             /*
     698             :              * GetNLSVersionEx() wants a language tag such as "en-US", not a
     699             :              * locale name like "English_United States.1252".  Until those
     700             :              * values can be prevented from entering the system, or 100%
     701             :              * reliably converted to the more useful tag format, tolerate the
     702             :              * resulting error and report that we have no version data.
     703             :              */
     704             :             if (GetLastError() == ERROR_INVALID_PARAMETER)
     705             :                 return NULL;
     706             : 
     707             :             ereport(ERROR,
     708             :                     (errmsg("could not get collation version for locale \"%s\": error code %lu",
     709             :                             collcollate,
     710             :                             GetLastError())));
     711             :         }
     712             :         collversion = psprintf("%lu.%lu,%lu.%lu",   /* NLS major.minor, defined major.minor */
     713             :                                (version.dwNLSVersion >> 8) & 0xFFFF,
     714             :                                version.dwNLSVersion & 0xFF,
     715             :                                (version.dwDefinedVersion >> 8) & 0xFFFF,
     716             :                                version.dwDefinedVersion & 0xFF);
     717             : #endif
     718             :     }
     719             : 
     720       29468 :     return collversion;
     721             : }
     722             : 
     723             : /*
     724             :  * strncoll_libc_win32_utf8
     725             :  *
     726             :  * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters (with
     727             :  * MultiByteToWideChar) and invoke wcscoll_l(); failures raise ereport(ERROR).
     728             :  *
     729             :  * An input string length of -1 means that it's NUL-terminated.
     730             :  */
     731             : #ifdef WIN32
     732             : static int
     733             : strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
     734             :                          ssize_t len2, pg_locale_t locale)
     735             : {
     736             :     char        sbuf[TEXTBUFLEN];   /* stack scratch buffer */
     737             :     char       *buf = sbuf;
     738             :     char       *a1p,
     739             :                *a2p;
     740             :     int         a1len;  /* byte size reserved for converted arg1 */
     741             :     int         a2len;  /* byte size reserved for converted arg2 */
     742             :     int         r;
     743             :     int         result;
     744             : 
     745             :     Assert(locale->provider == COLLPROVIDER_LIBC);
     746             :     Assert(GetDatabaseEncoding() == PG_UTF8);
     747             : 
     748             :     if (len1 == -1)
     749             :         len1 = strlen(arg1);
     750             :     if (len2 == -1)
     751             :         len2 = strlen(arg2);
     752             : 
     753             :     a1len = len1 * 2 + 2;   /* room for len1 + 1 two-byte wide chars */
     754             :     a2len = len2 * 2 + 2;
     755             : 
     756             :     if (a1len + a2len > TEXTBUFLEN) /* both conversions share one buffer */
     757             :         buf = palloc(a1len + a2len);
     758             : 
     759             :     a1p = buf;
     760             :     a2p = buf + a1len;  /* second string follows the first */
     761             : 
     762             :     /* API does not work for zero-length input */
     763             :     if (len1 == 0)
     764             :         r = 0;
     765             :     else
     766             :     {
     767             :         r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
     768             :                                 (LPWSTR) a1p, a1len / 2);   /* capacity in wide chars */
     769             :         if (!r)
     770             :             ereport(ERROR,
     771             :                     (errmsg("could not convert string to UTF-16: error code %lu",
     772             :                             GetLastError())));
     773             :     }
     774             :     ((LPWSTR) a1p)[r] = 0;  /* terminate after the r converted wide chars */
     775             : 
     776             :     if (len2 == 0)
     777             :         r = 0;
     778             :     else
     779             :     {
     780             :         r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
     781             :                                 (LPWSTR) a2p, a2len / 2);   /* capacity in wide chars */
     782             :         if (!r)
     783             :             ereport(ERROR,
     784             :                     (errmsg("could not convert string to UTF-16: error code %lu",
     785             :                             GetLastError())));
     786             :     }
     787             :     ((LPWSTR) a2p)[r] = 0;  /* terminate after the r converted wide chars */
     788             : 
     789             :     errno = 0;  /* clear stale errno before the call whose failure is reported with %m */
     790             :     result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
     791             :     if (result == 2147483647)   /* _NLSCMPERROR; missing from mingw headers */
     792             :         ereport(ERROR,
     793             :                 (errmsg("could not compare Unicode strings: %m")));
     794             : 
     795             :     if (buf != sbuf)    /* free only if the buffer came from palloc() */
     796             :         pfree(buf);
     797             : 
     798             :     return result;
     799             : }
     800             : #endif                          /* WIN32 */
     801             : 
     802             : /* simple subroutine for reporting errors from newlocale(): raises ERROR for localename using the current errno */
     803             : void
     804           0 : report_newlocale_failure(const char *localename)
     805             : {
     806             :     int         save_errno;
     807             : 
     808             :     /*
     809             :      * Windows doesn't provide any useful error indication from
     810             :      * _create_locale(), and BSD-derived platforms don't seem to feel they
     811             :      * need to set errno either (even though POSIX is pretty clear that
     812             :      * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
     813             :      * is what to report.
     814             :      */
     815           0 :     if (errno == 0)
     816           0 :         errno = ENOENT;
     817             : 
     818             :     /*
     819             :      * ENOENT means "no such locale", not "no such file", so clarify that
     820             :      * errno with an errdetail message.
     821             :      */
     822           0 :     save_errno = errno;         /* auxiliary funcs might change errno */
     823           0 :     ereport(ERROR,
     824             :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     825             :              errmsg("could not create locale \"%s\": %m",
     826             :                     localename),
     827             :              (save_errno == ENOENT ?    /* errdetail is added only for ENOENT */
     828             :               errdetail("The operating system could not find any locale data for the locale name \"%s\".",
     829             :                         localename) : 0)));
     830             : }
     831             : 
     832             : /*
     833             :  * POSIX doesn't define _l-variants of mbstowcs()/wcstombs(), but several
     834             :  * systems have them.  Where HAVE_*_L is not defined, we provide our own.
     835             :  */
     836             : #ifndef HAVE_MBSTOWCS_L
     837             : static size_t
     838     1140032 : mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
     839             : {
     840             : #ifdef WIN32
     841             :     return _mbstowcs_l(dest, src, n, loc);  /* Windows has an underscore-prefixed variant */
     842             : #else
     843             :     size_t      result;
     844     1140032 :     locale_t    save_locale = uselocale(loc);   /* install loc, remembering the previous locale */
     845             : 
     846     1140032 :     result = mbstowcs(dest, src, n);
     847     1140032 :     uselocale(save_locale);     /* put the previous locale back */
     848     1140032 :     return result;
     849             : #endif
     850             : }
     851             : #endif
     852             : #ifndef HAVE_WCSTOMBS_L         /* replacement wcstombs_l() for systems lacking one */
     853             : static size_t
     854     1140032 : wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
     855             : {
     856             : #ifdef WIN32
     857             :     return _wcstombs_l(dest, src, n, loc);  /* Windows has an underscore-prefixed variant */
     858             : #else
     859             :     size_t      result;
     860     1140032 :     locale_t    save_locale = uselocale(loc);   /* install loc, remembering the previous locale */
     861             : 
     862     1140032 :     result = wcstombs(dest, src, n);
     863     1140032 :     uselocale(save_locale);     /* put the previous locale back */
     864     1140032 :     return result;
     865             : #endif
     866             : }
     867             : #endif
     868             : 
     869             : /*
     870             :  * These functions convert from/to libc's wchar_t, *not* PostgreSQL's pg_wchar.
     871             :  * Therefore we keep them here rather than with the mbutils code.
     872             :  */
     873             : 
     874             : /*
     875             :  * wchar2char --- convert wide characters to multibyte format
     876             :  *
     877             :  * This has the same API as the standard wcstombs_l() function; in particular,
     878             :  * tolen is the maximum number of bytes to store at *to, and *from must be
     879             :  * zero-terminated.  The output will be zero-terminated iff there is room.
     880             :  */
     881             : size_t
     882     1140032 : wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
     883             : {
     884             :     size_t      result;
     885             : 
     886     1140032 :     if (tolen == 0)     /* no room to store anything */
     887           0 :         return 0;
     888             : 
     889             : #ifdef WIN32
     890             : 
     891             :     /*
     892             :      * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
     893             :      * for some reason mbstowcs and wcstombs won't do this for us, so we use
     894             :      * WideCharToMultiByte() here (MultiByteToWideChar() in char2wchar).
     895             :      */
     896             :     if (GetDatabaseEncoding() == PG_UTF8)
     897             :     {
     898             :         result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
     899             :                                      NULL, NULL);
     900             :         /* A zero return is failure */
     901             :         if (result <= 0)    /* result is size_t, so this is the same as == 0 */
     902             :             result = -1;    /* i.e. (size_t) -1, the wcstombs() failure value */
     903             :         else
     904             :         {
     905             :             Assert(result <= tolen);
     906             :             /* Microsoft counts the zero terminator in the result */
     907             :             result--;
     908             :         }
     909             :     }
     910             :     else
     911             : #endif                          /* WIN32 */
     912     1140032 :     if (locale == (pg_locale_t) 0)
     913             :     {
     914             :         /* Use wcstombs directly for the default locale */
     915           0 :         result = wcstombs(to, from, tolen);
     916             :     }
     917             :     else
     918             :     {
     919             :         /* Use wcstombs_l for nondefault locales */
     920     1140032 :         result = wcstombs_l(to, from, tolen, locale->info.lt);
     921             :     }
     922             : 
     923     1140032 :     return result;      /* unlike char2wchar, failure is returned, not reported */
     924             : }
     925             : 
     926             : /*
     927             :  * char2wchar --- convert multibyte characters to wide characters
     928             :  *
     929             :  * This has almost the API of mbstowcs_l(), except that *from need not be
     930             :  * null-terminated; instead, the number of input bytes is specified as
     931             :  * fromlen.  Also, we ereport() rather than returning -1 for invalid
     932             :  * input encoding.  tolen is the maximum number of wchar_t's to store at *to.
     933             :  * The output will be zero-terminated iff there is room.
     934             :  */
     935             : size_t
     936     1143204 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
     937             :            pg_locale_t locale)
     938             : {
     939             :     size_t      result;
     940             : 
     941     1143204 :     if (tolen == 0)     /* no room to store anything */
     942           0 :         return 0;
     943             : 
     944             : #ifdef WIN32
     945             :     /* See WIN32 "Unicode" comment above */
     946             :     if (GetDatabaseEncoding() == PG_UTF8)
     947             :     {
     948             :         /* Win32 API does not work for zero-length input */
     949             :         if (fromlen == 0)
     950             :             result = 0;
     951             :         else
     952             :         {
     953             :             result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1); /* tolen - 1 keeps a slot for the terminator */
     954             :             /* A zero return is failure */
     955             :             if (result == 0)
     956             :                 result = -1;    /* i.e. (size_t) -1, checked below */
     957             :         }
     958             : 
     959             :         if (result != -1)
     960             :         {
     961             :             Assert(result < tolen);
     962             :             /* Append trailing null wchar (MultiByteToWideChar() does not) */
     963             :             to[result] = 0;
     964             :         }
     965             :     }
     966             :     else
     967             : #endif                          /* WIN32 */
     968             :     {
     969             :         /* mbstowcs requires ending '\0' */
     970     1143204 :         char       *str = pnstrdup(from, fromlen);  /* temporary copy, freed below */
     971             : 
     972     1143204 :         if (locale == (pg_locale_t) 0)
     973             :         {
     974             :             /* Use mbstowcs directly for the default locale */
     975        3172 :             result = mbstowcs(to, str, tolen);
     976             :         }
     977             :         else
     978             :         {
     979             :             /* Use mbstowcs_l for nondefault locales */
     980     1140032 :             result = mbstowcs_l(to, str, tolen, locale->info.lt);
     981             :         }
     982             : 
     983     1143204 :         pfree(str);
     984             :     }
     985             : 
     986     1143204 :     if (result == -1)   /* (size_t) -1: the conversion failed */
     987             :     {
     988             :         /*
     989             :          * Invalid multibyte character encountered.  We try to give a useful
     990             :          * error message by letting pg_verifymbstr check the string.  But it's
     991             :          * possible that the string is OK to us, and not OK to mbstowcs ---
     992             :          * this suggests that the LC_CTYPE locale is different from the
     993             :          * database encoding.  Give a generic error message if pg_verifymbstr
     994             :          * can't find anything wrong.
     995             :          */
     996           0 :         pg_verifymbstr(from, fromlen, false);   /* might not return */
     997             :         /* but if it does ... */
     998           0 :         ereport(ERROR,
     999             :                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
    1000             :                  errmsg("invalid multibyte character for locale"),
    1001             :                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
    1002             :     }
    1003             : 
    1004     1143204 :     return result;
    1005             : }

Generated by: LCOV version 1.14