LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale.c (source / functions) Hit Total Coverage
Test: PostgreSQL 12beta2 Lines: 290 401 72.3 %
Date: 2019-06-18 07:06:57 Functions: 23 25 92.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-----------------------------------------------------------------------
       2             :  *
       3             :  * PostgreSQL locale utilities
       4             :  *
       5             :  * Portions Copyright (c) 2002-2019, PostgreSQL Global Development Group
       6             :  *
       7             :  * src/backend/utils/adt/pg_locale.c
       8             :  *
       9             :  *-----------------------------------------------------------------------
      10             :  */
      11             : 
      12             : /*----------
      13             :  * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
      14             :  * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
      15             :  * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
      16             :  * toupper(), etc. are always in the same fixed locale.
      17             :  *
      18             :  * LC_MESSAGES is settable at run time and will take effect
      19             :  * immediately.
      20             :  *
      21             :  * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
      22             :  * settable at run-time.  However, we don't actually set those locale
      23             :  * categories permanently.  This would have bizarre effects like no
      24             :  * longer accepting standard floating-point literals in some locales.
      25             :  * Instead, we only set these locale categories briefly when needed,
      26             :  * cache the required information obtained from localeconv() or
      27             :  * strftime(), and then set the locale categories back to "C".
      28             :  * The cached information is only used by the formatting functions
      29             :  * (to_char, etc.) and the money type.  For the user, this should all be
      30             :  * transparent.
      31             :  *
      32             :  * !!! NOW HEAR THIS !!!
      33             :  *
      34             :  * We've been bitten repeatedly by this bug, so let's try to keep it in
      35             :  * mind in future: on some platforms, the locale functions return pointers
      36             :  * to static data that will be overwritten by any later locale function.
      37             :  * Thus, for example, the obvious-looking sequence
      38             :  *          save = setlocale(category, NULL);
      39             :  *          if (!setlocale(category, value))
      40             :  *              fail = true;
      41             :  *          setlocale(category, save);
      42             :  * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
      43             :  * will change the memory save is pointing at.  To do this sort of thing
      44             :  * safely, you *must* pstrdup what setlocale returns the first time.
      45             :  *
      46             :  * The POSIX locale standard is available here:
      47             :  *
      48             :  *  http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
      49             :  *----------
      50             :  */
      51             : 
      52             : 
      53             : #include "postgres.h"
      54             : 
      55             : #include <time.h>
      56             : 
      57             : #include "access/htup_details.h"
      58             : #include "catalog/pg_collation.h"
      59             : #include "catalog/pg_control.h"
      60             : #include "mb/pg_wchar.h"
      61             : #include "utils/builtins.h"
      62             : #include "utils/formatting.h"
      63             : #include "utils/hsearch.h"
      64             : #include "utils/lsyscache.h"
      65             : #include "utils/memutils.h"
      66             : #include "utils/pg_locale.h"
      67             : #include "utils/syscache.h"
      68             : 
      69             : #ifdef USE_ICU
      70             : #include <unicode/ucnv.h>
      71             : #endif
      72             : 
      73             : #ifdef WIN32
      74             : /*
      75             :  * This Windows header defines StrNCpy.  We don't need its definition here, so
      76             :  * we undefine the name beforehand to keep the compiler quiet, and undefine it
      77             :  * again after the header is included, so that we don't accidentally use theirs.
      78             :  */
      79             : #undef StrNCpy
      80             : #include <shlwapi.h>
      81             : #ifdef StrNCpy
      82             : #undef STrNCpy
      83             : #endif
      84             : #endif
      85             : 
      86             : #define     MAX_L10N_DATA       80
      87             : 
      88             : 
      89             : /* GUC settings */
      90             : char       *locale_messages;
      91             : char       *locale_monetary;
      92             : char       *locale_numeric;
      93             : char       *locale_time;
      94             : 
      95             : /* lc_time localization cache */
      96             : char       *localized_abbrev_days[7];
      97             : char       *localized_full_days[7];
      98             : char       *localized_abbrev_months[12];
      99             : char       *localized_full_months[12];
     100             : 
     101             : /* indicates whether locale information cache is valid */
     102             : static bool CurrentLocaleConvValid = false;
     103             : static bool CurrentLCTimeValid = false;
     104             : 
     105             : /* Environment variable storage area */
     106             : 
     107             : #define LC_ENV_BUFSIZE (NAMEDATALEN + 20)
     108             : 
     109             : static char lc_collate_envbuf[LC_ENV_BUFSIZE];
     110             : static char lc_ctype_envbuf[LC_ENV_BUFSIZE];
     111             : 
     112             : #ifdef LC_MESSAGES
     113             : static char lc_messages_envbuf[LC_ENV_BUFSIZE];
     114             : #endif
     115             : static char lc_monetary_envbuf[LC_ENV_BUFSIZE];
     116             : static char lc_numeric_envbuf[LC_ENV_BUFSIZE];
     117             : static char lc_time_envbuf[LC_ENV_BUFSIZE];
     118             : 
     119             : /* Cache for collation-related knowledge */
     120             : 
     121             : typedef struct
     122             : {
     123             :     Oid         collid;         /* hash key: pg_collation OID */
     124             :     bool        collate_is_c;   /* is collation's LC_COLLATE C? */
     125             :     bool        ctype_is_c;     /* is collation's LC_CTYPE C? */
     126             :     bool        flags_valid;    /* true if above flags are valid */
     127             :     pg_locale_t locale;         /* locale_t struct, or 0 if not valid */
     128             : } collation_cache_entry;
     129             : 
     130             : static HTAB *collation_cache = NULL;
     131             : 
     132             : 
     133             : #if defined(WIN32) && defined(LC_MESSAGES)
     134             : static char *IsoLocaleName(const char *);   /* MSVC specific */
     135             : #endif
     136             : 
     137             : #ifdef USE_ICU
     138             : static void icu_set_collation_attributes(UCollator *collator, const char *loc);
     139             : #endif
     140             : 
     141             : /*
     142             :  * pg_perm_setlocale
     143             :  *
     144             :  * This wraps the libc function setlocale(), with two additions.  First, when
     145             :  * changing LC_CTYPE, update gettext's encoding for the current message
     146             :  * domain.  GNU gettext automatically tracks LC_CTYPE on most platforms, but
     147             :  * not on Windows.  Second, if the operation is successful, the corresponding
     148             :  * LC_XXX environment variable is set to match.  By setting the environment
     149             :  * variable, we ensure that any subsequent use of setlocale(..., "") will
     150             :  * preserve the settings made through this routine.  Of course, LC_ALL must
     151             :  * also be unset to fully ensure that, but that has to be done elsewhere after
     152             :  * all the individual LC_XXX variables have been set correctly.  (Thank you
     153             :  * Perl for making this kluge necessary.)
     154             :  */
     155             : char *
     156       42968 : pg_perm_setlocale(int category, const char *locale)
     157             : {
     158             :     char       *result;
     159             :     const char *envvar;
     160             :     char       *envbuf;
     161             : 
     162             : #ifndef WIN32
     163       42968 :     result = setlocale(category, locale);
     164             : #else
     165             : 
     166             :     /*
     167             :      * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
     168             :      * the given value is good and set it in the environment variables. We
     169             :      * must ignore attempts to set to "", which means "keep using the old
     170             :      * environment value".
     171             :      */
     172             : #ifdef LC_MESSAGES
     173             :     if (category == LC_MESSAGES)
     174             :     {
     175             :         result = (char *) locale;
     176             :         if (locale == NULL || locale[0] == '\0')
     177             :             return result;
     178             :     }
     179             :     else
     180             : #endif
     181             :         result = setlocale(category, locale);
     182             : #endif                          /* WIN32 */
     183             : 
     184       42968 :     if (result == NULL)
     185           0 :         return result;          /* fall out immediately on failure */
     186             : 
     187             :     /*
     188             :      * Use the right encoding in translated messages.  Under ENABLE_NLS, let
     189             :      * pg_bind_textdomain_codeset() figure it out.  Under !ENABLE_NLS, message
     190             :      * format strings are ASCII, but database-encoding strings may enter the
     191             :      * message via %s.  This makes the overall message encoding equal to the
     192             :      * database encoding.
     193             :      */
     194       42968 :     if (category == LC_CTYPE)
     195             :     {
     196             :         static char save_lc_ctype[LC_ENV_BUFSIZE];
     197             : 
     198             :         /* copy setlocale() return value before callee invokes it again */
     199       10970 :         strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
     200       10970 :         result = save_lc_ctype;
     201             : 
     202             : #ifdef ENABLE_NLS
     203       10970 :         SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
     204             : #else
     205             :         SetMessageEncoding(GetDatabaseEncoding());
     206             : #endif
     207             :     }
     208             : 
     209       42968 :     switch (category)
     210             :     {
     211             :         case LC_COLLATE:
     212       10970 :             envvar = "LC_COLLATE";
     213       10970 :             envbuf = lc_collate_envbuf;
     214       10970 :             break;
     215             :         case LC_CTYPE:
     216       10970 :             envvar = "LC_CTYPE";
     217       10970 :             envbuf = lc_ctype_envbuf;
     218       10970 :             break;
     219             : #ifdef LC_MESSAGES
     220             :         case LC_MESSAGES:
     221       13378 :             envvar = "LC_MESSAGES";
     222       13378 :             envbuf = lc_messages_envbuf;
     223             : #ifdef WIN32
     224             :             result = IsoLocaleName(locale);
     225             :             if (result == NULL)
     226             :                 result = (char *) locale;
     227             : #endif                          /* WIN32 */
     228       13378 :             break;
     229             : #endif                          /* LC_MESSAGES */
     230             :         case LC_MONETARY:
     231        2550 :             envvar = "LC_MONETARY";
     232        2550 :             envbuf = lc_monetary_envbuf;
     233        2550 :             break;
     234             :         case LC_NUMERIC:
     235        2550 :             envvar = "LC_NUMERIC";
     236        2550 :             envbuf = lc_numeric_envbuf;
     237        2550 :             break;
     238             :         case LC_TIME:
     239        2550 :             envvar = "LC_TIME";
     240        2550 :             envbuf = lc_time_envbuf;
     241        2550 :             break;
     242             :         default:
     243           0 :             elog(FATAL, "unrecognized LC category: %d", category);
     244             :             envvar = NULL;      /* keep compiler quiet */
     245             :             envbuf = NULL;
     246             :             return NULL;
     247             :     }
     248             : 
     249       42968 :     snprintf(envbuf, LC_ENV_BUFSIZE - 1, "%s=%s", envvar, result);
     250             : 
     251       42968 :     if (putenv(envbuf))
     252           0 :         return NULL;
     253             : 
     254       42968 :     return result;
     255             : }
     256             : 
     257             : 
     258             : /*
     259             :  * Is the locale name valid for the locale category?
     260             :  *
     261             :  * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
     262             :  * canonical name is stored there.  This is especially useful for figuring out
     263             :  * what locale name "" means (i.e., the server environment value).  (Actually,
     264             :  * it seems that on most implementations that's the only thing it's good for;
     265             :  * we could wish that setlocale gave back a canonically spelled version of
     266             :  * the locale name, but typically it doesn't.)
     267             :  */
     268             : bool
     269       42128 : check_locale(int category, const char *locale, char **canonname)
     270             : {
     271             :     char       *save;
     272             :     char       *res;
     273             : 
     274       42128 :     if (canonname)
     275        1748 :         *canonname = NULL;      /* in case of failure */
     276             : 
     277       42128 :     save = setlocale(category, NULL);
     278       42128 :     if (!save)
     279           0 :         return false;           /* won't happen, we hope */
     280             : 
     281             :     /* save may be pointing at a modifiable scratch variable, see above. */
     282       42128 :     save = pstrdup(save);
     283             : 
     284             :     /* set the locale with setlocale, to see if it accepts it. */
     285       42128 :     res = setlocale(category, locale);
     286             : 
     287             :     /* save canonical name if requested. */
     288       42128 :     if (res && canonname)
     289        1748 :         *canonname = pstrdup(res);
     290             : 
     291             :     /* restore old value. */
     292       42128 :     if (!setlocale(category, save))
     293           0 :         elog(WARNING, "failed to restore old locale \"%s\"", save);
     294       42128 :     pfree(save);
     295             : 
     296       42128 :     return (res != NULL);
     297             : }
     298             : 
     299             : 
     300             : /*
     301             :  * GUC check/assign hooks
     302             :  *
     303             :  * For most locale categories, the assign hook doesn't actually set the locale
     304             :  * permanently, just reset flags so that the next use will cache the
     305             :  * appropriate values.  (See explanation at the top of this file.)
     306             :  *
     307             :  * Note: we accept value = "" as selecting the postmaster's environment
     308             :  * value, whatever it was (so long as the environment setting is legal).
     309             :  * This will have been locked down by an earlier call to pg_perm_setlocale.
     310             :  */
     311             : bool
     312       10968 : check_locale_monetary(char **newval, void **extra, GucSource source)
     313             : {
     314       10968 :     return check_locale(LC_MONETARY, *newval, NULL);
     315             : }
     316             : 
     317             : void
     318       10828 : assign_locale_monetary(const char *newval, void *extra)
     319             : {
     320       10828 :     CurrentLocaleConvValid = false;
     321       10828 : }
     322             : 
     323             : bool
     324       10972 : check_locale_numeric(char **newval, void **extra, GucSource source)
     325             : {
     326       10972 :     return check_locale(LC_NUMERIC, *newval, NULL);
     327             : }
     328             : 
     329             : void
     330       10836 : assign_locale_numeric(const char *newval, void *extra)
     331             : {
     332       10836 :     CurrentLocaleConvValid = false;
     333       10836 : }
     334             : 
     335             : bool
     336       10968 : check_locale_time(char **newval, void **extra, GucSource source)
     337             : {
     338       10968 :     return check_locale(LC_TIME, *newval, NULL);
     339             : }
     340             : 
     341             : void
     342       10828 : assign_locale_time(const char *newval, void *extra)
     343             : {
     344       10828 :     CurrentLCTimeValid = false;
     345       10828 : }
     346             : 
     347             : /*
     348             :  * We allow LC_MESSAGES to actually be set globally.
     349             :  *
     350             :  * Note: we normally disallow value = "" because it wouldn't have consistent
     351             :  * semantics (it'd effectively just use the previous value).  However, this
     352             :  * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
     353             :  * not even if the attempted setting fails due to invalid environment value.
     354             :  * The idea there is just to accept the environment setting *if possible*
     355             :  * during startup, until we can read the proper value from postgresql.conf.
     356             :  */
     357             : bool
     358       10968 : check_locale_messages(char **newval, void **extra, GucSource source)
     359             : {
     360       10968 :     if (**newval == '\0')
     361             :     {
     362        3496 :         if (source == PGC_S_DEFAULT)
     363        3496 :             return true;
     364             :         else
     365           0 :             return false;
     366             :     }
     367             : 
     368             :     /*
     369             :      * LC_MESSAGES category does not exist everywhere, but accept it anyway
     370             :      *
     371             :      * On Windows, we can't even check the value, so accept blindly
     372             :      */
     373             : #if defined(LC_MESSAGES) && !defined(WIN32)
     374        7472 :     return check_locale(LC_MESSAGES, *newval, NULL);
     375             : #else
     376             :     return true;
     377             : #endif
     378             : }
     379             : 
     380             : void
     381       10828 : assign_locale_messages(const char *newval, void *extra)
     382             : {
     383             :     /*
     384             :      * LC_MESSAGES category does not exist everywhere, but accept it anyway.
     385             :      * We ignore failure, as per comment above.
     386             :      */
     387             : #ifdef LC_MESSAGES
     388       10828 :     (void) pg_perm_setlocale(LC_MESSAGES, newval);
     389             : #endif
     390       10828 : }
     391             : 
     392             : 
     393             : /*
     394             :  * Frees the malloced content of a struct lconv.  (But not the struct
     395             :  * itself.)  It's important that this not throw elog(ERROR).
     396             :  */
     397             : static void
     398           4 : free_struct_lconv(struct lconv *s)
     399             : {
     400           4 :     if (s->decimal_point)
     401           4 :         free(s->decimal_point);
     402           4 :     if (s->thousands_sep)
     403           4 :         free(s->thousands_sep);
     404           4 :     if (s->grouping)
     405           4 :         free(s->grouping);
     406           4 :     if (s->int_curr_symbol)
     407           4 :         free(s->int_curr_symbol);
     408           4 :     if (s->currency_symbol)
     409           4 :         free(s->currency_symbol);
     410           4 :     if (s->mon_decimal_point)
     411           4 :         free(s->mon_decimal_point);
     412           4 :     if (s->mon_thousands_sep)
     413           4 :         free(s->mon_thousands_sep);
     414           4 :     if (s->mon_grouping)
     415           4 :         free(s->mon_grouping);
     416           4 :     if (s->positive_sign)
     417           4 :         free(s->positive_sign);
     418           4 :     if (s->negative_sign)
     419           4 :         free(s->negative_sign);
     420           4 : }
     421             : 
     422             : /*
     423             :  * Check that all fields of a struct lconv (or at least, the ones we care
     424             :  * about) are non-NULL.  The field list must match free_struct_lconv().
     425             :  */
     426             : static bool
     427          50 : struct_lconv_is_valid(struct lconv *s)
     428             : {
     429          50 :     if (s->decimal_point == NULL)
     430           0 :         return false;
     431          50 :     if (s->thousands_sep == NULL)
     432           0 :         return false;
     433          50 :     if (s->grouping == NULL)
     434           0 :         return false;
     435          50 :     if (s->int_curr_symbol == NULL)
     436           0 :         return false;
     437          50 :     if (s->currency_symbol == NULL)
     438           0 :         return false;
     439          50 :     if (s->mon_decimal_point == NULL)
     440           0 :         return false;
     441          50 :     if (s->mon_thousands_sep == NULL)
     442           0 :         return false;
     443          50 :     if (s->mon_grouping == NULL)
     444           0 :         return false;
     445          50 :     if (s->positive_sign == NULL)
     446           0 :         return false;
     447          50 :     if (s->negative_sign == NULL)
     448           0 :         return false;
     449          50 :     return true;
     450             : }
     451             : 
     452             : 
     453             : /*
     454             :  * Convert the strdup'd string at *str from the specified encoding to the
     455             :  * database encoding.
     456             :  */
     457             : static void
     458         400 : db_encoding_convert(int encoding, char **str)
     459             : {
     460             :     char       *pstr;
     461             :     char       *mstr;
     462             : 
     463             :     /* convert the string to the database encoding */
     464         400 :     pstr = pg_any_to_server(*str, strlen(*str), encoding);
     465         400 :     if (pstr == *str)
     466         400 :         return;                 /* no conversion happened */
     467             : 
     468             :     /* need it malloc'd not palloc'd */
     469           0 :     mstr = strdup(pstr);
     470           0 :     if (mstr == NULL)
     471           0 :         ereport(ERROR,
     472             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     473             :                  errmsg("out of memory")));
     474             : 
     475             :     /* replace old string */
     476           0 :     free(*str);
     477           0 :     *str = mstr;
     478             : 
     479           0 :     pfree(pstr);
     480             : }
     481             : 
     482             : 
     483             : /*
     484             :  * Return the POSIX lconv struct (contains number/money formatting
     485             :  * information) with locale information for all categories.
     486             :  */
     487             : struct lconv *
     488       26816 : PGLC_localeconv(void)
     489             : {
     490             :     static struct lconv CurrentLocaleConv;
     491             :     static bool CurrentLocaleConvAllocated = false;
     492             :     struct lconv *extlconv;
     493             :     struct lconv worklconv;
     494             :     char       *save_lc_monetary;
     495             :     char       *save_lc_numeric;
     496             : #ifdef WIN32
     497             :     char       *save_lc_ctype;
     498             : #endif
     499             : 
     500             :     /* Did we do it already? */
     501       26816 :     if (CurrentLocaleConvValid)
     502       26766 :         return &CurrentLocaleConv;
     503             : 
     504             :     /* Free any already-allocated storage */
     505          50 :     if (CurrentLocaleConvAllocated)
     506             :     {
     507           4 :         free_struct_lconv(&CurrentLocaleConv);
     508           4 :         CurrentLocaleConvAllocated = false;
     509             :     }
     510             : 
     511             :     /*
     512             :      * This is tricky because we really don't want to risk throwing an error
     513             :      * while the locale is set to anything other than our usual settings.  Therefore,
     514             :      * the process is: collect the usual settings, set locale to special
     515             :      * setting, copy relevant data into worklconv using strdup(), restore
     516             :      * normal settings, convert data to desired encoding, and finally stash
     517             :      * the collected data in CurrentLocaleConv.  This makes it safe if we
     518             :      * throw an error during encoding conversion or run out of memory anywhere
     519             :      * in the process.  All data pointed to by struct lconv members is
     520             :      * allocated with strdup, to avoid premature elog(ERROR) and to allow
     521             :      * using a single cleanup routine.
     522             :      */
     523          50 :     memset(&worklconv, 0, sizeof(worklconv));
     524             : 
     525             :     /* Save prevailing values of monetary and numeric locales */
     526          50 :     save_lc_monetary = setlocale(LC_MONETARY, NULL);
     527          50 :     if (!save_lc_monetary)
     528           0 :         elog(ERROR, "setlocale(NULL) failed");
     529          50 :     save_lc_monetary = pstrdup(save_lc_monetary);
     530             : 
     531          50 :     save_lc_numeric = setlocale(LC_NUMERIC, NULL);
     532          50 :     if (!save_lc_numeric)
     533           0 :         elog(ERROR, "setlocale(NULL) failed");
     534          50 :     save_lc_numeric = pstrdup(save_lc_numeric);
     535             : 
     536             : #ifdef WIN32
     537             : 
     538             :     /*
     539             :      * The POSIX standard explicitly says that it is undefined what happens if
     540             :      * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
     541             :      * that implied by LC_CTYPE.  In practice, all Unix-ish platforms seem to
     542             :      * believe that localeconv() should return strings that are encoded in the
     543             :      * codeset implied by the LC_MONETARY or LC_NUMERIC locale name.  Hence,
     544             :      * once we have successfully collected the localeconv() results, we will
     545             :      * convert them from that codeset to the desired server encoding.
     546             :      *
     547             :      * Windows, of course, resolutely does things its own way; on that
     548             :      * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
     549             :      * results.  Hence, we must temporarily set that category as well.
     550             :      */
     551             : 
     552             :     /* Save prevailing value of ctype locale */
     553             :     save_lc_ctype = setlocale(LC_CTYPE, NULL);
     554             :     if (!save_lc_ctype)
     555             :         elog(ERROR, "setlocale(NULL) failed");
     556             :     save_lc_ctype = pstrdup(save_lc_ctype);
     557             : 
     558             :     /* Here begins the critical section where we must not throw an error */
     559             : 
     560             :     /* use numeric to set the ctype */
     561             :     setlocale(LC_CTYPE, locale_numeric);
     562             : #endif
     563             : 
     564             :     /* Get formatting information for numeric */
     565          50 :     setlocale(LC_NUMERIC, locale_numeric);
     566          50 :     extlconv = localeconv();
     567             : 
     568             :     /* Must copy data now in case setlocale() overwrites it */
     569          50 :     worklconv.decimal_point = strdup(extlconv->decimal_point);
     570          50 :     worklconv.thousands_sep = strdup(extlconv->thousands_sep);
     571          50 :     worklconv.grouping = strdup(extlconv->grouping);
     572             : 
     573             : #ifdef WIN32
     574             :     /* use monetary to set the ctype */
     575             :     setlocale(LC_CTYPE, locale_monetary);
     576             : #endif
     577             : 
     578             :     /* Get formatting information for monetary */
     579          50 :     setlocale(LC_MONETARY, locale_monetary);
     580          50 :     extlconv = localeconv();
     581             : 
     582             :     /* Must copy data now in case setlocale() overwrites it */
     583          50 :     worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
     584          50 :     worklconv.currency_symbol = strdup(extlconv->currency_symbol);
     585          50 :     worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
     586          50 :     worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
     587          50 :     worklconv.mon_grouping = strdup(extlconv->mon_grouping);
     588          50 :     worklconv.positive_sign = strdup(extlconv->positive_sign);
     589          50 :     worklconv.negative_sign = strdup(extlconv->negative_sign);
     590             :     /* Copy scalar fields as well */
     591          50 :     worklconv.int_frac_digits = extlconv->int_frac_digits;
     592          50 :     worklconv.frac_digits = extlconv->frac_digits;
     593          50 :     worklconv.p_cs_precedes = extlconv->p_cs_precedes;
     594          50 :     worklconv.p_sep_by_space = extlconv->p_sep_by_space;
     595          50 :     worklconv.n_cs_precedes = extlconv->n_cs_precedes;
     596          50 :     worklconv.n_sep_by_space = extlconv->n_sep_by_space;
     597          50 :     worklconv.p_sign_posn = extlconv->p_sign_posn;
     598          50 :     worklconv.n_sign_posn = extlconv->n_sign_posn;
     599             : 
     600             :     /*
     601             :      * Restore the prevailing locale settings; failure to do so is fatal.
     602             :      * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
     603             :      * but proceeding with the wrong value of LC_CTYPE would certainly be bad
     604             :      * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
     605             :      * are almost certainly "C", there's really no reason that restoring those
     606             :      * should fail.
     607             :      */
     608             : #ifdef WIN32
     609             :     if (!setlocale(LC_CTYPE, save_lc_ctype))
     610             :         elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
     611             : #endif
     612          50 :     if (!setlocale(LC_MONETARY, save_lc_monetary))
     613           0 :         elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
     614          50 :     if (!setlocale(LC_NUMERIC, save_lc_numeric))
     615           0 :         elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);
     616             : 
     617             :     /*
     618             :      * At this point we've done our best to clean up, and can call functions
     619             :      * that might possibly throw errors with a clean conscience.  But let's
     620             :      * make sure we don't leak any already-strdup'd fields in worklconv.
     621             :      */
     622          50 :     PG_TRY();
     623             :     {
     624             :         int         encoding;
     625             : 
     626             :         /* Release the pstrdup'd locale names */
     627          50 :         pfree(save_lc_monetary);
     628          50 :         pfree(save_lc_numeric);
     629             : #ifdef WIN32
     630             :         pfree(save_lc_ctype);
     631             : #endif
     632             : 
     633             :         /* If any of the preceding strdup calls failed, complain now. */
     634          50 :         if (!struct_lconv_is_valid(&worklconv))
     635           0 :             ereport(ERROR,
     636             :                     (errcode(ERRCODE_OUT_OF_MEMORY),
     637             :                      errmsg("out of memory")));
     638             : 
     639             :         /*
     640             :          * Now we must perform encoding conversion from whatever's associated
     641             :          * with the locales into the database encoding.  If we can't identify
     642             :          * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
     643             :          * use PG_SQL_ASCII, which will result in just validating that the
     644             :          * strings are OK in the database encoding.
     645             :          */
     646          50 :         encoding = pg_get_encoding_from_locale(locale_numeric, true);
     647          50 :         if (encoding < 0)
     648           0 :             encoding = PG_SQL_ASCII;
     649             : 
     650          50 :         db_encoding_convert(encoding, &worklconv.decimal_point);
     651          50 :         db_encoding_convert(encoding, &worklconv.thousands_sep);
     652             :         /* grouping is not text and does not require conversion */
     653             : 
     654          50 :         encoding = pg_get_encoding_from_locale(locale_monetary, true);
     655          50 :         if (encoding < 0)
     656           0 :             encoding = PG_SQL_ASCII;
     657             : 
     658          50 :         db_encoding_convert(encoding, &worklconv.int_curr_symbol);
     659          50 :         db_encoding_convert(encoding, &worklconv.currency_symbol);
     660          50 :         db_encoding_convert(encoding, &worklconv.mon_decimal_point);
     661          50 :         db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
     662             :         /* mon_grouping is not text and does not require conversion */
     663          50 :         db_encoding_convert(encoding, &worklconv.positive_sign);
     664          50 :         db_encoding_convert(encoding, &worklconv.negative_sign);
     665             :     }
     666           0 :     PG_CATCH();
     667             :     {
     668           0 :         free_struct_lconv(&worklconv);
     669           0 :         PG_RE_THROW();
     670             :     }
     671          50 :     PG_END_TRY();
     672             : 
     673             :     /*
     674             :      * Everything is good, so save the results.
     675             :      */
     676          50 :     CurrentLocaleConv = worklconv;
     677          50 :     CurrentLocaleConvAllocated = true;
     678          50 :     CurrentLocaleConvValid = true;
     679          50 :     return &CurrentLocaleConv;
     680             : }
     681             : 
     682             : #ifdef WIN32
     683             : /*
     684             :  * On Windows, strftime() returns its output in encoding CP_ACP (the default
     685             :  * operating system codepage for the computer), which is likely different
     686             :  * from SERVER_ENCODING.  This is especially important in Japanese versions
     687             :  * of Windows which will use SJIS encoding, which we don't support as a
     688             :  * server encoding.
     689             :  *
     690             :  * So, instead of using strftime(), use wcsftime() to return the value in
     691             :  * wide characters (internally UTF16) and then convert to UTF8, which we
     692             :  * know how to handle directly.
     693             :  *
     694             :  * Note that this only affects the calls to strftime() in this file, which are
     695             :  * used to get the locale-aware strings. Other parts of the backend use
     696             :  * pg_strftime(), which isn't locale-aware and does not need to be replaced.
                     :  *
                     :  * dst/dstlen: output buffer and its size in bytes.  The conversion is
                     :  * given dstlen - 1 bytes to fill, and a terminating NUL is stored after
                     :  * the converted data.
                     :  * format: strftime-style format string; it is converted into the
                     :  * 8-element wformat[] array before use.
                     :  * tm: broken-down time passed through to wcsftime().
                     :  *
                     :  * Returns the number of bytes stored in dst, not counting the terminator,
                     :  * or 0 if wcsftime() returned 0.  A zero result from either codepage
                     :  * conversion is reported with elog(ERROR) instead.
                     :  *
                     :  * NOTE(review): cache_locale_time() reaches this function through the
                     :  * strftime() macro defined below while LC_TIME and LC_CTYPE hold temporary
                     :  * values, a window in which it says errors must not be thrown.  Confirm
                     :  * that the elog(ERROR) calls here cannot fire in practice.
     697             :  */
     698             : static size_t
     699             : strftime_win32(char *dst, size_t dstlen,
     700             :                const char *format, const struct tm *tm)
     701             : {
     702             :     size_t      len;
     703             :     wchar_t     wformat[8];     /* formats used below need 3 chars */
     704             :     wchar_t     wbuf[MAX_L10N_DATA];
     705             : 
     706             :     /*
     707             :      * Get a wchar_t version of the format string.  We only actually use
     708             :      * plain-ASCII formats in this file, so we can say that they're UTF8.
     709             :      */
     710             :     len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
     711             :                               wformat, lengthof(wformat));
     712             :     if (len == 0)
     713             :         elog(ERROR, "could not convert format string from UTF-8: error code %lu",
     714             :              GetLastError());
     715             : 
     716             :     len = wcsftime(wbuf, MAX_L10N_DATA, wformat, tm);
     717             :     if (len == 0)
     718             :     {
     719             :         /*
     720             :          * wcsftime failed, possibly because the result would not fit in
     721             :          * MAX_L10N_DATA.  Return 0 with the contents of dst unspecified.
     722             :          */
     723             :         return 0;
     724             :     }
     725             : 
     726             :     len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
     727             :                               NULL, NULL);  /* dstlen - 1 reserves room for the NUL stored below */
     728             :     if (len == 0)
     729             :         elog(ERROR, "could not convert string to UTF-8: error code %lu",
     730             :              GetLastError());
     731             : 
     732             :     dst[len] = '\0';    /* the conversion was given an explicit length, so terminate here */
     733             : 
     734             :     return len;
     735             : }
     736             : 
     737             : /* redefine strftime() so that later calls in this file use the wrapper above */
     738             : #define strftime(a,b,c,d) strftime_win32(a,b,c,d)
     739             : #endif                          /* WIN32 */
     740             : 
     741             : /*
     742             :  * Subroutine for cache_locale_time().
     743             :  * Convert the given string from encoding "encoding" to the database
     744             :  * encoding, and store the result at *dst, replacing any previous value.
                     :  *
                     :  * dst: address of the cache slot.  A non-NULL prior value is pfree'd.
                     :  * src: NUL-terminated input string (its length is taken with strlen()).
                     :  * encoding: encoding identifier handed to pg_any_to_server() for src.
                     :  *
                     :  * The string stored at *dst is a copy made in TopMemoryContext.
     745             :  */
     746             : static void
     747         684 : cache_single_string(char **dst, const char *src, int encoding)
     748             : {
     749             :     char       *ptr;
     750             :     char       *olddst;
     751             : 
     752             :     /* Convert the string to the database encoding, or validate it's OK */
     753         684 :     ptr = pg_any_to_server(src, strlen(src), encoding);
     754             : 
     755             :     /* Store the string in long-lived storage, replacing any previous value */
     756         684 :     olddst = *dst;      /* freed only after the new copy has been stored */
     757         684 :     *dst = MemoryContextStrdup(TopMemoryContext, ptr);
     758         684 :     if (olddst)
     759           0 :         pfree(olddst);
     760             : 
     761             :     /* Might as well clean up any palloc'd conversion result, too */
     762         684 :     if (ptr != src)     /* ptr equal to src means there is no separate result to free */
     763           0 :         pfree(ptr);
     764         684 : }
     765             : 
     766             : /*
     767             :  * Update the lc_time localization cache variables if needed.
                     :  *
                     :  * Does nothing when CurrentLCTimeValid is already set.  Otherwise LC_TIME
                     :  * (and, on Windows, LC_CTYPE) is switched to locale_time just long enough
                     :  * to collect abbreviated and full names for 7 weekdays and 12 months with
                     :  * strftime() into a local buffer.  After the previous locale settings are
                     :  * restored, each name is converted to the database encoding and stored in
                     :  * localized_abbrev_days[], localized_full_days[],
                     :  * localized_abbrev_months[] and localized_full_months[], and
                     :  * CurrentLCTimeValid is set.
                     :  *
                     :  * Failure to restore a locale setting is reported as FATAL; a strftime()
                     :  * failure is reported as ERROR, but only after the restore.
     768             :  */
     769             : void
     770        5728 : cache_locale_time(void)
     771             : {
     772             :     char        buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];  /* one MAX_L10N_DATA slot per day/month name */
     773             :     char       *bufptr;
     774             :     time_t      timenow;
     775             :     struct tm  *timeinfo;
     776        5728 :     bool        strftimefail = false;
     777             :     int         encoding;
     778             :     int         i;
     779             :     char       *save_lc_time;
     780             : #ifdef WIN32
     781             :     char       *save_lc_ctype;
     782             : #endif
     783             : 
     784             :     /* did we do this already? */
     785        5728 :     if (CurrentLCTimeValid)
     786        5710 :         return;
     787             : 
     788          18 :     elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
     789             : 
     790             :     /*
     791             :      * As in PGLC_localeconv(), it's critical that we not throw error while
     792             :      * libc's locale settings have nondefault values.  Hence, we just call
     793             :      * strftime() within the critical section, and then convert and save its
     794             :      * results afterwards.
     795             :      */
     796             : 
     797             :     /* Save prevailing value of time locale */
     798          18 :     save_lc_time = setlocale(LC_TIME, NULL);
     799          18 :     if (!save_lc_time)
     800           0 :         elog(ERROR, "setlocale(NULL) failed");
     801          18 :     save_lc_time = pstrdup(save_lc_time);   /* keep a private copy of the name for the restore below */
     802             : 
     803             : #ifdef WIN32
     804             : 
     805             :     /*
     806             :      * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
     807             :      * must set it here.  This code looks the same as what PGLC_localeconv()
     808             :      * does, but the underlying reason is different: this does NOT determine
     809             :      * the encoding we'll get back from strftime_win32().
     810             :      */
     811             : 
     812             :     /* Save prevailing value of ctype locale */
     813             :     save_lc_ctype = setlocale(LC_CTYPE, NULL);
     814             :     if (!save_lc_ctype)
     815             :         elog(ERROR, "setlocale(NULL) failed");
     816             :     save_lc_ctype = pstrdup(save_lc_ctype);
     817             : 
     818             :     /* use lc_time to set the ctype */
     819             :     setlocale(LC_CTYPE, locale_time);
     820             : #endif
     821             : 
     822          18 :     setlocale(LC_TIME, locale_time);
     823             : 
     824             :     /* We use times close to current time as data for strftime(). */
     825          18 :     timenow = time(NULL);
     826          18 :     timeinfo = localtime(&timenow);     /* NOTE(review): result is used below without a NULL check */
     827             : 
     828             :     /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
     829          18 :     bufptr = buf;
     830             : 
     831             :     /*
     832             :      * MAX_L10N_DATA is sufficient buffer space for every known locale, and
     833             :      * POSIX defines no strftime() errors.  (Buffer space exhaustion is not an
     834             :      * error.)  An implementation might report errors (e.g. ENOMEM) by
     835             :      * returning 0 (or, less plausibly, a negative value) and setting errno.
     836             :      * Report errno just in case the implementation did that, but clear it in
     837             :      * advance of the calls so we don't emit a stale, unrelated errno.
     838             :      */
     839          18 :     errno = 0;
     840             : 
     841             :     /* localized days */
     842         144 :     for (i = 0; i < 7; i++)
     843             :     {
     844         126 :         timeinfo->tm_wday = i;
     845         126 :         if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)   /* abbreviated weekday name */
     846           0 :             strftimefail = true;    /* only record the failure; reporting waits until the locale is restored */
     847         126 :         bufptr += MAX_L10N_DATA;
     848         126 :         if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)   /* full weekday name */
     849           0 :             strftimefail = true;
     850         126 :         bufptr += MAX_L10N_DATA;
     851             :     }
     852             : 
     853             :     /* localized months */
     854         234 :     for (i = 0; i < 12; i++)
     855             :     {
     856         216 :         timeinfo->tm_mon = i;
     857         216 :         timeinfo->tm_mday = 1;   /* make sure we don't have invalid date */
     858         216 :         if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)   /* abbreviated month name */
     859           0 :             strftimefail = true;
     860         216 :         bufptr += MAX_L10N_DATA;
     861         216 :         if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)   /* full month name */
     862           0 :             strftimefail = true;
     863         216 :         bufptr += MAX_L10N_DATA;
     864             :     }
     865             : 
     866             :     /*
     867             :      * Restore the prevailing locale settings; as in PGLC_localeconv(),
     868             :      * failure to do so is fatal.
     869             :      */
     870             : #ifdef WIN32
     871             :     if (!setlocale(LC_CTYPE, save_lc_ctype))
     872             :         elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
     873             : #endif
     874          18 :     if (!setlocale(LC_TIME, save_lc_time))
     875           0 :         elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);
     876             : 
     877             :     /*
     878             :      * At this point we've done our best to clean up, and can throw errors, or
     879             :      * call functions that might throw errors, with a clean conscience.
     880             :      */
     881          18 :     if (strftimefail)
     882           0 :         elog(ERROR, "strftime() failed: %m");   /* %m reports errno, which was zeroed before the strftime() calls */
     883             : 
     884             :     /* Release the pstrdup'd locale names */
     885          18 :     pfree(save_lc_time);
     886             : #ifdef WIN32
     887             :     pfree(save_lc_ctype);
     888             : #endif
     889             : 
     890             : #ifndef WIN32
     891             : 
     892             :     /*
     893             :      * As in PGLC_localeconv(), we must convert strftime()'s output from the
     894             :      * encoding implied by LC_TIME to the database encoding.  If we can't
     895             :      * identify the LC_TIME encoding, just perform encoding validation.
     896             :      */
     897          18 :     encoding = pg_get_encoding_from_locale(locale_time, true);
     898          18 :     if (encoding < 0)
     899           0 :         encoding = PG_SQL_ASCII;
     900             : 
     901             : #else
     902             : 
     903             :     /*
     904             :      * On Windows, strftime_win32() always returns UTF8 data, so convert from
     905             :      * that if necessary.
     906             :      */
     907             :     encoding = PG_UTF8;
     908             : 
     909             : #endif                          /* WIN32 */
     910             : 
     911          18 :     bufptr = buf;       /* rewind: slots are consumed in the same order they were filled */
     912             : 
     913             :     /* localized days */
     914         144 :     for (i = 0; i < 7; i++)
     915             :     {
     916         126 :         cache_single_string(&localized_abbrev_days[i], bufptr, encoding);
     917         126 :         bufptr += MAX_L10N_DATA;
     918         126 :         cache_single_string(&localized_full_days[i], bufptr, encoding);
     919         126 :         bufptr += MAX_L10N_DATA;
     920             :     }
     921             : 
     922             :     /* localized months */
     923         234 :     for (i = 0; i < 12; i++)
     924             :     {
     925         216 :         cache_single_string(&localized_abbrev_months[i], bufptr, encoding);
     926         216 :         bufptr += MAX_L10N_DATA;
     927         216 :         cache_single_string(&localized_full_months[i], bufptr, encoding);
     928         216 :         bufptr += MAX_L10N_DATA;
     929             :     }
     930             : 
     931          18 :     CurrentLCTimeValid = true;  /* set last, so an error above leaves the cache marked invalid */
     932             : }
     933             : 
     934             : 
     935             : #if defined(WIN32) && defined(LC_MESSAGES)
     936             : /*
     937             :  * Convert a Windows setlocale() argument to a Unix-style one.
     938             :  *
     939             :  * Regardless of platform, we install message catalogs under a Unix-style
     940             :  * LL[_CC][.ENCODING][@VARIANT] naming convention.  Only LC_MESSAGES settings
     941             :  * following that style will elicit localized interface strings.
     942             :  *
     943             :  * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
     944             :  * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
     945             :  * case-insensitive.  setlocale() returns the fully-qualified form; for
     946             :  * example, setlocale("thaI") returns "Thai_Thailand.874".  Internally,
     947             :  * setlocale() and _create_locale() select a "locale identifier"[1] and store
     948             :  * it in an undocumented _locale_t field.  From that LCID, we can retrieve the
     949             :  * ISO 639 language and the ISO 3166 country.  Character encoding does not
     950             :  * matter, because the server and client encodings govern that.
     951             :  *
     952             :  * Windows Vista introduced the "locale name" concept[2], closely following
     953             :  * RFC 4646.  Locale identifiers are now deprecated.  Starting with Visual
     954             :  * Studio 2012, setlocale() accepts locale names in addition to the strings it
     955             :  * accepted historically.  It does not standardize them; setlocale("Th-tH")
     956             :  * returns "Th-tH".  setlocale(category, "") still returns a traditional
     957             :  * string.  Furthermore, msvcr110.dll changed the undocumented _locale_t
     958             :  * content to carry locale names instead of locale identifiers.
     959             :  *
     960             :  * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol.
     961             :  * IsoLocaleName() always fails in a MinGW-built postgres.exe, so only
     962             :  * Unix-style values of the lc_messages GUC can elicit localized messages.  In
     963             :  * particular, every lc_messages setting that initdb can select automatically
     964             :  * will yield only C-locale messages.  XXX This could be fixed by running the
     965             :  * fully-qualified locale name through a lookup table.
     966             :  *
     967             :  * This function returns a pointer to a static buffer bearing the converted
     968             :  * name or NULL if conversion fails.
                     :  * Because the buffer is static, each successful call overwrites the result
                     :  * of the previous one.  The names "c" and "posix" (compared
                     :  * case-insensitively) are mapped to "C" without calling _create_locale().
     969             :  *
     970             :  * [1] http://msdn.microsoft.com/en-us/library/windows/desktop/dd373763.aspx
     971             :  * [2] http://msdn.microsoft.com/en-us/library/windows/desktop/dd373814.aspx
     972             :  */
     973             : static char *
     974             : IsoLocaleName(const char *winlocname)
     975             : {
     976             : #if (_MSC_VER >= 1400)           /* VC8.0 or later */
     977             :     static char iso_lc_messages[32];
     978             :     _locale_t   loct = NULL;
     979             : 
     980             :     if (pg_strcasecmp("c", winlocname) == 0 ||
     981             :         pg_strcasecmp("posix", winlocname) == 0)
     982             :     {
     983             :         strcpy(iso_lc_messages, "C");
     984             :         return iso_lc_messages;
     985             :     }
     986             : 
     987             :     loct = _create_locale(LC_CTYPE, winlocname);
     988             :     if (loct != NULL)
     989             :     {
     990             : #if (_MSC_VER >= 1700)           /* Visual Studio 2012 or later */
     991             :         size_t      rc;
     992             :         char       *hyphen;
     993             : 
     994             :         /* Locale names use only ASCII, any conversion locale suffices. */
     995             :         rc = wchar2char(iso_lc_messages, loct->locinfo->locale_name[LC_CTYPE],
     996             :                         sizeof(iso_lc_messages), NULL);
     997             :         _free_locale(loct);     /* released before the result check, so both return paths are covered */
     998             :         if (rc == -1 || rc == sizeof(iso_lc_messages))  /* error, or output filled the buffer (presumably truncated -- confirm) */
     999             :             return NULL;
    1000             : 
    1001             :         /*
    1002             :          * Since the message catalogs sit on a case-insensitive filesystem, we
    1003             :          * need not standardize letter case here.  So long as we do not ship
    1004             :          * message catalogs for which it would matter, we also need not
    1005             :          * translate the script/variant portion, e.g. uz-Cyrl-UZ to
    1006             :          * uz_UZ@cyrillic.  Simply replace the hyphen with an underscore.
    1007             :          *
    1008             :          * Note that the locale name can be less-specific than the value we
    1009             :          * would derive under earlier Visual Studio releases.  For example,
    1010             :          * French_France.1252 yields just "fr".  This does not affect any of
    1011             :          * the country-specific message catalogs available as of this writing
    1012             :          * (pt_BR, zh_CN, zh_TW).
    1013             :          */
    1014             :         hyphen = strchr(iso_lc_messages, '-');  /* only the first hyphen is replaced */
    1015             :         if (hyphen)
    1016             :             *hyphen = '_';
    1017             : #else
    1018             :         char        isolang[32],
    1019             :                     isocrty[32];
    1020             :         LCID        lcid;
    1021             : 
    1022             :         lcid = loct->locinfo->lc_handle[LC_CTYPE];
    1023             :         if (lcid == 0)          /* no LCID recorded: fall back to US English */
    1024             :             lcid = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT);
    1025             :         _free_locale(loct);
    1026             : 
    1027             :         if (!GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, isolang, sizeof(isolang)))
    1028             :             return NULL;
    1029             :         if (!GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, isocrty, sizeof(isocrty)))
    1030             :             return NULL;
    1031             :         snprintf(iso_lc_messages, sizeof(iso_lc_messages) - 1, "%s_%s", isolang, isocrty);  /* builds language_country */
    1032             : #endif
    1033             :         return iso_lc_messages;
    1034             :     }
    1035             :     return NULL;                /* _create_locale() did not accept winlocname */
    1036             : #else
    1037             :     return NULL;                /* Not supported on this version of msvc/mingw */
    1038             : #endif                          /* _MSC_VER >= 1400 */
    1039             : }
    1040             : #endif                          /* WIN32 && LC_MESSAGES */
    1041             : 
    1042             : 
    1043             : /*
    1044             :  * Detect aging strxfrm() implementations that, in a subset of locales, write
    1045             :  * past the specified buffer length.  Affected users must update OS packages
    1046             :  * before using PostgreSQL 9.5 or later.
    1047             :  *
    1048             :  * Assume that the bug can come and go from one postmaster startup to another
    1049             :  * due to physical replication among diverse machines.  Assume that the bug's
    1050             :  * presence will not change during the life of a particular postmaster.  Given
    1051             :  * those assumptions, call this no less than once per postmaster startup per
    1052             :  * LC_COLLATE setting used.  No known-affected system offers strxfrm_l(), so
    1053             :  * there is no need to consider pg_collation locales.
                     :  *
                     :  * Each probe stores a canary byte in buf[] at the index equal to the length
                     :  * limit given to strxfrm(), then checks that the byte survived the call.
                     :  * If either probe finds the canary changed, an ERROR naming the current
                     :  * LC_COLLATE setting is raised; otherwise the function simply returns.
    1054             :  */
    1055             : void
    1056       10970 : check_strxfrm_bug(void)
    1057             : {
    1058             :     char        buf[32];
    1059       10970 :     const int   canary = 0x7F;
    1060       10970 :     bool        ok = true;
    1061             : 
    1062             :     /*
    1063             :      * Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10
    1064             :      * 05/08 returns 18 and modifies 10 bytes.  It respects limits above or
    1065             :      * below that range.
    1066             :      *
    1067             :      * The bug is present in Solaris 8 as well; it is absent in Solaris 10
    1068             :      * 01/13 and Solaris 11.2.  Affected locales include is_IS.ISO8859-1,
    1069             :      * en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R.  Unaffected locales
    1070             :      * include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C.
    1071             :      */
    1072       10970 :     buf[7] = canary;    /* first byte beyond the 7-byte limit passed below */
    1073       10970 :     (void) strxfrm(buf, "ab", 7);   /* result ignored; only the canary matters */
    1074       10970 :     if (buf[7] != canary)
    1075           0 :         ok = false;
    1076             : 
    1077             :     /*
    1078             :      * illumos bug #1594 was present in the source tree from 2010-10-11 to
    1079             :      * 2012-02-01.  Given an ASCII string of any length and length limit 1,
    1080             :      * affected systems ignore the length limit and modify a number of bytes
    1081             :      * one less than the return value.  The problem inputs for this bug do not
    1082             :      * overlap those for the Solaris bug, hence a distinct test.
    1083             :      *
    1084             :      * Affected systems include smartos-20110926T021612Z.  Affected locales
    1085             :      * include en_US.ISO8859-1 and en_US.UTF-8.  Unaffected locales include C.
    1086             :      */
    1087       10970 :     buf[1] = canary;    /* first byte beyond the 1-byte limit passed below */
    1088       10970 :     (void) strxfrm(buf, "a", 1);    /* result ignored; only the canary matters */
    1089       10970 :     if (buf[1] != canary)
    1090           0 :         ok = false;
    1091             : 
    1092       10970 :     if (!ok)            /* both probes always run; a single report covers either failure */
    1093           0 :         ereport(ERROR,
    1094             :                 (errcode(ERRCODE_SYSTEM_ERROR),
    1095             :                  errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length",
    1096             :                                  setlocale(LC_COLLATE, NULL)),
    1097             :                  errhint("Apply system library package updates.")));
    1098       10970 : }
    1099             : 
    1100             : 
    1101             : /*
    1102             :  * Cache mechanism for collation information.
    1103             :  *
    1104             :  * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
    1105             :  * (or POSIX), so we can optimize a few code paths in various places.
    1106             :  * For the built-in C and POSIX collations, we can know that without even
    1107             :  * doing a cache lookup, but we want to support aliases for C/POSIX too.
    1108             :  * For the "default" collation, there are separate static cache variables,
    1109             :  * since consulting the pg_collation catalog doesn't tell us what we need.
    1110             :  *
    1111             :  * Also, if a pg_locale_t has been requested for a collation, we cache that
    1112             :  * for the life of a backend.
    1113             :  *
    1114             :  * Note that some code relies on the flags not reporting false negatives
    1115             :  * (that is, saying it's not C when it is).  For example, char2wchar()
    1116             :  * could fail if the locale is C, so str_tolower() shouldn't call it
    1117             :  * in that case.
    1118             :  *
    1119             :  * Note that we currently lack any way to flush the cache.  Since we don't
    1120             :  * support ALTER COLLATION, this is OK.  The worst case is that someone
    1121             :  * drops a collation, and a useless cache entry hangs around in existing
    1122             :  * backends.
                     :  *
                     :  * NOTE(review): the version-mismatch hint emitted by
                     :  * pg_newlocale_from_collation() mentions "ALTER COLLATION ... REFRESH
                     :  * VERSION", so the "we don't support ALTER COLLATION" remark above may be
                     :  * stale -- confirm.
                     :  *
                     :  * lookup_collation_cache() returns the hash entry for the given collation
                     :  * OID, creating the hash table on first use.  An entry that was not found
                     :  * in the table starts out with flags_valid = false and locale = 0.  If
                     :  * set_flags is true and the entry's flags are not yet valid, collate_is_c
                     :  * and ctype_is_c are filled in from the collation's pg_collation row
                     :  * (syscache COLLOID); a failed syscache lookup raises an error.  The
                     :  * collation must be a valid OID other than DEFAULT_COLLATION_OID (both
                     :  * conditions are only Assert-checked).
    1123             :  */
    1124             : 
    1125             : static collation_cache_entry *
    1126          24 : lookup_collation_cache(Oid collation, bool set_flags)
    1127             : {
    1128             :     collation_cache_entry *cache_entry;
    1129             :     bool        found;
    1130             : 
    1131             :     Assert(OidIsValid(collation));
    1132             :     Assert(collation != DEFAULT_COLLATION_OID);
    1133             : 
    1134          24 :     if (collation_cache == NULL)
    1135             :     {
    1136             :         /* First time through, initialize the hash table (keyed by collation Oid) */
    1137             :         HASHCTL     ctl;
    1138             : 
    1139           8 :         memset(&ctl, 0, sizeof(ctl));
    1140           8 :         ctl.keysize = sizeof(Oid);
    1141           8 :         ctl.entrysize = sizeof(collation_cache_entry);
    1142           8 :         collation_cache = hash_create("Collation cache", 100, &ctl,
    1143             :                                       HASH_ELEM | HASH_BLOBS);
    1144             :     }
    1145             : 
    1146          24 :     cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
    1147          24 :     if (!found)
    1148             :     {
    1149             :         /*
                     :          * The entry was not in the table before this call.
    1150             :          * Make sure cache entry is marked invalid, in case we fail before
    1151             :          * setting things.
    1152             :          */
    1153          12 :         cache_entry->flags_valid = false;
    1154          12 :         cache_entry->locale = 0;
    1155             :     }
    1156             : 
    1157          24 :     if (set_flags && !cache_entry->flags_valid)
    1158             :     {
    1159             :         /* Attempt to set the flags from the collation's pg_collation row */
    1160             :         HeapTuple   tp;
    1161             :         Form_pg_collation collform;
    1162             :         const char *collcollate;
    1163             :         const char *collctype;
    1164             : 
    1165          12 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
    1166          12 :         if (!HeapTupleIsValid(tp))
    1167           0 :             elog(ERROR, "cache lookup failed for collation %u", collation);
    1168          12 :         collform = (Form_pg_collation) GETSTRUCT(tp);
    1169             : 
    1170          12 :         collcollate = NameStr(collform->collcollate);
    1171          12 :         collctype = NameStr(collform->collctype);
    1172             :         /* Each flag is set only on an exact match with "C" or "POSIX" */
    1173          16 :         cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
    1174           4 :                                      (strcmp(collcollate, "POSIX") == 0));
    1175          16 :         cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
    1176           4 :                                    (strcmp(collctype, "POSIX") == 0));
    1177             : 
    1178          12 :         cache_entry->flags_valid = true;
    1179             : 
    1180          12 :         ReleaseSysCache(tp);
    1181             :     }
    1182             : 
    1183          24 :     return cache_entry;
    1184             : }
    1185             : 
    1186             : 
    1187             : /*
    1188             :  * Detect whether collation's LC_COLLATE property is C
                     :  *
                     :  * Returns true for the built-in C and POSIX collations, for the default
                     :  * collation when setlocale(LC_COLLATE, NULL) reports exactly "C" or
                     :  * "POSIX", and for any other collation whose cached collate_is_c flag is
                     :  * set.  Returns false otherwise, including for an invalid collation OID.
                     :  * Raises an error if setlocale() returns NULL for the default collation.
    1189             :  */
    1190             : bool
    1191     9216978 : lc_collate_is_c(Oid collation)
    1192             : {
    1193             :     /*
    1194             :      * If we're asked about "collation 0", return false, so that the code will
    1195             :      * go into the non-C path and report that the collation is bogus.
    1196             :      */
    1197     9216978 :     if (!OidIsValid(collation))
    1198           0 :         return false;
    1199             : 
    1200             :     /*
    1201             :      * If we're asked about the default collation, we have to inquire of the C
    1202             :      * library.  Cache the result so we only have to compute it once.
    1203             :      */
    1204     9216978 :     if (collation == DEFAULT_COLLATION_OID)
    1205             :     {
    1206             :         static int  result = -1;    /* -1 until computed, then the cached answer */
    1207             :         char       *localeptr;
    1208             : 
    1209     6421604 :         if (result >= 0)
    1210     6420016 :             return (bool) result;
    1211        1588 :         localeptr = setlocale(LC_COLLATE, NULL);    /* NULL locale: query only */
    1212        1588 :         if (!localeptr)
    1213           0 :             elog(ERROR, "invalid LC_COLLATE setting");
    1214             : 
    1215        1588 :         if (strcmp(localeptr, "C") == 0)
    1216          40 :             result = true;
    1217        1548 :         else if (strcmp(localeptr, "POSIX") == 0)
    1218           0 :             result = true;
    1219             :         else
    1220        1548 :             result = false;
    1221        1588 :         return (bool) result;
    1222             :     }
    1223             : 
    1224             :     /*
    1225             :      * If we're asked about the built-in C/POSIX collations, we know that.
    1226             :      */
    1227     2795374 :     if (collation == C_COLLATION_OID ||
    1228             :         collation == POSIX_COLLATION_OID)
    1229     2795362 :         return true;
    1230             : 
    1231             :     /*
    1232             :      * Otherwise, we have to consult pg_collation, but we cache that.
    1233             :      */
    1234          12 :     return (lookup_collation_cache(collation, true))->collate_is_c;
    1235             : }
    1236             : 
    1237             : /*
    1238             :  * Detect whether collation's LC_CTYPE property is C
                     :  *
                     :  * Returns true for the built-in C and POSIX collations, for the default
                     :  * collation when setlocale(LC_CTYPE, NULL) reports exactly "C" or "POSIX",
                     :  * and for any other collation whose cached ctype_is_c flag is set.
                     :  * Returns false otherwise, including for an invalid collation OID.
                     :  * Raises an error if setlocale() returns NULL for the default collation.
    1239             :  */
    1240             : bool
    1241     3073530 : lc_ctype_is_c(Oid collation)
    1242             : {
    1243             :     /*
    1244             :      * If we're asked about "collation 0", return false, so that the code will
    1245             :      * go into the non-C path and report that the collation is bogus.
    1246             :      */
    1247     3073530 :     if (!OidIsValid(collation))
    1248           0 :         return false;
    1249             : 
    1250             :     /*
    1251             :      * If we're asked about the default collation, we have to inquire of the C
    1252             :      * library.  Cache the result so we only have to compute it once.
    1253             :      */
    1254     3073530 :     if (collation == DEFAULT_COLLATION_OID)
    1255             :     {
    1256             :         static int  result = -1;    /* -1 until computed, then the cached answer */
    1257             :         char       *localeptr;
    1258             : 
    1259     1738226 :         if (result >= 0)
    1260     1737568 :             return (bool) result;
    1261         658 :         localeptr = setlocale(LC_CTYPE, NULL);  /* NULL locale: query only */
    1262         658 :         if (!localeptr)
    1263           0 :             elog(ERROR, "invalid LC_CTYPE setting");
    1264             : 
    1265         658 :         if (strcmp(localeptr, "C") == 0)
    1266          18 :             result = true;
    1267         640 :         else if (strcmp(localeptr, "POSIX") == 0)
    1268           0 :             result = true;
    1269             :         else
    1270         640 :             result = false;
    1271         658 :         return (bool) result;
    1272             :     }
    1273             : 
    1274             :     /*
    1275             :      * If we're asked about the built-in C/POSIX collations, we know that.
    1276             :      */
    1277     1335304 :     if (collation == C_COLLATION_OID ||
    1278             :         collation == POSIX_COLLATION_OID)
    1279     1335292 :         return true;
    1280             : 
    1281             :     /*
    1282             :      * Otherwise, we have to consult pg_collation, but we cache that.
    1283             :      */
    1284          12 :     return (lookup_collation_cache(collation, true))->ctype_is_c;
    1285             : }
    1286             : 
    1287             : 
    1288             : /*
                     :  * simple subroutine for reporting errors from newlocale()
                     :  *
                     :  * Raises an ERROR with ERRCODE_INVALID_PARAMETER_VALUE that names
                     :  * localename and includes the errno text (%m).  An errno of zero is
                     :  * replaced by ENOENT before reporting; the callers in this file set errno
                     :  * to 0 right before the call whose failure they report, presumably so
                     :  * that a stale errno value is not picked up here.
                     :  */
    1289             : #ifdef HAVE_LOCALE_T
    1290             : static void
    1291           0 : report_newlocale_failure(const char *localename)
    1292             : {
    1293             :     int         save_errno;
    1294             : 
    1295             :     /*
    1296             :      * Windows doesn't provide any useful error indication from
    1297             :      * _create_locale(), and BSD-derived platforms don't seem to feel they
    1298             :      * need to set errno either (even though POSIX is pretty clear that
    1299             :      * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
    1300             :      * is what to report.
    1301             :      */
    1302           0 :     if (errno == 0)
    1303           0 :         errno = ENOENT;
    1304             : 
    1305             :     /*
    1306             :      * ENOENT means "no such locale", not "no such file", so clarify that
    1307             :      * errno with an errdetail message.
                     :      * The detail is attached only when the saved errno is ENOENT.
    1308             :      */
    1309           0 :     save_errno = errno;         /* auxiliary funcs might change errno */
    1310           0 :     ereport(ERROR,
    1311             :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1312             :              errmsg("could not create locale \"%s\": %m",
    1313             :                     localename),
    1314             :              (save_errno == ENOENT ?
    1315             :               errdetail("The operating system could not find any locale data for the locale name \"%s\".",
    1316             :                         localename) : 0)));
    1317             : }
    1318             : #endif                          /* HAVE_LOCALE_T */
    1319             : 
    1320             : 
    1321             : /*
    1322             :  * Create a locale_t from a collation OID.  Results are cached for the
    1323             :  * lifetime of the backend.  Thus, do not free the result with freelocale().
    1324             :  *
    1325             :  * As a special optimization, the default/database collation returns 0.
    1326             :  * Callers should then revert to the non-locale_t-enabled code path.
    1327             :  * In fact, they shouldn't call this function at all when they are dealing
    1328             :  * with the default locale.  That can save quite a bit in hotspots.
    1329             :  * Also, callers should avoid calling this before going down a C/POSIX
    1330             :  * fastpath, because such a fastpath should work even on platforms without
    1331             :  * locale_t support in the C library.
    1332             :  *
    1333             :  * For simplicity, we always generate COLLATE + CTYPE even though we
    1334             :  * might only need one of them.  Since the work is done only once per
    1335             :  * collation per session (the result is cached), it shouldn't cost much.
                     :  *
                     :  * Errors are raised if the collation's pg_collation row cannot be found,
                     :  * if the provider's locale or collator cannot be created (or the provider
                     :  * is not supported by this build/platform), or if the row records a
                     :  * collversion but no actual version can be obtained.  A mismatch between
                     :  * the recorded and the actual version only produces a WARNING.
    1336             :  */
    1337             : pg_locale_t
    1338           0 : pg_newlocale_from_collation(Oid collid)
    1339             : {
    1340             :     collation_cache_entry *cache_entry;
    1341             : 
    1342             :     /* Callers must pass a valid OID */
    1343             :     Assert(OidIsValid(collid));
    1344             : 
    1345             :     /* Return 0 for "default" collation, just in case caller forgets */
    1346           0 :     if (collid == DEFAULT_COLLATION_OID)
    1347           0 :         return (pg_locale_t) 0;
    1348             :     /* set_flags = false: only the cache entry is needed, not the C/POSIX flags */
    1349           0 :     cache_entry = lookup_collation_cache(collid, false);
    1350             : 
    1351           0 :     if (cache_entry->locale == 0)
    1352             :     {
    1353             :         /* We haven't computed this yet in this session, so do it */
    1354             :         HeapTuple   tp;
    1355             :         Form_pg_collation collform;
    1356             :         const char *collcollate;
    1357             :         const char *collctype pg_attribute_unused();
    1358             :         struct pg_locale_struct result;
    1359             :         pg_locale_t resultp;
    1360             :         Datum       collversion;
    1361             :         bool        isnull;
    1362             : 
    1363           0 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
    1364           0 :         if (!HeapTupleIsValid(tp))
    1365           0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
    1366           0 :         collform = (Form_pg_collation) GETSTRUCT(tp);
    1367             : 
    1368           0 :         collcollate = NameStr(collform->collcollate);
    1369           0 :         collctype = NameStr(collform->collctype);
    1370             : 
    1371             :         /*
                     :          * We'll fill in the result struct locally before allocating memory.
                     :          * For a provider that is neither LIBC nor ICU, only the provider and
                     :          * deterministic fields are set; the rest stays zeroed.
                     :          */
    1372           0 :         memset(&result, 0, sizeof(result));
    1373           0 :         result.provider = collform->collprovider;
    1374           0 :         result.deterministic = collform->collisdeterministic;
    1375             : 
    1376           0 :         if (collform->collprovider == COLLPROVIDER_LIBC)
    1377             :         {
    1378             : #ifdef HAVE_LOCALE_T
    1379             :             locale_t    loc;
    1380             : 
    1381           0 :             if (strcmp(collcollate, collctype) == 0)
    1382             :             {
    1383             :                 /* Normal case where they're the same */
    1384           0 :                 errno = 0;
    1385             : #ifndef WIN32
    1386           0 :                 loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
    1387             :                                 NULL);
    1388             : #else
    1389             :                 loc = _create_locale(LC_ALL, collcollate);
    1390             : #endif
    1391           0 :                 if (!loc)
    1392           0 :                     report_newlocale_failure(collcollate);
    1393             :             }
    1394             :             else
    1395             :             {
    1396             : #ifndef WIN32
    1397             :                 /*
                     :                  * We need two newlocale() steps: loc1 is created for
                     :                  * LC_COLLATE and then passed as the base of the LC_CTYPE
                     :                  * step.
                     :                  *
                     :                  * NOTE(review): if the second newlocale() fails, loc1 is
                     :                  * not released before the error is raised -- looks like a
                     :                  * leak; confirm newlocale()'s treatment of the base
                     :                  * locale on failure.
                     :                  */
    1398             :                 locale_t    loc1;
    1399             : 
    1400           0 :                 errno = 0;
    1401           0 :                 loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
    1402           0 :                 if (!loc1)
    1403           0 :                     report_newlocale_failure(collcollate);
    1404           0 :                 errno = 0;
    1405           0 :                 loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
    1406           0 :                 if (!loc)
    1407           0 :                     report_newlocale_failure(collctype);
    1408             : #else
    1409             : 
    1410             :                 /*
    1411             :                  * XXX The _create_locale() API doesn't appear to support
    1412             :                  * this. Could perhaps be worked around by changing
    1413             :                  * pg_locale_t to contain two separate fields.
    1414             :                  */
    1415             :                 ereport(ERROR,
    1416             :                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1417             :                          errmsg("collations with different collate and ctype values are not supported on this platform")));
    1418             : #endif
    1419             :             }
    1420             : 
    1421           0 :             result.info.lt = loc;
    1422             : #else                           /* not HAVE_LOCALE_T */
    1423             :             /* platform that doesn't support locale_t */
    1424             :             ereport(ERROR,
    1425             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1426             :                      errmsg("collation provider LIBC is not supported on this platform")));
    1427             : #endif                          /* not HAVE_LOCALE_T */
    1428             :         }
    1429           0 :         else if (collform->collprovider == COLLPROVIDER_ICU)
    1430             :         {
    1431             : #ifdef USE_ICU
    1432             :             UCollator  *collator;
    1433             :             UErrorCode  status;
    1434             : 
    1435             :             if (strcmp(collcollate, collctype) != 0)
    1436             :                 ereport(ERROR,
    1437             :                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1438             :                          errmsg("collations with different collate and ctype values are not supported by ICU")));
    1439             : 
    1440             :             status = U_ZERO_ERROR;
    1441             :             collator = ucol_open(collcollate, &status);
    1442             :             if (U_FAILURE(status))
    1443             :                 ereport(ERROR,
    1444             :                         (errmsg("could not open collator for locale \"%s\": %s",
    1445             :                                 collcollate, u_errorName(status))));
    1446             : 
    1447             :             if (U_ICU_VERSION_MAJOR_NUM < 54)
    1448             :                 icu_set_collation_attributes(collator, collcollate);
    1449             : 
    1450             :             /*
                     :              * We will leak this string if we get an error below :-(
                     :              *
                     :              * NOTE(review): the collator opened above is not closed
                     :              * anywhere in this function either, so it appears to be
                     :              * leaked on that same error path -- confirm.
                     :              */
    1451             :             result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
    1452             :                                                          collcollate);
    1453             :             result.info.icu.ucol = collator;
    1454             : #else                           /* not USE_ICU */
    1455             :             /* could get here if a collation was created by a build with ICU */
    1456           0 :             ereport(ERROR,
    1457             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1458             :                      errmsg("ICU is not supported in this build"), \
    1459             :                      errhint("You need to rebuild PostgreSQL using --with-icu.")));
    1460             : #endif                          /* not USE_ICU */
    1461             :         }
    1462             :         /*
                     :          * If the catalog row records a collation version, compare it with
                     :          * the version the provider reports now.  A missing actual version
                     :          * is an error; a differing one is only warned about.
                     :          */
    1463           0 :         collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
    1464             :                                       &isnull);
    1465           0 :         if (!isnull)
    1466             :         {
    1467             :             char       *actual_versionstr;
    1468             :             char       *collversionstr;
    1469             : 
    1470           0 :             actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate);
    1471           0 :             if (!actual_versionstr)
    1472             :             {
    1473             :                 /*
    1474             :                  * This could happen when specifying a version in CREATE
    1475             :                  * COLLATION for a libc locale, or manually creating a mess in
    1476             :                  * the catalogs.
    1477             :                  */
    1478           0 :                 ereport(ERROR,
    1479             :                         (errmsg("collation \"%s\" has no actual version, but a version was specified",
    1480             :                                 NameStr(collform->collname))));
    1481             :             }
    1482           0 :             collversionstr = TextDatumGetCString(collversion);
    1483             : 
    1484           0 :             if (strcmp(actual_versionstr, collversionstr) != 0)
    1485           0 :                 ereport(WARNING,
    1486             :                         (errmsg("collation \"%s\" has version mismatch",
    1487             :                                 NameStr(collform->collname)),
    1488             :                          errdetail("The collation in the database was created using version %s, "
    1489             :                                    "but the operating system provides version %s.",
    1490             :                                    collversionstr, actual_versionstr),
    1491             :                          errhint("Rebuild all objects affected by this collation and run "
    1492             :                                  "ALTER COLLATION %s REFRESH VERSION, "
    1493             :                                  "or build PostgreSQL with the right library version.",
    1494             :                                  quote_qualified_identifier(get_namespace_name(collform->collnamespace),
    1495             :                                                             NameStr(collform->collname)))));
    1496             :         }
    1497             : 
    1498           0 :         ReleaseSysCache(tp);
    1499             : 
    1500             :         /* We'll keep the pg_locale_t structures in TopMemoryContext */
    1501           0 :         resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp));
    1502           0 :         *resultp = result;
    1503             :         /* Only now is the cache entry updated, after everything above succeeded */
    1504           0 :         cache_entry->locale = resultp;
    1505             :     }
    1506             : 
    1507           0 :     return cache_entry->locale;
    1508             : }
    1509             : 
    1510             : /*
    1511             :  * Get provider-specific collation version string for the given collation from
    1512             :  * the operating system/library.
    1513             :  *
    1514             :  * A particular provider must always either return a non-NULL string or return
    1515             :  * NULL (if it doesn't support versions).  It must not return NULL for some
    1516             :  * collcollate and not NULL for others.
                     :  *
                     :  * As written here, only the ICU provider in a USE_ICU build yields a
                     :  * string: a pstrdup'd copy of the collator's version as formatted by
                     :  * u_versionToString().  Every other case returns NULL.  An error is raised
                     :  * if the ICU collator cannot be opened.
    1517             :  */
    1518             : char *
    1519        1284 : get_collation_actual_version(char collprovider, const char *collcollate)
    1520             : {
    1521             :     char       *collversion;
    1522             : 
    1523             : #ifdef USE_ICU
    1524             :     if (collprovider == COLLPROVIDER_ICU)
    1525             :     {
    1526             :         UCollator  *collator;
    1527             :         UErrorCode  status;
    1528             :         UVersionInfo versioninfo;
    1529             :         char        buf[U_MAX_VERSION_STRING_LENGTH];
    1530             : 
    1531             :         status = U_ZERO_ERROR;
    1532             :         collator = ucol_open(collcollate, &status);
    1533             :         if (U_FAILURE(status))
    1534             :             ereport(ERROR,
    1535             :                     (errmsg("could not open collator for locale \"%s\": %s",
    1536             :                             collcollate, u_errorName(status))));
    1537             :         ucol_getVersion(collator, versioninfo);
    1538             :         ucol_close(collator);   /* collator was opened only to read its version */
    1539             : 
    1540             :         u_versionToString(versioninfo, buf);
    1541             :         collversion = pstrdup(buf);
    1542             :     }
    1543             :     else
    1544             : #endif
    1545        1284 :         collversion = NULL;     /* not the ICU provider, or built without USE_ICU */
    1546             : 
    1547        1284 :     return collversion;
    1548             : }
    1549             : 
    1550             : 
    1551             : #ifdef USE_ICU
    1552             : /*
    1553             :  * Converter object for converting between ICU's UChar strings and C strings
    1554             :  * in database encoding.  Since the database encoding doesn't change, we only
    1555             :  * need one of these per session.
                     :  * It stays NULL until init_icu_converter() has opened it successfully.
    1556             :  */
    1557             : static UConverter *icu_converter = NULL;
    1558             : /*
                     :  * Open the ICU converter for the database encoding on first call and store
                     :  * it in icu_converter; later calls return immediately.  Raises an error if
                     :  * ucnv_open() fails, in which case icu_converter is left unset.
                     :  */
    1559             : static void
    1560             : init_icu_converter(void)
    1561             : {
    1562             :     const char *icu_encoding_name;
    1563             :     UErrorCode  status;
    1564             :     UConverter *conv;
    1565             : 
    1566             :     if (icu_converter)
    1567             :         return;                 /* already opened earlier in this session */
    1568             : 
    1569             :     icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
    1570             : 
    1571             :     status = U_ZERO_ERROR;
    1572             :     conv = ucnv_open(icu_encoding_name, &status);
    1573             :     if (U_FAILURE(status))
    1574             :         ereport(ERROR,
    1575             :                 (errmsg("could not open ICU converter for encoding \"%s\": %s",
    1576             :                         icu_encoding_name, u_errorName(status))));
    1577             : 
    1578             :     icu_converter = conv;
    1579             : }
    1580             : 
    1581             : /*
    1582             :  * Convert a string in the database encoding into a string of UChars.
    1583             :  *
    1584             :  * The source string at buff is of length nbytes
    1585             :  * (it needn't be nul-terminated)
    1586             :  *
    1587             :  * *buff_uchar receives a pointer to the palloc'd result string, and
    1588             :  * the function's result is the number of UChars generated.
    1589             :  *
    1590             :  * The result string is nul-terminated, though most callers rely on the
    1591             :  * result length instead.
                     :  *
                     :  * An error is raised if either ucnv_toUChars() call fails.
                     :  *
                     :  * NOTE(review): nbytes is a size_t; confirm it cannot exceed the range of
                     :  * the length parameter that ucnv_toUChars() accepts.
    1592             :  */
    1593             : int32_t
    1594             : icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
    1595             : {
    1596             :     UErrorCode  status;
    1597             :     int32_t     len_uchar;
    1598             : 
    1599             :     init_icu_converter();
    1600             :     /*
                     :      * First call passes no output buffer, only to learn the required
                     :      * length; U_BUFFER_OVERFLOW_ERROR is therefore not treated as failure.
                     :      */
    1601             :     status = U_ZERO_ERROR;
    1602             :     len_uchar = ucnv_toUChars(icu_converter, NULL, 0,
    1603             :                               buff, nbytes, &status);
    1604             :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
    1605             :         ereport(ERROR,
    1606             :                 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
    1607             :     /* Allocate the reported length plus one UChar (presumably for the nul) */
    1608             :     *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
    1609             :     /* Second call does the actual conversion; now any failure is an error */
    1610             :     status = U_ZERO_ERROR;
    1611             :     len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar + 1,
    1612             :                               buff, nbytes, &status);
    1613             :     if (U_FAILURE(status))
    1614             :         ereport(ERROR,
    1615             :                 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
    1616             : 
    1617             :     return len_uchar;
    1618             : }
    1619             : 
    1620             : /*
    1621             :  * Convert a string of UChars into the database encoding.
    1622             :  *
    1623             :  * The source string at buff_uchar is of length len_uchar
    1624             :  * (it needn't be nul-terminated)
    1625             :  *
    1626             :  * *result receives a pointer to the palloc'd result string, and the
    1627             :  * function's result is the number of bytes generated (not counting nul).
    1628             :  *
    1629             :  * The result string is nul-terminated.
                     :  *
                     :  * An error is raised if either ucnv_fromUChars() call fails.
    1630             :  */
    1631             : int32_t
    1632             : icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
    1633             : {
    1634             :     UErrorCode  status;
    1635             :     int32_t     len_result;
    1636             : 
    1637             :     init_icu_converter();
    1638             :     /*
                     :      * First call passes no output buffer, only to learn the required
                     :      * length; U_BUFFER_OVERFLOW_ERROR is therefore not treated as failure.
                     :      */
    1639             :     status = U_ZERO_ERROR;
    1640             :     len_result = ucnv_fromUChars(icu_converter, NULL, 0,
    1641             :                                  buff_uchar, len_uchar, &status);
    1642             :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
    1643             :         ereport(ERROR,
    1644             :                 (errmsg("%s failed: %s", "ucnv_fromUChars",
    1645             :                         u_errorName(status))));
    1646             :     /* Allocate the reported length plus one byte (presumably for the nul) */
    1647             :     *result = palloc(len_result + 1);
    1648             :     /* Second call does the actual conversion; now any failure is an error */
    1649             :     status = U_ZERO_ERROR;
    1650             :     len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
    1651             :                                  buff_uchar, len_uchar, &status);
    1652             :     if (U_FAILURE(status))
    1653             :         ereport(ERROR,
    1654             :                 (errmsg("%s failed: %s", "ucnv_fromUChars",
    1655             :                         u_errorName(status))));
    1656             : 
    1657             :     return len_result;
    1658             : }
    1659             : 
    1660             : /*
    1661             :  * Parse collation attributes and apply them to the open collator.  This takes
    1662             :  * a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
    1663             :  * applies the key-value arguments.
    1664             :  *
    1665             :  * Starting with ICU version 54, the attributes are processed automatically by
    1666             :  * ucol_open(), so this is only necessary for emulating this behavior on older
    1667             :  * versions.
    1668             :  */
    1669             : pg_attribute_unused()
    1670             : static void
    1671             : icu_set_collation_attributes(UCollator *collator, const char *loc)
    1672             : {
    1673             :     char       *str = asc_tolower(loc, strlen(loc));
    1674             : 
    1675             :     str = strchr(str, '@');
    1676             :     if (!str)
    1677             :         return;
    1678             :     str++;
    1679             : 
    1680             :     for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
    1681             :     {
    1682             :         char       *e = strchr(token, '=');
    1683             : 
    1684             :         if (e)
    1685             :         {
    1686             :             char       *name;
    1687             :             char       *value;
    1688             :             UColAttribute uattr;
    1689             :             UColAttributeValue uvalue;
    1690             :             UErrorCode  status;
    1691             : 
    1692             :             status = U_ZERO_ERROR;
    1693             : 
    1694             :             *e = '\0';
    1695             :             name = token;
    1696             :             value = e + 1;
    1697             : 
    1698             :             /*
    1699             :              * See attribute name and value lists in ICU i18n/coll.cpp
    1700             :              */
    1701             :             if (strcmp(name, "colstrength") == 0)
    1702             :                 uattr = UCOL_STRENGTH;
    1703             :             else if (strcmp(name, "colbackwards") == 0)
    1704             :                 uattr = UCOL_FRENCH_COLLATION;
    1705             :             else if (strcmp(name, "colcaselevel") == 0)
    1706             :                 uattr = UCOL_CASE_LEVEL;
    1707             :             else if (strcmp(name, "colcasefirst") == 0)
    1708             :                 uattr = UCOL_CASE_FIRST;
    1709             :             else if (strcmp(name, "colalternate") == 0)
    1710             :                 uattr = UCOL_ALTERNATE_HANDLING;
    1711             :             else if (strcmp(name, "colnormalization") == 0)
    1712             :                 uattr = UCOL_NORMALIZATION_MODE;
    1713             :             else if (strcmp(name, "colnumeric") == 0)
    1714             :                 uattr = UCOL_NUMERIC_COLLATION;
    1715             :             else
    1716             :                 /* ignore if unknown */
    1717             :                 continue;
    1718             : 
    1719             :             if (strcmp(value, "primary") == 0)
    1720             :                 uvalue = UCOL_PRIMARY;
    1721             :             else if (strcmp(value, "secondary") == 0)
    1722             :                 uvalue = UCOL_SECONDARY;
    1723             :             else if (strcmp(value, "tertiary") == 0)
    1724             :                 uvalue = UCOL_TERTIARY;
    1725             :             else if (strcmp(value, "quaternary") == 0)
    1726             :                 uvalue = UCOL_QUATERNARY;
    1727             :             else if (strcmp(value, "identical") == 0)
    1728             :                 uvalue = UCOL_IDENTICAL;
    1729             :             else if (strcmp(value, "no") == 0)
    1730             :                 uvalue = UCOL_OFF;
    1731             :             else if (strcmp(value, "yes") == 0)
    1732             :                 uvalue = UCOL_ON;
    1733             :             else if (strcmp(value, "shifted") == 0)
    1734             :                 uvalue = UCOL_SHIFTED;
    1735             :             else if (strcmp(value, "non-ignorable") == 0)
    1736             :                 uvalue = UCOL_NON_IGNORABLE;
    1737             :             else if (strcmp(value, "lower") == 0)
    1738             :                 uvalue = UCOL_LOWER_FIRST;
    1739             :             else if (strcmp(value, "upper") == 0)
    1740             :                 uvalue = UCOL_UPPER_FIRST;
    1741             :             else
    1742             :                 status = U_ILLEGAL_ARGUMENT_ERROR;
    1743             : 
    1744             :             if (status == U_ZERO_ERROR)
    1745             :                 ucol_setAttribute(collator, uattr, uvalue, &status);
    1746             : 
    1747             :             /*
    1748             :              * Pretend the error came from ucol_open(), for consistent error
    1749             :              * message across ICU versions.
    1750             :              */
    1751             :             if (U_FAILURE(status))
    1752             :                 ereport(ERROR,
    1753             :                         (errmsg("could not open collator for locale \"%s\": %s",
    1754             :                                 loc, u_errorName(status))));
    1755             :         }
    1756             :     }
    1757             : }
    1758             : 
    1759             : #endif                          /* USE_ICU */
    1760             : 
    1761             : /*
    1762             :  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
    1763             :  * Therefore we keep them here rather than with the mbutils code.
    1764             :  */
    1765             : 
    1766             : /*
    1767             :  * wchar2char --- convert wide characters to multibyte format
    1768             :  *
    1769             :  * This has the same API as the standard wcstombs_l() function; in particular,
    1770             :  * tolen is the maximum number of bytes to store at *to, and *from must be
    1771             :  * zero-terminated.  The output will be zero-terminated iff there is room.
    1772             :  */
    1773             : size_t
    1774      265606 : wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
    1775             : {
    1776             :     size_t      result;
    1777             : 
    1778             :     Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
    1779             : 
    1780      265606 :     if (tolen == 0)
    1781           0 :         return 0;
    1782             : 
    1783             : #ifdef WIN32
    1784             : 
    1785             :     /*
    1786             :      * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
    1787             :      * for some reason mbstowcs and wcstombs won't do this for us, so we use
    1788             :      * MultiByteToWideChar().
    1789             :      */
    1790             :     if (GetDatabaseEncoding() == PG_UTF8)
    1791             :     {
    1792             :         result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
    1793             :                                      NULL, NULL);
    1794             :         /* A zero return is failure */
    1795             :         if (result <= 0)
    1796             :             result = -1;
    1797             :         else
    1798             :         {
    1799             :             Assert(result <= tolen);
    1800             :             /* Microsoft counts the zero terminator in the result */
    1801             :             result--;
    1802             :         }
    1803             :     }
    1804             :     else
    1805             : #endif                          /* WIN32 */
    1806      265606 :     if (locale == (pg_locale_t) 0)
    1807             :     {
    1808             :         /* Use wcstombs directly for the default locale */
    1809      265606 :         result = wcstombs(to, from, tolen);
    1810             :     }
    1811             :     else
    1812             :     {
    1813             : #ifdef HAVE_LOCALE_T
    1814             : #ifdef HAVE_WCSTOMBS_L
    1815             :         /* Use wcstombs_l for nondefault locales */
    1816             :         result = wcstombs_l(to, from, tolen, locale->info.lt);
    1817             : #else                           /* !HAVE_WCSTOMBS_L */
    1818             :         /* We have to temporarily set the locale as current ... ugh */
    1819           0 :         locale_t    save_locale = uselocale(locale->info.lt);
    1820             : 
    1821           0 :         result = wcstombs(to, from, tolen);
    1822             : 
    1823           0 :         uselocale(save_locale);
    1824             : #endif                          /* HAVE_WCSTOMBS_L */
    1825             : #else                           /* !HAVE_LOCALE_T */
    1826             :         /* Can't have locale != 0 without HAVE_LOCALE_T */
    1827             :         elog(ERROR, "wcstombs_l is not available");
    1828             :         result = 0;             /* keep compiler quiet */
    1829             : #endif                          /* HAVE_LOCALE_T */
    1830             :     }
    1831             : 
    1832      265606 :     return result;
    1833             : }
    1834             : 
    1835             : /*
    1836             :  * char2wchar --- convert multibyte characters to wide characters
    1837             :  *
    1838             :  * This has almost the API of mbstowcs_l(), except that *from need not be
    1839             :  * null-terminated; instead, the number of input bytes is specified as
    1840             :  * fromlen.  Also, we ereport() rather than returning -1 for invalid
    1841             :  * input encoding.  tolen is the maximum number of wchar_t's to store at *to.
    1842             :  * The output will be zero-terminated iff there is room.
    1843             :  */
    1844             : size_t
    1845      268678 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
    1846             :            pg_locale_t locale)
    1847             : {
    1848             :     size_t      result;
    1849             : 
    1850             :     Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
    1851             : 
    1852      268678 :     if (tolen == 0)
    1853           0 :         return 0;
    1854             : 
    1855             : #ifdef WIN32
    1856             :     /* See WIN32 "Unicode" comment above */
    1857             :     if (GetDatabaseEncoding() == PG_UTF8)
    1858             :     {
    1859             :         /* Win32 API does not work for zero-length input */
    1860             :         if (fromlen == 0)
    1861             :             result = 0;
    1862             :         else
    1863             :         {
    1864             :             result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
    1865             :             /* A zero return is failure */
    1866             :             if (result == 0)
    1867             :                 result = -1;
    1868             :         }
    1869             : 
    1870             :         if (result != -1)
    1871             :         {
    1872             :             Assert(result < tolen);
    1873             :             /* Append trailing null wchar (MultiByteToWideChar() does not) */
    1874             :             to[result] = 0;
    1875             :         }
    1876             :     }
    1877             :     else
    1878             : #endif                          /* WIN32 */
    1879             :     {
    1880             :         /* mbstowcs requires ending '\0' */
    1881      268678 :         char       *str = pnstrdup(from, fromlen);
    1882             : 
    1883      268678 :         if (locale == (pg_locale_t) 0)
    1884             :         {
    1885             :             /* Use mbstowcs directly for the default locale */
    1886      268678 :             result = mbstowcs(to, str, tolen);
    1887             :         }
    1888             :         else
    1889             :         {
    1890             : #ifdef HAVE_LOCALE_T
    1891             : #ifdef HAVE_MBSTOWCS_L
    1892             :             /* Use mbstowcs_l for nondefault locales */
    1893             :             result = mbstowcs_l(to, str, tolen, locale->info.lt);
    1894             : #else                           /* !HAVE_MBSTOWCS_L */
    1895             :             /* We have to temporarily set the locale as current ... ugh */
    1896           0 :             locale_t    save_locale = uselocale(locale->info.lt);
    1897             : 
    1898           0 :             result = mbstowcs(to, str, tolen);
    1899             : 
    1900           0 :             uselocale(save_locale);
    1901             : #endif                          /* HAVE_MBSTOWCS_L */
    1902             : #else                           /* !HAVE_LOCALE_T */
    1903             :             /* Can't have locale != 0 without HAVE_LOCALE_T */
    1904             :             elog(ERROR, "mbstowcs_l is not available");
    1905             :             result = 0;         /* keep compiler quiet */
    1906             : #endif                          /* HAVE_LOCALE_T */
    1907             :         }
    1908             : 
    1909      268678 :         pfree(str);
    1910             :     }
    1911             : 
    1912      268678 :     if (result == -1)
    1913             :     {
    1914             :         /*
    1915             :          * Invalid multibyte character encountered.  We try to give a useful
    1916             :          * error message by letting pg_verifymbstr check the string.  But it's
    1917             :          * possible that the string is OK to us, and not OK to mbstowcs ---
    1918             :          * this suggests that the LC_CTYPE locale is different from the
    1919             :          * database encoding.  Give a generic error message if verifymbstr
    1920             :          * can't find anything wrong.
    1921             :          */
    1922           0 :         pg_verifymbstr(from, fromlen, false);   /* might not return */
    1923             :         /* but if it does ... */
    1924           0 :         ereport(ERROR,
    1925             :                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
    1926             :                  errmsg("invalid multibyte character for locale"),
    1927             :                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
    1928             :     }
    1929             : 
    1930      268678 :     return result;
    1931             : }

Generated by: LCOV version 1.13