LCOV - code coverage report
Current view: top level - src/port - chklocale.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 29 41 70.7 %
Date: 2019-11-15 22:06:47 Functions: 1 1 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * chklocale.c
       4             :  *      Functions for handling locale-related info
       5             :  *
       6             :  *
       7             :  * Copyright (c) 1996-2019, PostgreSQL Global Development Group
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/port/chklocale.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : 
      16             : #ifndef FRONTEND
      17             : #include "postgres.h"
      18             : #else
      19             : #include "postgres_fe.h"
      20             : #endif
      21             : 
      22             : #ifdef HAVE_LANGINFO_H
      23             : #include <langinfo.h>
      24             : #endif
      25             : 
      26             : #include "mb/pg_wchar.h"
      27             : 
      28             : 
/*
 * This table needs to recognize all the CODESET spellings for supported
 * backend encodings, as well as frontend-only encodings where possible
 * (the latter case is currently only needed for initdb to recognize
 * error situations).  On Windows, we rely on entries for codepage
 * numbers (CPnnn).
 *
 * Note that we search the table with pg_strcasecmp(), so variant
 * capitalizations don't need their own entries.
 *
 * Each entry pairs one system-reported codeset spelling with the PostgreSQL
 * encoding ID that lookups return when that spelling matches.
 */
struct encoding_match
{
    /* encoding ID handed back to the caller when system_enc_name matches */
    enum pg_enc pg_enc_code;
    /* codeset spelling to compare against; NULL marks the end of the table */
    const char *system_enc_name;
};
      44             : 
/*
 * Lookup table mapping system codeset names to PostgreSQL encoding IDs.
 *
 * The lookup loops in this file scan the array from the top, compare each
 * system_enc_name with pg_strcasecmp(), and stop at the first match or at
 * the entry whose system_enc_name is NULL.  The "CPnnn" spellings are the
 * names built from Windows code page numbers.
 */
static const struct encoding_match encoding_match_list[] = {
    /* EUC encodings */
    {PG_EUC_JP, "EUC-JP"},
    {PG_EUC_JP, "eucJP"},
    {PG_EUC_JP, "IBM-eucJP"},
    {PG_EUC_JP, "sdeckanji"},
    {PG_EUC_JP, "CP20932"},

    {PG_EUC_CN, "EUC-CN"},
    {PG_EUC_CN, "eucCN"},
    {PG_EUC_CN, "IBM-eucCN"},
    {PG_EUC_CN, "GB2312"},
    {PG_EUC_CN, "dechanzi"},
    {PG_EUC_CN, "CP20936"},

    {PG_EUC_KR, "EUC-KR"},
    {PG_EUC_KR, "eucKR"},
    {PG_EUC_KR, "IBM-eucKR"},
    {PG_EUC_KR, "deckorean"},
    {PG_EUC_KR, "5601"},
    {PG_EUC_KR, "CP51949"},

    {PG_EUC_TW, "EUC-TW"},
    {PG_EUC_TW, "eucTW"},
    {PG_EUC_TW, "IBM-eucTW"},
    {PG_EUC_TW, "cns11643"},
    /* No codepage for EUC-TW ? */

    /* Unicode */
    {PG_UTF8, "UTF-8"},
    {PG_UTF8, "utf8"},
    {PG_UTF8, "CP65001"},

    /* ISO 8859 encodings known to PostgreSQL under LATINn names */
    {PG_LATIN1, "ISO-8859-1"},
    {PG_LATIN1, "ISO8859-1"},
    {PG_LATIN1, "iso88591"},
    {PG_LATIN1, "CP28591"},

    {PG_LATIN2, "ISO-8859-2"},
    {PG_LATIN2, "ISO8859-2"},
    {PG_LATIN2, "iso88592"},
    {PG_LATIN2, "CP28592"},

    {PG_LATIN3, "ISO-8859-3"},
    {PG_LATIN3, "ISO8859-3"},
    {PG_LATIN3, "iso88593"},
    {PG_LATIN3, "CP28593"},

    {PG_LATIN4, "ISO-8859-4"},
    {PG_LATIN4, "ISO8859-4"},
    {PG_LATIN4, "iso88594"},
    {PG_LATIN4, "CP28594"},

    {PG_LATIN5, "ISO-8859-9"},
    {PG_LATIN5, "ISO8859-9"},
    {PG_LATIN5, "iso88599"},
    {PG_LATIN5, "CP28599"},

    {PG_LATIN6, "ISO-8859-10"},
    {PG_LATIN6, "ISO8859-10"},
    {PG_LATIN6, "iso885910"},

    {PG_LATIN7, "ISO-8859-13"},
    {PG_LATIN7, "ISO8859-13"},
    {PG_LATIN7, "iso885913"},

    {PG_LATIN8, "ISO-8859-14"},
    {PG_LATIN8, "ISO8859-14"},
    {PG_LATIN8, "iso885914"},

    {PG_LATIN9, "ISO-8859-15"},
    {PG_LATIN9, "ISO8859-15"},
    {PG_LATIN9, "iso885915"},
    {PG_LATIN9, "CP28605"},

    {PG_LATIN10, "ISO-8859-16"},
    {PG_LATIN10, "ISO8859-16"},
    {PG_LATIN10, "iso885916"},

    /* KOI8 encodings */
    {PG_KOI8R, "KOI8-R"},
    {PG_KOI8R, "CP20866"},

    {PG_KOI8U, "KOI8-U"},
    {PG_KOI8U, "CP21866"},

    /* Encodings identified (almost) only by code page number */
    {PG_WIN866, "CP866"},
    {PG_WIN874, "CP874"},
    {PG_WIN1250, "CP1250"},
    {PG_WIN1251, "CP1251"},
    {PG_WIN1251, "ansi-1251"},
    {PG_WIN1252, "CP1252"},
    {PG_WIN1253, "CP1253"},
    {PG_WIN1254, "CP1254"},
    {PG_WIN1255, "CP1255"},
    {PG_WIN1256, "CP1256"},
    {PG_WIN1257, "CP1257"},
    {PG_WIN1258, "CP1258"},

    /* Remaining ISO 8859 encodings */
    {PG_ISO_8859_5, "ISO-8859-5"},
    {PG_ISO_8859_5, "ISO8859-5"},
    {PG_ISO_8859_5, "iso88595"},
    {PG_ISO_8859_5, "CP28595"},

    {PG_ISO_8859_6, "ISO-8859-6"},
    {PG_ISO_8859_6, "ISO8859-6"},
    {PG_ISO_8859_6, "iso88596"},
    {PG_ISO_8859_6, "CP28596"},

    {PG_ISO_8859_7, "ISO-8859-7"},
    {PG_ISO_8859_7, "ISO8859-7"},
    {PG_ISO_8859_7, "iso88597"},
    {PG_ISO_8859_7, "CP28597"},

    {PG_ISO_8859_8, "ISO-8859-8"},
    {PG_ISO_8859_8, "ISO8859-8"},
    {PG_ISO_8859_8, "iso88598"},
    {PG_ISO_8859_8, "CP28598"},

    /*
     * The comment above struct encoding_match says frontend-only encodings
     * are listed where possible; presumably that is what the following
     * entries are -- confirm against mb/pg_wchar.h.
     */
    {PG_SJIS, "SJIS"},
    {PG_SJIS, "PCK"},
    {PG_SJIS, "CP932"},
    {PG_SJIS, "SHIFT_JIS"},

    {PG_BIG5, "BIG5"},
    {PG_BIG5, "BIG5HKSCS"},
    {PG_BIG5, "Big5-HKSCS"},
    {PG_BIG5, "CP950"},

    {PG_GBK, "GBK"},
    {PG_GBK, "CP936"},

    {PG_UHC, "UHC"},
    {PG_UHC, "CP949"},

    {PG_JOHAB, "JOHAB"},
    {PG_JOHAB, "CP1361"},

    {PG_GB18030, "GB18030"},
    {PG_GB18030, "CP54936"},

    {PG_SHIFT_JIS_2004, "SJIS_2004"},

    {PG_SQL_ASCII, "US-ASCII"},

    /* The NULL name terminates the lookup loops; this code value is never returned */
    {PG_SQL_ASCII, NULL}        /* end marker */
};
     189             : 
     190             : #ifdef WIN32
     191             : /*
     192             :  * On Windows, use CP<code page number> instead of the nl_langinfo() result
     193             :  *
     194             :  * Visual Studio 2012 expanded the set of valid LC_CTYPE values, so have its
     195             :  * locale machinery determine the code page.  See comments at IsoLocaleName().
     196             :  * For other compilers, follow the locale's predictable format.
     197             :  *
     198             :  * Visual Studio 2015 should still be able to do the same, but the declaration
     199             :  * of lc_codepage is missing in _locale_t, causing this code compilation to
     200             :  * fail, hence this falls back instead on GetLocaleInfoEx. VS 2015 may be an
     201             :  * exception and post-VS2015 versions should be able to handle properly the
     202             :  * codepage number using _create_locale(). So, instead of the same logic as
     203             :  * VS 2012 and VS 2013, this routine uses GetLocaleInfoEx to parse short
     204             :  * locale names like "de-DE", "fr-FR", etc. If those cannot be parsed correctly
     205             :  * process falls back to the pre-VS-2010 manual parsing done with
     206             :  * using <Language>_<Country>.<CodePage> as a base.
     207             :  *
     208             :  * Returns a malloc()'d string for the caller to free.
     209             :  */
     210             : static char *
     211             : win32_langinfo(const char *ctype)
     212             : {
     213             :     char       *r = NULL;
     214             : 
     215             : #if defined(_MSC_VER) && (_MSC_VER < 1900)
     216             :     _locale_t   loct = NULL;
     217             : 
     218             :     loct = _create_locale(LC_CTYPE, ctype);
     219             :     if (loct != NULL)
     220             :     {
     221             :         r = malloc(16);         /* excess */
     222             :         if (r != NULL)
     223             :             sprintf(r, "CP%u", loct->locinfo->lc_codepage);
     224             :         _free_locale(loct);
     225             :     }
     226             : #else
     227             :     char       *codepage;
     228             : 
     229             : #if defined(_MSC_VER) && (_MSC_VER >= 1900)
     230             :     uint32      cp;
     231             :     WCHAR       wctype[LOCALE_NAME_MAX_LENGTH];
     232             : 
     233             :     memset(wctype, 0, sizeof(wctype));
     234             :     MultiByteToWideChar(CP_ACP, 0, ctype, -1, wctype, LOCALE_NAME_MAX_LENGTH);
     235             : 
     236             :     if (GetLocaleInfoEx(wctype,
     237             :                         LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
     238             :                         (LPWSTR) &cp, sizeof(cp) / sizeof(WCHAR)) > 0)
     239             :     {
     240             :         r = malloc(16);         /* excess */
     241             :         if (r != NULL)
     242             :             sprintf(r, "CP%u", cp);
     243             :     }
     244             :     else
     245             : #endif
     246             :     {
     247             :         /*
     248             :          * Locale format on Win32 is <Language>_<Country>.<CodePage> . For
     249             :          * example, English_United States.1252.
     250             :          */
     251             :         codepage = strrchr(ctype, '.');
     252             :         if (codepage != NULL)
     253             :         {
     254             :             int         ln;
     255             : 
     256             :             codepage++;
     257             :             ln = strlen(codepage);
     258             :             r = malloc(ln + 3);
     259             :             if (r != NULL)
     260             :                 sprintf(r, "CP%s", codepage);
     261             :         }
     262             : 
     263             :     }
     264             : #endif
     265             : 
     266             :     return r;
     267             : }
     268             : 
     269             : #ifndef FRONTEND
     270             : /*
     271             :  * Given a Windows code page identifier, find the corresponding PostgreSQL
     272             :  * encoding.  Issue a warning and return -1 if none found.
     273             :  */
     274             : int
     275             : pg_codepage_to_encoding(UINT cp)
     276             : {
     277             :     char        sys[16];
     278             :     int         i;
     279             : 
     280             :     sprintf(sys, "CP%u", cp);
     281             : 
     282             :     /* Check the table */
     283             :     for (i = 0; encoding_match_list[i].system_enc_name; i++)
     284             :         if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
     285             :             return encoding_match_list[i].pg_enc_code;
     286             : 
     287             :     ereport(WARNING,
     288             :             (errmsg("could not determine encoding for codeset \"%s\"", sys)));
     289             : 
     290             :     return -1;
     291             : }
     292             : #endif
     293             : #endif                          /* WIN32 */
     294             : 
     295             : #if (defined(HAVE_LANGINFO_H) && defined(CODESET)) || defined(WIN32)
     296             : 
     297             : /*
     298             :  * Given a setting for LC_CTYPE, return the Postgres ID of the associated
     299             :  * encoding, if we can determine it.  Return -1 if we can't determine it.
     300             :  *
     301             :  * Pass in NULL to get the encoding for the current locale setting.
     302             :  * Pass "" to get the encoding selected by the server's environment.
     303             :  *
     304             :  * If the result is PG_SQL_ASCII, callers should treat it as being compatible
     305             :  * with any desired encoding.
     306             :  *
     307             :  * If running in the backend and write_message is false, this function must
     308             :  * cope with the possibility that elog() and palloc() are not yet usable.
     309             :  */
     310             : int
     311       17242 : pg_get_encoding_from_locale(const char *ctype, bool write_message)
     312             : {
     313             :     char       *sys;
     314             :     int         i;
     315             : 
     316             :     /* Get the CODESET property, and also LC_CTYPE if not passed in */
     317       17242 :     if (ctype)
     318             :     {
     319             :         char       *save;
     320             :         char       *name;
     321             : 
     322             :         /* If locale is C or POSIX, we can allow all encodings */
     323        7674 :         if (pg_strcasecmp(ctype, "C") == 0 ||
     324        3522 :             pg_strcasecmp(ctype, "POSIX") == 0)
     325         960 :             return PG_SQL_ASCII;
     326             : 
     327        3192 :         save = setlocale(LC_CTYPE, NULL);
     328        3192 :         if (!save)
     329           0 :             return -1;          /* setlocale() broken? */
     330             :         /* must copy result, or it might change after setlocale */
     331        3192 :         save = strdup(save);
     332        3192 :         if (!save)
     333           0 :             return -1;          /* out of memory; unlikely */
     334             : 
     335        3192 :         name = setlocale(LC_CTYPE, ctype);
     336        3192 :         if (!name)
     337             :         {
     338           0 :             free(save);
     339           0 :             return -1;          /* bogus ctype passed in? */
     340             :         }
     341             : 
     342             : #ifndef WIN32
     343        3192 :         sys = nl_langinfo(CODESET);
     344        3192 :         if (sys)
     345        3192 :             sys = strdup(sys);
     346             : #else
     347             :         sys = win32_langinfo(name);
     348             : #endif
     349             : 
     350        3192 :         setlocale(LC_CTYPE, save);
     351        3192 :         free(save);
     352             :     }
     353             :     else
     354             :     {
     355             :         /* much easier... */
     356       13090 :         ctype = setlocale(LC_CTYPE, NULL);
     357       13090 :         if (!ctype)
     358           0 :             return -1;          /* setlocale() broken? */
     359             : 
     360             :         /* If locale is C or POSIX, we can allow all encodings */
     361       26104 :         if (pg_strcasecmp(ctype, "C") == 0 ||
     362       13014 :             pg_strcasecmp(ctype, "POSIX") == 0)
     363          76 :             return PG_SQL_ASCII;
     364             : 
     365             : #ifndef WIN32
     366       13014 :         sys = nl_langinfo(CODESET);
     367       13014 :         if (sys)
     368       13014 :             sys = strdup(sys);
     369             : #else
     370             :         sys = win32_langinfo(ctype);
     371             : #endif
     372             :     }
     373             : 
     374       16206 :     if (!sys)
     375           0 :         return -1;              /* out of memory; unlikely */
     376             : 
     377             :     /* Check the table */
     378      356532 :     for (i = 0; encoding_match_list[i].system_enc_name; i++)
     379             :     {
     380      356532 :         if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
     381             :         {
     382       16206 :             free(sys);
     383       16206 :             return encoding_match_list[i].pg_enc_code;
     384             :         }
     385             :     }
     386             : 
     387             :     /* Special-case kluges for particular platforms go here */
     388             : 
     389             : #ifdef __darwin__
     390             : 
     391             :     /*
     392             :      * Current macOS has many locales that report an empty string for CODESET,
     393             :      * but they all seem to actually use UTF-8.
     394             :      */
     395             :     if (strlen(sys) == 0)
     396             :     {
     397             :         free(sys);
     398             :         return PG_UTF8;
     399             :     }
     400             : #endif
     401             : 
     402             :     /*
     403             :      * We print a warning if we got a CODESET string but couldn't recognize
     404             :      * it.  This means we need another entry in the table.
     405             :      */
     406           0 :     if (write_message)
     407             :     {
     408             : #ifdef FRONTEND
     409           0 :         fprintf(stderr, _("could not determine encoding for locale \"%s\": codeset is \"%s\""),
     410             :                 ctype, sys);
     411             :         /* keep newline separate so there's only one translatable string */
     412           0 :         fputc('\n', stderr);
     413             : #else
     414           0 :         ereport(WARNING,
     415             :                 (errmsg("could not determine encoding for locale \"%s\": codeset is \"%s\"",
     416             :                         ctype, sys)));
     417             : #endif
     418             :     }
     419             : 
     420           0 :     free(sys);
     421           0 :     return -1;
     422             : }
     423             : #else                           /* (HAVE_LANGINFO_H && CODESET) || WIN32 */
     424             : 
     425             : /*
     426             :  * stub if no multi-language platform support
     427             :  *
     428             :  * Note: we could return -1 here, but that would have the effect of
     429             :  * forcing users to specify an encoding to initdb on such platforms.
     430             :  * It seems better to silently default to SQL_ASCII.
     431             :  */
     432             : int
     433             : pg_get_encoding_from_locale(const char *ctype, bool write_message)
     434             : {
     435             :     return PG_SQL_ASCII;
     436             : }
     437             : 
     438             : #endif                          /* (HAVE_LANGINFO_H && CODESET) || WIN32 */

Generated by: LCOV version 1.13