LCOV - code coverage report
Current view: top level - src/backend/tsearch - ts_locale.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 72 95 75.8 %
Date: 2019-11-21 12:06:29 Functions: 10 11 90.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * ts_locale.c
       4             :  *      locale compatibility layer for tsearch
       5             :  *
       6             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       7             :  *
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/tsearch/ts_locale.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #include "postgres.h"
      15             : 
      16             : #include "catalog/pg_collation.h"
      17             : #include "storage/fd.h"
      18             : #include "tsearch/ts_locale.h"
      19             : #include "tsearch/ts_public.h"
      20             : 
      21             : static void tsearch_readline_callback(void *arg);   /* error context callback, defined later in this file */
      22             : 
      23             : 
      24             : /*
      25             :  * The reason these functions use a 3-wchar_t output buffer, not 2 as you
      26             :  * might expect, is that on Windows "wchar_t" is 16 bits and what we'll be
      27             :  * getting from char2wchar() is UTF16 not UTF32.  A single input character
      28             :  * may therefore produce a surrogate pair rather than just one wchar_t;
      29             :  * we also need room for a trailing null.  When we do get a surrogate pair,
      30             :  * we pass just the first code to iswdigit() etc, so that these functions will
      31             :  * always return false for characters outside the Basic Multilingual Plane.
      32             :  */
      33             : #define WC_BUF_LEN  3   /* room for a surrogate pair plus the trailing null */
      34             : 
      35             : int
      36      351602 : t_isdigit(const char *ptr)
      37             : {
                           /*
                            * Test whether the character starting at ptr is a digit.  The result
                            * is whatever isdigit() or iswdigit() returns for it.
                            */
      38      351602 :     int         clen = pg_mblen(ptr);
      39             :     wchar_t     character[WC_BUF_LEN];
      40      351602 :     Oid         collation = DEFAULT_COLLATION_OID;  /* TODO */
      41      351602 :     pg_locale_t mylocale = 0;   /* TODO */
      42             : 
                           /* One-byte character, or C ctype: classify the single byte directly */
      43      351602 :     if (clen == 1 || lc_ctype_is_c(collation))
      44      351602 :         return isdigit(TOUCHAR(ptr));
      45             : 
                           /*
                            * Otherwise convert the clen-byte character to wchar_t and test only
                            * the first element of the result (see the WC_BUF_LEN comment).
                            */
      46           0 :     char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
      47             : 
      48           0 :     return iswdigit((wint_t) character[0]);
      49             : }
      50             : 
      51             : int
      52      612298 : t_isspace(const char *ptr)
      53             : {
                           /*
                            * Test whether the character starting at ptr is whitespace.  The
                            * result is whatever isspace() or iswspace() returns for it.
                            */
      54      612298 :     int         clen = pg_mblen(ptr);
      55             :     wchar_t     character[WC_BUF_LEN];
      56      612298 :     Oid         collation = DEFAULT_COLLATION_OID;  /* TODO */
      57      612298 :     pg_locale_t mylocale = 0;   /* TODO */
      58             : 
                           /* One-byte character, or C ctype: classify the single byte directly */
      59      612298 :     if (clen == 1 || lc_ctype_is_c(collation))
      60      612298 :         return isspace(TOUCHAR(ptr));
      61             : 
                           /*
                            * Otherwise convert the clen-byte character to wchar_t and test only
                            * the first element of the result (see the WC_BUF_LEN comment).
                            */
      62           0 :     char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
      63             : 
      64           0 :     return iswspace((wint_t) character[0]);
      65             : }
      66             : 
      67             : int
      68     2266110 : t_isalpha(const char *ptr)
      69             : {
                           /*
                            * Test whether the character starting at ptr is alphabetic.  The
                            * result is whatever isalpha() or iswalpha() returns for it.
                            */
      70     2266110 :     int         clen = pg_mblen(ptr);
      71             :     wchar_t     character[WC_BUF_LEN];
      72     2266110 :     Oid         collation = DEFAULT_COLLATION_OID;  /* TODO */
      73     2266110 :     pg_locale_t mylocale = 0;   /* TODO */
      74             : 
                           /* One-byte character, or C ctype: classify the single byte directly */
      75     2266110 :     if (clen == 1 || lc_ctype_is_c(collation))
      76     2266110 :         return isalpha(TOUCHAR(ptr));
      77             : 
                           /*
                            * Otherwise convert the clen-byte character to wchar_t and test only
                            * the first element of the result (see the WC_BUF_LEN comment).
                            */
      78           0 :     char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
      79             : 
      80           0 :     return iswalpha((wint_t) character[0]);
      81             : }
      82             : 
      83             : int
      84        2922 : t_isprint(const char *ptr)
      85             : {
                           /*
                            * Test whether the character starting at ptr is printable.  The
                            * result is whatever isprint() or iswprint() returns for it.
                            */
      86        2922 :     int         clen = pg_mblen(ptr);
      87             :     wchar_t     character[WC_BUF_LEN];
      88        2922 :     Oid         collation = DEFAULT_COLLATION_OID;  /* TODO */
      89        2922 :     pg_locale_t mylocale = 0;   /* TODO */
      90             : 
                           /* One-byte character, or C ctype: classify the single byte directly */
      91        2922 :     if (clen == 1 || lc_ctype_is_c(collation))
      92        2922 :         return isprint(TOUCHAR(ptr));
      93             : 
                           /*
                            * Otherwise convert the clen-byte character to wchar_t and test only
                            * the first element of the result (see the WC_BUF_LEN comment).
                            */
      94           0 :     char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
      95             : 
      96           0 :     return iswprint((wint_t) character[0]);
      97             : }
      98             : 
      99             : 
     100             : /*
     101             :  * Set up to read a file using tsearch_readline().  This facility is
     102             :  * better than just reading the file directly because it provides error
     103             :  * context pointing to the specific line where a problem is detected.
     104             :  *
     105             :  * Expected usage is:
     106             :  *
     107             :  *      tsearch_readline_state trst;
     108             :  *
     109             :  *      if (!tsearch_readline_begin(&trst, filename))
     110             :  *          ereport(ERROR,
     111             :  *                  (errcode(ERRCODE_CONFIG_FILE_ERROR),
     112             :  *                   errmsg("could not open stop-word file \"%s\": %m",
     113             :  *                          filename)));
     114             :  *      while ((line = tsearch_readline(&trst)) != NULL)
     115             :  *          process line;
     116             :  *      tsearch_readline_end(&trst);
     117             :  *
     118             :  * Note that the caller supplies the ereport() for file open failure;
     119             :  * this is so that a custom message can be provided.  The filename string
     120             :  * passed to tsearch_readline_begin() must remain valid through
     121             :  * tsearch_readline_end().
     122             :  */
     123             : bool
     124         402 : tsearch_readline_begin(tsearch_readline_state *stp,
     125             :                        const char *filename)
     126             : {
                           /*
                            * If the file cannot be opened, return false before the error context
                            * stack is touched; reporting the failure is left to the caller.
                            */
     127         402 :     if ((stp->fp = AllocateFile(filename, "r")) == NULL)
     128           0 :         return false;
                           /* Only the pointer is kept, not a copy of the filename string */
     129         402 :     stp->filename = filename;
                           /* No line has been read yet */
     130         402 :     stp->lineno = 0;
     131         402 :     stp->curline = NULL;
     132             :     /* Setup error traceback support for ereport() */
     133         402 :     stp->cb.callback = tsearch_readline_callback;
     134         402 :     stp->cb.arg = (void *) stp;
                           /*
                            * Push our entry on the stack, remembering the old top so that
                            * tsearch_readline_end() can restore it.
                            */
     135         402 :     stp->cb.previous = error_context_stack;
     136         402 :     error_context_stack = &stp->cb;
     137         402 :     return true;
     138             : }
     139             : 
     140             : /*
     141             :  * Read the next line from a tsearch data file (expected to be in UTF-8), and
     142             :  * convert it to database encoding if needed. The returned string is palloc'd.
     143             :  * NULL return means EOF.
     144             :  */
     145             : char *
     146       18834 : tsearch_readline(tsearch_readline_state *stp)
     147             : {
     148             :     char       *result;
     149             : 
                           /*
                            * Bump the line number before reading, so that an error raised inside
                            * t_readline() is reported against the line being read.
                            */
     150       18834 :     stp->lineno++;
                           /*
                            * Clear curline while t_readline() runs; the error context callback
                            * then reports the line number and file name without any line text.
                            */
     151       18834 :     stp->curline = NULL;
     152       18834 :     result = t_readline(stp->fp);
                           /*
                            * Remember the line (NULL at EOF) for the error context callback.
                            *
                            * NOTE(review): curline points at the same string that is returned to
                            * the caller.  If a caller frees that string and an error is raised
                            * before the next call here, the callback would print freed memory --
                            * confirm against the callers.
                            */
     153       18834 :     stp->curline = result;
     154       18834 :     return result;
     155             : }
     156             : 
     157             : /*
     158             :  * Close down after reading a file with tsearch_readline()
     159             :  */
     160             : void
     161         402 : tsearch_readline_end(tsearch_readline_state *stp)
     162             : {
                           /* Release the file opened by tsearch_readline_begin() */
     163         402 :     FreeFile(stp->fp);
     164             :     /* Pop the error context stack */
                           /* (restores the stack top that tsearch_readline_begin() saved) */
     165         402 :     error_context_stack = stp->cb.previous;
     166         402 : }
     167             : 
     168             : /*
     169             :  * Error context callback for errors occurring while reading a tsearch
     170             :  * configuration file.
     171             :  */
     172             : static void
     173           0 : tsearch_readline_callback(void *arg)
     174             : {
                           /* arg is the state struct registered by tsearch_readline_begin() */
     175           0 :     tsearch_readline_state *stp = (tsearch_readline_state *) arg;
     176             : 
     177             :     /*
     178             :      * We can't include the text of the config line for errors that occur
     179             :      * during t_readline() itself.  This is only partly a consequence of our
     180             :      * arms-length use of that routine: the major cause of such errors is
     181             :      * encoding violations, and we daren't try to print error messages
     182             :      * containing badly-encoded data.
     183             :      */
                           /* curline is non-NULL only after tsearch_readline() has returned a line */
     184           0 :     if (stp->curline)
     185           0 :         errcontext("line %d of configuration file \"%s\": \"%s\"",
     186             :                    stp->lineno,
     187             :                    stp->filename,
     188             :                    stp->curline);
     189             :     else
     190           0 :         errcontext("line %d of configuration file \"%s\"",
     191             :                    stp->lineno,
     192             :                    stp->filename);
     193           0 : }
     194             : 
     195             : 
     196             : /*
     197             :  * Read the next line from a tsearch data file (expected to be in UTF-8), and
     198             :  * convert it to database encoding if needed. The returned string is palloc'd.
     199             :  * NULL return means EOF.
     200             :  *
     201             :  * Note: direct use of this function is now deprecated.  Go through
     202             :  * tsearch_readline() to provide better error reporting.
     203             :  */
     204             : char *
     205       18834 : t_readline(FILE *fp)
     206             : {
     207             :     int         len;
     208             :     char       *recoded;
     209             :     char        buf[4096];      /* lines must not be longer than this */
     210             : 
                           /*
                            * fgets() stores at most sizeof(buf) - 1 bytes.  A NULL result (end of
                            * file or read error; the two are not distinguished here) is passed on
                            * to the caller as NULL.
                            *
                            * NOTE(review): a longer line is not rejected; the remainder is simply
                            * returned by the next call as if it were a separate line, and the
                            * split may fall inside a multibyte sequence -- confirm this is
                            * acceptable for all callers.
                            */
     211       18834 :     if (fgets(buf, sizeof(buf), fp) == NULL)
     212         342 :         return NULL;
     213             : 
                           /* len includes the trailing newline if fgets() stored one */
     214       18492 :     len = strlen(buf);
     215             : 
     216             :     /* Make sure the input is valid UTF-8 */
                           /*
                            * The result is discarded; presumably invalid input is reported by
                            * raising an error inside pg_verify_mbstr() -- TODO confirm.
                            */
     217       18492 :     (void) pg_verify_mbstr(PG_UTF8, buf, len, false);
     218             : 
     219             :     /* And convert */
     220       18492 :     recoded = pg_any_to_server(buf, len, PG_UTF8);
     221       18492 :     if (recoded == buf)
     222             :     {
     223             :         /*
     224             :          * conversion didn't pstrdup, so we must. We can use the length of the
     225             :          * original string, because no conversion was done.
     226             :          */
                               /* buf is a local array, so it must not be returned itself */
     227       18492 :         recoded = pnstrdup(recoded, len);
     228             :     }
     229             : 
     230       18492 :     return recoded;
     231             : }
     232             : 
     233             : /*
     234             :  * lowerstr --- fold null-terminated string to lower case
     235             :  *
     236             :  * Returned string is palloc'd
     237             :  */
     238             : char *
     239       12572 : lowerstr(const char *str)
     240             : {
                           /* Measure the string with strlen() and let lowerstr_with_len() do the work */
     241       12572 :     return lowerstr_with_len(str, strlen(str));
     242             : }
     243             : 
     244             : /*
     245             :  * lowerstr_with_len --- fold string to lower case
     246             :  *
     247             :  * Input string need not be null-terminated.
     248             :  *
     249             :  * Returned string is palloc'd
     250             :  */
     251             : char *
     252      267172 : lowerstr_with_len(const char *str, int len)
     253             : {
     254             :     char       *out;
     255      267172 :     Oid         collation = DEFAULT_COLLATION_OID;  /* TODO */
     256      267172 :     pg_locale_t mylocale = 0;   /* TODO */
     257             : 
                           /* Empty input: hand back a freshly allocated empty string */
     258      267172 :     if (len == 0)
     259           0 :         return pstrdup("");
     260             : 
     261             :     /*
     262             :      * Use wide char code only when max encoding length > 1 and ctype != C.
     263             :      * Some operating systems fail with multi-byte encodings and a C locale.
     264             :      * Also, for a C locale there is no need to process as multibyte. From
     265             :      * backend/utils/adt/oracle_compat.c Teodor
     266             :      */
     267      267172 :     if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collation))
     268      267172 :     {
     269             :         wchar_t    *wstr,
     270             :                    *wptr;
     271             :         int         wlen;
     272             : 
     273             :         /*
     274             :          * alloc number of wchar_t for worst case, len contains number of
     275             :          * bytes >= number of characters and alloc 1 wchar_t for 0, because
     276             :          * wchar2char wants zero-terminated string
     277             :          */
     278      267172 :         wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
     279             : 
                               /* wlen receives the value returned by char2wchar() */
     280      267172 :         wlen = char2wchar(wstr, len + 1, str, len, mylocale);
     281             :         Assert(wlen <= len);
     282             : 
                               /*
                                * Lower-case in place, stopping at the first zero element; this
                                * presumably relies on char2wchar() zero-terminating its output
                                * -- TODO confirm.
                                */
     283     2482060 :         while (*wptr)
     284             :         {
     285     1947716 :             *wptr = towlower((wint_t) *wptr);
     286     1947716 :             wptr++;
     287             :         }
     288             : 
     289             :         /*
     290             :          * Alloc result string for worst case + '\0'
     291             :          */
                               /* From here on, len is the size in bytes of the output buffer */
     292      267172 :         len = pg_database_encoding_max_length() * wlen + 1;
     293      267172 :         out = (char *) palloc(len);
     294             : 
     295      267172 :         wlen = wchar2char(out, wstr, len, mylocale);
     296             : 
                               /* The wide copy is freed before the result is checked */
     297      267172 :         pfree(wstr);
     298             : 
                               /* A negative result from wchar2char() is reported as an error */
     299      267172 :         if (wlen < 0)
     300           0 :             ereport(ERROR,
     301             :                     (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
     302             :                      errmsg("conversion from wchar_t to server encoding failed: %m")));
     303             :         Assert(wlen < len);
     304             :     }
     305             :     else
     306             :     {
                               /*
                                * Max encoding length is 1, or ctype is C: lower-case one byte at a
                                * time with tolower().
                                */
     307           0 :         const char *ptr = str;
     308             :         char       *outptr;
     309             : 
     310           0 :         outptr = out = (char *) palloc(sizeof(char) * (len + 1));
                               /* Stop after len bytes, or sooner at a zero byte in the input */
     311           0 :         while ((ptr - str) < len && *ptr)
     312             :         {
     313           0 :             *outptr++ = tolower(TOUCHAR(ptr));
     314           0 :             ptr++;
     315             :         }
     316           0 :         *outptr = '\0';
     317             :     }
     318             : 
     319      267172 :     return out;
     320             : }

Generated by: LCOV version 1.13