LCOV - code coverage report
Current view: top level - src/backend/tsearch - ts_locale.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 39 52 75.0 %
Date: 2025-01-18 04:15:08 Functions: 5 6 83.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * ts_locale.c
       4             :  *      locale compatibility layer for tsearch
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  *
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/tsearch/ts_locale.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #include "postgres.h"
      15             : 
      16             : #include "common/string.h"
      17             : #include "storage/fd.h"
      18             : #include "tsearch/ts_locale.h"
      19             : 
/* Error context callback installed by tsearch_readline_begin(); defined below */
static void tsearch_readline_callback(void *arg);


/*
 * WC_BUF_LEN is the size, in wchar_t units, of the output buffer that
 * t_isalpha() and t_isalnum() hand to char2wchar().
 *
 * The reason these functions use a 3-wchar_t output buffer, not 2 as you
 * might expect, is that on Windows "wchar_t" is 16 bits and what we'll be
 * getting from char2wchar() is UTF16 not UTF32.  A single input character
 * may therefore produce a surrogate pair rather than just one wchar_t;
 * we also need room for a trailing null.  When we do get a surrogate pair,
 * we pass just the first code to iswdigit() etc, so that these functions will
 * always return false for characters outside the Basic Multilingual Plane.
 */
#define WC_BUF_LEN  3
      33             : 
      34             : int
      35       10284 : t_isalpha(const char *ptr)
      36             : {
      37       10284 :     int         clen = pg_mblen(ptr);
      38             :     wchar_t     character[WC_BUF_LEN];
      39       10284 :     pg_locale_t mylocale = 0;   /* TODO */
      40             : 
      41       10284 :     if (clen == 1 || database_ctype_is_c)
      42       10284 :         return isalpha(TOUCHAR(ptr));
      43             : 
      44           0 :     char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
      45             : 
      46           0 :     return iswalpha((wint_t) character[0]);
      47             : }
      48             : 
      49             : int
      50     2767806 : t_isalnum(const char *ptr)
      51             : {
      52     2767806 :     int         clen = pg_mblen(ptr);
      53             :     wchar_t     character[WC_BUF_LEN];
      54     2767806 :     pg_locale_t mylocale = 0;   /* TODO */
      55             : 
      56     2767806 :     if (clen == 1 || database_ctype_is_c)
      57     2767806 :         return isalnum(TOUCHAR(ptr));
      58             : 
      59           0 :     char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
      60             : 
      61           0 :     return iswalnum((wint_t) character[0]);
      62             : }
      63             : 
      64             : 
      65             : /*
      66             :  * Set up to read a file using tsearch_readline().  This facility is
      67             :  * better than just reading the file directly because it provides error
      68             :  * context pointing to the specific line where a problem is detected.
      69             :  *
      70             :  * Expected usage is:
      71             :  *
      72             :  *      tsearch_readline_state trst;
      73             :  *
      74             :  *      if (!tsearch_readline_begin(&trst, filename))
      75             :  *          ereport(ERROR,
      76             :  *                  (errcode(ERRCODE_CONFIG_FILE_ERROR),
      77             :  *                   errmsg("could not open stop-word file \"%s\": %m",
      78             :  *                          filename)));
      79             :  *      while ((line = tsearch_readline(&trst)) != NULL)
      80             :  *          process line;
      81             :  *      tsearch_readline_end(&trst);
      82             :  *
      83             :  * Note that the caller supplies the ereport() for file open failure;
      84             :  * this is so that a custom message can be provided.  The filename string
      85             :  * passed to tsearch_readline_begin() must remain valid through
      86             :  * tsearch_readline_end().
      87             :  */
      88             : bool
      89         556 : tsearch_readline_begin(tsearch_readline_state *stp,
      90             :                        const char *filename)
      91             : {
      92         556 :     if ((stp->fp = AllocateFile(filename, "r")) == NULL)
      93           0 :         return false;
      94         556 :     stp->filename = filename;
      95         556 :     stp->lineno = 0;
      96         556 :     initStringInfo(&stp->buf);
      97         556 :     stp->curline = NULL;
      98             :     /* Setup error traceback support for ereport() */
      99         556 :     stp->cb.callback = tsearch_readline_callback;
     100         556 :     stp->cb.arg = stp;
     101         556 :     stp->cb.previous = error_context_stack;
     102         556 :     error_context_stack = &stp->cb;
     103         556 :     return true;
     104             : }
     105             : 
     106             : /*
     107             :  * Read the next line from a tsearch data file (expected to be in UTF-8), and
     108             :  * convert it to database encoding if needed. The returned string is palloc'd.
     109             :  * NULL return means EOF.
     110             :  */
     111             : char *
     112       25466 : tsearch_readline(tsearch_readline_state *stp)
     113             : {
     114             :     char       *recoded;
     115             : 
     116             :     /* Advance line number to use in error reports */
     117       25466 :     stp->lineno++;
     118             : 
     119             :     /* Clear curline, it's no longer relevant */
     120       25466 :     if (stp->curline)
     121             :     {
     122       24910 :         if (stp->curline != stp->buf.data)
     123           0 :             pfree(stp->curline);
     124       24910 :         stp->curline = NULL;
     125             :     }
     126             : 
     127             :     /* Collect next line, if there is one */
     128       25466 :     if (!pg_get_line_buf(stp->fp, &stp->buf))
     129         470 :         return NULL;
     130             : 
     131             :     /* Validate the input as UTF-8, then convert to DB encoding if needed */
     132       24996 :     recoded = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8);
     133             : 
     134             :     /* Save the correctly-encoded string for possible error reports */
     135       24996 :     stp->curline = recoded;      /* might be equal to buf.data */
     136             : 
     137             :     /*
     138             :      * We always return a freshly pstrdup'd string.  This is clearly necessary
     139             :      * if pg_any_to_server() returned buf.data, and we need a second copy even
     140             :      * if encoding conversion did occur.  The caller is entitled to pfree the
     141             :      * returned string at any time, which would leave curline pointing to
     142             :      * recycled storage, causing problems if an error occurs after that point.
     143             :      * (It's preferable to return the result of pstrdup instead of the output
     144             :      * of pg_any_to_server, because the conversion result tends to be
     145             :      * over-allocated.  Since callers might save the result string directly
     146             :      * into a long-lived dictionary structure, we don't want it to be a larger
     147             :      * palloc chunk than necessary.  We'll reclaim the conversion result on
     148             :      * the next call.)
     149             :      */
     150       24996 :     return pstrdup(recoded);
     151             : }
     152             : 
     153             : /*
     154             :  * Close down after reading a file with tsearch_readline()
     155             :  */
     156             : void
     157         556 : tsearch_readline_end(tsearch_readline_state *stp)
     158             : {
     159             :     /* Suppress use of curline in any error reported below */
     160         556 :     if (stp->curline)
     161             :     {
     162          86 :         if (stp->curline != stp->buf.data)
     163           0 :             pfree(stp->curline);
     164          86 :         stp->curline = NULL;
     165             :     }
     166             : 
     167             :     /* Release other resources */
     168         556 :     pfree(stp->buf.data);
     169         556 :     FreeFile(stp->fp);
     170             : 
     171             :     /* Pop the error context stack */
     172         556 :     error_context_stack = stp->cb.previous;
     173         556 : }
     174             : 
     175             : /*
     176             :  * Error context callback for errors occurring while reading a tsearch
     177             :  * configuration file.
     178             :  */
     179             : static void
     180           0 : tsearch_readline_callback(void *arg)
     181             : {
     182           0 :     tsearch_readline_state *stp = (tsearch_readline_state *) arg;
     183             : 
     184             :     /*
     185             :      * We can't include the text of the config line for errors that occur
     186             :      * during tsearch_readline() itself.  The major cause of such errors is
     187             :      * encoding violations, and we daren't try to print error messages
     188             :      * containing badly-encoded data.
     189             :      */
     190           0 :     if (stp->curline)
     191           0 :         errcontext("line %d of configuration file \"%s\": \"%s\"",
     192             :                    stp->lineno,
     193             :                    stp->filename,
     194             :                    stp->curline);
     195             :     else
     196           0 :         errcontext("line %d of configuration file \"%s\"",
     197             :                    stp->lineno,
     198             :                    stp->filename);
     199           0 : }

Generated by: LCOV version 1.14