LCOV - code coverage report
Current view: top level - src/backend/tsearch - ts_locale.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 77.5 % 40 31
Test Date: 2026-03-01 22:14:38 Functions: 58.3 % 12 7
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * ts_locale.c
       4              :  *      locale compatibility layer for tsearch
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  *
       8              :  *
       9              :  * IDENTIFICATION
      10              :  *    src/backend/tsearch/ts_locale.c
      11              :  *
      12              :  *-------------------------------------------------------------------------
      13              :  */
      14              : #include "postgres.h"
      15              : 
      16              : #include "common/string.h"
      17              : #include "storage/fd.h"
      18              : #include "tsearch/ts_locale.h"
      19              : 
      20              : static void tsearch_readline_callback(void *arg);
      21              : 
      22              : 
      23              : /* space for a single character plus a trailing NUL */
      24              : #define WC_BUF_LEN  2
      25              : 
      26              : #define GENERATE_T_ISCLASS_DEF(character_class) \
      27              : /* \
                      :  * Expands to four functions for one character class: \
                      :  * t_isCLASS_with_len, t_isCLASS_cstr, t_isCLASS_unbounded and the \
                      :  * bare t_isCLASS.  This first one converts the mblen bytes at ptr \
                      :  * to wide characters with pg_mb2wchar_with_len() and returns the \
                      :  * result of pg_iswCLASS() on the first of them, using \
                      :  * pg_database_locale().  The other three only compute mblen and \
                      :  * forward here. \
                      :  * \
                      :  * mblen shall be that of the first character \
                      :  */ \
      28              : int \
      29              : t_is##character_class##_with_len(const char *ptr, int mblen) \
      30              : { \
      31              :     pg_wchar    wstr[WC_BUF_LEN]; \
      32              :     int         wlen pg_attribute_unused(); \
      33              :     wlen = pg_mb2wchar_with_len(ptr, wstr, mblen); \
      34              :     Assert(wlen <= 1); \
      35              :     /* pass single character, or NUL if empty */ \
      36              :     return pg_isw##character_class(wstr[0], pg_database_locale()); \
      37              : } \
      38              : \
      39              : /* ptr shall point to a NUL-terminated string; the character length is taken from pg_mblen_cstr() */ \
      40              : int \
      41              : t_is##character_class##_cstr(const char *ptr) \
      42              : { \
      43              :     return t_is##character_class##_with_len(ptr, pg_mblen_cstr(ptr)); \
      44              : } \
      45              : /* ptr shall point to a string with pre-validated encoding; the character length is taken from pg_mblen_unbounded() */ \
      46              : int \
      47              : t_is##character_class##_unbounded(const char *ptr) \
      48              : { \
      49              :     return t_is##character_class##_with_len(ptr, pg_mblen_unbounded(ptr)); \
      50              : } \
      51              : /* historical name for _unbounded; simply forwards to it */ \
      52              : int \
      53              : t_is##character_class(const char *ptr) \
      54              : { \
      55              :     return t_is##character_class##_unbounded(ptr); \
      56              : }
      57              : 
      58      1652955 : GENERATE_T_ISCLASS_DEF(alnum)
      59        10284 : GENERATE_T_ISCLASS_DEF(alpha)
      60              : 
      61              : /*
      62              :  * Set up to read a file using tsearch_readline().  This facility is
      63              :  * better than just reading the file directly because it provides error
      64              :  * context pointing to the specific line where a problem is detected.
      65              :  *
      66              :  * Expected usage is:
      67              :  *
      68              :  *      tsearch_readline_state trst;
      69              :  *
      70              :  *      if (!tsearch_readline_begin(&trst, filename))
      71              :  *          ereport(ERROR,
      72              :  *                  (errcode(ERRCODE_CONFIG_FILE_ERROR),
      73              :  *                   errmsg("could not open stop-word file \"%s\": %m",
      74              :  *                          filename)));
      75              :  *      while ((line = tsearch_readline(&trst)) != NULL)
      76              :  *          process line;
      77              :  *      tsearch_readline_end(&trst);
      78              :  *
      79              :  * Note that the caller supplies the ereport() for file open failure;
      80              :  * this is so that a custom message can be provided.  The filename string
      81              :  * passed to tsearch_readline_begin() must remain valid through
      82              :  * tsearch_readline_end().
                      :  *
                      :  * Returns false if AllocateFile() cannot open the file; in that case only
                      :  * stp->fp has been written, no line buffer has been initialized and no
                      :  * error context callback has been installed.  Returns true once *stp is
                      :  * fully set up.  The filename pointer is stored as-is (not copied), which
                      :  * is why the string must outlive the read.
      83              :  */
      84              : bool
      85          278 : tsearch_readline_begin(tsearch_readline_state *stp,
      86              :                        const char *filename)
      87              : {
      88          278 :     if ((stp->fp = AllocateFile(filename, "r")) == NULL)
      89            0 :         return false;
      90          278 :     stp->filename = filename;
      91          278 :     stp->lineno = 0;
      92          278 :     initStringInfo(&stp->buf);
      93          278 :     stp->curline = NULL;
      94              :     /* Setup error traceback support for ereport(): push our callback onto
                      :      * error_context_stack, remembering the previous top in cb.previous so
                      :      * that tsearch_readline_end() can restore it */
      95          278 :     stp->cb.callback = tsearch_readline_callback;
      96          278 :     stp->cb.arg = stp;
      97          278 :     stp->cb.previous = error_context_stack;
      98          278 :     error_context_stack = &stp->cb;
      99          278 :     return true;
     100              : }
     101              : 
     102              : /*
     103              :  * Read the next line from a tsearch data file (expected to be in UTF-8), and
     104              :  * convert it to database encoding if needed. The returned string is palloc'd.
     105              :  * NULL return means EOF.
                      :  *
                      :  * stp->lineno is advanced on every call, including the call that reports
                      :  * EOF.  stp->curline is reset to NULL before the read and is set again only
                      :  * after pg_any_to_server() has returned, so it stays NULL while the read
                      :  * and the conversion are in progress and after an EOF return.
                      :  *
                      :  * NOTE(review): any false return from pg_get_line_buf() is reported as EOF
                      :  * here; whether that can also indicate a read error is not visible in this
                      :  * file -- confirm against the contract of pg_get_line_buf().
     106              :  */
     107              : char *
     108        12733 : tsearch_readline(tsearch_readline_state *stp)
     109              : {
     110              :     char       *recoded;
     111              : 
     112              :     /* Advance line number to use in error reports */
     113        12733 :     stp->lineno++;
     114              : 
     115              :     /* Clear curline, it's no longer relevant (free it only when it is a
                      :      * separate allocation rather than buf.data itself) */
     116        12733 :     if (stp->curline)
     117              :     {
     118        12455 :         if (stp->curline != stp->buf.data)
     119            0 :             pfree(stp->curline);
     120        12455 :         stp->curline = NULL;
     121              :     }
     122              : 
     123              :     /* Collect next line, if there is one */
     124        12733 :     if (!pg_get_line_buf(stp->fp, &stp->buf))
     125          235 :         return NULL;
     126              : 
     127              :     /* Validate the input as UTF-8, then convert to DB encoding if needed */
     128        12498 :     recoded = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8);
     129              : 
     130              :     /* Save the correctly-encoded string for possible error reports */
     131        12498 :     stp->curline = recoded;      /* might be equal to buf.data */
     132              : 
     133              :     /*
     134              :      * We always return a freshly pstrdup'd string.  This is clearly necessary
     135              :      * if pg_any_to_server() returned buf.data, and we need a second copy even
     136              :      * if encoding conversion did occur.  The caller is entitled to pfree the
     137              :      * returned string at any time, which would leave curline pointing to
     138              :      * recycled storage, causing problems if an error occurs after that point.
     139              :      * (It's preferable to return the result of pstrdup instead of the output
     140              :      * of pg_any_to_server, because the conversion result tends to be
     141              :      * over-allocated.  Since callers might save the result string directly
     142              :      * into a long-lived dictionary structure, we don't want it to be a larger
     143              :      * palloc chunk than necessary.  We'll reclaim the conversion result on
     144              :      * the next call.)
     145              :      */
     146        12498 :     return pstrdup(recoded);
     147              : }
     148              : 
     149              : /*
     150              :  * Close down after reading a file with tsearch_readline()
                      :  *
                      :  * Frees the saved current line (when it is a separate allocation from the
                      :  * line buffer), frees the line buffer, closes the file with FreeFile(), and
                      :  * finally restores error_context_stack to the value that
                      :  * tsearch_readline_begin() saved in cb.previous.  The callback stays
                      :  * installed until that last step, which is why curline is cleared first.
     151              :  */
     152              : void
     153          278 : tsearch_readline_end(tsearch_readline_state *stp)
     154              : {
     155              :     /* Suppress use of curline in any error reported below */
     156          278 :     if (stp->curline)
     157              :     {
     158           43 :         if (stp->curline != stp->buf.data)
     159            0 :             pfree(stp->curline);
     160           43 :         stp->curline = NULL;
     161              :     }
     162              : 
     163              :     /* Release other resources */
     164          278 :     pfree(stp->buf.data);
     165          278 :     FreeFile(stp->fp);
     166              : 
     167              :     /* Pop the error context stack */
     168          278 :     error_context_stack = stp->cb.previous;
     169          278 : }
     170              : 
     171              : /*
     172              :  * Error context callback for errors occurring while reading a tsearch
     173              :  * configuration file.
                      :  *
                      :  * arg is the tsearch_readline_state that tsearch_readline_begin() stored in
                      :  * cb.arg.  Adds the line number and file name to the error context, plus
                      :  * the text of the current line when stp->curline is set.
     174              :  */
     175              : static void
     176            0 : tsearch_readline_callback(void *arg)
     177              : {
     178            0 :     tsearch_readline_state *stp = (tsearch_readline_state *) arg;
     179              : 
     180              :     /*
     181              :      * We can't include the text of the config line for errors that occur
     182              :      * during tsearch_readline() itself.  The major cause of such errors is
     183              :      * encoding violations, and we daren't try to print error messages
     184              :      * containing badly-encoded data.
                      :      * (tsearch_readline() keeps curline NULL until the conversion has
                      :      * returned, which is what selects the second branch below.)
     185              :      */
     186            0 :     if (stp->curline)
     187            0 :         errcontext("line %d of configuration file \"%s\": \"%s\"",
     188              :                    stp->lineno,
     189              :                    stp->filename,
     190              :                    stp->curline);
     191              :     else
     192            0 :         errcontext("line %d of configuration file \"%s\"",
     193              :                    stp->lineno,
     194              :                    stp->filename);
     195            0 : }
        

Generated by: LCOV version 2.0-1