LCOV - code coverage report
Current view: top level - src/backend/tsearch - ts_utils.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 39 44 88.6 %
Date: 2025-01-29 20:17:04 Functions: 3 3 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * ts_utils.c
       4             :  *      various support functions
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  *
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/tsearch/ts_utils.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include <ctype.h>
      18             : 
      19             : #include "catalog/pg_collation_d.h"
      20             : #include "miscadmin.h"
      21             : #include "tsearch/ts_locale.h"
      22             : #include "tsearch/ts_public.h"
      23             : 
      24             : 
      25             : /*
      26             :  * Given the base name and extension of a tsearch config file, return
      27             :  * its full path name.  The base name is assumed to be user-supplied,
      28             :  * and is checked to prevent pathname attacks.  The extension is assumed
      29             :  * to be safe.
      30             :  *
      31             :  * The result is a palloc'd string.
      32             :  */
      33             : char *
      34         384 : get_tsearch_config_filename(const char *basename,
      35             :                             const char *extension)
      36             : {
      37             :     char        sharepath[MAXPGPATH];
      38             :     char       *result;
      39             : 
      40             :     /*
      41             :      * We limit the basename to contain a-z, 0-9, and underscores.  This may
      42             :      * be overly restrictive, but we don't want to allow access to anything
      43             :      * outside the tsearch_data directory, so for instance '/' *must* be
      44             :      * rejected, and on some platforms '\' and ':' are risky as well. Allowing
      45             :      * uppercase might result in incompatible behavior between case-sensitive
      46             :      * and case-insensitive filesystems, and non-ASCII characters create other
      47             :      * interesting risks, so on the whole a tight policy seems best.
      48             :      */
      49         384 :     if (strspn(basename, "abcdefghijklmnopqrstuvwxyz0123456789_") != strlen(basename))
      50           0 :         ereport(ERROR,
      51             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
      52             :                  errmsg("invalid text search configuration file name \"%s\"",
      53             :                         basename)));
      54             : 
      55         384 :     get_share_path(my_exec_path, sharepath);
      56         384 :     result = palloc(MAXPGPATH);
      57         384 :     snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s",
      58             :              sharepath, basename, extension);
      59             : 
      60         384 :     return result;
      61             : }
      62             : 
      63             : /*
      64             :  * Reads a stop-word file. Each word is run through 'wordop'
      65             :  * function, if given.  wordop may either modify the input in-place,
      66             :  * or palloc a new version.
      67             :  */
      68             : void
      69          38 : readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *, size_t, Oid))
      70             : {
      71          38 :     char      **stop = NULL;
      72             : 
      73          38 :     s->len = 0;
      74          38 :     if (fname && *fname)
      75             :     {
      76          38 :         char       *filename = get_tsearch_config_filename(fname, "stop");
      77             :         tsearch_readline_state trst;
      78             :         char       *line;
      79          38 :         int         reallen = 0;
      80             : 
      81          38 :         if (!tsearch_readline_begin(&trst, filename))
      82           0 :             ereport(ERROR,
      83             :                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
      84             :                      errmsg("could not open stop-word file \"%s\": %m",
      85             :                             filename)));
      86             : 
      87        4864 :         while ((line = tsearch_readline(&trst)) != NULL)
      88             :         {
      89        4826 :             char       *pbuf = line;
      90             : 
      91             :             /* Trim trailing space */
      92       23636 :             while (*pbuf && !isspace((unsigned char) *pbuf))
      93       18810 :                 pbuf += pg_mblen(pbuf);
      94        4826 :             *pbuf = '\0';
      95             : 
      96             :             /* Skip empty lines */
      97        4826 :             if (*line == '\0')
      98             :             {
      99           0 :                 pfree(line);
     100           0 :                 continue;
     101             :             }
     102             : 
     103        4826 :             if (s->len >= reallen)
     104             :             {
     105          76 :                 if (reallen == 0)
     106             :                 {
     107          38 :                     reallen = 64;
     108          38 :                     stop = (char **) palloc(sizeof(char *) * reallen);
     109             :                 }
     110             :                 else
     111             :                 {
     112          38 :                     reallen *= 2;
     113          38 :                     stop = (char **) repalloc(stop, sizeof(char *) * reallen);
     114             :                 }
     115             :             }
     116             : 
     117        4826 :             if (wordop)
     118             :             {
     119        4826 :                 stop[s->len] = wordop(line, strlen(line), DEFAULT_COLLATION_OID);
     120        4826 :                 if (stop[s->len] != line)
     121        4826 :                     pfree(line);
     122             :             }
     123             :             else
     124           0 :                 stop[s->len] = line;
     125             : 
     126        4826 :             (s->len)++;
     127             :         }
     128             : 
     129          38 :         tsearch_readline_end(&trst);
     130          38 :         pfree(filename);
     131             :     }
     132             : 
     133          38 :     s->stop = stop;
     134             : 
     135             :     /* Sort to allow binary searching */
     136          38 :     if (s->stop && s->len > 0)
     137          38 :         qsort(s->stop, s->len, sizeof(char *), pg_qsort_strcmp);
     138          38 : }
     139             : 
     140             : bool
     141       15282 : searchstoplist(StopList *s, char *key)
     142             : {
     143       25552 :     return (s->stop && s->len > 0 &&
     144       10270 :             bsearch(&key, s->stop, s->len,
     145             :                     sizeof(char *), pg_qsort_strcmp));
     146             : }

Generated by: LCOV version 1.14