Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * ts_utils.c
4 : * various support functions
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/tsearch/ts_utils.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include <ctype.h>
18 :
19 : #include "catalog/pg_collation_d.h"
20 : #include "miscadmin.h"
21 : #include "tsearch/ts_locale.h"
22 : #include "tsearch/ts_public.h"
23 :
24 :
25 : /*
26 : * Given the base name and extension of a tsearch config file, return
27 : * its full path name. The base name is assumed to be user-supplied,
28 : * and is checked to prevent pathname attacks. The extension is assumed
29 : * to be safe.
30 : *
31 : * The result is a palloc'd string.
32 : */
33 : char *
34 384 : get_tsearch_config_filename(const char *basename,
35 : const char *extension)
36 : {
37 : char sharepath[MAXPGPATH];
38 : char *result;
39 :
40 : /*
41 : * We limit the basename to contain a-z, 0-9, and underscores. This may
42 : * be overly restrictive, but we don't want to allow access to anything
43 : * outside the tsearch_data directory, so for instance '/' *must* be
44 : * rejected, and on some platforms '\' and ':' are risky as well. Allowing
45 : * uppercase might result in incompatible behavior between case-sensitive
46 : * and case-insensitive filesystems, and non-ASCII characters create other
47 : * interesting risks, so on the whole a tight policy seems best.
48 : */
49 384 : if (strspn(basename, "abcdefghijklmnopqrstuvwxyz0123456789_") != strlen(basename))
50 0 : ereport(ERROR,
51 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
52 : errmsg("invalid text search configuration file name \"%s\"",
53 : basename)));
54 :
55 384 : get_share_path(my_exec_path, sharepath);
56 384 : result = palloc(MAXPGPATH);
57 384 : snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s",
58 : sharepath, basename, extension);
59 :
60 384 : return result;
61 : }
62 :
63 : /*
64 : * Reads a stop-word file. Each word is run through 'wordop'
65 : * function, if given. wordop may either modify the input in-place,
66 : * or palloc a new version.
67 : */
68 : void
69 38 : readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *, size_t, Oid))
70 : {
71 38 : char **stop = NULL;
72 :
73 38 : s->len = 0;
74 38 : if (fname && *fname)
75 : {
76 38 : char *filename = get_tsearch_config_filename(fname, "stop");
77 : tsearch_readline_state trst;
78 : char *line;
79 38 : int reallen = 0;
80 :
81 38 : if (!tsearch_readline_begin(&trst, filename))
82 0 : ereport(ERROR,
83 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
84 : errmsg("could not open stop-word file \"%s\": %m",
85 : filename)));
86 :
87 4864 : while ((line = tsearch_readline(&trst)) != NULL)
88 : {
89 4826 : char *pbuf = line;
90 :
91 : /* Trim trailing space */
92 23636 : while (*pbuf && !isspace((unsigned char) *pbuf))
93 18810 : pbuf += pg_mblen(pbuf);
94 4826 : *pbuf = '\0';
95 :
96 : /* Skip empty lines */
97 4826 : if (*line == '\0')
98 : {
99 0 : pfree(line);
100 0 : continue;
101 : }
102 :
103 4826 : if (s->len >= reallen)
104 : {
105 76 : if (reallen == 0)
106 : {
107 38 : reallen = 64;
108 38 : stop = (char **) palloc(sizeof(char *) * reallen);
109 : }
110 : else
111 : {
112 38 : reallen *= 2;
113 38 : stop = (char **) repalloc(stop, sizeof(char *) * reallen);
114 : }
115 : }
116 :
117 4826 : if (wordop)
118 : {
119 4826 : stop[s->len] = wordop(line, strlen(line), DEFAULT_COLLATION_OID);
120 4826 : if (stop[s->len] != line)
121 4826 : pfree(line);
122 : }
123 : else
124 0 : stop[s->len] = line;
125 :
126 4826 : (s->len)++;
127 : }
128 :
129 38 : tsearch_readline_end(&trst);
130 38 : pfree(filename);
131 : }
132 :
133 38 : s->stop = stop;
134 :
135 : /* Sort to allow binary searching */
136 38 : if (s->stop && s->len > 0)
137 38 : qsort(s->stop, s->len, sizeof(char *), pg_qsort_strcmp);
138 38 : }
139 :
140 : bool
141 15282 : searchstoplist(StopList *s, char *key)
142 : {
143 25552 : return (s->stop && s->len > 0 &&
144 10270 : bsearch(&key, s->stop, s->len,
145 : sizeof(char *), pg_qsort_strcmp));
146 : }
|