Line data Source code
1 : /*------------------------------------------------------------------------- 2 : * 3 : * ts_locale.c 4 : * locale compatibility layer for tsearch 5 : * 6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group 7 : * 8 : * 9 : * IDENTIFICATION 10 : * src/backend/tsearch/ts_locale.c 11 : * 12 : *------------------------------------------------------------------------- 13 : */ 14 : #include "postgres.h" 15 : 16 : #include "common/string.h" 17 : #include "storage/fd.h" 18 : #include "tsearch/ts_locale.h" 19 : 20 : static void tsearch_readline_callback(void *arg); 21 : 22 : 23 : /* space for a single character plus a trailing NUL */ 24 : #define WC_BUF_LEN 2 25 : 26 : int 27 10284 : t_isalpha(const char *ptr) 28 : { 29 : pg_wchar wstr[WC_BUF_LEN]; 30 : int wlen pg_attribute_unused(); 31 : 32 10284 : wlen = pg_mb2wchar_with_len(ptr, wstr, pg_mblen(ptr)); 33 : Assert(wlen <= 1); 34 : 35 : /* pass single character, or NUL if empty */ 36 10284 : return pg_iswalpha(wstr[0], pg_database_locale()); 37 : } 38 : 39 : int 40 2791982 : t_isalnum(const char *ptr) 41 : { 42 : pg_wchar wstr[WC_BUF_LEN]; 43 : int wlen pg_attribute_unused(); 44 : 45 2791982 : wlen = pg_mb2wchar_with_len(ptr, wstr, pg_mblen(ptr)); 46 : Assert(wlen <= 1); 47 : 48 : /* pass single character, or NUL if empty */ 49 2791982 : return pg_iswalnum(wstr[0], pg_database_locale()); 50 : } 51 : 52 : 53 : /* 54 : * Set up to read a file using tsearch_readline(). This facility is 55 : * better than just reading the file directly because it provides error 56 : * context pointing to the specific line where a problem is detected. 57 : * 58 : * Expected usage is: 59 : * 60 : * tsearch_readline_state trst; 61 : * 62 : * if (!tsearch_readline_begin(&trst, filename)) 63 : * ereport(ERROR, 64 : * (errcode(ERRCODE_CONFIG_FILE_ERROR), 65 : * errmsg("could not open stop-word file \"%s\": %m", 66 : * filename))); 67 : * while ((line = tsearch_readline(&trst)) != NULL) 68 : * process line; 69 : * tsearch_readline_end(&trst); 70 : * 71 : * Note that the caller supplies the ereport() for file open failure; 72 : * this is so that a custom message can be provided. The filename string 73 : * passed to tsearch_readline_begin() must remain valid through 74 : * tsearch_readline_end(). 75 : */ 76 : bool 77 556 : tsearch_readline_begin(tsearch_readline_state *stp, 78 : const char *filename) 79 : { 80 556 : if ((stp->fp = AllocateFile(filename, "r")) == NULL) 81 0 : return false; 82 556 : stp->filename = filename; 83 556 : stp->lineno = 0; 84 556 : initStringInfo(&stp->buf); 85 556 : stp->curline = NULL; 86 : /* Setup error traceback support for ereport() */ 87 556 : stp->cb.callback = tsearch_readline_callback; 88 556 : stp->cb.arg = stp; 89 556 : stp->cb.previous = error_context_stack; 90 556 : error_context_stack = &stp->cb; 91 556 : return true; 92 : } 93 : 94 : /* 95 : * Read the next line from a tsearch data file (expected to be in UTF-8), and 96 : * convert it to database encoding if needed. The returned string is palloc'd. 97 : * NULL return means EOF. 98 : */ 99 : char * 100 25466 : tsearch_readline(tsearch_readline_state *stp) 101 : { 102 : char *recoded; 103 : 104 : /* Advance line number to use in error reports */ 105 25466 : stp->lineno++; 106 : 107 : /* Clear curline, it's no longer relevant */ 108 25466 : if (stp->curline) 109 : { 110 24910 : if (stp->curline != stp->buf.data) 111 0 : pfree(stp->curline); 112 24910 : stp->curline = NULL; 113 : } 114 : 115 : /* Collect next line, if there is one */ 116 25466 : if (!pg_get_line_buf(stp->fp, &stp->buf)) 117 470 : return NULL; 118 : 119 : /* Validate the input as UTF-8, then convert to DB encoding if needed */ 120 24996 : recoded = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8); 121 : 122 : /* Save the correctly-encoded string for possible error reports */ 123 24996 : stp->curline = recoded; /* might be equal to buf.data */ 124 : 125 : /* 126 : * We always return a freshly pstrdup'd string. This is clearly necessary 127 : * if pg_any_to_server() returned buf.data, and we need a second copy even 128 : * if encoding conversion did occur. The caller is entitled to pfree the 129 : * returned string at any time, which would leave curline pointing to 130 : * recycled storage, causing problems if an error occurs after that point. 131 : * (It's preferable to return the result of pstrdup instead of the output 132 : * of pg_any_to_server, because the conversion result tends to be 133 : * over-allocated. Since callers might save the result string directly 134 : * into a long-lived dictionary structure, we don't want it to be a larger 135 : * palloc chunk than necessary. We'll reclaim the conversion result on 136 : * the next call.) 137 : */ 138 24996 : return pstrdup(recoded); 139 : } 140 : 141 : /* 142 : * Close down after reading a file with tsearch_readline() 143 : */ 144 : void 145 556 : tsearch_readline_end(tsearch_readline_state *stp) 146 : { 147 : /* Suppress use of curline in any error reported below */ 148 556 : if (stp->curline) 149 : { 150 86 : if (stp->curline != stp->buf.data) 151 0 : pfree(stp->curline); 152 86 : stp->curline = NULL; 153 : } 154 : 155 : /* Release other resources */ 156 556 : pfree(stp->buf.data); 157 556 : FreeFile(stp->fp); 158 : 159 : /* Pop the error context stack */ 160 556 : error_context_stack = stp->cb.previous; 161 556 : } 162 : 163 : /* 164 : * Error context callback for errors occurring while reading a tsearch 165 : * configuration file. 166 : */ 167 : static void 168 0 : tsearch_readline_callback(void *arg) 169 : { 170 0 : tsearch_readline_state *stp = (tsearch_readline_state *) arg; 171 : 172 : /* 173 : * We can't include the text of the config line for errors that occur 174 : * during tsearch_readline() itself. The major cause of such errors is 175 : * encoding violations, and we daren't try to print error messages 176 : * containing badly-encoded data. 177 : */ 178 0 : if (stp->curline) 179 0 : errcontext("line %d of configuration file \"%s\": \"%s\"", 180 : stp->lineno, 181 : stp->filename, 182 : stp->curline); 183 : else 184 0 : errcontext("line %d of configuration file \"%s\"", 185 : stp->lineno, 186 : stp->filename); 187 0 : }