Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * like.c
4 : * like expression handling code.
5 : *
6 : * NOTES
7 : * A big hack of the regexp.c code!! Contributed by
8 : * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
9 : *
10 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
11 : * Portions Copyright (c) 1994, Regents of the University of California
12 : *
13 : * IDENTIFICATION
14 : * src/backend/utils/adt/like.c
15 : *
16 : *-------------------------------------------------------------------------
17 : */
18 : #include "postgres.h"
19 :
20 : #include <ctype.h>
21 :
22 : #include "catalog/pg_collation.h"
23 : #include "mb/pg_wchar.h"
24 : #include "miscadmin.h"
25 : #include "utils/fmgrprotos.h"
26 : #include "utils/pg_locale.h"
27 : #include "varatt.h"
28 :
29 :
/*
 * Result codes for the pattern-matching routines.  The SQL-callable
 * functions in this file only ever test the result against LIKE_TRUE, so
 * every other code is reported to the caller as "no match".
 *
 * NOTE(review): LIKE_FALSE and LIKE_ABORT are not referenced anywhere else
 * in this file; presumably they are produced inside like_match.c -- confirm
 * there before changing their values.
 */
#define LIKE_TRUE 1
#define LIKE_FALSE 0
#define LIKE_ABORT (-1)
33 :
34 :
/*
 * Forward declarations.
 *
 * The *_MatchText and *_do_like_escape functions have no body written out
 * in this file: like_match.c is #included several times further down, each
 * time with MatchText (and, for the first two passes, do_like_escape)
 * #defined to one of the names declared here.
 */

/* Single-byte encodings, case-sensitive. */
static int SB_MatchText(const char *t, int tlen, const char *p, int plen,
						pg_locale_t locale);
static text *SB_do_like_escape(text *pat, text *esc);

/* Multibyte encodings, case-sensitive. */
static int MB_MatchText(const char *t, int tlen, const char *p, int plen,
						pg_locale_t locale);
static text *MB_do_like_escape(text *pat, text *esc);

/* UTF-8, using a NextChar that does not call pg_mblen. */
static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
						  pg_locale_t locale);

/* Single-byte encodings, case-insensitive (folds via SB_lower_char). */
static int SB_IMatchText(const char *t, int tlen, const char *p, int plen,
						 pg_locale_t locale);

/* Dispatchers defined in this file; they pick one of the variants above. */
static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
51 :
/*--------------------
 * Support routine for MatchText.  Compares the multibyte character that
 * starts at p1 with the one that starts at p2.
 *
 * Returns 1 when both characters have the same length (as reported by
 * pg_mblen) and identical bytes, and 0 otherwise.
 *--------------------
 */
static inline int
wchareq(const char *p1, const char *p2)
{
	int			nbytes;
	int			i;

	/* Cheap rejection: characters with different lead bytes cannot match. */
	if (*p1 != *p2)
		return 0;

	/* The characters must occupy the same number of bytes ... */
	nbytes = pg_mblen(p1);
	if (nbytes != pg_mblen(p2))
		return 0;

	/* ... and every one of those bytes must agree. */
	for (i = 0; i < nbytes; i++)
	{
		if (p1[i] != p2[i])
			return 0;
	}

	return 1;
}
78 :
79 : /*
80 : * Formerly we had a routine iwchareq() here that tried to do case-insensitive
81 : * comparison of multibyte characters. It did not work at all, however,
82 : * because it relied on tolower() which has a single-byte API ... and
83 : * towlower() wouldn't be much better since we have no suitably cheap way
84 : * of getting a single character transformed to the system's wchar_t format.
85 : * So now, we just downcase the strings using lower() and apply regular LIKE
86 : * comparison. This should be revisited when we install better locale support.
87 : */
88 :
89 : /*
90 : * We do handle case-insensitive matching for single-byte encodings using
91 : * fold-on-the-fly processing, however.
92 : */
93 : static char
94 0 : SB_lower_char(unsigned char c, pg_locale_t locale)
95 : {
96 0 : if (locale->ctype_is_c)
97 0 : return pg_ascii_tolower(c);
98 : else
99 0 : return tolower_l(c, locale->info.lt);
100 : }
101 :
102 :
/* Step a (pointer, remaining-length) pair forward by exactly one byte. */
#define NextByte(p, plen) ((p)++, (plen)--)

/*
 * like_match.c is #included four times below.  Before each inclusion the
 * macros it is parameterized by are (re)defined, and MatchText (plus
 * do_like_escape in the first two passes) is #defined to the name of one of
 * the static functions declared near the top of this file.
 *
 * NOTE(review): NextChar and MatchText are redefined between inclusions
 * without any #undef here, so like_match.c presumably #undefs its parameter
 * macros at its end -- confirm against like_match.c before reordering
 * anything in this section.
 */

/*
 * Pass 1: multibyte characters.
 *	CHAREQ		compares two whole characters via wchareq().
 *	NextChar	advances by pg_mblen(p) bytes.
 *	CopyAdvChar	copies pg_mblen(src) bytes from src to dst, advancing both
 *				pointers and reducing srclen by the same amount.
 */
#define CHAREQ(p1, p2) wchareq((p1), (p2))
#define NextChar(p, plen) \
	do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
#define CopyAdvChar(dst, src, srclen) \
	do { int __l = pg_mblen(src); \
		(srclen) -= __l; \
		while (__l-- > 0) \
			*(dst)++ = *(src)++; \
	} while (0)

#define MatchText MB_MatchText
#define do_like_escape MB_do_like_escape

#include "like_match.c"

/*
 * Pass 2: single-byte characters.  Every operation works on exactly one
 * byte: CHAREQ compares one byte, NextChar is NextByte, and CopyAdvChar
 * copies one byte.
 */
#define CHAREQ(p1, p2) (*(p1) == *(p2))
#define NextChar(p, plen) NextByte((p), (plen))
#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)

#define MatchText SB_MatchText
#define do_like_escape SB_do_like_escape

#include "like_match.c"

/*
 * Pass 3: single-byte, case-insensitive.  MATCH_LOWER folds a byte with
 * SB_lower_char() using the locale handed to the match function.
 *
 * NOTE(review): neither CHAREQ, CopyAdvChar nor do_like_escape is defined
 * for this pass; presumably like_match.c builds its own comparison when
 * MATCH_LOWER is defined and omits do_like_escape when that macro is
 * absent -- confirm against like_match.c.
 */
#define MATCH_LOWER(t, locale) SB_lower_char((unsigned char) (t), locale)
#define NextChar(p, plen) NextByte((p), (plen))
#define MatchText SB_IMatchText

#include "like_match.c"

/*
 * Pass 4: UTF8 encoding, using a fast NextChar.  Instead of calling
 * pg_mblen(), it steps one byte and then keeps stepping while input remains
 * and the current byte has the bit pattern 10xxxxxx, i.e. is a UTF-8
 * continuation byte.
 *
 * NOTE(review): CHAREQ is not defined for this pass either; see the note
 * on pass 3.
 */

#define NextChar(p, plen) \
	do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
#define MatchText UTF8_MatchText

#include "like_match.c"
145 :
146 : /* Generic for all cases not requiring inline case-folding */
147 : static inline int
148 1008036 : GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
149 : {
150 1008036 : if (collation)
151 : {
152 1008036 : pg_locale_t locale = pg_newlocale_from_collation(collation);
153 :
154 1008036 : if (!locale->deterministic)
155 24 : ereport(ERROR,
156 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
157 : errmsg("nondeterministic collations are not supported for LIKE")));
158 : }
159 :
160 1008012 : if (pg_database_encoding_max_length() == 1)
161 79960 : return SB_MatchText(s, slen, p, plen, 0);
162 928052 : else if (GetDatabaseEncoding() == PG_UTF8)
163 928052 : return UTF8_MatchText(s, slen, p, plen, 0);
164 : else
165 0 : return MB_MatchText(s, slen, p, plen, 0);
166 : }
167 :
168 : static inline int
169 83846 : Generic_Text_IC_like(text *str, text *pat, Oid collation)
170 : {
171 : char *s,
172 : *p;
173 : int slen,
174 : plen;
175 : pg_locale_t locale;
176 :
177 83846 : if (!OidIsValid(collation))
178 : {
179 : /*
180 : * This typically means that the parser could not resolve a conflict
181 : * of implicit collations, so report it that way.
182 : */
183 0 : ereport(ERROR,
184 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
185 : errmsg("could not determine which collation to use for ILIKE"),
186 : errhint("Use the COLLATE clause to set the collation explicitly.")));
187 : }
188 :
189 83846 : locale = pg_newlocale_from_collation(collation);
190 :
191 83846 : if (!locale->deterministic)
192 12 : ereport(ERROR,
193 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
194 : errmsg("nondeterministic collations are not supported for ILIKE")));
195 :
196 : /*
197 : * For efficiency reasons, in the single byte case we don't call lower()
198 : * on the pattern and text, but instead call SB_lower_char on each
199 : * character. In the multi-byte case we don't have much choice :-(. Also,
200 : * ICU does not support single-character case folding, so we go the long
201 : * way.
202 : */
203 :
204 83834 : if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU))
205 : {
206 83834 : pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
207 : PointerGetDatum(pat)));
208 83834 : p = VARDATA_ANY(pat);
209 83834 : plen = VARSIZE_ANY_EXHDR(pat);
210 83834 : str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
211 : PointerGetDatum(str)));
212 83834 : s = VARDATA_ANY(str);
213 83834 : slen = VARSIZE_ANY_EXHDR(str);
214 83834 : if (GetDatabaseEncoding() == PG_UTF8)
215 83834 : return UTF8_MatchText(s, slen, p, plen, 0);
216 : else
217 0 : return MB_MatchText(s, slen, p, plen, 0);
218 : }
219 : else
220 : {
221 0 : p = VARDATA_ANY(pat);
222 0 : plen = VARSIZE_ANY_EXHDR(pat);
223 0 : s = VARDATA_ANY(str);
224 0 : slen = VARSIZE_ANY_EXHDR(str);
225 0 : return SB_IMatchText(s, slen, p, plen, locale);
226 : }
227 : }
228 :
229 : /*
230 : * interface routines called by the function manager
231 : */
232 :
233 : Datum
234 159784 : namelike(PG_FUNCTION_ARGS)
235 : {
236 159784 : Name str = PG_GETARG_NAME(0);
237 159784 : text *pat = PG_GETARG_TEXT_PP(1);
238 : bool result;
239 : char *s,
240 : *p;
241 : int slen,
242 : plen;
243 :
244 159784 : s = NameStr(*str);
245 159784 : slen = strlen(s);
246 159784 : p = VARDATA_ANY(pat);
247 159784 : plen = VARSIZE_ANY_EXHDR(pat);
248 :
249 159784 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
250 :
251 159784 : PG_RETURN_BOOL(result);
252 : }
253 :
254 : Datum
255 5346 : namenlike(PG_FUNCTION_ARGS)
256 : {
257 5346 : Name str = PG_GETARG_NAME(0);
258 5346 : text *pat = PG_GETARG_TEXT_PP(1);
259 : bool result;
260 : char *s,
261 : *p;
262 : int slen,
263 : plen;
264 :
265 5346 : s = NameStr(*str);
266 5346 : slen = strlen(s);
267 5346 : p = VARDATA_ANY(pat);
268 5346 : plen = VARSIZE_ANY_EXHDR(pat);
269 :
270 5346 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
271 :
272 5346 : PG_RETURN_BOOL(result);
273 : }
274 :
275 : Datum
276 525620 : textlike(PG_FUNCTION_ARGS)
277 : {
278 525620 : text *str = PG_GETARG_TEXT_PP(0);
279 525620 : text *pat = PG_GETARG_TEXT_PP(1);
280 : bool result;
281 : char *s,
282 : *p;
283 : int slen,
284 : plen;
285 :
286 525620 : s = VARDATA_ANY(str);
287 525620 : slen = VARSIZE_ANY_EXHDR(str);
288 525620 : p = VARDATA_ANY(pat);
289 525620 : plen = VARSIZE_ANY_EXHDR(pat);
290 :
291 525620 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
292 :
293 525596 : PG_RETURN_BOOL(result);
294 : }
295 :
296 : Datum
297 317286 : textnlike(PG_FUNCTION_ARGS)
298 : {
299 317286 : text *str = PG_GETARG_TEXT_PP(0);
300 317286 : text *pat = PG_GETARG_TEXT_PP(1);
301 : bool result;
302 : char *s,
303 : *p;
304 : int slen,
305 : plen;
306 :
307 317286 : s = VARDATA_ANY(str);
308 317286 : slen = VARSIZE_ANY_EXHDR(str);
309 317286 : p = VARDATA_ANY(pat);
310 317286 : plen = VARSIZE_ANY_EXHDR(pat);
311 :
312 317286 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
313 :
314 317286 : PG_RETURN_BOOL(result);
315 : }
316 :
317 : Datum
318 12 : bytealike(PG_FUNCTION_ARGS)
319 : {
320 12 : bytea *str = PG_GETARG_BYTEA_PP(0);
321 12 : bytea *pat = PG_GETARG_BYTEA_PP(1);
322 : bool result;
323 : char *s,
324 : *p;
325 : int slen,
326 : plen;
327 :
328 12 : s = VARDATA_ANY(str);
329 12 : slen = VARSIZE_ANY_EXHDR(str);
330 12 : p = VARDATA_ANY(pat);
331 12 : plen = VARSIZE_ANY_EXHDR(pat);
332 :
333 12 : result = (SB_MatchText(s, slen, p, plen, 0) == LIKE_TRUE);
334 :
335 12 : PG_RETURN_BOOL(result);
336 : }
337 :
338 : Datum
339 12 : byteanlike(PG_FUNCTION_ARGS)
340 : {
341 12 : bytea *str = PG_GETARG_BYTEA_PP(0);
342 12 : bytea *pat = PG_GETARG_BYTEA_PP(1);
343 : bool result;
344 : char *s,
345 : *p;
346 : int slen,
347 : plen;
348 :
349 12 : s = VARDATA_ANY(str);
350 12 : slen = VARSIZE_ANY_EXHDR(str);
351 12 : p = VARDATA_ANY(pat);
352 12 : plen = VARSIZE_ANY_EXHDR(pat);
353 :
354 12 : result = (SB_MatchText(s, slen, p, plen, 0) != LIKE_TRUE);
355 :
356 12 : PG_RETURN_BOOL(result);
357 : }
358 :
359 : /*
360 : * Case-insensitive versions
361 : */
362 :
363 : Datum
364 15320 : nameiclike(PG_FUNCTION_ARGS)
365 : {
366 15320 : Name str = PG_GETARG_NAME(0);
367 15320 : text *pat = PG_GETARG_TEXT_PP(1);
368 : bool result;
369 : text *strtext;
370 :
371 15320 : strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
372 : NameGetDatum(str)));
373 15320 : result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
374 :
375 15320 : PG_RETURN_BOOL(result);
376 : }
377 :
378 : Datum
379 6 : nameicnlike(PG_FUNCTION_ARGS)
380 : {
381 6 : Name str = PG_GETARG_NAME(0);
382 6 : text *pat = PG_GETARG_TEXT_PP(1);
383 : bool result;
384 : text *strtext;
385 :
386 6 : strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
387 : NameGetDatum(str)));
388 6 : result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
389 :
390 6 : PG_RETURN_BOOL(result);
391 : }
392 :
393 : Datum
394 68464 : texticlike(PG_FUNCTION_ARGS)
395 : {
396 68464 : text *str = PG_GETARG_TEXT_PP(0);
397 68464 : text *pat = PG_GETARG_TEXT_PP(1);
398 : bool result;
399 :
400 68464 : result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
401 :
402 68452 : PG_RETURN_BOOL(result);
403 : }
404 :
405 : Datum
406 56 : texticnlike(PG_FUNCTION_ARGS)
407 : {
408 56 : text *str = PG_GETARG_TEXT_PP(0);
409 56 : text *pat = PG_GETARG_TEXT_PP(1);
410 : bool result;
411 :
412 56 : result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
413 :
414 56 : PG_RETURN_BOOL(result);
415 : }
416 :
417 : /*
418 : * like_escape() --- given a pattern and an ESCAPE string,
419 : * convert the pattern to use Postgres' standard backslash escape convention.
420 : */
421 : Datum
422 212 : like_escape(PG_FUNCTION_ARGS)
423 : {
424 212 : text *pat = PG_GETARG_TEXT_PP(0);
425 212 : text *esc = PG_GETARG_TEXT_PP(1);
426 : text *result;
427 :
428 212 : if (pg_database_encoding_max_length() == 1)
429 0 : result = SB_do_like_escape(pat, esc);
430 : else
431 212 : result = MB_do_like_escape(pat, esc);
432 :
433 212 : PG_RETURN_TEXT_P(result);
434 : }
435 :
436 : /*
437 : * like_escape_bytea() --- given a pattern and an ESCAPE string,
438 : * convert the pattern to use Postgres' standard backslash escape convention.
439 : */
440 : Datum
441 12 : like_escape_bytea(PG_FUNCTION_ARGS)
442 : {
443 12 : bytea *pat = PG_GETARG_BYTEA_PP(0);
444 12 : bytea *esc = PG_GETARG_BYTEA_PP(1);
445 12 : bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
446 :
447 12 : PG_RETURN_BYTEA_P((bytea *) result);
448 : }
|