Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * like.c
4 : * like expression handling code.
5 : *
6 : * NOTES
7 : * A big hack of the regexp.c code!! Contributed by
8 : * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
9 : *
10 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
11 : * Portions Copyright (c) 1994, Regents of the University of California
12 : *
13 : * IDENTIFICATION
14 : * src/backend/utils/adt/like.c
15 : *
16 : *-------------------------------------------------------------------------
17 : */
18 : #include "postgres.h"
19 :
20 : #include <ctype.h>
21 :
22 : #include "catalog/pg_collation.h"
23 : #include "mb/pg_wchar.h"
24 : #include "miscadmin.h"
25 : #include "utils/fmgrprotos.h"
26 : #include "utils/pg_locale.h"
27 : #include "varatt.h"
28 :
29 : /* Result codes for the MatchText family declared below. */
30 : #define LIKE_TRUE 1 /* the only value the interface routines in this file test for */
31 : #define LIKE_FALSE 0
32 : #define LIKE_ABORT (-1) /* NOTE(review): never tested explicitly in this file; its exact meaning is set by like_match.c -- confirm there */
33 :
34 : /* Forward declarations. The *_MatchText and *_do_like_escape names are the ones MatchText / do_like_escape are #defined to before each #include of like_match.c. */
35 : static int SB_MatchText(const char *t, int tlen, const char *p, int plen,
36 : pg_locale_t locale, bool locale_is_c);
37 : static text *SB_do_like_escape(text *pat, text *esc);
38 :
39 : static int MB_MatchText(const char *t, int tlen, const char *p, int plen,
40 : pg_locale_t locale, bool locale_is_c);
41 : static text *MB_do_like_escape(text *pat, text *esc);
42 :
43 : static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
44 : pg_locale_t locale, bool locale_is_c);
45 :
46 : static int SB_IMatchText(const char *t, int tlen, const char *p, int plen,
47 : pg_locale_t locale, bool locale_is_c);
48 : /* Dispatch helpers defined later in this file; they choose a matcher from the database encoding. */
49 : static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
50 : static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
51 :
/*
 * wchareq
 *		Compare the multibyte characters that start at p1 and p2.
 *
 * Support routine for the multibyte MatchText variant.  Returns 1 when the
 * two characters have the same encoded length and identical bytes, and 0
 * otherwise.
 */
static inline int
wchareq(const char *p1, const char *p2)
{
	int			nbytes;
	int			remaining;

	/* Cheap rejection: differing first bytes mean differing characters. */
	if (*p1 != *p2)
		return 0;

	/* The encoded lengths have to agree as well. */
	nbytes = pg_mblen(p1);
	if (nbytes != pg_mblen(p2))
		return 0;

	/* Equal lengths: walk both characters byte by byte. */
	for (remaining = nbytes; remaining != 0; remaining--)
	{
		if (*p1++ != *p2++)
			return 0;
	}

	return 1;
}
78 :
79 : /*
80 : * Formerly we had a routine iwchareq() here that tried to do case-insensitive
81 : * comparison of multibyte characters. It did not work at all, however,
82 : * because it relied on tolower() which has a single-byte API ... and
83 : * towlower() wouldn't be much better since we have no suitably cheap way
84 : * of getting a single character transformed to the system's wchar_t format.
85 : * So now, we just downcase the strings using lower() and apply regular LIKE
86 : * comparison. This should be revisited when we install better locale support.
87 : */
88 :
89 : /*
90 : * We do handle case-insensitive matching for single-byte encodings using
91 : * fold-on-the-fly processing, however.
92 : */
93 : static char
94 0 : SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
95 : {
96 0 : if (locale_is_c)
97 0 : return pg_ascii_tolower(c);
98 0 : else if (locale)
99 0 : return tolower_l(c, locale->info.lt);
100 : else
101 0 : return pg_tolower(c);
102 : }
103 :
104 : /* Byte-wise advance: move the pointer forward one byte and shrink the remaining length by one. */
105 : #define NextByte(p, plen) ((p)++, (plen)--)
106 : /* like_match.c is textually included four times below; each pass #defines MatchText (and, in the first two passes, do_like_escape) to a distinct function name. */
107 : /* Pass 1: set up to compile like_match.c for multibyte characters; character widths come from pg_mblen() */
108 : #define CHAREQ(p1, p2) wchareq((p1), (p2))
109 : #define NextChar(p, plen) \
110 : do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
111 : #define CopyAdvChar(dst, src, srclen) \
112 : do { int __l = pg_mblen(src); \
113 : (srclen) -= __l; \
114 : while (__l-- > 0) \
115 : *(dst)++ = *(src)++; \
116 : } while (0)
117 :
118 : #define MatchText MB_MatchText
119 : #define do_like_escape MB_do_like_escape
120 : /* NOTE(review): the macros above are defined again after each include with no #undef in this file; presumably like_match.c undefines them at its end -- confirm there */
121 : #include "like_match.c"
122 :
123 : /* Pass 2: set up to compile like_match.c for single-byte characters; every character is exactly one byte */
124 : #define CHAREQ(p1, p2) (*(p1) == *(p2))
125 : #define NextChar(p, plen) NextByte((p), (plen))
126 : #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
127 :
128 : #define MatchText SB_MatchText
129 : #define do_like_escape SB_do_like_escape
130 :
131 : #include "like_match.c"
132 :
133 : /* Pass 3: set up to compile like_match.c for single-byte case-insensitive matches; MATCH_LOWER expands to a use of the identifiers locale and locale_is_c, so both must be in scope wherever like_match.c applies it. No do_like_escape name is defined for this pass. */
134 : #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
135 : #define NextChar(p, plen) NextByte((p), (plen))
136 : #define MatchText SB_IMatchText
137 :
138 : #include "like_match.c"
139 :
140 : /* Pass 4: set up to compile like_match.c for UTF8 encoding, using a fast NextChar */
141 : /* This NextChar always consumes one byte, then keeps consuming while bytes remain and the next byte looks like 10xxxxxx (a UTF-8 continuation byte), so pg_mblen() is not called. */
142 : #define NextChar(p, plen) \
143 : do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
144 : #define MatchText UTF8_MatchText
145 :
146 : #include "like_match.c"
147 :
148 : /* Generic for all cases not requiring inline case-folding */
149 : static inline int
150 965302 : GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
151 : {
152 965302 : if (collation && !lc_ctype_is_c(collation))
153 : {
154 206566 : pg_locale_t locale = pg_newlocale_from_collation(collation);
155 :
156 206566 : if (!pg_locale_deterministic(locale))
157 24 : ereport(ERROR,
158 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
159 : errmsg("nondeterministic collations are not supported for LIKE")));
160 : }
161 :
162 965278 : if (pg_database_encoding_max_length() == 1)
163 79036 : return SB_MatchText(s, slen, p, plen, 0, true);
164 886242 : else if (GetDatabaseEncoding() == PG_UTF8)
165 886242 : return UTF8_MatchText(s, slen, p, plen, 0, true);
166 : else
167 0 : return MB_MatchText(s, slen, p, plen, 0, true);
168 : }
169 :
170 : static inline int
171 85096 : Generic_Text_IC_like(text *str, text *pat, Oid collation)
172 : {
173 : char *s,
174 : *p;
175 : int slen,
176 : plen;
177 85096 : pg_locale_t locale = 0;
178 85096 : bool locale_is_c = false;
179 :
180 85096 : if (!OidIsValid(collation))
181 : {
182 : /*
183 : * This typically means that the parser could not resolve a conflict
184 : * of implicit collations, so report it that way.
185 : */
186 0 : ereport(ERROR,
187 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
188 : errmsg("could not determine which collation to use for ILIKE"),
189 : errhint("Use the COLLATE clause to set the collation explicitly.")));
190 : }
191 :
192 85096 : if (lc_ctype_is_c(collation))
193 16590 : locale_is_c = true;
194 : else
195 68506 : locale = pg_newlocale_from_collation(collation);
196 :
197 85096 : if (!pg_locale_deterministic(locale))
198 12 : ereport(ERROR,
199 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
200 : errmsg("nondeterministic collations are not supported for ILIKE")));
201 :
202 : /*
203 : * For efficiency reasons, in the single byte case we don't call lower()
204 : * on the pattern and text, but instead call SB_lower_char on each
205 : * character. In the multi-byte case we don't have much choice :-(. Also,
206 : * ICU does not support single-character case folding, so we go the long
207 : * way.
208 : */
209 :
210 85084 : if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU))
211 : {
212 85084 : pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
213 : PointerGetDatum(pat)));
214 85084 : p = VARDATA_ANY(pat);
215 85084 : plen = VARSIZE_ANY_EXHDR(pat);
216 85084 : str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
217 : PointerGetDatum(str)));
218 85084 : s = VARDATA_ANY(str);
219 85084 : slen = VARSIZE_ANY_EXHDR(str);
220 85084 : if (GetDatabaseEncoding() == PG_UTF8)
221 85084 : return UTF8_MatchText(s, slen, p, plen, 0, true);
222 : else
223 0 : return MB_MatchText(s, slen, p, plen, 0, true);
224 : }
225 : else
226 : {
227 0 : p = VARDATA_ANY(pat);
228 0 : plen = VARSIZE_ANY_EXHDR(pat);
229 0 : s = VARDATA_ANY(str);
230 0 : slen = VARSIZE_ANY_EXHDR(str);
231 0 : return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
232 : }
233 : }
234 :
235 : /*
236 : * interface routines called by the function manager
237 : */
238 :
239 : Datum
240 161922 : namelike(PG_FUNCTION_ARGS)
241 : {
242 161922 : Name str = PG_GETARG_NAME(0);
243 161922 : text *pat = PG_GETARG_TEXT_PP(1);
244 : bool result;
245 : char *s,
246 : *p;
247 : int slen,
248 : plen;
249 :
250 161922 : s = NameStr(*str);
251 161922 : slen = strlen(s);
252 161922 : p = VARDATA_ANY(pat);
253 161922 : plen = VARSIZE_ANY_EXHDR(pat);
254 :
255 161922 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
256 :
257 161922 : PG_RETURN_BOOL(result);
258 : }
259 :
260 : Datum
261 5330 : namenlike(PG_FUNCTION_ARGS)
262 : {
263 5330 : Name str = PG_GETARG_NAME(0);
264 5330 : text *pat = PG_GETARG_TEXT_PP(1);
265 : bool result;
266 : char *s,
267 : *p;
268 : int slen,
269 : plen;
270 :
271 5330 : s = NameStr(*str);
272 5330 : slen = strlen(s);
273 5330 : p = VARDATA_ANY(pat);
274 5330 : plen = VARSIZE_ANY_EXHDR(pat);
275 :
276 5330 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
277 :
278 5330 : PG_RETURN_BOOL(result);
279 : }
280 :
281 : Datum
282 483968 : textlike(PG_FUNCTION_ARGS)
283 : {
284 483968 : text *str = PG_GETARG_TEXT_PP(0);
285 483968 : text *pat = PG_GETARG_TEXT_PP(1);
286 : bool result;
287 : char *s,
288 : *p;
289 : int slen,
290 : plen;
291 :
292 483968 : s = VARDATA_ANY(str);
293 483968 : slen = VARSIZE_ANY_EXHDR(str);
294 483968 : p = VARDATA_ANY(pat);
295 483968 : plen = VARSIZE_ANY_EXHDR(pat);
296 :
297 483968 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
298 :
299 483944 : PG_RETURN_BOOL(result);
300 : }
301 :
302 : Datum
303 314082 : textnlike(PG_FUNCTION_ARGS)
304 : {
305 314082 : text *str = PG_GETARG_TEXT_PP(0);
306 314082 : text *pat = PG_GETARG_TEXT_PP(1);
307 : bool result;
308 : char *s,
309 : *p;
310 : int slen,
311 : plen;
312 :
313 314082 : s = VARDATA_ANY(str);
314 314082 : slen = VARSIZE_ANY_EXHDR(str);
315 314082 : p = VARDATA_ANY(pat);
316 314082 : plen = VARSIZE_ANY_EXHDR(pat);
317 :
318 314082 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
319 :
320 314082 : PG_RETURN_BOOL(result);
321 : }
322 :
323 : Datum
324 12 : bytealike(PG_FUNCTION_ARGS)
325 : {
326 12 : bytea *str = PG_GETARG_BYTEA_PP(0);
327 12 : bytea *pat = PG_GETARG_BYTEA_PP(1);
328 : bool result;
329 : char *s,
330 : *p;
331 : int slen,
332 : plen;
333 :
334 12 : s = VARDATA_ANY(str);
335 12 : slen = VARSIZE_ANY_EXHDR(str);
336 12 : p = VARDATA_ANY(pat);
337 12 : plen = VARSIZE_ANY_EXHDR(pat);
338 :
339 12 : result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
340 :
341 12 : PG_RETURN_BOOL(result);
342 : }
343 :
344 : Datum
345 12 : byteanlike(PG_FUNCTION_ARGS)
346 : {
347 12 : bytea *str = PG_GETARG_BYTEA_PP(0);
348 12 : bytea *pat = PG_GETARG_BYTEA_PP(1);
349 : bool result;
350 : char *s,
351 : *p;
352 : int slen,
353 : plen;
354 :
355 12 : s = VARDATA_ANY(str);
356 12 : slen = VARSIZE_ANY_EXHDR(str);
357 12 : p = VARDATA_ANY(pat);
358 12 : plen = VARSIZE_ANY_EXHDR(pat);
359 :
360 12 : result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
361 :
362 12 : PG_RETURN_BOOL(result);
363 : }
364 :
365 : /*
366 : * Case-insensitive versions
367 : */
368 :
369 : Datum
370 16576 : nameiclike(PG_FUNCTION_ARGS)
371 : {
372 16576 : Name str = PG_GETARG_NAME(0);
373 16576 : text *pat = PG_GETARG_TEXT_PP(1);
374 : bool result;
375 : text *strtext;
376 :
377 16576 : strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
378 : NameGetDatum(str)));
379 16576 : result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
380 :
381 16576 : PG_RETURN_BOOL(result);
382 : }
383 :
384 : Datum
385 6 : nameicnlike(PG_FUNCTION_ARGS)
386 : {
387 6 : Name str = PG_GETARG_NAME(0);
388 6 : text *pat = PG_GETARG_TEXT_PP(1);
389 : bool result;
390 : text *strtext;
391 :
392 6 : strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
393 : NameGetDatum(str)));
394 6 : result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
395 :
396 6 : PG_RETURN_BOOL(result);
397 : }
398 :
399 : Datum
400 68458 : texticlike(PG_FUNCTION_ARGS)
401 : {
402 68458 : text *str = PG_GETARG_TEXT_PP(0);
403 68458 : text *pat = PG_GETARG_TEXT_PP(1);
404 : bool result;
405 :
406 68458 : result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
407 :
408 68446 : PG_RETURN_BOOL(result);
409 : }
410 :
411 : Datum
412 56 : texticnlike(PG_FUNCTION_ARGS)
413 : {
414 56 : text *str = PG_GETARG_TEXT_PP(0);
415 56 : text *pat = PG_GETARG_TEXT_PP(1);
416 : bool result;
417 :
418 56 : result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
419 :
420 56 : PG_RETURN_BOOL(result);
421 : }
422 :
423 : /*
424 : * like_escape() --- given a pattern and an ESCAPE string,
425 : * convert the pattern to use Postgres' standard backslash escape convention.
426 : */
427 : Datum
428 212 : like_escape(PG_FUNCTION_ARGS)
429 : {
430 212 : text *pat = PG_GETARG_TEXT_PP(0);
431 212 : text *esc = PG_GETARG_TEXT_PP(1);
432 : text *result;
433 :
434 212 : if (pg_database_encoding_max_length() == 1)
435 0 : result = SB_do_like_escape(pat, esc);
436 : else
437 212 : result = MB_do_like_escape(pat, esc);
438 :
439 212 : PG_RETURN_TEXT_P(result);
440 : }
441 :
442 : /*
443 : * like_escape_bytea() --- given a pattern and an ESCAPE string,
444 : * convert the pattern to use Postgres' standard backslash escape convention.
445 : */
446 : Datum
447 12 : like_escape_bytea(PG_FUNCTION_ARGS)
448 : {
449 12 : bytea *pat = PG_GETARG_BYTEA_PP(0);
450 12 : bytea *esc = PG_GETARG_BYTEA_PP(1);
451 12 : bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
452 :
453 12 : PG_RETURN_BYTEA_P((bytea *) result);
454 : }
|