Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * like.c
4 : * like expression handling code.
5 : *
6 : * NOTES
7 : * A big hack of the regexp.c code!! Contributed by
8 : * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
9 : *
10 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
11 : * Portions Copyright (c) 1994, Regents of the University of California
12 : *
13 : * IDENTIFICATION
14 : * src/backend/utils/adt/like.c
15 : *
16 : *-------------------------------------------------------------------------
17 : */
18 : #include "postgres.h"
19 :
20 : #include <ctype.h>
21 :
22 : #include "catalog/pg_collation.h"
23 : #include "mb/pg_wchar.h"
24 : #include "miscadmin.h"
25 : #include "utils/fmgrprotos.h"
26 : #include "utils/pg_locale.h"
27 : #include "varatt.h"
28 :
29 :
/*
 * Result codes for the MatchText variants declared below.  The SQL-callable
 * wrappers in this file only ever compare a result against LIKE_TRUE.
 * NOTE(review): LIKE_FALSE and LIKE_ABORT are not referenced anywhere else
 * in this file; presumably like_match.c produces them -- confirm there.
 */
30 : #define LIKE_TRUE 1
31 : #define LIKE_FALSE 0
32 : #define LIKE_ABORT (-1)
33 :
34 :
/*
 * Forward declarations.  None of the *_MatchText / *_do_like_escape
 * functions has a body written out in this file: each one is produced by an
 * #include of like_match.c made while MatchText (and, for the SB/MB
 * variants, do_like_escape) is #defined to the name declared here.
 */
/* Single-byte variant. */
35 : static int SB_MatchText(const char *t, int tlen, const char *p, int plen,
36 : pg_locale_t locale);
37 : static text *SB_do_like_escape(text *pat, text *esc);
38 :
/* Generic multibyte variant. */
39 : static int MB_MatchText(const char *t, int tlen, const char *p, int plen,
40 : pg_locale_t locale);
41 : static text *MB_do_like_escape(text *pat, text *esc);
42 :
/* UTF8 variant; no do_like_escape counterpart is declared for it. */
43 : static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
44 : pg_locale_t locale);
45 :
/* C-locale case-insensitive variant; likewise has no do_like_escape counterpart. */
46 : static int C_IMatchText(const char *t, int tlen, const char *p, int plen,
47 : pg_locale_t locale);
48 :
/* Dispatch helpers defined further down in this file. */
49 : static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
50 : static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
51 :
/*--------------------
 * wchareq
 *		Support routine for MatchText: compare the multibyte characters
 *		that start at p1 and p2.
 *
 * Returns 1 if both characters have the same byte length (as reported by
 * pg_mblen) and identical bytes; returns 0 otherwise.
 *--------------------
 */
static inline int
wchareq(const char *p1, const char *p2)
{
	int			remaining;

	/* Fast path: characters whose leading bytes differ cannot be equal. */
	if (*p1 != *p2)
		return 0;

	/* Characters of different byte length cannot be equal either. */
	remaining = pg_mblen(p1);
	if (remaining != pg_mblen(p2))
		return 0;

	/* Same length: walk both characters byte by byte. */
	while (remaining != 0)
	{
		if (*p1 != *p2)
			return 0;
		p1++;
		p2++;
		remaining--;
	}

	return 1;
}
78 :
79 : /*
80 : * Formerly we had a routine iwchareq() here that tried to do case-insensitive
81 : * comparison of multibyte characters. It did not work at all, however,
82 : * because it relied on tolower() which has a single-byte API ... and
83 : * towlower() wouldn't be much better since we have no suitably cheap way
84 : * of getting a single character transformed to the system's wchar_t format.
85 : * So now, we just downcase the strings using lower() and apply regular LIKE
86 : * comparison. This should be revisited when we install better locale support.
87 : *
88 : * We do handle case-insensitive matching for the C locale using
89 : * fold-on-the-fly processing, however.
90 : */
91 :
92 :
/*
 * Instantiation section: like_match.c is #included four times below.  Before
 * each inclusion a set of macros selects the function name to generate
 * (MatchText / do_like_escape) and the character-stepping primitives.
 * NOTE(review): NextChar and MatchText are re-#defined before every
 * inclusion without an #undef here, so like_match.c presumably #undefs them
 * at its end -- confirm against like_match.c.
 */
/* Advance pointer p by exactly one byte and shrink the remaining length. */
93 : #define NextByte(p, plen) ((p)++, (plen)--)
94 :
95 : /* Set up to compile like_match.c for multibyte characters */
/*
 * CHAREQ compares whole multibyte characters via wchareq(); NextChar and
 * CopyAdvChar step by the character's byte length as reported by
 * pg_mblen().  CopyAdvChar also copies that many bytes from src to dst,
 * advancing both pointers.
 */
96 : #define CHAREQ(p1, p2) wchareq((p1), (p2))
97 : #define NextChar(p, plen) \
98 : do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
99 : #define CopyAdvChar(dst, src, srclen) \
100 : do { int __l = pg_mblen(src); \
101 : (srclen) -= __l; \
102 : while (__l-- > 0) \
103 : *(dst)++ = *(src)++; \
104 : } while (0)
105 :
/* This inclusion generates MB_MatchText and MB_do_like_escape. */
106 : #define MatchText MB_MatchText
107 : #define do_like_escape MB_do_like_escape
108 :
109 : #include "like_match.c"
110 :
111 : /* Set up to compile like_match.c for single-byte characters */
/* Here every character is one byte, so all three primitives work bytewise. */
112 : #define CHAREQ(p1, p2) (*(p1) == *(p2))
113 : #define NextChar(p, plen) NextByte((p), (plen))
114 : #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
115 :
/* This inclusion generates SB_MatchText and SB_do_like_escape. */
116 : #define MatchText SB_MatchText
117 : #define do_like_escape SB_do_like_escape
118 :
119 : #include "like_match.c"
120 :
121 : /* setup to compile like_match.c for case-insensitive matches in C locale */
/*
 * Only C_IMatchText is generated here: do_like_escape, CHAREQ and
 * CopyAdvChar are not defined for this inclusion.
 * NOTE(review): the effect of MATCH_LOWER is implemented in like_match.c,
 * which is not visible here; presumably it lower-cases each byte before
 * comparing -- confirm.
 */
122 : #define MATCH_LOWER
123 : #define NextChar(p, plen) NextByte((p), (plen))
124 : #define MatchText C_IMatchText
125 :
126 : #include "like_match.c"
127 :
128 : /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
129 :
/*
 * This NextChar steps one byte, then keeps stepping while bytes remain and
 * the current byte has the bit pattern 10xxxxxx (a UTF-8 continuation
 * byte), so it lands on the start of the next character without calling
 * pg_mblen().  Only UTF8_MatchText is generated; do_like_escape is not
 * defined for this inclusion.
 * NOTE(review): MATCH_LOWER is not re-defined here; whether it is still
 * defined at this point depends on like_match.c #undef'ing it -- confirm.
 */
130 : #define NextChar(p, plen) \
131 : do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
132 : #define MatchText UTF8_MatchText
133 :
134 : #include "like_match.c"
135 :
136 : /* Generic for all cases not requiring inline case-folding */
137 : static inline int
138 1100782 : GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
139 : {
140 : pg_locale_t locale;
141 :
142 1100782 : if (!OidIsValid(collation))
143 : {
144 : /*
145 : * This typically means that the parser could not resolve a conflict
146 : * of implicit collations, so report it that way.
147 : */
148 0 : ereport(ERROR,
149 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
150 : errmsg("could not determine which collation to use for LIKE"),
151 : errhint("Use the COLLATE clause to set the collation explicitly.")));
152 : }
153 :
154 1100782 : locale = pg_newlocale_from_collation(collation);
155 :
156 1100782 : if (pg_database_encoding_max_length() == 1)
157 81720 : return SB_MatchText(s, slen, p, plen, locale);
158 1019062 : else if (GetDatabaseEncoding() == PG_UTF8)
159 1019062 : return UTF8_MatchText(s, slen, p, plen, locale);
160 : else
161 0 : return MB_MatchText(s, slen, p, plen, locale);
162 : }
163 :
164 : static inline int
165 85252 : Generic_Text_IC_like(text *str, text *pat, Oid collation)
166 : {
167 : char *s,
168 : *p;
169 : int slen,
170 : plen;
171 : pg_locale_t locale;
172 :
173 85252 : if (!OidIsValid(collation))
174 : {
175 : /*
176 : * This typically means that the parser could not resolve a conflict
177 : * of implicit collations, so report it that way.
178 : */
179 0 : ereport(ERROR,
180 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
181 : errmsg("could not determine which collation to use for ILIKE"),
182 : errhint("Use the COLLATE clause to set the collation explicitly.")));
183 : }
184 :
185 85252 : locale = pg_newlocale_from_collation(collation);
186 :
187 85252 : if (!locale->deterministic)
188 12 : ereport(ERROR,
189 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
190 : errmsg("nondeterministic collations are not supported for ILIKE")));
191 :
192 : /*
193 : * For efficiency reasons, in the C locale we don't call lower() on the
194 : * pattern and text, but instead lowercase each character lazily.
195 : *
196 : * XXX: use casefolding instead?
197 : */
198 :
199 85240 : if (locale->ctype_is_c)
200 : {
201 16748 : p = VARDATA_ANY(pat);
202 16748 : plen = VARSIZE_ANY_EXHDR(pat);
203 16748 : s = VARDATA_ANY(str);
204 16748 : slen = VARSIZE_ANY_EXHDR(str);
205 16748 : return C_IMatchText(s, slen, p, plen, locale);
206 : }
207 : else
208 : {
209 68492 : pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
210 : PointerGetDatum(pat)));
211 68492 : p = VARDATA_ANY(pat);
212 68492 : plen = VARSIZE_ANY_EXHDR(pat);
213 68492 : str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
214 : PointerGetDatum(str)));
215 68492 : s = VARDATA_ANY(str);
216 68492 : slen = VARSIZE_ANY_EXHDR(str);
217 :
218 68492 : if (GetDatabaseEncoding() == PG_UTF8)
219 68492 : return UTF8_MatchText(s, slen, p, plen, 0);
220 0 : else if (pg_database_encoding_max_length() > 1)
221 0 : return MB_MatchText(s, slen, p, plen, 0);
222 : else
223 0 : return SB_MatchText(s, slen, p, plen, 0);
224 : }
225 : }
226 :
227 : /*
228 : * interface routines called by the function manager
229 : */
230 :
231 : Datum
232 175148 : namelike(PG_FUNCTION_ARGS)
233 : {
234 175148 : Name str = PG_GETARG_NAME(0);
235 175148 : text *pat = PG_GETARG_TEXT_PP(1);
236 : bool result;
237 : char *s,
238 : *p;
239 : int slen,
240 : plen;
241 :
242 175148 : s = NameStr(*str);
243 175148 : slen = strlen(s);
244 175148 : p = VARDATA_ANY(pat);
245 175148 : plen = VARSIZE_ANY_EXHDR(pat);
246 :
247 175148 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
248 :
249 175148 : PG_RETURN_BOOL(result);
250 : }
251 :
252 : Datum
253 5446 : namenlike(PG_FUNCTION_ARGS)
254 : {
255 5446 : Name str = PG_GETARG_NAME(0);
256 5446 : text *pat = PG_GETARG_TEXT_PP(1);
257 : bool result;
258 : char *s,
259 : *p;
260 : int slen,
261 : plen;
262 :
263 5446 : s = NameStr(*str);
264 5446 : slen = strlen(s);
265 5446 : p = VARDATA_ANY(pat);
266 5446 : plen = VARSIZE_ANY_EXHDR(pat);
267 :
268 5446 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
269 :
270 5446 : PG_RETURN_BOOL(result);
271 : }
272 :
273 : Datum
274 596218 : textlike(PG_FUNCTION_ARGS)
275 : {
276 596218 : text *str = PG_GETARG_TEXT_PP(0);
277 596218 : text *pat = PG_GETARG_TEXT_PP(1);
278 : bool result;
279 : char *s,
280 : *p;
281 : int slen,
282 : plen;
283 :
284 596218 : s = VARDATA_ANY(str);
285 596218 : slen = VARSIZE_ANY_EXHDR(str);
286 596218 : p = VARDATA_ANY(pat);
287 596218 : plen = VARSIZE_ANY_EXHDR(pat);
288 :
289 596218 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
290 :
291 596212 : PG_RETURN_BOOL(result);
292 : }
293 :
294 : Datum
295 323970 : textnlike(PG_FUNCTION_ARGS)
296 : {
297 323970 : text *str = PG_GETARG_TEXT_PP(0);
298 323970 : text *pat = PG_GETARG_TEXT_PP(1);
299 : bool result;
300 : char *s,
301 : *p;
302 : int slen,
303 : plen;
304 :
305 323970 : s = VARDATA_ANY(str);
306 323970 : slen = VARSIZE_ANY_EXHDR(str);
307 323970 : p = VARDATA_ANY(pat);
308 323970 : plen = VARSIZE_ANY_EXHDR(pat);
309 :
310 323970 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
311 :
312 323970 : PG_RETURN_BOOL(result);
313 : }
314 :
315 : Datum
316 12 : bytealike(PG_FUNCTION_ARGS)
317 : {
318 12 : bytea *str = PG_GETARG_BYTEA_PP(0);
319 12 : bytea *pat = PG_GETARG_BYTEA_PP(1);
320 : bool result;
321 : char *s,
322 : *p;
323 : int slen,
324 : plen;
325 :
326 12 : s = VARDATA_ANY(str);
327 12 : slen = VARSIZE_ANY_EXHDR(str);
328 12 : p = VARDATA_ANY(pat);
329 12 : plen = VARSIZE_ANY_EXHDR(pat);
330 :
331 12 : result = (SB_MatchText(s, slen, p, plen, 0) == LIKE_TRUE);
332 :
333 12 : PG_RETURN_BOOL(result);
334 : }
335 :
336 : Datum
337 12 : byteanlike(PG_FUNCTION_ARGS)
338 : {
339 12 : bytea *str = PG_GETARG_BYTEA_PP(0);
340 12 : bytea *pat = PG_GETARG_BYTEA_PP(1);
341 : bool result;
342 : char *s,
343 : *p;
344 : int slen,
345 : plen;
346 :
347 12 : s = VARDATA_ANY(str);
348 12 : slen = VARSIZE_ANY_EXHDR(str);
349 12 : p = VARDATA_ANY(pat);
350 12 : plen = VARSIZE_ANY_EXHDR(pat);
351 :
352 12 : result = (SB_MatchText(s, slen, p, plen, 0) != LIKE_TRUE);
353 :
354 12 : PG_RETURN_BOOL(result);
355 : }
356 :
357 : /*
358 : * Case-insensitive versions
359 : */
360 :
361 : Datum
362 16734 : nameiclike(PG_FUNCTION_ARGS)
363 : {
364 16734 : Name str = PG_GETARG_NAME(0);
365 16734 : text *pat = PG_GETARG_TEXT_PP(1);
366 : bool result;
367 : text *strtext;
368 :
369 16734 : strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
370 : NameGetDatum(str)));
371 16734 : result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
372 :
373 16734 : PG_RETURN_BOOL(result);
374 : }
375 :
376 : Datum
377 6 : nameicnlike(PG_FUNCTION_ARGS)
378 : {
379 6 : Name str = PG_GETARG_NAME(0);
380 6 : text *pat = PG_GETARG_TEXT_PP(1);
381 : bool result;
382 : text *strtext;
383 :
384 6 : strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
385 : NameGetDatum(str)));
386 6 : result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
387 :
388 6 : PG_RETURN_BOOL(result);
389 : }
390 :
391 : Datum
392 68456 : texticlike(PG_FUNCTION_ARGS)
393 : {
394 68456 : text *str = PG_GETARG_TEXT_PP(0);
395 68456 : text *pat = PG_GETARG_TEXT_PP(1);
396 : bool result;
397 :
398 68456 : result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
399 :
400 68444 : PG_RETURN_BOOL(result);
401 : }
402 :
403 : Datum
404 56 : texticnlike(PG_FUNCTION_ARGS)
405 : {
406 56 : text *str = PG_GETARG_TEXT_PP(0);
407 56 : text *pat = PG_GETARG_TEXT_PP(1);
408 : bool result;
409 :
410 56 : result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
411 :
412 56 : PG_RETURN_BOOL(result);
413 : }
414 :
415 : /*
416 : * like_escape() --- given a pattern and an ESCAPE string,
417 : * convert the pattern to use Postgres' standard backslash escape convention.
418 : */
419 : Datum
420 212 : like_escape(PG_FUNCTION_ARGS)
421 : {
422 212 : text *pat = PG_GETARG_TEXT_PP(0);
423 212 : text *esc = PG_GETARG_TEXT_PP(1);
424 : text *result;
425 :
426 212 : if (pg_database_encoding_max_length() == 1)
427 0 : result = SB_do_like_escape(pat, esc);
428 : else
429 212 : result = MB_do_like_escape(pat, esc);
430 :
431 212 : PG_RETURN_TEXT_P(result);
432 : }
433 :
434 : /*
435 : * like_escape_bytea() --- given a pattern and an ESCAPE string,
436 : * convert the pattern to use Postgres' standard backslash escape convention.
437 : */
438 : Datum
439 12 : like_escape_bytea(PG_FUNCTION_ARGS)
440 : {
441 12 : bytea *pat = PG_GETARG_BYTEA_PP(0);
442 12 : bytea *esc = PG_GETARG_BYTEA_PP(1);
443 12 : bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
444 :
445 12 : PG_RETURN_BYTEA_P((bytea *) result);
446 : }
|