LCOV - code coverage report
Current view: top level - src/backend/utils/adt - like.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 122 139 87.8 %
Date: 2024-04-25 10:13:14 Functions: 15 16 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * like.c
       4             :  *    like expression handling code.
       5             :  *
       6             :  *   NOTES
       7             :  *      A big hack of the regexp.c code!! Contributed by
       8             :  *      Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
       9             :  *
      10             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      11             :  * Portions Copyright (c) 1994, Regents of the University of California
      12             :  *
      13             :  * IDENTIFICATION
      14             :  *  src/backend/utils/adt/like.c
      15             :  *
      16             :  *-------------------------------------------------------------------------
      17             :  */
      18             : #include "postgres.h"
      19             : 
      20             : #include <ctype.h>
      21             : 
      22             : #include "catalog/pg_collation.h"
      23             : #include "mb/pg_wchar.h"
      24             : #include "miscadmin.h"
      25             : #include "utils/fmgrprotos.h"
      26             : #include "utils/pg_locale.h"
      27             : #include "varatt.h"
      28             : 
      29             : 
      30             : #define LIKE_TRUE                       1   /* the fmgr entry points below compare matcher results against this value */
      31             : #define LIKE_FALSE                      0   /* not referenced in the code shown here; presumably "no match" from like_match.c -- TODO confirm */
      32             : #define LIKE_ABORT                      (-1)    /* not referenced in the code shown here; presumably produced inside like_match.c -- TODO confirm */
      33             : 
      34             : /* Matcher variants: each name is bound to MatchText/do_like_escape by a #define group before an #include of like_match.c further down. First the plain single-byte one. */
      35             : static int  SB_MatchText(const char *t, int tlen, const char *p, int plen,
      36             :                          pg_locale_t locale, bool locale_is_c);
      37             : static text *SB_do_like_escape(text *pat, text *esc);
      38             : /* Multibyte-character variant (characters stepped with pg_mblen). */
      39             : static int  MB_MatchText(const char *t, int tlen, const char *p, int plen,
      40             :                          pg_locale_t locale, bool locale_is_c);
      41             : static text *MB_do_like_escape(text *pat, text *esc);
      42             : /* UTF-8 variant; it has no do_like_escape counterpart in this file. */
      43             : static int  UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
      44             :                            pg_locale_t locale, bool locale_is_c);
      45             : /* Single-byte variant compiled with MATCH_LOWER, i.e. case folding through SB_lower_char. */
      46             : static int  SB_IMatchText(const char *t, int tlen, const char *p, int plen,
      47             :                           pg_locale_t locale, bool locale_is_c);
      48             : /* Wrappers defined in this file: they check the collation and choose one of the variants above. */
      49             : static int  GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
      50             : static int  Generic_Text_IC_like(text *str, text *pat, Oid collation);
      51             : 
      52             : /*--------------------
      53             :  * Support routine for MatchText. Compares the multibyte characters that
      54             :  * start at p1 and p2 byte by byte. Returns 1 if they are identical, else 0.
      55             :  *--------------------
      56             :  */
      57             : static inline int
      58         924 : wchareq(const char *p1, const char *p2)
      59             : {
      60             :     int         p1_len;
      61             : 
      62             :     /* Optimization:  quickly compare the first byte. */
      63         924 :     if (*p1 != *p2)
      64         696 :         return 0;
      65             :     /* Character lengths come from pg_mblen(); characters of different length cannot be equal. */
      66         228 :     p1_len = pg_mblen(p1);
      67         228 :     if (pg_mblen(p2) != p1_len)
      68           0 :         return 0;
      69             : 
      70             :     /* They are the same length; compare all p1_len bytes (byte 0 is checked again) */
      71         456 :     while (p1_len--)
      72             :     {
      73         228 :         if (*p1++ != *p2++)
      74           0 :             return 0;
      75             :     }
      76         228 :     return 1;
      77             : }
      78             : 
      79             : /*
      80             :  * Formerly we had a routine iwchareq() here that tried to do case-insensitive
      81             :  * comparison of multibyte characters.  It did not work at all, however,
      82             :  * because it relied on tolower() which has a single-byte API ... and
      83             :  * towlower() wouldn't be much better since we have no suitably cheap way
      84             :  * of getting a single character transformed to the system's wchar_t format.
      85             :  * So now, we just downcase the strings using lower() and apply regular LIKE
      86             :  * comparison.  This should be revisited when we install better locale support.
      87             :  */
      88             : 
      89             : /*
      90             :  * SB_lower_char: fold one byte to lower case on the fly, for case-insensitive
      91             :  * matching in single-byte encodings (used through the MATCH_LOWER macro below).
      92             :  */
      93             : static char
      94           0 : SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
      95             : {
      96           0 :     if (locale_is_c)
      97           0 :         return pg_ascii_tolower(c);     /* locale_is_c set: no locale object is consulted */
      98           0 :     else if (locale)
      99           0 :         return tolower_l(c, locale->info.lt);   /* a locale object was supplied: use its info.lt handle */
     100             :     else
     101           0 :         return pg_tolower(c);   /* neither C ctype nor a locale object: fall back to pg_tolower() */
     102             : }
     103             : 
     104             : 
     105             : #define NextByte(p, plen)   ((p)++, (plen)--)   /* step one byte: bump the pointer, shrink the remaining length */
     106             : 
     107             : /* Set up to compile like_match.c for multibyte characters */
     108             : #define CHAREQ(p1, p2) wchareq((p1), (p2))
     109             : #define NextChar(p, plen) \
     110             :     do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
     111             : #define CopyAdvChar(dst, src, srclen) \
     112             :     do { int __l = pg_mblen(src); \
     113             :          (srclen) -= __l; \
     114             :          while (__l-- > 0) \
     115             :              *(dst)++ = *(src)++; \
     116             :        } while (0)
     117             : /* NextChar/CopyAdvChar above move by pg_mblen() bytes, i.e. one whole character. NOTE(review): `__l` contains a double underscore, which C reserves for the implementation. */
     118             : #define MatchText   MB_MatchText
     119             : #define do_like_escape  MB_do_like_escape
     120             : /* like_match.c is not shown here; it is presumably written against the macro names defined above -- TODO confirm. */
     121             : #include "like_match.c"
     122             : 
     123             : /* Set up to compile like_match.c for single-byte characters */
     124             : #define CHAREQ(p1, p2) (*(p1) == *(p2))
     125             : #define NextChar(p, plen) NextByte((p), (plen))
     126             : #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
     127             : /* In this pass comparing, stepping and copying all operate on exactly one byte. */
     128             : #define MatchText   SB_MatchText
     129             : #define do_like_escape  SB_do_like_escape
     130             : /* NOTE(review): CHAREQ etc. are defined again with no #undef in this file; presumably like_match.c undefines them at its end -- confirm. */
     131             : #include "like_match.c"
     132             : 
     133             : /* setup to compile like_match.c for single byte case insensitive matches */
     134             : #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
     135             : #define NextChar(p, plen) NextByte((p), (plen))
     136             : #define MatchText SB_IMatchText
     137             : /* MATCH_LOWER names `locale` and `locale_is_c`, so it must expand where those are in scope (they are parameters in the SB_IMatchText prototype). No CHAREQ or do_like_escape is defined for this pass. */
     138             : #include "like_match.c"
     139             : 
     140             : /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
     141             : /* This NextChar steps one byte, then keeps stepping while input remains and the next byte has the form 10xxxxxx (a UTF-8 continuation byte); it does not call pg_mblen(). */
     142             : #define NextChar(p, plen) \
     143             :     do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
     144             : #define MatchText   UTF8_MatchText
     145             : /* Only NextChar and MatchText are defined for this pass; how like_match.c compares characters here is not visible in this file. */
     146             : #include "like_match.c"
     147             : 
     148             : /* Generic for all cases not requiring inline case-folding: matches s (slen bytes) against pattern p (plen bytes) and returns the chosen *_MatchText variant's result. */
     149             : static inline int
     150      965302 : GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
     151             : {
     152      965302 :     if (collation && !lc_ctype_is_c(collation))     /* skipped for collation 0 and for C ctype */
     153             :     {
     154      206566 :         pg_locale_t locale = pg_newlocale_from_collation(collation);
     155             :         /* The locale is looked up only for this check; it is not passed to the matcher below. */
     156      206566 :         if (!pg_locale_deterministic(locale))
     157          24 :             ereport(ERROR,
     158             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     159             :                      errmsg("nondeterministic collations are not supported for LIKE")));
     160             :     }
     161             :     /* Choose the variant by database encoding; every branch passes locale 0 and locale_is_c = true. */
     162      965278 :     if (pg_database_encoding_max_length() == 1)
     163       79036 :         return SB_MatchText(s, slen, p, plen, 0, true);
     164      886242 :     else if (GetDatabaseEncoding() == PG_UTF8)
     165      886242 :         return UTF8_MatchText(s, slen, p, plen, 0, true);
     166             :     else
     167           0 :         return MB_MatchText(s, slen, p, plen, 0, true);
     168             : }
     169             : /* Generic_Text_IC_like: case-insensitive match of str against pat under `collation`; returns the chosen *_MatchText variant's result. Raises an error for an invalid collation OID or a nondeterministic collation. */
     170             : static inline int
     171       85096 : Generic_Text_IC_like(text *str, text *pat, Oid collation)
     172             : {
     173             :     char       *s,
     174             :                *p;
     175             :     int         slen,
     176             :                 plen;
     177       85096 :     pg_locale_t locale = 0;
     178       85096 :     bool        locale_is_c = false;
     179             : 
     180       85096 :     if (!OidIsValid(collation))
     181             :     {
     182             :         /*
     183             :          * This typically means that the parser could not resolve a conflict
     184             :          * of implicit collations, so report it that way.
     185             :          */
     186           0 :         ereport(ERROR,
     187             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     188             :                  errmsg("could not determine which collation to use for ILIKE"),
     189             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     190             :     }
     191             :     /* With C ctype no locale object is fetched and `locale` stays 0. */
     192       85096 :     if (lc_ctype_is_c(collation))
     193       16590 :         locale_is_c = true;
     194             :     else
     195       68506 :         locale = pg_newlocale_from_collation(collation);
     196             :     /* This is reached with locale == 0 in the C-ctype case; pg_locale_deterministic() presumably accepts that -- TODO confirm. */
     197       85096 :     if (!pg_locale_deterministic(locale))
     198          12 :         ereport(ERROR,
     199             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     200             :                  errmsg("nondeterministic collations are not supported for ILIKE")));
     201             : 
     202             :     /*
     203             :      * For efficiency reasons, in the single byte case we don't call lower()
     204             :      * on the pattern and text, but instead call SB_lower_char on each
     205             :      * character.  In the multi-byte case we don't have much choice :-(. Also,
     206             :      * ICU does not support single-character case folding, so we go the long
     207             :      * way.
     208             :      */
     209             :     /* After lower() the inputs are matched with locale 0 / locale_is_c = true; only the single-byte, non-ICU path passes the real locale to SB_IMatchText. */
     210       85084 :     if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU))
     211             :     {
     212       85084 :         pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
     213             :                                                      PointerGetDatum(pat)));
     214       85084 :         p = VARDATA_ANY(pat);
     215       85084 :         plen = VARSIZE_ANY_EXHDR(pat);
     216       85084 :         str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
     217             :                                                      PointerGetDatum(str)));
     218       85084 :         s = VARDATA_ANY(str);
     219       85084 :         slen = VARSIZE_ANY_EXHDR(str);
     220       85084 :         if (GetDatabaseEncoding() == PG_UTF8)
     221       85084 :             return UTF8_MatchText(s, slen, p, plen, 0, true);
     222             :         else
     223           0 :             return MB_MatchText(s, slen, p, plen, 0, true);
     224             :     }
     225             :     else
     226             :     {
     227           0 :         p = VARDATA_ANY(pat);   /* unmodified inputs: folding happens per character inside SB_IMatchText */
     228           0 :         plen = VARSIZE_ANY_EXHDR(pat);
     229           0 :         s = VARDATA_ANY(str);
     230           0 :         slen = VARSIZE_ANY_EXHDR(str);
     231           0 :         return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
     232             :     }
     233             : }
     234             : 
     235             : /*
     236             :  *  interface routines called by the function manager
     237             :  */
     238             : /* namelike: fmgr entry point matching a Name (arg 0) against a text pattern (arg 1); returns true only when GenericMatchText() yields LIKE_TRUE. */
     239             : Datum
     240      161922 : namelike(PG_FUNCTION_ARGS)
     241             : {
     242      161922 :     Name        str = PG_GETARG_NAME(0);
     243      161922 :     text       *pat = PG_GETARG_TEXT_PP(1);
     244             :     bool        result;
     245             :     char       *s,
     246             :                *p;
     247             :     int         slen,
     248             :                 plen;
     249             :     /* The Name's length is taken with strlen(); the pattern's data pointer and length come from the varlena macros. */
     250      161922 :     s = NameStr(*str);
     251      161922 :     slen = strlen(s);
     252      161922 :     p = VARDATA_ANY(pat);
     253      161922 :     plen = VARSIZE_ANY_EXHDR(pat);
     254             :     /* The call's collation is forwarded; any matcher result other than LIKE_TRUE becomes false. */
     255      161922 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
     256             : 
     257      161922 :     PG_RETURN_BOOL(result);
     258             : }
     259             : /* namenlike: negated form of namelike; returns true when GenericMatchText() yields anything other than LIKE_TRUE. */
     260             : Datum
     261        5330 : namenlike(PG_FUNCTION_ARGS)
     262             : {
     263        5330 :     Name        str = PG_GETARG_NAME(0);
     264        5330 :     text       *pat = PG_GETARG_TEXT_PP(1);
     265             :     bool        result;
     266             :     char       *s,
     267             :                *p;
     268             :     int         slen,
     269             :                 plen;
     270             :     /* The Name's length is taken with strlen(); the pattern's data pointer and length come from the varlena macros. */
     271        5330 :     s = NameStr(*str);
     272        5330 :     slen = strlen(s);
     273        5330 :     p = VARDATA_ANY(pat);
     274        5330 :     plen = VARSIZE_ANY_EXHDR(pat);
     275             :     /* Note the != comparison: every matcher result except LIKE_TRUE counts as "not like". */
     276        5330 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
     277             : 
     278        5330 :     PG_RETURN_BOOL(result);
     279             : }
     280             : /* textlike: fmgr entry point matching text (arg 0) against a text pattern (arg 1); returns true only when GenericMatchText() yields LIKE_TRUE. */
     281             : Datum
     282      483968 : textlike(PG_FUNCTION_ARGS)
     283             : {
     284      483968 :     text       *str = PG_GETARG_TEXT_PP(0);
     285      483968 :     text       *pat = PG_GETARG_TEXT_PP(1);
     286             :     bool        result;
     287             :     char       *s,
     288             :                *p;
     289             :     int         slen,
     290             :                 plen;
     291             :     /* Both arguments are varlenas: take the data pointer and the length excluding the header from each. */
     292      483968 :     s = VARDATA_ANY(str);
     293      483968 :     slen = VARSIZE_ANY_EXHDR(str);
     294      483968 :     p = VARDATA_ANY(pat);
     295      483968 :     plen = VARSIZE_ANY_EXHDR(pat);
     296             :     /* The call's collation is forwarded; any matcher result other than LIKE_TRUE becomes false. */
     297      483968 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
     298             : 
     299      483944 :     PG_RETURN_BOOL(result);
     300             : }
     301             : /* textnlike: negated form of textlike; returns true when GenericMatchText() yields anything other than LIKE_TRUE. */
     302             : Datum
     303      314082 : textnlike(PG_FUNCTION_ARGS)
     304             : {
     305      314082 :     text       *str = PG_GETARG_TEXT_PP(0);
     306      314082 :     text       *pat = PG_GETARG_TEXT_PP(1);
     307             :     bool        result;
     308             :     char       *s,
     309             :                *p;
     310             :     int         slen,
     311             :                 plen;
     312             :     /* Both arguments are varlenas: take the data pointer and the length excluding the header from each. */
     313      314082 :     s = VARDATA_ANY(str);
     314      314082 :     slen = VARSIZE_ANY_EXHDR(str);
     315      314082 :     p = VARDATA_ANY(pat);
     316      314082 :     plen = VARSIZE_ANY_EXHDR(pat);
     317             :     /* Note the != comparison: every matcher result except LIKE_TRUE counts as "not like". */
     318      314082 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
     319             : 
     320      314082 :     PG_RETURN_BOOL(result);
     321             : }
     322             : /* bytealike: fmgr entry point matching bytea (arg 0) against a bytea pattern (arg 1); returns true only when SB_MatchText() yields LIKE_TRUE. */
     323             : Datum
     324          12 : bytealike(PG_FUNCTION_ARGS)
     325             : {
     326          12 :     bytea      *str = PG_GETARG_BYTEA_PP(0);
     327          12 :     bytea      *pat = PG_GETARG_BYTEA_PP(1);
     328             :     bool        result;
     329             :     char       *s,
     330             :                *p;
     331             :     int         slen,
     332             :                 plen;
     333             :     /* Both arguments are varlenas: take the data pointer and the length excluding the header from each. */
     334          12 :     s = VARDATA_ANY(str);
     335          12 :     slen = VARSIZE_ANY_EXHDR(str);
     336          12 :     p = VARDATA_ANY(pat);
     337          12 :     plen = VARSIZE_ANY_EXHDR(pat);
     338             :     /* The single-byte matcher is called directly with locale 0 / locale_is_c = true; no collation or encoding check is made here. */
     339          12 :     result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
     340             : 
     341          12 :     PG_RETURN_BOOL(result);
     342             : }
     343             : /* byteanlike: negated form of bytealike; returns true when SB_MatchText() yields anything other than LIKE_TRUE. */
     344             : Datum
     345          12 : byteanlike(PG_FUNCTION_ARGS)
     346             : {
     347          12 :     bytea      *str = PG_GETARG_BYTEA_PP(0);
     348          12 :     bytea      *pat = PG_GETARG_BYTEA_PP(1);
     349             :     bool        result;
     350             :     char       *s,
     351             :                *p;
     352             :     int         slen,
     353             :                 plen;
     354             :     /* Both arguments are varlenas: take the data pointer and the length excluding the header from each. */
     355          12 :     s = VARDATA_ANY(str);
     356          12 :     slen = VARSIZE_ANY_EXHDR(str);
     357          12 :     p = VARDATA_ANY(pat);
     358          12 :     plen = VARSIZE_ANY_EXHDR(pat);
     359             :     /* The single-byte matcher is called directly with locale 0 / locale_is_c = true; note the != comparison. */
     360          12 :     result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
     361             : 
     362          12 :     PG_RETURN_BOOL(result);
     363             : }
     364             : 
     365             : /*
     366             :  * Case-insensitive versions
     367             :  */
     368             : /* nameiclike: case-insensitive match of a Name (arg 0) against a text pattern (arg 1); returns true only when Generic_Text_IC_like() yields LIKE_TRUE. */
     369             : Datum
     370       16576 : nameiclike(PG_FUNCTION_ARGS)
     371             : {
     372       16576 :     Name        str = PG_GETARG_NAME(0);
     373       16576 :     text       *pat = PG_GETARG_TEXT_PP(1);
     374             :     bool        result;
     375             :     text       *strtext;
     376             :     /* Generic_Text_IC_like() takes text arguments, so the Name is first converted with name_text. */
     377       16576 :     strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
     378             :                                                  NameGetDatum(str)));
     379       16576 :     result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
     380             : 
     381       16576 :     PG_RETURN_BOOL(result);
     382             : }
     383             : /* nameicnlike: negated form of nameiclike; returns true when Generic_Text_IC_like() yields anything other than LIKE_TRUE. */
     384             : Datum
     385           6 : nameicnlike(PG_FUNCTION_ARGS)
     386             : {
     387           6 :     Name        str = PG_GETARG_NAME(0);
     388           6 :     text       *pat = PG_GETARG_TEXT_PP(1);
     389             :     bool        result;
     390             :     text       *strtext;
     391             :     /* Generic_Text_IC_like() takes text arguments, so the Name is first converted with name_text. */
     392           6 :     strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
     393             :                                                  NameGetDatum(str)));
     394           6 :     result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
     395             : 
     396           6 :     PG_RETURN_BOOL(result);
     397             : }
     398             : /* texticlike: case-insensitive match of text (arg 0) against a text pattern (arg 1); returns true only when Generic_Text_IC_like() yields LIKE_TRUE. */
     399             : Datum
     400       68458 : texticlike(PG_FUNCTION_ARGS)
     401             : {
     402       68458 :     text       *str = PG_GETARG_TEXT_PP(0);
     403       68458 :     text       *pat = PG_GETARG_TEXT_PP(1);
     404             :     bool        result;
     405             :     /* The call's collation is forwarded; any result other than LIKE_TRUE becomes false. */
     406       68458 :     result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
     407             : 
     408       68446 :     PG_RETURN_BOOL(result);
     409             : }
     410             : /* texticnlike: negated form of texticlike; returns true when Generic_Text_IC_like() yields anything other than LIKE_TRUE. */
     411             : Datum
     412          56 : texticnlike(PG_FUNCTION_ARGS)
     413             : {
     414          56 :     text       *str = PG_GETARG_TEXT_PP(0);
     415          56 :     text       *pat = PG_GETARG_TEXT_PP(1);
     416             :     bool        result;
     417             :     /* Note the != comparison: every result except LIKE_TRUE counts as "not like". */
     418          56 :     result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
     419             : 
     420          56 :     PG_RETURN_BOOL(result);
     421             : }
     422             : 
     423             : /*
     424             :  * like_escape() --- given a pattern (arg 0) and an ESCAPE string (arg 1),
     425             :  * convert the pattern to use Postgres' standard backslash escape convention.
     426             :  */
     427             : Datum
     428         212 : like_escape(PG_FUNCTION_ARGS)
     429             : {
     430         212 :     text       *pat = PG_GETARG_TEXT_PP(0);
     431         212 :     text       *esc = PG_GETARG_TEXT_PP(1);
     432             :     text       *result;
     433             :     /* Use the single-byte variant when the database encoding's maximum character length is 1, else the multibyte one. */
     434         212 :     if (pg_database_encoding_max_length() == 1)
     435           0 :         result = SB_do_like_escape(pat, esc);
     436             :     else
     437         212 :         result = MB_do_like_escape(pat, esc);
     438             : 
     439         212 :     PG_RETURN_TEXT_P(result);
     440             : }
     441             : 
     442             : /*
     443             :  * like_escape_bytea() --- given a bytea pattern (arg 0) and an ESCAPE string (arg 1),
     444             :  * convert the pattern to use Postgres' standard backslash escape convention.
     445             :  */
     446             : Datum
     447          12 : like_escape_bytea(PG_FUNCTION_ARGS)
     448             : {
     449          12 :     bytea      *pat = PG_GETARG_BYTEA_PP(0);
     450          12 :     bytea      *esc = PG_GETARG_BYTEA_PP(1);
     451          12 :     bytea      *result = SB_do_like_escape((text *) pat, (text *) esc);    /* always the single-byte variant; the bytea pointers are cast to text pointers for the call */
     452             : 
     453          12 :     PG_RETURN_BYTEA_P((bytea *) result);
     454             : }

Generated by: LCOV version 1.14