LCOV - code coverage report
Current view: top level - src/backend/utils/adt - like.c (source / functions) Hit Total Coverage
Test: PostgreSQL 15beta1 Lines: 120 139 86.3 %
Date: 2022-05-18 03:10:05 Functions: 15 16 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * like.c
       4             :  *    like expression handling code.
       5             :  *
       6             :  *   NOTES
       7             :  *      A big hack of the regexp.c code!! Contributed by
       8             :  *      Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
       9             :  *
      10             :  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
      11             :  * Portions Copyright (c) 1994, Regents of the University of California
      12             :  *
      13             :  * IDENTIFICATION
      14             :  *  src/backend/utils/adt/like.c
      15             :  *
      16             :  *-------------------------------------------------------------------------
      17             :  */
      18             : #include "postgres.h"
      19             : 
      20             : #include <ctype.h>
      21             : 
      22             : #include "catalog/pg_collation.h"
      23             : #include "mb/pg_wchar.h"
      24             : #include "miscadmin.h"
      25             : #include "utils/builtins.h"
      26             : #include "utils/pg_locale.h"
      27             : 
      28             : 
      29             : #define LIKE_TRUE                       1
      30             : #define LIKE_FALSE                      0
      31             : #define LIKE_ABORT                      (-1)
      32             : 
      33             : 
      34             : static int  SB_MatchText(const char *t, int tlen, const char *p, int plen,
      35             :                          pg_locale_t locale, bool locale_is_c);
      36             : static text *SB_do_like_escape(text *, text *);
      37             : 
      38             : static int  MB_MatchText(const char *t, int tlen, const char *p, int plen,
      39             :                          pg_locale_t locale, bool locale_is_c);
      40             : static text *MB_do_like_escape(text *, text *);
      41             : 
      42             : static int  UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
      43             :                            pg_locale_t locale, bool locale_is_c);
      44             : 
      45             : static int  SB_IMatchText(const char *t, int tlen, const char *p, int plen,
      46             :                           pg_locale_t locale, bool locale_is_c);
      47             : 
      48             : static int  GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
      49             : static int  Generic_Text_IC_like(text *str, text *pat, Oid collation);
      50             : 
      51             : /*--------------------
      52             :  * Support routine for MatchText. Compares the leading multibyte character
      53             :  * of each stream byte by byte. Returns 1 if they match; otherwise returns 0.
      54             :  *--------------------
      55             :  */
      56             : static inline int
      57         924 : wchareq(const char *p1, const char *p2)
      58             : {
      59             :     int         p1_len;
      60             : 
      61             :     /* Optimization:  quickly compare the first byte. */
      62         924 :     if (*p1 != *p2)
      63         696 :         return 0;
      64             : 
      65         228 :     p1_len = pg_mblen(p1);
      66         228 :     if (pg_mblen(p2) != p1_len)
      67           0 :         return 0;
      68             : 
      69             :     /* They are the same length */
      70         456 :     while (p1_len--)
      71             :     {
      72         228 :         if (*p1++ != *p2++)
      73           0 :             return 0;
      74             :     }
      75         228 :     return 1;
      76             : }
      77             : 
      78             : /*
      79             :  * Formerly we had a routine iwchareq() here that tried to do case-insensitive
      80             :  * comparison of multibyte characters.  It did not work at all, however,
      81             :  * because it relied on tolower() which has a single-byte API ... and
      82             :  * towlower() wouldn't be much better since we have no suitably cheap way
      83             :  * of getting a single character transformed to the system's wchar_t format.
      84             :  * So now, we just downcase the strings using lower() and apply regular LIKE
      85             :  * comparison.  This should be revisited when we install better locale support.
      86             :  */
      87             : 
      88             : /*
      89             :  * We do handle case-insensitive matching for single-byte encodings using
      90             :  * fold-on-the-fly processing, however.
      91             :  */
      92             : static char
      93           0 : SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
      94             : {
      95           0 :     if (locale_is_c)
      96           0 :         return pg_ascii_tolower(c);
      97             : #ifdef HAVE_LOCALE_T
      98           0 :     else if (locale)
      99           0 :         return tolower_l(c, locale->info.lt);
     100             : #endif
     101             :     else
     102           0 :         return pg_tolower(c);
     103             : }
     104             : 
     105             : 
     106             : #define NextByte(p, plen)   ((p)++, (plen)--)
     107             : 
     108             : /* Set up to compile like_match.c for multibyte characters */
     109             : #define CHAREQ(p1, p2) wchareq((p1), (p2))
     110             : #define NextChar(p, plen) \
     111             :     do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
     112             : #define CopyAdvChar(dst, src, srclen) \
     113             :     do { int __l = pg_mblen(src); \
     114             :          (srclen) -= __l; \
     115             :          while (__l-- > 0) \
     116             :              *(dst)++ = *(src)++; \
     117             :        } while (0)
     118             : 
     119             : #define MatchText   MB_MatchText
     120             : #define do_like_escape  MB_do_like_escape
     121             : 
     122             : #include "like_match.c"
     123             : 
     124             : /* Set up to compile like_match.c for single-byte characters */
     125             : #define CHAREQ(p1, p2) (*(p1) == *(p2))
     126             : #define NextChar(p, plen) NextByte((p), (plen))
     127             : #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
     128             : 
     129             : #define MatchText   SB_MatchText
     130             : #define do_like_escape  SB_do_like_escape
     131             : 
     132             : #include "like_match.c"
     133             : 
     134             : /* Set up to compile like_match.c for single-byte case-insensitive matches */
     135             : #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
     136             : #define NextChar(p, plen) NextByte((p), (plen))
     137             : #define MatchText SB_IMatchText
     138             : 
     139             : #include "like_match.c"
     140             : 
     141             : /* Set up to compile like_match.c for UTF-8 encoding, using fast NextChar */
     142             : 
     143             : #define NextChar(p, plen) \
     144             :     do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
     145             : #define MatchText   UTF8_MatchText
     146             : 
     147             : #include "like_match.c"
     148             : 
     149             : /* Generic LIKE matcher for all cases not requiring inline case-folding */
     150             : static inline int
     151     2509080 : GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
     152             : {
     153     2509080 :     if (collation && !lc_ctype_is_c(collation))
     154             :     {
     155      174900 :         pg_locale_t locale = pg_newlocale_from_collation(collation);
     156             : 
     157      174900 :         if (locale && !locale->deterministic)
     158           0 :             ereport(ERROR,
     159             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     160             :                      errmsg("nondeterministic collations are not supported for LIKE")));
     161             :     }
     162             : 
     163     2509080 :     if (pg_database_encoding_max_length() == 1)
     164       56272 :         return SB_MatchText(s, slen, p, plen, 0, true);
     165     2452808 :     else if (GetDatabaseEncoding() == PG_UTF8)
     166     2452808 :         return UTF8_MatchText(s, slen, p, plen, 0, true);
     167             :     else
     168           0 :         return MB_MatchText(s, slen, p, plen, 0, true);
     169             : }
     170             : 
     171             : static inline int
     172        7320 : Generic_Text_IC_like(text *str, text *pat, Oid collation)
     173             : {
     174             :     char       *s,
     175             :                *p;
     176             :     int         slen,
     177             :                 plen;
     178        7320 :     pg_locale_t locale = 0;
     179        7320 :     bool        locale_is_c = false;
     180             : 
     181        7320 :     if (!OidIsValid(collation))
     182             :     {
     183             :         /*
     184             :          * This typically means that the parser could not resolve a conflict
     185             :          * of implicit collations, so report it that way.
     186             :          */
     187           0 :         ereport(ERROR,
     188             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     189             :                  errmsg("could not determine which collation to use for ILIKE"),
     190             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     191             :     }
     192             : 
     193        7320 :     if (lc_ctype_is_c(collation))
     194        7184 :         locale_is_c = true;
     195             :     else
     196         136 :         locale = pg_newlocale_from_collation(collation);
     197             : 
     198        7320 :     if (locale && !locale->deterministic)
     199           0 :         ereport(ERROR,
     200             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     201             :                  errmsg("nondeterministic collations are not supported for ILIKE")));
     202             : 
     203             :     /*
     204             :      * For efficiency reasons, in the single byte case we don't call lower()
     205             :      * on the pattern and text, but instead call SB_lower_char on each
     206             :      * character.  In the multi-byte case we don't have much choice :-(. Also,
     207             :      * ICU does not support single-character case folding, so we go the long
     208             :      * way.
     209             :      */
     210             : 
     211        7320 :     if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU))
     212             :     {
     213        7320 :         pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
     214             :                                                      PointerGetDatum(pat)));
     215        7320 :         p = VARDATA_ANY(pat);
     216        7320 :         plen = VARSIZE_ANY_EXHDR(pat);
     217        7320 :         str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
     218             :                                                      PointerGetDatum(str)));
     219        7320 :         s = VARDATA_ANY(str);
     220        7320 :         slen = VARSIZE_ANY_EXHDR(str);
     221        7320 :         if (GetDatabaseEncoding() == PG_UTF8)
     222        7320 :             return UTF8_MatchText(s, slen, p, plen, 0, true);
     223             :         else
     224           0 :             return MB_MatchText(s, slen, p, plen, 0, true);
     225             :     }
     226             :     else
     227             :     {
     228           0 :         p = VARDATA_ANY(pat);
     229           0 :         plen = VARSIZE_ANY_EXHDR(pat);
     230           0 :         s = VARDATA_ANY(str);
     231           0 :         slen = VARSIZE_ANY_EXHDR(str);
     232           0 :         return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
     233             :     }
     234             : }
     235             : 
     236             : /*
     237             :  *  interface routines called by the function manager
     238             :  */
     239             : 
     240             : Datum
     241      154710 : namelike(PG_FUNCTION_ARGS)
     242             : {
     243      154710 :     Name        str = PG_GETARG_NAME(0);
     244      154710 :     text       *pat = PG_GETARG_TEXT_PP(1);
     245             :     bool        result;
     246             :     char       *s,
     247             :                *p;
     248             :     int         slen,
     249             :                 plen;
     250             : 
     251      154710 :     s = NameStr(*str);
     252      154710 :     slen = strlen(s);
     253      154710 :     p = VARDATA_ANY(pat);
     254      154710 :     plen = VARSIZE_ANY_EXHDR(pat);
     255             : 
     256      154710 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
     257             : 
     258      154710 :     PG_RETURN_BOOL(result);
     259             : }
     260             : 
     261             : Datum
     262        5282 : namenlike(PG_FUNCTION_ARGS)
     263             : {
     264        5282 :     Name        str = PG_GETARG_NAME(0);
     265        5282 :     text       *pat = PG_GETARG_TEXT_PP(1);
     266             :     bool        result;
     267             :     char       *s,
     268             :                *p;
     269             :     int         slen,
     270             :                 plen;
     271             : 
     272        5282 :     s = NameStr(*str);
     273        5282 :     slen = strlen(s);
     274        5282 :     p = VARDATA_ANY(pat);
     275        5282 :     plen = VARSIZE_ANY_EXHDR(pat);
     276             : 
     277        5282 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
     278             : 
     279        5282 :     PG_RETURN_BOOL(result);
     280             : }
     281             : 
     282             : Datum
     283     2050332 : textlike(PG_FUNCTION_ARGS)
     284             : {
     285     2050332 :     text       *str = PG_GETARG_TEXT_PP(0);
     286     2050332 :     text       *pat = PG_GETARG_TEXT_PP(1);
     287             :     bool        result;
     288             :     char       *s,
     289             :                *p;
     290             :     int         slen,
     291             :                 plen;
     292             : 
     293     2050332 :     s = VARDATA_ANY(str);
     294     2050332 :     slen = VARSIZE_ANY_EXHDR(str);
     295     2050332 :     p = VARDATA_ANY(pat);
     296     2050332 :     plen = VARSIZE_ANY_EXHDR(pat);
     297             : 
     298     2050332 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
     299             : 
     300     2050332 :     PG_RETURN_BOOL(result);
     301             : }
     302             : 
     303             : Datum
     304      298756 : textnlike(PG_FUNCTION_ARGS)
     305             : {
     306      298756 :     text       *str = PG_GETARG_TEXT_PP(0);
     307      298756 :     text       *pat = PG_GETARG_TEXT_PP(1);
     308             :     bool        result;
     309             :     char       *s,
     310             :                *p;
     311             :     int         slen,
     312             :                 plen;
     313             : 
     314      298756 :     s = VARDATA_ANY(str);
     315      298756 :     slen = VARSIZE_ANY_EXHDR(str);
     316      298756 :     p = VARDATA_ANY(pat);
     317      298756 :     plen = VARSIZE_ANY_EXHDR(pat);
     318             : 
     319      298756 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
     320             : 
     321      298756 :     PG_RETURN_BOOL(result);
     322             : }
     323             : 
     324             : Datum
     325          12 : bytealike(PG_FUNCTION_ARGS)
     326             : {
     327          12 :     bytea      *str = PG_GETARG_BYTEA_PP(0);
     328          12 :     bytea      *pat = PG_GETARG_BYTEA_PP(1);
     329             :     bool        result;
     330             :     char       *s,
     331             :                *p;
     332             :     int         slen,
     333             :                 plen;
     334             : 
     335          12 :     s = VARDATA_ANY(str);
     336          12 :     slen = VARSIZE_ANY_EXHDR(str);
     337          12 :     p = VARDATA_ANY(pat);
     338          12 :     plen = VARSIZE_ANY_EXHDR(pat);
     339             : 
     340          12 :     result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
     341             : 
     342          12 :     PG_RETURN_BOOL(result);
     343             : }
     344             : 
     345             : Datum
     346          12 : byteanlike(PG_FUNCTION_ARGS)
     347             : {
     348          12 :     bytea      *str = PG_GETARG_BYTEA_PP(0);
     349          12 :     bytea      *pat = PG_GETARG_BYTEA_PP(1);
     350             :     bool        result;
     351             :     char       *s,
     352             :                *p;
     353             :     int         slen,
     354             :                 plen;
     355             : 
     356          12 :     s = VARDATA_ANY(str);
     357          12 :     slen = VARSIZE_ANY_EXHDR(str);
     358          12 :     p = VARDATA_ANY(pat);
     359          12 :     plen = VARSIZE_ANY_EXHDR(pat);
     360             : 
     361          12 :     result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
     362             : 
     363          12 :     PG_RETURN_BOOL(result);
     364             : }
     365             : 
     366             : /*
     367             :  * Case-insensitive versions
     368             :  */
     369             : 
     370             : Datum
     371        7170 : nameiclike(PG_FUNCTION_ARGS)
     372             : {
     373        7170 :     Name        str = PG_GETARG_NAME(0);
     374        7170 :     text       *pat = PG_GETARG_TEXT_PP(1);
     375             :     bool        result;
     376             :     text       *strtext;
     377             : 
     378        7170 :     strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
     379             :                                                  NameGetDatum(str)));
     380        7170 :     result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
     381             : 
     382        7170 :     PG_RETURN_BOOL(result);
     383             : }
     384             : 
     385             : Datum
     386           6 : nameicnlike(PG_FUNCTION_ARGS)
     387             : {
     388           6 :     Name        str = PG_GETARG_NAME(0);
     389           6 :     text       *pat = PG_GETARG_TEXT_PP(1);
     390             :     bool        result;
     391             :     text       *strtext;
     392             : 
     393           6 :     strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
     394             :                                                  NameGetDatum(str)));
     395           6 :     result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
     396             : 
     397           6 :     PG_RETURN_BOOL(result);
     398             : }
     399             : 
     400             : Datum
     401          88 : texticlike(PG_FUNCTION_ARGS)
     402             : {
     403          88 :     text       *str = PG_GETARG_TEXT_PP(0);
     404          88 :     text       *pat = PG_GETARG_TEXT_PP(1);
     405             :     bool        result;
     406             : 
     407          88 :     result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
     408             : 
     409          88 :     PG_RETURN_BOOL(result);
     410             : }
     411             : 
     412             : Datum
     413          56 : texticnlike(PG_FUNCTION_ARGS)
     414             : {
     415          56 :     text       *str = PG_GETARG_TEXT_PP(0);
     416          56 :     text       *pat = PG_GETARG_TEXT_PP(1);
     417             :     bool        result;
     418             : 
     419          56 :     result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
     420             : 
     421          56 :     PG_RETURN_BOOL(result);
     422             : }
     423             : 
     424             : /*
     425             :  * like_escape() --- given a pattern and an ESCAPE string,
     426             :  * convert the pattern to use Postgres' standard backslash escape convention.
     427             :  */
     428             : Datum
     429         212 : like_escape(PG_FUNCTION_ARGS)
     430             : {
     431         212 :     text       *pat = PG_GETARG_TEXT_PP(0);
     432         212 :     text       *esc = PG_GETARG_TEXT_PP(1);
     433             :     text       *result;
     434             : 
     435         212 :     if (pg_database_encoding_max_length() == 1)
     436           0 :         result = SB_do_like_escape(pat, esc);
     437             :     else
     438         212 :         result = MB_do_like_escape(pat, esc);
     439             : 
     440         212 :     PG_RETURN_TEXT_P(result);
     441             : }
     442             : 
     443             : /*
     444             :  * like_escape_bytea() --- given a bytea pattern and a bytea ESCAPE string,
     445             :  * convert the pattern to use Postgres' standard backslash escape convention.
     446             :  */
     447             : Datum
     448          12 : like_escape_bytea(PG_FUNCTION_ARGS)
     449             : {
     450          12 :     bytea      *pat = PG_GETARG_BYTEA_PP(0);
     451          12 :     bytea      *esc = PG_GETARG_BYTEA_PP(1);
     452          12 :     bytea      *result = SB_do_like_escape((text *) pat, (text *) esc);
     453             : 
     454          12 :     PG_RETURN_BYTEA_P((bytea *) result);
     455             : }

Generated by: LCOV version 1.14