LCOV - code coverage report
Current view: top level - src/backend/utils/adt - like.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 121 130 93.1 %
Date: 2026-01-12 08:18:31 Functions: 15 15 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * like.c
       4             :  *    like expression handling code.
       5             :  *
       6             :  *   NOTES
       7             :  *      A big hack of the regexp.c code!! Contributed by
       8             :  *      Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
       9             :  *
      10             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      11             :  * Portions Copyright (c) 1994, Regents of the University of California
      12             :  *
      13             :  * IDENTIFICATION
      14             :  *  src/backend/utils/adt/like.c
      15             :  *
      16             :  *-------------------------------------------------------------------------
      17             :  */
      18             : #include "postgres.h"
      19             : 
      20             : #include <ctype.h>
      21             : 
      22             : #include "catalog/pg_collation.h"
      23             : #include "mb/pg_wchar.h"
      24             : #include "miscadmin.h"
      25             : #include "utils/fmgrprotos.h"
      26             : #include "utils/pg_locale.h"
      27             : #include "varatt.h"
      28             : 
      29             : 
      30             : #define LIKE_TRUE                       1
      31             : #define LIKE_FALSE                      0
      32             : #define LIKE_ABORT                      (-1)
      33             : 
      34             : /*
                     :  * Forward declarations.
                     :  *
                     :  * SB_MatchText, MB_MatchText, UTF8_MatchText and C_IMatchText, as well
                     :  * as SB_do_like_escape and MB_do_like_escape, have no body written out
                     :  * in this file: further down, MatchText and do_like_escape are #defined
                     :  * to each of these names in turn immediately before an
                     :  * #include "like_match.c".  GenericMatchText and Generic_Text_IC_like
                     :  * are defined directly in this file.
                     :  */
      35             : static int  SB_MatchText(const char *t, int tlen, const char *p, int plen,
      36             :                          pg_locale_t locale);
      37             : static text *SB_do_like_escape(text *pat, text *esc);
      38             : 
      39             : static int  MB_MatchText(const char *t, int tlen, const char *p, int plen,
      40             :                          pg_locale_t locale);
      41             : static text *MB_do_like_escape(text *pat, text *esc);
      42             : 
      43             : static int  UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
      44             :                            pg_locale_t locale);
      45             : 
      46             : static int  C_IMatchText(const char *t, int tlen, const char *p, int plen,
      47             :                          pg_locale_t locale);
      48             : 
      49             : static int  GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
      50             : static int  Generic_Text_IC_like(text *str, text *pat, Oid collation);
      51             : 
      52             : /*--------------------
      53             :  * wchareq
                     :  *
                     :  * Support routine for MatchText: compares the single multibyte
      54             :  * character starting at p1 with the one starting at p2.
                     :  *
                     :  * Returns 1 if both characters have the same byte length (as reported
                     :  * by pg_mblen) and all of those bytes are equal; returns 0 otherwise.
                     :  *
                     :  * NOTE(review): no remaining-length argument is taken, so nothing here
                     :  * bounds the reads; callers presumably guarantee a complete character
                     :  * at each pointer -- confirm against like_match.c.
      55             :  *--------------------
      56             :  */
      57             : static inline int
      58         924 : wchareq(const char *p1, const char *p2)
      59             : {
      60             :     int         p1_len;
      61             : 
      62             :     /* Optimization: a differing first byte settles it without pg_mblen. */
      63         924 :     if (*p1 != *p2)
      64         696 :         return 0;
      65             : 
      66         228 :     p1_len = pg_mblen(p1);      /* byte length of the character at p1 */
      67         228 :     if (pg_mblen(p2) != p1_len)
      68           0 :         return 0;               /* different lengths cannot be equal */
      69             : 
      70             :     /* They are the same length, so compare all p1_len bytes pairwise */
      71         456 :     while (p1_len--)
      72             :     {
      73         228 :         if (*p1++ != *p2++)
      74           0 :             return 0;
      75             :     }
      76         228 :     return 1;
      77             : }
      78             : 
      79             : /*
      80             :  * Formerly we had a routine iwchareq() here that tried to do case-insensitive
      81             :  * comparison of multibyte characters.  It did not work at all, however,
      82             :  * because it relied on tolower() which has a single-byte API ... and
      83             :  * towlower() wouldn't be much better since we have no suitably cheap way
      84             :  * of getting a single character transformed to the system's wchar_t format.
      85             :  * So now, we just downcase the strings using lower() and apply regular LIKE
      86             :  * comparison.  This should be revisited when we install better locale support.
      87             :  *
      88             :  * We do handle case-insensitive matching for the C locale using
      89             :  * fold-on-the-fly processing, however.
      90             :  */
      91             : 
      92             : 
      93             : #define NextByte(p, plen)   ((p)++, (plen)--)   /* step exactly one byte */
      94             : 
      95             : /*
                     :  * Set up to compile like_match.c for multibyte characters.
                     :  *
                     :  * CHAREQ       compares one whole character via wchareq().
                     :  * NextChar     advances p, and shrinks plen, by pg_mblen(p) bytes.
                     :  * CopyAdvChar  copies the pg_mblen(src) bytes of one character from
                     :  *              src to dst, advancing both pointers and shrinking
                     :  *              srclen by the same amount.
                     :  *
                     :  * With MatchText and do_like_escape renamed as below, this inclusion
                     :  * is what provides MB_MatchText and MB_do_like_escape.
                     :  *
                     :  * NOTE(review): like_match.c is not visible from here.  Since the same
                     :  * macros are redefined further down without an intervening #undef, it
                     :  * presumably #undefs them itself before it ends -- confirm.
                     :  */
      96             : #define CHAREQ(p1, p2) wchareq((p1), (p2))
      97             : #define NextChar(p, plen) \
      98             :     do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
      99             : #define CopyAdvChar(dst, src, srclen) \
     100             :     do { int __l = pg_mblen(src); \
     101             :          (srclen) -= __l; \
     102             :          while (__l-- > 0) \
     103             :              *(dst)++ = *(src)++; \
     104             :        } while (0)
     105             : 
     106             : #define MatchText   MB_MatchText
     107             : #define do_like_escape  MB_do_like_escape
     108             : 
     109             : #include "like_match.c"
     110             : 
     111             : /*
                     :  * Set up to compile like_match.c for single-byte characters.
                     :  *
                     :  * Here a character is one byte: CHAREQ compares the two bytes
                     :  * directly, NextChar is simply NextByte, and CopyAdvChar copies a
                     :  * single byte while decrementing srclen.  This inclusion is what
                     :  * provides SB_MatchText and SB_do_like_escape.
                     :  */
     112             : #define CHAREQ(p1, p2) (*(p1) == *(p2))
     113             : #define NextChar(p, plen) NextByte((p), (plen))
     114             : #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
     115             : 
     116             : #define MatchText   SB_MatchText
     117             : #define do_like_escape  SB_do_like_escape
     118             : 
     119             : #include "like_match.c"
     120             : 
     121             : /*
                     :  * Set up to compile like_match.c for case-insensitive matches in the
                     :  * C locale.  This inclusion is what provides C_IMatchText; characters
                     :  * are stepped one byte at a time (NextChar is NextByte).
                     :  *
                     :  * NOTE(review): only MATCH_LOWER, NextChar and MatchText are defined
                     :  * here -- no CHAREQ, CopyAdvChar or do_like_escape.  Presumably
                     :  * like_match.c keys its comparison off MATCH_LOWER and emits no
                     :  * do_like_escape when that name is undefined -- confirm there.
                     :  */
     122             : #define MATCH_LOWER
     123             : #define NextChar(p, plen) NextByte((p), (plen))
     124             : #define MatchText C_IMatchText
     125             : 
     126             : #include "like_match.c"
     127             : 
     128             : /*
                     :  * Set up to compile like_match.c for UTF8 encoding, using a fast
                     :  * NextChar that never calls pg_mblen: it steps one byte, then keeps
                     :  * stepping while bytes remain (plen > 0) and the byte at p has its two
                     :  * top bits equal to 10 (a UTF-8 continuation byte).  Note that the
                     :  * macro is a genuine do ... while loop, not the usual
                     :  * do { } while (0) wrapper.  This inclusion is what provides
                     :  * UTF8_MatchText.
                     :  *
                     :  * NOTE(review): no CHAREQ is defined for this variant, and MATCH_LOWER
                     :  * from the previous setup is not #undef'd in this file; presumably
                     :  * like_match.c supplies a default CHAREQ and #undefs MATCH_LOWER
                     :  * itself -- confirm there.
                     :  */
     129             : 
     130             : #define NextChar(p, plen) \
     131             :     do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
     132             : #define MatchText   UTF8_MatchText
     133             : 
     134             : #include "like_match.c"
     135             : 
     136             : /*
                     :  * GenericMatchText
                     :  *
                     :  * Generic for all cases not requiring inline case-folding.
                     :  *
                     :  * Matches the slen bytes at s against the plen-byte pattern at p,
                     :  * choosing the matcher by database encoding: SB_MatchText when
                     :  * pg_database_encoding_max_length() is 1, UTF8_MatchText when the
                     :  * database encoding is PG_UTF8, and MB_MatchText otherwise.  The
                     :  * locale obtained for "collation" is handed on to the matcher.
                     :  *
                     :  * Returns whatever the chosen matcher returns; the callers in this
                     :  * file compare that value against LIKE_TRUE.  Reports
                     :  * ERRCODE_INDETERMINATE_COLLATION if collation is not a valid OID.
                     :  */
     137             : static inline int
     138     1100782 : GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
     139             : {
     140             :     pg_locale_t locale;
     141             : 
     142     1100782 :     if (!OidIsValid(collation))
     143             :     {
     144             :         /*
     145             :          * This typically means that the parser could not resolve a conflict
     146             :          * of implicit collations, so report it that way.
     147             :          */
     148           0 :         ereport(ERROR,
     149             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     150             :                  errmsg("could not determine which collation to use for LIKE"),
     151             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     152             :     }
     153             : 
     154     1100782 :     locale = pg_newlocale_from_collation(collation);
     155             : 
     156     1100782 :     if (pg_database_encoding_max_length() == 1)
     157       81720 :         return SB_MatchText(s, slen, p, plen, locale);
     158     1019062 :     else if (GetDatabaseEncoding() == PG_UTF8)
     159     1019062 :         return UTF8_MatchText(s, slen, p, plen, locale);
     160             :     else
     161           0 :         return MB_MatchText(s, slen, p, plen, locale);
     162             : }
     163             : /*
                     :  * Generic_Text_IC_like
                     :  *
                     :  * Case-insensitive LIKE of str against pat under "collation".
                     :  *
                     :  * If the collation's locale has ctype_is_c set, the raw bytes of both
                     :  * values go straight to C_IMatchText.  Otherwise pattern and string
                     :  * are each passed through lower() with the given collation first, and
                     :  * the folded copies are matched with UTF8_MatchText, MB_MatchText or
                     :  * SB_MatchText according to the database encoding.
                     :  *
                     :  * Returns the matcher's result; the callers in this file compare it
                     :  * against LIKE_TRUE.  Reports ERRCODE_INDETERMINATE_COLLATION if
                     :  * collation is not a valid OID, and ERRCODE_FEATURE_NOT_SUPPORTED if
                     :  * the locale is not deterministic.
                     :  *
                     :  * NOTE(review): the lower() path passes 0 instead of "locale" to the
                     :  * matcher.  Presumably deliberate, since the inputs are already
                     :  * folded and nondeterministic collations were rejected above --
                     :  * confirm against like_match.c.
                     :  */
     164             : static inline int
     165       85252 : Generic_Text_IC_like(text *str, text *pat, Oid collation)
     166             : {
     167             :     char       *s,
     168             :                *p;
     169             :     int         slen,
     170             :                 plen;
     171             :     pg_locale_t locale;
     172             : 
     173       85252 :     if (!OidIsValid(collation))
     174             :     {
     175             :         /*
     176             :          * This typically means that the parser could not resolve a conflict
     177             :          * of implicit collations, so report it that way.
     178             :          */
     179           0 :         ereport(ERROR,
     180             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     181             :                  errmsg("could not determine which collation to use for ILIKE"),
     182             :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     183             :     }
     184             : 
     185       85252 :     locale = pg_newlocale_from_collation(collation);
     186             : 
     187       85252 :     if (!locale->deterministic)
     188          12 :         ereport(ERROR,
     189             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     190             :                  errmsg("nondeterministic collations are not supported for ILIKE")));
     191             : 
     192             :     /*
     193             :      * For efficiency reasons, in the C locale we don't call lower() on the
     194             :      * pattern and text, but instead lowercase each character lazily.
     195             :      *
     196             :      * XXX: use casefolding instead?
     197             :      */
     198             : 
     199       85240 :     if (locale->ctype_is_c)
     200             :     {
     201       16748 :         p = VARDATA_ANY(pat);
     202       16748 :         plen = VARSIZE_ANY_EXHDR(pat);
     203       16748 :         s = VARDATA_ANY(str);
     204       16748 :         slen = VARSIZE_ANY_EXHDR(str);
     205       16748 :         return C_IMatchText(s, slen, p, plen, locale);
     206             :     }
     207             :     else
     208             :     {
     209       68492 :         pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
     210             :                                                      PointerGetDatum(pat)));
     211       68492 :         p = VARDATA_ANY(pat);       /* from here on, pat is the folded copy */
     212       68492 :         plen = VARSIZE_ANY_EXHDR(pat);
     213       68492 :         str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
     214             :                                                      PointerGetDatum(str)));
     215       68492 :         s = VARDATA_ANY(str);       /* likewise str */
     216       68492 :         slen = VARSIZE_ANY_EXHDR(str);
     217             : 
     218       68492 :         if (GetDatabaseEncoding() == PG_UTF8)
     219       68492 :             return UTF8_MatchText(s, slen, p, plen, 0);
     220           0 :         else if (pg_database_encoding_max_length() > 1)
     221           0 :             return MB_MatchText(s, slen, p, plen, 0);
     222             :         else
     223           0 :             return SB_MatchText(s, slen, p, plen, 0);
     224             :     }
     225             : }
     226             : 
     227             : /*
     228             :  *  interface routines called by the function manager
     229             :  */
     230             : /*
                     :  * namelike --- LIKE with a Name on the left and a text pattern on the
                     :  * right.
                     :  *
                     :  * The name is treated as a NUL-terminated C string (its length is
                     :  * taken with strlen).  Returns true only when GenericMatchText, run
                     :  * under the call's collation, yields LIKE_TRUE.
                     :  */
     231             : Datum
     232      175148 : namelike(PG_FUNCTION_ARGS)
     233             : {
     234      175148 :     Name        str = PG_GETARG_NAME(0);
     235      175148 :     text       *pat = PG_GETARG_TEXT_PP(1);
     236             :     bool        result;
     237             :     char       *s,
     238             :                *p;
     239             :     int         slen,
     240             :                 plen;
     241             : 
     242      175148 :     s = NameStr(*str);
     243      175148 :     slen = strlen(s);
     244      175148 :     p = VARDATA_ANY(pat);
     245      175148 :     plen = VARSIZE_ANY_EXHDR(pat);
     246             : 
     247      175148 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
     248             : 
     249      175148 :     PG_RETURN_BOOL(result);
     250             : }
     251             : /*
                     :  * namenlike --- NOT LIKE with a Name on the left and a text pattern on
                     :  * the right.
                     :  *
                     :  * The name is treated as a NUL-terminated C string.  Returns true for
                     :  * every GenericMatchText result other than LIKE_TRUE, which covers
                     :  * both LIKE_FALSE and LIKE_ABORT.
                     :  */
     252             : Datum
     253        5446 : namenlike(PG_FUNCTION_ARGS)
     254             : {
     255        5446 :     Name        str = PG_GETARG_NAME(0);
     256        5446 :     text       *pat = PG_GETARG_TEXT_PP(1);
     257             :     bool        result;
     258             :     char       *s,
     259             :                *p;
     260             :     int         slen,
     261             :                 plen;
     262             : 
     263        5446 :     s = NameStr(*str);
     264        5446 :     slen = strlen(s);
     265        5446 :     p = VARDATA_ANY(pat);
     266        5446 :     plen = VARSIZE_ANY_EXHDR(pat);
     267             : 
     268        5446 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
     269             : 
     270        5446 :     PG_RETURN_BOOL(result);
     271             : }
     272             : /*
                     :  * textlike --- LIKE with text on both sides.
                     :  *
                     :  * Takes data pointer and payload length of each argument and returns
                     :  * true only when GenericMatchText, run under the call's collation,
                     :  * yields LIKE_TRUE.
                     :  */
     273             : Datum
     274      596218 : textlike(PG_FUNCTION_ARGS)
     275             : {
     276      596218 :     text       *str = PG_GETARG_TEXT_PP(0);
     277      596218 :     text       *pat = PG_GETARG_TEXT_PP(1);
     278             :     bool        result;
     279             :     char       *s,
     280             :                *p;
     281             :     int         slen,
     282             :                 plen;
     283             : 
     284      596218 :     s = VARDATA_ANY(str);
     285      596218 :     slen = VARSIZE_ANY_EXHDR(str);
     286      596218 :     p = VARDATA_ANY(pat);
     287      596218 :     plen = VARSIZE_ANY_EXHDR(pat);
     288             : 
     289      596218 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
     290             : 
     291      596212 :     PG_RETURN_BOOL(result);
     292             : }
     293             : /*
                     :  * textnlike --- NOT LIKE with text on both sides.
                     :  *
                     :  * Returns true for every GenericMatchText result other than
                     :  * LIKE_TRUE, which covers both LIKE_FALSE and LIKE_ABORT.
                     :  */
     294             : Datum
     295      323970 : textnlike(PG_FUNCTION_ARGS)
     296             : {
     297      323970 :     text       *str = PG_GETARG_TEXT_PP(0);
     298      323970 :     text       *pat = PG_GETARG_TEXT_PP(1);
     299             :     bool        result;
     300             :     char       *s,
     301             :                *p;
     302             :     int         slen,
     303             :                 plen;
     304             : 
     305      323970 :     s = VARDATA_ANY(str);
     306      323970 :     slen = VARSIZE_ANY_EXHDR(str);
     307      323970 :     p = VARDATA_ANY(pat);
     308      323970 :     plen = VARSIZE_ANY_EXHDR(pat);
     309             : 
     310      323970 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
     311             : 
     312      323970 :     PG_RETURN_BOOL(result);
     313             : }
     314             : /*
                     :  * bytealike --- LIKE with bytea on both sides.
                     :  *
                     :  * Unlike the text variants, this calls SB_MatchText directly, passing
                     :  * 0 for the locale; no collation is consulted and the encoding-based
                     :  * dispatch in GenericMatchText is bypassed.  Returns true only when
                     :  * the matcher yields LIKE_TRUE.
                     :  */
     315             : Datum
     316          12 : bytealike(PG_FUNCTION_ARGS)
     317             : {
     318          12 :     bytea      *str = PG_GETARG_BYTEA_PP(0);
     319          12 :     bytea      *pat = PG_GETARG_BYTEA_PP(1);
     320             :     bool        result;
     321             :     char       *s,
     322             :                *p;
     323             :     int         slen,
     324             :                 plen;
     325             : 
     326          12 :     s = VARDATA_ANY(str);
     327          12 :     slen = VARSIZE_ANY_EXHDR(str);
     328          12 :     p = VARDATA_ANY(pat);
     329          12 :     plen = VARSIZE_ANY_EXHDR(pat);
     330             : 
     331          12 :     result = (SB_MatchText(s, slen, p, plen, 0) == LIKE_TRUE);
     332             : 
     333          12 :     PG_RETURN_BOOL(result);
     334             : }
     335             : /*
                     :  * byteanlike --- NOT LIKE with bytea on both sides.
                     :  *
                     :  * Calls SB_MatchText directly with 0 for the locale, so no collation
                     :  * is consulted.  Returns true for every matcher result other than
                     :  * LIKE_TRUE, which covers both LIKE_FALSE and LIKE_ABORT.
                     :  */
     336             : Datum
     337          12 : byteanlike(PG_FUNCTION_ARGS)
     338             : {
     339          12 :     bytea      *str = PG_GETARG_BYTEA_PP(0);
     340          12 :     bytea      *pat = PG_GETARG_BYTEA_PP(1);
     341             :     bool        result;
     342             :     char       *s,
     343             :                *p;
     344             :     int         slen,
     345             :                 plen;
     346             : 
     347          12 :     s = VARDATA_ANY(str);
     348          12 :     slen = VARSIZE_ANY_EXHDR(str);
     349          12 :     p = VARDATA_ANY(pat);
     350          12 :     plen = VARSIZE_ANY_EXHDR(pat);
     351             : 
     352          12 :     result = (SB_MatchText(s, slen, p, plen, 0) != LIKE_TRUE);
     353             : 
     354          12 :     PG_RETURN_BOOL(result);
     355             : }
     356             : 
     357             : /*
     358             :  * Case-insensitive versions
     359             :  */
     360             : /*
                     :  * nameiclike --- case-insensitive LIKE with a Name on the left and a
                     :  * text pattern on the right.
                     :  *
                     :  * The name is first converted to text by calling name_text, so that
                     :  * Generic_Text_IC_like can be used unchanged.  Returns true only when
                     :  * that yields LIKE_TRUE under the call's collation.
                     :  */
     361             : Datum
     362       16734 : nameiclike(PG_FUNCTION_ARGS)
     363             : {
     364       16734 :     Name        str = PG_GETARG_NAME(0);
     365       16734 :     text       *pat = PG_GETARG_TEXT_PP(1);
     366             :     bool        result;
     367             :     text       *strtext;
     368             : 
     369       16734 :     strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
     370             :                                                  NameGetDatum(str)));
     371       16734 :     result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
     372             : 
     373       16734 :     PG_RETURN_BOOL(result);
     374             : }
     375             : /*
                     :  * nameicnlike --- case-insensitive NOT LIKE with a Name on the left
                     :  * and a text pattern on the right.
                     :  *
                     :  * The name is first converted to text by calling name_text.  Returns
                     :  * true for every Generic_Text_IC_like result other than LIKE_TRUE,
                     :  * which covers both LIKE_FALSE and LIKE_ABORT.
                     :  */
     376             : Datum
     377           6 : nameicnlike(PG_FUNCTION_ARGS)
     378             : {
     379           6 :     Name        str = PG_GETARG_NAME(0);
     380           6 :     text       *pat = PG_GETARG_TEXT_PP(1);
     381             :     bool        result;
     382             :     text       *strtext;
     383             : 
     384           6 :     strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
     385             :                                                  NameGetDatum(str)));
     386           6 :     result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
     387             : 
     388           6 :     PG_RETURN_BOOL(result);
     389             : }
     390             : /*
                     :  * texticlike --- case-insensitive LIKE with text on both sides.
                     :  *
                     :  * Thin wrapper: returns true only when Generic_Text_IC_like, run
                     :  * under the call's collation, yields LIKE_TRUE.
                     :  */
     391             : Datum
     392       68456 : texticlike(PG_FUNCTION_ARGS)
     393             : {
     394       68456 :     text       *str = PG_GETARG_TEXT_PP(0);
     395       68456 :     text       *pat = PG_GETARG_TEXT_PP(1);
     396             :     bool        result;
     397             : 
     398       68456 :     result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
     399             : 
     400       68444 :     PG_RETURN_BOOL(result);
     401             : }
     402             : /*
                     :  * texticnlike --- case-insensitive NOT LIKE with text on both sides.
                     :  *
                     :  * Thin wrapper: returns true for every Generic_Text_IC_like result
                     :  * other than LIKE_TRUE, which covers both LIKE_FALSE and LIKE_ABORT.
                     :  */
     403             : Datum
     404          56 : texticnlike(PG_FUNCTION_ARGS)
     405             : {
     406          56 :     text       *str = PG_GETARG_TEXT_PP(0);
     407          56 :     text       *pat = PG_GETARG_TEXT_PP(1);
     408             :     bool        result;
     409             : 
     410          56 :     result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
     411             : 
     412          56 :     PG_RETURN_BOOL(result);
     413             : }
     414             : 
     415             : /*
     416             :  * like_escape() --- given a pattern and an ESCAPE string,
     417             :  * convert the pattern to use Postgres' standard backslash escape convention.
                     :  *
                     :  * Argument 0 is the pattern and argument 1 the escape string, both
                     :  * text.  The work is done by SB_do_like_escape when
                     :  * pg_database_encoding_max_length() is 1 and by MB_do_like_escape
                     :  * otherwise; the text value that routine returns is the result.
     418             :  */
     419             : Datum
     420         212 : like_escape(PG_FUNCTION_ARGS)
     421             : {
     422         212 :     text       *pat = PG_GETARG_TEXT_PP(0);
     423         212 :     text       *esc = PG_GETARG_TEXT_PP(1);
     424             :     text       *result;
     425             : 
     426         212 :     if (pg_database_encoding_max_length() == 1)
     427           0 :         result = SB_do_like_escape(pat, esc);
     428             :     else
     429         212 :         result = MB_do_like_escape(pat, esc);
     430             : 
     431         212 :     PG_RETURN_TEXT_P(result);
     432             : }
     433             : 
     434             : /*
     435             :  * like_escape_bytea() --- given a pattern and an ESCAPE string,
     436             :  * convert the pattern to use Postgres' standard backslash escape convention.
                     :  *
                     :  * Both arguments are bytea.  The single-byte routine
                     :  * SB_do_like_escape is always used, whatever the database encoding;
                     :  * the bytea pointers are simply cast to text * for that call and the
                     :  * result is cast back to bytea *.
     437             :  */
     438             : Datum
     439          12 : like_escape_bytea(PG_FUNCTION_ARGS)
     440             : {
     441          12 :     bytea      *pat = PG_GETARG_BYTEA_PP(0);
     442          12 :     bytea      *esc = PG_GETARG_BYTEA_PP(1);
     443          12 :     bytea      *result = SB_do_like_escape((text *) pat, (text *) esc);
     444             : 
     445          12 :     PG_RETURN_BYTEA_P((bytea *) result);
     446             : }

Generated by: LCOV version 1.16