LCOV - code coverage report
Current view: top level - src/backend/utils/adt - like.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 93.1 % 130 121
Test Date: 2026-02-17 17:20:33 Functions: 100.0 % 15 15
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * like.c
       4              :  *    like expression handling code.
       5              :  *
       6              :  *   NOTES
       7              :  *      A big hack of the regexp.c code!! Contributed by
       8              :  *      Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
       9              :  *
      10              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      11              :  * Portions Copyright (c) 1994, Regents of the University of California
      12              :  *
      13              :  * IDENTIFICATION
      14              :  *  src/backend/utils/adt/like.c
      15              :  *
      16              :  *-------------------------------------------------------------------------
      17              :  */
      18              : #include "postgres.h"
      19              : 
      20              : #include <ctype.h>
      21              : 
      22              : #include "catalog/pg_collation.h"
      23              : #include "mb/pg_wchar.h"
      24              : #include "miscadmin.h"
      25              : #include "utils/fmgrprotos.h"
      26              : #include "utils/pg_locale.h"
      27              : #include "varatt.h"
      28              : 
      29              : 
      30              : #define LIKE_TRUE                       1   /* the only result the fmgr entry points in this file treat as a match */
      31              : #define LIKE_FALSE                      0   /* presumably plain "no match"; produced in like_match.c -- confirm there */
      32              : #define LIKE_ABORT                      (-1)    /* presumably an early-termination "no match"; see like_match.c -- confirm */
      33              : 
      34              : 
      35              : static int  SB_MatchText(const char *t, int tlen, const char *p, int plen,
      36              :                          pg_locale_t locale);   /* single-byte instantiation of like_match.c */
      37              : static text *SB_do_like_escape(text *pat, text *esc);
      38              : 
      39              : static int  MB_MatchText(const char *t, int tlen, const char *p, int plen,
      40              :                          pg_locale_t locale);   /* multibyte instantiation (pg_mblen_with_len based) */
      41              : static text *MB_do_like_escape(text *pat, text *esc);
      42              : 
      43              : static int  UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
      44              :                            pg_locale_t locale); /* UTF8 instantiation with the fast NextChar */
      45              : 
      46              : static int  C_IMatchText(const char *t, int tlen, const char *p, int plen,
      47              :                          pg_locale_t locale);   /* MATCH_LOWER instantiation, used for ILIKE when ctype is C */
      48              : 
      49              : static int  GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
      50              : static int  Generic_Text_IC_like(text *str, text *pat, Oid collation);
      51              : 
      52              : /*--------------------
      53              :  * Support routine for MatchText.  Compares the leading multibyte character
      54              :  * at p1 with the one at p2, byte for byte.  Returns 1 if equal, otherwise 0.
      55              :  *--------------------
      56              :  */
      57              : static inline int
      58          462 : wchareq(const char *p1, int p1len, const char *p2, int p2len)
      59              : {
      60              :     int         p1clen;     /* byte length of the character at p1 */
      61              : 
      62              :     /* Optimization:  a differing first byte settles it without measuring lengths. */
      63          462 :     if (*p1 != *p2)
      64          348 :         return 0;
      65              : 
      66          114 :     p1clen = pg_mblen_with_len(p1, p1len);
      67          114 :     if (pg_mblen_with_len(p2, p2len) != p1clen)
      68            0 :         return 0;
      69              : 
      70              :     /* Both characters are p1clen bytes long; compare every one of those bytes. */
      71          228 :     while (p1clen--)
      72              :     {
      73          114 :         if (*p1++ != *p2++)
      74            0 :             return 0;
      75              :     }
      76          114 :     return 1;
      77              : }
      78              : 
      79              : /*
      80              :  * Formerly we had a routine iwchareq() here that tried to do case-insensitive
      81              :  * comparison of multibyte characters.  It did not work at all, however,
      82              :  * because it relied on tolower() which has a single-byte API ... and
      83              :  * towlower() wouldn't be much better since we have no suitably cheap way
      84              :  * of getting a single character transformed to the system's wchar_t format.
      85              :  * So now, we just downcase the strings using lower() and apply regular LIKE
      86              :  * comparison.  This should be revisited when we install better locale support.
      87              :  *
      88              :  * We do handle case-insensitive matching for the C locale using
      89              :  * fold-on-the-fly processing, however.
      90              :  */
      91              : 
      92              : 
      93              : #define NextByte(p, plen)   ((p)++, (plen)--)   /* advance p one byte and shrink the remaining length */
      94              : 
      95              : /* Set up to compile like_match.c for multibyte characters: character lengths come from pg_mblen_with_len() */
      96              : #define CHAREQ(p1, p1len, p2, p2len) wchareq((p1), (p1len), (p2), (p2len))
      97              : #define NextChar(p, plen) \
      98              :     do { int __l = pg_mblen_with_len((p), (plen)); (p) +=__l; (plen) -=__l; } while (0)
      99              : #define CopyAdvChar(dst, src, srclen) \
     100              :     do { int __l = pg_mblen_with_len((src), (srclen)); \
     101              :          (srclen) -= __l; \
     102              :          while (__l-- > 0) \
     103              :              *(dst)++ = *(src)++; \
     104              :        } while (0)
     105              : 
     106              : #define MatchText   MB_MatchText
     107              : #define do_like_escape  MB_do_like_escape
     108              : 
     109              : #include "like_match.c"     /* expanded with MatchText = MB_MatchText, do_like_escape = MB_do_like_escape */
     110              : 
     111              : /* Set up to compile like_match.c for single-byte characters: one byte is one character */
     112              : #define CHAREQ(p1, p1len, p2, p2len) (*(p1) == *(p2))
     113              : #define NextChar(p, plen) NextByte((p), (plen))     /* NOTE(review): re-#defined with no visible #undef; presumably like_match.c #undefs its macros -- confirm */
     114              : #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
     115              : 
     116              : #define MatchText   SB_MatchText
     117              : #define do_like_escape  SB_do_like_escape
     118              : 
     119              : #include "like_match.c"     /* expanded with MatchText = SB_MatchText, do_like_escape = SB_do_like_escape */
     120              : 
     121              : /* Set up to compile like_match.c for case-insensitive matches in C locale, stepping one byte at a time */
     122              : #define MATCH_LOWER     /* read by like_match.c; presumably enables per-character lowercasing -- confirm there */
     123              : #define NextChar(p, plen) NextByte((p), (plen))
     124              : #define MatchText C_IMatchText
     125              : 
     126              : #include "like_match.c"     /* expanded with MatchText = C_IMatchText; do_like_escape is not #defined for this pass */
     127              : 
     128              : /* Set up to compile like_match.c for UTF8 encoding; this NextChar steps one byte, then skips bytes of the form 10xxxxxx */
     129              : 
     130              : #define NextChar(p, plen) \
     131              :     do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
     132              : #define MatchText   UTF8_MatchText
     133              : 
     134              : #include "like_match.c"     /* expanded with MatchText = UTF8_MatchText */
     135              : 
     136              : /* Generic LIKE worker for all cases not requiring inline case-folding: looks up the collation's locale, then picks a matcher by database encoding */
     137              : static inline int
     138       548813 : GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
     139              : {
     140              :     pg_locale_t locale;     /* obtained from 'collation' and handed to the chosen matcher */
     141              : 
     142       548813 :     if (!OidIsValid(collation))
     143              :     {
     144              :         /*
     145              :          * This typically means that the parser could not resolve a conflict
     146              :          * of implicit collations, so report it that way.
     147              :          */
     148            0 :         ereport(ERROR,
     149              :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     150              :                  errmsg("could not determine which collation to use for LIKE"),
     151              :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     152              :     }
     153              : 
     154       548813 :     locale = pg_newlocale_from_collation(collation);
     155              : 
     156       548813 :     if (pg_database_encoding_max_length() == 1)
     157        40882 :         return SB_MatchText(s, slen, p, plen, locale);      /* single-byte encoding */
     158       507931 :     else if (GetDatabaseEncoding() == PG_UTF8)
     159       507931 :         return UTF8_MatchText(s, slen, p, plen, locale);    /* UTF8 */
     160              :     else
     161            0 :         return MB_MatchText(s, slen, p, plen, locale);      /* any other multibyte encoding */
     162              : }
     163              : 
     164              : static inline int       /* ILIKE worker shared by the case-insensitive fmgr entry points; returns the matcher's LIKE_* result */
     165        42097 : Generic_Text_IC_like(text *str, text *pat, Oid collation)
     166              : {
     167              :     char       *s,
     168              :                *p;
     169              :     int         slen,
     170              :                 plen;
     171              :     pg_locale_t locale;     /* obtained from 'collation'; consulted for deterministic and ctype_is_c */
     172              : 
     173        42097 :     if (!OidIsValid(collation))
     174              :     {
     175              :         /*
     176              :          * This typically means that the parser could not resolve a conflict
     177              :          * of implicit collations, so report it that way.
     178              :          */
     179            0 :         ereport(ERROR,
     180              :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     181              :                  errmsg("could not determine which collation to use for ILIKE"),
     182              :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     183              :     }
     184              : 
     185        42097 :     locale = pg_newlocale_from_collation(collation);
     186              : 
     187        42097 :     if (!locale->deterministic)     /* nondeterministic collations are rejected outright */
     188            6 :         ereport(ERROR,
     189              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     190              :                  errmsg("nondeterministic collations are not supported for ILIKE")));
     191              : 
     192              :     /*
     193              :      * For efficiency reasons, in the C locale we don't call lower() on the
     194              :      * pattern and text, but instead lowercase each character lazily
     195              :      * (C_IMatchText).  Otherwise both inputs are passed through lower() first.
     196              :      * XXX: use casefolding instead?
     197              :      */
     198              : 
     199        42091 :     if (locale->ctype_is_c)
     200              :     {
     201         7845 :         p = VARDATA_ANY(pat);
     202         7845 :         plen = VARSIZE_ANY_EXHDR(pat);
     203         7845 :         s = VARDATA_ANY(str);
     204         7845 :         slen = VARSIZE_ANY_EXHDR(str);
     205         7845 :         return C_IMatchText(s, slen, p, plen, locale);
     206              :     }
     207              :     else
     208              :     {
     209        34246 :         pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
     210              :                                                      PointerGetDatum(pat)));    /* pattern run through lower() under 'collation' */
     211        34246 :         p = VARDATA_ANY(pat);
     212        34246 :         plen = VARSIZE_ANY_EXHDR(pat);
     213        34246 :         str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
     214              :                                                      PointerGetDatum(str)));    /* subject string run through lower() likewise */
     215        34246 :         s = VARDATA_ANY(str);
     216        34246 :         slen = VARSIZE_ANY_EXHDR(str);
     217              : 
     218        34246 :         if (GetDatabaseEncoding() == PG_UTF8)
     219        34246 :             return UTF8_MatchText(s, slen, p, plen, 0);     /* both inputs already lowercased; locale argument is 0 */
     220            0 :         else if (pg_database_encoding_max_length() > 1)
     221            0 :             return MB_MatchText(s, slen, p, plen, 0);       /* other multibyte encodings */
     222              :         else
     223            0 :             return SB_MatchText(s, slen, p, plen, 0);       /* single-byte encodings */
     224              :     }
     225              : }
     226              : 
     227              : /*
     228              :  *  interface routines called by the function manager
     229              :  */
     230              : 
     231              : Datum       /* fmgr entry point: name LIKE text pattern */
     232        85179 : namelike(PG_FUNCTION_ARGS)
     233              : {
     234        85179 :     Name        str = PG_GETARG_NAME(0);    /* value to test */
     235        85179 :     text       *pat = PG_GETARG_TEXT_PP(1); /* LIKE pattern */
     236              :     bool        result;     /* true only when the matcher returns LIKE_TRUE */
     237              :     char       *s,
     238              :                *p;
     239              :     int         slen,
     240              :                 plen;
     241              : 
     242        85179 :     s = NameStr(*str);
     243        85179 :     slen = strlen(s);       /* length taken with strlen(), so the name is treated as NUL-terminated */
     244        85179 :     p = VARDATA_ANY(pat);
     245        85179 :     plen = VARSIZE_ANY_EXHDR(pat);
     246              : 
     247        85179 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
     248              : 
     249        85179 :     PG_RETURN_BOOL(result);
     250              : }
     251              : 
     252              : Datum       /* fmgr entry point: name NOT LIKE text pattern */
     253         2725 : namenlike(PG_FUNCTION_ARGS)
     254              : {
     255         2725 :     Name        str = PG_GETARG_NAME(0);    /* value to test */
     256         2725 :     text       *pat = PG_GETARG_TEXT_PP(1); /* LIKE pattern */
     257              :     bool        result;     /* true for any matcher result other than LIKE_TRUE */
     258              :     char       *s,
     259              :                *p;
     260              :     int         slen,
     261              :                 plen;
     262              : 
     263         2725 :     s = NameStr(*str);
     264         2725 :     slen = strlen(s);       /* length taken with strlen(), so the name is treated as NUL-terminated */
     265         2725 :     p = VARDATA_ANY(pat);
     266         2725 :     plen = VARSIZE_ANY_EXHDR(pat);
     267              : 
     268         2725 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
     269              : 
     270         2725 :     PG_RETURN_BOOL(result);
     271              : }
     272              : 
     273              : Datum       /* fmgr entry point: text LIKE text pattern */
     274       298416 : textlike(PG_FUNCTION_ARGS)
     275              : {
     276       298416 :     text       *str = PG_GETARG_TEXT_PP(0); /* value to test */
     277       298416 :     text       *pat = PG_GETARG_TEXT_PP(1); /* LIKE pattern */
     278              :     bool        result;     /* true only when the matcher returns LIKE_TRUE */
     279              :     char       *s,
     280              :                *p;
     281              :     int         slen,
     282              :                 plen;
     283              : 
     284       298416 :     s = VARDATA_ANY(str);
     285       298416 :     slen = VARSIZE_ANY_EXHDR(str);  /* payload length, varlena header excluded */
     286       298416 :     p = VARDATA_ANY(pat);
     287       298416 :     plen = VARSIZE_ANY_EXHDR(pat);
     288              : 
     289       298416 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
     290              : 
     291       298413 :     PG_RETURN_BOOL(result);
     292              : }
     293              : 
     294              : Datum       /* fmgr entry point: text NOT LIKE text pattern */
     295       162493 : textnlike(PG_FUNCTION_ARGS)
     296              : {
     297       162493 :     text       *str = PG_GETARG_TEXT_PP(0); /* value to test */
     298       162493 :     text       *pat = PG_GETARG_TEXT_PP(1); /* LIKE pattern */
     299              :     bool        result;     /* true for any matcher result other than LIKE_TRUE */
     300              :     char       *s,
     301              :                *p;
     302              :     int         slen,
     303              :                 plen;
     304              : 
     305       162493 :     s = VARDATA_ANY(str);
     306       162493 :     slen = VARSIZE_ANY_EXHDR(str);  /* payload length, varlena header excluded */
     307       162493 :     p = VARDATA_ANY(pat);
     308       162493 :     plen = VARSIZE_ANY_EXHDR(pat);
     309              : 
     310       162493 :     result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
     311              : 
     312       162493 :     PG_RETURN_BOOL(result);
     313              : }
     314              : 
     315              : Datum       /* fmgr entry point: bytea LIKE bytea pattern; no collation lookup is done here */
     316            6 : bytealike(PG_FUNCTION_ARGS)
     317              : {
     318            6 :     bytea      *str = PG_GETARG_BYTEA_PP(0);    /* value to test */
     319            6 :     bytea      *pat = PG_GETARG_BYTEA_PP(1);    /* LIKE pattern */
     320              :     bool        result;     /* true only when the matcher returns LIKE_TRUE */
     321              :     char       *s,
     322              :                *p;
     323              :     int         slen,
     324              :                 plen;
     325              : 
     326            6 :     s = VARDATA_ANY(str);
     327            6 :     slen = VARSIZE_ANY_EXHDR(str);
     328            6 :     p = VARDATA_ANY(pat);
     329            6 :     plen = VARSIZE_ANY_EXHDR(pat);
     330              : 
     331            6 :     result = (SB_MatchText(s, slen, p, plen, 0) == LIKE_TRUE);  /* always the single-byte matcher, locale argument 0 */
     332              : 
     333            6 :     PG_RETURN_BOOL(result);
     334              : }
     335              : 
     336              : Datum       /* fmgr entry point: bytea NOT LIKE bytea pattern; no collation lookup is done here */
     337            6 : byteanlike(PG_FUNCTION_ARGS)
     338              : {
     339            6 :     bytea      *str = PG_GETARG_BYTEA_PP(0);    /* value to test */
     340            6 :     bytea      *pat = PG_GETARG_BYTEA_PP(1);    /* LIKE pattern */
     341              :     bool        result;     /* true for any matcher result other than LIKE_TRUE */
     342              :     char       *s,
     343              :                *p;
     344              :     int         slen,
     345              :                 plen;
     346              : 
     347            6 :     s = VARDATA_ANY(str);
     348            6 :     slen = VARSIZE_ANY_EXHDR(str);
     349            6 :     p = VARDATA_ANY(pat);
     350            6 :     plen = VARSIZE_ANY_EXHDR(pat);
     351              : 
     352            6 :     result = (SB_MatchText(s, slen, p, plen, 0) != LIKE_TRUE);  /* always the single-byte matcher, locale argument 0 */
     353              : 
     354            6 :     PG_RETURN_BOOL(result);
     355              : }
     356              : 
     357              : /*
     358              :  * Case-insensitive versions
     359              :  */
     360              : 
     361              : Datum       /* fmgr entry point: name ILIKE text pattern */
     362         7838 : nameiclike(PG_FUNCTION_ARGS)
     363              : {
     364         7838 :     Name        str = PG_GETARG_NAME(0);    /* value to test */
     365         7838 :     text       *pat = PG_GETARG_TEXT_PP(1); /* ILIKE pattern */
     366              :     bool        result;     /* true only when the worker returns LIKE_TRUE */
     367              :     text       *strtext;    /* 'str' converted to text so the text worker can be reused */
     368              : 
     369         7838 :     strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
     370              :                                                  NameGetDatum(str)));
     371         7838 :     result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
     372              : 
     373         7838 :     PG_RETURN_BOOL(result);
     374              : }
     375              : 
     376              : Datum       /* fmgr entry point: name NOT ILIKE text pattern */
     377            3 : nameicnlike(PG_FUNCTION_ARGS)
     378              : {
     379            3 :     Name        str = PG_GETARG_NAME(0);    /* value to test */
     380            3 :     text       *pat = PG_GETARG_TEXT_PP(1); /* ILIKE pattern */
     381              :     bool        result;     /* true for any worker result other than LIKE_TRUE */
     382              :     text       *strtext;    /* 'str' converted to text so the text worker can be reused */
     383              : 
     384            3 :     strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
     385              :                                                  NameGetDatum(str)));
     386            3 :     result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
     387              : 
     388            3 :     PG_RETURN_BOOL(result);
     389              : }
     390              : 
     391              : Datum       /* fmgr entry point: text ILIKE text pattern */
     392        34228 : texticlike(PG_FUNCTION_ARGS)
     393              : {
     394        34228 :     text       *str = PG_GETARG_TEXT_PP(0); /* value to test */
     395        34228 :     text       *pat = PG_GETARG_TEXT_PP(1); /* ILIKE pattern */
     396              :     bool        result;     /* true only when the worker returns LIKE_TRUE */
     397              : 
     398        34228 :     result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
     399              : 
     400        34222 :     PG_RETURN_BOOL(result);
     401              : }
     402              : 
     403              : Datum       /* fmgr entry point: text NOT ILIKE text pattern */
     404           28 : texticnlike(PG_FUNCTION_ARGS)
     405              : {
     406           28 :     text       *str = PG_GETARG_TEXT_PP(0); /* value to test */
     407           28 :     text       *pat = PG_GETARG_TEXT_PP(1); /* ILIKE pattern */
     408              :     bool        result;     /* true for any worker result other than LIKE_TRUE */
     409              : 
     410           28 :     result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
     411              : 
     412           28 :     PG_RETURN_BOOL(result);
     413              : }
     414              : 
     415              : /*
     416              :  * like_escape() --- given a text pattern and an ESCAPE string, convert the
     417              :  * pattern to use Postgres' standard backslash escape convention.
     418              :  */
     419              : Datum
     420          106 : like_escape(PG_FUNCTION_ARGS)
     421              : {
     422          106 :     text       *pat = PG_GETARG_TEXT_PP(0); /* pattern to convert */
     423          106 :     text       *esc = PG_GETARG_TEXT_PP(1); /* ESCAPE string */
     424              :     text       *result;
     425              : 
     426          106 :     if (pg_database_encoding_max_length() == 1)
     427            0 :         result = SB_do_like_escape(pat, esc);   /* single-byte database encoding */
     428              :     else
     429          106 :         result = MB_do_like_escape(pat, esc);   /* multibyte database encoding */
     430              : 
     431          106 :     PG_RETURN_TEXT_P(result);
     432              : }
     433              : 
     434              : /*
     435              :  * like_escape_bytea() --- given a bytea pattern and an ESCAPE string, convert
     436              :  * the pattern to Postgres' standard backslash escape convention, bytewise.
     437              :  */
     438              : Datum
     439            6 : like_escape_bytea(PG_FUNCTION_ARGS)
     440              : {
     441            6 :     bytea      *pat = PG_GETARG_BYTEA_PP(0);    /* pattern to convert */
     442            6 :     bytea      *esc = PG_GETARG_BYTEA_PP(1);    /* ESCAPE string */
     443            6 :     bytea      *result = SB_do_like_escape((text *) pat, (text *) esc);     /* casts only: the single-byte converter is declared on text * */
     444              : 
     445            6 :     PG_RETURN_BYTEA_P((bytea *) result);
     446              : }
        

Generated by: LCOV version 2.0-1