LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_builtin.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 87.8 % 90 79
Test Date: 2026-05-22 02:16:35 Functions: 81.8 % 22 18
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-----------------------------------------------------------------------
       2              :  *
       3              :  * PostgreSQL locale utilities for builtin provider
       4              :  *
       5              :  * Portions Copyright (c) 2002-2026, PostgreSQL Global Development Group
       6              :  *
       7              :  * src/backend/utils/adt/pg_locale_builtin.c
       8              :  *
       9              :  *-----------------------------------------------------------------------
      10              :  */
      11              : 
      12              : #include "postgres.h"
      13              : 
      14              : #include "catalog/pg_database.h"
      15              : #include "catalog/pg_collation.h"
      16              : #include "common/unicode_case.h"
      17              : #include "common/unicode_category.h"
      18              : #include "miscadmin.h"
      19              : #include "utils/builtins.h"
      20              : #include "utils/pg_locale.h"
      21              : #include "utils/syscache.h"
      22              : 
      23              : extern pg_locale_t create_pg_locale_builtin(Oid collid,
      24              :                                             MemoryContext context);
      25              : extern char *get_collation_actual_version_builtin(const char *collcollate);
      26              : 
      27              : struct WordBoundaryState
      28              : {
      29              :     const char *str;
      30              :     size_t      len;
      31              :     size_t      offset;
      32              :     bool        posix;
      33              :     bool        init;
      34              :     bool        prev_alnum;
      35              : };
      36              : 
      37              : /*
      38              :  * In UTF-8, pg_wchar is guaranteed to be the code point value.
      39              :  */
      40              : static inline char32_t
      41       129459 : to_char32(pg_wchar wc)
      42              : {
      43              :     Assert(GetDatabaseEncoding() == PG_UTF8);
      44       129459 :     return (char32_t) wc;
      45              : }
      46              : 
      47              : static inline pg_wchar
      48          650 : to_pg_wchar(char32_t c32)
      49              : {
      50              :     Assert(GetDatabaseEncoding() == PG_UTF8);
      51          650 :     return (pg_wchar) c32;
      52              : }
      53              : 
      54              : /*
      55              :  * Simple word boundary iterator that draws boundaries each time the result of
      56              :  * pg_u_isalnum() changes.
      57              :  */
      58              : static size_t
      59          564 : initcap_wbnext(void *state)
      60              : {
      61          564 :     struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
      62              : 
      63         1165 :     while (wbstate->offset < wbstate->len)
      64              :     {
      65         1032 :         char32_t    u = utf8_to_unicode((const unsigned char *) wbstate->str +
      66         1032 :                                         wbstate->offset);
      67         1032 :         bool        curr_alnum = pg_u_isalnum(u, wbstate->posix);
      68              : 
      69         1032 :         if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
      70              :         {
      71          431 :             size_t      prev_offset = wbstate->offset;
      72              : 
      73          431 :             wbstate->init = true;
      74          431 :             wbstate->offset += unicode_utf8len(u);
      75          431 :             wbstate->prev_alnum = curr_alnum;
      76          431 :             return prev_offset;
      77              :         }
      78              : 
      79          601 :         wbstate->offset += unicode_utf8len(u);
      80              :     }
      81              : 
      82          133 :     return wbstate->len;
      83              : }
      84              : 
      85              : static size_t
      86         6312 : strlower_builtin(char *dest, size_t destsize, const char *src, size_t srclen,
      87              :                  pg_locale_t locale)
      88              : {
      89        12624 :     return unicode_strlower(dest, destsize, src, srclen,
      90         6312 :                             locale->builtin.casemap_full);
      91              : }
      92              : 
      93              : static size_t
      94          133 : strtitle_builtin(char *dest, size_t destsize, const char *src, size_t srclen,
      95              :                  pg_locale_t locale)
      96              : {
      97          133 :     struct WordBoundaryState wbstate = {
      98              :         .str = src,
      99              :         .len = srclen,
     100              :         .offset = 0,
     101          133 :         .posix = !locale->builtin.casemap_full,
     102              :         .init = false,
     103              :         .prev_alnum = false,
     104              :     };
     105              : 
     106          266 :     return unicode_strtitle(dest, destsize, src, srclen,
     107          133 :                             locale->builtin.casemap_full,
     108              :                             initcap_wbnext, &wbstate);
     109              : }
     110              : 
     111              : static size_t
     112       158561 : strupper_builtin(char *dest, size_t destsize, const char *src, size_t srclen,
     113              :                  pg_locale_t locale)
     114              : {
     115       317122 :     return unicode_strupper(dest, destsize, src, srclen,
     116       158561 :                             locale->builtin.casemap_full);
     117              : }
     118              : 
     119              : static size_t
     120           10 : strfold_builtin(char *dest, size_t destsize, const char *src, size_t srclen,
     121              :                 pg_locale_t locale)
     122              : {
     123           20 :     return unicode_strfold(dest, destsize, src, srclen,
     124           10 :                            locale->builtin.casemap_full);
     125              : }
     126              : 
     127              : static bool
     128        43117 : wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
     129              : {
     130        43117 :     return pg_u_isdigit(to_char32(wc), !locale->builtin.casemap_full);
     131              : }
     132              : 
     133              : static bool
     134        19901 : wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
     135              : {
     136        19901 :     return pg_u_isalpha(to_char32(wc));
     137              : }
     138              : 
     139              : static bool
     140        24708 : wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
     141              : {
     142        24708 :     return pg_u_isalnum(to_char32(wc), !locale->builtin.casemap_full);
     143              : }
     144              : 
     145              : static bool
     146        16384 : wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
     147              : {
     148        16384 :     return pg_u_isupper(to_char32(wc));
     149              : }
     150              : 
     151              : static bool
     152            0 : wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
     153              : {
     154            0 :     return pg_u_islower(to_char32(wc));
     155              : }
     156              : 
     157              : static bool
     158            0 : wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
     159              : {
     160            0 :     return pg_u_isgraph(to_char32(wc));
     161              : }
     162              : 
     163              : static bool
     164            0 : wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
     165              : {
     166            0 :     return pg_u_isprint(to_char32(wc));
     167              : }
     168              : 
     169              : static bool
     170        16384 : wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
     171              : {
     172        16384 :     return pg_u_ispunct(to_char32(wc), !locale->builtin.casemap_full);
     173              : }
     174              : 
     175              : static bool
     176         8312 : wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
     177              : {
     178         8312 :     return pg_u_isspace(to_char32(wc));
     179              : }
     180              : 
     181              : static bool
     182            3 : wc_isxdigit_builtin(pg_wchar wc, pg_locale_t locale)
     183              : {
     184            3 :     return pg_u_isxdigit(to_char32(wc), !locale->builtin.casemap_full);
     185              : }
     186              : 
     187              : static bool
     188            0 : wc_iscased_builtin(pg_wchar wc, pg_locale_t locale)
     189              : {
     190            0 :     return pg_u_prop_cased(to_char32(wc));
     191              : }
     192              : 
     193              : static pg_wchar
     194          325 : wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
     195              : {
     196          325 :     return to_pg_wchar(unicode_uppercase_simple(to_char32(wc)));
     197              : }
     198              : 
     199              : static pg_wchar
     200          325 : wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
     201              : {
     202          325 :     return to_pg_wchar(unicode_lowercase_simple(to_char32(wc)));
     203              : }
     204              : 
     205              : static const struct ctype_methods ctype_methods_builtin = {
     206              :     .strlower = strlower_builtin,
     207              :     .strtitle = strtitle_builtin,
     208              :     .strupper = strupper_builtin,
     209              :     .strfold = strfold_builtin,
     210              :     /* uses plain ASCII semantics for historical reasons */
     211              :     .downcase_ident = NULL,
     212              :     .wc_isdigit = wc_isdigit_builtin,
     213              :     .wc_isalpha = wc_isalpha_builtin,
     214              :     .wc_isalnum = wc_isalnum_builtin,
     215              :     .wc_isupper = wc_isupper_builtin,
     216              :     .wc_islower = wc_islower_builtin,
     217              :     .wc_isgraph = wc_isgraph_builtin,
     218              :     .wc_isprint = wc_isprint_builtin,
     219              :     .wc_ispunct = wc_ispunct_builtin,
     220              :     .wc_isspace = wc_isspace_builtin,
     221              :     .wc_isxdigit = wc_isxdigit_builtin,
     222              :     .wc_iscased = wc_iscased_builtin,
     223              :     .wc_tolower = wc_tolower_builtin,
     224              :     .wc_toupper = wc_toupper_builtin,
     225              : };
     226              : 
     227              : pg_locale_t
     228          971 : create_pg_locale_builtin(Oid collid, MemoryContext context)
     229              : {
     230              :     const char *locstr;
     231              :     pg_locale_t result;
     232              : 
     233          971 :     if (collid == DEFAULT_COLLATION_OID)
     234              :     {
     235              :         HeapTuple   tp;
     236              :         Datum       datum;
     237              : 
     238          933 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     239          933 :         if (!HeapTupleIsValid(tp))
     240            0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     241          933 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     242              :                                        Anum_pg_database_datlocale);
     243          933 :         locstr = TextDatumGetCString(datum);
     244          933 :         ReleaseSysCache(tp);
     245              :     }
     246              :     else
     247              :     {
     248              :         HeapTuple   tp;
     249              :         Datum       datum;
     250              : 
     251           38 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     252           38 :         if (!HeapTupleIsValid(tp))
     253            0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     254           38 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     255              :                                        Anum_pg_collation_colllocale);
     256           38 :         locstr = TextDatumGetCString(datum);
     257           38 :         ReleaseSysCache(tp);
     258              :     }
     259              : 
     260          971 :     builtin_validate_locale(GetDatabaseEncoding(), locstr);
     261              : 
     262          971 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     263              : 
     264          971 :     result->builtin.locale = MemoryContextStrdup(context, locstr);
     265          971 :     result->builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
     266          971 :     result->deterministic = true;
     267          971 :     result->collate_is_c = true;
     268          971 :     result->ctype_is_c = (strcmp(locstr, "C") == 0);
     269          971 :     if (!result->ctype_is_c)
     270          948 :         result->ctype = &ctype_methods_builtin;
     271              : 
     272          971 :     return result;
     273              : }
     274              : 
     275              : char *
     276         1014 : get_collation_actual_version_builtin(const char *collcollate)
     277              : {
     278              :     /*
     279              :      * The supported locales (C, C.UTF-8, and PG_UNICODE_FAST) are all based on
     280              :      * memcmp and are not expected to change, but track the version anyway.
     281              :      *
     282              :      * Note that the character semantics may change for some locales, but the
     283              :      * collation version only tracks changes to sort order.
     284              :      */
     285         1014 :     if (strcmp(collcollate, "C") == 0)
     286           44 :         return "1";
     287          970 :     else if (strcmp(collcollate, "C.UTF-8") == 0)
     288          957 :         return "1";
     289           13 :     else if (strcmp(collcollate, "PG_UNICODE_FAST") == 0)
     290           13 :         return "1";
     291              :     else
     292            0 :         ereport(ERROR,
     293              :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     294              :                  errmsg("invalid locale name \"%s\" for builtin provider",
     295              :                         collcollate)));
     296              : 
     297              :     return NULL;                /* keep compiler quiet */
     298              : }
        

Generated by: LCOV version 2.0-1