LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_builtin.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 87.9 % 91 80
Test Date: 2026-03-01 18:15:11 Functions: 81.8 % 22 18
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-----------------------------------------------------------------------
       2              :  *
       3              :  * PostgreSQL locale utilities for builtin provider
       4              :  *
       5              :  * Portions Copyright (c) 2002-2026, PostgreSQL Global Development Group
       6              :  *
       7              :  * src/backend/utils/adt/pg_locale_builtin.c
       8              :  *
       9              :  *-----------------------------------------------------------------------
      10              :  */
      11              : 
      12              : #include "postgres.h"
      13              : 
      14              : #include "catalog/pg_database.h"
      15              : #include "catalog/pg_collation.h"
      16              : #include "common/unicode_case.h"
      17              : #include "common/unicode_category.h"
      18              : #include "miscadmin.h"
      19              : #include "utils/builtins.h"
      20              : #include "utils/pg_locale.h"
      21              : #include "utils/syscache.h"
      22              : 
      23              : extern pg_locale_t create_pg_locale_builtin(Oid collid,
      24              :                                             MemoryContext context);
      25              : extern char *get_collation_actual_version_builtin(const char *collcollate);
      26              : 
      27              : struct WordBoundaryState
      28              : {
      29              :     const char *str;
      30              :     size_t      len;
      31              :     size_t      offset;
      32              :     bool        posix;
      33              :     bool        init;
      34              :     bool        prev_alnum;
      35              : };
      36              : 
      37              : /*
      38              :  * In UTF-8, pg_wchar is guaranteed to be the code point value.
      39              :  */
      40              : static inline char32_t
      41       112863 : to_char32(pg_wchar wc)
      42              : {
      43              :     Assert(GetDatabaseEncoding() == PG_UTF8);
      44       112863 :     return (char32_t) wc;
      45              : }
      46              : 
      47              : static inline pg_wchar
      48          528 : to_pg_wchar(char32_t c32)
      49              : {
      50              :     Assert(GetDatabaseEncoding() == PG_UTF8);
      51          528 :     return (pg_wchar) c32;
      52              : }
      53              : 
      54              : /*
      55              :  * Simple word boundary iterator that draws boundaries each time the result of
      56              :  * pg_u_isalnum() changes.
      57              :  */
      58              : static size_t
      59          412 : initcap_wbnext(void *state)
      60              : {
      61          412 :     struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
      62              : 
      63          850 :     while (wbstate->offset < wbstate->len &&
      64          753 :            wbstate->str[wbstate->offset] != '\0')
      65              :     {
      66          753 :         char32_t    u = utf8_to_unicode((const unsigned char *) wbstate->str +
      67          753 :                                         wbstate->offset);
      68          753 :         bool        curr_alnum = pg_u_isalnum(u, wbstate->posix);
      69              : 
      70          753 :         if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
      71              :         {
      72          315 :             size_t      prev_offset = wbstate->offset;
      73              : 
      74          315 :             wbstate->init = true;
      75          315 :             wbstate->offset += unicode_utf8len(u);
      76          315 :             wbstate->prev_alnum = curr_alnum;
      77          315 :             return prev_offset;
      78              :         }
      79              : 
      80          438 :         wbstate->offset += unicode_utf8len(u);
      81              :     }
      82              : 
      83           97 :     return wbstate->len;
      84              : }
      85              : 
      86              : static size_t
      87         6065 : strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
      88              :                  pg_locale_t locale)
      89              : {
      90        12130 :     return unicode_strlower(dest, destsize, src, srclen,
      91         6065 :                             locale->builtin.casemap_full);
      92              : }
      93              : 
      94              : static size_t
      95           97 : strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
      96              :                  pg_locale_t locale)
      97              : {
      98           97 :     struct WordBoundaryState wbstate = {
      99              :         .str = src,
     100              :         .len = srclen,
     101              :         .offset = 0,
     102           97 :         .posix = !locale->builtin.casemap_full,
     103              :         .init = false,
     104              :         .prev_alnum = false,
     105              :     };
     106              : 
     107          194 :     return unicode_strtitle(dest, destsize, src, srclen,
     108           97 :                             locale->builtin.casemap_full,
     109              :                             initcap_wbnext, &wbstate);
     110              : }
     111              : 
     112              : static size_t
     113       158517 : strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
     114              :                  pg_locale_t locale)
     115              : {
     116       317034 :     return unicode_strupper(dest, destsize, src, srclen,
     117       158517 :                             locale->builtin.casemap_full);
     118              : }
     119              : 
     120              : static size_t
     121            6 : strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
     122              :                 pg_locale_t locale)
     123              : {
     124           12 :     return unicode_strfold(dest, destsize, src, srclen,
     125            6 :                            locale->builtin.casemap_full);
     126              : }
     127              : 
     128              : static bool
     129        39008 : wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
     130              : {
     131        39008 :     return pg_u_isdigit(to_char32(wc), !locale->builtin.casemap_full);
     132              : }
     133              : 
     134              : static bool
     135        19827 : wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
     136              : {
     137        19827 :     return pg_u_isalpha(to_char32(wc));
     138              : }
     139              : 
     140              : static bool
     141        20609 : wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
     142              : {
     143        20609 :     return pg_u_isalnum(to_char32(wc), !locale->builtin.casemap_full);
     144              : }
     145              : 
     146              : static bool
     147        12288 : wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
     148              : {
     149        12288 :     return pg_u_isupper(to_char32(wc));
     150              : }
     151              : 
     152              : static bool
     153            0 : wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
     154              : {
     155            0 :     return pg_u_islower(to_char32(wc));
     156              : }
     157              : 
     158              : static bool
     159            0 : wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
     160              : {
     161            0 :     return pg_u_isgraph(to_char32(wc));
     162              : }
     163              : 
     164              : static bool
     165            0 : wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
     166              : {
     167            0 :     return pg_u_isprint(to_char32(wc));
     168              : }
     169              : 
     170              : static bool
     171        12288 : wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
     172              : {
     173        12288 :     return pg_u_ispunct(to_char32(wc), !locale->builtin.casemap_full);
     174              : }
     175              : 
     176              : static bool
     177         8312 : wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
     178              : {
     179         8312 :     return pg_u_isspace(to_char32(wc));
     180              : }
     181              : 
     182              : static bool
     183            3 : wc_isxdigit_builtin(pg_wchar wc, pg_locale_t locale)
     184              : {
     185            3 :     return pg_u_isxdigit(to_char32(wc), !locale->builtin.casemap_full);
     186              : }
     187              : 
     188              : static bool
     189            0 : wc_iscased_builtin(pg_wchar wc, pg_locale_t locale)
     190              : {
     191            0 :     return pg_u_prop_cased(to_char32(wc));
     192              : }
     193              : 
     194              : static pg_wchar
     195          264 : wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
     196              : {
     197          264 :     return to_pg_wchar(unicode_uppercase_simple(to_char32(wc)));
     198              : }
     199              : 
     200              : static pg_wchar
     201          264 : wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
     202              : {
     203          264 :     return to_pg_wchar(unicode_lowercase_simple(to_char32(wc)));
     204              : }
     205              : 
     206              : static const struct ctype_methods ctype_methods_builtin = {
     207              :     .strlower = strlower_builtin,
     208              :     .strtitle = strtitle_builtin,
     209              :     .strupper = strupper_builtin,
     210              :     .strfold = strfold_builtin,
     211              :     /* uses plain ASCII semantics for historical reasons */
     212              :     .downcase_ident = NULL,
     213              :     .wc_isdigit = wc_isdigit_builtin,
     214              :     .wc_isalpha = wc_isalpha_builtin,
     215              :     .wc_isalnum = wc_isalnum_builtin,
     216              :     .wc_isupper = wc_isupper_builtin,
     217              :     .wc_islower = wc_islower_builtin,
     218              :     .wc_isgraph = wc_isgraph_builtin,
     219              :     .wc_isprint = wc_isprint_builtin,
     220              :     .wc_ispunct = wc_ispunct_builtin,
     221              :     .wc_isspace = wc_isspace_builtin,
     222              :     .wc_isxdigit = wc_isxdigit_builtin,
     223              :     .wc_iscased = wc_iscased_builtin,
     224              :     .wc_tolower = wc_tolower_builtin,
     225              :     .wc_toupper = wc_toupper_builtin,
     226              : };
     227              : 
     228              : pg_locale_t
     229          946 : create_pg_locale_builtin(Oid collid, MemoryContext context)
     230              : {
     231              :     const char *locstr;
     232              :     pg_locale_t result;
     233              : 
     234          946 :     if (collid == DEFAULT_COLLATION_OID)
     235              :     {
     236              :         HeapTuple   tp;
     237              :         Datum       datum;
     238              : 
     239          920 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     240          920 :         if (!HeapTupleIsValid(tp))
     241            0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     242          920 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     243              :                                        Anum_pg_database_datlocale);
     244          920 :         locstr = TextDatumGetCString(datum);
     245          920 :         ReleaseSysCache(tp);
     246              :     }
     247              :     else
     248              :     {
     249              :         HeapTuple   tp;
     250              :         Datum       datum;
     251              : 
     252           26 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     253           26 :         if (!HeapTupleIsValid(tp))
     254            0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     255           26 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     256              :                                        Anum_pg_collation_colllocale);
     257           26 :         locstr = TextDatumGetCString(datum);
     258           26 :         ReleaseSysCache(tp);
     259              :     }
     260              : 
     261          946 :     builtin_validate_locale(GetDatabaseEncoding(), locstr);
     262              : 
     263          946 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     264              : 
     265          946 :     result->builtin.locale = MemoryContextStrdup(context, locstr);
     266          946 :     result->builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
     267          946 :     result->deterministic = true;
     268          946 :     result->collate_is_c = true;
     269          946 :     result->ctype_is_c = (strcmp(locstr, "C") == 0);
     270          946 :     if (!result->ctype_is_c)
     271          930 :         result->ctype = &ctype_methods_builtin;
     272              : 
     273          946 :     return result;
     274              : }
     275              : 
     276              : char *
     277          979 : get_collation_actual_version_builtin(const char *collcollate)
     278              : {
     279              :     /*
     280              :      * The only two supported locales (C and C.UTF-8) are both based on memcmp
     281              :      * and are not expected to change, but track the version anyway.
     282              :      *
     283              :      * Note that the character semantics may change for some locales, but the
     284              :      * collation version only tracks changes to sort order.
     285              :      */
     286          979 :     if (strcmp(collcollate, "C") == 0)
     287           31 :         return "1";
     288          948 :     else if (strcmp(collcollate, "C.UTF-8") == 0)
     289          938 :         return "1";
     290           10 :     else if (strcmp(collcollate, "PG_UNICODE_FAST") == 0)
     291           10 :         return "1";
     292              :     else
     293            0 :         ereport(ERROR,
     294              :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     295              :                  errmsg("invalid locale name \"%s\" for builtin provider",
     296              :                         collcollate)));
     297              : 
     298              :     return NULL;                /* keep compiler quiet */
     299              : }
        

Generated by: LCOV version 2.0-1