LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_builtin.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 80 92 87.0 %
Date: 2025-12-03 05:18:44 Functions: 18 22 81.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-----------------------------------------------------------------------
       2             :  *
       3             :  * PostgreSQL locale utilities for builtin provider
       4             :  *
       5             :  * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
       6             :  *
       7             :  * src/backend/utils/adt/pg_locale_builtin.c
       8             :  *
       9             :  *-----------------------------------------------------------------------
      10             :  */
      11             : 
      12             : #include "postgres.h"
      13             : 
      14             : #include "catalog/pg_database.h"
      15             : #include "catalog/pg_collation.h"
      16             : #include "common/unicode_case.h"
      17             : #include "common/unicode_category.h"
      18             : #include "miscadmin.h"
      19             : #include "utils/builtins.h"
      20             : #include "utils/pg_locale.h"
      21             : #include "utils/syscache.h"
      22             : 
      23             : extern pg_locale_t create_pg_locale_builtin(Oid collid,
      24             :                                             MemoryContext context);
      25             : extern char *get_collation_actual_version_builtin(const char *collcollate);
      26             : 
      27             : struct WordBoundaryState
      28             : {
      29             :     const char *str;
      30             :     size_t      len;
      31             :     size_t      offset;
      32             :     bool        posix;
      33             :     bool        init;
      34             :     bool        prev_alnum;
      35             : };
      36             : 
      37             : /*
      38             :  * In UTF-8, pg_wchar is guaranteed to be the code point value.
      39             :  */
      40             : static inline char32_t
      41      225726 : to_char32(pg_wchar wc)
      42             : {
      43             :     Assert(GetDatabaseEncoding() == PG_UTF8);
      44      225726 :     return (char32_t) wc;
      45             : }
      46             : 
      47             : static inline pg_wchar
      48        1056 : to_pg_wchar(char32_t c32)
      49             : {
      50             :     Assert(GetDatabaseEncoding() == PG_UTF8);
      51        1056 :     return (pg_wchar) c32;
      52             : }
      53             : 
      54             : /*
      55             :  * Simple word boundary iterator that draws boundaries each time the result of
      56             :  * pg_u_isalnum() changes.
      57             :  */
      58             : static size_t
      59         824 : initcap_wbnext(void *state)
      60             : {
      61         824 :     struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
      62             : 
      63        1700 :     while (wbstate->offset < wbstate->len &&
      64        1506 :            wbstate->str[wbstate->offset] != '\0')
      65             :     {
      66        1506 :         char32_t    u = utf8_to_unicode((unsigned char *) wbstate->str +
      67        1506 :                                         wbstate->offset);
      68        1506 :         bool        curr_alnum = pg_u_isalnum(u, wbstate->posix);
      69             : 
      70        1506 :         if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
      71             :         {
      72         630 :             size_t      prev_offset = wbstate->offset;
      73             : 
      74         630 :             wbstate->init = true;
      75         630 :             wbstate->offset += unicode_utf8len(u);
      76         630 :             wbstate->prev_alnum = curr_alnum;
      77         630 :             return prev_offset;
      78             :         }
      79             : 
      80         876 :         wbstate->offset += unicode_utf8len(u);
      81             :     }
      82             : 
      83         194 :     return wbstate->len;
      84             : }
      85             : 
      86             : static size_t
      87       12026 : strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
      88             :                  pg_locale_t locale)
      89             : {
      90       24052 :     return unicode_strlower(dest, destsize, src, srclen,
      91       12026 :                             locale->builtin.casemap_full);
      92             : }
      93             : 
      94             : static size_t
      95         194 : strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
      96             :                  pg_locale_t locale)
      97             : {
      98         194 :     struct WordBoundaryState wbstate = {
      99             :         .str = src,
     100             :         .len = srclen,
     101             :         .offset = 0,
     102         194 :         .posix = !locale->builtin.casemap_full,
     103             :         .init = false,
     104             :         .prev_alnum = false,
     105             :     };
     106             : 
     107         388 :     return unicode_strtitle(dest, destsize, src, srclen,
     108         194 :                             locale->builtin.casemap_full,
     109             :                             initcap_wbnext, &wbstate);
     110             : }
     111             : 
     112             : static size_t
     113      316918 : strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
     114             :                  pg_locale_t locale)
     115             : {
     116      633836 :     return unicode_strupper(dest, destsize, src, srclen,
     117      316918 :                             locale->builtin.casemap_full);
     118             : }
     119             : 
     120             : static size_t
     121          12 : strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
     122             :                 pg_locale_t locale)
     123             : {
     124          24 :     return unicode_strfold(dest, destsize, src, srclen,
     125          12 :                            locale->builtin.casemap_full);
     126             : }
     127             : 
     128             : static bool
     129       78016 : wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
     130             : {
     131       78016 :     return pg_u_isdigit(to_char32(wc), !locale->builtin.casemap_full);
     132             : }
     133             : 
     134             : static bool
     135       39654 : wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
     136             : {
     137       39654 :     return pg_u_isalpha(to_char32(wc));
     138             : }
     139             : 
     140             : static bool
     141       41218 : wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
     142             : {
     143       41218 :     return pg_u_isalnum(to_char32(wc), !locale->builtin.casemap_full);
     144             : }
     145             : 
     146             : static bool
     147       24576 : wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
     148             : {
     149       24576 :     return pg_u_isupper(to_char32(wc));
     150             : }
     151             : 
     152             : static bool
     153           0 : wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
     154             : {
     155           0 :     return pg_u_islower(to_char32(wc));
     156             : }
     157             : 
     158             : static bool
     159           0 : wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
     160             : {
     161           0 :     return pg_u_isgraph(to_char32(wc));
     162             : }
     163             : 
     164             : static bool
     165           0 : wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
     166             : {
     167           0 :     return pg_u_isprint(to_char32(wc));
     168             : }
     169             : 
     170             : static bool
     171       24576 : wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
     172             : {
     173       24576 :     return pg_u_ispunct(to_char32(wc), !locale->builtin.casemap_full);
     174             : }
     175             : 
     176             : static bool
     177       16624 : wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
     178             : {
     179       16624 :     return pg_u_isspace(to_char32(wc));
     180             : }
     181             : 
     182             : static bool
     183           6 : wc_isxdigit_builtin(pg_wchar wc, pg_locale_t locale)
     184             : {
     185           6 :     return pg_u_isxdigit(to_char32(wc), !locale->builtin.casemap_full);
     186             : }
     187             : 
     188             : static bool
     189           0 : char_is_cased_builtin(char ch, pg_locale_t locale)
     190             : {
     191           0 :     return IS_HIGHBIT_SET(ch) ||
     192           0 :         (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
     193             : }
     194             : 
     195             : static pg_wchar
     196         528 : wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
     197             : {
     198         528 :     return to_pg_wchar(unicode_uppercase_simple(to_char32(wc)));
     199             : }
     200             : 
     201             : static pg_wchar
     202         528 : wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
     203             : {
     204         528 :     return to_pg_wchar(unicode_lowercase_simple(to_char32(wc)));
     205             : }
     206             : 
     207             : static const struct ctype_methods ctype_methods_builtin = {
     208             :     .strlower = strlower_builtin,
     209             :     .strtitle = strtitle_builtin,
     210             :     .strupper = strupper_builtin,
     211             :     .strfold = strfold_builtin,
     212             :     .wc_isdigit = wc_isdigit_builtin,
     213             :     .wc_isalpha = wc_isalpha_builtin,
     214             :     .wc_isalnum = wc_isalnum_builtin,
     215             :     .wc_isupper = wc_isupper_builtin,
     216             :     .wc_islower = wc_islower_builtin,
     217             :     .wc_isgraph = wc_isgraph_builtin,
     218             :     .wc_isprint = wc_isprint_builtin,
     219             :     .wc_ispunct = wc_ispunct_builtin,
     220             :     .wc_isspace = wc_isspace_builtin,
     221             :     .wc_isxdigit = wc_isxdigit_builtin,
     222             :     .char_is_cased = char_is_cased_builtin,
     223             :     .wc_tolower = wc_tolower_builtin,
     224             :     .wc_toupper = wc_toupper_builtin,
     225             : };
     226             : 
     227             : pg_locale_t
     228        1836 : create_pg_locale_builtin(Oid collid, MemoryContext context)
     229             : {
     230             :     const char *locstr;
     231             :     pg_locale_t result;
     232             : 
     233        1836 :     if (collid == DEFAULT_COLLATION_OID)
     234             :     {
     235             :         HeapTuple   tp;
     236             :         Datum       datum;
     237             : 
     238        1784 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     239        1784 :         if (!HeapTupleIsValid(tp))
     240           0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     241        1784 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     242             :                                        Anum_pg_database_datlocale);
     243        1784 :         locstr = TextDatumGetCString(datum);
     244        1784 :         ReleaseSysCache(tp);
     245             :     }
     246             :     else
     247             :     {
     248             :         HeapTuple   tp;
     249             :         Datum       datum;
     250             : 
     251          52 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     252          52 :         if (!HeapTupleIsValid(tp))
     253           0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     254          52 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     255             :                                        Anum_pg_collation_colllocale);
     256          52 :         locstr = TextDatumGetCString(datum);
     257          52 :         ReleaseSysCache(tp);
     258             :     }
     259             : 
     260        1836 :     builtin_validate_locale(GetDatabaseEncoding(), locstr);
     261             : 
     262        1836 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     263             : 
     264        1836 :     result->builtin.locale = MemoryContextStrdup(context, locstr);
     265        1836 :     result->builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
     266        1836 :     result->deterministic = true;
     267        1836 :     result->collate_is_c = true;
     268        1836 :     result->ctype_is_c = (strcmp(locstr, "C") == 0);
     269        1836 :     if (!result->ctype_is_c)
     270        1804 :         result->ctype = &ctype_methods_builtin;
     271             : 
     272        1836 :     return result;
     273             : }
     274             : 
     275             : char *
     276        1904 : get_collation_actual_version_builtin(const char *collcollate)
     277             : {
     278             :     /*
     279             :      * All supported locales (C, C.UTF-8, and PG_UNICODE_FAST) are based on
     280             :      * memcmp and are not expected to change, but track the version anyway.
     281             :      *
     282             :      * Note that the character semantics may change for some locales, but the
     283             :      * collation version only tracks changes to sort order.
     284             :      */
     285        1904 :     if (strcmp(collcollate, "C") == 0)
     286          62 :         return "1";
     287        1842 :     else if (strcmp(collcollate, "C.UTF-8") == 0)
     288        1822 :         return "1";
     289          20 :     else if (strcmp(collcollate, "PG_UNICODE_FAST") == 0)
     290          20 :         return "1";
     291             :     else
     292           0 :         ereport(ERROR,
     293             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     294             :                  errmsg("invalid locale name \"%s\" for builtin provider",
     295             :                         collcollate)));
     296             : 
     297             :     return NULL;                /* keep compiler quiet */
     298             : }

Generated by: LCOV version 1.16