LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_builtin.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 57 60 95.0 %
Date: 2025-04-24 12:15:10 Functions: 7 7 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-----------------------------------------------------------------------
       2             :  *
       3             :  * PostgreSQL locale utilities for builtin provider
       4             :  *
       5             :  * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
       6             :  *
       7             :  * src/backend/utils/adt/pg_locale_builtin.c
       8             :  *
       9             :  *-----------------------------------------------------------------------
      10             :  */
      11             : 
      12             : #include "postgres.h"
      13             : 
      14             : #include "catalog/pg_database.h"
      15             : #include "catalog/pg_collation.h"
      16             : #include "common/unicode_case.h"
      17             : #include "common/unicode_category.h"
      18             : #include "mb/pg_wchar.h"
      19             : #include "miscadmin.h"
      20             : #include "utils/builtins.h"
      21             : #include "utils/memutils.h"
      22             : #include "utils/pg_locale.h"
      23             : #include "utils/syscache.h"
      24             : 
      25             : extern pg_locale_t create_pg_locale_builtin(Oid collid,
      26             :                                             MemoryContext context);
      27             : extern char *get_collation_actual_version_builtin(const char *collcollate);
      28             : extern size_t strlower_builtin(char *dest, size_t destsize, const char *src,
      29             :                                ssize_t srclen, pg_locale_t locale);
      30             : extern size_t strtitle_builtin(char *dest, size_t destsize, const char *src,
      31             :                                ssize_t srclen, pg_locale_t locale);
      32             : extern size_t strupper_builtin(char *dest, size_t destsize, const char *src,
      33             :                                ssize_t srclen, pg_locale_t locale);
      34             : extern size_t strfold_builtin(char *dest, size_t destsize, const char *src,
      35             :                               ssize_t srclen, pg_locale_t locale);
      36             : 
      37             : 
      /*
       * Iteration state handed to initcap_wbnext() through its void pointer
       * argument.  strtitle_builtin() fills it in before starting a scan.
       */
      struct WordBoundaryState
      {
          /* string being scanned */
          const char *str;
          /* scan limit in bytes; a NUL byte also ends the scan */
          size_t      len;
          /* byte offset of the next character to examine */
          size_t      offset;
          /* passed through as the second argument of pg_u_isalnum() */
          bool        posix;
          /* false until the first boundary has been reported */
          bool        init;
          /* pg_u_isalnum() result recorded at the last reported boundary */
          bool        prev_alnum;
      };
      47             : 
      48             : /*
      49             :  * Simple word boundary iterator that draws boundaries each time the result of
      50             :  * pg_u_isalnum() changes.
      51             :  */
      52             : static size_t
      53         824 : initcap_wbnext(void *state)
      54             : {
      55         824 :     struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
      56             : 
      57        1700 :     while (wbstate->offset < wbstate->len &&
      58        1506 :            wbstate->str[wbstate->offset] != '\0')
      59             :     {
      60        1506 :         pg_wchar    u = utf8_to_unicode((unsigned char *) wbstate->str +
      61        1506 :                                         wbstate->offset);
      62        1506 :         bool        curr_alnum = pg_u_isalnum(u, wbstate->posix);
      63             : 
      64        1506 :         if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
      65             :         {
      66         630 :             size_t      prev_offset = wbstate->offset;
      67             : 
      68         630 :             wbstate->init = true;
      69         630 :             wbstate->offset += unicode_utf8len(u);
      70         630 :             wbstate->prev_alnum = curr_alnum;
      71         630 :             return prev_offset;
      72             :         }
      73             : 
      74         876 :         wbstate->offset += unicode_utf8len(u);
      75             :     }
      76             : 
      77         194 :     return wbstate->len;
      78             : }
      79             : 
      80             : size_t
      81       13022 : strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
      82             :                  pg_locale_t locale)
      83             : {
      84       26044 :     return unicode_strlower(dest, destsize, src, srclen,
      85       13022 :                             locale->info.builtin.casemap_full);
      86             : }
      87             : 
      88             : size_t
      89         194 : strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
      90             :                  pg_locale_t locale)
      91             : {
      92         194 :     struct WordBoundaryState wbstate = {
      93             :         .str = src,
      94             :         .len = srclen,
      95             :         .offset = 0,
      96         194 :         .posix = !locale->info.builtin.casemap_full,
      97             :         .init = false,
      98             :         .prev_alnum = false,
      99             :     };
     100             : 
     101         388 :     return unicode_strtitle(dest, destsize, src, srclen,
     102         194 :                             locale->info.builtin.casemap_full,
     103             :                             initcap_wbnext, &wbstate);
     104             : }
     105             : 
     106             : size_t
     107      316882 : strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
     108             :                  pg_locale_t locale)
     109             : {
     110      633764 :     return unicode_strupper(dest, destsize, src, srclen,
     111      316882 :                             locale->info.builtin.casemap_full);
     112             : }
     113             : 
     114             : size_t
     115          12 : strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
     116             :                 pg_locale_t locale)
     117             : {
     118          24 :     return unicode_strfold(dest, destsize, src, srclen,
     119          12 :                            locale->info.builtin.casemap_full);
     120             : }
     121             : 
     122             : pg_locale_t
     123        1794 : create_pg_locale_builtin(Oid collid, MemoryContext context)
     124             : {
     125             :     const char *locstr;
     126             :     pg_locale_t result;
     127             : 
     128        1794 :     if (collid == DEFAULT_COLLATION_OID)
     129             :     {
     130             :         HeapTuple   tp;
     131             :         Datum       datum;
     132             : 
     133        1738 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     134        1738 :         if (!HeapTupleIsValid(tp))
     135           0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     136        1738 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     137             :                                        Anum_pg_database_datlocale);
     138        1738 :         locstr = TextDatumGetCString(datum);
     139        1738 :         ReleaseSysCache(tp);
     140             :     }
     141             :     else
     142             :     {
     143             :         HeapTuple   tp;
     144             :         Datum       datum;
     145             : 
     146          56 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     147          56 :         if (!HeapTupleIsValid(tp))
     148           0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     149          56 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     150             :                                        Anum_pg_collation_colllocale);
     151          56 :         locstr = TextDatumGetCString(datum);
     152          56 :         ReleaseSysCache(tp);
     153             :     }
     154             : 
     155        1794 :     builtin_validate_locale(GetDatabaseEncoding(), locstr);
     156             : 
     157        1794 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     158             : 
     159        1794 :     result->info.builtin.locale = MemoryContextStrdup(context, locstr);
     160        1794 :     result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
     161        1794 :     result->provider = COLLPROVIDER_BUILTIN;
     162        1794 :     result->deterministic = true;
     163        1794 :     result->collate_is_c = true;
     164        1794 :     result->ctype_is_c = (strcmp(locstr, "C") == 0);
     165             : 
     166        1794 :     return result;
     167             : }
     168             : 
     169             : char *
     170        1868 : get_collation_actual_version_builtin(const char *collcollate)
     171             : {
     172             :     /*
     173             :      * The only two supported locales (C and C.UTF-8) are both based on memcmp
     174             :      * and are not expected to change, but track the version anyway.
     175             :      *
     176             :      * Note that the character semantics may change for some locales, but the
     177             :      * collation version only tracks changes to sort order.
     178             :      */
     179        1868 :     if (strcmp(collcollate, "C") == 0)
     180          48 :         return "1";
     181        1820 :     else if (strcmp(collcollate, "C.UTF-8") == 0)
     182        1796 :         return "1";
     183          24 :     else if (strcmp(collcollate, "PG_UNICODE_FAST") == 0)
     184          24 :         return "1";
     185             :     else
     186           0 :         ereport(ERROR,
     187             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     188             :                  errmsg("invalid locale name \"%s\" for builtin provider",
     189             :                         collcollate)));
     190             : 
     191             :     return NULL;                /* keep compiler quiet */
     192             : }

Generated by: LCOV version 1.14