Line data Source code
1 : /*-----------------------------------------------------------------------
2 : *
3 : * PostgreSQL locale utilities for builtin provider
4 : *
5 : * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
6 : *
7 : * src/backend/utils/adt/pg_locale_builtin.c
8 : *
9 : *-----------------------------------------------------------------------
10 : */
11 :
12 : #include "postgres.h"
13 :
14 : #include "catalog/pg_database.h"
15 : #include "catalog/pg_collation.h"
16 : #include "common/unicode_case.h"
17 : #include "common/unicode_category.h"
18 : #include "mb/pg_wchar.h"
19 : #include "miscadmin.h"
20 : #include "utils/builtins.h"
21 : #include "utils/memutils.h"
22 : #include "utils/pg_locale.h"
23 : #include "utils/syscache.h"
24 :
25 : extern pg_locale_t create_pg_locale_builtin(Oid collid,
26 : MemoryContext context);
27 : extern char *get_collation_actual_version_builtin(const char *collcollate);
28 : extern size_t strlower_builtin(char *dest, size_t destsize, const char *src,
29 : ssize_t srclen, pg_locale_t locale);
30 : extern size_t strtitle_builtin(char *dest, size_t destsize, const char *src,
31 : ssize_t srclen, pg_locale_t locale);
32 : extern size_t strupper_builtin(char *dest, size_t destsize, const char *src,
33 : ssize_t srclen, pg_locale_t locale);
34 : extern size_t strfold_builtin(char *dest, size_t destsize, const char *src,
35 : ssize_t srclen, pg_locale_t locale);
36 :
37 :
/*
 * Iteration state for initcap_wbnext(), the word-boundary callback used by
 * strtitle_builtin().
 */
struct WordBoundaryState
{
	const char *str;			/* string being scanned */
	size_t		len;			/* scanning stops once offset reaches this */
	size_t		offset;			/* byte offset of the next character to
								 * examine */
	bool		posix;			/* forwarded to pg_u_isalnum() */
	bool		init;			/* false until the first boundary has been
								 * reported */
	bool		prev_alnum;		/* pg_u_isalnum() result for the character at
								 * the most recently reported boundary */
};
47 :
48 : /*
49 : * Simple word boundary iterator that draws boundaries each time the result of
50 : * pg_u_isalnum() changes.
51 : */
52 : static size_t
53 824 : initcap_wbnext(void *state)
54 : {
55 824 : struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
56 :
57 1700 : while (wbstate->offset < wbstate->len &&
58 1506 : wbstate->str[wbstate->offset] != '\0')
59 : {
60 1506 : pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
61 1506 : wbstate->offset);
62 1506 : bool curr_alnum = pg_u_isalnum(u, wbstate->posix);
63 :
64 1506 : if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
65 : {
66 630 : size_t prev_offset = wbstate->offset;
67 :
68 630 : wbstate->init = true;
69 630 : wbstate->offset += unicode_utf8len(u);
70 630 : wbstate->prev_alnum = curr_alnum;
71 630 : return prev_offset;
72 : }
73 :
74 876 : wbstate->offset += unicode_utf8len(u);
75 : }
76 :
77 194 : return wbstate->len;
78 : }
79 :
80 : size_t
81 13022 : strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
82 : pg_locale_t locale)
83 : {
84 26044 : return unicode_strlower(dest, destsize, src, srclen,
85 13022 : locale->info.builtin.casemap_full);
86 : }
87 :
88 : size_t
89 194 : strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
90 : pg_locale_t locale)
91 : {
92 194 : struct WordBoundaryState wbstate = {
93 : .str = src,
94 : .len = srclen,
95 : .offset = 0,
96 194 : .posix = !locale->info.builtin.casemap_full,
97 : .init = false,
98 : .prev_alnum = false,
99 : };
100 :
101 388 : return unicode_strtitle(dest, destsize, src, srclen,
102 194 : locale->info.builtin.casemap_full,
103 : initcap_wbnext, &wbstate);
104 : }
105 :
106 : size_t
107 316882 : strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
108 : pg_locale_t locale)
109 : {
110 633764 : return unicode_strupper(dest, destsize, src, srclen,
111 316882 : locale->info.builtin.casemap_full);
112 : }
113 :
114 : size_t
115 12 : strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
116 : pg_locale_t locale)
117 : {
118 24 : return unicode_strfold(dest, destsize, src, srclen,
119 12 : locale->info.builtin.casemap_full);
120 : }
121 :
122 : pg_locale_t
123 1794 : create_pg_locale_builtin(Oid collid, MemoryContext context)
124 : {
125 : const char *locstr;
126 : pg_locale_t result;
127 :
128 1794 : if (collid == DEFAULT_COLLATION_OID)
129 : {
130 : HeapTuple tp;
131 : Datum datum;
132 :
133 1738 : tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
134 1738 : if (!HeapTupleIsValid(tp))
135 0 : elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
136 1738 : datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
137 : Anum_pg_database_datlocale);
138 1738 : locstr = TextDatumGetCString(datum);
139 1738 : ReleaseSysCache(tp);
140 : }
141 : else
142 : {
143 : HeapTuple tp;
144 : Datum datum;
145 :
146 56 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
147 56 : if (!HeapTupleIsValid(tp))
148 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
149 56 : datum = SysCacheGetAttrNotNull(COLLOID, tp,
150 : Anum_pg_collation_colllocale);
151 56 : locstr = TextDatumGetCString(datum);
152 56 : ReleaseSysCache(tp);
153 : }
154 :
155 1794 : builtin_validate_locale(GetDatabaseEncoding(), locstr);
156 :
157 1794 : result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
158 :
159 1794 : result->info.builtin.locale = MemoryContextStrdup(context, locstr);
160 1794 : result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
161 1794 : result->provider = COLLPROVIDER_BUILTIN;
162 1794 : result->deterministic = true;
163 1794 : result->collate_is_c = true;
164 1794 : result->ctype_is_c = (strcmp(locstr, "C") == 0);
165 :
166 1794 : return result;
167 : }
168 :
169 : char *
170 1868 : get_collation_actual_version_builtin(const char *collcollate)
171 : {
172 : /*
173 : * The only two supported locales (C and C.UTF-8) are both based on memcmp
174 : * and are not expected to change, but track the version anyway.
175 : *
176 : * Note that the character semantics may change for some locales, but the
177 : * collation version only tracks changes to sort order.
178 : */
179 1868 : if (strcmp(collcollate, "C") == 0)
180 48 : return "1";
181 1820 : else if (strcmp(collcollate, "C.UTF-8") == 0)
182 1796 : return "1";
183 24 : else if (strcmp(collcollate, "PG_UNICODE_FAST") == 0)
184 24 : return "1";
185 : else
186 0 : ereport(ERROR,
187 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
188 : errmsg("invalid locale name \"%s\" for builtin provider",
189 : collcollate)));
190 :
191 : return NULL; /* keep compiler quiet */
192 : }
|