Line data Source code
1 : /*-----------------------------------------------------------------------
2 : *
3 : * PostgreSQL locale utilities for builtin provider
4 : *
5 : * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
6 : *
7 : * src/backend/utils/adt/pg_locale_builtin.c
8 : *
9 : *-----------------------------------------------------------------------
10 : */
11 :
12 : #include "postgres.h"
13 :
14 : #include "catalog/pg_database.h"
15 : #include "catalog/pg_collation.h"
16 : #include "common/unicode_case.h"
17 : #include "common/unicode_category.h"
18 : #include "mb/pg_wchar.h"
19 : #include "miscadmin.h"
20 : #include "utils/builtins.h"
21 : #include "utils/memutils.h"
22 : #include "utils/pg_locale.h"
23 : #include "utils/syscache.h"
24 :
25 : extern pg_locale_t create_pg_locale_builtin(Oid collid,
26 : MemoryContext context);
27 : extern char *get_collation_actual_version_builtin(const char *collcollate);
28 : extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
29 : ssize_t srclen, pg_locale_t locale);
30 : extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
31 : ssize_t srclen, pg_locale_t locale);
32 : extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
33 : ssize_t srclen, pg_locale_t locale);
34 : extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src,
35 : ssize_t srclen, pg_locale_t locale);
36 :
37 :
/*
 * Iteration state for initcap_wbnext(), passed to it through an opaque
 * pointer.
 */
struct WordBoundaryState
{
	const char *str;			/* text being scanned */
	size_t		len;			/* scan limit, as a byte offset into str */
	size_t		offset;			/* byte offset of the next character to read */
	bool		init;			/* false until the first boundary is reported */
	bool		prev_alnum;		/* pg_u_isalnum() result recorded at the last
								 * reported boundary */
};
46 :
47 : /*
48 : * Simple word boundary iterator that draws boundaries each time the result of
49 : * pg_u_isalnum() changes.
50 : */
51 : static size_t
52 680 : initcap_wbnext(void *state)
53 : {
54 680 : struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
55 :
56 1484 : while (wbstate->offset < wbstate->len &&
57 1314 : wbstate->str[wbstate->offset] != '\0')
58 : {
59 1314 : pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
60 1314 : wbstate->offset);
61 1314 : bool curr_alnum = pg_u_isalnum(u, true);
62 :
63 1314 : if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
64 : {
65 510 : size_t prev_offset = wbstate->offset;
66 :
67 510 : wbstate->init = true;
68 510 : wbstate->offset += unicode_utf8len(u);
69 510 : wbstate->prev_alnum = curr_alnum;
70 510 : return prev_offset;
71 : }
72 :
73 804 : wbstate->offset += unicode_utf8len(u);
74 : }
75 :
76 170 : return wbstate->len;
77 : }
78 :
79 : size_t
80 11922 : strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
81 : pg_locale_t locale)
82 : {
83 23844 : return unicode_strlower(dest, destsize, src, srclen,
84 11922 : locale->info.builtin.casemap_full);
85 : }
86 :
87 : size_t
88 170 : strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
89 : pg_locale_t locale)
90 : {
91 170 : struct WordBoundaryState wbstate = {
92 : .str = src,
93 : .len = srclen,
94 : .offset = 0,
95 : .init = false,
96 : .prev_alnum = false,
97 : };
98 :
99 340 : return unicode_strtitle(dest, destsize, src, srclen,
100 170 : locale->info.builtin.casemap_full,
101 : initcap_wbnext, &wbstate);
102 : }
103 :
104 : size_t
105 316858 : strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
106 : pg_locale_t locale)
107 : {
108 633716 : return unicode_strupper(dest, destsize, src, srclen,
109 316858 : locale->info.builtin.casemap_full);
110 : }
111 :
112 : size_t
113 12 : strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
114 : pg_locale_t locale)
115 : {
116 24 : return unicode_strfold(dest, destsize, src, srclen,
117 12 : locale->info.builtin.casemap_full);
118 : }
119 :
120 : pg_locale_t
121 1742 : create_pg_locale_builtin(Oid collid, MemoryContext context)
122 : {
123 : const char *locstr;
124 : pg_locale_t result;
125 :
126 1742 : if (collid == DEFAULT_COLLATION_OID)
127 : {
128 : HeapTuple tp;
129 : Datum datum;
130 :
131 1690 : tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
132 1690 : if (!HeapTupleIsValid(tp))
133 0 : elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
134 1690 : datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
135 : Anum_pg_database_datlocale);
136 1690 : locstr = TextDatumGetCString(datum);
137 1690 : ReleaseSysCache(tp);
138 : }
139 : else
140 : {
141 : HeapTuple tp;
142 : Datum datum;
143 :
144 52 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
145 52 : if (!HeapTupleIsValid(tp))
146 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
147 52 : datum = SysCacheGetAttrNotNull(COLLOID, tp,
148 : Anum_pg_collation_colllocale);
149 52 : locstr = TextDatumGetCString(datum);
150 52 : ReleaseSysCache(tp);
151 : }
152 :
153 1742 : builtin_validate_locale(GetDatabaseEncoding(), locstr);
154 :
155 1742 : result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
156 :
157 1742 : result->info.builtin.locale = MemoryContextStrdup(context, locstr);
158 1742 : result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
159 1742 : result->provider = COLLPROVIDER_BUILTIN;
160 1742 : result->deterministic = true;
161 1742 : result->collate_is_c = true;
162 1742 : result->ctype_is_c = (strcmp(locstr, "C") == 0);
163 :
164 1742 : return result;
165 : }
166 :
167 : char *
168 1808 : get_collation_actual_version_builtin(const char *collcollate)
169 : {
170 : /*
171 : * The only two supported locales (C and C.UTF-8) are both based on memcmp
172 : * and are not expected to change, but track the version anyway.
173 : *
174 : * Note that the character semantics may change for some locales, but the
175 : * collation version only tracks changes to sort order.
176 : */
177 1808 : if (strcmp(collcollate, "C") == 0)
178 48 : return "1";
179 1760 : else if (strcmp(collcollate, "C.UTF-8") == 0)
180 1740 : return "1";
181 20 : else if (strcmp(collcollate, "PG_UNICODE_FAST") == 0)
182 20 : return "1";
183 : else
184 0 : ereport(ERROR,
185 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
186 : errmsg("invalid locale name \"%s\" for builtin provider",
187 : collcollate)));
188 :
189 : return NULL; /* keep compiler quiet */
190 : }
|