Line data Source code
1 : /*-----------------------------------------------------------------------
2 : *
3 : * PostgreSQL locale utilities for builtin provider
4 : *
5 : * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
6 : *
7 : * src/backend/utils/adt/pg_locale_builtin.c
8 : *
9 : *-----------------------------------------------------------------------
10 : */
11 :
12 : #include "postgres.h"
13 :
14 : #include "catalog/pg_database.h"
15 : #include "catalog/pg_collation.h"
16 : #include "common/unicode_case.h"
17 : #include "common/unicode_category.h"
18 : #include "mb/pg_wchar.h"
19 : #include "miscadmin.h"
20 : #include "utils/builtins.h"
21 : #include "utils/memutils.h"
22 : #include "utils/pg_locale.h"
23 : #include "utils/syscache.h"
24 :
25 : extern pg_locale_t create_pg_locale_builtin(Oid collid,
26 : MemoryContext context);
27 : extern char *get_collation_actual_version_builtin(const char *collcollate);
28 : extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
29 : ssize_t srclen, pg_locale_t locale);
30 : extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
31 : ssize_t srclen, pg_locale_t locale);
32 : extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
33 : ssize_t srclen, pg_locale_t locale);
34 :
35 :
/*
 * Iteration state for initcap_wbnext().  One instance describes a single
 * pass over one input string; strtitle_builtin() sets it up and hands it to
 * the title-casing routine as the opaque callback argument.
 */
struct WordBoundaryState
{
	const char *str;			/* text being scanned */
	size_t		len;			/* upper bound for 'offset', in bytes */
	size_t		offset;			/* byte offset of the next character to
								 * examine */
	bool		init;			/* false until the first boundary has been
								 * reported */
	bool		prev_alnum;		/* alnum classification of the character at
								 * the most recently reported boundary */
};
44 :
45 : /*
46 : * Simple word boundary iterator that draws boundaries each time the result of
47 : * pg_u_isalnum() changes.
48 : */
49 : static size_t
50 344 : initcap_wbnext(void *state)
51 : {
52 344 : struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
53 :
54 752 : while (wbstate->offset < wbstate->len &&
55 666 : wbstate->str[wbstate->offset] != '\0')
56 : {
57 666 : pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
58 666 : wbstate->offset);
59 666 : bool curr_alnum = pg_u_isalnum(u, true);
60 :
61 666 : if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
62 : {
63 258 : size_t prev_offset = wbstate->offset;
64 :
65 258 : wbstate->init = true;
66 258 : wbstate->offset += unicode_utf8len(u);
67 258 : wbstate->prev_alnum = curr_alnum;
68 258 : return prev_offset;
69 : }
70 :
71 408 : wbstate->offset += unicode_utf8len(u);
72 : }
73 :
74 86 : return wbstate->len;
75 : }
76 :
77 : size_t
78 11772 : strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
79 : pg_locale_t locale)
80 : {
81 11772 : return unicode_strlower(dest, destsize, src, srclen);
82 : }
83 :
84 : size_t
85 86 : strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
86 : pg_locale_t locale)
87 : {
88 86 : struct WordBoundaryState wbstate = {
89 : .str = src,
90 : .len = srclen,
91 : .offset = 0,
92 : .init = false,
93 : .prev_alnum = false,
94 : };
95 :
96 86 : return unicode_strtitle(dest, destsize, src, srclen,
97 : initcap_wbnext, &wbstate);
98 : }
99 :
100 : size_t
101 316786 : strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
102 : pg_locale_t locale)
103 : {
104 316786 : return unicode_strupper(dest, destsize, src, srclen);
105 : }
106 :
107 : pg_locale_t
108 1724 : create_pg_locale_builtin(Oid collid, MemoryContext context)
109 : {
110 : const char *locstr;
111 : pg_locale_t result;
112 :
113 1724 : if (collid == DEFAULT_COLLATION_OID)
114 : {
115 : HeapTuple tp;
116 : Datum datum;
117 :
118 1686 : tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
119 1686 : if (!HeapTupleIsValid(tp))
120 0 : elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
121 1686 : datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
122 : Anum_pg_database_datlocale);
123 1686 : locstr = TextDatumGetCString(datum);
124 1686 : ReleaseSysCache(tp);
125 : }
126 : else
127 : {
128 : HeapTuple tp;
129 : Datum datum;
130 :
131 38 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
132 38 : if (!HeapTupleIsValid(tp))
133 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
134 38 : datum = SysCacheGetAttrNotNull(COLLOID, tp,
135 : Anum_pg_collation_colllocale);
136 38 : locstr = TextDatumGetCString(datum);
137 38 : ReleaseSysCache(tp);
138 : }
139 :
140 1724 : builtin_validate_locale(GetDatabaseEncoding(), locstr);
141 :
142 1724 : result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
143 :
144 1724 : result->info.builtin.locale = MemoryContextStrdup(context, locstr);
145 1724 : result->provider = COLLPROVIDER_BUILTIN;
146 1724 : result->deterministic = true;
147 1724 : result->collate_is_c = true;
148 1724 : result->ctype_is_c = (strcmp(locstr, "C") == 0);
149 :
150 1724 : return result;
151 : }
152 :
153 : char *
154 1784 : get_collation_actual_version_builtin(const char *collcollate)
155 : {
156 : /*
157 : * The only two supported locales (C and C.UTF-8) are both based on memcmp
158 : * and are not expected to change, but track the version anyway.
159 : *
160 : * Note that the character semantics may change for some locales, but the
161 : * collation version only tracks changes to sort order.
162 : */
163 1784 : if (strcmp(collcollate, "C") == 0)
164 48 : return "1";
165 1736 : else if (strcmp(collcollate, "C.UTF-8") == 0)
166 1736 : return "1";
167 : else
168 0 : ereport(ERROR,
169 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
170 : errmsg("invalid locale name \"%s\" for builtin provider",
171 : collcollate)));
172 :
173 : return NULL; /* keep compiler quiet */
174 : }
|