Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * collationcmds.c
4 : * collation-related commands support code
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/commands/collationcmds.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #ifdef USE_ICU
18 : #include <unicode/uloc.h>
19 : #endif
20 :
21 : #include "access/htup_details.h"
22 : #include "access/table.h"
23 : #include "access/xact.h"
24 : #include "catalog/indexing.h"
25 : #include "catalog/namespace.h"
26 : #include "catalog/objectaccess.h"
27 : #include "catalog/pg_collation.h"
28 : #include "catalog/pg_database.h"
29 : #include "catalog/pg_namespace.h"
30 : #include "commands/collationcmds.h"
31 : #include "commands/comment.h"
32 : #include "commands/dbcommands.h"
33 : #include "commands/defrem.h"
34 : #include "common/string.h"
35 : #include "mb/pg_wchar.h"
36 : #include "miscadmin.h"
37 : #include "storage/fd.h"
38 : #include "utils/acl.h"
39 : #include "utils/builtins.h"
40 : #include "utils/lsyscache.h"
41 : #include "utils/pg_locale.h"
42 : #include "utils/rel.h"
43 : #include "utils/syscache.h"
44 :
45 :
46 : typedef struct
47 : {
48 : char *localename; /* name of locale, as per "locale -a" */
49 : char *alias; /* shortened alias for same */
50 : int enc; /* encoding */
51 : } CollAliasData;
52 :
53 :
54 : /*
55 : * CREATE COLLATION
56 : */
57 : ObjectAddress
58 364 : DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists)
59 : {
60 : char *collName;
61 : Oid collNamespace;
62 : AclResult aclresult;
63 : ListCell *pl;
64 364 : DefElem *fromEl = NULL;
65 364 : DefElem *localeEl = NULL;
66 364 : DefElem *lccollateEl = NULL;
67 364 : DefElem *lcctypeEl = NULL;
68 364 : DefElem *providerEl = NULL;
69 364 : DefElem *deterministicEl = NULL;
70 364 : DefElem *rulesEl = NULL;
71 364 : DefElem *versionEl = NULL;
72 : char *collcollate;
73 : char *collctype;
74 : const char *colllocale;
75 : char *collicurules;
76 : bool collisdeterministic;
77 : int collencoding;
78 : char collprovider;
79 364 : char *collversion = NULL;
80 : Oid newoid;
81 : ObjectAddress address;
82 :
83 364 : collNamespace = QualifiedNameGetCreationNamespace(names, &collName);
84 :
85 364 : aclresult = object_aclcheck(NamespaceRelationId, collNamespace, GetUserId(), ACL_CREATE);
86 364 : if (aclresult != ACLCHECK_OK)
87 0 : aclcheck_error(aclresult, OBJECT_SCHEMA,
88 0 : get_namespace_name(collNamespace));
89 :
90 1044 : foreach(pl, parameters)
91 : {
92 722 : DefElem *defel = lfirst_node(DefElem, pl);
93 : DefElem **defelp;
94 :
95 722 : if (strcmp(defel->defname, "from") == 0)
96 60 : defelp = &fromEl;
97 662 : else if (strcmp(defel->defname, "locale") == 0)
98 230 : defelp = &localeEl;
99 432 : else if (strcmp(defel->defname, "lc_collate") == 0)
100 62 : defelp = &lccollateEl;
101 370 : else if (strcmp(defel->defname, "lc_ctype") == 0)
102 56 : defelp = &lcctypeEl;
103 314 : else if (strcmp(defel->defname, "provider") == 0)
104 228 : defelp = &providerEl;
105 86 : else if (strcmp(defel->defname, "deterministic") == 0)
106 44 : defelp = &deterministicEl;
107 42 : else if (strcmp(defel->defname, "rules") == 0)
108 12 : defelp = &rulesEl;
109 30 : else if (strcmp(defel->defname, "version") == 0)
110 24 : defelp = &versionEl;
111 : else
112 : {
113 6 : ereport(ERROR,
114 : (errcode(ERRCODE_SYNTAX_ERROR),
115 : errmsg("collation attribute \"%s\" not recognized",
116 : defel->defname),
117 : parser_errposition(pstate, defel->location)));
118 : break;
119 : }
120 716 : if (*defelp != NULL)
121 36 : errorConflictingDefElem(defel, pstate);
122 680 : *defelp = defel;
123 : }
124 :
125 322 : if (localeEl && (lccollateEl || lcctypeEl))
126 18 : ereport(ERROR,
127 : errcode(ERRCODE_SYNTAX_ERROR),
128 : errmsg("conflicting or redundant options"),
129 : errdetail("LOCALE cannot be specified together with LC_COLLATE or LC_CTYPE."));
130 :
131 304 : if (fromEl && list_length(parameters) != 1)
132 6 : ereport(ERROR,
133 : errcode(ERRCODE_SYNTAX_ERROR),
134 : errmsg("conflicting or redundant options"),
135 : errdetail("FROM cannot be specified together with any other options."));
136 :
137 298 : if (fromEl)
138 : {
139 : Oid collid;
140 : HeapTuple tp;
141 : Datum datum;
142 : bool isnull;
143 :
144 54 : collid = get_collation_oid(defGetQualifiedName(fromEl), false);
145 48 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
146 48 : if (!HeapTupleIsValid(tp))
147 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
148 :
149 48 : collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
150 48 : collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic;
151 48 : collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding;
152 :
153 48 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull);
154 48 : if (!isnull)
155 30 : collcollate = TextDatumGetCString(datum);
156 : else
157 18 : collcollate = NULL;
158 :
159 48 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull);
160 48 : if (!isnull)
161 30 : collctype = TextDatumGetCString(datum);
162 : else
163 18 : collctype = NULL;
164 :
165 48 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colllocale, &isnull);
166 48 : if (!isnull)
167 12 : colllocale = TextDatumGetCString(datum);
168 : else
169 36 : colllocale = NULL;
170 :
171 : /*
172 : * When the ICU locale comes from an existing collation, do not
173 : * canonicalize to a language tag.
174 : */
175 :
176 48 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
177 48 : if (!isnull)
178 0 : collicurules = TextDatumGetCString(datum);
179 : else
180 48 : collicurules = NULL;
181 :
182 48 : ReleaseSysCache(tp);
183 :
184 : /*
185 : * Copying the "default" collation is not allowed because most code
186 : * checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT,
187 : * and so having a second collation with COLLPROVIDER_DEFAULT would
188 : * not work and potentially confuse or crash some code. This could be
189 : * fixed with some legwork.
190 : */
191 48 : if (collprovider == COLLPROVIDER_DEFAULT)
192 6 : ereport(ERROR,
193 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
194 : errmsg("collation \"default\" cannot be copied")));
195 : }
196 : else
197 : {
198 244 : char *collproviderstr = NULL;
199 :
200 244 : collcollate = NULL;
201 244 : collctype = NULL;
202 244 : colllocale = NULL;
203 244 : collicurules = NULL;
204 :
205 244 : if (providerEl)
206 216 : collproviderstr = defGetString(providerEl);
207 :
208 244 : if (deterministicEl)
209 32 : collisdeterministic = defGetBoolean(deterministicEl);
210 : else
211 212 : collisdeterministic = true;
212 :
213 244 : if (rulesEl)
214 12 : collicurules = defGetString(rulesEl);
215 :
216 244 : if (versionEl)
217 6 : collversion = defGetString(versionEl);
218 :
219 244 : if (collproviderstr)
220 : {
221 216 : if (pg_strcasecmp(collproviderstr, "builtin") == 0)
222 64 : collprovider = COLLPROVIDER_BUILTIN;
223 152 : else if (pg_strcasecmp(collproviderstr, "icu") == 0)
224 152 : collprovider = COLLPROVIDER_ICU;
225 0 : else if (pg_strcasecmp(collproviderstr, "libc") == 0)
226 0 : collprovider = COLLPROVIDER_LIBC;
227 : else
228 0 : ereport(ERROR,
229 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
230 : errmsg("unrecognized collation provider: %s",
231 : collproviderstr)));
232 : }
233 : else
234 28 : collprovider = COLLPROVIDER_LIBC;
235 :
236 244 : if (localeEl)
237 : {
238 200 : if (collprovider == COLLPROVIDER_LIBC)
239 : {
240 2 : collcollate = defGetString(localeEl);
241 2 : collctype = defGetString(localeEl);
242 : }
243 : else
244 198 : colllocale = defGetString(localeEl);
245 : }
246 :
247 244 : if (lccollateEl)
248 38 : collcollate = defGetString(lccollateEl);
249 :
250 244 : if (lcctypeEl)
251 32 : collctype = defGetString(lcctypeEl);
252 :
253 244 : if (collprovider == COLLPROVIDER_BUILTIN)
254 : {
255 64 : if (!colllocale)
256 12 : ereport(ERROR,
257 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
258 : errmsg("parameter \"%s\" must be specified",
259 : "locale")));
260 :
261 52 : colllocale = builtin_validate_locale(GetDatabaseEncoding(),
262 : colllocale);
263 : }
264 180 : else if (collprovider == COLLPROVIDER_LIBC)
265 : {
266 28 : if (!collcollate)
267 0 : ereport(ERROR,
268 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
269 : errmsg("parameter \"%s\" must be specified",
270 : "lc_collate")));
271 :
272 28 : if (!collctype)
273 0 : ereport(ERROR,
274 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
275 : errmsg("parameter \"%s\" must be specified",
276 : "lc_ctype")));
277 : }
278 152 : else if (collprovider == COLLPROVIDER_ICU)
279 : {
280 152 : if (!colllocale)
281 6 : ereport(ERROR,
282 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
283 : errmsg("parameter \"%s\" must be specified",
284 : "locale")));
285 :
286 : /*
287 : * During binary upgrade, preserve the locale string. Otherwise,
288 : * canonicalize to a language tag.
289 : */
290 146 : if (!IsBinaryUpgrade)
291 : {
292 144 : char *langtag = icu_language_tag(colllocale,
293 : icu_validation_level);
294 :
295 138 : if (langtag && strcmp(colllocale, langtag) != 0)
296 : {
297 100 : ereport(NOTICE,
298 : (errmsg("using standard form \"%s\" for ICU locale \"%s\"",
299 : langtag, colllocale)));
300 :
301 100 : colllocale = langtag;
302 : }
303 : }
304 :
305 140 : icu_validate_locale(colllocale);
306 : }
307 :
308 : /*
309 : * Nondeterministic collations are currently only supported with ICU
310 : * because that's the only case where it can actually make a
311 : * difference. So we can save writing the code for the other
312 : * providers.
313 : */
314 190 : if (!collisdeterministic && collprovider != COLLPROVIDER_ICU)
315 0 : ereport(ERROR,
316 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
317 : errmsg("nondeterministic collations not supported with this provider")));
318 :
319 190 : if (collicurules && collprovider != COLLPROVIDER_ICU)
320 0 : ereport(ERROR,
321 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
322 : errmsg("ICU rules cannot be specified unless locale provider is ICU")));
323 :
324 190 : if (collprovider == COLLPROVIDER_BUILTIN)
325 : {
326 34 : collencoding = builtin_locale_encoding(colllocale);
327 : }
328 156 : else if (collprovider == COLLPROVIDER_ICU)
329 : {
330 : #ifdef USE_ICU
331 : /*
332 : * We could create ICU collations with collencoding == database
333 : * encoding, but it seems better to use -1 so that it matches the
334 : * way initdb would create ICU collations. However, only allow
335 : * one to be created when the current database's encoding is
336 : * supported. Otherwise the collation is useless, plus we get
337 : * surprising behaviors like not being able to drop the collation.
338 : *
339 : * Skip this test when !USE_ICU, because the error we want to
340 : * throw for that isn't thrown till later.
341 : */
342 128 : if (!is_encoding_supported_by_icu(GetDatabaseEncoding()))
343 0 : ereport(ERROR,
344 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
345 : errmsg("current database's encoding is not supported with this provider")));
346 : #endif
347 128 : collencoding = -1;
348 : }
349 : else
350 : {
351 28 : collencoding = GetDatabaseEncoding();
352 28 : check_encoding_locale_matches(collencoding, collcollate, collctype);
353 : }
354 : }
355 :
356 232 : if (!collversion)
357 : {
358 : const char *locale;
359 :
360 226 : if (collprovider == COLLPROVIDER_LIBC)
361 58 : locale = collcollate;
362 : else
363 168 : locale = colllocale;
364 :
365 226 : collversion = get_collation_actual_version(collprovider, locale);
366 : }
367 :
368 232 : newoid = CollationCreate(collName,
369 : collNamespace,
370 : GetUserId(),
371 : collprovider,
372 : collisdeterministic,
373 : collencoding,
374 : collcollate,
375 : collctype,
376 : colllocale,
377 : collicurules,
378 : collversion,
379 : if_not_exists,
380 : false); /* not quiet */
381 :
382 224 : if (!OidIsValid(newoid))
383 2 : return InvalidObjectAddress;
384 :
385 : /* Check that the locales can be loaded. */
386 222 : CommandCounterIncrement();
387 222 : (void) pg_newlocale_from_collation(newoid);
388 :
389 216 : ObjectAddressSet(address, CollationRelationId, newoid);
390 :
391 216 : return address;
392 : }
393 :
394 : /*
395 : * Subroutine for ALTER COLLATION SET SCHEMA and RENAME
396 : *
397 : * Is there a collation with the same name as the given collation already in
398 : * the given namespace? If so, raise an appropriate error message.
399 : */
400 : void
401 18 : IsThereCollationInNamespace(const char *collname, Oid nspOid)
402 : {
403 : /* make sure the name doesn't already exist in new schema */
404 18 : if (SearchSysCacheExists3(COLLNAMEENCNSP,
405 : CStringGetDatum(collname),
406 : Int32GetDatum(GetDatabaseEncoding()),
407 : ObjectIdGetDatum(nspOid)))
408 0 : ereport(ERROR,
409 : (errcode(ERRCODE_DUPLICATE_OBJECT),
410 : errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"",
411 : collname, GetDatabaseEncodingName(),
412 : get_namespace_name(nspOid))));
413 :
414 : /* mustn't match an any-encoding entry, either */
415 18 : if (SearchSysCacheExists3(COLLNAMEENCNSP,
416 : CStringGetDatum(collname),
417 : Int32GetDatum(-1),
418 : ObjectIdGetDatum(nspOid)))
419 6 : ereport(ERROR,
420 : (errcode(ERRCODE_DUPLICATE_OBJECT),
421 : errmsg("collation \"%s\" already exists in schema \"%s\"",
422 : collname, get_namespace_name(nspOid))));
423 12 : }
424 :
425 : /*
426 : * ALTER COLLATION
427 : */
428 : ObjectAddress
429 6 : AlterCollation(AlterCollationStmt *stmt)
430 : {
431 : Relation rel;
432 : Oid collOid;
433 : HeapTuple tup;
434 : Form_pg_collation collForm;
435 : Datum datum;
436 : bool isnull;
437 : char *oldversion;
438 : char *newversion;
439 : ObjectAddress address;
440 :
441 6 : rel = table_open(CollationRelationId, RowExclusiveLock);
442 6 : collOid = get_collation_oid(stmt->collname, false);
443 :
444 6 : if (collOid == DEFAULT_COLLATION_OID)
445 0 : ereport(ERROR,
446 : (errmsg("cannot refresh version of default collation"),
447 : /* translator: %s is an SQL command */
448 : errhint("Use %s instead.",
449 : "ALTER DATABASE ... REFRESH COLLATION VERSION")));
450 :
451 6 : if (!object_ownercheck(CollationRelationId, collOid, GetUserId()))
452 0 : aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_COLLATION,
453 0 : NameListToString(stmt->collname));
454 :
455 6 : tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid));
456 6 : if (!HeapTupleIsValid(tup))
457 0 : elog(ERROR, "cache lookup failed for collation %u", collOid);
458 :
459 6 : collForm = (Form_pg_collation) GETSTRUCT(tup);
460 6 : datum = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, &isnull);
461 6 : oldversion = isnull ? NULL : TextDatumGetCString(datum);
462 :
463 6 : if (collForm->collprovider == COLLPROVIDER_LIBC)
464 0 : datum = SysCacheGetAttrNotNull(COLLOID, tup, Anum_pg_collation_collcollate);
465 : else
466 6 : datum = SysCacheGetAttrNotNull(COLLOID, tup, Anum_pg_collation_colllocale);
467 :
468 6 : newversion = get_collation_actual_version(collForm->collprovider,
469 6 : TextDatumGetCString(datum));
470 :
471 : /* cannot change from NULL to non-NULL or vice versa */
472 6 : if ((!oldversion && newversion) || (oldversion && !newversion))
473 0 : elog(ERROR, "invalid collation version change");
474 6 : else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
475 0 : {
476 : bool nulls[Natts_pg_collation];
477 : bool replaces[Natts_pg_collation];
478 : Datum values[Natts_pg_collation];
479 :
480 0 : ereport(NOTICE,
481 : (errmsg("changing version from %s to %s",
482 : oldversion, newversion)));
483 :
484 0 : memset(values, 0, sizeof(values));
485 0 : memset(nulls, false, sizeof(nulls));
486 0 : memset(replaces, false, sizeof(replaces));
487 :
488 0 : values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion);
489 0 : replaces[Anum_pg_collation_collversion - 1] = true;
490 :
491 0 : tup = heap_modify_tuple(tup, RelationGetDescr(rel),
492 : values, nulls, replaces);
493 : }
494 : else
495 6 : ereport(NOTICE,
496 : (errmsg("version has not changed")));
497 :
498 6 : CatalogTupleUpdate(rel, &tup->t_self, tup);
499 :
500 6 : InvokeObjectPostAlterHook(CollationRelationId, collOid, 0);
501 :
502 6 : ObjectAddressSet(address, CollationRelationId, collOid);
503 :
504 6 : heap_freetuple(tup);
505 6 : table_close(rel, NoLock);
506 :
507 6 : return address;
508 : }
509 :
510 :
511 : Datum
512 98 : pg_collation_actual_version(PG_FUNCTION_ARGS)
513 : {
514 98 : Oid collid = PG_GETARG_OID(0);
515 : char provider;
516 : char *locale;
517 : char *version;
518 : Datum datum;
519 :
520 98 : if (collid == DEFAULT_COLLATION_OID)
521 : {
522 : /* retrieve from pg_database */
523 :
524 0 : HeapTuple dbtup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
525 :
526 0 : if (!HeapTupleIsValid(dbtup))
527 0 : ereport(ERROR,
528 : (errcode(ERRCODE_UNDEFINED_OBJECT),
529 : errmsg("database with OID %u does not exist", MyDatabaseId)));
530 :
531 0 : provider = ((Form_pg_database) GETSTRUCT(dbtup))->datlocprovider;
532 :
533 0 : if (provider == COLLPROVIDER_LIBC)
534 : {
535 0 : datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, Anum_pg_database_datcollate);
536 0 : locale = TextDatumGetCString(datum);
537 : }
538 : else
539 : {
540 0 : datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, Anum_pg_database_datlocale);
541 0 : locale = TextDatumGetCString(datum);
542 : }
543 :
544 0 : ReleaseSysCache(dbtup);
545 : }
546 : else
547 : {
548 : /* retrieve from pg_collation */
549 :
550 98 : HeapTuple colltp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
551 :
552 98 : if (!HeapTupleIsValid(colltp))
553 0 : ereport(ERROR,
554 : (errcode(ERRCODE_UNDEFINED_OBJECT),
555 : errmsg("collation with OID %u does not exist", collid)));
556 :
557 98 : provider = ((Form_pg_collation) GETSTRUCT(colltp))->collprovider;
558 : Assert(provider != COLLPROVIDER_DEFAULT);
559 :
560 98 : if (provider == COLLPROVIDER_LIBC)
561 : {
562 0 : datum = SysCacheGetAttrNotNull(COLLOID, colltp, Anum_pg_collation_collcollate);
563 0 : locale = TextDatumGetCString(datum);
564 : }
565 : else
566 : {
567 98 : datum = SysCacheGetAttrNotNull(COLLOID, colltp, Anum_pg_collation_colllocale);
568 98 : locale = TextDatumGetCString(datum);
569 : }
570 :
571 98 : ReleaseSysCache(colltp);
572 : }
573 :
574 98 : version = get_collation_actual_version(provider, locale);
575 98 : if (version)
576 98 : PG_RETURN_TEXT_P(cstring_to_text(version));
577 : else
578 0 : PG_RETURN_NULL();
579 : }
580 :
581 :
582 : /* will we use "locale -a" in pg_import_system_collations? */
583 : #if !defined(WIN32)
584 : #define READ_LOCALE_A_OUTPUT
585 : #endif
586 :
587 : /* will we use EnumSystemLocalesEx in pg_import_system_collations? */
588 : #ifdef WIN32
589 : #define ENUM_SYSTEM_LOCALE
590 : #endif
591 :
592 :
593 : #ifdef READ_LOCALE_A_OUTPUT
594 : /*
595 : * "Normalize" a libc locale name, stripping off encoding tags such as
596 : * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
597 : * -> "br_FR@euro"). Return true if a new, different name was
598 : * generated.
599 : */
600 : static bool
601 196 : normalize_libc_locale_name(char *new, const char *old)
602 : {
603 196 : char *n = new;
604 196 : const char *o = old;
605 196 : bool changed = false;
606 :
607 980 : while (*o)
608 : {
609 784 : if (*o == '.')
610 : {
611 : /* skip over encoding tag such as ".utf8" or ".UTF-8" */
612 196 : o++;
613 784 : while ((*o >= 'A' && *o <= 'Z')
614 980 : || (*o >= 'a' && *o <= 'z')
615 392 : || (*o >= '0' && *o <= '9')
616 1176 : || (*o == '-'))
617 784 : o++;
618 196 : changed = true;
619 : }
620 : else
621 588 : *n++ = *o++;
622 : }
623 196 : *n = '\0';
624 :
625 196 : return changed;
626 : }
627 :
628 : /*
629 : * qsort comparator for CollAliasData items
630 : */
631 : static int
632 98 : cmpaliases(const void *a, const void *b)
633 : {
634 98 : const CollAliasData *ca = (const CollAliasData *) a;
635 98 : const CollAliasData *cb = (const CollAliasData *) b;
636 :
637 : /* comparing localename is enough because other fields are derived */
638 98 : return strcmp(ca->localename, cb->localename);
639 : }
640 : #endif /* READ_LOCALE_A_OUTPUT */
641 :
642 :
643 : #ifdef USE_ICU
644 : /*
645 : * Get a comment (specifically, the display name) for an ICU locale.
646 : * The result is a palloc'd string, or NULL if we can't get a comment
647 : * or find that it's not all ASCII. (We can *not* accept non-ASCII
648 : * comments, because the contents of template0 must be encoding-agnostic.)
649 : */
650 : static char *
651 78988 : get_icu_locale_comment(const char *localename)
652 : {
653 : UErrorCode status;
654 : UChar displayname[128];
655 : int32 len_uchar;
656 : int32 i;
657 : char *result;
658 :
659 78988 : status = U_ZERO_ERROR;
660 78988 : len_uchar = uloc_getDisplayName(localename, "en",
661 : displayname, lengthof(displayname),
662 : &status);
663 78988 : if (U_FAILURE(status))
664 0 : return NULL; /* no good reason to raise an error */
665 :
666 : /* Check for non-ASCII comment (can't use pg_is_ascii for this) */
667 1342992 : for (i = 0; i < len_uchar; i++)
668 : {
669 1265278 : if (displayname[i] > 127)
670 1274 : return NULL;
671 : }
672 :
673 : /* OK, transcribe */
674 77714 : result = palloc(len_uchar + 1);
675 1329958 : for (i = 0; i < len_uchar; i++)
676 1252244 : result[i] = displayname[i];
677 77714 : result[len_uchar] = '\0';
678 :
679 77714 : return result;
680 : }
681 : #endif /* USE_ICU */
682 :
683 :
684 : /*
685 : * Create a new collation using the input locale 'locale'. (subroutine for
686 : * pg_import_system_collations())
687 : *
688 : * 'nspid' is the namespace id where the collation will be created.
689 : *
690 : * 'nvalidp' is incremented if the locale has a valid encoding.
691 : *
692 : * 'ncreatedp' is incremented if the collation is actually created. If the
693 : * collation already exists it will quietly do nothing.
694 : *
695 : * The returned value is the encoding of the locale, -1 if the locale is not
696 : * valid for creating a collation.
697 : *
698 : */
699 : pg_attribute_unused()
700 : static int
701 392 : create_collation_from_locale(const char *locale, int nspid,
702 : int *nvalidp, int *ncreatedp)
703 : {
704 : int enc;
705 : Oid collid;
706 :
707 : /*
708 : * Some systems have locale names that don't consist entirely of ASCII
709 : * letters (such as "bokmål" or "français"). This is pretty
710 : * silly, since we need the locale itself to interpret the non-ASCII
711 : * characters. We can't do much with those, so we filter them out.
712 : */
713 392 : if (!pg_is_ascii(locale))
714 : {
715 0 : elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", locale);
716 0 : return -1;
717 : }
718 :
719 392 : enc = pg_get_encoding_from_locale(locale, false);
720 392 : if (enc < 0)
721 : {
722 0 : elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", locale);
723 0 : return -1;
724 : }
725 392 : if (!PG_VALID_BE_ENCODING(enc))
726 : {
727 0 : elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", locale);
728 0 : return -1;
729 : }
730 392 : if (enc == PG_SQL_ASCII)
731 196 : return -1; /* C/POSIX are already in the catalog */
732 :
733 : /* count valid locales found in operating system */
734 196 : (*nvalidp)++;
735 :
736 : /*
737 : * Create a collation named the same as the locale, but quietly doing
738 : * nothing if it already exists. This is the behavior we need even at
739 : * initdb time, because some versions of "locale -a" can report the same
740 : * locale name more than once. And it's convenient for later import runs,
741 : * too, since you just about always want to add on new locales without a
742 : * lot of chatter about existing ones.
743 : */
744 196 : collid = CollationCreate(locale, nspid, GetUserId(),
745 : COLLPROVIDER_LIBC, true, enc,
746 : locale, locale, NULL, NULL,
747 196 : get_collation_actual_version(COLLPROVIDER_LIBC, locale),
748 : true, true);
749 196 : if (OidIsValid(collid))
750 : {
751 196 : (*ncreatedp)++;
752 :
753 : /* Must do CCI between inserts to handle duplicates correctly */
754 196 : CommandCounterIncrement();
755 : }
756 :
757 196 : return enc;
758 : }
759 :
760 :
761 : #ifdef ENUM_SYSTEM_LOCALE
762 : /* parameter to be passed to the callback function win32_read_locale() */
763 : typedef struct
764 : {
765 : Oid nspid;
766 : int *ncreatedp;
767 : int *nvalidp;
768 : } CollParam;
769 :
770 : /*
771 : * Callback function for EnumSystemLocalesEx() in
772 : * pg_import_system_collations(). Creates a collation for every valid locale
773 : * and a POSIX alias collation.
774 : *
775 : * The callback contract is to return TRUE to continue enumerating and FALSE
776 : * to stop enumerating. We always want to continue.
777 : */
778 : static BOOL CALLBACK
779 : win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
780 : {
781 : CollParam *param = (CollParam *) lparam;
782 : char localebuf[NAMEDATALEN];
783 : int result;
784 : int enc;
785 :
786 : (void) dwFlags;
787 :
788 : result = WideCharToMultiByte(CP_ACP, 0, pStr, -1, localebuf, NAMEDATALEN,
789 : NULL, NULL);
790 :
791 : if (result == 0)
792 : {
793 : if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
794 : elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
795 : return TRUE;
796 : }
797 : if (localebuf[0] == '\0')
798 : return TRUE;
799 :
800 : enc = create_collation_from_locale(localebuf, param->nspid,
801 : param->nvalidp, param->ncreatedp);
802 : if (enc < 0)
803 : return TRUE;
804 :
805 : /*
806 : * Windows will use hyphens between language and territory, where POSIX
807 : * uses an underscore. Simply create a POSIX alias.
808 : */
809 : if (strchr(localebuf, '-'))
810 : {
811 : char alias[NAMEDATALEN];
812 : Oid collid;
813 :
814 : strcpy(alias, localebuf);
815 : for (char *p = alias; *p; p++)
816 : if (*p == '-')
817 : *p = '_';
818 :
819 : collid = CollationCreate(alias, param->nspid, GetUserId(),
820 : COLLPROVIDER_LIBC, true, enc,
821 : localebuf, localebuf, NULL, NULL,
822 : get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
823 : true, true);
824 : if (OidIsValid(collid))
825 : {
826 : (*param->ncreatedp)++;
827 :
828 : CommandCounterIncrement();
829 : }
830 : }
831 :
832 : return TRUE;
833 : }
834 : #endif /* ENUM_SYSTEM_LOCALE */
835 :
836 :
837 : /*
838 : * pg_import_system_collations: add known system collations to pg_collation
839 : */
840 : Datum
841 98 : pg_import_system_collations(PG_FUNCTION_ARGS)
842 : {
843 98 : Oid nspid = PG_GETARG_OID(0);
844 98 : int ncreated = 0;
845 :
846 98 : if (!superuser())
847 0 : ereport(ERROR,
848 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
849 : errmsg("must be superuser to import system collations")));
850 :
851 98 : if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(nspid)))
852 0 : ereport(ERROR,
853 : (errcode(ERRCODE_UNDEFINED_SCHEMA),
854 : errmsg("schema with OID %u does not exist", nspid)));
855 :
856 : /* Load collations known to libc, using "locale -a" to enumerate them */
857 : #ifdef READ_LOCALE_A_OUTPUT
858 : {
859 : FILE *locale_a_handle;
860 : char localebuf[LOCALE_NAME_BUFLEN];
861 98 : int nvalid = 0;
862 : Oid collid;
863 : CollAliasData *aliases;
864 : int naliases,
865 : maxaliases,
866 : i;
867 :
868 : /* expansible array of aliases */
869 98 : maxaliases = 100;
870 98 : aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData));
871 98 : naliases = 0;
872 :
873 98 : locale_a_handle = OpenPipeStream("locale -a", "r");
874 98 : if (locale_a_handle == NULL)
875 0 : ereport(ERROR,
876 : (errcode_for_file_access(),
877 : errmsg("could not execute command \"%s\": %m",
878 : "locale -a")));
879 :
880 490 : while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
881 : {
882 : size_t len;
883 : int enc;
884 : char alias[LOCALE_NAME_BUFLEN];
885 :
886 392 : len = strlen(localebuf);
887 :
888 392 : if (len == 0 || localebuf[len - 1] != '\n')
889 : {
890 0 : elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
891 196 : continue;
892 : }
893 392 : localebuf[len - 1] = '\0';
894 :
895 392 : enc = create_collation_from_locale(localebuf, nspid, &nvalid, &ncreated);
896 392 : if (enc < 0)
897 196 : continue;
898 :
899 : /*
900 : * Generate aliases such as "en_US" in addition to "en_US.utf8"
901 : * for ease of use. Note that collation names are unique per
902 : * encoding only, so this doesn't clash with "en_US" for LATIN1,
903 : * say.
904 : *
905 : * However, it might conflict with a name we'll see later in the
906 : * "locale -a" output. So save up the aliases and try to add them
907 : * after we've read all the output.
908 : */
909 196 : if (normalize_libc_locale_name(alias, localebuf))
910 : {
911 196 : if (naliases >= maxaliases)
912 : {
913 0 : maxaliases *= 2;
914 : aliases = (CollAliasData *)
915 0 : repalloc(aliases, maxaliases * sizeof(CollAliasData));
916 : }
917 196 : aliases[naliases].localename = pstrdup(localebuf);
918 196 : aliases[naliases].alias = pstrdup(alias);
919 196 : aliases[naliases].enc = enc;
920 196 : naliases++;
921 : }
922 : }
923 :
924 : /*
925 : * We don't check the return value of this, because we want to support
926 : * the case where the "locale" command does not exist. (This is
927 : * unusual but can happen on minimalized Linux distributions, for
928 : * example.) We will warn below if no locales could be found.
929 : */
930 98 : ClosePipeStream(locale_a_handle);
931 :
932 : /*
933 : * Before processing the aliases, sort them by locale name. The point
934 : * here is that if "locale -a" gives us multiple locale names with the
935 : * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we
936 : * want to pick a deterministic one of them. First in ASCII sort
937 : * order is a good enough rule. (Before PG 10, the code corresponding
938 : * to this logic in initdb.c had an additional ordering rule, to
939 : * prefer the locale name exactly matching the alias, if any. We
940 : * don't need to consider that here, because we would have already
941 : * created such a pg_collation entry above, and that one will win.)
942 : */
943 98 : if (naliases > 1)
944 98 : qsort(aliases, naliases, sizeof(CollAliasData), cmpaliases);
945 :
946 : /* Now add aliases, ignoring any that match pre-existing entries */
947 294 : for (i = 0; i < naliases; i++)
948 : {
949 196 : char *locale = aliases[i].localename;
950 196 : char *alias = aliases[i].alias;
951 196 : int enc = aliases[i].enc;
952 :
953 196 : collid = CollationCreate(alias, nspid, GetUserId(),
954 : COLLPROVIDER_LIBC, true, enc,
955 : locale, locale, NULL, NULL,
956 196 : get_collation_actual_version(COLLPROVIDER_LIBC, locale),
957 : true, true);
958 196 : if (OidIsValid(collid))
959 : {
960 98 : ncreated++;
961 :
962 98 : CommandCounterIncrement();
963 : }
964 : }
965 :
966 : /* Give a warning if "locale -a" seems to be malfunctioning */
967 98 : if (nvalid == 0)
968 0 : ereport(WARNING,
969 : (errmsg("no usable system locales were found")));
970 : }
971 : #endif /* READ_LOCALE_A_OUTPUT */
972 :
973 : /*
974 : * Load collations known to ICU
975 : *
976 : * We use uloc_countAvailable()/uloc_getAvailable() rather than
977 : * ucol_countAvailable()/ucol_getAvailable(). The former returns a full
978 : * set of language+region combinations, whereas the latter only returns
979 : * language+region combinations if they are distinct from the language's
980 : * base collation. So there might not be a de-DE or en-GB, which would be
981 : * confusing.
982 : */
983 : #ifdef USE_ICU
984 : {
985 : int i;
986 :
987 : /*
988 : * Start the loop at -1 to sneak in the root locale without too much
989 : * code duplication.
990 : */
991 79086 : for (i = -1; i < uloc_countAvailable(); i++)
992 : {
993 : const char *name;
994 : char *langtag;
995 : char *icucomment;
996 : Oid collid;
997 :
998 78988 : if (i == -1)
999 98 : name = ""; /* ICU root locale */
1000 : else
1001 78890 : name = uloc_getAvailable(i);
1002 :
1003 78988 : langtag = icu_language_tag(name, ERROR);
1004 :
1005 : /*
1006 : * Be paranoid about not allowing any non-ASCII strings into
1007 : * pg_collation
1008 : */
1009 78988 : if (!pg_is_ascii(langtag))
1010 0 : continue;
1011 :
1012 78988 : collid = CollationCreate(psprintf("%s-x-icu", langtag),
1013 : nspid, GetUserId(),
1014 : COLLPROVIDER_ICU, true, -1,
1015 : NULL, NULL, langtag, NULL,
1016 78988 : get_collation_actual_version(COLLPROVIDER_ICU, langtag),
1017 : true, true);
1018 78988 : if (OidIsValid(collid))
1019 : {
1020 78988 : ncreated++;
1021 :
1022 78988 : CommandCounterIncrement();
1023 :
1024 78988 : icucomment = get_icu_locale_comment(name);
1025 78988 : if (icucomment)
1026 77714 : CreateComments(collid, CollationRelationId, 0,
1027 : icucomment);
1028 : }
1029 : }
1030 : }
1031 : #endif /* USE_ICU */
1032 :
1033 : /* Load collations known to WIN32 */
1034 : #ifdef ENUM_SYSTEM_LOCALE
1035 : {
1036 : int nvalid = 0;
1037 : CollParam param;
1038 :
1039 : param.nspid = nspid;
1040 : param.ncreatedp = &ncreated;
1041 : param.nvalidp = &nvalid;
1042 :
1043 : /*
1044 : * Enumerate the locales that are either installed on or supported by
1045 : * the OS.
1046 : */
1047 : if (!EnumSystemLocalesEx(win32_read_locale, LOCALE_ALL,
1048 : (LPARAM) &param, NULL))
1049 : _dosmaperr(GetLastError());
1050 :
1051 : /* Give a warning if EnumSystemLocalesEx seems to be malfunctioning */
1052 : if (nvalid == 0)
1053 : ereport(WARNING,
1054 : (errmsg("no usable system locales were found")));
1055 : }
1056 : #endif /* ENUM_SYSTEM_LOCALE */
1057 :
1058 98 : PG_RETURN_INT32(ncreated);
1059 : }
|