Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * collationcmds.c
4 : * collation-related commands support code
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/commands/collationcmds.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/htup_details.h"
18 : #include "access/table.h"
19 : #include "access/xact.h"
20 : #include "catalog/indexing.h"
21 : #include "catalog/namespace.h"
22 : #include "catalog/objectaccess.h"
23 : #include "catalog/pg_collation.h"
24 : #include "catalog/pg_database.h"
25 : #include "catalog/pg_namespace.h"
26 : #include "commands/collationcmds.h"
27 : #include "commands/comment.h"
28 : #include "commands/dbcommands.h"
29 : #include "commands/defrem.h"
30 : #include "common/string.h"
31 : #include "mb/pg_wchar.h"
32 : #include "miscadmin.h"
33 : #include "utils/acl.h"
34 : #include "utils/builtins.h"
35 : #include "utils/lsyscache.h"
36 : #include "utils/pg_locale.h"
37 : #include "utils/rel.h"
38 : #include "utils/syscache.h"
39 :
40 :
/*
 * One pending alias collation: the full libc locale name it stands for,
 * the shortened name to create, and the encoding the locale uses.
 */
typedef struct
{
	char	   *localename;		/* locale name exactly as "locale -a" printed it */
	char	   *alias;			/* abbreviated name derived from localename */
	int			enc;			/* encoding of the locale */
} CollAliasData;
47 :
48 :
49 : /*
50 : * CREATE COLLATION
51 : */
52 : ObjectAddress
53 364 : DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists)
54 : {
55 : char *collName;
56 : Oid collNamespace;
57 : AclResult aclresult;
58 : ListCell *pl;
59 364 : DefElem *fromEl = NULL;
60 364 : DefElem *localeEl = NULL;
61 364 : DefElem *lccollateEl = NULL;
62 364 : DefElem *lcctypeEl = NULL;
63 364 : DefElem *providerEl = NULL;
64 364 : DefElem *deterministicEl = NULL;
65 364 : DefElem *rulesEl = NULL;
66 364 : DefElem *versionEl = NULL;
67 : char *collcollate;
68 : char *collctype;
69 : const char *colllocale;
70 : char *collicurules;
71 : bool collisdeterministic;
72 : int collencoding;
73 : char collprovider;
74 364 : char *collversion = NULL;
75 : Oid newoid;
76 : ObjectAddress address;
77 :
78 364 : collNamespace = QualifiedNameGetCreationNamespace(names, &collName);
79 :
80 364 : aclresult = object_aclcheck(NamespaceRelationId, collNamespace, GetUserId(), ACL_CREATE);
81 364 : if (aclresult != ACLCHECK_OK)
82 0 : aclcheck_error(aclresult, OBJECT_SCHEMA,
83 0 : get_namespace_name(collNamespace));
84 :
85 1044 : foreach(pl, parameters)
86 : {
87 722 : DefElem *defel = lfirst_node(DefElem, pl);
88 : DefElem **defelp;
89 :
90 722 : if (strcmp(defel->defname, "from") == 0)
91 60 : defelp = &fromEl;
92 662 : else if (strcmp(defel->defname, "locale") == 0)
93 230 : defelp = &localeEl;
94 432 : else if (strcmp(defel->defname, "lc_collate") == 0)
95 62 : defelp = &lccollateEl;
96 370 : else if (strcmp(defel->defname, "lc_ctype") == 0)
97 56 : defelp = &lcctypeEl;
98 314 : else if (strcmp(defel->defname, "provider") == 0)
99 228 : defelp = &providerEl;
100 86 : else if (strcmp(defel->defname, "deterministic") == 0)
101 44 : defelp = &deterministicEl;
102 42 : else if (strcmp(defel->defname, "rules") == 0)
103 12 : defelp = &rulesEl;
104 30 : else if (strcmp(defel->defname, "version") == 0)
105 24 : defelp = &versionEl;
106 : else
107 : {
108 6 : ereport(ERROR,
109 : (errcode(ERRCODE_SYNTAX_ERROR),
110 : errmsg("collation attribute \"%s\" not recognized",
111 : defel->defname),
112 : parser_errposition(pstate, defel->location)));
113 : break;
114 : }
115 716 : if (*defelp != NULL)
116 36 : errorConflictingDefElem(defel, pstate);
117 680 : *defelp = defel;
118 : }
119 :
120 322 : if (localeEl && (lccollateEl || lcctypeEl))
121 18 : ereport(ERROR,
122 : errcode(ERRCODE_SYNTAX_ERROR),
123 : errmsg("conflicting or redundant options"),
124 : errdetail("LOCALE cannot be specified together with LC_COLLATE or LC_CTYPE."));
125 :
126 304 : if (fromEl && list_length(parameters) != 1)
127 6 : ereport(ERROR,
128 : errcode(ERRCODE_SYNTAX_ERROR),
129 : errmsg("conflicting or redundant options"),
130 : errdetail("FROM cannot be specified together with any other options."));
131 :
132 298 : if (fromEl)
133 : {
134 : Oid collid;
135 : HeapTuple tp;
136 : Datum datum;
137 : bool isnull;
138 :
139 54 : collid = get_collation_oid(defGetQualifiedName(fromEl), false);
140 48 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
141 48 : if (!HeapTupleIsValid(tp))
142 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
143 :
144 48 : collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
145 48 : collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic;
146 48 : collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding;
147 :
148 48 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull);
149 48 : if (!isnull)
150 30 : collcollate = TextDatumGetCString(datum);
151 : else
152 18 : collcollate = NULL;
153 :
154 48 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull);
155 48 : if (!isnull)
156 30 : collctype = TextDatumGetCString(datum);
157 : else
158 18 : collctype = NULL;
159 :
160 48 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colllocale, &isnull);
161 48 : if (!isnull)
162 12 : colllocale = TextDatumGetCString(datum);
163 : else
164 36 : colllocale = NULL;
165 :
166 : /*
167 : * When the ICU locale comes from an existing collation, do not
168 : * canonicalize to a language tag.
169 : */
170 :
171 48 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
172 48 : if (!isnull)
173 0 : collicurules = TextDatumGetCString(datum);
174 : else
175 48 : collicurules = NULL;
176 :
177 48 : ReleaseSysCache(tp);
178 :
179 : /*
180 : * Copying the "default" collation is not allowed because most code
181 : * checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT,
182 : * and so having a second collation with COLLPROVIDER_DEFAULT would
183 : * not work and potentially confuse or crash some code. This could be
184 : * fixed with some legwork.
185 : */
186 48 : if (collprovider == COLLPROVIDER_DEFAULT)
187 6 : ereport(ERROR,
188 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
189 : errmsg("collation \"default\" cannot be copied")));
190 : }
191 : else
192 : {
193 244 : char *collproviderstr = NULL;
194 :
195 244 : collcollate = NULL;
196 244 : collctype = NULL;
197 244 : colllocale = NULL;
198 244 : collicurules = NULL;
199 :
200 244 : if (providerEl)
201 216 : collproviderstr = defGetString(providerEl);
202 :
203 244 : if (deterministicEl)
204 32 : collisdeterministic = defGetBoolean(deterministicEl);
205 : else
206 212 : collisdeterministic = true;
207 :
208 244 : if (rulesEl)
209 12 : collicurules = defGetString(rulesEl);
210 :
211 244 : if (versionEl)
212 6 : collversion = defGetString(versionEl);
213 :
214 244 : if (collproviderstr)
215 : {
216 216 : if (pg_strcasecmp(collproviderstr, "builtin") == 0)
217 64 : collprovider = COLLPROVIDER_BUILTIN;
218 152 : else if (pg_strcasecmp(collproviderstr, "icu") == 0)
219 152 : collprovider = COLLPROVIDER_ICU;
220 0 : else if (pg_strcasecmp(collproviderstr, "libc") == 0)
221 0 : collprovider = COLLPROVIDER_LIBC;
222 : else
223 0 : ereport(ERROR,
224 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
225 : errmsg("unrecognized collation provider: %s",
226 : collproviderstr)));
227 : }
228 : else
229 28 : collprovider = COLLPROVIDER_LIBC;
230 :
231 244 : if (localeEl)
232 : {
233 200 : if (collprovider == COLLPROVIDER_LIBC)
234 : {
235 2 : collcollate = defGetString(localeEl);
236 2 : collctype = defGetString(localeEl);
237 : }
238 : else
239 198 : colllocale = defGetString(localeEl);
240 : }
241 :
242 244 : if (lccollateEl)
243 38 : collcollate = defGetString(lccollateEl);
244 :
245 244 : if (lcctypeEl)
246 32 : collctype = defGetString(lcctypeEl);
247 :
248 244 : if (collprovider == COLLPROVIDER_BUILTIN)
249 : {
250 64 : if (!colllocale)
251 12 : ereport(ERROR,
252 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
253 : errmsg("parameter \"%s\" must be specified",
254 : "locale")));
255 :
256 52 : colllocale = builtin_validate_locale(GetDatabaseEncoding(),
257 : colllocale);
258 : }
259 180 : else if (collprovider == COLLPROVIDER_LIBC)
260 : {
261 28 : if (!collcollate)
262 0 : ereport(ERROR,
263 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
264 : errmsg("parameter \"%s\" must be specified",
265 : "lc_collate")));
266 :
267 28 : if (!collctype)
268 0 : ereport(ERROR,
269 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
270 : errmsg("parameter \"%s\" must be specified",
271 : "lc_ctype")));
272 : }
273 152 : else if (collprovider == COLLPROVIDER_ICU)
274 : {
275 152 : if (!colllocale)
276 6 : ereport(ERROR,
277 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
278 : errmsg("parameter \"%s\" must be specified",
279 : "locale")));
280 :
281 : /*
282 : * During binary upgrade, preserve the locale string. Otherwise,
283 : * canonicalize to a language tag.
284 : */
285 146 : if (!IsBinaryUpgrade)
286 : {
287 144 : char *langtag = icu_language_tag(colllocale,
288 : icu_validation_level);
289 :
290 138 : if (langtag && strcmp(colllocale, langtag) != 0)
291 : {
292 100 : ereport(NOTICE,
293 : (errmsg("using standard form \"%s\" for ICU locale \"%s\"",
294 : langtag, colllocale)));
295 :
296 100 : colllocale = langtag;
297 : }
298 : }
299 :
300 140 : icu_validate_locale(colllocale);
301 : }
302 :
303 : /*
304 : * Nondeterministic collations are currently only supported with ICU
305 : * because that's the only case where it can actually make a
306 : * difference. So we can save writing the code for the other
307 : * providers.
308 : */
309 190 : if (!collisdeterministic && collprovider != COLLPROVIDER_ICU)
310 0 : ereport(ERROR,
311 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
312 : errmsg("nondeterministic collations not supported with this provider")));
313 :
314 190 : if (collicurules && collprovider != COLLPROVIDER_ICU)
315 0 : ereport(ERROR,
316 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
317 : errmsg("ICU rules cannot be specified unless locale provider is ICU")));
318 :
319 190 : if (collprovider == COLLPROVIDER_BUILTIN)
320 : {
321 34 : collencoding = builtin_locale_encoding(colllocale);
322 : }
323 156 : else if (collprovider == COLLPROVIDER_ICU)
324 : {
325 : #ifdef USE_ICU
326 : /*
327 : * We could create ICU collations with collencoding == database
328 : * encoding, but it seems better to use -1 so that it matches the
329 : * way initdb would create ICU collations. However, only allow
330 : * one to be created when the current database's encoding is
331 : * supported. Otherwise the collation is useless, plus we get
332 : * surprising behaviors like not being able to drop the collation.
333 : *
334 : * Skip this test when !USE_ICU, because the error we want to
335 : * throw for that isn't thrown till later.
336 : */
337 128 : if (!is_encoding_supported_by_icu(GetDatabaseEncoding()))
338 0 : ereport(ERROR,
339 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
340 : errmsg("current database's encoding is not supported with this provider")));
341 : #endif
342 128 : collencoding = -1;
343 : }
344 : else
345 : {
346 28 : collencoding = GetDatabaseEncoding();
347 28 : check_encoding_locale_matches(collencoding, collcollate, collctype);
348 : }
349 : }
350 :
351 232 : if (!collversion)
352 : {
353 : const char *locale;
354 :
355 226 : if (collprovider == COLLPROVIDER_LIBC)
356 58 : locale = collcollate;
357 : else
358 168 : locale = colllocale;
359 :
360 226 : collversion = get_collation_actual_version(collprovider, locale);
361 : }
362 :
363 232 : newoid = CollationCreate(collName,
364 : collNamespace,
365 : GetUserId(),
366 : collprovider,
367 : collisdeterministic,
368 : collencoding,
369 : collcollate,
370 : collctype,
371 : colllocale,
372 : collicurules,
373 : collversion,
374 : if_not_exists,
375 : false); /* not quiet */
376 :
377 224 : if (!OidIsValid(newoid))
378 2 : return InvalidObjectAddress;
379 :
380 : /* Check that the locales can be loaded. */
381 222 : CommandCounterIncrement();
382 222 : (void) pg_newlocale_from_collation(newoid);
383 :
384 216 : ObjectAddressSet(address, CollationRelationId, newoid);
385 :
386 216 : return address;
387 : }
388 :
389 : /*
390 : * Subroutine for ALTER COLLATION SET SCHEMA and RENAME
391 : *
392 : * Is there a collation with the same name of the given collation already in
393 : * the given namespace? If so, raise an appropriate error message.
394 : */
395 : void
396 18 : IsThereCollationInNamespace(const char *collname, Oid nspOid)
397 : {
398 : /* make sure the name doesn't already exist in new schema */
399 18 : if (SearchSysCacheExists3(COLLNAMEENCNSP,
400 : CStringGetDatum(collname),
401 : Int32GetDatum(GetDatabaseEncoding()),
402 : ObjectIdGetDatum(nspOid)))
403 0 : ereport(ERROR,
404 : (errcode(ERRCODE_DUPLICATE_OBJECT),
405 : errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"",
406 : collname, GetDatabaseEncodingName(),
407 : get_namespace_name(nspOid))));
408 :
409 : /* mustn't match an any-encoding entry, either */
410 18 : if (SearchSysCacheExists3(COLLNAMEENCNSP,
411 : CStringGetDatum(collname),
412 : Int32GetDatum(-1),
413 : ObjectIdGetDatum(nspOid)))
414 6 : ereport(ERROR,
415 : (errcode(ERRCODE_DUPLICATE_OBJECT),
416 : errmsg("collation \"%s\" already exists in schema \"%s\"",
417 : collname, get_namespace_name(nspOid))));
418 12 : }
419 :
420 : /*
421 : * ALTER COLLATION
422 : */
423 : ObjectAddress
424 6 : AlterCollation(AlterCollationStmt *stmt)
425 : {
426 : Relation rel;
427 : Oid collOid;
428 : HeapTuple tup;
429 : Form_pg_collation collForm;
430 : Datum datum;
431 : bool isnull;
432 : char *oldversion;
433 : char *newversion;
434 : ObjectAddress address;
435 :
436 6 : rel = table_open(CollationRelationId, RowExclusiveLock);
437 6 : collOid = get_collation_oid(stmt->collname, false);
438 :
439 6 : if (collOid == DEFAULT_COLLATION_OID)
440 0 : ereport(ERROR,
441 : (errmsg("cannot refresh version of default collation"),
442 : /* translator: %s is an SQL command */
443 : errhint("Use %s instead.",
444 : "ALTER DATABASE ... REFRESH COLLATION VERSION")));
445 :
446 6 : if (!object_ownercheck(CollationRelationId, collOid, GetUserId()))
447 0 : aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_COLLATION,
448 0 : NameListToString(stmt->collname));
449 :
450 6 : tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid));
451 6 : if (!HeapTupleIsValid(tup))
452 0 : elog(ERROR, "cache lookup failed for collation %u", collOid);
453 :
454 6 : collForm = (Form_pg_collation) GETSTRUCT(tup);
455 6 : datum = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, &isnull);
456 6 : oldversion = isnull ? NULL : TextDatumGetCString(datum);
457 :
458 6 : if (collForm->collprovider == COLLPROVIDER_LIBC)
459 0 : datum = SysCacheGetAttrNotNull(COLLOID, tup, Anum_pg_collation_collcollate);
460 : else
461 6 : datum = SysCacheGetAttrNotNull(COLLOID, tup, Anum_pg_collation_colllocale);
462 :
463 6 : newversion = get_collation_actual_version(collForm->collprovider,
464 6 : TextDatumGetCString(datum));
465 :
466 : /* cannot change from NULL to non-NULL or vice versa */
467 6 : if ((!oldversion && newversion) || (oldversion && !newversion))
468 0 : elog(ERROR, "invalid collation version change");
469 6 : else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
470 0 : {
471 : bool nulls[Natts_pg_collation];
472 : bool replaces[Natts_pg_collation];
473 : Datum values[Natts_pg_collation];
474 :
475 0 : ereport(NOTICE,
476 : (errmsg("changing version from %s to %s",
477 : oldversion, newversion)));
478 :
479 0 : memset(values, 0, sizeof(values));
480 0 : memset(nulls, false, sizeof(nulls));
481 0 : memset(replaces, false, sizeof(replaces));
482 :
483 0 : values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion);
484 0 : replaces[Anum_pg_collation_collversion - 1] = true;
485 :
486 0 : tup = heap_modify_tuple(tup, RelationGetDescr(rel),
487 : values, nulls, replaces);
488 : }
489 : else
490 6 : ereport(NOTICE,
491 : (errmsg("version has not changed")));
492 :
493 6 : CatalogTupleUpdate(rel, &tup->t_self, tup);
494 :
495 6 : InvokeObjectPostAlterHook(CollationRelationId, collOid, 0);
496 :
497 6 : ObjectAddressSet(address, CollationRelationId, collOid);
498 :
499 6 : heap_freetuple(tup);
500 6 : table_close(rel, NoLock);
501 :
502 6 : return address;
503 : }
504 :
505 :
506 : Datum
507 86 : pg_collation_actual_version(PG_FUNCTION_ARGS)
508 : {
509 86 : Oid collid = PG_GETARG_OID(0);
510 : char provider;
511 : char *locale;
512 : char *version;
513 : Datum datum;
514 :
515 86 : if (collid == DEFAULT_COLLATION_OID)
516 : {
517 : /* retrieve from pg_database */
518 :
519 0 : HeapTuple dbtup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
520 :
521 0 : if (!HeapTupleIsValid(dbtup))
522 0 : ereport(ERROR,
523 : (errcode(ERRCODE_UNDEFINED_OBJECT),
524 : errmsg("database with OID %u does not exist", MyDatabaseId)));
525 :
526 0 : provider = ((Form_pg_database) GETSTRUCT(dbtup))->datlocprovider;
527 :
528 0 : if (provider == COLLPROVIDER_LIBC)
529 : {
530 0 : datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, Anum_pg_database_datcollate);
531 0 : locale = TextDatumGetCString(datum);
532 : }
533 : else
534 : {
535 0 : datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, Anum_pg_database_datlocale);
536 0 : locale = TextDatumGetCString(datum);
537 : }
538 :
539 0 : ReleaseSysCache(dbtup);
540 : }
541 : else
542 : {
543 : /* retrieve from pg_collation */
544 :
545 86 : HeapTuple colltp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
546 :
547 86 : if (!HeapTupleIsValid(colltp))
548 0 : ereport(ERROR,
549 : (errcode(ERRCODE_UNDEFINED_OBJECT),
550 : errmsg("collation with OID %u does not exist", collid)));
551 :
552 86 : provider = ((Form_pg_collation) GETSTRUCT(colltp))->collprovider;
553 : Assert(provider != COLLPROVIDER_DEFAULT);
554 :
555 86 : if (provider == COLLPROVIDER_LIBC)
556 : {
557 0 : datum = SysCacheGetAttrNotNull(COLLOID, colltp, Anum_pg_collation_collcollate);
558 0 : locale = TextDatumGetCString(datum);
559 : }
560 : else
561 : {
562 86 : datum = SysCacheGetAttrNotNull(COLLOID, colltp, Anum_pg_collation_colllocale);
563 86 : locale = TextDatumGetCString(datum);
564 : }
565 :
566 86 : ReleaseSysCache(colltp);
567 : }
568 :
569 86 : version = get_collation_actual_version(provider, locale);
570 86 : if (version)
571 86 : PG_RETURN_TEXT_P(cstring_to_text(version));
572 : else
573 0 : PG_RETURN_NULL();
574 : }
575 :
576 :
/*
 * Pick the mechanism pg_import_system_collations uses to enumerate the
 * operating system's locales: EnumSystemLocalesEx on Windows, the output
 * of "locale -a" everywhere else.
 */
#ifdef WIN32
#define ENUM_SYSTEM_LOCALE
#else
#define READ_LOCALE_A_OUTPUT
#endif
586 :
587 :
588 : #ifdef READ_LOCALE_A_OUTPUT
/*
 * Produce the "normalized" form of a libc locale name by dropping every
 * encoding tag, i.e. a '.' plus the letters, digits and hyphens after it.
 * For example "en_US.utf8" becomes "en_US" and "br_FR.iso885915@euro"
 * becomes "br_FR@euro".
 *
 * The result is written to 'new', which must have room for at least as
 * many bytes as 'old' occupies.  Returns true if anything was removed,
 * that is, if the output differs from the input.
 */
static bool
normalize_libc_locale_name(char *new, const char *old)
{
	const char *src = old;
	char	   *dst = new;
	bool		stripped = false;

	for (;;)
	{
		char		c = *src;

		if (c == '\0')
			break;

		if (c != '.')
		{
			/* ordinary character: copy it through */
			*dst++ = c;
			src++;
			continue;
		}

		/* drop the dot and the encoding tag behind it (".utf8", ".UTF-8") */
		src++;
		for (;;)
		{
			bool		tagchar;

			c = *src;
			tagchar = (c >= 'A' && c <= 'Z') ||
				(c >= 'a' && c <= 'z') ||
				(c >= '0' && c <= '9') ||
				c == '-';
			if (!tagchar)
				break;
			src++;
		}
		stripped = true;
	}
	*dst = '\0';

	return stripped;
}
622 :
623 : /*
624 : * qsort comparator for CollAliasData items
625 : */
626 : static int
627 86 : cmpaliases(const void *a, const void *b)
628 : {
629 86 : const CollAliasData *ca = (const CollAliasData *) a;
630 86 : const CollAliasData *cb = (const CollAliasData *) b;
631 :
632 : /* comparing localename is enough because other fields are derived */
633 86 : return strcmp(ca->localename, cb->localename);
634 : }
635 : #endif /* READ_LOCALE_A_OUTPUT */
636 :
637 :
638 : #ifdef USE_ICU
639 : /*
640 : * Get a comment (specifically, the display name) for an ICU locale.
641 : * The result is a palloc'd string, or NULL if we can't get a comment
642 : * or find that it's not all ASCII. (We can *not* accept non-ASCII
643 : * comments, because the contents of template0 must be encoding-agnostic.)
644 : */
645 : static char *
646 67338 : get_icu_locale_comment(const char *localename)
647 : {
648 : UErrorCode status;
649 : UChar displayname[128];
650 : int32 len_uchar;
651 : int32 i;
652 : char *result;
653 :
654 67338 : status = U_ZERO_ERROR;
655 67338 : len_uchar = uloc_getDisplayName(localename, "en",
656 : displayname, lengthof(displayname),
657 : &status);
658 67338 : if (U_FAILURE(status))
659 0 : return NULL; /* no good reason to raise an error */
660 :
661 : /* Check for non-ASCII comment (can't use pg_is_ascii for this) */
662 1153690 : for (i = 0; i < len_uchar; i++)
663 : {
664 1087298 : if (displayname[i] > 127)
665 946 : return NULL;
666 : }
667 :
668 : /* OK, transcribe */
669 66392 : result = palloc(len_uchar + 1);
670 1142596 : for (i = 0; i < len_uchar; i++)
671 1076204 : result[i] = displayname[i];
672 66392 : result[len_uchar] = '\0';
673 :
674 66392 : return result;
675 : }
676 : #endif /* USE_ICU */
677 :
678 :
679 : /*
680 : * Create a new collation using the input locale 'locale'. (subroutine for
681 : * pg_import_system_collations())
682 : *
683 : * 'nspid' is the namespace id where the collation will be created.
684 : *
685 : * 'nvalidp' is incremented if the locale has a valid encoding.
686 : *
687 : * 'ncreatedp' is incremented if the collation is actually created. If the
688 : * collation already exists it will quietly do nothing.
689 : *
690 : * The returned value is the encoding of the locale, -1 if the locale is not
691 : * valid for creating a collation.
692 : *
693 : */
694 : pg_attribute_unused()
695 : static int
696 344 : create_collation_from_locale(const char *locale, int nspid,
697 : int *nvalidp, int *ncreatedp)
698 : {
699 : int enc;
700 : Oid collid;
701 :
702 : /*
703 : * Some systems have locale names that don't consist entirely of ASCII
704 : * letters (such as "bokmål" or "français"). This is pretty
705 : * silly, since we need the locale itself to interpret the non-ASCII
706 : * characters. We can't do much with those, so we filter them out.
707 : */
708 344 : if (!pg_is_ascii(locale))
709 : {
710 0 : elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", locale);
711 0 : return -1;
712 : }
713 :
714 344 : enc = pg_get_encoding_from_locale(locale, false);
715 344 : if (enc < 0)
716 : {
717 0 : elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", locale);
718 0 : return -1;
719 : }
720 344 : if (!PG_VALID_BE_ENCODING(enc))
721 : {
722 0 : elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", locale);
723 0 : return -1;
724 : }
725 344 : if (enc == PG_SQL_ASCII)
726 172 : return -1; /* C/POSIX are already in the catalog */
727 :
728 : /* count valid locales found in operating system */
729 172 : (*nvalidp)++;
730 :
731 : /*
732 : * Create a collation named the same as the locale, but quietly doing
733 : * nothing if it already exists. This is the behavior we need even at
734 : * initdb time, because some versions of "locale -a" can report the same
735 : * locale name more than once. And it's convenient for later import runs,
736 : * too, since you just about always want to add on new locales without a
737 : * lot of chatter about existing ones.
738 : */
739 172 : collid = CollationCreate(locale, nspid, GetUserId(),
740 : COLLPROVIDER_LIBC, true, enc,
741 : locale, locale, NULL, NULL,
742 172 : get_collation_actual_version(COLLPROVIDER_LIBC, locale),
743 : true, true);
744 172 : if (OidIsValid(collid))
745 : {
746 172 : (*ncreatedp)++;
747 :
748 : /* Must do CCI between inserts to handle duplicates correctly */
749 172 : CommandCounterIncrement();
750 : }
751 :
752 172 : return enc;
753 : }
754 :
755 :
756 : #ifdef ENUM_SYSTEM_LOCALE
757 : /* parameter to be passed to the callback function win32_read_locale() */
758 : typedef struct
759 : {
760 : Oid nspid;
761 : int *ncreatedp;
762 : int *nvalidp;
763 : } CollParam;
764 :
765 : /*
766 : * Callback function for EnumSystemLocalesEx() in
767 : * pg_import_system_collations(). Creates a collation for every valid locale
768 : * and a POSIX alias collation.
769 : *
770 : * The callback contract is to return TRUE to continue enumerating and FALSE
771 : * to stop enumerating. We always want to continue.
772 : */
773 : static BOOL CALLBACK
774 : win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
775 : {
776 : CollParam *param = (CollParam *) lparam;
777 : char localebuf[NAMEDATALEN];
778 : int result;
779 : int enc;
780 :
781 : (void) dwFlags;
782 :
783 : result = WideCharToMultiByte(CP_ACP, 0, pStr, -1, localebuf, NAMEDATALEN,
784 : NULL, NULL);
785 :
786 : if (result == 0)
787 : {
788 : if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
789 : elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
790 : return TRUE;
791 : }
792 : if (localebuf[0] == '\0')
793 : return TRUE;
794 :
795 : enc = create_collation_from_locale(localebuf, param->nspid,
796 : param->nvalidp, param->ncreatedp);
797 : if (enc < 0)
798 : return TRUE;
799 :
800 : /*
801 : * Windows will use hyphens between language and territory, where POSIX
802 : * uses an underscore. Simply create a POSIX alias.
803 : */
804 : if (strchr(localebuf, '-'))
805 : {
806 : char alias[NAMEDATALEN];
807 : Oid collid;
808 :
809 : strcpy(alias, localebuf);
810 : for (char *p = alias; *p; p++)
811 : if (*p == '-')
812 : *p = '_';
813 :
814 : collid = CollationCreate(alias, param->nspid, GetUserId(),
815 : COLLPROVIDER_LIBC, true, enc,
816 : localebuf, localebuf, NULL, NULL,
817 : get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
818 : true, true);
819 : if (OidIsValid(collid))
820 : {
821 : (*param->ncreatedp)++;
822 :
823 : CommandCounterIncrement();
824 : }
825 : }
826 :
827 : return TRUE;
828 : }
829 : #endif /* ENUM_SYSTEM_LOCALE */
830 :
831 :
832 : /*
833 : * pg_import_system_collations: add known system collations to pg_collation
834 : */
835 : Datum
836 86 : pg_import_system_collations(PG_FUNCTION_ARGS)
837 : {
838 86 : Oid nspid = PG_GETARG_OID(0);
839 86 : int ncreated = 0;
840 :
841 86 : if (!superuser())
842 0 : ereport(ERROR,
843 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
844 : errmsg("must be superuser to import system collations")));
845 :
846 86 : if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(nspid)))
847 0 : ereport(ERROR,
848 : (errcode(ERRCODE_UNDEFINED_SCHEMA),
849 : errmsg("schema with OID %u does not exist", nspid)));
850 :
851 : /* Load collations known to libc, using "locale -a" to enumerate them */
852 : #ifdef READ_LOCALE_A_OUTPUT
853 : {
854 : FILE *locale_a_handle;
855 : char localebuf[LOCALE_NAME_BUFLEN];
856 86 : int nvalid = 0;
857 : Oid collid;
858 : CollAliasData *aliases;
859 : int naliases,
860 : maxaliases,
861 : i;
862 :
863 : /* expansible array of aliases */
864 86 : maxaliases = 100;
865 86 : aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData));
866 86 : naliases = 0;
867 :
868 86 : locale_a_handle = OpenPipeStream("locale -a", "r");
869 86 : if (locale_a_handle == NULL)
870 0 : ereport(ERROR,
871 : (errcode_for_file_access(),
872 : errmsg("could not execute command \"%s\": %m",
873 : "locale -a")));
874 :
875 430 : while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
876 : {
877 : size_t len;
878 : int enc;
879 : char alias[LOCALE_NAME_BUFLEN];
880 :
881 344 : len = strlen(localebuf);
882 :
883 344 : if (len == 0 || localebuf[len - 1] != '\n')
884 : {
885 0 : elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
886 172 : continue;
887 : }
888 344 : localebuf[len - 1] = '\0';
889 :
890 344 : enc = create_collation_from_locale(localebuf, nspid, &nvalid, &ncreated);
891 344 : if (enc < 0)
892 172 : continue;
893 :
894 : /*
895 : * Generate aliases such as "en_US" in addition to "en_US.utf8"
896 : * for ease of use. Note that collation names are unique per
897 : * encoding only, so this doesn't clash with "en_US" for LATIN1,
898 : * say.
899 : *
900 : * However, it might conflict with a name we'll see later in the
901 : * "locale -a" output. So save up the aliases and try to add them
902 : * after we've read all the output.
903 : */
904 172 : if (normalize_libc_locale_name(alias, localebuf))
905 : {
906 172 : if (naliases >= maxaliases)
907 : {
908 0 : maxaliases *= 2;
909 : aliases = (CollAliasData *)
910 0 : repalloc(aliases, maxaliases * sizeof(CollAliasData));
911 : }
912 172 : aliases[naliases].localename = pstrdup(localebuf);
913 172 : aliases[naliases].alias = pstrdup(alias);
914 172 : aliases[naliases].enc = enc;
915 172 : naliases++;
916 : }
917 : }
918 :
919 : /*
920 : * We don't check the return value of this, because we want to support
921 : * the case where there "locale" command does not exist. (This is
922 : * unusual but can happen on minimalized Linux distributions, for
923 : * example.) We will warn below if no locales could be found.
924 : */
925 86 : ClosePipeStream(locale_a_handle);
926 :
927 : /*
928 : * Before processing the aliases, sort them by locale name. The point
929 : * here is that if "locale -a" gives us multiple locale names with the
930 : * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we
931 : * want to pick a deterministic one of them. First in ASCII sort
932 : * order is a good enough rule. (Before PG 10, the code corresponding
933 : * to this logic in initdb.c had an additional ordering rule, to
934 : * prefer the locale name exactly matching the alias, if any. We
935 : * don't need to consider that here, because we would have already
936 : * created such a pg_collation entry above, and that one will win.)
937 : */
938 86 : if (naliases > 1)
939 86 : qsort(aliases, naliases, sizeof(CollAliasData), cmpaliases);
940 :
941 : /* Now add aliases, ignoring any that match pre-existing entries */
942 258 : for (i = 0; i < naliases; i++)
943 : {
944 172 : char *locale = aliases[i].localename;
945 172 : char *alias = aliases[i].alias;
946 172 : int enc = aliases[i].enc;
947 :
948 172 : collid = CollationCreate(alias, nspid, GetUserId(),
949 : COLLPROVIDER_LIBC, true, enc,
950 : locale, locale, NULL, NULL,
951 172 : get_collation_actual_version(COLLPROVIDER_LIBC, locale),
952 : true, true);
953 172 : if (OidIsValid(collid))
954 : {
955 86 : ncreated++;
956 :
957 86 : CommandCounterIncrement();
958 : }
959 : }
960 :
961 : /* Give a warning if "locale -a" seems to be malfunctioning */
962 86 : if (nvalid == 0)
963 0 : ereport(WARNING,
964 : (errmsg("no usable system locales were found")));
965 : }
966 : #endif /* READ_LOCALE_A_OUTPUT */
967 :
968 : /*
969 : * Load collations known to ICU
970 : *
971 : * We use uloc_countAvailable()/uloc_getAvailable() rather than
972 : * ucol_countAvailable()/ucol_getAvailable(). The former returns a full
973 : * set of language+region combinations, whereas the latter only returns
974 : * language+region combinations if they are distinct from the language's
975 : * base collation. So there might not be a de-DE or en-GB, which would be
976 : * confusing.
977 : */
978 : #ifdef USE_ICU
979 : {
980 : int i;
981 :
982 : /*
983 : * Start the loop at -1 to sneak in the root locale without too much
984 : * code duplication.
985 : */
986 67424 : for (i = -1; i < uloc_countAvailable(); i++)
987 : {
988 : const char *name;
989 : char *langtag;
990 : char *icucomment;
991 : Oid collid;
992 :
993 67338 : if (i == -1)
994 86 : name = ""; /* ICU root locale */
995 : else
996 67252 : name = uloc_getAvailable(i);
997 :
998 67338 : langtag = icu_language_tag(name, ERROR);
999 :
1000 : /*
1001 : * Be paranoid about not allowing any non-ASCII strings into
1002 : * pg_collation
1003 : */
1004 67338 : if (!pg_is_ascii(langtag))
1005 0 : continue;
1006 :
1007 67338 : collid = CollationCreate(psprintf("%s-x-icu", langtag),
1008 : nspid, GetUserId(),
1009 : COLLPROVIDER_ICU, true, -1,
1010 : NULL, NULL, langtag, NULL,
1011 67338 : get_collation_actual_version(COLLPROVIDER_ICU, langtag),
1012 : true, true);
1013 67338 : if (OidIsValid(collid))
1014 : {
1015 67338 : ncreated++;
1016 :
1017 67338 : CommandCounterIncrement();
1018 :
1019 67338 : icucomment = get_icu_locale_comment(name);
1020 67338 : if (icucomment)
1021 66392 : CreateComments(collid, CollationRelationId, 0,
1022 : icucomment);
1023 : }
1024 : }
1025 : }
1026 : #endif /* USE_ICU */
1027 :
1028 : /* Load collations known to WIN32 */
1029 : #ifdef ENUM_SYSTEM_LOCALE
1030 : {
1031 : int nvalid = 0;
1032 : CollParam param;
1033 :
1034 : param.nspid = nspid;
1035 : param.ncreatedp = &ncreated;
1036 : param.nvalidp = &nvalid;
1037 :
1038 : /*
1039 : * Enumerate the locales that are either installed on or supported by
1040 : * the OS.
1041 : */
1042 : if (!EnumSystemLocalesEx(win32_read_locale, LOCALE_ALL,
1043 : (LPARAM) ¶m, NULL))
1044 : _dosmaperr(GetLastError());
1045 :
1046 : /* Give a warning if EnumSystemLocalesEx seems to be malfunctioning */
1047 : if (nvalid == 0)
1048 : ereport(WARNING,
1049 : (errmsg("no usable system locales were found")));
1050 : }
1051 : #endif /* ENUM_SYSTEM_LOCALE */
1052 :
1053 86 : PG_RETURN_INT32(ncreated);
1054 : }
|