Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * collationcmds.c
4 : * collation-related commands support code
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/commands/collationcmds.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/htup_details.h"
18 : #include "access/table.h"
19 : #include "access/xact.h"
20 : #include "catalog/indexing.h"
21 : #include "catalog/namespace.h"
22 : #include "catalog/objectaccess.h"
23 : #include "catalog/pg_collation.h"
24 : #include "catalog/pg_database.h"
25 : #include "catalog/pg_namespace.h"
26 : #include "commands/collationcmds.h"
27 : #include "commands/comment.h"
28 : #include "commands/dbcommands.h"
29 : #include "commands/defrem.h"
30 : #include "common/string.h"
31 : #include "mb/pg_wchar.h"
32 : #include "miscadmin.h"
33 : #include "utils/acl.h"
34 : #include "utils/builtins.h"
35 : #include "utils/lsyscache.h"
36 : #include "utils/pg_locale.h"
37 : #include "utils/rel.h"
38 : #include "utils/syscache.h"
39 :
40 :
/*
 * One pending alias collected while reading "locale -a" output: the
 * shortened name to create, the full locale it stands for, and the
 * encoding that was determined for that locale.
 */
typedef struct
{
	char	   *localename;		/* full locale name, as listed by "locale -a" */
	char	   *alias;			/* abbreviated name referring to the same locale */
	int			enc;			/* encoding associated with the locale */
} CollAliasData;
47 :
48 :
49 : /*
50 : * CREATE COLLATION
51 : */
52 : ObjectAddress
53 342 : DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists)
54 : {
55 : char *collName;
56 : Oid collNamespace;
57 : AclResult aclresult;
58 : ListCell *pl;
59 342 : DefElem *fromEl = NULL;
60 342 : DefElem *localeEl = NULL;
61 342 : DefElem *lccollateEl = NULL;
62 342 : DefElem *lcctypeEl = NULL;
63 342 : DefElem *providerEl = NULL;
64 342 : DefElem *deterministicEl = NULL;
65 342 : DefElem *rulesEl = NULL;
66 342 : DefElem *versionEl = NULL;
67 : char *collcollate;
68 : char *collctype;
69 : const char *colllocale;
70 : char *collicurules;
71 : bool collisdeterministic;
72 : int collencoding;
73 : char collprovider;
74 342 : char *collversion = NULL;
75 : Oid newoid;
76 : ObjectAddress address;
77 :
78 342 : collNamespace = QualifiedNameGetCreationNamespace(names, &collName);
79 :
80 342 : aclresult = object_aclcheck(NamespaceRelationId, collNamespace, GetUserId(), ACL_CREATE);
81 342 : if (aclresult != ACLCHECK_OK)
82 0 : aclcheck_error(aclresult, OBJECT_SCHEMA,
83 0 : get_namespace_name(collNamespace));
84 :
85 978 : foreach(pl, parameters)
86 : {
87 678 : DefElem *defel = lfirst_node(DefElem, pl);
88 : DefElem **defelp;
89 :
90 678 : if (strcmp(defel->defname, "from") == 0)
91 58 : defelp = &fromEl;
92 620 : else if (strcmp(defel->defname, "locale") == 0)
93 210 : defelp = &localeEl;
94 410 : else if (strcmp(defel->defname, "lc_collate") == 0)
95 62 : defelp = &lccollateEl;
96 348 : else if (strcmp(defel->defname, "lc_ctype") == 0)
97 56 : defelp = &lcctypeEl;
98 292 : else if (strcmp(defel->defname, "provider") == 0)
99 208 : defelp = &providerEl;
100 84 : else if (strcmp(defel->defname, "deterministic") == 0)
101 44 : defelp = &deterministicEl;
102 40 : else if (strcmp(defel->defname, "rules") == 0)
103 12 : defelp = &rulesEl;
104 28 : else if (strcmp(defel->defname, "version") == 0)
105 22 : defelp = &versionEl;
106 : else
107 : {
108 6 : ereport(ERROR,
109 : (errcode(ERRCODE_SYNTAX_ERROR),
110 : errmsg("collation attribute \"%s\" not recognized",
111 : defel->defname),
112 : parser_errposition(pstate, defel->location)));
113 : break;
114 : }
115 672 : if (*defelp != NULL)
116 36 : errorConflictingDefElem(defel, pstate);
117 636 : *defelp = defel;
118 : }
119 :
120 300 : if (localeEl && (lccollateEl || lcctypeEl))
121 18 : ereport(ERROR,
122 : errcode(ERRCODE_SYNTAX_ERROR),
123 : errmsg("conflicting or redundant options"),
124 : errdetail("LOCALE cannot be specified together with LC_COLLATE or LC_CTYPE."));
125 :
126 282 : if (fromEl && list_length(parameters) != 1)
127 6 : ereport(ERROR,
128 : errcode(ERRCODE_SYNTAX_ERROR),
129 : errmsg("conflicting or redundant options"),
130 : errdetail("FROM cannot be specified together with any other options."));
131 :
132 276 : if (fromEl)
133 : {
134 : Oid collid;
135 : HeapTuple tp;
136 : Datum datum;
137 : bool isnull;
138 :
139 52 : collid = get_collation_oid(defGetQualifiedName(fromEl), false);
140 46 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
141 46 : if (!HeapTupleIsValid(tp))
142 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
143 :
144 46 : collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
145 46 : collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic;
146 46 : collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding;
147 :
148 46 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull);
149 46 : if (!isnull)
150 28 : collcollate = TextDatumGetCString(datum);
151 : else
152 18 : collcollate = NULL;
153 :
154 46 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull);
155 46 : if (!isnull)
156 28 : collctype = TextDatumGetCString(datum);
157 : else
158 18 : collctype = NULL;
159 :
160 46 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colllocale, &isnull);
161 46 : if (!isnull)
162 12 : colllocale = TextDatumGetCString(datum);
163 : else
164 34 : colllocale = NULL;
165 :
166 : /*
167 : * When the ICU locale comes from an existing collation, do not
168 : * canonicalize to a language tag.
169 : */
170 :
171 46 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
172 46 : if (!isnull)
173 0 : collicurules = TextDatumGetCString(datum);
174 : else
175 46 : collicurules = NULL;
176 :
177 46 : ReleaseSysCache(tp);
178 :
179 : /*
180 : * Copying the "default" collation is not allowed because most code
181 : * checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT,
182 : * and so having a second collation with COLLPROVIDER_DEFAULT would
183 : * not work and potentially confuse or crash some code. This could be
184 : * fixed with some legwork.
185 : */
186 46 : if (collprovider == COLLPROVIDER_DEFAULT)
187 6 : ereport(ERROR,
188 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
189 : errmsg("collation \"default\" cannot be copied")));
190 : }
191 : else
192 : {
193 224 : char *collproviderstr = NULL;
194 :
195 224 : collcollate = NULL;
196 224 : collctype = NULL;
197 224 : colllocale = NULL;
198 224 : collicurules = NULL;
199 :
200 224 : if (providerEl)
201 196 : collproviderstr = defGetString(providerEl);
202 :
203 224 : if (deterministicEl)
204 32 : collisdeterministic = defGetBoolean(deterministicEl);
205 : else
206 192 : collisdeterministic = true;
207 :
208 224 : if (rulesEl)
209 12 : collicurules = defGetString(rulesEl);
210 :
211 224 : if (versionEl)
212 4 : collversion = defGetString(versionEl);
213 :
214 224 : if (collproviderstr)
215 : {
216 196 : if (pg_strcasecmp(collproviderstr, "builtin") == 0)
217 44 : collprovider = COLLPROVIDER_BUILTIN;
218 152 : else if (pg_strcasecmp(collproviderstr, "icu") == 0)
219 152 : collprovider = COLLPROVIDER_ICU;
220 0 : else if (pg_strcasecmp(collproviderstr, "libc") == 0)
221 0 : collprovider = COLLPROVIDER_LIBC;
222 : else
223 0 : ereport(ERROR,
224 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
225 : errmsg("unrecognized collation provider: %s",
226 : collproviderstr)));
227 : }
228 : else
229 28 : collprovider = COLLPROVIDER_LIBC;
230 :
231 224 : if (localeEl)
232 : {
233 180 : if (collprovider == COLLPROVIDER_LIBC)
234 : {
235 2 : collcollate = defGetString(localeEl);
236 2 : collctype = defGetString(localeEl);
237 : }
238 : else
239 178 : colllocale = defGetString(localeEl);
240 : }
241 :
242 224 : if (lccollateEl)
243 38 : collcollate = defGetString(lccollateEl);
244 :
245 224 : if (lcctypeEl)
246 32 : collctype = defGetString(lcctypeEl);
247 :
248 224 : if (collprovider == COLLPROVIDER_BUILTIN)
249 : {
250 44 : if (!colllocale)
251 12 : ereport(ERROR,
252 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
253 : errmsg("parameter \"%s\" must be specified",
254 : "locale")));
255 :
256 32 : colllocale = builtin_validate_locale(GetDatabaseEncoding(),
257 : colllocale);
258 : }
259 180 : else if (collprovider == COLLPROVIDER_LIBC)
260 : {
261 28 : if (!collcollate)
262 0 : ereport(ERROR,
263 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
264 : errmsg("parameter \"%s\" must be specified",
265 : "lc_collate")));
266 :
267 28 : if (!collctype)
268 0 : ereport(ERROR,
269 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
270 : errmsg("parameter \"%s\" must be specified",
271 : "lc_ctype")));
272 : }
273 152 : else if (collprovider == COLLPROVIDER_ICU)
274 : {
275 152 : if (!colllocale)
276 6 : ereport(ERROR,
277 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
278 : errmsg("parameter \"%s\" must be specified",
279 : "locale")));
280 :
281 : /*
282 : * During binary upgrade, preserve the locale string. Otherwise,
283 : * canonicalize to a language tag.
284 : */
285 146 : if (!IsBinaryUpgrade)
286 : {
287 144 : char *langtag = icu_language_tag(colllocale,
288 : icu_validation_level);
289 :
290 138 : if (langtag && strcmp(colllocale, langtag) != 0)
291 : {
292 100 : ereport(NOTICE,
293 : (errmsg("using standard form \"%s\" for ICU locale \"%s\"",
294 : langtag, colllocale)));
295 :
296 100 : colllocale = langtag;
297 : }
298 : }
299 :
300 140 : icu_validate_locale(colllocale);
301 : }
302 :
303 : /*
304 : * Nondeterministic collations are currently only supported with ICU
305 : * because that's the only case where it can actually make a
306 : * difference. So we can save writing the code for the other
307 : * providers.
308 : */
309 176 : if (!collisdeterministic && collprovider != COLLPROVIDER_ICU)
310 0 : ereport(ERROR,
311 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
312 : errmsg("nondeterministic collations not supported with this provider")));
313 :
314 176 : if (collicurules && collprovider != COLLPROVIDER_ICU)
315 0 : ereport(ERROR,
316 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
317 : errmsg("ICU rules cannot be specified unless locale provider is ICU")));
318 :
319 176 : if (collprovider == COLLPROVIDER_BUILTIN)
320 : {
321 20 : collencoding = builtin_locale_encoding(colllocale);
322 : }
323 156 : else if (collprovider == COLLPROVIDER_ICU)
324 : {
325 : #ifdef USE_ICU
326 : /*
327 : * We could create ICU collations with collencoding == database
328 : * encoding, but it seems better to use -1 so that it matches the
329 : * way initdb would create ICU collations. However, only allow
330 : * one to be created when the current database's encoding is
331 : * supported. Otherwise the collation is useless, plus we get
332 : * surprising behaviors like not being able to drop the collation.
333 : *
334 : * Skip this test when !USE_ICU, because the error we want to
335 : * throw for that isn't thrown till later.
336 : */
337 128 : if (!is_encoding_supported_by_icu(GetDatabaseEncoding()))
338 0 : ereport(ERROR,
339 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
340 : errmsg("current database's encoding is not supported with this provider")));
341 : #endif
342 128 : collencoding = -1;
343 : }
344 : else
345 : {
346 28 : collencoding = GetDatabaseEncoding();
347 28 : check_encoding_locale_matches(collencoding, collcollate, collctype);
348 : }
349 : }
350 :
351 216 : if (!collversion)
352 : {
353 : const char *locale;
354 :
355 212 : if (collprovider == COLLPROVIDER_LIBC)
356 56 : locale = collcollate;
357 : else
358 156 : locale = colllocale;
359 :
360 212 : collversion = get_collation_actual_version(collprovider, locale);
361 : }
362 :
363 216 : newoid = CollationCreate(collName,
364 : collNamespace,
365 : GetUserId(),
366 : collprovider,
367 : collisdeterministic,
368 : collencoding,
369 : collcollate,
370 : collctype,
371 : colllocale,
372 : collicurules,
373 : collversion,
374 : if_not_exists,
375 : false); /* not quiet */
376 :
377 208 : if (!OidIsValid(newoid))
378 2 : return InvalidObjectAddress;
379 :
380 : /*
381 : * Check that the locales can be loaded. NB: pg_newlocale_from_collation
382 : * is only supposed to be called on non-C-equivalent locales.
383 : */
384 206 : CommandCounterIncrement();
385 206 : if (!lc_collate_is_c(newoid) || !lc_ctype_is_c(newoid))
386 154 : (void) pg_newlocale_from_collation(newoid);
387 :
388 200 : ObjectAddressSet(address, CollationRelationId, newoid);
389 :
390 200 : return address;
391 : }
392 :
393 : /*
394 : * Subroutine for ALTER COLLATION SET SCHEMA and RENAME
395 : *
396 : * Is there a collation with the same name of the given collation already in
397 : * the given namespace? If so, raise an appropriate error message.
398 : */
399 : void
400 18 : IsThereCollationInNamespace(const char *collname, Oid nspOid)
401 : {
402 : /* make sure the name doesn't already exist in new schema */
403 18 : if (SearchSysCacheExists3(COLLNAMEENCNSP,
404 : CStringGetDatum(collname),
405 : Int32GetDatum(GetDatabaseEncoding()),
406 : ObjectIdGetDatum(nspOid)))
407 0 : ereport(ERROR,
408 : (errcode(ERRCODE_DUPLICATE_OBJECT),
409 : errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"",
410 : collname, GetDatabaseEncodingName(),
411 : get_namespace_name(nspOid))));
412 :
413 : /* mustn't match an any-encoding entry, either */
414 18 : if (SearchSysCacheExists3(COLLNAMEENCNSP,
415 : CStringGetDatum(collname),
416 : Int32GetDatum(-1),
417 : ObjectIdGetDatum(nspOid)))
418 6 : ereport(ERROR,
419 : (errcode(ERRCODE_DUPLICATE_OBJECT),
420 : errmsg("collation \"%s\" already exists in schema \"%s\"",
421 : collname, get_namespace_name(nspOid))));
422 12 : }
423 :
424 : /*
425 : * ALTER COLLATION
426 : */
427 : ObjectAddress
428 6 : AlterCollation(AlterCollationStmt *stmt)
429 : {
430 : Relation rel;
431 : Oid collOid;
432 : HeapTuple tup;
433 : Form_pg_collation collForm;
434 : Datum datum;
435 : bool isnull;
436 : char *oldversion;
437 : char *newversion;
438 : ObjectAddress address;
439 :
440 6 : rel = table_open(CollationRelationId, RowExclusiveLock);
441 6 : collOid = get_collation_oid(stmt->collname, false);
442 :
443 6 : if (collOid == DEFAULT_COLLATION_OID)
444 0 : ereport(ERROR,
445 : (errmsg("cannot refresh version of default collation"),
446 : /* translator: %s is an SQL command */
447 : errhint("Use %s instead.",
448 : "ALTER DATABASE ... REFRESH COLLATION VERSION")));
449 :
450 6 : if (!object_ownercheck(CollationRelationId, collOid, GetUserId()))
451 0 : aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_COLLATION,
452 0 : NameListToString(stmt->collname));
453 :
454 6 : tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid));
455 6 : if (!HeapTupleIsValid(tup))
456 0 : elog(ERROR, "cache lookup failed for collation %u", collOid);
457 :
458 6 : collForm = (Form_pg_collation) GETSTRUCT(tup);
459 6 : datum = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, &isnull);
460 6 : oldversion = isnull ? NULL : TextDatumGetCString(datum);
461 :
462 6 : if (collForm->collprovider == COLLPROVIDER_LIBC)
463 0 : datum = SysCacheGetAttrNotNull(COLLOID, tup, Anum_pg_collation_collcollate);
464 : else
465 6 : datum = SysCacheGetAttrNotNull(COLLOID, tup, Anum_pg_collation_colllocale);
466 :
467 6 : newversion = get_collation_actual_version(collForm->collprovider,
468 6 : TextDatumGetCString(datum));
469 :
470 : /* cannot change from NULL to non-NULL or vice versa */
471 6 : if ((!oldversion && newversion) || (oldversion && !newversion))
472 0 : elog(ERROR, "invalid collation version change");
473 6 : else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
474 0 : {
475 : bool nulls[Natts_pg_collation];
476 : bool replaces[Natts_pg_collation];
477 : Datum values[Natts_pg_collation];
478 :
479 0 : ereport(NOTICE,
480 : (errmsg("changing version from %s to %s",
481 : oldversion, newversion)));
482 :
483 0 : memset(values, 0, sizeof(values));
484 0 : memset(nulls, false, sizeof(nulls));
485 0 : memset(replaces, false, sizeof(replaces));
486 :
487 0 : values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion);
488 0 : replaces[Anum_pg_collation_collversion - 1] = true;
489 :
490 0 : tup = heap_modify_tuple(tup, RelationGetDescr(rel),
491 : values, nulls, replaces);
492 : }
493 : else
494 6 : ereport(NOTICE,
495 : (errmsg("version has not changed")));
496 :
497 6 : CatalogTupleUpdate(rel, &tup->t_self, tup);
498 :
499 6 : InvokeObjectPostAlterHook(CollationRelationId, collOid, 0);
500 :
501 6 : ObjectAddressSet(address, CollationRelationId, collOid);
502 :
503 6 : heap_freetuple(tup);
504 6 : table_close(rel, NoLock);
505 :
506 6 : return address;
507 : }
508 :
509 :
510 : Datum
511 76 : pg_collation_actual_version(PG_FUNCTION_ARGS)
512 : {
513 76 : Oid collid = PG_GETARG_OID(0);
514 : char provider;
515 : char *locale;
516 : char *version;
517 : Datum datum;
518 :
519 76 : if (collid == DEFAULT_COLLATION_OID)
520 : {
521 : /* retrieve from pg_database */
522 :
523 0 : HeapTuple dbtup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
524 :
525 0 : if (!HeapTupleIsValid(dbtup))
526 0 : ereport(ERROR,
527 : (errcode(ERRCODE_UNDEFINED_OBJECT),
528 : errmsg("database with OID %u does not exist", MyDatabaseId)));
529 :
530 0 : provider = ((Form_pg_database) GETSTRUCT(dbtup))->datlocprovider;
531 :
532 0 : if (provider == COLLPROVIDER_LIBC)
533 : {
534 0 : datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, Anum_pg_database_datcollate);
535 0 : locale = TextDatumGetCString(datum);
536 : }
537 : else
538 : {
539 0 : datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, Anum_pg_database_datlocale);
540 0 : locale = TextDatumGetCString(datum);
541 : }
542 :
543 0 : ReleaseSysCache(dbtup);
544 : }
545 : else
546 : {
547 : /* retrieve from pg_collation */
548 :
549 76 : HeapTuple colltp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
550 :
551 76 : if (!HeapTupleIsValid(colltp))
552 0 : ereport(ERROR,
553 : (errcode(ERRCODE_UNDEFINED_OBJECT),
554 : errmsg("collation with OID %u does not exist", collid)));
555 :
556 76 : provider = ((Form_pg_collation) GETSTRUCT(colltp))->collprovider;
557 : Assert(provider != COLLPROVIDER_DEFAULT);
558 :
559 76 : if (provider == COLLPROVIDER_LIBC)
560 : {
561 0 : datum = SysCacheGetAttrNotNull(COLLOID, colltp, Anum_pg_collation_collcollate);
562 0 : locale = TextDatumGetCString(datum);
563 : }
564 : else
565 : {
566 76 : datum = SysCacheGetAttrNotNull(COLLOID, colltp, Anum_pg_collation_colllocale);
567 76 : locale = TextDatumGetCString(datum);
568 : }
569 :
570 76 : ReleaseSysCache(colltp);
571 : }
572 :
573 76 : version = get_collation_actual_version(provider, locale);
574 76 : if (version)
575 76 : PG_RETURN_TEXT_P(cstring_to_text(version));
576 : else
577 0 : PG_RETURN_NULL();
578 : }
579 :
580 :
/*
 * Choose how pg_import_system_collations enumerates operating-system
 * locales: EnumSystemLocalesEx on WIN32, "locale -a" output everywhere else.
 */
#ifdef WIN32
#define ENUM_SYSTEM_LOCALE
#else
#define READ_LOCALE_A_OUTPUT
#endif
590 :
591 :
592 : #ifdef READ_LOCALE_A_OUTPUT
/*
 * Produce the "normalized" form of a libc locale name by dropping encoding
 * tags such as ".utf8" (e.g., "en_US.utf8" -> "en_US", but
 * "br_FR.iso885915@euro" -> "br_FR@euro").
 *
 * The result is written to 'new', which the caller must size to hold at
 * least as many bytes as 'old' including its terminator.  Returns true if
 * anything was stripped, i.e. the output differs from the input.
 */
static bool
normalize_libc_locale_name(char *new, const char *old)
{
	bool		stripped = false;
	const char *src = old;
	char	   *dst = new;

	while (*src != '\0')
	{
		if (*src != '.')
		{
			/* ordinary character: copy through */
			*dst++ = *src++;
			continue;
		}

		/* skip the dot, then the encoding tag such as "utf8" or "UTF-8" */
		for (src++;; src++)
		{
			char		c = *src;
			bool		tagchar = (c >= 'A' && c <= 'Z') ||
				(c >= 'a' && c <= 'z') ||
				(c >= '0' && c <= '9') ||
				(c == '-');

			if (!tagchar)
				break;
		}
		stripped = true;
	}
	*dst = '\0';

	return stripped;
}
626 :
627 : /*
628 : * qsort comparator for CollAliasData items
629 : */
630 : static int
631 76 : cmpaliases(const void *a, const void *b)
632 : {
633 76 : const CollAliasData *ca = (const CollAliasData *) a;
634 76 : const CollAliasData *cb = (const CollAliasData *) b;
635 :
636 : /* comparing localename is enough because other fields are derived */
637 76 : return strcmp(ca->localename, cb->localename);
638 : }
639 : #endif /* READ_LOCALE_A_OUTPUT */
640 :
641 :
642 : #ifdef USE_ICU
643 : /*
644 : * Get a comment (specifically, the display name) for an ICU locale.
645 : * The result is a palloc'd string, or NULL if we can't get a comment
646 : * or find that it's not all ASCII. (We can *not* accept non-ASCII
647 : * comments, because the contents of template0 must be encoding-agnostic.)
648 : */
649 : static char *
650 59508 : get_icu_locale_comment(const char *localename)
651 : {
652 : UErrorCode status;
653 : UChar displayname[128];
654 : int32 len_uchar;
655 : int32 i;
656 : char *result;
657 :
658 59508 : status = U_ZERO_ERROR;
659 59508 : len_uchar = uloc_getDisplayName(localename, "en",
660 : displayname, lengthof(displayname),
661 : &status);
662 59508 : if (U_FAILURE(status))
663 0 : return NULL; /* no good reason to raise an error */
664 :
665 : /* Check for non-ASCII comment (can't use pg_is_ascii for this) */
666 1019540 : for (i = 0; i < len_uchar; i++)
667 : {
668 960868 : if (displayname[i] > 127)
669 836 : return NULL;
670 : }
671 :
672 : /* OK, transcribe */
673 58672 : result = palloc(len_uchar + 1);
674 1009736 : for (i = 0; i < len_uchar; i++)
675 951064 : result[i] = displayname[i];
676 58672 : result[len_uchar] = '\0';
677 :
678 58672 : return result;
679 : }
680 : #endif /* USE_ICU */
681 :
682 :
683 : /*
684 : * Create a new collation using the input locale 'locale'. (subroutine for
685 : * pg_import_system_collations())
686 : *
687 : * 'nspid' is the namespace id where the collation will be created.
688 : *
689 : * 'nvalidp' is incremented if the locale has a valid encoding.
690 : *
691 : * 'ncreatedp' is incremented if the collation is actually created. If the
692 : * collation already exists it will quietly do nothing.
693 : *
694 : * The returned value is the encoding of the locale, -1 if the locale is not
695 : * valid for creating a collation.
696 : *
697 : */
698 : pg_attribute_unused()
699 : static int
700 304 : create_collation_from_locale(const char *locale, int nspid,
701 : int *nvalidp, int *ncreatedp)
702 : {
703 : int enc;
704 : Oid collid;
705 :
706 : /*
707 : * Some systems have locale names that don't consist entirely of ASCII
708 : * letters (such as "bokmål" or "français"). This is pretty
709 : * silly, since we need the locale itself to interpret the non-ASCII
710 : * characters. We can't do much with those, so we filter them out.
711 : */
712 304 : if (!pg_is_ascii(locale))
713 : {
714 0 : elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", locale);
715 0 : return -1;
716 : }
717 :
718 304 : enc = pg_get_encoding_from_locale(locale, false);
719 304 : if (enc < 0)
720 : {
721 0 : elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", locale);
722 0 : return -1;
723 : }
724 304 : if (!PG_VALID_BE_ENCODING(enc))
725 : {
726 0 : elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", locale);
727 0 : return -1;
728 : }
729 304 : if (enc == PG_SQL_ASCII)
730 152 : return -1; /* C/POSIX are already in the catalog */
731 :
732 : /* count valid locales found in operating system */
733 152 : (*nvalidp)++;
734 :
735 : /*
736 : * Create a collation named the same as the locale, but quietly doing
737 : * nothing if it already exists. This is the behavior we need even at
738 : * initdb time, because some versions of "locale -a" can report the same
739 : * locale name more than once. And it's convenient for later import runs,
740 : * too, since you just about always want to add on new locales without a
741 : * lot of chatter about existing ones.
742 : */
743 152 : collid = CollationCreate(locale, nspid, GetUserId(),
744 : COLLPROVIDER_LIBC, true, enc,
745 : locale, locale, NULL, NULL,
746 152 : get_collation_actual_version(COLLPROVIDER_LIBC, locale),
747 : true, true);
748 152 : if (OidIsValid(collid))
749 : {
750 152 : (*ncreatedp)++;
751 :
752 : /* Must do CCI between inserts to handle duplicates correctly */
753 152 : CommandCounterIncrement();
754 : }
755 :
756 152 : return enc;
757 : }
758 :
759 :
#ifdef ENUM_SYSTEM_LOCALE
/* parameter to be passed to the callback function win32_read_locale() */
typedef struct
{
	Oid			nspid;			/* namespace receiving the collations */
	int		   *ncreatedp;		/* counter of collations created */
	int		   *nvalidp;		/* counter of locales with a valid encoding */
} CollParam;

/*
 * Callback function for EnumSystemLocalesEx() in
 * pg_import_system_collations().  Creates a collation for every valid locale
 * and a POSIX alias collation.
 *
 * 'pStr' is the locale name as a wide string, 'dwFlags' is unused, and
 * 'lparam' carries a pointer to a CollParam.
 *
 * The callback contract is to return TRUE to continue enumerating and FALSE
 * to stop enumerating.  We always want to continue.
 */
static BOOL CALLBACK
win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
{
	CollParam  *param = (CollParam *) lparam;
	char		localebuf[NAMEDATALEN] = {0};
	int			result;
	int			enc;

	(void) dwFlags;

	result = WideCharToMultiByte(CP_ACP, 0, pStr, -1, localebuf, NAMEDATALEN,
								 NULL, NULL);

	if (result == 0)
	{
		if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
		{
			/*
			 * The conversion failed for lack of room, so nothing guarantees
			 * that localebuf ends in a NUL.  Force one so the %s below
			 * cannot read past the end of the buffer.
			 */
			localebuf[NAMEDATALEN - 1] = '\0';
			elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
		}
		return TRUE;
	}
	if (localebuf[0] == '\0')
		return TRUE;

	enc = create_collation_from_locale(localebuf, param->nspid,
									   param->nvalidp, param->ncreatedp);
	if (enc < 0)
		return TRUE;

	/*
	 * Windows will use hyphens between language and territory, where POSIX
	 * uses an underscore.  Simply create a POSIX alias.
	 */
	if (strchr(localebuf, '-'))
	{
		char		alias[NAMEDATALEN];
		Oid			collid;

		/* same size as localebuf, which is NUL-terminated on success */
		strcpy(alias, localebuf);
		for (char *p = alias; *p; p++)
			if (*p == '-')
				*p = '_';

		collid = CollationCreate(alias, param->nspid, GetUserId(),
								 COLLPROVIDER_LIBC, true, enc,
								 localebuf, localebuf, NULL, NULL,
								 get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
								 true, true);
		if (OidIsValid(collid))
		{
			(*param->ncreatedp)++;

			CommandCounterIncrement();
		}
	}

	return TRUE;
}
#endif							/* ENUM_SYSTEM_LOCALE */
834 :
835 :
836 : /*
837 : * pg_import_system_collations: add known system collations to pg_collation
838 : */
839 : Datum
840 76 : pg_import_system_collations(PG_FUNCTION_ARGS)
841 : {
842 76 : Oid nspid = PG_GETARG_OID(0);
843 76 : int ncreated = 0;
844 :
845 76 : if (!superuser())
846 0 : ereport(ERROR,
847 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
848 : errmsg("must be superuser to import system collations")));
849 :
850 76 : if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(nspid)))
851 0 : ereport(ERROR,
852 : (errcode(ERRCODE_UNDEFINED_SCHEMA),
853 : errmsg("schema with OID %u does not exist", nspid)));
854 :
855 : /* Load collations known to libc, using "locale -a" to enumerate them */
856 : #ifdef READ_LOCALE_A_OUTPUT
857 : {
858 : FILE *locale_a_handle;
859 : char localebuf[LOCALE_NAME_BUFLEN];
860 76 : int nvalid = 0;
861 : Oid collid;
862 : CollAliasData *aliases;
863 : int naliases,
864 : maxaliases,
865 : i;
866 :
867 : /* expansible array of aliases */
868 76 : maxaliases = 100;
869 76 : aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData));
870 76 : naliases = 0;
871 :
872 76 : locale_a_handle = OpenPipeStream("locale -a", "r");
873 76 : if (locale_a_handle == NULL)
874 0 : ereport(ERROR,
875 : (errcode_for_file_access(),
876 : errmsg("could not execute command \"%s\": %m",
877 : "locale -a")));
878 :
879 380 : while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
880 : {
881 : size_t len;
882 : int enc;
883 : char alias[LOCALE_NAME_BUFLEN];
884 :
885 304 : len = strlen(localebuf);
886 :
887 304 : if (len == 0 || localebuf[len - 1] != '\n')
888 : {
889 0 : elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
890 152 : continue;
891 : }
892 304 : localebuf[len - 1] = '\0';
893 :
894 304 : enc = create_collation_from_locale(localebuf, nspid, &nvalid, &ncreated);
895 304 : if (enc < 0)
896 152 : continue;
897 :
898 : /*
899 : * Generate aliases such as "en_US" in addition to "en_US.utf8"
900 : * for ease of use. Note that collation names are unique per
901 : * encoding only, so this doesn't clash with "en_US" for LATIN1,
902 : * say.
903 : *
904 : * However, it might conflict with a name we'll see later in the
905 : * "locale -a" output. So save up the aliases and try to add them
906 : * after we've read all the output.
907 : */
908 152 : if (normalize_libc_locale_name(alias, localebuf))
909 : {
910 152 : if (naliases >= maxaliases)
911 : {
912 0 : maxaliases *= 2;
913 : aliases = (CollAliasData *)
914 0 : repalloc(aliases, maxaliases * sizeof(CollAliasData));
915 : }
916 152 : aliases[naliases].localename = pstrdup(localebuf);
917 152 : aliases[naliases].alias = pstrdup(alias);
918 152 : aliases[naliases].enc = enc;
919 152 : naliases++;
920 : }
921 : }
922 :
923 : /*
924 : * We don't check the return value of this, because we want to support
925 : * the case where there "locale" command does not exist. (This is
926 : * unusual but can happen on minimalized Linux distributions, for
927 : * example.) We will warn below if no locales could be found.
928 : */
929 76 : ClosePipeStream(locale_a_handle);
930 :
931 : /*
932 : * Before processing the aliases, sort them by locale name. The point
933 : * here is that if "locale -a" gives us multiple locale names with the
934 : * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we
935 : * want to pick a deterministic one of them. First in ASCII sort
936 : * order is a good enough rule. (Before PG 10, the code corresponding
937 : * to this logic in initdb.c had an additional ordering rule, to
938 : * prefer the locale name exactly matching the alias, if any. We
939 : * don't need to consider that here, because we would have already
940 : * created such a pg_collation entry above, and that one will win.)
941 : */
942 76 : if (naliases > 1)
943 76 : qsort(aliases, naliases, sizeof(CollAliasData), cmpaliases);
944 :
945 : /* Now add aliases, ignoring any that match pre-existing entries */
946 228 : for (i = 0; i < naliases; i++)
947 : {
948 152 : char *locale = aliases[i].localename;
949 152 : char *alias = aliases[i].alias;
950 152 : int enc = aliases[i].enc;
951 :
952 152 : collid = CollationCreate(alias, nspid, GetUserId(),
953 : COLLPROVIDER_LIBC, true, enc,
954 : locale, locale, NULL, NULL,
955 152 : get_collation_actual_version(COLLPROVIDER_LIBC, locale),
956 : true, true);
957 152 : if (OidIsValid(collid))
958 : {
959 76 : ncreated++;
960 :
961 76 : CommandCounterIncrement();
962 : }
963 : }
964 :
965 : /* Give a warning if "locale -a" seems to be malfunctioning */
966 76 : if (nvalid == 0)
967 0 : ereport(WARNING,
968 : (errmsg("no usable system locales were found")));
969 : }
970 : #endif /* READ_LOCALE_A_OUTPUT */
971 :
972 : /*
973 : * Load collations known to ICU
974 : *
975 : * We use uloc_countAvailable()/uloc_getAvailable() rather than
976 : * ucol_countAvailable()/ucol_getAvailable(). The former returns a full
977 : * set of language+region combinations, whereas the latter only returns
978 : * language+region combinations if they are distinct from the language's
979 : * base collation. So there might not be a de-DE or en-GB, which would be
980 : * confusing.
981 : */
982 : #ifdef USE_ICU
983 : {
984 : int i;
985 :
986 : /*
987 : * Start the loop at -1 to sneak in the root locale without too much
988 : * code duplication.
989 : */
990 59584 : for (i = -1; i < uloc_countAvailable(); i++)
991 : {
992 : const char *name;
993 : char *langtag;
994 : char *icucomment;
995 : Oid collid;
996 :
997 59508 : if (i == -1)
998 76 : name = ""; /* ICU root locale */
999 : else
1000 59432 : name = uloc_getAvailable(i);
1001 :
1002 59508 : langtag = icu_language_tag(name, ERROR);
1003 :
1004 : /*
1005 : * Be paranoid about not allowing any non-ASCII strings into
1006 : * pg_collation
1007 : */
1008 59508 : if (!pg_is_ascii(langtag))
1009 0 : continue;
1010 :
1011 59508 : collid = CollationCreate(psprintf("%s-x-icu", langtag),
1012 : nspid, GetUserId(),
1013 : COLLPROVIDER_ICU, true, -1,
1014 : NULL, NULL, langtag, NULL,
1015 59508 : get_collation_actual_version(COLLPROVIDER_ICU, langtag),
1016 : true, true);
1017 59508 : if (OidIsValid(collid))
1018 : {
1019 59508 : ncreated++;
1020 :
1021 59508 : CommandCounterIncrement();
1022 :
1023 59508 : icucomment = get_icu_locale_comment(name);
1024 59508 : if (icucomment)
1025 58672 : CreateComments(collid, CollationRelationId, 0,
1026 : icucomment);
1027 : }
1028 : }
1029 : }
1030 : #endif /* USE_ICU */
1031 :
1032 : /* Load collations known to WIN32 */
1033 : #ifdef ENUM_SYSTEM_LOCALE
1034 : {
1035 : int nvalid = 0;
1036 : CollParam param;
1037 :
1038 : param.nspid = nspid;
1039 : param.ncreatedp = &ncreated;
1040 : param.nvalidp = &nvalid;
1041 :
1042 : /*
1043 : * Enumerate the locales that are either installed on or supported by
1044 : * the OS.
1045 : */
1046 : if (!EnumSystemLocalesEx(win32_read_locale, LOCALE_ALL,
1047 : (LPARAM) ¶m, NULL))
1048 : _dosmaperr(GetLastError());
1049 :
1050 : /* Give a warning if EnumSystemLocalesEx seems to be malfunctioning */
1051 : if (nvalid == 0)
1052 : ereport(WARNING,
1053 : (errmsg("no usable system locales were found")));
1054 : }
1055 : #endif /* ENUM_SYSTEM_LOCALE */
1056 :
1057 76 : PG_RETURN_INT32(ncreated);
1058 : }
|