Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * collationcmds.c
4 : * collation-related commands support code
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/commands/collationcmds.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/htup_details.h"
18 : #include "access/table.h"
19 : #include "access/xact.h"
20 : #include "catalog/dependency.h"
21 : #include "catalog/indexing.h"
22 : #include "catalog/namespace.h"
23 : #include "catalog/objectaccess.h"
24 : #include "catalog/pg_collation.h"
25 : #include "catalog/pg_database.h"
26 : #include "catalog/pg_namespace.h"
27 : #include "commands/alter.h"
28 : #include "commands/collationcmds.h"
29 : #include "commands/comment.h"
30 : #include "commands/dbcommands.h"
31 : #include "commands/defrem.h"
32 : #include "common/string.h"
33 : #include "mb/pg_wchar.h"
34 : #include "miscadmin.h"
35 : #include "utils/acl.h"
36 : #include "utils/builtins.h"
37 : #include "utils/lsyscache.h"
38 : #include "utils/pg_locale.h"
39 : #include "utils/rel.h"
40 : #include "utils/syscache.h"
41 :
42 :
/*
 * One pending alias entry: a locale name together with the shortened alias
 * derived from it and the encoding that goes with the locale.
 */
typedef struct CollAliasData
{
	char	   *localename;		/* locale name, in the form "locale -a" prints */
	char	   *alias;			/* abbreviated alias for that same locale */
	int			enc;			/* encoding of the locale */
} CollAliasData;
49 :
50 :
51 : /*
52 : * CREATE COLLATION
53 : */
54 : ObjectAddress
55 298 : DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists)
56 : {
57 : char *collName;
58 : Oid collNamespace;
59 : AclResult aclresult;
60 : ListCell *pl;
61 298 : DefElem *fromEl = NULL;
62 298 : DefElem *localeEl = NULL;
63 298 : DefElem *lccollateEl = NULL;
64 298 : DefElem *lcctypeEl = NULL;
65 298 : DefElem *providerEl = NULL;
66 298 : DefElem *deterministicEl = NULL;
67 298 : DefElem *rulesEl = NULL;
68 298 : DefElem *versionEl = NULL;
69 : char *collcollate;
70 : char *collctype;
71 : char *colliculocale;
72 : char *collicurules;
73 : bool collisdeterministic;
74 : int collencoding;
75 : char collprovider;
76 298 : char *collversion = NULL;
77 : Oid newoid;
78 : ObjectAddress address;
79 :
80 298 : collNamespace = QualifiedNameGetCreationNamespace(names, &collName);
81 :
82 298 : aclresult = object_aclcheck(NamespaceRelationId, collNamespace, GetUserId(), ACL_CREATE);
83 298 : if (aclresult != ACLCHECK_OK)
84 0 : aclcheck_error(aclresult, OBJECT_SCHEMA,
85 0 : get_namespace_name(collNamespace));
86 :
87 844 : foreach(pl, parameters)
88 : {
89 588 : DefElem *defel = lfirst_node(DefElem, pl);
90 : DefElem **defelp;
91 :
92 588 : if (strcmp(defel->defname, "from") == 0)
93 58 : defelp = &fromEl;
94 530 : else if (strcmp(defel->defname, "locale") == 0)
95 178 : defelp = &localeEl;
96 352 : else if (strcmp(defel->defname, "lc_collate") == 0)
97 56 : defelp = &lccollateEl;
98 296 : else if (strcmp(defel->defname, "lc_ctype") == 0)
99 50 : defelp = &lcctypeEl;
100 246 : else if (strcmp(defel->defname, "provider") == 0)
101 164 : defelp = &providerEl;
102 82 : else if (strcmp(defel->defname, "deterministic") == 0)
103 44 : defelp = &deterministicEl;
104 38 : else if (strcmp(defel->defname, "rules") == 0)
105 12 : defelp = &rulesEl;
106 26 : else if (strcmp(defel->defname, "version") == 0)
107 20 : defelp = &versionEl;
108 : else
109 : {
110 6 : ereport(ERROR,
111 : (errcode(ERRCODE_SYNTAX_ERROR),
112 : errmsg("collation attribute \"%s\" not recognized",
113 : defel->defname),
114 : parser_errposition(pstate, defel->location)));
115 : break;
116 : }
117 582 : if (*defelp != NULL)
118 36 : errorConflictingDefElem(defel, pstate);
119 546 : *defelp = defel;
120 : }
121 :
122 256 : if (localeEl && (lccollateEl || lcctypeEl))
123 18 : ereport(ERROR,
124 : errcode(ERRCODE_SYNTAX_ERROR),
125 : errmsg("conflicting or redundant options"),
126 : errdetail("LOCALE cannot be specified together with LC_COLLATE or LC_CTYPE."));
127 :
128 238 : if (fromEl && list_length(parameters) != 1)
129 6 : ereport(ERROR,
130 : errcode(ERRCODE_SYNTAX_ERROR),
131 : errmsg("conflicting or redundant options"),
132 : errdetail("FROM cannot be specified together with any other options."));
133 :
134 232 : if (fromEl)
135 : {
136 : Oid collid;
137 : HeapTuple tp;
138 : Datum datum;
139 : bool isnull;
140 :
141 52 : collid = get_collation_oid(defGetQualifiedName(fromEl), false);
142 46 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
143 46 : if (!HeapTupleIsValid(tp))
144 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
145 :
146 46 : collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
147 46 : collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic;
148 46 : collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding;
149 :
150 46 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull);
151 46 : if (!isnull)
152 28 : collcollate = TextDatumGetCString(datum);
153 : else
154 18 : collcollate = NULL;
155 :
156 46 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull);
157 46 : if (!isnull)
158 28 : collctype = TextDatumGetCString(datum);
159 : else
160 18 : collctype = NULL;
161 :
162 46 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colliculocale, &isnull);
163 46 : if (!isnull)
164 12 : colliculocale = TextDatumGetCString(datum);
165 : else
166 34 : colliculocale = NULL;
167 :
168 : /*
169 : * When the ICU locale comes from an existing collation, do not
170 : * canonicalize to a language tag.
171 : */
172 :
173 46 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
174 46 : if (!isnull)
175 0 : collicurules = TextDatumGetCString(datum);
176 : else
177 46 : collicurules = NULL;
178 :
179 46 : ReleaseSysCache(tp);
180 :
181 : /*
182 : * Copying the "default" collation is not allowed because most code
183 : * checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT,
184 : * and so having a second collation with COLLPROVIDER_DEFAULT would
185 : * not work and potentially confuse or crash some code. This could be
186 : * fixed with some legwork.
187 : */
188 46 : if (collprovider == COLLPROVIDER_DEFAULT)
189 6 : ereport(ERROR,
190 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
191 : errmsg("collation \"default\" cannot be copied")));
192 : }
193 : else
194 : {
195 180 : char *collproviderstr = NULL;
196 :
197 180 : collcollate = NULL;
198 180 : collctype = NULL;
199 180 : colliculocale = NULL;
200 180 : collicurules = NULL;
201 :
202 180 : if (providerEl)
203 152 : collproviderstr = defGetString(providerEl);
204 :
205 180 : if (deterministicEl)
206 32 : collisdeterministic = defGetBoolean(deterministicEl);
207 : else
208 148 : collisdeterministic = true;
209 :
210 180 : if (rulesEl)
211 12 : collicurules = defGetString(rulesEl);
212 :
213 180 : if (versionEl)
214 2 : collversion = defGetString(versionEl);
215 :
216 180 : if (collproviderstr)
217 : {
218 152 : if (pg_strcasecmp(collproviderstr, "icu") == 0)
219 152 : collprovider = COLLPROVIDER_ICU;
220 0 : else if (pg_strcasecmp(collproviderstr, "libc") == 0)
221 0 : collprovider = COLLPROVIDER_LIBC;
222 : else
223 0 : ereport(ERROR,
224 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
225 : errmsg("unrecognized collation provider: %s",
226 : collproviderstr)));
227 : }
228 : else
229 28 : collprovider = COLLPROVIDER_LIBC;
230 :
231 180 : if (localeEl)
232 : {
233 148 : if (collprovider == COLLPROVIDER_LIBC)
234 : {
235 2 : collcollate = defGetString(localeEl);
236 2 : collctype = defGetString(localeEl);
237 : }
238 : else
239 146 : colliculocale = defGetString(localeEl);
240 : }
241 :
242 180 : if (lccollateEl)
243 32 : collcollate = defGetString(lccollateEl);
244 :
245 180 : if (lcctypeEl)
246 26 : collctype = defGetString(lcctypeEl);
247 :
248 180 : if (collprovider == COLLPROVIDER_LIBC)
249 : {
250 28 : if (!collcollate)
251 0 : ereport(ERROR,
252 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
253 : errmsg("parameter \"lc_collate\" must be specified")));
254 :
255 28 : if (!collctype)
256 0 : ereport(ERROR,
257 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
258 : errmsg("parameter \"lc_ctype\" must be specified")));
259 : }
260 152 : else if (collprovider == COLLPROVIDER_ICU)
261 : {
262 152 : if (!colliculocale)
263 6 : ereport(ERROR,
264 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
265 : errmsg("parameter \"locale\" must be specified")));
266 :
267 : /*
268 : * During binary upgrade, preserve the locale string. Otherwise,
269 : * canonicalize to a language tag.
270 : */
271 146 : if (!IsBinaryUpgrade)
272 : {
273 144 : char *langtag = icu_language_tag(colliculocale,
274 : icu_validation_level);
275 :
276 138 : if (langtag && strcmp(colliculocale, langtag) != 0)
277 : {
278 104 : ereport(NOTICE,
279 : (errmsg("using standard form \"%s\" for locale \"%s\"",
280 : langtag, colliculocale)));
281 :
282 104 : colliculocale = langtag;
283 : }
284 : }
285 :
286 140 : icu_validate_locale(colliculocale);
287 : }
288 :
289 : /*
290 : * Nondeterministic collations are currently only supported with ICU
291 : * because that's the only case where it can actually make a
292 : * difference. So we can save writing the code for the other
293 : * providers.
294 : */
295 156 : if (!collisdeterministic && collprovider != COLLPROVIDER_ICU)
296 0 : ereport(ERROR,
297 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
298 : errmsg("nondeterministic collations not supported with this provider")));
299 :
300 156 : if (collicurules && collprovider != COLLPROVIDER_ICU)
301 0 : ereport(ERROR,
302 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
303 : errmsg("ICU rules cannot be specified unless locale provider is ICU")));
304 :
305 156 : if (collprovider == COLLPROVIDER_ICU)
306 : {
307 : #ifdef USE_ICU
308 : /*
309 : * We could create ICU collations with collencoding == database
310 : * encoding, but it seems better to use -1 so that it matches the
311 : * way initdb would create ICU collations. However, only allow
312 : * one to be created when the current database's encoding is
313 : * supported. Otherwise the collation is useless, plus we get
314 : * surprising behaviors like not being able to drop the collation.
315 : *
316 : * Skip this test when !USE_ICU, because the error we want to
317 : * throw for that isn't thrown till later.
318 : */
319 128 : if (!is_encoding_supported_by_icu(GetDatabaseEncoding()))
320 0 : ereport(ERROR,
321 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
322 : errmsg("current database's encoding is not supported with this provider")));
323 : #endif
324 128 : collencoding = -1;
325 : }
326 : else
327 : {
328 28 : collencoding = GetDatabaseEncoding();
329 28 : check_encoding_locale_matches(collencoding, collcollate, collctype);
330 : }
331 : }
332 :
333 196 : if (!collversion)
334 194 : collversion = get_collation_actual_version(collprovider, collprovider == COLLPROVIDER_ICU ? colliculocale : collcollate);
335 :
336 196 : newoid = CollationCreate(collName,
337 : collNamespace,
338 : GetUserId(),
339 : collprovider,
340 : collisdeterministic,
341 : collencoding,
342 : collcollate,
343 : collctype,
344 : colliculocale,
345 : collicurules,
346 : collversion,
347 : if_not_exists,
348 : false); /* not quiet */
349 :
350 188 : if (!OidIsValid(newoid))
351 2 : return InvalidObjectAddress;
352 :
353 : /*
354 : * Check that the locales can be loaded. NB: pg_newlocale_from_collation
355 : * is only supposed to be called on non-C-equivalent locales.
356 : */
357 186 : CommandCounterIncrement();
358 186 : if (!lc_collate_is_c(newoid) || !lc_ctype_is_c(newoid))
359 140 : (void) pg_newlocale_from_collation(newoid);
360 :
361 180 : ObjectAddressSet(address, CollationRelationId, newoid);
362 :
363 180 : return address;
364 : }
365 :
366 : /*
367 : * Subroutine for ALTER COLLATION SET SCHEMA and RENAME
368 : *
369 : * Is there a collation with the same name of the given collation already in
370 : * the given namespace? If so, raise an appropriate error message.
371 : */
372 : void
373 18 : IsThereCollationInNamespace(const char *collname, Oid nspOid)
374 : {
375 : /* make sure the name doesn't already exist in new schema */
376 18 : if (SearchSysCacheExists3(COLLNAMEENCNSP,
377 : CStringGetDatum(collname),
378 : Int32GetDatum(GetDatabaseEncoding()),
379 : ObjectIdGetDatum(nspOid)))
380 0 : ereport(ERROR,
381 : (errcode(ERRCODE_DUPLICATE_OBJECT),
382 : errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"",
383 : collname, GetDatabaseEncodingName(),
384 : get_namespace_name(nspOid))));
385 :
386 : /* mustn't match an any-encoding entry, either */
387 18 : if (SearchSysCacheExists3(COLLNAMEENCNSP,
388 : CStringGetDatum(collname),
389 : Int32GetDatum(-1),
390 : ObjectIdGetDatum(nspOid)))
391 6 : ereport(ERROR,
392 : (errcode(ERRCODE_DUPLICATE_OBJECT),
393 : errmsg("collation \"%s\" already exists in schema \"%s\"",
394 : collname, get_namespace_name(nspOid))));
395 12 : }
396 :
397 : /*
398 : * ALTER COLLATION
399 : */
400 : ObjectAddress
401 6 : AlterCollation(AlterCollationStmt *stmt)
402 : {
403 : Relation rel;
404 : Oid collOid;
405 : HeapTuple tup;
406 : Form_pg_collation collForm;
407 : Datum datum;
408 : bool isnull;
409 : char *oldversion;
410 : char *newversion;
411 : ObjectAddress address;
412 :
413 6 : rel = table_open(CollationRelationId, RowExclusiveLock);
414 6 : collOid = get_collation_oid(stmt->collname, false);
415 :
416 6 : if (collOid == DEFAULT_COLLATION_OID)
417 0 : ereport(ERROR,
418 : (errmsg("cannot refresh version of default collation"),
419 : errhint("Use ALTER DATABASE ... REFRESH COLLATION VERSION instead.")));
420 :
421 6 : if (!object_ownercheck(CollationRelationId, collOid, GetUserId()))
422 0 : aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_COLLATION,
423 0 : NameListToString(stmt->collname));
424 :
425 6 : tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid));
426 6 : if (!HeapTupleIsValid(tup))
427 0 : elog(ERROR, "cache lookup failed for collation %u", collOid);
428 :
429 6 : collForm = (Form_pg_collation) GETSTRUCT(tup);
430 6 : datum = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, &isnull);
431 6 : oldversion = isnull ? NULL : TextDatumGetCString(datum);
432 :
433 6 : datum = SysCacheGetAttrNotNull(COLLOID, tup, collForm->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate);
434 6 : newversion = get_collation_actual_version(collForm->collprovider, TextDatumGetCString(datum));
435 :
436 : /* cannot change from NULL to non-NULL or vice versa */
437 6 : if ((!oldversion && newversion) || (oldversion && !newversion))
438 0 : elog(ERROR, "invalid collation version change");
439 6 : else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
440 0 : {
441 : bool nulls[Natts_pg_collation];
442 : bool replaces[Natts_pg_collation];
443 : Datum values[Natts_pg_collation];
444 :
445 0 : ereport(NOTICE,
446 : (errmsg("changing version from %s to %s",
447 : oldversion, newversion)));
448 :
449 0 : memset(values, 0, sizeof(values));
450 0 : memset(nulls, false, sizeof(nulls));
451 0 : memset(replaces, false, sizeof(replaces));
452 :
453 0 : values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion);
454 0 : replaces[Anum_pg_collation_collversion - 1] = true;
455 :
456 0 : tup = heap_modify_tuple(tup, RelationGetDescr(rel),
457 : values, nulls, replaces);
458 : }
459 : else
460 6 : ereport(NOTICE,
461 : (errmsg("version has not changed")));
462 :
463 6 : CatalogTupleUpdate(rel, &tup->t_self, tup);
464 :
465 6 : InvokeObjectPostAlterHook(CollationRelationId, collOid, 0);
466 :
467 6 : ObjectAddressSet(address, CollationRelationId, collOid);
468 :
469 6 : heap_freetuple(tup);
470 6 : table_close(rel, NoLock);
471 :
472 6 : return address;
473 : }
474 :
475 :
476 : Datum
477 602 : pg_collation_actual_version(PG_FUNCTION_ARGS)
478 : {
479 602 : Oid collid = PG_GETARG_OID(0);
480 : char provider;
481 : char *locale;
482 : char *version;
483 : Datum datum;
484 :
485 602 : if (collid == DEFAULT_COLLATION_OID)
486 : {
487 : /* retrieve from pg_database */
488 :
489 0 : HeapTuple dbtup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
490 :
491 0 : if (!HeapTupleIsValid(dbtup))
492 0 : ereport(ERROR,
493 : (errcode(ERRCODE_UNDEFINED_OBJECT),
494 : errmsg("database with OID %u does not exist", MyDatabaseId)));
495 :
496 0 : provider = ((Form_pg_database) GETSTRUCT(dbtup))->datlocprovider;
497 :
498 0 : datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup,
499 : provider == COLLPROVIDER_ICU ?
500 : Anum_pg_database_daticulocale : Anum_pg_database_datcollate);
501 :
502 0 : locale = TextDatumGetCString(datum);
503 :
504 0 : ReleaseSysCache(dbtup);
505 : }
506 : else
507 : {
508 : /* retrieve from pg_collation */
509 :
510 602 : HeapTuple colltp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
511 :
512 602 : if (!HeapTupleIsValid(colltp))
513 0 : ereport(ERROR,
514 : (errcode(ERRCODE_UNDEFINED_OBJECT),
515 : errmsg("collation with OID %u does not exist", collid)));
516 :
517 602 : provider = ((Form_pg_collation) GETSTRUCT(colltp))->collprovider;
518 : Assert(provider != COLLPROVIDER_DEFAULT);
519 602 : datum = SysCacheGetAttrNotNull(COLLOID, colltp,
520 : provider == COLLPROVIDER_ICU ?
521 : Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate);
522 :
523 602 : locale = TextDatumGetCString(datum);
524 :
525 602 : ReleaseSysCache(colltp);
526 : }
527 :
528 602 : version = get_collation_actual_version(provider, locale);
529 602 : if (version)
530 602 : PG_RETURN_TEXT_P(cstring_to_text(version));
531 : else
532 0 : PG_RETURN_NULL();
533 : }
534 :
535 :
/*
 * Pick how pg_import_system_collations enumerates operating-system locales:
 * READ_LOCALE_A_OUTPUT selects reading the output of "locale -a".
 */
#if !defined(WIN32)
#if defined(HAVE_LOCALE_T)
#define READ_LOCALE_A_OUTPUT
#endif
#endif

/* ENUM_SYSTEM_LOCALE selects EnumSystemLocalesEx, used on WIN32 builds. */
#if defined(WIN32)
#define ENUM_SYSTEM_LOCALE
#endif
545 :
546 :
547 : #ifdef READ_LOCALE_A_OUTPUT
/*
 * Produce the "normalized" form of a libc locale name by dropping encoding
 * tags: a '.' and the run of ASCII letters, digits and '-' that follows it.
 * Thus "en_US.utf8" becomes "en_US" and "br_FR.iso885915@euro" becomes
 * "br_FR@euro".
 *
 * The result is written to 'new', which the caller must make large enough
 * to hold a copy of 'old'.  Returns true if anything was dropped, i.e. the
 * output differs from the input.
 */
static bool
normalize_libc_locale_name(char *new, const char *old)
{
	bool		stripped = false;
	char	   *dst = new;
	const char *src = old;

	for (;;)
	{
		char		c = *src;

		if (c == '\0')
			break;

		if (c != '.')
		{
			/* ordinary character: carry it over unchanged */
			*dst++ = c;
			src++;
			continue;
		}

		/* found a '.': swallow it plus the tag, e.g. ".utf8" or ".UTF-8" */
		stripped = true;
		for (src++;; src++)
		{
			char		t = *src;
			bool		in_tag;

			in_tag = (t >= 'A' && t <= 'Z') ||
				(t >= 'a' && t <= 'z') ||
				(t >= '0' && t <= '9') ||
				(t == '-');
			if (!in_tag)
				break;
		}
	}
	*dst = '\0';

	return stripped;
}
581 :
582 : /*
583 : * qsort comparator for CollAliasData items
584 : */
585 : static int
586 602 : cmpaliases(const void *a, const void *b)
587 : {
588 602 : const CollAliasData *ca = (const CollAliasData *) a;
589 602 : const CollAliasData *cb = (const CollAliasData *) b;
590 :
591 : /* comparing localename is enough because other fields are derived */
592 602 : return strcmp(ca->localename, cb->localename);
593 : }
594 : #endif /* READ_LOCALE_A_OUTPUT */
595 :
596 :
597 : #ifdef USE_ICU
598 : /*
599 : * Get a comment (specifically, the display name) for an ICU locale.
600 : * The result is a palloc'd string, or NULL if we can't get a comment
601 : * or find that it's not all ASCII. (We can *not* accept non-ASCII
602 : * comments, because the contents of template0 must be encoding-agnostic.)
603 : */
604 : static char *
605 471366 : get_icu_locale_comment(const char *localename)
606 : {
607 : UErrorCode status;
608 : UChar displayname[128];
609 : int32 len_uchar;
610 : int32 i;
611 : char *result;
612 :
613 471366 : status = U_ZERO_ERROR;
614 471366 : len_uchar = uloc_getDisplayName(localename, "en",
615 : displayname, lengthof(displayname),
616 : &status);
617 471366 : if (U_FAILURE(status))
618 0 : return NULL; /* no good reason to raise an error */
619 :
620 : /* Check for non-ASCII comment (can't use pg_is_ascii for this) */
621 8075830 : for (i = 0; i < len_uchar; i++)
622 : {
623 7611086 : if (displayname[i] > 127)
624 6622 : return NULL;
625 : }
626 :
627 : /* OK, transcribe */
628 464744 : result = palloc(len_uchar + 1);
629 7998172 : for (i = 0; i < len_uchar; i++)
630 7533428 : result[i] = displayname[i];
631 464744 : result[len_uchar] = '\0';
632 :
633 464744 : return result;
634 : }
635 : #endif /* USE_ICU */
636 :
637 :
638 : /*
639 : * Create a new collation using the input locale 'locale'. (subroutine for
640 : * pg_import_system_collations())
641 : *
642 : * 'nspid' is the namespace id where the collation will be created.
643 : *
644 : * 'nvalidp' is incremented if the locale has a valid encoding.
645 : *
646 : * 'ncreatedp' is incremented if the collation is actually created. If the
647 : * collation already exists it will quietly do nothing.
648 : *
649 : * The returned value is the encoding of the locale, -1 if the locale is not
650 : * valid for creating a collation.
651 : *
652 : */
653 : pg_attribute_unused()
654 : static int
655 2408 : create_collation_from_locale(const char *locale, int nspid,
656 : int *nvalidp, int *ncreatedp)
657 : {
658 : int enc;
659 : Oid collid;
660 :
661 : /*
662 : * Some systems have locale names that don't consist entirely of ASCII
663 : * letters (such as "bokmål" or "français"). This is pretty
664 : * silly, since we need the locale itself to interpret the non-ASCII
665 : * characters. We can't do much with those, so we filter them out.
666 : */
667 2408 : if (!pg_is_ascii(locale))
668 : {
669 0 : elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", locale);
670 0 : return -1;
671 : }
672 :
673 2408 : enc = pg_get_encoding_from_locale(locale, false);
674 2408 : if (enc < 0)
675 : {
676 0 : elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", locale);
677 0 : return -1;
678 : }
679 2408 : if (!PG_VALID_BE_ENCODING(enc))
680 : {
681 0 : elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", locale);
682 0 : return -1;
683 : }
684 2408 : if (enc == PG_SQL_ASCII)
685 1204 : return -1; /* C/POSIX are already in the catalog */
686 :
687 : /* count valid locales found in operating system */
688 1204 : (*nvalidp)++;
689 :
690 : /*
691 : * Create a collation named the same as the locale, but quietly doing
692 : * nothing if it already exists. This is the behavior we need even at
693 : * initdb time, because some versions of "locale -a" can report the same
694 : * locale name more than once. And it's convenient for later import runs,
695 : * too, since you just about always want to add on new locales without a
696 : * lot of chatter about existing ones.
697 : */
698 1204 : collid = CollationCreate(locale, nspid, GetUserId(),
699 : COLLPROVIDER_LIBC, true, enc,
700 : locale, locale, NULL, NULL,
701 1204 : get_collation_actual_version(COLLPROVIDER_LIBC, locale),
702 : true, true);
703 1204 : if (OidIsValid(collid))
704 : {
705 1204 : (*ncreatedp)++;
706 :
707 : /* Must do CCI between inserts to handle duplicates correctly */
708 1204 : CommandCounterIncrement();
709 : }
710 :
711 1204 : return enc;
712 : }
713 :
714 :
715 : #ifdef ENUM_SYSTEM_LOCALE
716 : /* parameter to be passed to the callback function win32_read_locale() */
717 : typedef struct
718 : {
719 : Oid nspid;
720 : int *ncreatedp;
721 : int *nvalidp;
722 : } CollParam;
723 :
724 : /*
725 : * Callback function for EnumSystemLocalesEx() in
726 : * pg_import_system_collations(). Creates a collation for every valid locale
727 : * and a POSIX alias collation.
728 : *
729 : * The callback contract is to return TRUE to continue enumerating and FALSE
730 : * to stop enumerating. We always want to continue.
731 : */
732 : static BOOL CALLBACK
733 : win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
734 : {
735 : CollParam *param = (CollParam *) lparam;
736 : char localebuf[NAMEDATALEN];
737 : int result;
738 : int enc;
739 :
740 : (void) dwFlags;
741 :
742 : result = WideCharToMultiByte(CP_ACP, 0, pStr, -1, localebuf, NAMEDATALEN,
743 : NULL, NULL);
744 :
745 : if (result == 0)
746 : {
747 : if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
748 : elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
749 : return TRUE;
750 : }
751 : if (localebuf[0] == '\0')
752 : return TRUE;
753 :
754 : enc = create_collation_from_locale(localebuf, param->nspid,
755 : param->nvalidp, param->ncreatedp);
756 : if (enc < 0)
757 : return TRUE;
758 :
759 : /*
760 : * Windows will use hyphens between language and territory, where POSIX
761 : * uses an underscore. Simply create a POSIX alias.
762 : */
763 : if (strchr(localebuf, '-'))
764 : {
765 : char alias[NAMEDATALEN];
766 : Oid collid;
767 :
768 : strcpy(alias, localebuf);
769 : for (char *p = alias; *p; p++)
770 : if (*p == '-')
771 : *p = '_';
772 :
773 : collid = CollationCreate(alias, param->nspid, GetUserId(),
774 : COLLPROVIDER_LIBC, true, enc,
775 : localebuf, localebuf, NULL, NULL,
776 : get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
777 : true, true);
778 : if (OidIsValid(collid))
779 : {
780 : (*param->ncreatedp)++;
781 :
782 : CommandCounterIncrement();
783 : }
784 : }
785 :
786 : return TRUE;
787 : }
788 : #endif /* ENUM_SYSTEM_LOCALE */
789 :
790 :
791 : /*
792 : * pg_import_system_collations: add known system collations to pg_collation
793 : */
794 : Datum
795 602 : pg_import_system_collations(PG_FUNCTION_ARGS)
796 : {
797 602 : Oid nspid = PG_GETARG_OID(0);
798 602 : int ncreated = 0;
799 :
800 602 : if (!superuser())
801 0 : ereport(ERROR,
802 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
803 : errmsg("must be superuser to import system collations")));
804 :
805 602 : if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(nspid)))
806 0 : ereport(ERROR,
807 : (errcode(ERRCODE_UNDEFINED_SCHEMA),
808 : errmsg("schema with OID %u does not exist", nspid)));
809 :
810 : /* Load collations known to libc, using "locale -a" to enumerate them */
811 : #ifdef READ_LOCALE_A_OUTPUT
812 : {
813 : FILE *locale_a_handle;
814 : char localebuf[LOCALE_NAME_BUFLEN];
815 602 : int nvalid = 0;
816 : Oid collid;
817 : CollAliasData *aliases;
818 : int naliases,
819 : maxaliases,
820 : i;
821 :
822 : /* expansible array of aliases */
823 602 : maxaliases = 100;
824 602 : aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData));
825 602 : naliases = 0;
826 :
827 602 : locale_a_handle = OpenPipeStream("locale -a", "r");
828 602 : if (locale_a_handle == NULL)
829 0 : ereport(ERROR,
830 : (errcode_for_file_access(),
831 : errmsg("could not execute command \"%s\": %m",
832 : "locale -a")));
833 :
834 3010 : while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
835 : {
836 : size_t len;
837 : int enc;
838 : char alias[LOCALE_NAME_BUFLEN];
839 :
840 2408 : len = strlen(localebuf);
841 :
842 2408 : if (len == 0 || localebuf[len - 1] != '\n')
843 : {
844 0 : elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
845 1204 : continue;
846 : }
847 2408 : localebuf[len - 1] = '\0';
848 :
849 2408 : enc = create_collation_from_locale(localebuf, nspid, &nvalid, &ncreated);
850 2408 : if (enc < 0)
851 1204 : continue;
852 :
853 : /*
854 : * Generate aliases such as "en_US" in addition to "en_US.utf8"
855 : * for ease of use. Note that collation names are unique per
856 : * encoding only, so this doesn't clash with "en_US" for LATIN1,
857 : * say.
858 : *
859 : * However, it might conflict with a name we'll see later in the
860 : * "locale -a" output. So save up the aliases and try to add them
861 : * after we've read all the output.
862 : */
863 1204 : if (normalize_libc_locale_name(alias, localebuf))
864 : {
865 1204 : if (naliases >= maxaliases)
866 : {
867 0 : maxaliases *= 2;
868 : aliases = (CollAliasData *)
869 0 : repalloc(aliases, maxaliases * sizeof(CollAliasData));
870 : }
871 1204 : aliases[naliases].localename = pstrdup(localebuf);
872 1204 : aliases[naliases].alias = pstrdup(alias);
873 1204 : aliases[naliases].enc = enc;
874 1204 : naliases++;
875 : }
876 : }
877 :
878 : /*
879 : * We don't check the return value of this, because we want to support
880 : * the case where there "locale" command does not exist. (This is
881 : * unusual but can happen on minimalized Linux distributions, for
882 : * example.) We will warn below if no locales could be found.
883 : */
884 602 : ClosePipeStream(locale_a_handle);
885 :
886 : /*
887 : * Before processing the aliases, sort them by locale name. The point
888 : * here is that if "locale -a" gives us multiple locale names with the
889 : * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we
890 : * want to pick a deterministic one of them. First in ASCII sort
891 : * order is a good enough rule. (Before PG 10, the code corresponding
892 : * to this logic in initdb.c had an additional ordering rule, to
893 : * prefer the locale name exactly matching the alias, if any. We
894 : * don't need to consider that here, because we would have already
895 : * created such a pg_collation entry above, and that one will win.)
896 : */
897 602 : if (naliases > 1)
898 602 : qsort(aliases, naliases, sizeof(CollAliasData), cmpaliases);
899 :
900 : /* Now add aliases, ignoring any that match pre-existing entries */
901 1806 : for (i = 0; i < naliases; i++)
902 : {
903 1204 : char *locale = aliases[i].localename;
904 1204 : char *alias = aliases[i].alias;
905 1204 : int enc = aliases[i].enc;
906 :
907 1204 : collid = CollationCreate(alias, nspid, GetUserId(),
908 : COLLPROVIDER_LIBC, true, enc,
909 : locale, locale, NULL, NULL,
910 1204 : get_collation_actual_version(COLLPROVIDER_LIBC, locale),
911 : true, true);
912 1204 : if (OidIsValid(collid))
913 : {
914 602 : ncreated++;
915 :
916 602 : CommandCounterIncrement();
917 : }
918 : }
919 :
920 : /* Give a warning if "locale -a" seems to be malfunctioning */
921 602 : if (nvalid == 0)
922 0 : ereport(WARNING,
923 : (errmsg("no usable system locales were found")));
924 : }
925 : #endif /* READ_LOCALE_A_OUTPUT */
926 :
927 : /*
928 : * Load collations known to ICU
929 : *
930 : * We use uloc_countAvailable()/uloc_getAvailable() rather than
931 : * ucol_countAvailable()/ucol_getAvailable(). The former returns a full
932 : * set of language+region combinations, whereas the latter only returns
933 : * language+region combinations if they are distinct from the language's
934 : * base collation. So there might not be a de-DE or en-GB, which would be
935 : * confusing.
936 : */
937 : #ifdef USE_ICU
938 : {
939 : int i;
940 :
941 : /*
942 : * Start the loop at -1 to sneak in the root locale without too much
943 : * code duplication.
944 : */
945 471968 : for (i = -1; i < uloc_countAvailable(); i++)
946 : {
947 : const char *name;
948 : char *langtag;
949 : char *icucomment;
950 : Oid collid;
951 :
952 471366 : if (i == -1)
953 602 : name = ""; /* ICU root locale */
954 : else
955 470764 : name = uloc_getAvailable(i);
956 :
957 471366 : langtag = icu_language_tag(name, ERROR);
958 :
959 : /*
960 : * Be paranoid about not allowing any non-ASCII strings into
961 : * pg_collation
962 : */
963 471366 : if (!pg_is_ascii(langtag))
964 0 : continue;
965 :
966 471366 : collid = CollationCreate(psprintf("%s-x-icu", langtag),
967 : nspid, GetUserId(),
968 : COLLPROVIDER_ICU, true, -1,
969 : NULL, NULL, langtag, NULL,
970 471366 : get_collation_actual_version(COLLPROVIDER_ICU, langtag),
971 : true, true);
972 471366 : if (OidIsValid(collid))
973 : {
974 471366 : ncreated++;
975 :
976 471366 : CommandCounterIncrement();
977 :
978 471366 : icucomment = get_icu_locale_comment(name);
979 471366 : if (icucomment)
980 464744 : CreateComments(collid, CollationRelationId, 0,
981 : icucomment);
982 : }
983 : }
984 : }
985 : #endif /* USE_ICU */
986 :
987 : /* Load collations known to WIN32 */
988 : #ifdef ENUM_SYSTEM_LOCALE
989 : {
990 : int nvalid = 0;
991 : CollParam param;
992 :
993 : param.nspid = nspid;
994 : param.ncreatedp = &ncreated;
995 : param.nvalidp = &nvalid;
996 :
997 : /*
998 : * Enumerate the locales that are either installed on or supported by
999 : * the OS.
1000 : */
1001 : if (!EnumSystemLocalesEx(win32_read_locale, LOCALE_ALL,
1002 : (LPARAM) ¶m, NULL))
1003 : _dosmaperr(GetLastError());
1004 :
1005 : /* Give a warning if EnumSystemLocalesEx seems to be malfunctioning */
1006 : if (nvalid == 0)
1007 : ereport(WARNING,
1008 : (errmsg("no usable system locales were found")));
1009 : }
1010 : #endif /* ENUM_SYSTEM_LOCALE */
1011 :
1012 602 : PG_RETURN_INT32(ncreated);
1013 : }
|