Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * ISO 8859 2-16 <--> UTF8
4 : *
5 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
6 : * Portions Copyright (c) 1994, Regents of the University of California
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 :
14 : #include "postgres.h"
15 : #include "fmgr.h"
16 : #include "mb/pg_wchar.h"
17 : #include "../../Unicode/iso8859_10_to_utf8.map"
18 : #include "../../Unicode/iso8859_13_to_utf8.map"
19 : #include "../../Unicode/iso8859_14_to_utf8.map"
20 : #include "../../Unicode/iso8859_15_to_utf8.map"
21 : #include "../../Unicode/iso8859_2_to_utf8.map"
22 : #include "../../Unicode/iso8859_3_to_utf8.map"
23 : #include "../../Unicode/iso8859_4_to_utf8.map"
24 : #include "../../Unicode/iso8859_5_to_utf8.map"
25 : #include "../../Unicode/iso8859_6_to_utf8.map"
26 : #include "../../Unicode/iso8859_7_to_utf8.map"
27 : #include "../../Unicode/iso8859_8_to_utf8.map"
28 : #include "../../Unicode/iso8859_9_to_utf8.map"
29 : #include "../../Unicode/utf8_to_iso8859_10.map"
30 : #include "../../Unicode/utf8_to_iso8859_13.map"
31 : #include "../../Unicode/utf8_to_iso8859_14.map"
32 : #include "../../Unicode/utf8_to_iso8859_15.map"
33 : #include "../../Unicode/utf8_to_iso8859_16.map"
34 : #include "../../Unicode/utf8_to_iso8859_2.map"
35 : #include "../../Unicode/utf8_to_iso8859_3.map"
36 : #include "../../Unicode/utf8_to_iso8859_4.map"
37 : #include "../../Unicode/utf8_to_iso8859_5.map"
38 : #include "../../Unicode/utf8_to_iso8859_6.map"
39 : #include "../../Unicode/utf8_to_iso8859_7.map"
40 : #include "../../Unicode/utf8_to_iso8859_8.map"
41 : #include "../../Unicode/utf8_to_iso8859_9.map"
42 : #include "../../Unicode/iso8859_16_to_utf8.map"
43 :
/* Magic block identifying this file as a loadable PostgreSQL module. */
PG_MODULE_MAGIC;

/* Both conversion procedures below use the version-1 calling convention. */
PG_FUNCTION_INFO_V1(utf8_to_iso8859);
PG_FUNCTION_INFO_V1(iso8859_to_utf8);

48 :
49 : /* ----------
50 : * conv_proc(
51 : * INTEGER, -- source encoding id
52 : * INTEGER, -- destination encoding id
53 : * CSTRING, -- source string (null terminated C string)
54 : * CSTRING, -- destination string (null terminated C string)
55 : * INTEGER, -- source string length
56 : * BOOL -- if true, don't throw an error if conversion fails
57 : * ) returns INTEGER;
58 : *
59 : * Returns the number of bytes successfully converted.
60 : * ----------
61 : */
62 :
63 : typedef struct
64 : {
65 : pg_enc encoding;
66 : const pg_mb_radix_tree *map1; /* to UTF8 map name */
67 : const pg_mb_radix_tree *map2; /* from UTF8 map name */
68 : } pg_conv_map;
69 :
70 : static const pg_conv_map maps[] = {
71 : {PG_LATIN2, &iso8859_2_to_unicode_tree,
72 : &iso8859_2_from_unicode_tree}, /* ISO-8859-2 Latin 2 */
73 : {PG_LATIN3, &iso8859_3_to_unicode_tree,
74 : &iso8859_3_from_unicode_tree}, /* ISO-8859-3 Latin 3 */
75 : {PG_LATIN4, &iso8859_4_to_unicode_tree,
76 : &iso8859_4_from_unicode_tree}, /* ISO-8859-4 Latin 4 */
77 : {PG_LATIN5, &iso8859_9_to_unicode_tree,
78 : &iso8859_9_from_unicode_tree}, /* ISO-8859-9 Latin 5 */
79 : {PG_LATIN6, &iso8859_10_to_unicode_tree,
80 : &iso8859_10_from_unicode_tree}, /* ISO-8859-10 Latin 6 */
81 : {PG_LATIN7, &iso8859_13_to_unicode_tree,
82 : &iso8859_13_from_unicode_tree}, /* ISO-8859-13 Latin 7 */
83 : {PG_LATIN8, &iso8859_14_to_unicode_tree,
84 : &iso8859_14_from_unicode_tree}, /* ISO-8859-14 Latin 8 */
85 : {PG_LATIN9, &iso8859_15_to_unicode_tree,
86 : &iso8859_15_from_unicode_tree}, /* ISO-8859-15 Latin 9 */
87 : {PG_LATIN10, &iso8859_16_to_unicode_tree,
88 : &iso8859_16_from_unicode_tree}, /* ISO-8859-16 Latin 10 */
89 : {PG_ISO_8859_5, &iso8859_5_to_unicode_tree,
90 : &iso8859_5_from_unicode_tree}, /* ISO-8859-5 */
91 : {PG_ISO_8859_6, &iso8859_6_to_unicode_tree,
92 : &iso8859_6_from_unicode_tree}, /* ISO-8859-6 */
93 : {PG_ISO_8859_7, &iso8859_7_to_unicode_tree,
94 : &iso8859_7_from_unicode_tree}, /* ISO-8859-7 */
95 : {PG_ISO_8859_8, &iso8859_8_to_unicode_tree,
96 : &iso8859_8_from_unicode_tree}, /* ISO-8859-8 */
97 : };
98 :
99 : Datum
100 222 : iso8859_to_utf8(PG_FUNCTION_ARGS)
101 : {
102 222 : int encoding = PG_GETARG_INT32(0);
103 222 : unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
104 222 : unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
105 222 : int len = PG_GETARG_INT32(4);
106 222 : bool noError = PG_GETARG_BOOL(5);
107 : int i;
108 :
109 222 : CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8);
110 :
111 1986 : for (i = 0; i < lengthof(maps); i++)
112 : {
113 1986 : if (encoding == maps[i].encoding)
114 : {
115 : int converted;
116 :
117 222 : converted = LocalToUtf(src, len, dest,
118 : maps[i].map1,
119 : NULL, 0,
120 : NULL,
121 : encoding,
122 : noError);
123 168 : PG_RETURN_INT32(converted);
124 : }
125 : }
126 :
127 0 : ereport(ERROR,
128 : (errcode(ERRCODE_INTERNAL_ERROR),
129 : errmsg("unexpected encoding ID %d for ISO 8859 character sets",
130 : encoding)));
131 :
132 : PG_RETURN_INT32(0);
133 : }
134 :
135 : Datum
136 942 : utf8_to_iso8859(PG_FUNCTION_ARGS)
137 : {
138 942 : int encoding = PG_GETARG_INT32(1);
139 942 : unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
140 942 : unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
141 942 : int len = PG_GETARG_INT32(4);
142 942 : bool noError = PG_GETARG_BOOL(5);
143 : int i;
144 :
145 942 : CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1);
146 :
147 2706 : for (i = 0; i < lengthof(maps); i++)
148 : {
149 2706 : if (encoding == maps[i].encoding)
150 : {
151 : int converted;
152 :
153 942 : converted = UtfToLocal(src, len, dest,
154 : maps[i].map2,
155 : NULL, 0,
156 : NULL,
157 : encoding,
158 : noError);
159 546 : PG_RETURN_INT32(converted);
160 : }
161 : }
162 :
163 0 : ereport(ERROR,
164 : (errcode(ERRCODE_INTERNAL_ERROR),
165 : errmsg("unexpected encoding ID %d for ISO 8859 character sets",
166 : encoding)));
167 :
168 : PG_RETURN_INT32(0);
169 : }
|