Line data Source code
1 : /*-----------------------------------------------------------------------
2 : * ascii.c
3 : * The PostgreSQL routine for string to ascii conversion.
4 : *
5 : * Portions Copyright (c) 1999-2025, PostgreSQL Global Development Group
6 : *
7 : * IDENTIFICATION
8 : * src/backend/utils/adt/ascii.c
9 : *
10 : *-----------------------------------------------------------------------
11 : */
12 : #include "postgres.h"
13 :
14 : #include "mb/pg_wchar.h"
15 : #include "utils/ascii.h"
16 : #include "utils/fmgrprotos.h"
17 : #include "varatt.h"
18 :
19 : static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
20 : unsigned char *dest, int enc);
21 : static text *encode_to_ascii(text *data, int enc);
22 :
23 :
24 : /* ----------
25 : * to_ascii
26 : * ----------
27 : */
28 : static void
29 0 : pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
30 : {
31 : unsigned char *x;
32 : const unsigned char *ascii;
33 : int range;
34 :
35 : /*
36 : * relevant start for an encoding
37 : */
38 : #define RANGE_128 128
39 : #define RANGE_160 160
40 :
41 0 : if (enc == PG_LATIN1)
42 : {
43 : /*
44 : * ISO-8859-1 <range: 160 -- 255>
45 : */
46 0 : ascii = (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
47 0 : range = RANGE_160;
48 : }
49 0 : else if (enc == PG_LATIN2)
50 : {
51 : /*
52 : * ISO-8859-2 <range: 160 -- 255>
53 : */
54 0 : ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
55 0 : range = RANGE_160;
56 : }
57 0 : else if (enc == PG_LATIN9)
58 : {
59 : /*
60 : * ISO-8859-15 <range: 160 -- 255>
61 : */
62 0 : ascii = (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
63 0 : range = RANGE_160;
64 : }
65 0 : else if (enc == PG_WIN1250)
66 : {
67 : /*
68 : * Window CP1250 <range: 128 -- 255>
69 : */
70 0 : ascii = (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
71 0 : range = RANGE_128;
72 : }
73 : else
74 : {
75 0 : ereport(ERROR,
76 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
77 : errmsg("encoding conversion from %s to ASCII not supported",
78 : pg_encoding_to_char(enc))));
79 : return; /* keep compiler quiet */
80 : }
81 :
82 : /*
83 : * Encode
84 : */
85 0 : for (x = src; x < src_end; x++)
86 : {
87 0 : if (*x < 128)
88 0 : *dest++ = *x;
89 0 : else if (*x < range)
90 0 : *dest++ = ' '; /* bogus 128 to 'range' */
91 : else
92 0 : *dest++ = ascii[*x - range];
93 : }
94 : }
95 :
96 : /* ----------
97 : * encode text
98 : *
99 : * The text datum is overwritten in-place, therefore this coding method
100 : * cannot support conversions that change the string length!
101 : * ----------
102 : */
103 : static text *
104 0 : encode_to_ascii(text *data, int enc)
105 : {
106 0 : pg_to_ascii((unsigned char *) VARDATA(data), /* src */
107 0 : (unsigned char *) (data) + VARSIZE(data), /* src end */
108 0 : (unsigned char *) VARDATA(data), /* dest */
109 : enc); /* encoding */
110 :
111 0 : return data;
112 : }
113 :
114 : /* ----------
115 : * convert to ASCII - enc is set as 'name' arg.
116 : * ----------
117 : */
118 : Datum
119 0 : to_ascii_encname(PG_FUNCTION_ARGS)
120 : {
121 0 : text *data = PG_GETARG_TEXT_P_COPY(0);
122 0 : char *encname = NameStr(*PG_GETARG_NAME(1));
123 0 : int enc = pg_char_to_encoding(encname);
124 :
125 0 : if (enc < 0)
126 0 : ereport(ERROR,
127 : (errcode(ERRCODE_UNDEFINED_OBJECT),
128 : errmsg("%s is not a valid encoding name", encname)));
129 :
130 0 : PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
131 : }
132 :
133 : /* ----------
134 : * convert to ASCII - enc is set as int4
135 : * ----------
136 : */
137 : Datum
138 0 : to_ascii_enc(PG_FUNCTION_ARGS)
139 : {
140 0 : text *data = PG_GETARG_TEXT_P_COPY(0);
141 0 : int enc = PG_GETARG_INT32(1);
142 :
143 0 : if (!PG_VALID_ENCODING(enc))
144 0 : ereport(ERROR,
145 : (errcode(ERRCODE_UNDEFINED_OBJECT),
146 : errmsg("%d is not a valid encoding code", enc)));
147 :
148 0 : PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
149 : }
150 :
151 : /* ----------
152 : * convert to ASCII - current enc is DatabaseEncoding
153 : * ----------
154 : */
155 : Datum
156 0 : to_ascii_default(PG_FUNCTION_ARGS)
157 : {
158 0 : text *data = PG_GETARG_TEXT_P_COPY(0);
159 0 : int enc = GetDatabaseEncoding();
160 :
161 0 : PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
162 : }
163 :
/* ----------
 * Bounded, sanitizing string copy.
 *
 * Copies src into dest the way strlcpy() would (at most destsiz - 1 bytes,
 * always NUL-terminated when destsiz > 0) but returns nothing.  Bytes in
 * the range 32..127 and the characters '\n', '\r' and '\t' are copied
 * as-is; every other byte is stored as '?', so the result is plain ASCII
 * whatever encoding the source string was in.
 *
 * When destsiz is 0, dest is left untouched.
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
	char	   *out = dest;
	size_t		room;

	/* corner case: not even room for the trailing nul */
	if (destsiz == 0)
		return;

	/* one slot stays reserved for the terminator */
	for (room = destsiz - 1; room > 0 && *src != '\0'; room--, src++)
	{
		/* inspect the byte as unsigned so the range tests behave */
		unsigned char byte = (unsigned char) *src;

		if ((byte >= 32 && byte <= 127) ||
			byte == '\n' || byte == '\r' || byte == '\t')
			*out++ = byte;		/* acceptable as-is */
		else
			*out++ = '?';		/* anything else is masked */
	}

	*out = '\0';
}
|