/*-
 * Copyright (C) 2009 Gabor Kovesdan <[email protected]>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #include <sys/endian.h>
31 #include <sys/types.h>
32
33 #include <err.h>
34 #include <errno.h>
35 #include <iconv.h>
36 #include <stdbool.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40
/*
 * Flags raised by the callbacks below (unicode_hook, wchar_hook and
 * mb_to_uc_fb respectively) so that the tests can tell whether they ran.
 */
static bool	uc_hook = false;
static bool	wc_hook = false;
static bool	mb_uc_fb = false;

/* Prototypes for the non-static callbacks defined later in this file. */
void	unicode_hook(unsigned int mbr, void *data);
void	wchar_hook(wchar_t wc, void *data);
void	mb_to_uc_fb(const char *inbuf, size_t inbufsize,
	    void (*write_replacement)(const unsigned int *buf, size_t buflen,
	    void *callback_arg),
	    void *callback_arg, void *data);
51
52 static int
ctl_get_translit1(void)53 ctl_get_translit1(void)
54 {
55 iconv_t cd;
56 int arg, ret;
57
58 cd = iconv_open("ASCII//TRANSLIT", "UTF-8");
59 if (cd == (iconv_t)-1)
60 return (-1);
61 if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0)
62 ret = (arg == 1) ? 0 : -1;
63 else
64 ret = -1;
65 if (iconv_close(cd) == -1)
66 return (-1);
67 return (ret);
68 }
69
70 static int
ctl_get_translit2(void)71 ctl_get_translit2(void)
72 {
73 iconv_t cd;
74 int arg, ret;
75
76 cd = iconv_open("ASCII", "UTF-8");
77 if (cd == (iconv_t)-1)
78 return (-1);
79 if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0)
80 ret = (arg == 0) ? 0 : -1;
81 else
82 ret = -1;
83 if (iconv_close(cd) == -1)
84 return (-1);
85 return (ret);
86 }
87
88 static int
ctl_set_translit1(void)89 ctl_set_translit1(void)
90 {
91 iconv_t cd;
92 int arg = 1, ret;
93
94 cd = iconv_open("ASCII", "UTF-8");
95 if (cd == (iconv_t)-1)
96 return (-1);
97 ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1;
98 if (iconv_close(cd) == -1)
99 return (-1);
100 return (ret);
101 }
102
103 static int
ctl_set_translit2(void)104 ctl_set_translit2(void)
105 {
106 iconv_t cd;
107 int arg = 0, ret;
108
109 cd = iconv_open("ASCII//TRANSLIT", "UTF-8");
110 if (cd == (iconv_t)-1)
111 return (-1);
112 ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1;
113 if (iconv_close(cd) == -1)
114 return (-1);
115 return (ret);
116 }
117
118 static int
ctl_get_discard_ilseq1(void)119 ctl_get_discard_ilseq1(void)
120 {
121 iconv_t cd;
122 int arg, ret;
123
124 cd = iconv_open("ASCII", "UTF-8");
125 if (cd == (iconv_t)-1)
126 return (-1);
127 if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0)
128 ret = arg == 0 ? 0 : -1;
129 else
130 ret = -1;
131 if (iconv_close(cd) == -1)
132 return (-1);
133 return (ret);
134 }
135
136 static int
ctl_get_discard_ilseq2(void)137 ctl_get_discard_ilseq2(void)
138 {
139 iconv_t cd;
140 int arg, ret;
141
142 cd = iconv_open("ASCII//IGNORE", "UTF-8");
143 if (cd == (iconv_t)-1)
144 return (-1);
145 if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0)
146 ret = arg == 1 ? 0 : -1;
147 else
148 ret = -1;
149 if (iconv_close(cd) == -1)
150 return (-1);
151 return (ret);
152 }
153
154 static int
ctl_set_discard_ilseq1(void)155 ctl_set_discard_ilseq1(void)
156 {
157 iconv_t cd;
158 int arg = 1, ret;
159
160 cd = iconv_open("ASCII", "UTF-8");
161 if (cd == (iconv_t)-1)
162 return (-1);
163 ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1;
164 if (iconv_close(cd) == -1)
165 return (-1);
166 return (ret);
167 }
168
169 static int
ctl_set_discard_ilseq2(void)170 ctl_set_discard_ilseq2(void)
171 {
172 iconv_t cd;
173 int arg = 0, ret;
174
175 cd = iconv_open("ASCII//IGNORE", "UTF-8");
176 if (cd == (iconv_t)-1)
177 return (-1);
178 ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1;
179 if (iconv_close(cd) == -1)
180 return (-1);
181 return (ret);
182 }
183
184 static int
ctl_trivialp1(void)185 ctl_trivialp1(void)
186 {
187 iconv_t cd;
188 int arg, ret;
189
190 cd = iconv_open("latin2", "latin2");
191 if (cd == (iconv_t)-1)
192 return (-1);
193 if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) {
194 ret = (arg == 1) ? 0 : -1;
195 } else
196 ret = -1;
197 if (iconv_close(cd) == -1)
198 return (-1);
199 return (ret);
200 }
201
202 static int
ctl_trivialp2(void)203 ctl_trivialp2(void)
204 {
205 iconv_t cd;
206 int arg, ret;
207
208 cd = iconv_open("ASCII", "KOI8-R");
209 if (cd == (iconv_t)-1)
210 return (-1);
211 if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) {
212 ret = (arg == 0) ? 0 : -1;
213 } else
214 ret = -1;
215 if (iconv_close(cd) == -1)
216 return (-1);
217 return (ret);
218 }
219
220 void
unicode_hook(unsigned int mbr,void * data)221 unicode_hook(unsigned int mbr, void *data)
222 {
223
224 #ifdef VERBOSE
225 printf("Unicode hook: %u\n", mbr);
226 #endif
227 uc_hook = true;
228 }
229
230 void
wchar_hook(wchar_t wc,void * data)231 wchar_hook(wchar_t wc, void *data)
232 {
233
234 #ifdef VERBOSE
235 printf("Wchar hook: %ull\n", wc);
236 #endif
237 wc_hook = true;
238 }
239
240 static int
ctl_uc_hook(void)241 ctl_uc_hook(void)
242 {
243 struct iconv_hooks hooks;
244 iconv_t cd;
245 size_t inbytesleft = 15, outbytesleft = 40;
246 const char **inptr;
247 const char *s = "Hello World!";
248 char **outptr;
249 char *outbuf;
250
251 inptr = &s;
252 hooks.uc_hook = unicode_hook;
253 hooks.wc_hook = NULL;
254
255 outbuf = malloc(40);
256 outptr = &outbuf;
257
258 cd = iconv_open("UTF-8", "ASCII");
259 if (cd == (iconv_t)-1)
260 return (-1);
261 if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0)
262 return (-1);
263 if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1)
264 return (-1);
265 if (iconv_close(cd) == -1)
266 return (-1);
267 return (uc_hook ? 0 : 1);
268 }
269
270 static int
ctl_wc_hook(void)271 ctl_wc_hook(void)
272 {
273 struct iconv_hooks hooks;
274 iconv_t cd;
275 size_t inbytesleft, outbytesleft = 40;
276 const char **inptr;
277 const char *s = "Hello World!";
278 char **outptr;
279 char *outbuf;
280
281 inptr = &s;
282 hooks.wc_hook = wchar_hook;
283 hooks.uc_hook = NULL;
284
285 outbuf = malloc(40);
286 outptr = &outbuf;
287 inbytesleft = sizeof(s);
288
289 cd = iconv_open("SHIFT_JIS", "ASCII");
290 if (cd == (iconv_t)-1)
291 return (-1);
292 if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0)
293 return (-1);
294 if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1)
295 return (-1);
296 if (iconv_close(cd) == -1)
297 return (-1);
298 return (wc_hook ? 0 : 1);
299 }
300
301
302
/*
 * Checks that iconv_canonicalize("latin2") yields "ISO-8859-2".
 *
 * Returns 0 on a match, the non-zero strcmp() result on a mismatch, and -1
 * if iconv_canonicalize() hands back NULL.
 */
static int
gnu_canonicalize1(void)
{
	const char *canon;

	canon = iconv_canonicalize("latin2");
	/*
	 * NOTE(review): guard in case the implementation can return NULL
	 * (e.g. on allocation failure); strcmp() on NULL is undefined.
	 */
	if (canon == NULL)
		return (-1);
	return (strcmp(canon, "ISO-8859-2"));
}
309
/*
 * Checks that "ASCII" and "latin2" do not canonicalize to the same name.
 *
 * Returns 0 when the two canonical names differ, 1 when they are equal,
 * and -1 if iconv_canonicalize() hands back NULL for either of them.
 */
static int
gnu_canonicalize2(void)
{
	const char *ascii, *latin2;

	ascii = iconv_canonicalize("ASCII");
	latin2 = iconv_canonicalize("latin2");
	/*
	 * NOTE(review): guard in case the implementation can return NULL
	 * (e.g. on allocation failure); strcmp() on NULL is undefined.
	 */
	if (ascii == NULL || latin2 == NULL)
		return (-1);
	return (strcmp(ascii, latin2) == 0 ? 1 : 0);
}
316
317
/*
 * Callback handed to iconvlist().  Stores a verdict in the int that data
 * points to and returns the same value: -1 when a non-zero count comes with
 * a NULL names array, 0 otherwise.
 */
static int
iconvlist_cb(unsigned int count, const char * const *names, void *data)
{
	int *verdict = data;

	if (names == NULL && count > 0)
		*verdict = -1;
	else
		*verdict = 0;
	return (*verdict);
}
324
325 static int
gnu_iconvlist(void)326 gnu_iconvlist(void)
327 {
328 int i;
329
330 iconvlist(iconvlist_cb, (void *)&i);
331 return (i);
332 }
333
334 void
mb_to_uc_fb(const char * inbuf,size_t inbufsize,void (* write_replacement)(const unsigned int * buf,size_t buflen,void * callback_arg),void * callback_arg,void * data)335 mb_to_uc_fb(const char* inbuf, size_t inbufsize,
336 void (*write_replacement)(const unsigned int *buf, size_t buflen,
337 void* callback_arg), void* callback_arg, void* data)
338 {
339 unsigned int c = 0x3F;
340
341 mb_uc_fb = true;
342 write_replacement((const unsigned int *)&c, 1, NULL);
343 }
344
345 static int __unused
ctl_mb_to_uc_fb(void)346 ctl_mb_to_uc_fb(void)
347 {
348 struct iconv_fallbacks fb;
349 iconv_t cd;
350 size_t inbytesleft, outbytesleft;
351 uint16_t inbuf[1] = { 0xF187 };
352 uint8_t outbuf[4] = { 0x00, 0x00, 0x00, 0x00 };
353 const char *inptr;
354 char *outptr;
355 int ret;
356
357 if ((cd = iconv_open("UTF-32", "UTF-8")) == (iconv_t)-1)
358 return (1);
359
360 fb.uc_to_mb_fallback = NULL;
361 fb.mb_to_wc_fallback = NULL;
362 fb.wc_to_mb_fallback = NULL;
363 fb.mb_to_uc_fallback = mb_to_uc_fb;
364 fb.data = NULL;
365
366 if (iconvctl(cd, ICONV_SET_FALLBACKS, (void *)&fb) != 0)
367 return (1);
368
369 inptr = (const char *)inbuf;
370 outptr = (char *)outbuf;
371 inbytesleft = 2;
372 outbytesleft = 4;
373
374 errno = 0;
375 ret = iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft);
376
377 #ifdef VERBOSE
378 printf("mb_uc fallback: %c\n", outbuf[0]);
379 #endif
380
381 if (mb_uc_fb && (outbuf[0] == 0x3F))
382 return (0);
383 else
384 return (1);
385 }
386
387 static int
gnu_openinto(void)388 gnu_openinto(void)
389 {
390 iconv_allocation_t *myspace;
391 size_t inbytesleft, outbytesleft;
392 const char *inptr;
393 char *inbuf = "works!", *outptr;
394 char outbuf[6];
395
396 if ((myspace = (iconv_allocation_t *)malloc(sizeof(iconv_allocation_t))) == NULL)
397 return (1);
398 if (iconv_open_into("ASCII", "ASCII", myspace) == -1)
399 return (1);
400
401 inptr = (const char *)inbuf;
402 outptr = (char *)outbuf;
403 inbytesleft = 6;
404 outbytesleft = 6;
405
406 iconv((iconv_t)myspace, &inptr, &inbytesleft, &outptr, &outbytesleft);
407
408 return ((memcmp(inbuf, outbuf, 6) == 0) ? 0 : 1);
409 }
410
/*
 * Runs one test case and reports the outcome on stdout.
 *
 * tester: function to call; a zero return counts as success.
 * label:  name printed in the report line.
 */
static void
test(int (tester) (void), const char * label)
{
	const int result = tester();

	if (result == 0) {
		printf("%s succeeded\n", label);
		return;
	}
	printf("%s failed (%d)\n", label, result);
}
421
422 int
main(void)423 main(void)
424 {
425 test(ctl_get_translit1, "ctl_get_translit1");
426 test(ctl_get_translit2, "ctl_get_translit2");
427 test(ctl_set_translit1, "ctl_set_translit1");
428 test(ctl_set_translit2, "ctl_set_translit2");
429 test(ctl_get_discard_ilseq1, "ctl_get_discard_ilseq1");
430 test(ctl_get_discard_ilseq2, "ctl_get_discard_ilseq2");
431 test(ctl_set_discard_ilseq1, "ctl_set_discard_ilseq1");
432 test(ctl_set_discard_ilseq2, "ctl_set_discard_ilseq2");
433 test(ctl_trivialp1, "ctl_trivialp1");
434 test(ctl_trivialp2, "ctl_trivialp2");
435 test(ctl_uc_hook, "ctl_uc_hook");
436 test(ctl_wc_hook, "ctl_wc_hook");
437 // test(ctl_mb_to_uc_fb, "ctl_mb_to_uc_fb");
438 test(gnu_openinto, "gnu_openinto");
439 test(gnu_canonicalize1, "gnu_canonicalize1");
440 test(gnu_canonicalize2, "gnu_canonicalize2");
441 test(gnu_iconvlist, "gnu_iconvlist");
442 }
443