1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright 2013 Garrett D'Amore <[email protected]>
5 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
6 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
7 * Copyright (c) 1993
8 * The Regents of the University of California. All rights reserved.
9 *
10 * This code is derived from software contributed to Berkeley by
11 * Paul Borman at Krystal Technologies.
12 *
13 * Copyright (c) 2011 The FreeBSD Foundation
14 * All rights reserved.
15 * Portions of this software were developed by David Chisnall
16 * under sponsorship from the FreeBSD Foundation.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. Neither the name of the University nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 */
42
43 #if defined(LIBC_SCCS) && !defined(lint)
44 static char sccsid[] = "@(#)euc.c 8.1 (Berkeley) 6/4/93";
45 #endif /* LIBC_SCCS and not lint */
46 #include <sys/param.h>
47 __FBSDID("$FreeBSD$");
48
49 #include <errno.h>
50 #include <limits.h>
51 #include <runetype.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include <wchar.h>
55 #include "mblocal.h"
56
57 extern int __mb_sb_limit;
58
59 static size_t _EUC_mbrtowc_impl(wchar_t * __restrict, const char * __restrict,
60 size_t, mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t);
61 static size_t _EUC_wcrtomb_impl(char * __restrict, wchar_t,
62 mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t);
63
64 static size_t _EUC_CN_mbrtowc(wchar_t * __restrict, const char * __restrict,
65 size_t, mbstate_t * __restrict);
66 static size_t _EUC_JP_mbrtowc(wchar_t * __restrict, const char * __restrict,
67 size_t, mbstate_t * __restrict);
68 static size_t _EUC_KR_mbrtowc(wchar_t * __restrict, const char * __restrict,
69 size_t, mbstate_t * __restrict);
70 static size_t _EUC_TW_mbrtowc(wchar_t * __restrict, const char * __restrict,
71 size_t, mbstate_t * __restrict);
72
73 static size_t _EUC_CN_wcrtomb(char * __restrict, wchar_t,
74 mbstate_t * __restrict);
75 static size_t _EUC_JP_wcrtomb(char * __restrict, wchar_t,
76 mbstate_t * __restrict);
77 static size_t _EUC_KR_wcrtomb(char * __restrict, wchar_t,
78 mbstate_t * __restrict);
79 static size_t _EUC_TW_wcrtomb(char * __restrict, wchar_t,
80 mbstate_t * __restrict);
81
82 static size_t _EUC_CN_mbsnrtowcs(wchar_t * __restrict,
83 const char ** __restrict, size_t, size_t,
84 mbstate_t * __restrict);
85 static size_t _EUC_JP_mbsnrtowcs(wchar_t * __restrict,
86 const char ** __restrict, size_t, size_t,
87 mbstate_t * __restrict);
88 static size_t _EUC_KR_mbsnrtowcs(wchar_t * __restrict,
89 const char ** __restrict, size_t, size_t,
90 mbstate_t * __restrict);
91 static size_t _EUC_TW_mbsnrtowcs(wchar_t * __restrict,
92 const char ** __restrict, size_t, size_t,
93 mbstate_t * __restrict);
94
95 static size_t _EUC_CN_wcsnrtombs(char * __restrict,
96 const wchar_t ** __restrict, size_t, size_t,
97 mbstate_t * __restrict);
98 static size_t _EUC_JP_wcsnrtombs(char * __restrict,
99 const wchar_t ** __restrict, size_t, size_t,
100 mbstate_t * __restrict);
101 static size_t _EUC_KR_wcsnrtombs(char * __restrict,
102 const wchar_t ** __restrict, size_t, size_t,
103 mbstate_t * __restrict);
104 static size_t _EUC_TW_wcsnrtombs(char * __restrict,
105 const wchar_t ** __restrict, size_t, size_t,
106 mbstate_t * __restrict);
107
108 static int _EUC_mbsinit(const mbstate_t *);
109
/*
 * Conversion state overlaid on the caller's mbstate_t.
 */
typedef struct {
	/* Bytes of a partially read character, saved between calls. */
	wchar_t	ch;
	/* Not referenced by the conversion routines in this file. */
	int	set;
	/* Bytes still needed to finish the character; 0 means no pending data. */
	int	want;
} _EucState;
115
116 static int
_EUC_mbsinit(const mbstate_t * ps)117 _EUC_mbsinit(const mbstate_t *ps)
118 {
119
120 return (ps == NULL || ((const _EucState *)ps)->want == 0);
121 }
122
123 /*
124 * EUC-CN uses CS0, CS1 and CS2 (4 bytes).
125 */
126 int
_EUC_CN_init(struct xlocale_ctype * l,_RuneLocale * rl)127 _EUC_CN_init(struct xlocale_ctype *l, _RuneLocale *rl)
128 {
129 l->__mbrtowc = _EUC_CN_mbrtowc;
130 l->__wcrtomb = _EUC_CN_wcrtomb;
131 l->__mbsnrtowcs = _EUC_CN_mbsnrtowcs;
132 l->__wcsnrtombs = _EUC_CN_wcsnrtombs;
133 l->__mbsinit = _EUC_mbsinit;
134
135 l->runes = rl;
136 l->__mb_cur_max = 4;
137 l->__mb_sb_limit = 128;
138 return (0);
139 }
140
141 static size_t
_EUC_CN_mbrtowc(wchar_t * __restrict pwc,const char * __restrict s,size_t n,mbstate_t * __restrict ps)142 _EUC_CN_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
143 size_t n, mbstate_t * __restrict ps)
144 {
145 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
146 }
147
148 static size_t
_EUC_CN_mbsnrtowcs(wchar_t * __restrict dst,const char ** __restrict src,size_t nms,size_t len,mbstate_t * __restrict ps)149 _EUC_CN_mbsnrtowcs(wchar_t * __restrict dst,
150 const char ** __restrict src,
151 size_t nms, size_t len, mbstate_t * __restrict ps)
152 {
153 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc));
154 }
155
156 static size_t
_EUC_CN_wcrtomb(char * __restrict s,wchar_t wc,mbstate_t * __restrict ps)157 _EUC_CN_wcrtomb(char * __restrict s, wchar_t wc,
158 mbstate_t * __restrict ps)
159 {
160 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
161 }
162
163 static size_t
_EUC_CN_wcsnrtombs(char * __restrict dst,const wchar_t ** __restrict src,size_t nwc,size_t len,mbstate_t * __restrict ps)164 _EUC_CN_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
165 size_t nwc, size_t len, mbstate_t * __restrict ps)
166 {
167 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb));
168 }
169
170 /*
171 * EUC-KR uses only CS0 and CS1.
172 */
173 int
_EUC_KR_init(struct xlocale_ctype * l,_RuneLocale * rl)174 _EUC_KR_init(struct xlocale_ctype *l, _RuneLocale *rl)
175 {
176 l->__mbrtowc = _EUC_KR_mbrtowc;
177 l->__wcrtomb = _EUC_KR_wcrtomb;
178 l->__mbsnrtowcs = _EUC_KR_mbsnrtowcs;
179 l->__wcsnrtombs = _EUC_KR_wcsnrtombs;
180 l->__mbsinit = _EUC_mbsinit;
181
182 l->runes = rl;
183 l->__mb_cur_max = 2;
184 l->__mb_sb_limit = 128;
185 return (0);
186 }
187
/*
 * EUC-KR single-character decoder: calls the shared EUC decoder with
 * neither a CS2 nor a CS3 lead byte configured.
 */
static size_t
_EUC_KR_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
    size_t n, mbstate_t * __restrict ps)
{
	size_t rv;

	rv = _EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0);
	return (rv);
}
194
195 static size_t
_EUC_KR_mbsnrtowcs(wchar_t * __restrict dst,const char ** __restrict src,size_t nms,size_t len,mbstate_t * __restrict ps)196 _EUC_KR_mbsnrtowcs(wchar_t * __restrict dst,
197 const char ** __restrict src,
198 size_t nms, size_t len, mbstate_t * __restrict ps)
199 {
200 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc));
201 }
202
/*
 * EUC-KR single-character encoder: calls the shared EUC encoder with
 * neither a CS2 nor a CS3 lead byte configured.
 */
static size_t
_EUC_KR_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
{
	size_t rv;

	rv = _EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0);
	return (rv);
}
209
210 static size_t
_EUC_KR_wcsnrtombs(char * __restrict dst,const wchar_t ** __restrict src,size_t nwc,size_t len,mbstate_t * __restrict ps)211 _EUC_KR_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
212 size_t nwc, size_t len, mbstate_t * __restrict ps)
213 {
214 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb));
215 }
216
217 /*
218 * EUC-JP uses CS0, CS1, CS2, and CS3.
219 */
220 int
_EUC_JP_init(struct xlocale_ctype * l,_RuneLocale * rl)221 _EUC_JP_init(struct xlocale_ctype *l, _RuneLocale *rl)
222 {
223 l->__mbrtowc = _EUC_JP_mbrtowc;
224 l->__wcrtomb = _EUC_JP_wcrtomb;
225 l->__mbsnrtowcs = _EUC_JP_mbsnrtowcs;
226 l->__wcsnrtombs = _EUC_JP_wcsnrtombs;
227 l->__mbsinit = _EUC_mbsinit;
228
229 l->runes = rl;
230 l->__mb_cur_max = 3;
231 l->__mb_sb_limit = 128;
232 return (0);
233 }
234
235 static size_t
_EUC_JP_mbrtowc(wchar_t * __restrict pwc,const char * __restrict s,size_t n,mbstate_t * __restrict ps)236 _EUC_JP_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
237 size_t n, mbstate_t * __restrict ps)
238 {
239 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3));
240 }
241
242 static size_t
_EUC_JP_mbsnrtowcs(wchar_t * __restrict dst,const char ** __restrict src,size_t nms,size_t len,mbstate_t * __restrict ps)243 _EUC_JP_mbsnrtowcs(wchar_t * __restrict dst,
244 const char ** __restrict src,
245 size_t nms, size_t len, mbstate_t * __restrict ps)
246 {
247 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc));
248 }
249
250 static size_t
_EUC_JP_wcrtomb(char * __restrict s,wchar_t wc,mbstate_t * __restrict ps)251 _EUC_JP_wcrtomb(char * __restrict s, wchar_t wc,
252 mbstate_t * __restrict ps)
253 {
254 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3));
255 }
256
257 static size_t
_EUC_JP_wcsnrtombs(char * __restrict dst,const wchar_t ** __restrict src,size_t nwc,size_t len,mbstate_t * __restrict ps)258 _EUC_JP_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
259 size_t nwc, size_t len, mbstate_t * __restrict ps)
260 {
261 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb));
262 }
263
264 /*
265 * EUC-TW uses CS0, CS1, and CS2.
266 */
267 int
_EUC_TW_init(struct xlocale_ctype * l,_RuneLocale * rl)268 _EUC_TW_init(struct xlocale_ctype *l, _RuneLocale *rl)
269 {
270 l->__mbrtowc = _EUC_TW_mbrtowc;
271 l->__wcrtomb = _EUC_TW_wcrtomb;
272 l->__mbsnrtowcs = _EUC_TW_mbsnrtowcs;
273 l->__wcsnrtombs = _EUC_TW_wcsnrtombs;
274 l->__mbsinit = _EUC_mbsinit;
275
276 l->runes = rl;
277 l->__mb_cur_max = 4;
278 l->__mb_sb_limit = 128;
279 return (0);
280 }
281
282 static size_t
_EUC_TW_mbrtowc(wchar_t * __restrict pwc,const char * __restrict s,size_t n,mbstate_t * __restrict ps)283 _EUC_TW_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
284 size_t n, mbstate_t * __restrict ps)
285 {
286 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
287 }
288
289 static size_t
_EUC_TW_mbsnrtowcs(wchar_t * __restrict dst,const char ** __restrict src,size_t nms,size_t len,mbstate_t * __restrict ps)290 _EUC_TW_mbsnrtowcs(wchar_t * __restrict dst,
291 const char ** __restrict src,
292 size_t nms, size_t len, mbstate_t * __restrict ps)
293 {
294 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc));
295 }
296
297 static size_t
_EUC_TW_wcrtomb(char * __restrict s,wchar_t wc,mbstate_t * __restrict ps)298 _EUC_TW_wcrtomb(char * __restrict s, wchar_t wc,
299 mbstate_t * __restrict ps)
300 {
301 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
302 }
303
304 static size_t
_EUC_TW_wcsnrtombs(char * __restrict dst,const wchar_t ** __restrict src,size_t nwc,size_t len,mbstate_t * __restrict ps)305 _EUC_TW_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
306 size_t nwc, size_t len, mbstate_t * __restrict ps)
307 {
308 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb));
309 }
310
311 /*
312 * Common EUC code.
313 */
314
315 static size_t
_EUC_mbrtowc_impl(wchar_t * __restrict pwc,const char * __restrict s,size_t n,mbstate_t * __restrict ps,uint8_t cs2,uint8_t cs2width,uint8_t cs3,uint8_t cs3width)316 _EUC_mbrtowc_impl(wchar_t * __restrict pwc, const char * __restrict s,
317 size_t n, mbstate_t * __restrict ps,
318 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
319 {
320 _EucState *es;
321 int i, want;
322 wchar_t wc = 0;
323 unsigned char ch, chs;
324
325 es = (_EucState *)ps;
326
327 if (es->want < 0 || es->want > MB_CUR_MAX) {
328 errno = EINVAL;
329 return ((size_t)-1);
330 }
331
332 if (s == NULL) {
333 s = "";
334 n = 1;
335 pwc = NULL;
336 }
337
338 if (n == 0)
339 /* Incomplete multibyte sequence */
340 return ((size_t)-2);
341
342 if (es->want == 0) {
343 /* Fast path for plain ASCII (CS0) */
344 if (((ch = (unsigned char)*s) & 0x80) == 0) {
345 if (pwc != NULL)
346 *pwc = ch;
347 return (ch != '\0' ? 1 : 0);
348 }
349
350 if (ch >= 0xa1) {
351 /* CS1 */
352 want = 2;
353 } else if (ch == cs2) {
354 want = cs2width;
355 } else if (ch == cs3) {
356 want = cs3width;
357 } else {
358 errno = EILSEQ;
359 return ((size_t)-1);
360 }
361
362
363 es->want = want;
364 es->ch = 0;
365 } else {
366 want = es->want;
367 wc = es->ch;
368 }
369
370 for (i = 0; i < MIN(want, n); i++) {
371 wc <<= 8;
372 chs = *s;
373 wc |= chs;
374 s++;
375 }
376 if (i < want) {
377 /* Incomplete multibyte sequence */
378 es->want = want - i;
379 es->ch = wc;
380 errno = EILSEQ;
381 return ((size_t)-2);
382 }
383 if (pwc != NULL)
384 *pwc = wc;
385 es->want = 0;
386 return (wc == L'\0' ? 0 : want);
387 }
388
389 static size_t
_EUC_wcrtomb_impl(char * __restrict s,wchar_t wc,mbstate_t * __restrict ps,uint8_t cs2,uint8_t cs2width,uint8_t cs3,uint8_t cs3width)390 _EUC_wcrtomb_impl(char * __restrict s, wchar_t wc,
391 mbstate_t * __restrict ps,
392 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
393 {
394 _EucState *es;
395 int i, len;
396 wchar_t nm;
397
398 es = (_EucState *)ps;
399
400 if (es->want != 0) {
401 errno = EINVAL;
402 return ((size_t)-1);
403 }
404
405 if (s == NULL)
406 /* Reset to initial shift state (no-op) */
407 return (1);
408
409 if ((wc & ~0x7f) == 0) {
410 /* Fast path for plain ASCII (CS0) */
411 *s = (char)wc;
412 return (1);
413 }
414
415 /* Determine the "length" */
416 if ((unsigned)wc > 0xffffff) {
417 len = 4;
418 } else if ((unsigned)wc > 0xffff) {
419 len = 3;
420 } else if ((unsigned)wc > 0xff) {
421 len = 2;
422 } else {
423 len = 1;
424 }
425
426 if (len > MB_CUR_MAX) {
427 errno = EILSEQ;
428 return ((size_t)-1);
429 }
430
431 /* This first check excludes CS1, which is implicitly valid. */
432 if ((wc < 0xa100) || (wc > 0xffff)) {
433 /* Check for valid CS2 or CS3 */
434 nm = (wc >> ((len - 1) * 8));
435 if (nm == cs2) {
436 if (len != cs2width) {
437 errno = EILSEQ;
438 return ((size_t)-1);
439 }
440 } else if (nm == cs3) {
441 if (len != cs3width) {
442 errno = EILSEQ;
443 return ((size_t)-1);
444 }
445 } else {
446 errno = EILSEQ;
447 return ((size_t)-1);
448 }
449 }
450
451 /* Stash the bytes, least significant last */
452 for (i = len - 1; i >= 0; i--) {
453 s[i] = (wc & 0xff);
454 wc >>= 8;
455 }
456 return (len);
457 }
458