1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (C) 2009 Gabor Kovesdan <[email protected]>
5 * Copyright (C) 2012 Oleg Moskalenko <[email protected]>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <ctype.h>
34 #include <errno.h>
35 #include <err.h>
36 #include <langinfo.h>
37 #include <math.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <wctype.h>
42
43 #include "bwstring.h"
44 #include "sort.h"
45
/*
 * When set, bwscoll() compares single-byte keys with memcmp() instead of
 * going through strcoll().
 */
bool byte_sort;

/*
 * Upper-cased abbreviated month names, filled in by initialise_months()
 * and consulted by bws_month_score().  wmonths is used when
 * MB_CUR_MAX > 1, cmonths when MB_CUR_MAX == 1; a NULL slot means the
 * name for that month is unavailable.
 */
static wchar_t **wmonths;
static unsigned char **cmonths;
50
51 /* initialise months */
52
53 void
initialise_months(void)54 initialise_months(void)
55 {
56 const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
57 ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
58 ABMON_11, ABMON_12 };
59 unsigned char *tmp;
60 size_t len;
61
62 if (MB_CUR_MAX == 1) {
63 if (cmonths == NULL) {
64 unsigned char *m;
65
66 cmonths = sort_malloc(sizeof(unsigned char*) * 12);
67 for (int i = 0; i < 12; i++) {
68 cmonths[i] = NULL;
69 tmp = (unsigned char *) nl_langinfo(item[i]);
70 if (debug_sort)
71 printf("month[%d]=%s\n", i, tmp);
72 if (*tmp == '\0')
73 continue;
74 m = sort_strdup(tmp);
75 len = strlen(tmp);
76 for (unsigned int j = 0; j < len; j++)
77 m[j] = toupper(m[j]);
78 cmonths[i] = m;
79 }
80 }
81
82 } else {
83 if (wmonths == NULL) {
84 wchar_t *m;
85
86 wmonths = sort_malloc(sizeof(wchar_t *) * 12);
87 for (int i = 0; i < 12; i++) {
88 wmonths[i] = NULL;
89 tmp = (unsigned char *) nl_langinfo(item[i]);
90 if (debug_sort)
91 printf("month[%d]=%s\n", i, tmp);
92 if (*tmp == '\0')
93 continue;
94 len = strlen(tmp);
95 m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
96 if (mbstowcs(m, (char*)tmp, len) ==
97 ((size_t) - 1)) {
98 sort_free(m);
99 continue;
100 }
101 m[len] = L'\0';
102 for (unsigned int j = 0; j < len; j++)
103 m[j] = towupper(m[j]);
104 wmonths[i] = m;
105 }
106 }
107 }
108 }
109
/*
 * Collate two NUL-terminated wide strings.
 *
 * wcscoll() is tried first.  If it reports EILSEQ the result of wcscmp()
 * is used instead, and should that call leave errno set, the strings are
 * compared element by element.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
	int order;
	size_t pos;

	errno = 0;
	order = wcscoll(s1, s2);
	if (errno != EILSEQ)
		return (order);

	errno = 0;
	order = wcscmp(s1, s2);
	if (errno == 0)
		return (order);

	/* Last resort: walk to the first difference or terminator. */
	pos = 0;
	while (s1[pos] != L'\0' && s2[pos] != L'\0' && s1[pos] == s2[pos])
		++pos;

	if (s1[pos] == L'\0')
		return ((s2[pos] == L'\0') ? 0 : -1);
	if (s2[pos] == L'\0')
		return (+1);
	return ((int)(s1[pos] - s2[pos]));
}
139
140 /* counterparts of wcs functions */
141
142 void
bwsprintf(FILE * f,struct bwstring * bws,const char * prefix,const char * suffix)143 bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
144 {
145
146 if (MB_CUR_MAX == 1)
147 fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix);
148 else
149 fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix);
150 }
151
bwsrawdata(const struct bwstring * bws)152 const void* bwsrawdata(const struct bwstring *bws)
153 {
154
155 return (&(bws->data));
156 }
157
bwsrawlen(const struct bwstring * bws)158 size_t bwsrawlen(const struct bwstring *bws)
159 {
160
161 return ((MB_CUR_MAX == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len));
162 }
163
164 size_t
bws_memsize(const struct bwstring * bws)165 bws_memsize(const struct bwstring *bws)
166 {
167
168 return ((MB_CUR_MAX == 1) ? (bws->len + 2 + sizeof(struct bwstring)) :
169 (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring)));
170 }
171
172 void
bws_setlen(struct bwstring * bws,size_t newlen)173 bws_setlen(struct bwstring *bws, size_t newlen)
174 {
175
176 if (bws && newlen != bws->len && newlen <= bws->len) {
177 bws->len = newlen;
178 if (MB_CUR_MAX == 1)
179 bws->data.cstr[newlen] = '\0';
180 else
181 bws->data.wstr[newlen] = L'\0';
182 }
183 }
184
185 /*
186 * Allocate a new binary string of specified size
187 */
188 struct bwstring *
bwsalloc(size_t sz)189 bwsalloc(size_t sz)
190 {
191 struct bwstring *ret;
192
193 if (MB_CUR_MAX == 1)
194 ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
195 else
196 ret = sort_malloc(sizeof(struct bwstring) +
197 SIZEOF_WCHAR_STRING(sz + 1));
198 ret->len = sz;
199
200 if (MB_CUR_MAX == 1)
201 ret->data.cstr[ret->len] = '\0';
202 else
203 ret->data.wstr[ret->len] = L'\0';
204
205 return (ret);
206 }
207
208 /*
209 * Create a copy of binary string.
210 * New string size equals the length of the old string.
211 */
212 struct bwstring *
bwsdup(const struct bwstring * s)213 bwsdup(const struct bwstring *s)
214 {
215
216 if (s == NULL)
217 return (NULL);
218 else {
219 struct bwstring *ret = bwsalloc(s->len);
220
221 if (MB_CUR_MAX == 1)
222 memcpy(ret->data.cstr, s->data.cstr, (s->len));
223 else
224 memcpy(ret->data.wstr, s->data.wstr,
225 SIZEOF_WCHAR_STRING(s->len));
226
227 return (ret);
228 }
229 }
230
231 /*
232 * Create a new binary string from a wide character buffer.
233 */
234 struct bwstring *
bwssbdup(const wchar_t * str,size_t len)235 bwssbdup(const wchar_t *str, size_t len)
236 {
237
238 if (str == NULL)
239 return ((len == 0) ? bwsalloc(0) : NULL);
240 else {
241 struct bwstring *ret;
242
243 ret = bwsalloc(len);
244
245 if (MB_CUR_MAX == 1)
246 for (size_t i = 0; i < len; ++i)
247 ret->data.cstr[i] = (unsigned char) str[i];
248 else
249 memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len));
250
251 return (ret);
252 }
253 }
254
255 /*
256 * Create a new binary string from a raw binary buffer.
257 */
258 struct bwstring *
bwscsbdup(const unsigned char * str,size_t len)259 bwscsbdup(const unsigned char *str, size_t len)
260 {
261 struct bwstring *ret;
262
263 ret = bwsalloc(len);
264
265 if (str) {
266 if (MB_CUR_MAX == 1)
267 memcpy(ret->data.cstr, str, len);
268 else {
269 mbstate_t mbs;
270 const char *s;
271 size_t charlen, chars, cptr;
272
273 chars = 0;
274 cptr = 0;
275 s = (const char *) str;
276
277 memset(&mbs, 0, sizeof(mbs));
278
279 while (cptr < len) {
280 size_t n = MB_CUR_MAX;
281
282 if (n > len - cptr)
283 n = len - cptr;
284 charlen = mbrlen(s + cptr, n, &mbs);
285 switch (charlen) {
286 case 0:
287 /* FALLTHROUGH */
288 case (size_t) -1:
289 /* FALLTHROUGH */
290 case (size_t) -2:
291 ret->data.wstr[chars++] =
292 (unsigned char) s[cptr];
293 ++cptr;
294 break;
295 default:
296 n = mbrtowc(ret->data.wstr + (chars++),
297 s + cptr, charlen, &mbs);
298 if ((n == (size_t)-1) || (n == (size_t)-2))
299 /* NOTREACHED */
300 err(2, "mbrtowc error");
301 cptr += charlen;
302 }
303 }
304
305 ret->len = chars;
306 ret->data.wstr[ret->len] = L'\0';
307 }
308 }
309 return (ret);
310 }
311
/*
 * Release a binary string through sort_free().  A NULL pointer is
 * ignored.
 */
void
bwsfree(const struct bwstring *s)
{

	if (s == NULL)
		return;
	sort_free(s);
}
322
323 /*
324 * Copy content of src binary string to dst.
325 * If the capacity of the dst string is not sufficient,
326 * then the data is truncated.
327 */
328 size_t
bwscpy(struct bwstring * dst,const struct bwstring * src)329 bwscpy(struct bwstring *dst, const struct bwstring *src)
330 {
331 size_t nums = src->len;
332
333 if (nums > dst->len)
334 nums = dst->len;
335 dst->len = nums;
336
337 if (MB_CUR_MAX == 1) {
338 memcpy(dst->data.cstr, src->data.cstr, nums);
339 dst->data.cstr[dst->len] = '\0';
340 } else {
341 memcpy(dst->data.wstr, src->data.wstr,
342 SIZEOF_WCHAR_STRING(nums + 1));
343 dst->data.wstr[dst->len] = L'\0';
344 }
345
346 return (nums);
347 }
348
349 /*
350 * Copy content of src binary string to dst,
351 * with specified number of symbols to be copied.
352 * If the capacity of the dst string is not sufficient,
353 * then the data is truncated.
354 */
355 struct bwstring *
bwsncpy(struct bwstring * dst,const struct bwstring * src,size_t size)356 bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
357 {
358 size_t nums = src->len;
359
360 if (nums > dst->len)
361 nums = dst->len;
362 if (nums > size)
363 nums = size;
364 dst->len = nums;
365
366 if (MB_CUR_MAX == 1) {
367 memcpy(dst->data.cstr, src->data.cstr, nums);
368 dst->data.cstr[dst->len] = '\0';
369 } else {
370 memcpy(dst->data.wstr, src->data.wstr,
371 SIZEOF_WCHAR_STRING(nums + 1));
372 dst->data.wstr[dst->len] = L'\0';
373 }
374
375 return (dst);
376 }
377
378 /*
379 * Copy content of src binary string to dst,
380 * with specified number of symbols to be copied.
381 * An offset value can be specified, from the start of src string.
382 * If the capacity of the dst string is not sufficient,
383 * then the data is truncated.
384 */
385 struct bwstring *
bwsnocpy(struct bwstring * dst,const struct bwstring * src,size_t offset,size_t size)386 bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
387 size_t size)
388 {
389
390 if (offset >= src->len) {
391 dst->data.wstr[0] = 0;
392 dst->len = 0;
393 } else {
394 size_t nums = src->len - offset;
395
396 if (nums > dst->len)
397 nums = dst->len;
398 if (nums > size)
399 nums = size;
400 dst->len = nums;
401 if (MB_CUR_MAX == 1) {
402 memcpy(dst->data.cstr, src->data.cstr + offset,
403 (nums));
404 dst->data.cstr[dst->len] = '\0';
405 } else {
406 memcpy(dst->data.wstr, src->data.wstr + offset,
407 SIZEOF_WCHAR_STRING(nums));
408 dst->data.wstr[dst->len] = L'\0';
409 }
410 }
411 return (dst);
412 }
413
414 /*
415 * Write binary string to the file.
416 * The output is ended either with '\n' (nl == true)
417 * or '\0' (nl == false).
418 */
419 size_t
bwsfwrite(struct bwstring * bws,FILE * f,bool zero_ended)420 bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
421 {
422
423 if (MB_CUR_MAX == 1) {
424 size_t len = bws->len;
425
426 if (!zero_ended) {
427 bws->data.cstr[len] = '\n';
428
429 if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
430 err(2, NULL);
431
432 bws->data.cstr[len] = '\0';
433 } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
434 err(2, NULL);
435
436 return (len + 1);
437
438 } else {
439 wchar_t eols;
440 size_t printed = 0;
441
442 eols = zero_ended ? btowc('\0') : btowc('\n');
443
444 while (printed < BWSLEN(bws)) {
445 const wchar_t *s = bws->data.wstr + printed;
446
447 if (*s == L'\0') {
448 int nums;
449
450 nums = fwprintf(f, L"%lc", *s);
451
452 if (nums != 1)
453 err(2, NULL);
454 ++printed;
455 } else {
456 int nums;
457
458 nums = fwprintf(f, L"%ls", s);
459
460 if (nums < 1)
461 err(2, NULL);
462 printed += nums;
463 }
464 }
465 fwprintf(f, L"%lc", eols);
466 return (printed + 1);
467 }
468 }
469
470 /*
471 * Allocate and read a binary string from file.
472 * The strings are nl-ended or zero-ended, depending on the sort setting.
473 */
474 struct bwstring *
bwsfgetln(FILE * f,size_t * len,bool zero_ended,struct reader_buffer * rb)475 bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
476 {
477 wint_t eols;
478
479 eols = zero_ended ? btowc('\0') : btowc('\n');
480
481 if (!zero_ended && (MB_CUR_MAX > 1)) {
482 wchar_t *ret;
483
484 ret = fgetwln(f, len);
485
486 if (ret == NULL) {
487 if (!feof(f))
488 err(2, NULL);
489 return (NULL);
490 }
491 if (*len > 0) {
492 if (ret[*len - 1] == (wchar_t)eols)
493 --(*len);
494 }
495 return (bwssbdup(ret, *len));
496
497 } else if (!zero_ended && (MB_CUR_MAX == 1)) {
498 char *ret;
499
500 ret = fgetln(f, len);
501
502 if (ret == NULL) {
503 if (!feof(f))
504 err(2, NULL);
505 return (NULL);
506 }
507 if (*len > 0) {
508 if (ret[*len - 1] == '\n')
509 --(*len);
510 }
511 return (bwscsbdup((unsigned char*)ret, *len));
512
513 } else {
514 *len = 0;
515
516 if (feof(f))
517 return (NULL);
518
519 if (2 >= rb->fgetwln_z_buffer_size) {
520 rb->fgetwln_z_buffer_size += 256;
521 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
522 sizeof(wchar_t) * rb->fgetwln_z_buffer_size);
523 }
524 rb->fgetwln_z_buffer[*len] = 0;
525
526 if (MB_CUR_MAX == 1)
527 while (!feof(f)) {
528 int c;
529
530 c = fgetc(f);
531
532 if (c == EOF) {
533 if (*len == 0)
534 return (NULL);
535 goto line_read_done;
536 }
537 if (c == eols)
538 goto line_read_done;
539
540 if (*len + 1 >= rb->fgetwln_z_buffer_size) {
541 rb->fgetwln_z_buffer_size += 256;
542 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
543 SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
544 }
545
546 rb->fgetwln_z_buffer[*len] = c;
547 rb->fgetwln_z_buffer[++(*len)] = 0;
548 }
549 else
550 while (!feof(f)) {
551 wint_t c = 0;
552
553 c = fgetwc(f);
554
555 if (c == WEOF) {
556 if (*len == 0)
557 return (NULL);
558 goto line_read_done;
559 }
560 if (c == eols)
561 goto line_read_done;
562
563 if (*len + 1 >= rb->fgetwln_z_buffer_size) {
564 rb->fgetwln_z_buffer_size += 256;
565 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
566 SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
567 }
568
569 rb->fgetwln_z_buffer[*len] = c;
570 rb->fgetwln_z_buffer[++(*len)] = 0;
571 }
572
573 line_read_done:
574 /* we do not count the last 0 */
575 return (bwssbdup(rb->fgetwln_z_buffer, *len));
576 }
577 }
578
579 int
bwsncmp(const struct bwstring * bws1,const struct bwstring * bws2,size_t offset,size_t len)580 bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
581 size_t offset, size_t len)
582 {
583 size_t cmp_len, len1, len2;
584 int res = 0;
585
586 len1 = bws1->len;
587 len2 = bws2->len;
588
589 if (len1 <= offset) {
590 return ((len2 <= offset) ? 0 : -1);
591 } else {
592 if (len2 <= offset)
593 return (+1);
594 else {
595 len1 -= offset;
596 len2 -= offset;
597
598 cmp_len = len1;
599
600 if (len2 < cmp_len)
601 cmp_len = len2;
602
603 if (len < cmp_len)
604 cmp_len = len;
605
606 if (MB_CUR_MAX == 1) {
607 const unsigned char *s1, *s2;
608
609 s1 = bws1->data.cstr + offset;
610 s2 = bws2->data.cstr + offset;
611
612 res = memcmp(s1, s2, cmp_len);
613
614 } else {
615 const wchar_t *s1, *s2;
616
617 s1 = bws1->data.wstr + offset;
618 s2 = bws2->data.wstr + offset;
619
620 res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
621 }
622 }
623 }
624
625 if (res == 0) {
626 if (len1 < cmp_len && len1 < len2)
627 res = -1;
628 else if (len2 < cmp_len && len2 < len1)
629 res = +1;
630 }
631
632 return (res);
633 }
634
635 int
bwscmp(const struct bwstring * bws1,const struct bwstring * bws2,size_t offset)636 bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
637 {
638 size_t len1, len2, cmp_len;
639 int res;
640
641 len1 = bws1->len;
642 len2 = bws2->len;
643
644 len1 -= offset;
645 len2 -= offset;
646
647 cmp_len = len1;
648
649 if (len2 < cmp_len)
650 cmp_len = len2;
651
652 res = bwsncmp(bws1, bws2, offset, cmp_len);
653
654 if (res == 0) {
655 if( len1 < len2)
656 res = -1;
657 else if (len2 < len1)
658 res = +1;
659 }
660
661 return (res);
662 }
663
664 int
bws_iterator_cmp(bwstring_iterator iter1,bwstring_iterator iter2,size_t len)665 bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
666 {
667 wchar_t c1, c2;
668 size_t i = 0;
669
670 for (i = 0; i < len; ++i) {
671 c1 = bws_get_iter_value(iter1);
672 c2 = bws_get_iter_value(iter2);
673 if (c1 != c2)
674 return (c1 - c2);
675 iter1 = bws_iterator_inc(iter1, 1);
676 iter2 = bws_iterator_inc(iter2, 1);
677 }
678
679 return (0);
680 }
681
682 int
bwscoll(const struct bwstring * bws1,const struct bwstring * bws2,size_t offset)683 bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
684 {
685 size_t len1, len2;
686
687 len1 = bws1->len;
688 len2 = bws2->len;
689
690 if (len1 <= offset)
691 return ((len2 <= offset) ? 0 : -1);
692 else {
693 if (len2 <= offset)
694 return (+1);
695 else {
696 len1 -= offset;
697 len2 -= offset;
698
699 if (MB_CUR_MAX == 1) {
700 const unsigned char *s1, *s2;
701
702 s1 = bws1->data.cstr + offset;
703 s2 = bws2->data.cstr + offset;
704
705 if (byte_sort) {
706 int res = 0;
707
708 if (len1 > len2) {
709 res = memcmp(s1, s2, len2);
710 if (!res)
711 res = +1;
712 } else if (len1 < len2) {
713 res = memcmp(s1, s2, len1);
714 if (!res)
715 res = -1;
716 } else
717 res = memcmp(s1, s2, len1);
718
719 return (res);
720
721 } else {
722 int res = 0;
723 size_t i, maxlen;
724
725 i = 0;
726 maxlen = len1;
727
728 if (maxlen > len2)
729 maxlen = len2;
730
731 while (i < maxlen) {
732 /* goto next non-zero part: */
733 while ((i < maxlen) &&
734 !s1[i] && !s2[i])
735 ++i;
736
737 if (i >= maxlen)
738 break;
739
740 if (s1[i] == 0) {
741 if (s2[i] == 0)
742 /* NOTREACHED */
743 err(2, "bwscoll error 01");
744 else
745 return (-1);
746 } else if (s2[i] == 0)
747 return (+1);
748
749 res = strcoll((const char*)(s1 + i), (const char*)(s2 + i));
750 if (res)
751 return (res);
752
753 while ((i < maxlen) &&
754 s1[i] && s2[i])
755 ++i;
756
757 if (i >= maxlen)
758 break;
759
760 if (s1[i] == 0) {
761 if (s2[i] == 0) {
762 ++i;
763 continue;
764 } else
765 return (-1);
766 } else if (s2[i] == 0)
767 return (+1);
768 else
769 /* NOTREACHED */
770 err(2, "bwscoll error 02");
771 }
772
773 if (len1 < len2)
774 return (-1);
775 else if (len1 > len2)
776 return (+1);
777
778 return (0);
779 }
780 } else {
781 const wchar_t *s1, *s2;
782 size_t i, maxlen;
783 int res = 0;
784
785 s1 = bws1->data.wstr + offset;
786 s2 = bws2->data.wstr + offset;
787
788 i = 0;
789 maxlen = len1;
790
791 if (maxlen > len2)
792 maxlen = len2;
793
794 while (i < maxlen) {
795
796 /* goto next non-zero part: */
797 while ((i < maxlen) &&
798 !s1[i] && !s2[i])
799 ++i;
800
801 if (i >= maxlen)
802 break;
803
804 if (s1[i] == 0) {
805 if (s2[i] == 0)
806 /* NOTREACHED */
807 err(2, "bwscoll error 1");
808 else
809 return (-1);
810 } else if (s2[i] == 0)
811 return (+1);
812
813 res = wide_str_coll(s1 + i, s2 + i);
814 if (res)
815 return (res);
816
817 while ((i < maxlen) && s1[i] && s2[i])
818 ++i;
819
820 if (i >= maxlen)
821 break;
822
823 if (s1[i] == 0) {
824 if (s2[i] == 0) {
825 ++i;
826 continue;
827 } else
828 return (-1);
829 } else if (s2[i] == 0)
830 return (+1);
831 else
832 /* NOTREACHED */
833 err(2, "bwscoll error 2");
834 }
835
836 if (len1 < len2)
837 return (-1);
838 else if (len1 > len2)
839 return (+1);
840
841 return (0);
842 }
843 }
844 }
845 }
846
847 /*
848 * Correction of the system API
849 */
850 double
bwstod(struct bwstring * s0,bool * empty)851 bwstod(struct bwstring *s0, bool *empty)
852 {
853 double ret = 0;
854
855 if (MB_CUR_MAX == 1) {
856 unsigned char *end, *s;
857 char *ep;
858
859 s = s0->data.cstr;
860 end = s + s0->len;
861 ep = NULL;
862
863 while (isblank(*s) && s < end)
864 ++s;
865
866 if (!isprint(*s)) {
867 *empty = true;
868 return (0);
869 }
870
871 ret = strtod((char*)s, &ep);
872 if ((unsigned char*) ep == s) {
873 *empty = true;
874 return (0);
875 }
876 } else {
877 wchar_t *end, *ep, *s;
878
879 s = s0->data.wstr;
880 end = s + s0->len;
881 ep = NULL;
882
883 while (iswblank(*s) && s < end)
884 ++s;
885
886 if (!iswprint(*s)) {
887 *empty = true;
888 return (0);
889 }
890
891 ret = wcstod(s, &ep);
892 if (ep == s) {
893 *empty = true;
894 return (0);
895 }
896 }
897
898 *empty = false;
899 return (ret);
900 }
901
902 /*
903 * A helper function for monthcoll. If a line matches
904 * a month name, it returns (number of the month - 1),
905 * while if there is no match, it just return -1.
906 */
907
908 int
bws_month_score(const struct bwstring * s0)909 bws_month_score(const struct bwstring *s0)
910 {
911
912 if (MB_CUR_MAX == 1) {
913 const unsigned char *end, *s;
914
915 s = s0->data.cstr;
916 end = s + s0->len;
917
918 while (isblank(*s) && s < end)
919 ++s;
920
921 for (int i = 11; i >= 0; --i) {
922 if (cmonths[i] &&
923 (s == (unsigned char*)strstr((const char*)s, (char*)(cmonths[i]))))
924 return (i);
925 }
926
927 } else {
928 const wchar_t *end, *s;
929
930 s = s0->data.wstr;
931 end = s + s0->len;
932
933 while (iswblank(*s) && s < end)
934 ++s;
935
936 for (int i = 11; i >= 0; --i) {
937 if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
938 return (i);
939 }
940 }
941
942 return (-1);
943 }
944
945 /*
946 * Rips out leading blanks (-b).
947 */
948 struct bwstring *
ignore_leading_blanks(struct bwstring * str)949 ignore_leading_blanks(struct bwstring *str)
950 {
951
952 if (MB_CUR_MAX == 1) {
953 unsigned char *dst, *end, *src;
954
955 src = str->data.cstr;
956 dst = src;
957 end = src + str->len;
958
959 while (src < end && isblank(*src))
960 ++src;
961
962 if (src != dst) {
963 size_t newlen;
964
965 newlen = BWSLEN(str) - (src - dst);
966
967 while (src < end) {
968 *dst = *src;
969 ++dst;
970 ++src;
971 }
972 bws_setlen(str, newlen);
973 }
974 } else {
975 wchar_t *dst, *end, *src;
976
977 src = str->data.wstr;
978 dst = src;
979 end = src + str->len;
980
981 while (src < end && iswblank(*src))
982 ++src;
983
984 if (src != dst) {
985
986 size_t newlen = BWSLEN(str) - (src - dst);
987
988 while (src < end) {
989 *dst = *src;
990 ++dst;
991 ++src;
992 }
993 bws_setlen(str, newlen);
994
995 }
996 }
997 return (str);
998 }
999
1000 /*
1001 * Rips out nonprinting characters (-i).
1002 */
1003 struct bwstring *
ignore_nonprinting(struct bwstring * str)1004 ignore_nonprinting(struct bwstring *str)
1005 {
1006 size_t newlen = str->len;
1007
1008 if (MB_CUR_MAX == 1) {
1009 unsigned char *dst, *end, *src;
1010 unsigned char c;
1011
1012 src = str->data.cstr;
1013 dst = src;
1014 end = src + str->len;
1015
1016 while (src < end) {
1017 c = *src;
1018 if (isprint(c)) {
1019 *dst = c;
1020 ++dst;
1021 ++src;
1022 } else {
1023 ++src;
1024 --newlen;
1025 }
1026 }
1027 } else {
1028 wchar_t *dst, *end, *src;
1029 wchar_t c;
1030
1031 src = str->data.wstr;
1032 dst = src;
1033 end = src + str->len;
1034
1035 while (src < end) {
1036 c = *src;
1037 if (iswprint(c)) {
1038 *dst = c;
1039 ++dst;
1040 ++src;
1041 } else {
1042 ++src;
1043 --newlen;
1044 }
1045 }
1046 }
1047 bws_setlen(str, newlen);
1048
1049 return (str);
1050 }
1051
1052 /*
1053 * Rips out any characters that are not alphanumeric characters
1054 * nor blanks (-d).
1055 */
1056 struct bwstring *
dictionary_order(struct bwstring * str)1057 dictionary_order(struct bwstring *str)
1058 {
1059 size_t newlen = str->len;
1060
1061 if (MB_CUR_MAX == 1) {
1062 unsigned char *dst, *end, *src;
1063 unsigned char c;
1064
1065 src = str->data.cstr;
1066 dst = src;
1067 end = src + str->len;
1068
1069 while (src < end) {
1070 c = *src;
1071 if (isalnum(c) || isblank(c)) {
1072 *dst = c;
1073 ++dst;
1074 ++src;
1075 } else {
1076 ++src;
1077 --newlen;
1078 }
1079 }
1080 } else {
1081 wchar_t *dst, *end, *src;
1082 wchar_t c;
1083
1084 src = str->data.wstr;
1085 dst = src;
1086 end = src + str->len;
1087
1088 while (src < end) {
1089 c = *src;
1090 if (iswalnum(c) || iswblank(c)) {
1091 *dst = c;
1092 ++dst;
1093 ++src;
1094 } else {
1095 ++src;
1096 --newlen;
1097 }
1098 }
1099 }
1100 bws_setlen(str, newlen);
1101
1102 return (str);
1103 }
1104
1105 /*
1106 * Converts string to lower case(-f).
1107 */
1108 struct bwstring *
ignore_case(struct bwstring * str)1109 ignore_case(struct bwstring *str)
1110 {
1111
1112 if (MB_CUR_MAX == 1) {
1113 unsigned char *end, *s;
1114
1115 s = str->data.cstr;
1116 end = s + str->len;
1117
1118 while (s < end) {
1119 *s = toupper(*s);
1120 ++s;
1121 }
1122 } else {
1123 wchar_t *end, *s;
1124
1125 s = str->data.wstr;
1126 end = s + str->len;
1127
1128 while (s < end) {
1129 *s = towupper(*s);
1130 ++s;
1131 }
1132 }
1133 return (str);
1134 }
1135
1136 void
bws_disorder_warnx(struct bwstring * s,const char * fn,size_t pos)1137 bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
1138 {
1139
1140 if (MB_CUR_MAX == 1)
1141 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr);
1142 else
1143 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr);
1144 }
1145