1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (C) 2009 Gabor Kovesdan <[email protected]>
5 * Copyright (C) 2012 Oleg Moskalenko <[email protected]>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <ctype.h>
34 #include <errno.h>
35 #include <err.h>
36 #include <langinfo.h>
37 #include <math.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <wctype.h>
42
43 #include "bwstring.h"
44 #include "sort.h"
45
/*
 * When true, bwscoll() orders single-byte strings with memcmp() instead of
 * strcoll().
 */
bool byte_sort;

/*
 * Upper-cased abbreviated month names of the current locale, filled in by
 * initialise_months() and consulted by bws_month_score().  wmonths is used
 * when mb_cur_max != 1, cmonths when mb_cur_max == 1.  Individual entries
 * may be NULL.
 */
static wchar_t **wmonths;
static char **cmonths;
50
51 /* initialise months */
52
53 void
initialise_months(void)54 initialise_months(void)
55 {
56 const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
57 ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
58 ABMON_11, ABMON_12 };
59 char *tmp;
60 size_t len;
61
62 if (mb_cur_max == 1) {
63 if (cmonths == NULL) {
64 char *m;
65
66 cmonths = sort_malloc(sizeof(char*) * 12);
67 for (int i = 0; i < 12; i++) {
68 cmonths[i] = NULL;
69 tmp = nl_langinfo(item[i]);
70 if (debug_sort)
71 printf("month[%d]=%s\n", i, tmp);
72 if (*tmp == '\0')
73 continue;
74 m = sort_strdup(tmp);
75 len = strlen(tmp);
76 for (unsigned int j = 0; j < len; j++)
77 m[j] = toupper(m[j]);
78 cmonths[i] = m;
79 }
80 }
81
82 } else {
83 if (wmonths == NULL) {
84 wchar_t *m;
85
86 wmonths = sort_malloc(sizeof(wchar_t *) * 12);
87 for (int i = 0; i < 12; i++) {
88 wmonths[i] = NULL;
89 tmp = nl_langinfo(item[i]);
90 if (debug_sort)
91 printf("month[%d]=%s\n", i, tmp);
92 if (*tmp == '\0')
93 continue;
94 len = strlen(tmp);
95 m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
96 if (mbstowcs(m, tmp, len) ==
97 ((size_t) - 1)) {
98 sort_free(m);
99 continue;
100 }
101 m[len] = L'\0';
102 for (unsigned int j = 0; j < len; j++)
103 m[j] = towupper(m[j]);
104 wmonths[i] = m;
105 }
106 }
107 }
108 }
109
/*
 * Collate two NUL-terminated wide-character strings.
 *
 * wcscoll() is tried first.  If it reports EILSEQ the strings are compared
 * with wcscmp() instead, and if errno is still set after that call, with a
 * plain element-by-element comparison.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
	int ret;

	errno = 0;
	ret = wcscoll(s1, s2);
	if (errno != EILSEQ)
		return (ret);

	errno = 0;
	ret = wcscmp(s1, s2);
	if (errno == 0)
		return (ret);

	/* Last resort: walk both strings in lock step. */
	for (;; ++s1, ++s2) {
		if (*s1 == L'\0')
			return ((*s2 == L'\0') ? 0 : -1);
		if (*s2 == L'\0')
			return (+1);
		if (*s1 != *s2)
			return ((int)(*s1 - *s2));
	}
}
139
140 /* counterparts of wcs functions */
141
142 void
bwsprintf(FILE * f,struct bwstring * bws,const char * prefix,const char * suffix)143 bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
144 {
145
146 if (mb_cur_max == 1)
147 fprintf(f, "%s%s%s", prefix, bws->cdata.str, suffix);
148 else
149 fprintf(f, "%s%S%s", prefix, bws->wdata.str, suffix);
150 }
151
bwsrawdata(const struct bwstring * bws)152 const void* bwsrawdata(const struct bwstring *bws)
153 {
154
155 return (bws->wdata.str);
156 }
157
bwsrawlen(const struct bwstring * bws)158 size_t bwsrawlen(const struct bwstring *bws)
159 {
160
161 return ((mb_cur_max == 1) ? bws->cdata.len :
162 SIZEOF_WCHAR_STRING(bws->wdata.len));
163 }
164
165 size_t
bws_memsize(const struct bwstring * bws)166 bws_memsize(const struct bwstring *bws)
167 {
168
169 return ((mb_cur_max == 1) ?
170 (bws->cdata.len + 2 + sizeof(struct bwstring)) :
171 (SIZEOF_WCHAR_STRING(bws->wdata.len + 1) + sizeof(struct bwstring)));
172 }
173
174 void
bws_setlen(struct bwstring * bws,size_t newlen)175 bws_setlen(struct bwstring *bws, size_t newlen)
176 {
177
178 if (mb_cur_max == 1 && bws && newlen != bws->cdata.len &&
179 newlen <= bws->cdata.len) {
180 bws->cdata.len = newlen;
181 bws->cdata.str[newlen] = '\0';
182 } else if (bws && newlen != bws->wdata.len && newlen <= bws->wdata.len) {
183 bws->wdata.len = newlen;
184 bws->wdata.str[newlen] = L'\0';
185 }
186 }
187
188 /*
189 * Allocate a new binary string of specified size
190 */
191 struct bwstring *
bwsalloc(size_t sz)192 bwsalloc(size_t sz)
193 {
194 struct bwstring *ret;
195
196 if (mb_cur_max == 1) {
197 ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
198 ret->cdata.len = sz;
199 ret->cdata.str[sz] = '\0';
200 } else {
201 ret = sort_malloc(
202 sizeof(struct bwstring) + SIZEOF_WCHAR_STRING(sz + 1));
203 ret->wdata.len = sz;
204 ret->wdata.str[sz] = L'\0';
205 }
206
207 return (ret);
208 }
209
210 /*
211 * Create a copy of binary string.
212 * New string size equals the length of the old string.
213 */
214 struct bwstring *
bwsdup(const struct bwstring * s)215 bwsdup(const struct bwstring *s)
216 {
217
218 if (s == NULL)
219 return (NULL);
220 else {
221 struct bwstring *ret = bwsalloc(BWSLEN(s));
222
223 if (mb_cur_max == 1)
224 memcpy(ret->cdata.str, s->cdata.str, (s->cdata.len));
225 else
226 memcpy(ret->wdata.str, s->wdata.str,
227 SIZEOF_WCHAR_STRING(s->wdata.len));
228
229 return (ret);
230 }
231 }
232
233 /*
234 * Create a new binary string from a wide character buffer.
235 */
236 struct bwstring *
bwssbdup(const wchar_t * str,size_t len)237 bwssbdup(const wchar_t *str, size_t len)
238 {
239
240 if (str == NULL)
241 return ((len == 0) ? bwsalloc(0) : NULL);
242 else {
243 struct bwstring *ret;
244
245 ret = bwsalloc(len);
246
247 if (mb_cur_max == 1)
248 for (size_t i = 0; i < len; ++i)
249 ret->cdata.str[i] = (char)str[i];
250 else
251 memcpy(ret->wdata.str, str, SIZEOF_WCHAR_STRING(len));
252
253 return (ret);
254 }
255 }
256
257 /*
258 * Create a new binary string from a raw binary buffer.
259 */
260 struct bwstring *
bwscsbdup(const unsigned char * str,size_t len)261 bwscsbdup(const unsigned char *str, size_t len)
262 {
263 struct bwstring *ret;
264
265 ret = bwsalloc(len);
266
267 if (str) {
268 if (mb_cur_max == 1)
269 memcpy(ret->cdata.str, str, len);
270 else {
271 mbstate_t mbs;
272 const char *s;
273 size_t charlen, chars, cptr;
274
275 chars = 0;
276 cptr = 0;
277 s = (const char *) str;
278
279 memset(&mbs, 0, sizeof(mbs));
280
281 while (cptr < len) {
282 size_t n = mb_cur_max;
283
284 if (n > len - cptr)
285 n = len - cptr;
286 charlen = mbrlen(s + cptr, n, &mbs);
287 switch (charlen) {
288 case 0:
289 /* FALLTHROUGH */
290 case (size_t) -1:
291 /* FALLTHROUGH */
292 case (size_t) -2:
293 ret->wdata.str[chars++] =
294 (unsigned char) s[cptr];
295 ++cptr;
296 break;
297 default:
298 n = mbrtowc(ret->wdata.str + (chars++),
299 s + cptr, charlen, &mbs);
300 if ((n == (size_t)-1) || (n == (size_t)-2))
301 /* NOTREACHED */
302 err(2, "mbrtowc error");
303 cptr += charlen;
304 }
305 }
306
307 ret->wdata.len = chars;
308 ret->wdata.str[ret->wdata.len] = L'\0';
309 }
310 }
311 return (ret);
312 }
313
/*
 * Release a binary string with sort_free().  A NULL argument is ignored.
 */
void
bwsfree(const struct bwstring *s)
{

	if (s == NULL)
		return;

	sort_free(s);
}
324
325 /*
326 * Copy content of src binary string to dst.
327 * If the capacity of the dst string is not sufficient,
328 * then the data is truncated.
329 */
330 size_t
bwscpy(struct bwstring * dst,const struct bwstring * src)331 bwscpy(struct bwstring *dst, const struct bwstring *src)
332 {
333 size_t nums = BWSLEN(src);
334
335 if (nums > BWSLEN(dst))
336 nums = BWSLEN(dst);
337
338 if (mb_cur_max == 1) {
339 memcpy(dst->cdata.str, src->cdata.str, nums);
340 dst->cdata.len = nums;
341 dst->cdata.str[dst->cdata.len] = '\0';
342 } else {
343 memcpy(dst->wdata.str, src->wdata.str,
344 SIZEOF_WCHAR_STRING(nums));
345 dst->wdata.len = nums;
346 dst->wdata.str[nums] = L'\0';
347 }
348
349 return (nums);
350 }
351
352 /*
353 * Copy content of src binary string to dst,
354 * with specified number of symbols to be copied.
355 * If the capacity of the dst string is not sufficient,
356 * then the data is truncated.
357 */
358 struct bwstring *
bwsncpy(struct bwstring * dst,const struct bwstring * src,size_t size)359 bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
360 {
361 size_t nums = BWSLEN(src);
362
363 if (nums > BWSLEN(dst))
364 nums = BWSLEN(dst);
365 if (nums > size)
366 nums = size;
367
368 if (mb_cur_max == 1) {
369 memcpy(dst->cdata.str, src->cdata.str, nums);
370 dst->cdata.len = nums;
371 dst->cdata.str[nums] = '\0';
372 } else {
373 memcpy(dst->wdata.str, src->wdata.str,
374 SIZEOF_WCHAR_STRING(nums));
375 dst->wdata.len = nums;
376 dst->wdata.str[nums] = L'\0';
377 }
378
379 return (dst);
380 }
381
382 /*
383 * Copy content of src binary string to dst,
384 * with specified number of symbols to be copied.
385 * An offset value can be specified, from the start of src string.
386 * If the capacity of the dst string is not sufficient,
387 * then the data is truncated.
388 */
389 struct bwstring *
bwsnocpy(struct bwstring * dst,const struct bwstring * src,size_t offset,size_t size)390 bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
391 size_t size)
392 {
393
394 if (offset >= BWSLEN(src)) {
395 bws_setlen(dst, 0);
396 } else {
397 size_t nums = BWSLEN(src) - offset;
398
399 if (nums > BWSLEN(dst))
400 nums = BWSLEN(dst);
401 if (nums > size)
402 nums = size;
403 if (mb_cur_max == 1) {
404 memcpy(dst->cdata.str, src->cdata.str + offset, nums);
405 dst->cdata.len = nums;
406 dst->cdata.str[nums] = '\0';
407 } else {
408 memcpy(dst->wdata.str, src->wdata.str + offset,
409 SIZEOF_WCHAR_STRING(nums));
410 dst->wdata.len = nums;
411 dst->wdata.str[nums] = L'\0';
412 }
413 }
414 return (dst);
415 }
416
417 /*
418 * Write binary string to the file.
419 * The output is ended either with '\n' (nl == true)
420 * or '\0' (nl == false).
421 */
422 size_t
bwsfwrite(struct bwstring * bws,FILE * f,bool zero_ended)423 bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
424 {
425
426 if (mb_cur_max == 1) {
427 size_t len = bws->cdata.len;
428
429 if (!zero_ended) {
430 bws->cdata.str[len] = '\n';
431
432 if (fwrite(bws->cdata.str, len + 1, 1, f) < 1)
433 err(2, NULL);
434
435 bws->cdata.str[len] = '\0';
436 } else if (fwrite(bws->cdata.str, len + 1, 1, f) < 1)
437 err(2, NULL);
438
439 return (len + 1);
440
441 } else {
442 wchar_t eols;
443 size_t printed = 0;
444
445 eols = zero_ended ? btowc('\0') : btowc('\n');
446
447 while (printed < BWSLEN(bws)) {
448 const wchar_t *s = bws->wdata.str + printed;
449
450 if (*s == L'\0') {
451 int nums;
452
453 nums = fwprintf(f, L"%lc", *s);
454
455 if (nums != 1)
456 err(2, NULL);
457 ++printed;
458 } else {
459 int nums;
460
461 nums = fwprintf(f, L"%ls", s);
462
463 if (nums < 1)
464 err(2, NULL);
465 printed += nums;
466 }
467 }
468 fwprintf(f, L"%lc", eols);
469 return (printed + 1);
470 }
471 }
472
473 /*
474 * Allocate and read a binary string from file.
475 * The strings are nl-ended or zero-ended, depending on the sort setting.
476 */
477 struct bwstring *
bwsfgetln(FILE * f,size_t * len,bool zero_ended,struct reader_buffer * rb)478 bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
479 {
480 wint_t eols;
481
482 eols = zero_ended ? btowc('\0') : btowc('\n');
483
484 if (!zero_ended && (mb_cur_max > 1)) {
485 wchar_t *ret;
486
487 ret = fgetwln(f, len);
488
489 if (ret == NULL) {
490 if (!feof(f))
491 err(2, NULL);
492 return (NULL);
493 }
494 if (*len > 0) {
495 if (ret[*len - 1] == (wchar_t)eols)
496 --(*len);
497 }
498 return (bwssbdup(ret, *len));
499
500 } else if (!zero_ended && (mb_cur_max == 1)) {
501 char *ret;
502
503 ret = fgetln(f, len);
504
505 if (ret == NULL) {
506 if (!feof(f))
507 err(2, NULL);
508 return (NULL);
509 }
510 if (*len > 0) {
511 if (ret[*len - 1] == '\n')
512 --(*len);
513 }
514 return (bwscsbdup((unsigned char *)ret, *len));
515
516 } else {
517 *len = 0;
518
519 if (feof(f))
520 return (NULL);
521
522 if (2 >= rb->fgetwln_z_buffer_size) {
523 rb->fgetwln_z_buffer_size += 256;
524 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
525 sizeof(wchar_t) * rb->fgetwln_z_buffer_size);
526 }
527 rb->fgetwln_z_buffer[*len] = 0;
528
529 if (mb_cur_max == 1)
530 while (!feof(f)) {
531 int c;
532
533 c = fgetc(f);
534
535 if (c == EOF) {
536 if (*len == 0)
537 return (NULL);
538 goto line_read_done;
539 }
540 if (c == eols)
541 goto line_read_done;
542
543 if (*len + 1 >= rb->fgetwln_z_buffer_size) {
544 rb->fgetwln_z_buffer_size += 256;
545 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
546 SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
547 }
548
549 rb->fgetwln_z_buffer[*len] = c;
550 rb->fgetwln_z_buffer[++(*len)] = 0;
551 }
552 else
553 while (!feof(f)) {
554 wint_t c;
555
556 c = fgetwc(f);
557
558 if (c == WEOF) {
559 if (*len == 0)
560 return (NULL);
561 goto line_read_done;
562 }
563 if (c == eols)
564 goto line_read_done;
565
566 if (*len + 1 >= rb->fgetwln_z_buffer_size) {
567 rb->fgetwln_z_buffer_size += 256;
568 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
569 SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
570 }
571
572 rb->fgetwln_z_buffer[*len] = c;
573 rb->fgetwln_z_buffer[++(*len)] = 0;
574 }
575
576 line_read_done:
577 /* we do not count the last 0 */
578 return (bwssbdup(rb->fgetwln_z_buffer, *len));
579 }
580 }
581
582 int
bwsncmp(const struct bwstring * bws1,const struct bwstring * bws2,size_t offset,size_t len)583 bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
584 size_t offset, size_t len)
585 {
586 size_t cmp_len, len1, len2;
587 int res;
588
589 len1 = BWSLEN(bws1);
590 len2 = BWSLEN(bws2);
591
592 if (len1 <= offset) {
593 return ((len2 <= offset) ? 0 : -1);
594 } else {
595 if (len2 <= offset)
596 return (+1);
597 else {
598 len1 -= offset;
599 len2 -= offset;
600
601 cmp_len = len1;
602
603 if (len2 < cmp_len)
604 cmp_len = len2;
605
606 if (len < cmp_len)
607 cmp_len = len;
608
609 if (mb_cur_max == 1) {
610 const char *s1, *s2;
611
612 s1 = bws1->cdata.str + offset;
613 s2 = bws2->cdata.str + offset;
614
615 res = memcmp(s1, s2, cmp_len);
616
617 } else {
618 const wchar_t *s1, *s2;
619
620 s1 = bws1->wdata.str + offset;
621 s2 = bws2->wdata.str + offset;
622
623 res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
624 }
625 }
626 }
627
628 if (res == 0) {
629 if (len1 < cmp_len && len1 < len2)
630 res = -1;
631 else if (len2 < cmp_len && len2 < len1)
632 res = +1;
633 }
634
635 return (res);
636 }
637
/*
 * Compare two binary strings by raw memory content, starting offset
 * characters into each of them.
 *
 * The common prefix is compared with bwsncmp(); when that is equal the
 * string with fewer remaining characters sorts first.  The remaining
 * lengths are computed with unsigned arithmetic, so they wrap around when
 * offset exceeds a string's length.
 */
int
bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
{
	size_t rest1, rest2;
	int res;

	rest1 = BWSLEN(bws1) - offset;
	rest2 = BWSLEN(bws2) - offset;

	res = bwsncmp(bws1, bws2, offset, (rest2 < rest1) ? rest2 : rest1);
	if (res != 0)
		return (res);

	if (rest1 < rest2)
		return (-1);
	if (rest2 < rest1)
		return (+1);

	return (0);
}
666
667 int
bws_iterator_cmp(bwstring_iterator iter1,bwstring_iterator iter2,size_t len)668 bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
669 {
670 wchar_t c1, c2;
671 size_t i;
672
673 for (i = 0; i < len; ++i) {
674 c1 = bws_get_iter_value(iter1);
675 c2 = bws_get_iter_value(iter2);
676 if (c1 != c2)
677 return (c1 - c2);
678 iter1 = bws_iterator_inc(iter1, 1);
679 iter2 = bws_iterator_inc(iter2, 1);
680 }
681
682 return (0);
683 }
684
685 int
bwscoll(const struct bwstring * bws1,const struct bwstring * bws2,size_t offset)686 bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
687 {
688 size_t len1, len2;
689
690 len1 = BWSLEN(bws1);
691 len2 = BWSLEN(bws2);
692
693 if (len1 <= offset)
694 return ((len2 <= offset) ? 0 : -1);
695 else {
696 if (len2 <= offset)
697 return (+1);
698 else {
699 len1 -= offset;
700 len2 -= offset;
701
702 if (mb_cur_max == 1) {
703 const char *s1, *s2;
704
705 s1 = bws1->cdata.str + offset;
706 s2 = bws2->cdata.str + offset;
707
708 if (byte_sort) {
709 int res;
710
711 if (len1 > len2) {
712 res = memcmp(s1, s2, len2);
713 if (!res)
714 res = +1;
715 } else if (len1 < len2) {
716 res = memcmp(s1, s2, len1);
717 if (!res)
718 res = -1;
719 } else
720 res = memcmp(s1, s2, len1);
721
722 return (res);
723
724 } else {
725 int res;
726 size_t i, maxlen;
727
728 i = 0;
729 maxlen = len1;
730
731 if (maxlen > len2)
732 maxlen = len2;
733
734 while (i < maxlen) {
735 /* goto next non-zero part: */
736 while ((i < maxlen) &&
737 !s1[i] && !s2[i])
738 ++i;
739
740 if (i >= maxlen)
741 break;
742
743 if (s1[i] == 0) {
744 if (s2[i] == 0)
745 /* NOTREACHED */
746 err(2, "bwscoll error 01");
747 else
748 return (-1);
749 } else if (s2[i] == 0)
750 return (+1);
751
752 res = strcoll((const char*)(s1 + i), (const char*)(s2 + i));
753 if (res)
754 return (res);
755
756 while ((i < maxlen) &&
757 s1[i] && s2[i])
758 ++i;
759
760 if (i >= maxlen)
761 break;
762
763 if (s1[i] == 0) {
764 if (s2[i] == 0) {
765 ++i;
766 continue;
767 } else
768 return (-1);
769 } else if (s2[i] == 0)
770 return (+1);
771 else
772 /* NOTREACHED */
773 err(2, "bwscoll error 02");
774 }
775
776 if (len1 < len2)
777 return (-1);
778 else if (len1 > len2)
779 return (+1);
780
781 return (0);
782 }
783 } else {
784 const wchar_t *s1, *s2;
785 size_t i, maxlen;
786 int res;
787
788 s1 = bws1->wdata.str + offset;
789 s2 = bws2->wdata.str + offset;
790
791 i = 0;
792 maxlen = len1;
793
794 if (maxlen > len2)
795 maxlen = len2;
796
797 while (i < maxlen) {
798
799 /* goto next non-zero part: */
800 while ((i < maxlen) &&
801 !s1[i] && !s2[i])
802 ++i;
803
804 if (i >= maxlen)
805 break;
806
807 if (s1[i] == 0) {
808 if (s2[i] == 0)
809 /* NOTREACHED */
810 err(2, "bwscoll error 1");
811 else
812 return (-1);
813 } else if (s2[i] == 0)
814 return (+1);
815
816 res = wide_str_coll(s1 + i, s2 + i);
817 if (res)
818 return (res);
819
820 while ((i < maxlen) && s1[i] && s2[i])
821 ++i;
822
823 if (i >= maxlen)
824 break;
825
826 if (s1[i] == 0) {
827 if (s2[i] == 0) {
828 ++i;
829 continue;
830 } else
831 return (-1);
832 } else if (s2[i] == 0)
833 return (+1);
834 else
835 /* NOTREACHED */
836 err(2, "bwscoll error 2");
837 }
838
839 if (len1 < len2)
840 return (-1);
841 else if (len1 > len2)
842 return (+1);
843
844 return (0);
845 }
846 }
847 }
848 }
849
850 /*
851 * Correction of the system API
852 */
853 double
bwstod(struct bwstring * s0,bool * empty)854 bwstod(struct bwstring *s0, bool *empty)
855 {
856 double ret;
857
858 if (mb_cur_max == 1) {
859 char *end, *s;
860 char *ep;
861
862 s = s0->cdata.str;
863 end = s + s0->cdata.len;
864 ep = NULL;
865
866 while (isblank(*s) && s < end)
867 ++s;
868
869 if (!isprint(*s)) {
870 *empty = true;
871 return (0);
872 }
873
874 ret = strtod((char*)s, &ep);
875 if (ep == s) {
876 *empty = true;
877 return (0);
878 }
879 } else {
880 wchar_t *end, *ep, *s;
881
882 s = s0->wdata.str;
883 end = s + s0->wdata.len;
884 ep = NULL;
885
886 while (iswblank(*s) && s < end)
887 ++s;
888
889 if (!iswprint(*s)) {
890 *empty = true;
891 return (0);
892 }
893
894 ret = wcstod(s, &ep);
895 if (ep == s) {
896 *empty = true;
897 return (0);
898 }
899 }
900
901 *empty = false;
902 return (ret);
903 }
904
905 /*
906 * A helper function for monthcoll. If a line matches
907 * a month name, it returns (number of the month - 1),
908 * while if there is no match, it just return -1.
909 */
910
911 int
bws_month_score(const struct bwstring * s0)912 bws_month_score(const struct bwstring *s0)
913 {
914
915 if (mb_cur_max == 1) {
916 const char *end, *s;
917
918 s = s0->cdata.str;
919 end = s + s0->cdata.len;
920
921 while (isblank(*s) && s < end)
922 ++s;
923
924 for (int i = 11; i >= 0; --i) {
925 if (cmonths[i] &&
926 (s == strstr(s, cmonths[i])))
927 return (i);
928 }
929
930 } else {
931 const wchar_t *end, *s;
932
933 s = s0->wdata.str;
934 end = s + s0->wdata.len;
935
936 while (iswblank(*s) && s < end)
937 ++s;
938
939 for (int i = 11; i >= 0; --i) {
940 if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
941 return (i);
942 }
943 }
944
945 return (-1);
946 }
947
948 /*
949 * Rips out leading blanks (-b).
950 */
951 struct bwstring *
ignore_leading_blanks(struct bwstring * str)952 ignore_leading_blanks(struct bwstring *str)
953 {
954
955 if (mb_cur_max == 1) {
956 char *dst, *end, *src;
957
958 src = str->cdata.str;
959 dst = src;
960 end = src + str->cdata.len;
961
962 while (src < end && isblank(*src))
963 ++src;
964
965 if (src != dst) {
966 size_t newlen;
967
968 newlen = BWSLEN(str) - (src - dst);
969
970 while (src < end) {
971 *dst = *src;
972 ++dst;
973 ++src;
974 }
975 bws_setlen(str, newlen);
976 }
977 } else {
978 wchar_t *dst, *end, *src;
979
980 src = str->wdata.str;
981 dst = src;
982 end = src + str->wdata.len;
983
984 while (src < end && iswblank(*src))
985 ++src;
986
987 if (src != dst) {
988
989 size_t newlen = BWSLEN(str) - (src - dst);
990
991 while (src < end) {
992 *dst = *src;
993 ++dst;
994 ++src;
995 }
996 bws_setlen(str, newlen);
997
998 }
999 }
1000 return (str);
1001 }
1002
1003 /*
1004 * Rips out nonprinting characters (-i).
1005 */
1006 struct bwstring *
ignore_nonprinting(struct bwstring * str)1007 ignore_nonprinting(struct bwstring *str)
1008 {
1009 size_t newlen = BWSLEN(str);
1010
1011 if (mb_cur_max == 1) {
1012 char *dst, *end, *src;
1013 char c;
1014
1015 src = str->cdata.str;
1016 dst = src;
1017 end = src + str->cdata.len;
1018
1019 while (src < end) {
1020 c = *src;
1021 if (isprint(c)) {
1022 *dst = c;
1023 ++dst;
1024 ++src;
1025 } else {
1026 ++src;
1027 --newlen;
1028 }
1029 }
1030 } else {
1031 wchar_t *dst, *end, *src;
1032 wchar_t c;
1033
1034 src = str->wdata.str;
1035 dst = src;
1036 end = src + str->wdata.len;
1037
1038 while (src < end) {
1039 c = *src;
1040 if (iswprint(c)) {
1041 *dst = c;
1042 ++dst;
1043 ++src;
1044 } else {
1045 ++src;
1046 --newlen;
1047 }
1048 }
1049 }
1050 bws_setlen(str, newlen);
1051
1052 return (str);
1053 }
1054
1055 /*
1056 * Rips out any characters that are not alphanumeric characters
1057 * nor blanks (-d).
1058 */
1059 struct bwstring *
dictionary_order(struct bwstring * str)1060 dictionary_order(struct bwstring *str)
1061 {
1062 size_t newlen = BWSLEN(str);
1063
1064 if (mb_cur_max == 1) {
1065 char *dst, *end, *src;
1066 char c;
1067
1068 src = str->cdata.str;
1069 dst = src;
1070 end = src + str->cdata.len;
1071
1072 while (src < end) {
1073 c = *src;
1074 if (isalnum(c) || isblank(c)) {
1075 *dst = c;
1076 ++dst;
1077 ++src;
1078 } else {
1079 ++src;
1080 --newlen;
1081 }
1082 }
1083 } else {
1084 wchar_t *dst, *end, *src;
1085 wchar_t c;
1086
1087 src = str->wdata.str;
1088 dst = src;
1089 end = src + str->wdata.len;
1090
1091 while (src < end) {
1092 c = *src;
1093 if (iswalnum(c) || iswblank(c)) {
1094 *dst = c;
1095 ++dst;
1096 ++src;
1097 } else {
1098 ++src;
1099 --newlen;
1100 }
1101 }
1102 }
1103 bws_setlen(str, newlen);
1104
1105 return (str);
1106 }
1107
1108 /*
1109 * Converts string to lower case(-f).
1110 */
1111 struct bwstring *
ignore_case(struct bwstring * str)1112 ignore_case(struct bwstring *str)
1113 {
1114
1115 if (mb_cur_max == 1) {
1116 char *end, *s;
1117
1118 s = str->cdata.str;
1119 end = s + str->cdata.len;
1120
1121 while (s < end) {
1122 *s = toupper(*s);
1123 ++s;
1124 }
1125 } else {
1126 wchar_t *end, *s;
1127
1128 s = str->wdata.str;
1129 end = s + str->wdata.len;
1130
1131 while (s < end) {
1132 *s = towupper(*s);
1133 ++s;
1134 }
1135 }
1136 return (str);
1137 }
1138
1139 void
bws_disorder_warnx(struct bwstring * s,const char * fn,size_t pos)1140 bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
1141 {
1142
1143 if (mb_cur_max == 1)
1144 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->cdata.str);
1145 else
1146 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->wdata.str);
1147 }
1148