1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (C) 2009 Gabor Kovesdan <[email protected]>
5 * Copyright (C) 2012 Oleg Moskalenko <[email protected]>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 #include <sys/types.h>
32
33 #include <errno.h>
34 #include <err.h>
35 #include <langinfo.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <md5.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <wchar.h>
42 #include <wctype.h>
43
44 #include "coll.h"
45 #include "vsort.h"
46
47 struct key_specs *keys;
48 size_t keys_num = 0;
49
50 wint_t symbol_decimal_point = L'.';
51 /* there is no default thousands separator in collate rules: */
52 wint_t symbol_thousands_sep = 0;
53 wint_t symbol_negative_sign = L'-';
54 wint_t symbol_positive_sign = L'+';
55
56 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
57 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
58 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
59 static int numcoll(struct key_value*, struct key_value *, size_t offset);
60 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
61 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
62 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
63
64 /*
65 * Allocate keys array
66 */
67 struct keys_array *
keys_array_alloc(void)68 keys_array_alloc(void)
69 {
70 struct keys_array *ka;
71 size_t sz;
72
73 sz = keys_array_size();
74 ka = sort_calloc(1, sz);
75
76 return (ka);
77 }
78
79 /*
80 * Calculate whether we need key hint space
81 */
82 static size_t
key_hint_size(void)83 key_hint_size(void)
84 {
85
86 return (need_hint ? sizeof(struct key_hint) : 0);
87 }
88
89 /*
90 * Calculate keys array size
91 */
92 size_t
keys_array_size(void)93 keys_array_size(void)
94 {
95
96 return (keys_num * (sizeof(struct key_value) + key_hint_size()));
97 }
98
99 /*
100 * Clean data of keys array
101 */
102 void
clean_keys_array(const struct bwstring * s,struct keys_array * ka)103 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
104 {
105
106 if (ka) {
107 for (size_t i = 0; i < keys_num; ++i) {
108 const struct key_value *kv;
109
110 kv = get_key_from_keys_array(ka, i);
111 if (kv->k && kv->k != s)
112 bwsfree(kv->k);
113 }
114 memset(ka, 0, keys_array_size());
115 }
116 }
117
118 /*
119 * Get pointer to a key value in the keys set
120 */
121 struct key_value *
get_key_from_keys_array(struct keys_array * ka,size_t ind)122 get_key_from_keys_array(struct keys_array *ka, size_t ind)
123 {
124
125 return ((struct key_value *)((caddr_t)ka->key +
126 ind * (sizeof(struct key_value) + key_hint_size())));
127 }
128
129 /*
130 * Set value of a key in the keys set
131 */
132 void
set_key_on_keys_array(struct keys_array * ka,struct bwstring * s,size_t ind)133 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
134 {
135
136 if (ka && keys_num > ind) {
137 struct key_value *kv;
138
139 kv = get_key_from_keys_array(ka, ind);
140
141 if (kv->k && kv->k != s)
142 bwsfree(kv->k);
143 kv->k = s;
144 }
145 }
146
147 /*
148 * Initialize a sort list item
149 */
150 struct sort_list_item *
sort_list_item_alloc(void)151 sort_list_item_alloc(void)
152 {
153 struct sort_list_item *si;
154 size_t sz;
155
156 sz = sizeof(struct sort_list_item) + keys_array_size();
157 si = sort_calloc(1, sz);
158
159 return (si);
160 }
161
162 size_t
sort_list_item_size(struct sort_list_item * si)163 sort_list_item_size(struct sort_list_item *si)
164 {
165 size_t ret = 0;
166
167 if (si) {
168 ret = sizeof(struct sort_list_item) + keys_array_size();
169 if (si->str)
170 ret += bws_memsize(si->str);
171 for (size_t i = 0; i < keys_num; ++i) {
172 const struct key_value *kv;
173
174 kv = get_key_from_keys_array(&si->ka, i);
175
176 if (kv->k != si->str)
177 ret += bws_memsize(kv->k);
178 }
179 }
180 return (ret);
181 }
182
183 /*
184 * Calculate key for a sort list item
185 */
186 static void
sort_list_item_make_key(struct sort_list_item * si)187 sort_list_item_make_key(struct sort_list_item *si)
188 {
189
190 preproc(si->str, &(si->ka));
191 }
192
193 /*
194 * Set value of a sort list item.
195 * Return combined string and keys memory size.
196 */
197 void
sort_list_item_set(struct sort_list_item * si,struct bwstring * str)198 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
199 {
200
201 if (si) {
202 clean_keys_array(si->str, &(si->ka));
203 if (si->str) {
204 if (si->str == str) {
205 /* we are trying to reset the same string */
206 return;
207 } else {
208 bwsfree(si->str);
209 si->str = NULL;
210 }
211 }
212 si->str = str;
213 sort_list_item_make_key(si);
214 }
215 }
216
217 /*
218 * De-allocate a sort list item object memory
219 */
220 void
sort_list_item_clean(struct sort_list_item * si)221 sort_list_item_clean(struct sort_list_item *si)
222 {
223
224 if (si) {
225 clean_keys_array(si->str, &(si->ka));
226 if (si->str) {
227 bwsfree(si->str);
228 si->str = NULL;
229 }
230 }
231 }
232
233 /*
234 * Skip columns according to specs
235 */
236 static size_t
skip_cols_to_start(const struct bwstring * s,size_t cols,size_t start,bool skip_blanks,bool * empty_key)237 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
238 bool skip_blanks, bool *empty_key)
239 {
240 if (cols < 1)
241 return (BWSLEN(s) + 1);
242
243 if (skip_blanks)
244 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
245 ++start;
246
247 while (start < BWSLEN(s) && cols > 1) {
248 --cols;
249 ++start;
250 }
251
252 if (start >= BWSLEN(s))
253 *empty_key = true;
254
255 return (start);
256 }
257
258 /*
259 * Skip fields according to specs
260 */
261 static size_t
skip_fields_to_start(const struct bwstring * s,size_t fields,bool * empty_field)262 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
263 {
264
265 if (fields < 2) {
266 if (BWSLEN(s) == 0)
267 *empty_field = true;
268 return (0);
269 } else if (!(sort_opts_vals.tflag)) {
270 size_t cpos = 0;
271 bool pb = true;
272
273 while (cpos < BWSLEN(s)) {
274 bool isblank;
275
276 isblank = iswblank(BWS_GET(s, cpos));
277
278 if (isblank && !pb) {
279 --fields;
280 if (fields <= 1)
281 return (cpos);
282 }
283 pb = isblank;
284 ++cpos;
285 }
286 if (fields > 1)
287 *empty_field = true;
288 return (cpos);
289 } else {
290 size_t cpos = 0;
291
292 while (cpos < BWSLEN(s)) {
293 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
294 --fields;
295 if (fields <= 1)
296 return (cpos + 1);
297 }
298 ++cpos;
299 }
300 if (fields > 1)
301 *empty_field = true;
302 return (cpos);
303 }
304 }
305
306 /*
307 * Find fields start
308 */
309 static void
find_field_start(const struct bwstring * s,struct key_specs * ks,size_t * field_start,size_t * key_start,bool * empty_field,bool * empty_key)310 find_field_start(const struct bwstring *s, struct key_specs *ks,
311 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
312 {
313
314 *field_start = skip_fields_to_start(s, ks->f1, empty_field);
315 if (!*empty_field)
316 *key_start = skip_cols_to_start(s, ks->c1, *field_start,
317 ks->pos1b, empty_key);
318 else
319 *empty_key = true;
320 }
321
322 /*
323 * Find end key position
324 */
325 static size_t
find_field_end(const struct bwstring * s,struct key_specs * ks)326 find_field_end(const struct bwstring *s, struct key_specs *ks)
327 {
328 size_t f2, next_field_start, pos_end;
329 bool empty_field, empty_key;
330
331 empty_field = false;
332 empty_key = false;
333 f2 = ks->f2;
334
335 if (f2 == 0)
336 return (BWSLEN(s) + 1);
337 else {
338 if (ks->c2 == 0) {
339 next_field_start = skip_fields_to_start(s, f2 + 1,
340 &empty_field);
341 if ((next_field_start > 0) && sort_opts_vals.tflag &&
342 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
343 next_field_start - 1)))
344 --next_field_start;
345 } else
346 next_field_start = skip_fields_to_start(s, f2,
347 &empty_field);
348 }
349
350 if (empty_field || (next_field_start >= BWSLEN(s)))
351 return (BWSLEN(s) + 1);
352
353 if (ks->c2) {
354 pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
355 ks->pos2b, &empty_key);
356 if (pos_end < BWSLEN(s))
357 ++pos_end;
358 } else
359 pos_end = next_field_start;
360
361 return (pos_end);
362 }
363
364 /*
365 * Cut a field according to the key specs
366 */
367 static struct bwstring *
cut_field(const struct bwstring * s,struct key_specs * ks)368 cut_field(const struct bwstring *s, struct key_specs *ks)
369 {
370 struct bwstring *ret = NULL;
371
372 if (s && ks) {
373 size_t field_start, key_end, key_start, sz;
374 bool empty_field, empty_key;
375
376 field_start = 0;
377 key_start = 0;
378 empty_field = false;
379 empty_key = false;
380
381 find_field_start(s, ks, &field_start, &key_start,
382 &empty_field, &empty_key);
383
384 if (empty_key)
385 sz = 0;
386 else {
387 key_end = find_field_end(s, ks);
388 sz = (key_end < key_start) ? 0 : (key_end - key_start);
389 }
390
391 ret = bwsalloc(sz);
392 if (sz)
393 bwsnocpy(ret, s, key_start, sz);
394 } else
395 ret = bwsalloc(0);
396
397 return (ret);
398 }
399
400 /*
401 * Preprocesses a line applying the necessary transformations
402 * specified by command line options and returns the preprocessed
403 * string, which can be used to compare.
404 */
405 int
preproc(struct bwstring * s,struct keys_array * ka)406 preproc(struct bwstring *s, struct keys_array *ka)
407 {
408
409 if (sort_opts_vals.kflag)
410 for (size_t i = 0; i < keys_num; i++) {
411 struct bwstring *key;
412 struct key_specs *kspecs;
413 struct sort_mods *sm;
414
415 kspecs = &(keys[i]);
416 key = cut_field(s, kspecs);
417
418 sm = &(kspecs->sm);
419 if (sm->dflag)
420 key = dictionary_order(key);
421 else if (sm->iflag)
422 key = ignore_nonprinting(key);
423 if (sm->fflag || sm->Mflag)
424 key = ignore_case(key);
425
426 set_key_on_keys_array(ka, key, i);
427 }
428 else {
429 struct bwstring *ret = NULL;
430 struct sort_mods *sm = default_sort_mods;
431
432 if (sm->bflag) {
433 if (ret == NULL)
434 ret = bwsdup(s);
435 ret = ignore_leading_blanks(ret);
436 }
437 if (sm->dflag) {
438 if (ret == NULL)
439 ret = bwsdup(s);
440 ret = dictionary_order(ret);
441 } else if (sm->iflag) {
442 if (ret == NULL)
443 ret = bwsdup(s);
444 ret = ignore_nonprinting(ret);
445 }
446 if (sm->fflag || sm->Mflag) {
447 if (ret == NULL)
448 ret = bwsdup(s);
449 ret = ignore_case(ret);
450 }
451 if (ret == NULL)
452 set_key_on_keys_array(ka, s, 0);
453 else
454 set_key_on_keys_array(ka, ret, 0);
455 }
456
457 return 0;
458 }
459
460 cmpcoll_t
get_sort_func(struct sort_mods * sm)461 get_sort_func(struct sort_mods *sm)
462 {
463
464 if (sm->nflag)
465 return (numcoll);
466 else if (sm->hflag)
467 return (hnumcoll);
468 else if (sm->gflag)
469 return (gnumcoll);
470 else if (sm->Mflag)
471 return (monthcoll);
472 else if (sm->Rflag)
473 return (randomcoll);
474 else if (sm->Vflag)
475 return (versioncoll);
476 else
477 return (wstrcoll);
478 }
479
480 /*
481 * Compares the given strings. Returns a positive number if
482 * the first precedes the second, a negative number if the second is
483 * the preceding one, and zero if they are equal. This function calls
484 * the underlying collate functions, which done the actual comparison.
485 */
486 int
key_coll(struct keys_array * ps1,struct keys_array * ps2,size_t offset)487 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
488 {
489 struct key_value *kv1, *kv2;
490 struct sort_mods *sm;
491 int res = 0;
492
493 for (size_t i = 0; i < keys_num; ++i) {
494 kv1 = get_key_from_keys_array(ps1, i);
495 kv2 = get_key_from_keys_array(ps2, i);
496 sm = &(keys[i].sm);
497
498 if (sm->rflag)
499 res = sm->func(kv2, kv1, offset);
500 else
501 res = sm->func(kv1, kv2, offset);
502
503 if (res)
504 break;
505
506 /* offset applies to only the first key */
507 offset = 0;
508 }
509 return (res);
510 }
511
512 /*
513 * Compare two strings.
514 * Plain symbol-by-symbol comparison.
515 */
516 int
top_level_str_coll(const struct bwstring * s1,const struct bwstring * s2)517 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
518 {
519
520 if (default_sort_mods->rflag) {
521 const struct bwstring *tmp;
522
523 tmp = s1;
524 s1 = s2;
525 s2 = tmp;
526 }
527
528 return (bwscoll(s1, s2, 0));
529 }
530
531 /*
532 * Compare a string and a sort list item, according to the sort specs.
533 */
534 int
str_list_coll(struct bwstring * str1,struct sort_list_item ** ss2)535 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
536 {
537 struct keys_array *ka1;
538 int ret = 0;
539
540 ka1 = keys_array_alloc();
541
542 preproc(str1, ka1);
543
544 sort_list_item_make_key(*ss2);
545
546 if (debug_sort) {
547 bwsprintf(stdout, str1, "; s1=<", ">");
548 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
549 }
550
551 ret = key_coll(ka1, &((*ss2)->ka), 0);
552
553 if (debug_sort)
554 printf("; cmp1=%d", ret);
555
556 clean_keys_array(str1, ka1);
557 sort_free(ka1);
558
559 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
560 ret = top_level_str_coll(str1, ((*ss2)->str));
561 if (debug_sort)
562 printf("; cmp2=%d", ret);
563 }
564
565 if (debug_sort)
566 printf("\n");
567
568 return (ret);
569 }
570
571 /*
572 * Compare two sort list items, according to the sort specs.
573 */
574 int
list_coll_offset(struct sort_list_item ** ss1,struct sort_list_item ** ss2,size_t offset)575 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
576 size_t offset)
577 {
578 int ret;
579
580 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
581
582 if (debug_sort) {
583 if (offset)
584 printf("; offset=%d", (int) offset);
585 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
586 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
587 printf("; cmp1=%d\n", ret);
588 }
589
590 if (ret)
591 return (ret);
592
593 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
594 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
595 if (debug_sort)
596 printf("; cmp2=%d\n", ret);
597 }
598
599 return (ret);
600 }
601
602 /*
603 * Compare two sort list items, according to the sort specs.
604 */
605 int
list_coll(struct sort_list_item ** ss1,struct sort_list_item ** ss2)606 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
607 {
608
609 return (list_coll_offset(ss1, ss2, 0));
610 }
611
612 #define LSCDEF(N) \
613 static int \
614 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
615 { \
616 \
617 return (list_coll_offset(ss1, ss2, N)); \
618 }
619
620 LSCDEF(1)
621 LSCDEF(2)
622 LSCDEF(3)
623 LSCDEF(4)
624 LSCDEF(5)
625 LSCDEF(6)
626 LSCDEF(7)
627 LSCDEF(8)
628 LSCDEF(9)
629 LSCDEF(10)
630 LSCDEF(11)
631 LSCDEF(12)
632 LSCDEF(13)
633 LSCDEF(14)
634 LSCDEF(15)
635 LSCDEF(16)
636 LSCDEF(17)
637 LSCDEF(18)
638 LSCDEF(19)
639 LSCDEF(20)
640
641 listcoll_t
get_list_call_func(size_t offset)642 get_list_call_func(size_t offset)
643 {
644 static const listcoll_t lsarray[] = { list_coll, list_coll_1,
645 list_coll_2, list_coll_3, list_coll_4, list_coll_5,
646 list_coll_6, list_coll_7, list_coll_8, list_coll_9,
647 list_coll_10, list_coll_11, list_coll_12, list_coll_13,
648 list_coll_14, list_coll_15, list_coll_16, list_coll_17,
649 list_coll_18, list_coll_19, list_coll_20 };
650
651 if (offset <= 20)
652 return (lsarray[offset]);
653
654 return (list_coll);
655 }
656
657 /*
658 * Compare two sort list items, only by their original string.
659 */
660 int
list_coll_by_str_only(struct sort_list_item ** ss1,struct sort_list_item ** ss2)661 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
662 {
663
664 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
665 }
666
667 /*
668 * Maximum size of a number in the string (before or after decimal point)
669 */
670 #define MAX_NUM_SIZE (128)
671
672 /*
673 * Set suffix value
674 */
setsuffix(wchar_t c,unsigned char * si)675 static void setsuffix(wchar_t c, unsigned char *si)
676 {
677 switch (c){
678 case L'k':
679 case L'K':
680 *si = 1;
681 break;
682 case L'M':
683 *si = 2;
684 break;
685 case L'G':
686 *si = 3;
687 break;
688 case L'T':
689 *si = 4;
690 break;
691 case L'P':
692 *si = 5;
693 break;
694 case L'E':
695 *si = 6;
696 break;
697 case L'Z':
698 *si = 7;
699 break;
700 case L'Y':
701 *si = 8;
702 break;
703 default:
704 *si = 0;
705 }
706 }
707
708 /*
709 * Read string s and parse the string into a fixed-decimal-point number.
710 * sign equals -1 if the number is negative (explicit plus is not allowed,
711 * according to GNU sort's "info sort".
712 * The number part before decimal point is in the smain, after the decimal
713 * point is in sfrac, tail is the pointer to the remainder of the string.
714 */
715 static int
read_number(struct bwstring * s0,int * sign,wchar_t * smain,size_t * main_len,wchar_t * sfrac,size_t * frac_len,unsigned char * si)716 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
717 {
718 bwstring_iterator s;
719
720 s = bws_begin(s0);
721
722 /* always end the fraction with zero, even if we have no fraction */
723 sfrac[0] = 0;
724
725 while (iswblank(bws_get_iter_value(s)))
726 s = bws_iterator_inc(s, 1);
727
728 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
729 *sign = -1;
730 s = bws_iterator_inc(s, 1);
731 }
732
733 // This is '0', not '\0', do not change this
734 while (iswdigit(bws_get_iter_value(s)) &&
735 (bws_get_iter_value(s) == L'0'))
736 s = bws_iterator_inc(s, 1);
737
738 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
739 if (iswdigit(bws_get_iter_value(s))) {
740 smain[*main_len] = bws_get_iter_value(s);
741 s = bws_iterator_inc(s, 1);
742 *main_len += 1;
743 } else if (symbol_thousands_sep &&
744 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
745 s = bws_iterator_inc(s, 1);
746 else
747 break;
748 }
749
750 smain[*main_len] = 0;
751
752 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
753 s = bws_iterator_inc(s, 1);
754 while (iswdigit(bws_get_iter_value(s)) &&
755 *frac_len < MAX_NUM_SIZE) {
756 sfrac[*frac_len] = bws_get_iter_value(s);
757 s = bws_iterator_inc(s, 1);
758 *frac_len += 1;
759 }
760 sfrac[*frac_len] = 0;
761
762 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
763 --(*frac_len);
764 sfrac[*frac_len] = L'\0';
765 }
766 }
767
768 setsuffix(bws_get_iter_value(s),si);
769
770 if ((*main_len + *frac_len) == 0)
771 *sign = 0;
772
773 return (0);
774 }
775
776 /*
777 * Implements string sort.
778 */
779 static int
wstrcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)780 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
781 {
782
783 if (debug_sort) {
784 if (offset)
785 printf("; offset=%d\n", (int) offset);
786 bwsprintf(stdout, kv1->k, "; k1=<", ">");
787 printf("(%zu)", BWSLEN(kv1->k));
788 bwsprintf(stdout, kv2->k, ", k2=<", ">");
789 printf("(%zu)", BWSLEN(kv2->k));
790 }
791
792 return (bwscoll(kv1->k, kv2->k, offset));
793 }
794
795 /*
796 * Compare two suffixes
797 */
798 static inline int
cmpsuffix(unsigned char si1,unsigned char si2)799 cmpsuffix(unsigned char si1, unsigned char si2)
800 {
801
802 return ((char)si1 - (char)si2);
803 }
804
805 /*
806 * Implements numeric sort for -n and -h.
807 */
808 static int
numcoll_impl(struct key_value * kv1,struct key_value * kv2,size_t offset __unused,bool use_suffix)809 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
810 size_t offset __unused, bool use_suffix)
811 {
812 struct bwstring *s1, *s2;
813 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
814 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
815 int cmp_res, sign1, sign2;
816 size_t frac1, frac2, main1, main2;
817 unsigned char SI1, SI2;
818 bool e1, e2, key1_read, key2_read;
819
820 s1 = kv1->k;
821 s2 = kv2->k;
822 sign1 = sign2 = 0;
823 main1 = main2 = 0;
824 frac1 = frac2 = 0;
825
826 key1_read = key2_read = false;
827
828 if (debug_sort) {
829 bwsprintf(stdout, s1, "; k1=<", ">");
830 bwsprintf(stdout, s2, ", k2=<", ">");
831 }
832
833 if (s1 == s2)
834 return (0);
835
836 if (kv1->hint->status == HS_UNINITIALIZED) {
837 /* read the number from the string */
838 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
839 key1_read = true;
840 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
841 if(main1 < 1 && frac1 < 1)
842 kv1->hint->v.nh.empty=true;
843 kv1->hint->v.nh.si = SI1;
844 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
845 HS_INITIALIZED : HS_ERROR;
846 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
847 }
848
849 if (kv2->hint->status == HS_UNINITIALIZED) {
850 /* read the number from the string */
851 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
852 key2_read = true;
853 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
854 if(main2 < 1 && frac2 < 1)
855 kv2->hint->v.nh.empty=true;
856 kv2->hint->v.nh.si = SI2;
857 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
858 HS_INITIALIZED : HS_ERROR;
859 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
860 }
861
862 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
863 HS_INITIALIZED) {
864 unsigned long long n1, n2;
865 bool neg1, neg2;
866
867 e1 = kv1->hint->v.nh.empty;
868 e2 = kv2->hint->v.nh.empty;
869
870 if (e1 && e2)
871 return (0);
872
873 neg1 = kv1->hint->v.nh.neg;
874 neg2 = kv2->hint->v.nh.neg;
875
876 if (neg1 && !neg2)
877 return (-1);
878 if (neg2 && !neg1)
879 return (+1);
880
881 if (e1)
882 return (neg2 ? +1 : -1);
883 else if (e2)
884 return (neg1 ? -1 : +1);
885
886
887 if (use_suffix) {
888 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
889 if (cmp_res)
890 return (neg1 ? -cmp_res : cmp_res);
891 }
892
893 n1 = kv1->hint->v.nh.n1;
894 n2 = kv2->hint->v.nh.n1;
895 if (n1 < n2)
896 return (neg1 ? +1 : -1);
897 else if (n1 > n2)
898 return (neg1 ? -1 : +1);
899 }
900
901 /* read the numbers from the strings */
902 if (!key1_read)
903 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
904 if (!key2_read)
905 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
906
907 e1 = ((main1 + frac1) == 0);
908 e2 = ((main2 + frac2) == 0);
909
910 if (e1 && e2)
911 return (0);
912
913 /* we know the result if the signs are different */
914 if (sign1 < 0 && sign2 >= 0)
915 return (-1);
916 if (sign1 >= 0 && sign2 < 0)
917 return (+1);
918
919 if (e1)
920 return ((sign2 < 0) ? +1 : -1);
921 else if (e2)
922 return ((sign1 < 0) ? -1 : +1);
923
924 if (use_suffix) {
925 cmp_res = cmpsuffix(SI1, SI2);
926 if (cmp_res)
927 return ((sign1 < 0) ? -cmp_res : cmp_res);
928 }
929
930 /* if both numbers are empty assume that the strings are equal */
931 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
932 return (0);
933
934 /*
935 * if the main part is of different size, we know the result
936 * (because the leading zeros are removed)
937 */
938 if (main1 < main2)
939 cmp_res = -1;
940 else if (main1 > main2)
941 cmp_res = +1;
942 /* if the sizes are equal then simple non-collate string compare gives the correct result */
943 else
944 cmp_res = wcscmp(smain1, smain2);
945
946 /* check fraction */
947 if (!cmp_res)
948 cmp_res = wcscmp(sfrac1, sfrac2);
949
950 if (!cmp_res)
951 return (0);
952
953 /* reverse result if the signs are negative */
954 if (sign1 < 0 && sign2 < 0)
955 cmp_res = -cmp_res;
956
957 return (cmp_res);
958 }
959
960 /*
961 * Implements numeric sort (-n).
962 */
963 static int
numcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)964 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
965 {
966
967 return (numcoll_impl(kv1, kv2, offset, false));
968 }
969
970 /*
971 * Implements 'human' numeric sort (-h).
972 */
973 static int
hnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)974 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
975 {
976
977 return (numcoll_impl(kv1, kv2, offset, true));
978 }
979
980 /* Use hint space to memoize md5 computations, at least. */
981 static void
randomcoll_init_hint(struct key_value * kv,void * hash)982 randomcoll_init_hint(struct key_value *kv, void *hash)
983 {
984
985 memcpy(kv->hint->v.Rh.cached, hash, sizeof(kv->hint->v.Rh.cached));
986 kv->hint->status = HS_INITIALIZED;
987 }
988
989 /*
990 * Implements random sort (-R).
991 */
992 static int
randomcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)993 randomcoll(struct key_value *kv1, struct key_value *kv2,
994 size_t offset __unused)
995 {
996 struct bwstring *s1, *s2;
997 MD5_CTX ctx1, ctx2;
998 unsigned char hash1[MD5_DIGEST_LENGTH], hash2[MD5_DIGEST_LENGTH];
999 int cmp;
1000
1001 s1 = kv1->k;
1002 s2 = kv2->k;
1003
1004 if (debug_sort) {
1005 bwsprintf(stdout, s1, "; k1=<", ">");
1006 bwsprintf(stdout, s2, ", k2=<", ">");
1007 }
1008
1009 if (s1 == s2)
1010 return (0);
1011
1012 if (kv1->hint->status == HS_INITIALIZED &&
1013 kv2->hint->status == HS_INITIALIZED) {
1014 cmp = memcmp(kv1->hint->v.Rh.cached,
1015 kv2->hint->v.Rh.cached, sizeof(kv1->hint->v.Rh.cached));
1016 if (cmp != 0)
1017 return (cmp);
1018 }
1019
1020 memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
1021 memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
1022
1023 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1024 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1025
1026 MD5Final(hash1, &ctx1);
1027 MD5Final(hash2, &ctx2);
1028
1029 if (kv1->hint->status == HS_UNINITIALIZED)
1030 randomcoll_init_hint(kv1, hash1);
1031 if (kv2->hint->status == HS_UNINITIALIZED)
1032 randomcoll_init_hint(kv2, hash2);
1033
1034 return (memcmp(hash1, hash2, sizeof(hash1)));
1035 }
1036
1037 /*
1038 * Implements version sort (-V).
1039 */
1040 static int
versioncoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1041 versioncoll(struct key_value *kv1, struct key_value *kv2,
1042 size_t offset __unused)
1043 {
1044 struct bwstring *s1, *s2;
1045
1046 s1 = kv1->k;
1047 s2 = kv2->k;
1048
1049 if (debug_sort) {
1050 bwsprintf(stdout, s1, "; k1=<", ">");
1051 bwsprintf(stdout, s2, ", k2=<", ">");
1052 }
1053
1054 if (s1 == s2)
1055 return (0);
1056
1057 return (vcmp(s1, s2));
1058 }
1059
1060 /*
1061 * Check for minus infinity
1062 */
1063 static inline bool
huge_minus(double d,int err1)1064 huge_minus(double d, int err1)
1065 {
1066
1067 if (err1 == ERANGE)
1068 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1069 return (+1);
1070
1071 return (0);
1072 }
1073
1074 /*
1075 * Check for plus infinity
1076 */
1077 static inline bool
huge_plus(double d,int err1)1078 huge_plus(double d, int err1)
1079 {
1080
1081 if (err1 == ERANGE)
1082 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1083 return (+1);
1084
1085 return (0);
1086 }
1087
1088 /*
1089 * Check whether a function is a NAN
1090 */
1091 static bool
is_nan(double d)1092 is_nan(double d)
1093 {
1094
1095 return ((d == NAN) || (isnan(d)));
1096 }
1097
1098 /*
1099 * Compare two NANs
1100 */
1101 static int
cmp_nans(double d1,double d2)1102 cmp_nans(double d1, double d2)
1103 {
1104
1105 if (d1 < d2)
1106 return (-1);
1107 if (d1 > d2)
1108 return (+1);
1109 return (0);
1110 }
1111
1112 /*
1113 * Implements general numeric sort (-g).
1114 */
1115 static int
gnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1116 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1117 size_t offset __unused)
1118 {
1119 double d1, d2;
1120 int err1, err2;
1121 bool empty1, empty2, key1_read, key2_read;
1122
1123 d1 = d2 = 0;
1124 err1 = err2 = 0;
1125 key1_read = key2_read = false;
1126
1127 if (debug_sort) {
1128 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1129 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1130 }
1131
1132 if (kv1->hint->status == HS_UNINITIALIZED) {
1133 errno = 0;
1134 d1 = bwstod(kv1->k, &empty1);
1135 err1 = errno;
1136
1137 if (empty1)
1138 kv1->hint->v.gh.notnum = true;
1139 else if (err1 == 0) {
1140 kv1->hint->v.gh.d = d1;
1141 kv1->hint->v.gh.nan = is_nan(d1);
1142 kv1->hint->status = HS_INITIALIZED;
1143 } else
1144 kv1->hint->status = HS_ERROR;
1145
1146 key1_read = true;
1147 }
1148
1149 if (kv2->hint->status == HS_UNINITIALIZED) {
1150 errno = 0;
1151 d2 = bwstod(kv2->k, &empty2);
1152 err2 = errno;
1153
1154 if (empty2)
1155 kv2->hint->v.gh.notnum = true;
1156 else if (err2 == 0) {
1157 kv2->hint->v.gh.d = d2;
1158 kv2->hint->v.gh.nan = is_nan(d2);
1159 kv2->hint->status = HS_INITIALIZED;
1160 } else
1161 kv2->hint->status = HS_ERROR;
1162
1163 key2_read = true;
1164 }
1165
1166 if (kv1->hint->status == HS_INITIALIZED &&
1167 kv2->hint->status == HS_INITIALIZED) {
1168 if (kv1->hint->v.gh.notnum)
1169 return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1170 else if (kv2->hint->v.gh.notnum)
1171 return (+1);
1172
1173 if (kv1->hint->v.gh.nan)
1174 return ((kv2->hint->v.gh.nan) ?
1175 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1176 -1);
1177 else if (kv2->hint->v.gh.nan)
1178 return (+1);
1179
1180 d1 = kv1->hint->v.gh.d;
1181 d2 = kv2->hint->v.gh.d;
1182
1183 if (d1 < d2)
1184 return (-1);
1185 else if (d1 > d2)
1186 return (+1);
1187 else
1188 return (0);
1189 }
1190
1191 if (!key1_read) {
1192 errno = 0;
1193 d1 = bwstod(kv1->k, &empty1);
1194 err1 = errno;
1195 }
1196
1197 if (!key2_read) {
1198 errno = 0;
1199 d2 = bwstod(kv2->k, &empty2);
1200 err2 = errno;
1201 }
1202
1203 /* Non-value case: */
1204 if (empty1)
1205 return (empty2 ? 0 : -1);
1206 else if (empty2)
1207 return (+1);
1208
1209 /* NAN case */
1210 if (is_nan(d1))
1211 return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1212 else if (is_nan(d2))
1213 return (+1);
1214
1215 /* Infinities */
1216 if (err1 == ERANGE || err2 == ERANGE) {
1217 /* Minus infinity case */
1218 if (huge_minus(d1, err1)) {
1219 if (huge_minus(d2, err2)) {
1220 if (d1 < d2)
1221 return (-1);
1222 if (d1 > d2)
1223 return (+1);
1224 return (0);
1225 } else
1226 return (-1);
1227
1228 } else if (huge_minus(d2, err2)) {
1229 if (huge_minus(d1, err1)) {
1230 if (d1 < d2)
1231 return (-1);
1232 if (d1 > d2)
1233 return (+1);
1234 return (0);
1235 } else
1236 return (+1);
1237 }
1238
1239 /* Plus infinity case */
1240 if (huge_plus(d1, err1)) {
1241 if (huge_plus(d2, err2)) {
1242 if (d1 < d2)
1243 return (-1);
1244 if (d1 > d2)
1245 return (+1);
1246 return (0);
1247 } else
1248 return (+1);
1249 } else if (huge_plus(d2, err2)) {
1250 if (huge_plus(d1, err1)) {
1251 if (d1 < d2)
1252 return (-1);
1253 if (d1 > d2)
1254 return (+1);
1255 return (0);
1256 } else
1257 return (-1);
1258 }
1259 }
1260
1261 if (d1 < d2)
1262 return (-1);
1263 if (d1 > d2)
1264 return (+1);
1265
1266 return (0);
1267 }
1268
1269 /*
1270 * Implements month sort (-M).
1271 */
1272 static int
monthcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1273 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1274 {
1275 int val1, val2;
1276 bool key1_read, key2_read;
1277
1278 val1 = val2 = 0;
1279 key1_read = key2_read = false;
1280
1281 if (debug_sort) {
1282 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1283 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1284 }
1285
1286 if (kv1->hint->status == HS_UNINITIALIZED) {
1287 kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1288 key1_read = true;
1289 kv1->hint->status = HS_INITIALIZED;
1290 }
1291
1292 if (kv2->hint->status == HS_UNINITIALIZED) {
1293 kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1294 key2_read = true;
1295 kv2->hint->status = HS_INITIALIZED;
1296 }
1297
1298 if (kv1->hint->status == HS_INITIALIZED) {
1299 val1 = kv1->hint->v.Mh.m;
1300 key1_read = true;
1301 }
1302
1303 if (kv2->hint->status == HS_INITIALIZED) {
1304 val2 = kv2->hint->v.Mh.m;
1305 key2_read = true;
1306 }
1307
1308 if (!key1_read)
1309 val1 = bws_month_score(kv1->k);
1310 if (!key2_read)
1311 val2 = bws_month_score(kv2->k);
1312
1313 if (val1 == val2) {
1314 return (0);
1315 }
1316 if (val1 < val2)
1317 return (-1);
1318 return (+1);
1319 }
1320