1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (C) 2009 Gabor Kovesdan <[email protected]>
5 * Copyright (C) 2012 Oleg Moskalenko <[email protected]>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/types.h>
34
35 #include <errno.h>
36 #include <err.h>
37 #include <langinfo.h>
38 #include <limits.h>
39 #include <math.h>
40 #include <md5.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <wchar.h>
44 #include <wctype.h>
45
46 #include "coll.h"
47 #include "vsort.h"
48
49 struct key_specs *keys;
50 size_t keys_num = 0;
51
52 wint_t symbol_decimal_point = L'.';
53 /* there is no default thousands separator in collate rules: */
54 wint_t symbol_thousands_sep = 0;
55 wint_t symbol_negative_sign = L'-';
56 wint_t symbol_positive_sign = L'+';
57
58 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
59 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
60 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
61 static int numcoll(struct key_value*, struct key_value *, size_t offset);
62 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
63 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
64 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
65
66 /*
67 * Allocate keys array
68 */
69 struct keys_array *
keys_array_alloc(void)70 keys_array_alloc(void)
71 {
72 struct keys_array *ka;
73 size_t sz;
74
75 sz = keys_array_size();
76 ka = sort_malloc(sz);
77 memset(ka, 0, sz);
78
79 return (ka);
80 }
81
82 /*
83 * Calculate whether we need key hint space
84 */
85 static size_t
key_hint_size(void)86 key_hint_size(void)
87 {
88
89 return (need_hint ? sizeof(struct key_hint) : 0);
90 }
91
92 /*
93 * Calculate keys array size
94 */
95 size_t
keys_array_size(void)96 keys_array_size(void)
97 {
98
99 return (keys_num * (sizeof(struct key_value) + key_hint_size()));
100 }
101
102 /*
103 * Clean data of keys array
104 */
105 void
clean_keys_array(const struct bwstring * s,struct keys_array * ka)106 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
107 {
108
109 if (ka) {
110 for (size_t i = 0; i < keys_num; ++i) {
111 const struct key_value *kv;
112
113 kv = get_key_from_keys_array(ka, i);
114 if (kv->k && kv->k != s)
115 bwsfree(kv->k);
116 }
117 memset(ka, 0, keys_array_size());
118 }
119 }
120
121 /*
122 * Get pointer to a key value in the keys set
123 */
124 struct key_value *
get_key_from_keys_array(struct keys_array * ka,size_t ind)125 get_key_from_keys_array(struct keys_array *ka, size_t ind)
126 {
127
128 return ((struct key_value *)((caddr_t)ka->key +
129 ind * (sizeof(struct key_value) + key_hint_size())));
130 }
131
132 /*
133 * Set value of a key in the keys set
134 */
135 void
set_key_on_keys_array(struct keys_array * ka,struct bwstring * s,size_t ind)136 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
137 {
138
139 if (ka && keys_num > ind) {
140 struct key_value *kv;
141
142 kv = get_key_from_keys_array(ka, ind);
143
144 if (kv->k && kv->k != s)
145 bwsfree(kv->k);
146 kv->k = s;
147 }
148 }
149
150 /*
151 * Initialize a sort list item
152 */
153 struct sort_list_item *
sort_list_item_alloc(void)154 sort_list_item_alloc(void)
155 {
156 struct sort_list_item *si;
157 size_t sz;
158
159 sz = sizeof(struct sort_list_item) + keys_array_size();
160 si = sort_malloc(sz);
161 memset(si, 0, sz);
162
163 return (si);
164 }
165
166 size_t
sort_list_item_size(struct sort_list_item * si)167 sort_list_item_size(struct sort_list_item *si)
168 {
169 size_t ret = 0;
170
171 if (si) {
172 ret = sizeof(struct sort_list_item) + keys_array_size();
173 if (si->str)
174 ret += bws_memsize(si->str);
175 for (size_t i = 0; i < keys_num; ++i) {
176 const struct key_value *kv;
177
178 kv = get_key_from_keys_array(&si->ka, i);
179
180 if (kv->k != si->str)
181 ret += bws_memsize(kv->k);
182 }
183 }
184 return (ret);
185 }
186
187 /*
188 * Calculate key for a sort list item
189 */
190 static void
sort_list_item_make_key(struct sort_list_item * si)191 sort_list_item_make_key(struct sort_list_item *si)
192 {
193
194 preproc(si->str, &(si->ka));
195 }
196
197 /*
198 * Set value of a sort list item.
199 * Return combined string and keys memory size.
200 */
201 void
sort_list_item_set(struct sort_list_item * si,struct bwstring * str)202 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
203 {
204
205 if (si) {
206 clean_keys_array(si->str, &(si->ka));
207 if (si->str) {
208 if (si->str == str) {
209 /* we are trying to reset the same string */
210 return;
211 } else {
212 bwsfree(si->str);
213 si->str = NULL;
214 }
215 }
216 si->str = str;
217 sort_list_item_make_key(si);
218 }
219 }
220
221 /*
222 * De-allocate a sort list item object memory
223 */
224 void
sort_list_item_clean(struct sort_list_item * si)225 sort_list_item_clean(struct sort_list_item *si)
226 {
227
228 if (si) {
229 clean_keys_array(si->str, &(si->ka));
230 if (si->str) {
231 bwsfree(si->str);
232 si->str = NULL;
233 }
234 }
235 }
236
237 /*
238 * Skip columns according to specs
239 */
240 static size_t
skip_cols_to_start(const struct bwstring * s,size_t cols,size_t start,bool skip_blanks,bool * empty_key)241 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
242 bool skip_blanks, bool *empty_key)
243 {
244 if (cols < 1)
245 return (BWSLEN(s) + 1);
246
247 if (skip_blanks)
248 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
249 ++start;
250
251 while (start < BWSLEN(s) && cols > 1) {
252 --cols;
253 ++start;
254 }
255
256 if (start >= BWSLEN(s))
257 *empty_key = true;
258
259 return (start);
260 }
261
262 /*
263 * Skip fields according to specs
264 */
265 static size_t
skip_fields_to_start(const struct bwstring * s,size_t fields,bool * empty_field)266 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
267 {
268
269 if (fields < 2) {
270 if (BWSLEN(s) == 0)
271 *empty_field = true;
272 return (0);
273 } else if (!(sort_opts_vals.tflag)) {
274 size_t cpos = 0;
275 bool pb = true;
276
277 while (cpos < BWSLEN(s)) {
278 bool isblank;
279
280 isblank = iswblank(BWS_GET(s, cpos));
281
282 if (isblank && !pb) {
283 --fields;
284 if (fields <= 1)
285 return (cpos);
286 }
287 pb = isblank;
288 ++cpos;
289 }
290 if (fields > 1)
291 *empty_field = true;
292 return (cpos);
293 } else {
294 size_t cpos = 0;
295
296 while (cpos < BWSLEN(s)) {
297 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
298 --fields;
299 if (fields <= 1)
300 return (cpos + 1);
301 }
302 ++cpos;
303 }
304 if (fields > 1)
305 *empty_field = true;
306 return (cpos);
307 }
308 }
309
310 /*
311 * Find fields start
312 */
313 static void
find_field_start(const struct bwstring * s,struct key_specs * ks,size_t * field_start,size_t * key_start,bool * empty_field,bool * empty_key)314 find_field_start(const struct bwstring *s, struct key_specs *ks,
315 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
316 {
317
318 *field_start = skip_fields_to_start(s, ks->f1, empty_field);
319 if (!*empty_field)
320 *key_start = skip_cols_to_start(s, ks->c1, *field_start,
321 ks->pos1b, empty_key);
322 else
323 *empty_key = true;
324 }
325
326 /*
327 * Find end key position
328 */
329 static size_t
find_field_end(const struct bwstring * s,struct key_specs * ks)330 find_field_end(const struct bwstring *s, struct key_specs *ks)
331 {
332 size_t f2, next_field_start, pos_end;
333 bool empty_field, empty_key;
334
335 empty_field = false;
336 empty_key = false;
337 f2 = ks->f2;
338
339 if (f2 == 0)
340 return (BWSLEN(s) + 1);
341 else {
342 if (ks->c2 == 0) {
343 next_field_start = skip_fields_to_start(s, f2 + 1,
344 &empty_field);
345 if ((next_field_start > 0) && sort_opts_vals.tflag &&
346 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
347 next_field_start - 1)))
348 --next_field_start;
349 } else
350 next_field_start = skip_fields_to_start(s, f2,
351 &empty_field);
352 }
353
354 if (empty_field || (next_field_start >= BWSLEN(s)))
355 return (BWSLEN(s) + 1);
356
357 if (ks->c2) {
358 pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
359 ks->pos2b, &empty_key);
360 if (pos_end < BWSLEN(s))
361 ++pos_end;
362 } else
363 pos_end = next_field_start;
364
365 return (pos_end);
366 }
367
368 /*
369 * Cut a field according to the key specs
370 */
371 static struct bwstring *
cut_field(const struct bwstring * s,struct key_specs * ks)372 cut_field(const struct bwstring *s, struct key_specs *ks)
373 {
374 struct bwstring *ret = NULL;
375
376 if (s && ks) {
377 size_t field_start, key_end, key_start, sz;
378 bool empty_field, empty_key;
379
380 field_start = 0;
381 key_start = 0;
382 empty_field = false;
383 empty_key = false;
384
385 find_field_start(s, ks, &field_start, &key_start,
386 &empty_field, &empty_key);
387
388 if (empty_key)
389 sz = 0;
390 else {
391 key_end = find_field_end(s, ks);
392 sz = (key_end < key_start) ? 0 : (key_end - key_start);
393 }
394
395 ret = bwsalloc(sz);
396 if (sz)
397 bwsnocpy(ret, s, key_start, sz);
398 } else
399 ret = bwsalloc(0);
400
401 return (ret);
402 }
403
404 /*
405 * Preprocesses a line applying the necessary transformations
406 * specified by command line options and returns the preprocessed
407 * string, which can be used to compare.
408 */
409 int
preproc(struct bwstring * s,struct keys_array * ka)410 preproc(struct bwstring *s, struct keys_array *ka)
411 {
412
413 if (sort_opts_vals.kflag)
414 for (size_t i = 0; i < keys_num; i++) {
415 struct bwstring *key;
416 struct key_specs *kspecs;
417 struct sort_mods *sm;
418
419 kspecs = &(keys[i]);
420 key = cut_field(s, kspecs);
421
422 sm = &(kspecs->sm);
423 if (sm->dflag)
424 key = dictionary_order(key);
425 else if (sm->iflag)
426 key = ignore_nonprinting(key);
427 if (sm->fflag || sm->Mflag)
428 key = ignore_case(key);
429
430 set_key_on_keys_array(ka, key, i);
431 }
432 else {
433 struct bwstring *ret = NULL;
434 struct sort_mods *sm = default_sort_mods;
435
436 if (sm->bflag) {
437 if (ret == NULL)
438 ret = bwsdup(s);
439 ret = ignore_leading_blanks(ret);
440 }
441 if (sm->dflag) {
442 if (ret == NULL)
443 ret = bwsdup(s);
444 ret = dictionary_order(ret);
445 } else if (sm->iflag) {
446 if (ret == NULL)
447 ret = bwsdup(s);
448 ret = ignore_nonprinting(ret);
449 }
450 if (sm->fflag || sm->Mflag) {
451 if (ret == NULL)
452 ret = bwsdup(s);
453 ret = ignore_case(ret);
454 }
455 if (ret == NULL)
456 set_key_on_keys_array(ka, s, 0);
457 else
458 set_key_on_keys_array(ka, ret, 0);
459 }
460
461 return 0;
462 }
463
464 cmpcoll_t
get_sort_func(struct sort_mods * sm)465 get_sort_func(struct sort_mods *sm)
466 {
467
468 if (sm->nflag)
469 return (numcoll);
470 else if (sm->hflag)
471 return (hnumcoll);
472 else if (sm->gflag)
473 return (gnumcoll);
474 else if (sm->Mflag)
475 return (monthcoll);
476 else if (sm->Rflag)
477 return (randomcoll);
478 else if (sm->Vflag)
479 return (versioncoll);
480 else
481 return (wstrcoll);
482 }
483
484 /*
485 * Compares the given strings. Returns a positive number if
486 * the first precedes the second, a negative number if the second is
487 * the preceding one, and zero if they are equal. This function calls
488 * the underlying collate functions, which done the actual comparison.
489 */
490 int
key_coll(struct keys_array * ps1,struct keys_array * ps2,size_t offset)491 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
492 {
493 struct key_value *kv1, *kv2;
494 struct sort_mods *sm;
495 int res = 0;
496
497 for (size_t i = 0; i < keys_num; ++i) {
498 kv1 = get_key_from_keys_array(ps1, i);
499 kv2 = get_key_from_keys_array(ps2, i);
500 sm = &(keys[i].sm);
501
502 if (sm->rflag)
503 res = sm->func(kv2, kv1, offset);
504 else
505 res = sm->func(kv1, kv2, offset);
506
507 if (res)
508 break;
509
510 /* offset applies to only the first key */
511 offset = 0;
512 }
513 return (res);
514 }
515
516 /*
517 * Compare two strings.
518 * Plain symbol-by-symbol comparison.
519 */
520 int
top_level_str_coll(const struct bwstring * s1,const struct bwstring * s2)521 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
522 {
523
524 if (default_sort_mods->rflag) {
525 const struct bwstring *tmp;
526
527 tmp = s1;
528 s1 = s2;
529 s2 = tmp;
530 }
531
532 return (bwscoll(s1, s2, 0));
533 }
534
535 /*
536 * Compare a string and a sort list item, according to the sort specs.
537 */
538 int
str_list_coll(struct bwstring * str1,struct sort_list_item ** ss2)539 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
540 {
541 struct keys_array *ka1;
542 int ret = 0;
543
544 ka1 = keys_array_alloc();
545
546 preproc(str1, ka1);
547
548 sort_list_item_make_key(*ss2);
549
550 if (debug_sort) {
551 bwsprintf(stdout, str1, "; s1=<", ">");
552 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
553 }
554
555 ret = key_coll(ka1, &((*ss2)->ka), 0);
556
557 if (debug_sort)
558 printf("; cmp1=%d", ret);
559
560 clean_keys_array(str1, ka1);
561 sort_free(ka1);
562
563 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
564 ret = top_level_str_coll(str1, ((*ss2)->str));
565 if (debug_sort)
566 printf("; cmp2=%d", ret);
567 }
568
569 if (debug_sort)
570 printf("\n");
571
572 return (ret);
573 }
574
575 /*
576 * Compare two sort list items, according to the sort specs.
577 */
578 int
list_coll_offset(struct sort_list_item ** ss1,struct sort_list_item ** ss2,size_t offset)579 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
580 size_t offset)
581 {
582 int ret;
583
584 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
585
586 if (debug_sort) {
587 if (offset)
588 printf("; offset=%d", (int) offset);
589 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
590 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
591 printf("; cmp1=%d\n", ret);
592 }
593
594 if (ret)
595 return (ret);
596
597 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
598 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
599 if (debug_sort)
600 printf("; cmp2=%d\n", ret);
601 }
602
603 return (ret);
604 }
605
606 /*
607 * Compare two sort list items, according to the sort specs.
608 */
609 int
list_coll(struct sort_list_item ** ss1,struct sort_list_item ** ss2)610 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
611 {
612
613 return (list_coll_offset(ss1, ss2, 0));
614 }
615
616 #define LSCDEF(N) \
617 static int \
618 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
619 { \
620 \
621 return (list_coll_offset(ss1, ss2, N)); \
622 }
623
624 LSCDEF(1)
625 LSCDEF(2)
626 LSCDEF(3)
627 LSCDEF(4)
628 LSCDEF(5)
629 LSCDEF(6)
630 LSCDEF(7)
631 LSCDEF(8)
632 LSCDEF(9)
633 LSCDEF(10)
634 LSCDEF(11)
635 LSCDEF(12)
636 LSCDEF(13)
637 LSCDEF(14)
638 LSCDEF(15)
639 LSCDEF(16)
640 LSCDEF(17)
641 LSCDEF(18)
642 LSCDEF(19)
643 LSCDEF(20)
644
645 listcoll_t
get_list_call_func(size_t offset)646 get_list_call_func(size_t offset)
647 {
648 static const listcoll_t lsarray[] = { list_coll, list_coll_1,
649 list_coll_2, list_coll_3, list_coll_4, list_coll_5,
650 list_coll_6, list_coll_7, list_coll_8, list_coll_9,
651 list_coll_10, list_coll_11, list_coll_12, list_coll_13,
652 list_coll_14, list_coll_15, list_coll_16, list_coll_17,
653 list_coll_18, list_coll_19, list_coll_20 };
654
655 if (offset <= 20)
656 return (lsarray[offset]);
657
658 return (list_coll);
659 }
660
661 /*
662 * Compare two sort list items, only by their original string.
663 */
664 int
list_coll_by_str_only(struct sort_list_item ** ss1,struct sort_list_item ** ss2)665 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
666 {
667
668 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
669 }
670
671 /*
672 * Maximum size of a number in the string (before or after decimal point)
673 */
674 #define MAX_NUM_SIZE (128)
675
676 /*
677 * Set suffix value
678 */
setsuffix(wchar_t c,unsigned char * si)679 static void setsuffix(wchar_t c, unsigned char *si)
680 {
681 switch (c){
682 case L'k':
683 case L'K':
684 *si = 1;
685 break;
686 case L'M':
687 *si = 2;
688 break;
689 case L'G':
690 *si = 3;
691 break;
692 case L'T':
693 *si = 4;
694 break;
695 case L'P':
696 *si = 5;
697 break;
698 case L'E':
699 *si = 6;
700 break;
701 case L'Z':
702 *si = 7;
703 break;
704 case L'Y':
705 *si = 8;
706 break;
707 default:
708 *si = 0;
709 }
710 }
711
712 /*
713 * Read string s and parse the string into a fixed-decimal-point number.
714 * sign equals -1 if the number is negative (explicit plus is not allowed,
715 * according to GNU sort's "info sort".
716 * The number part before decimal point is in the smain, after the decimal
717 * point is in sfrac, tail is the pointer to the remainder of the string.
718 */
719 static int
read_number(struct bwstring * s0,int * sign,wchar_t * smain,size_t * main_len,wchar_t * sfrac,size_t * frac_len,unsigned char * si)720 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
721 {
722 bwstring_iterator s;
723
724 s = bws_begin(s0);
725
726 /* always end the fraction with zero, even if we have no fraction */
727 sfrac[0] = 0;
728
729 while (iswblank(bws_get_iter_value(s)))
730 s = bws_iterator_inc(s, 1);
731
732 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
733 *sign = -1;
734 s = bws_iterator_inc(s, 1);
735 }
736
737 // This is '0', not '\0', do not change this
738 while (iswdigit(bws_get_iter_value(s)) &&
739 (bws_get_iter_value(s) == L'0'))
740 s = bws_iterator_inc(s, 1);
741
742 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
743 if (iswdigit(bws_get_iter_value(s))) {
744 smain[*main_len] = bws_get_iter_value(s);
745 s = bws_iterator_inc(s, 1);
746 *main_len += 1;
747 } else if (symbol_thousands_sep &&
748 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
749 s = bws_iterator_inc(s, 1);
750 else
751 break;
752 }
753
754 smain[*main_len] = 0;
755
756 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
757 s = bws_iterator_inc(s, 1);
758 while (iswdigit(bws_get_iter_value(s)) &&
759 *frac_len < MAX_NUM_SIZE) {
760 sfrac[*frac_len] = bws_get_iter_value(s);
761 s = bws_iterator_inc(s, 1);
762 *frac_len += 1;
763 }
764 sfrac[*frac_len] = 0;
765
766 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
767 --(*frac_len);
768 sfrac[*frac_len] = L'\0';
769 }
770 }
771
772 setsuffix(bws_get_iter_value(s),si);
773
774 if ((*main_len + *frac_len) == 0)
775 *sign = 0;
776
777 return (0);
778 }
779
780 /*
781 * Implements string sort.
782 */
783 static int
wstrcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)784 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
785 {
786
787 if (debug_sort) {
788 if (offset)
789 printf("; offset=%d\n", (int) offset);
790 bwsprintf(stdout, kv1->k, "; k1=<", ">");
791 printf("(%zu)", BWSLEN(kv1->k));
792 bwsprintf(stdout, kv2->k, ", k2=<", ">");
793 printf("(%zu)", BWSLEN(kv2->k));
794 }
795
796 return (bwscoll(kv1->k, kv2->k, offset));
797 }
798
799 /*
800 * Compare two suffixes
801 */
802 static inline int
cmpsuffix(unsigned char si1,unsigned char si2)803 cmpsuffix(unsigned char si1, unsigned char si2)
804 {
805
806 return ((char)si1 - (char)si2);
807 }
808
809 /*
810 * Implements numeric sort for -n and -h.
811 */
812 static int
numcoll_impl(struct key_value * kv1,struct key_value * kv2,size_t offset __unused,bool use_suffix)813 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
814 size_t offset __unused, bool use_suffix)
815 {
816 struct bwstring *s1, *s2;
817 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
818 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
819 int cmp_res, sign1, sign2;
820 size_t frac1, frac2, main1, main2;
821 unsigned char SI1, SI2;
822 bool e1, e2, key1_read, key2_read;
823
824 s1 = kv1->k;
825 s2 = kv2->k;
826 sign1 = sign2 = 0;
827 main1 = main2 = 0;
828 frac1 = frac2 = 0;
829
830 key1_read = key2_read = false;
831
832 if (debug_sort) {
833 bwsprintf(stdout, s1, "; k1=<", ">");
834 bwsprintf(stdout, s2, ", k2=<", ">");
835 }
836
837 if (s1 == s2)
838 return (0);
839
840 if (kv1->hint->status == HS_UNINITIALIZED) {
841 /* read the number from the string */
842 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
843 key1_read = true;
844 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
845 if(main1 < 1 && frac1 < 1)
846 kv1->hint->v.nh.empty=true;
847 kv1->hint->v.nh.si = SI1;
848 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
849 HS_INITIALIZED : HS_ERROR;
850 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
851 }
852
853 if (kv2->hint->status == HS_UNINITIALIZED) {
854 /* read the number from the string */
855 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
856 key2_read = true;
857 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
858 if(main2 < 1 && frac2 < 1)
859 kv2->hint->v.nh.empty=true;
860 kv2->hint->v.nh.si = SI2;
861 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
862 HS_INITIALIZED : HS_ERROR;
863 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
864 }
865
866 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
867 HS_INITIALIZED) {
868 unsigned long long n1, n2;
869 bool neg1, neg2;
870
871 e1 = kv1->hint->v.nh.empty;
872 e2 = kv2->hint->v.nh.empty;
873
874 if (e1 && e2)
875 return (0);
876
877 neg1 = kv1->hint->v.nh.neg;
878 neg2 = kv2->hint->v.nh.neg;
879
880 if (neg1 && !neg2)
881 return (-1);
882 if (neg2 && !neg1)
883 return (+1);
884
885 if (e1)
886 return (neg2 ? +1 : -1);
887 else if (e2)
888 return (neg1 ? -1 : +1);
889
890
891 if (use_suffix) {
892 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
893 if (cmp_res)
894 return (neg1 ? -cmp_res : cmp_res);
895 }
896
897 n1 = kv1->hint->v.nh.n1;
898 n2 = kv2->hint->v.nh.n1;
899 if (n1 < n2)
900 return (neg1 ? +1 : -1);
901 else if (n1 > n2)
902 return (neg1 ? -1 : +1);
903 }
904
905 /* read the numbers from the strings */
906 if (!key1_read)
907 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
908 if (!key2_read)
909 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
910
911 e1 = ((main1 + frac1) == 0);
912 e2 = ((main2 + frac2) == 0);
913
914 if (e1 && e2)
915 return (0);
916
917 /* we know the result if the signs are different */
918 if (sign1 < 0 && sign2 >= 0)
919 return (-1);
920 if (sign1 >= 0 && sign2 < 0)
921 return (+1);
922
923 if (e1)
924 return ((sign2 < 0) ? +1 : -1);
925 else if (e2)
926 return ((sign1 < 0) ? -1 : +1);
927
928 if (use_suffix) {
929 cmp_res = cmpsuffix(SI1, SI2);
930 if (cmp_res)
931 return ((sign1 < 0) ? -cmp_res : cmp_res);
932 }
933
934 /* if both numbers are empty assume that the strings are equal */
935 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
936 return (0);
937
938 /*
939 * if the main part is of different size, we know the result
940 * (because the leading zeros are removed)
941 */
942 if (main1 < main2)
943 cmp_res = -1;
944 else if (main1 > main2)
945 cmp_res = +1;
946 /* if the sizes are equal then simple non-collate string compare gives the correct result */
947 else
948 cmp_res = wcscmp(smain1, smain2);
949
950 /* check fraction */
951 if (!cmp_res)
952 cmp_res = wcscmp(sfrac1, sfrac2);
953
954 if (!cmp_res)
955 return (0);
956
957 /* reverse result if the signs are negative */
958 if (sign1 < 0 && sign2 < 0)
959 cmp_res = -cmp_res;
960
961 return (cmp_res);
962 }
963
964 /*
965 * Implements numeric sort (-n).
966 */
967 static int
numcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)968 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
969 {
970
971 return (numcoll_impl(kv1, kv2, offset, false));
972 }
973
974 /*
975 * Implements 'human' numeric sort (-h).
976 */
977 static int
hnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)978 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
979 {
980
981 return (numcoll_impl(kv1, kv2, offset, true));
982 }
983
984 /* Use hint space to memoize md5 computations, at least. */
985 static void
randomcoll_init_hint(struct key_value * kv,void * hash)986 randomcoll_init_hint(struct key_value *kv, void *hash)
987 {
988
989 memcpy(kv->hint->v.Rh.cached, hash, sizeof(kv->hint->v.Rh.cached));
990 kv->hint->status = HS_INITIALIZED;
991 }
992
993 /*
994 * Implements random sort (-R).
995 */
996 static int
randomcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)997 randomcoll(struct key_value *kv1, struct key_value *kv2,
998 size_t offset __unused)
999 {
1000 struct bwstring *s1, *s2;
1001 MD5_CTX ctx1, ctx2;
1002 unsigned char hash1[MD5_DIGEST_LENGTH], hash2[MD5_DIGEST_LENGTH];
1003 int cmp;
1004
1005 s1 = kv1->k;
1006 s2 = kv2->k;
1007
1008 if (debug_sort) {
1009 bwsprintf(stdout, s1, "; k1=<", ">");
1010 bwsprintf(stdout, s2, ", k2=<", ">");
1011 }
1012
1013 if (s1 == s2)
1014 return (0);
1015
1016 if (kv1->hint->status == HS_INITIALIZED &&
1017 kv2->hint->status == HS_INITIALIZED) {
1018 cmp = memcmp(kv1->hint->v.Rh.cached,
1019 kv2->hint->v.Rh.cached, sizeof(kv1->hint->v.Rh.cached));
1020 if (cmp != 0)
1021 return (cmp);
1022 }
1023
1024 memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
1025 memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
1026
1027 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1028 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1029
1030 MD5Final(hash1, &ctx1);
1031 MD5Final(hash2, &ctx2);
1032
1033 if (kv1->hint->status == HS_UNINITIALIZED)
1034 randomcoll_init_hint(kv1, hash1);
1035 if (kv2->hint->status == HS_UNINITIALIZED)
1036 randomcoll_init_hint(kv2, hash2);
1037
1038 return (memcmp(hash1, hash2, sizeof(hash1)));
1039 }
1040
1041 /*
1042 * Implements version sort (-V).
1043 */
1044 static int
versioncoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1045 versioncoll(struct key_value *kv1, struct key_value *kv2,
1046 size_t offset __unused)
1047 {
1048 struct bwstring *s1, *s2;
1049
1050 s1 = kv1->k;
1051 s2 = kv2->k;
1052
1053 if (debug_sort) {
1054 bwsprintf(stdout, s1, "; k1=<", ">");
1055 bwsprintf(stdout, s2, ", k2=<", ">");
1056 }
1057
1058 if (s1 == s2)
1059 return (0);
1060
1061 return (vcmp(s1, s2));
1062 }
1063
1064 /*
1065 * Check for minus infinity
1066 */
1067 static inline bool
huge_minus(double d,int err1)1068 huge_minus(double d, int err1)
1069 {
1070
1071 if (err1 == ERANGE)
1072 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1073 return (+1);
1074
1075 return (0);
1076 }
1077
1078 /*
1079 * Check for plus infinity
1080 */
1081 static inline bool
huge_plus(double d,int err1)1082 huge_plus(double d, int err1)
1083 {
1084
1085 if (err1 == ERANGE)
1086 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1087 return (+1);
1088
1089 return (0);
1090 }
1091
1092 /*
1093 * Check whether a function is a NAN
1094 */
1095 static bool
is_nan(double d)1096 is_nan(double d)
1097 {
1098
1099 return ((d == NAN) || (isnan(d)));
1100 }
1101
1102 /*
1103 * Compare two NANs
1104 */
1105 static int
cmp_nans(double d1,double d2)1106 cmp_nans(double d1, double d2)
1107 {
1108
1109 if (d1 < d2)
1110 return (-1);
1111 if (d1 > d2)
1112 return (+1);
1113 return (0);
1114 }
1115
1116 /*
1117 * Implements general numeric sort (-g).
1118 */
1119 static int
gnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1120 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1121 size_t offset __unused)
1122 {
1123 double d1, d2;
1124 int err1, err2;
1125 bool empty1, empty2, key1_read, key2_read;
1126
1127 d1 = d2 = 0;
1128 err1 = err2 = 0;
1129 key1_read = key2_read = false;
1130
1131 if (debug_sort) {
1132 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1133 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1134 }
1135
1136 if (kv1->hint->status == HS_UNINITIALIZED) {
1137 errno = 0;
1138 d1 = bwstod(kv1->k, &empty1);
1139 err1 = errno;
1140
1141 if (empty1)
1142 kv1->hint->v.gh.notnum = true;
1143 else if (err1 == 0) {
1144 kv1->hint->v.gh.d = d1;
1145 kv1->hint->v.gh.nan = is_nan(d1);
1146 kv1->hint->status = HS_INITIALIZED;
1147 } else
1148 kv1->hint->status = HS_ERROR;
1149
1150 key1_read = true;
1151 }
1152
1153 if (kv2->hint->status == HS_UNINITIALIZED) {
1154 errno = 0;
1155 d2 = bwstod(kv2->k, &empty2);
1156 err2 = errno;
1157
1158 if (empty2)
1159 kv2->hint->v.gh.notnum = true;
1160 else if (err2 == 0) {
1161 kv2->hint->v.gh.d = d2;
1162 kv2->hint->v.gh.nan = is_nan(d2);
1163 kv2->hint->status = HS_INITIALIZED;
1164 } else
1165 kv2->hint->status = HS_ERROR;
1166
1167 key2_read = true;
1168 }
1169
1170 if (kv1->hint->status == HS_INITIALIZED &&
1171 kv2->hint->status == HS_INITIALIZED) {
1172 if (kv1->hint->v.gh.notnum)
1173 return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1174 else if (kv2->hint->v.gh.notnum)
1175 return (+1);
1176
1177 if (kv1->hint->v.gh.nan)
1178 return ((kv2->hint->v.gh.nan) ?
1179 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1180 -1);
1181 else if (kv2->hint->v.gh.nan)
1182 return (+1);
1183
1184 d1 = kv1->hint->v.gh.d;
1185 d2 = kv2->hint->v.gh.d;
1186
1187 if (d1 < d2)
1188 return (-1);
1189 else if (d1 > d2)
1190 return (+1);
1191 else
1192 return (0);
1193 }
1194
1195 if (!key1_read) {
1196 errno = 0;
1197 d1 = bwstod(kv1->k, &empty1);
1198 err1 = errno;
1199 }
1200
1201 if (!key2_read) {
1202 errno = 0;
1203 d2 = bwstod(kv2->k, &empty2);
1204 err2 = errno;
1205 }
1206
1207 /* Non-value case: */
1208 if (empty1)
1209 return (empty2 ? 0 : -1);
1210 else if (empty2)
1211 return (+1);
1212
1213 /* NAN case */
1214 if (is_nan(d1))
1215 return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1216 else if (is_nan(d2))
1217 return (+1);
1218
1219 /* Infinities */
1220 if (err1 == ERANGE || err2 == ERANGE) {
1221 /* Minus infinity case */
1222 if (huge_minus(d1, err1)) {
1223 if (huge_minus(d2, err2)) {
1224 if (d1 < d2)
1225 return (-1);
1226 if (d1 > d2)
1227 return (+1);
1228 return (0);
1229 } else
1230 return (-1);
1231
1232 } else if (huge_minus(d2, err2)) {
1233 if (huge_minus(d1, err1)) {
1234 if (d1 < d2)
1235 return (-1);
1236 if (d1 > d2)
1237 return (+1);
1238 return (0);
1239 } else
1240 return (+1);
1241 }
1242
1243 /* Plus infinity case */
1244 if (huge_plus(d1, err1)) {
1245 if (huge_plus(d2, err2)) {
1246 if (d1 < d2)
1247 return (-1);
1248 if (d1 > d2)
1249 return (+1);
1250 return (0);
1251 } else
1252 return (+1);
1253 } else if (huge_plus(d2, err2)) {
1254 if (huge_plus(d1, err1)) {
1255 if (d1 < d2)
1256 return (-1);
1257 if (d1 > d2)
1258 return (+1);
1259 return (0);
1260 } else
1261 return (-1);
1262 }
1263 }
1264
1265 if (d1 < d2)
1266 return (-1);
1267 if (d1 > d2)
1268 return (+1);
1269
1270 return (0);
1271 }
1272
1273 /*
1274 * Implements month sort (-M).
1275 */
1276 static int
monthcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1277 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1278 {
1279 int val1, val2;
1280 bool key1_read, key2_read;
1281
1282 val1 = val2 = 0;
1283 key1_read = key2_read = false;
1284
1285 if (debug_sort) {
1286 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1287 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1288 }
1289
1290 if (kv1->hint->status == HS_UNINITIALIZED) {
1291 kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1292 key1_read = true;
1293 kv1->hint->status = HS_INITIALIZED;
1294 }
1295
1296 if (kv2->hint->status == HS_UNINITIALIZED) {
1297 kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1298 key2_read = true;
1299 kv2->hint->status = HS_INITIALIZED;
1300 }
1301
1302 if (kv1->hint->status == HS_INITIALIZED) {
1303 val1 = kv1->hint->v.Mh.m;
1304 key1_read = true;
1305 }
1306
1307 if (kv2->hint->status == HS_INITIALIZED) {
1308 val2 = kv2->hint->v.Mh.m;
1309 key2_read = true;
1310 }
1311
1312 if (!key1_read)
1313 val1 = bws_month_score(kv1->k);
1314 if (!key2_read)
1315 val2 = bws_month_score(kv2->k);
1316
1317 if (val1 == val2) {
1318 return (0);
1319 }
1320 if (val1 < val2)
1321 return (-1);
1322 return (+1);
1323 }
1324