1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (C) 2009 Gabor Kovesdan <[email protected]>
5 * Copyright (C) 2012 Oleg Moskalenko <[email protected]>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/stat.h>
34 #include <sys/sysctl.h>
35 #include <sys/types.h>
36
37 #include <err.h>
38 #include <errno.h>
39 #include <getopt.h>
40 #include <limits.h>
41 #include <locale.h>
42 #include <md5.h>
43 #include <regex.h>
44 #include <signal.h>
45 #include <stdbool.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <wchar.h>
51 #include <wctype.h>
52
53 #include "coll.h"
54 #include "file.h"
55 #include "sort.h"
56
57 #ifndef WITHOUT_NLS
58 #include <nl_types.h>
59 nl_catd catalog;
60 #endif
61
62 #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz"
63
64 #define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
65 #define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)
66
67 static bool need_random;
68 static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
69 static const void *random_seed;
70 static size_t random_seed_size;
71
72 MD5_CTX md5_ctx;
73
/*
 * Built-in message texts, indexed by message number.  Slot 0 is an empty
 * placeholder so that the numbers in the comments match the array index.
 * These are the defaults used when NLS is disabled or no catalogue is
 * found.
 */
const char *nlsstr[] = {
	/*  0 */ "",
	/*  1 */ "mutually exclusive flags",
	/*  2 */ "extra argument not allowed with -c",
	/*  3 */ "Unknown feature",
	/*  4 */ "Wrong memory buffer specification",
	/*  5 */ "0 field in key specs",
	/*  6 */ "0 column in key specs",
	/*  7 */ "Wrong file mode",
	/*  8 */ "Cannot open file for reading",
	/*  9 */ "Radix sort cannot be used with these sort options",
	/* 10 */ "The chosen sort method cannot be used with stable and/or unique sort",
	/* 11 */ "Invalid key position",
	/* 12 */ "Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
	    "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
	    "[-o outfile] [--batch-size size] [--files0-from file] "
	    "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
	    "[--mmap] "
#if defined(SORT_THREADS)
	    "[--parallel thread_no] "
#endif
	    "[--human-numeric-sort] "
	    "[--version-sort] [--random-sort [--random-source file]] "
	    "[--compress-program program] [file ...]\n"
};
101
102 struct sort_opts sort_opts_vals;
103
104 bool debug_sort;
105 bool need_hint;
106
107 #if defined(SORT_THREADS)
108 unsigned int ncpu = 1;
109 size_t nthreads = 1;
110 #endif
111
112 static bool gnusort_numeric_compatibility;
113
114 static struct sort_mods default_sort_mods_object;
115 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
116
117 static bool print_symbols_on_debug;
118
119 /*
120 * Arguments from file (when file0-from option is used:
121 */
122 static size_t argc_from_file0 = (size_t)-1;
123 static char **argv_from_file0;
124
/*
 * getopt_long() return codes for long options that have no
 * single-character equivalent.  Numbering starts just above CHAR_MAX so
 * that none of them can collide with a short option character.
 */
enum {
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
#if defined(SORT_THREADS)
	PARALLEL_OPT,
#endif
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	MERGESORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};
147
/* Sort-type flags of which at most one may be in effect at a time. */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS	6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
    { 'M', 'n', 'g', 'R', 'h', 'V' };
150
151 static struct option long_options[] = {
152 { "batch-size", required_argument, NULL, BS_OPT },
153 { "buffer-size", required_argument, NULL, 'S' },
154 { "check", optional_argument, NULL, 'c' },
155 { "check=silent|quiet", optional_argument, NULL, 'C' },
156 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
157 { "debug", no_argument, NULL, DEBUG_OPT },
158 { "dictionary-order", no_argument, NULL, 'd' },
159 { "field-separator", required_argument, NULL, 't' },
160 { "files0-from", required_argument, NULL, FF_OPT },
161 { "general-numeric-sort", no_argument, NULL, 'g' },
162 { "heapsort", no_argument, NULL, HEAPSORT_OPT },
163 { "help",no_argument, NULL, HELP_OPT },
164 { "human-numeric-sort", no_argument, NULL, 'h' },
165 { "ignore-leading-blanks", no_argument, NULL, 'b' },
166 { "ignore-case", no_argument, NULL, 'f' },
167 { "ignore-nonprinting", no_argument, NULL, 'i' },
168 { "key", required_argument, NULL, 'k' },
169 { "merge", no_argument, NULL, 'm' },
170 { "mergesort", no_argument, NULL, MERGESORT_OPT },
171 { "mmap", no_argument, NULL, MMAP_OPT },
172 { "month-sort", no_argument, NULL, 'M' },
173 { "numeric-sort", no_argument, NULL, 'n' },
174 { "output", required_argument, NULL, 'o' },
175 #if defined(SORT_THREADS)
176 { "parallel", required_argument, NULL, PARALLEL_OPT },
177 #endif
178 { "qsort", no_argument, NULL, QSORT_OPT },
179 { "radixsort", no_argument, NULL, RADIXSORT_OPT },
180 { "random-sort", no_argument, NULL, 'R' },
181 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
182 { "reverse", no_argument, NULL, 'r' },
183 { "sort", required_argument, NULL, SORT_OPT },
184 { "stable", no_argument, NULL, 's' },
185 { "temporary-directory",required_argument, NULL, 'T' },
186 { "unique", no_argument, NULL, 'u' },
187 { "version", no_argument, NULL, VERSION_OPT },
188 { "version-sort",no_argument, NULL, 'V' },
189 { "zero-terminated", no_argument, NULL, 'z' },
190 { NULL, no_argument, NULL, 0 }
191 };
192
193 void fix_obsolete_keys(int *argc, char **argv);
194
195 /*
196 * Check where sort modifier is present
197 */
198 static bool
sort_modifier_empty(struct sort_mods * sm)199 sort_modifier_empty(struct sort_mods *sm)
200 {
201
202 if (sm == NULL)
203 return (true);
204 return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
205 sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
206 }
207
/*
 * Print the usage text (message 12) and terminate the program.
 *
 * With opt_err set the text goes to stderr and the exit status is 2;
 * otherwise it goes to stdout and the exit status is 0.
 */
static void
usage(bool opt_err)
{
	FILE *stream = stdout;
	int status = 0;

	if (opt_err) {
		stream = stderr;
		status = 2;
	}

	fprintf(stream, getstr(12), getprogname());
	exit(status);
}
223
224 /*
225 * Read input file names from a file (file0-from option).
226 */
227 static void
read_fns_from_file0(const char * fn)228 read_fns_from_file0(const char *fn)
229 {
230 FILE *f;
231 char *line = NULL;
232 size_t linesize = 0;
233 ssize_t linelen;
234
235 if (fn == NULL)
236 return;
237
238 f = fopen(fn, "r");
239 if (f == NULL)
240 err(2, "%s", fn);
241
242 while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
243 if (*line != '\0') {
244 if (argc_from_file0 == (size_t) - 1)
245 argc_from_file0 = 0;
246 ++argc_from_file0;
247 argv_from_file0 = sort_realloc(argv_from_file0,
248 argc_from_file0 * sizeof(char *));
249 if (argv_from_file0 == NULL)
250 err(2, NULL);
251 argv_from_file0[argc_from_file0 - 1] = line;
252 } else {
253 free(line);
254 }
255 line = NULL;
256 linesize = 0;
257 }
258 if (ferror(f))
259 err(2, "%s: getdelim", fn);
260
261 closefile(f, fn);
262 }
263
264 /*
265 * Check how much RAM is available for the sort.
266 */
267 static void
set_hw_params(void)268 set_hw_params(void)
269 {
270 long pages, psize;
271
272 #if defined(SORT_THREADS)
273 ncpu = 1;
274 #endif
275
276 pages = sysconf(_SC_PHYS_PAGES);
277 if (pages < 1) {
278 perror("sysconf pages");
279 pages = 1;
280 }
281 psize = sysconf(_SC_PAGESIZE);
282 if (psize < 1) {
283 perror("sysconf psize");
284 psize = 4096;
285 }
286 #if defined(SORT_THREADS)
287 ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
288 if (ncpu < 1)
289 ncpu = 1;
290 else if(ncpu > 32)
291 ncpu = 32;
292
293 nthreads = ncpu;
294 #endif
295
296 free_memory = (unsigned long long) pages * (unsigned long long) psize;
297 available_free_memory = free_memory / 2;
298
299 if (available_free_memory < 1024)
300 available_free_memory = 1024;
301 }
302
/*
 * Convert the first multibyte character of c into *wc.
 *
 * If mbtowc() reports an empty string or an invalid sequence (a result
 * below 1), *wc is set to def instead.  Nothing happens when either wc
 * or c is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
318
319 /*
320 * Set current locale symbols.
321 */
322 static void
set_locale(void)323 set_locale(void)
324 {
325 struct lconv *lc;
326 const char *locale;
327
328 setlocale(LC_ALL, "");
329
330 lc = localeconv();
331
332 if (lc) {
333 /* obtain LC_NUMERIC info */
334 /* Convert to wide char form */
335 conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
336 symbol_decimal_point);
337 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
338 symbol_thousands_sep);
339 conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
340 symbol_positive_sign);
341 conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
342 symbol_negative_sign);
343 }
344
345 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
346 gnusort_numeric_compatibility = true;
347
348 locale = setlocale(LC_COLLATE, NULL);
349
350 if (locale) {
351 char *tmpl;
352 const char *cclocale;
353
354 tmpl = sort_strdup(locale);
355 cclocale = setlocale(LC_COLLATE, "C");
356 if (cclocale && !strcmp(cclocale, tmpl))
357 byte_sort = true;
358 else {
359 const char *pclocale;
360
361 pclocale = setlocale(LC_COLLATE, "POSIX");
362 if (pclocale && !strcmp(pclocale, tmpl))
363 byte_sort = true;
364 }
365 setlocale(LC_COLLATE, tmpl);
366 sort_free(tmpl);
367 }
368 }
369
370 /*
371 * Set directory temporary files.
372 */
373 static void
set_tmpdir(void)374 set_tmpdir(void)
375 {
376 char *td;
377
378 td = getenv("TMPDIR");
379 if (td != NULL)
380 tmpdir = sort_strdup(td);
381 }
382
383 /*
384 * Parse -S option.
385 */
386 static unsigned long long
parse_memory_buffer_value(const char * value)387 parse_memory_buffer_value(const char *value)
388 {
389
390 if (value == NULL)
391 return (available_free_memory);
392 else {
393 char *endptr;
394 unsigned long long membuf;
395
396 endptr = NULL;
397 errno = 0;
398 membuf = strtoll(value, &endptr, 10);
399
400 if (errno != 0) {
401 warn("%s",getstr(4));
402 membuf = available_free_memory;
403 } else {
404 switch (*endptr){
405 case 'Y':
406 membuf *= 1024;
407 /* FALLTHROUGH */
408 case 'Z':
409 membuf *= 1024;
410 /* FALLTHROUGH */
411 case 'E':
412 membuf *= 1024;
413 /* FALLTHROUGH */
414 case 'P':
415 membuf *= 1024;
416 /* FALLTHROUGH */
417 case 'T':
418 membuf *= 1024;
419 /* FALLTHROUGH */
420 case 'G':
421 membuf *= 1024;
422 /* FALLTHROUGH */
423 case 'M':
424 membuf *= 1024;
425 /* FALLTHROUGH */
426 case '\0':
427 case 'K':
428 membuf *= 1024;
429 /* FALLTHROUGH */
430 case 'b':
431 break;
432 case '%':
433 membuf = (available_free_memory * membuf) /
434 100;
435 break;
436 default:
437 warnc(EINVAL, "%s", optarg);
438 membuf = available_free_memory;
439 }
440 }
441 return (membuf);
442 }
443 }
444
445 /*
446 * Signal handler that clears the temporary files.
447 */
448 static void
sig_handler(int sig __unused,siginfo_t * siginfo __unused,void * context __unused)449 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
450 void *context __unused)
451 {
452
453 clear_tmp_files();
454 exit(-1);
455 }
456
457 /*
458 * Set signal handler on panic signals.
459 */
460 static void
set_signal_handler(void)461 set_signal_handler(void)
462 {
463 struct sigaction sa;
464
465 memset(&sa, 0, sizeof(sa));
466 sa.sa_sigaction = &sig_handler;
467 sa.sa_flags = SA_SIGINFO;
468
469 if (sigaction(SIGTERM, &sa, NULL) < 0) {
470 perror("sigaction");
471 return;
472 }
473 if (sigaction(SIGHUP, &sa, NULL) < 0) {
474 perror("sigaction");
475 return;
476 }
477 if (sigaction(SIGINT, &sa, NULL) < 0) {
478 perror("sigaction");
479 return;
480 }
481 if (sigaction(SIGQUIT, &sa, NULL) < 0) {
482 perror("sigaction");
483 return;
484 }
485 if (sigaction(SIGABRT, &sa, NULL) < 0) {
486 perror("sigaction");
487 return;
488 }
489 if (sigaction(SIGBUS, &sa, NULL) < 0) {
490 perror("sigaction");
491 return;
492 }
493 if (sigaction(SIGSEGV, &sa, NULL) < 0) {
494 perror("sigaction");
495 return;
496 }
497 if (sigaction(SIGUSR1, &sa, NULL) < 0) {
498 perror("sigaction");
499 return;
500 }
501 if (sigaction(SIGUSR2, &sa, NULL) < 0) {
502 perror("sigaction");
503 return;
504 }
505 }
506
/*
 * Report an unrecognised feature: prints message 3 followed by the
 * offending text and exits with status 2.
 */
static void
unknown(const char *what)
{
	const char *prefix = getstr(3);

	errx(2, "%s: %s", prefix, what);
}
516
517 /*
518 * Check whether contradictory input options are used.
519 */
520 static void
check_mutually_exclusive_flags(char c,bool * mef_flags)521 check_mutually_exclusive_flags(char c, bool *mef_flags)
522 {
523 int fo_index, mec;
524 bool found_others, found_this;
525
526 found_others = found_this = false;
527 fo_index = 0;
528
529 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
530 mec = mutually_exclusive_flags[i];
531
532 if (mec != c) {
533 if (mef_flags[i]) {
534 if (found_this)
535 errx(1, "%c:%c: %s", c, mec, getstr(1));
536 found_others = true;
537 fo_index = i;
538 }
539 } else {
540 if (found_others)
541 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
542 mef_flags[i] = true;
543 found_this = true;
544 }
545 }
546 }
547
548 /*
549 * Initialise sort opts data.
550 */
551 static void
set_sort_opts(void)552 set_sort_opts(void)
553 {
554
555 memset(&default_sort_mods_object, 0,
556 sizeof(default_sort_mods_object));
557 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
558 default_sort_mods_object.func =
559 get_sort_func(&default_sort_mods_object);
560 }
561
562 /*
563 * Set a sort modifier on a sort modifiers object.
564 */
565 static bool
set_sort_modifier(struct sort_mods * sm,int c)566 set_sort_modifier(struct sort_mods *sm, int c)
567 {
568
569 if (sm) {
570 switch (c){
571 case 'b':
572 sm->bflag = true;
573 break;
574 case 'd':
575 sm->dflag = true;
576 break;
577 case 'f':
578 sm->fflag = true;
579 break;
580 case 'g':
581 sm->gflag = true;
582 need_hint = true;
583 break;
584 case 'i':
585 sm->iflag = true;
586 break;
587 case 'R':
588 sm->Rflag = true;
589 need_random = true;
590 break;
591 case 'M':
592 initialise_months();
593 sm->Mflag = true;
594 need_hint = true;
595 break;
596 case 'n':
597 sm->nflag = true;
598 need_hint = true;
599 print_symbols_on_debug = true;
600 break;
601 case 'r':
602 sm->rflag = true;
603 break;
604 case 'V':
605 sm->Vflag = true;
606 break;
607 case 'h':
608 sm->hflag = true;
609 need_hint = true;
610 print_symbols_on_debug = true;
611 break;
612 default:
613 return false;
614 }
615 sort_opts_vals.complex_sort = true;
616 sm->func = get_sort_func(sm);
617 }
618 return (true);
619 }
620
621 /*
622 * Parse POS in -k option.
623 */
624 static int
parse_pos(const char * s,struct key_specs * ks,bool * mef_flags,bool second)625 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
626 {
627 regmatch_t pmatch[4];
628 regex_t re;
629 char *c, *f;
630 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
631 size_t len, nmatch;
632 int ret;
633
634 ret = -1;
635 nmatch = 4;
636 c = f = NULL;
637
638 if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
639 return (-1);
640
641 if (regexec(&re, s, nmatch, pmatch, 0) != 0)
642 goto end;
643
644 if (pmatch[0].rm_eo <= pmatch[0].rm_so)
645 goto end;
646
647 if (pmatch[1].rm_eo <= pmatch[1].rm_so)
648 goto end;
649
650 len = pmatch[1].rm_eo - pmatch[1].rm_so;
651 f = sort_malloc((len + 1) * sizeof(char));
652
653 strncpy(f, s + pmatch[1].rm_so, len);
654 f[len] = '\0';
655
656 if (second) {
657 errno = 0;
658 ks->f2 = (size_t) strtoul(f, NULL, 10);
659 if (errno != 0)
660 err(2, "-k");
661 if (ks->f2 == 0) {
662 warn("%s",getstr(5));
663 goto end;
664 }
665 } else {
666 errno = 0;
667 ks->f1 = (size_t) strtoul(f, NULL, 10);
668 if (errno != 0)
669 err(2, "-k");
670 if (ks->f1 == 0) {
671 warn("%s",getstr(5));
672 goto end;
673 }
674 }
675
676 if (pmatch[2].rm_eo > pmatch[2].rm_so) {
677 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
678 c = sort_malloc((len + 1) * sizeof(char));
679
680 strncpy(c, s + pmatch[2].rm_so + 1, len);
681 c[len] = '\0';
682
683 if (second) {
684 errno = 0;
685 ks->c2 = (size_t) strtoul(c, NULL, 10);
686 if (errno != 0)
687 err(2, "-k");
688 } else {
689 errno = 0;
690 ks->c1 = (size_t) strtoul(c, NULL, 10);
691 if (errno != 0)
692 err(2, "-k");
693 if (ks->c1 == 0) {
694 warn("%s",getstr(6));
695 goto end;
696 }
697 }
698 } else {
699 if (second)
700 ks->c2 = 0;
701 else
702 ks->c1 = 1;
703 }
704
705 if (pmatch[3].rm_eo > pmatch[3].rm_so) {
706 regoff_t i = 0;
707
708 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
709 check_mutually_exclusive_flags(s[i], mef_flags);
710 if (s[i] == 'b') {
711 if (second)
712 ks->pos2b = true;
713 else
714 ks->pos1b = true;
715 } else if (!set_sort_modifier(&(ks->sm), s[i]))
716 goto end;
717 }
718 }
719
720 ret = 0;
721
722 end:
723
724 if (c)
725 sort_free(c);
726 if (f)
727 sort_free(f);
728 regfree(&re);
729
730 return (ret);
731 }
732
733 /*
734 * Parse -k option value.
735 */
736 static int
parse_k(const char * s,struct key_specs * ks)737 parse_k(const char *s, struct key_specs *ks)
738 {
739 int ret = -1;
740 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
741 { false, false, false, false, false, false };
742
743 if (s && *s) {
744 char *sptr;
745
746 sptr = strchr(s, ',');
747 if (sptr) {
748 size_t size1;
749 char *pos1, *pos2;
750
751 size1 = sptr - s;
752
753 if (size1 < 1)
754 return (-1);
755 pos1 = sort_malloc((size1 + 1) * sizeof(char));
756
757 strncpy(pos1, s, size1);
758 pos1[size1] = '\0';
759
760 ret = parse_pos(pos1, ks, mef_flags, false);
761
762 sort_free(pos1);
763 if (ret < 0)
764 return (ret);
765
766 pos2 = sort_strdup(sptr + 1);
767 ret = parse_pos(pos2, ks, mef_flags, true);
768 sort_free(pos2);
769 } else
770 ret = parse_pos(s, ks, mef_flags, false);
771 }
772
773 return (ret);
774 }
775
/*
 * Size of the buffers that receive the modifier letters of an obsolete
 * key position, including the terminating NUL.
 */
enum { OBS_SOPTS_SIZE = 128 };

/*
 * Parse POS in the obsolete +POS -POS syntax: FIELD[.COLUMN][LETTERS].
 *
 * s	the position text without its leading '+' or '-'
 * nf	receives the field number (0 if the text does not parse)
 * nc	receives the column number (0 if absent)
 * sopts	receives the trailing letters; must have room for
 *	OBS_SOPTS_SIZE bytes and is left untouched when there are none
 *
 * Returns 0 on success and -1 when the text does not have this form or
 * carries more letters than sopts can hold.  A number too large for the
 * callers' int arithmetic terminates the program with status 2.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	regex_t re;
	regmatch_t pmatch[4];
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long val;
	size_t len;
	int ret;

	ret = -1;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, 4, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/*
	 * The pattern guarantees every digit run ends at a non-digit, so
	 * the numbers are read in place.  Values are kept below INT_MAX
	 * because the caller adds one to them.
	 */
	errno = 0;
	val = strtoul(s + pmatch[1].rm_so, NULL, 10);
	if (errno != 0 || val >= INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Skip the leading '.' of the column group. */
		errno = 0;
		val = strtoul(s + pmatch[2].rm_so + 1, NULL, 10);
		if (errno != 0 || val >= INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Refuse letter runs that would overflow the caller's buffer. */
		if (len >= OBS_SOPTS_SIZE)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	regfree(&re);

	return (ret);
}

/*
 * "Translate" obsolete +POS1 [-POS2] syntax into new -kPOS1[,POS2] syntax.
 *
 * Every argument of the form +POS1 is replaced by a newly allocated
 * "-k..." string.  If the following argument is a valid -POS2 it is
 * folded into the same key and removed from argv, and *argc is
 * decremented.  Obsolete positions count from zero, -k counts from one,
 * hence the adjustments.  Scanning stops at "--", after which all
 * arguments are file names.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/* Room for "-k", four ints, three separators and both letter runs. */
	char sopt[2 * OBS_SOPTS_SIZE + 64];

	for (int i = 1; i < *argc; i++) {
		char sopts1[OBS_SOPTS_SIZE], sopts2[OBS_SOPTS_SIZE];
		char *arg1, *arg2;
		int c1, f1, c2, f2;

		arg1 = argv[i];

		if (strcmp(arg1, "--") == 0)
			break;

		/* Only "+x..." with at least one character after the '+'. */
		if (arg1[0] != '+' || arg1[1] == '\0')
			continue;

		sopts1[0] = '\0';
		c1 = f1 = 0;
		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;
		f1 += 1;
		c1 += 1;

		arg2 = (i + 1 < *argc) ? argv[i + 1] : NULL;
		if (arg2 != NULL && arg2[0] == '-' && arg2[1] != '\0') {
			sopts2[0] = '\0';
			c2 = f2 = 0;

			if (parse_pos_obs(arg2 + 1, &f2, &c2, sopts2) >= 0) {
				if (c2 > 0)
					f2 += 1;
				snprintf(sopt, sizeof(sopt),
				    "-k%d.%d%s,%d.%d%s",
				    f1, c1, sopts1, f2, c2, sopts2);
				argv[i] = sort_strdup(sopt);

				/* Close the gap left by the consumed -POS2. */
				for (int j = i + 1; j + 1 < *argc; j++)
					argv[j] = argv[j + 1];
				*argc -= 1;
				continue;
			}
		}

		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
906
907 /*
908 * Set random seed
909 */
910 static void
set_random_seed(void)911 set_random_seed(void)
912 {
913 if (need_random) {
914
915 if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
916 FILE* fseed;
917 MD5_CTX ctx;
918 char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
919 size_t sz = 0;
920
921 fseed = openfile(random_source, "r");
922 while (!feof(fseed)) {
923 int cr;
924
925 cr = fgetc(fseed);
926 if (cr == EOF)
927 break;
928
929 rsd[sz++] = (char) cr;
930
931 if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE)
932 break;
933 }
934
935 closefile(fseed, random_source);
936
937 MD5Init(&ctx);
938 MD5Update(&ctx, rsd, sz);
939
940 random_seed = MD5End(&ctx, NULL);
941 random_seed_size = strlen(random_seed);
942
943 } else {
944 MD5_CTX ctx;
945 char *b;
946
947 MD5Init(&ctx);
948 b = MD5File(random_source, NULL);
949 if (b == NULL)
950 err(2, NULL);
951
952 random_seed = b;
953 random_seed_size = strlen(b);
954 }
955
956 MD5Init(&md5_ctx);
957 if(random_seed_size>0) {
958 MD5Update(&md5_ctx, random_seed, random_seed_size);
959 }
960 }
961 }
962
963 /*
964 * Main function.
965 */
966 int
main(int argc,char ** argv)967 main(int argc, char **argv)
968 {
969 char *outfile, *real_outfile;
970 int c, result;
971 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
972 { false, false, false, false, false, false };
973
974 result = 0;
975 outfile = sort_strdup("-");
976 real_outfile = NULL;
977
978 struct sort_mods *sm = &default_sort_mods_object;
979
980 init_tmp_files();
981
982 set_signal_handler();
983
984 set_hw_params();
985 set_locale();
986 set_tmpdir();
987 set_sort_opts();
988
989 fix_obsolete_keys(&argc, argv);
990
991 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
992 != -1)) {
993
994 check_mutually_exclusive_flags(c, mef_flags);
995
996 if (!set_sort_modifier(sm, c)) {
997
998 switch (c) {
999 case 'c':
1000 sort_opts_vals.cflag = true;
1001 if (optarg) {
1002 if (!strcmp(optarg, "diagnose-first"))
1003 ;
1004 else if (!strcmp(optarg, "silent") ||
1005 !strcmp(optarg, "quiet"))
1006 sort_opts_vals.csilentflag = true;
1007 else if (*optarg)
1008 unknown(optarg);
1009 }
1010 break;
1011 case 'C':
1012 sort_opts_vals.cflag = true;
1013 sort_opts_vals.csilentflag = true;
1014 break;
1015 case 'k':
1016 {
1017 sort_opts_vals.complex_sort = true;
1018 sort_opts_vals.kflag = true;
1019
1020 keys_num++;
1021 keys = sort_realloc(keys, keys_num *
1022 sizeof(struct key_specs));
1023 memset(&(keys[keys_num - 1]), 0,
1024 sizeof(struct key_specs));
1025
1026 if (parse_k(optarg, &(keys[keys_num - 1]))
1027 < 0) {
1028 errc(2, EINVAL, "-k %s", optarg);
1029 }
1030
1031 break;
1032 }
1033 case 'm':
1034 sort_opts_vals.mflag = true;
1035 break;
1036 case 'o':
1037 outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1038 strcpy(outfile, optarg);
1039 break;
1040 case 's':
1041 sort_opts_vals.sflag = true;
1042 break;
1043 case 'S':
1044 available_free_memory =
1045 parse_memory_buffer_value(optarg);
1046 break;
1047 case 'T':
1048 tmpdir = sort_strdup(optarg);
1049 break;
1050 case 't':
1051 while (strlen(optarg) > 1) {
1052 if (optarg[0] != '\\') {
1053 errc(2, EINVAL, "%s", optarg);
1054 }
1055 optarg += 1;
1056 if (*optarg == '0') {
1057 *optarg = 0;
1058 break;
1059 }
1060 }
1061 sort_opts_vals.tflag = true;
1062 sort_opts_vals.field_sep = btowc(optarg[0]);
1063 if (sort_opts_vals.field_sep == WEOF) {
1064 errno = EINVAL;
1065 err(2, NULL);
1066 }
1067 if (!gnusort_numeric_compatibility) {
1068 if (symbol_decimal_point == sort_opts_vals.field_sep)
1069 symbol_decimal_point = WEOF;
1070 if (symbol_thousands_sep == sort_opts_vals.field_sep)
1071 symbol_thousands_sep = WEOF;
1072 if (symbol_negative_sign == sort_opts_vals.field_sep)
1073 symbol_negative_sign = WEOF;
1074 if (symbol_positive_sign == sort_opts_vals.field_sep)
1075 symbol_positive_sign = WEOF;
1076 }
1077 break;
1078 case 'u':
1079 sort_opts_vals.uflag = true;
1080 /* stable sort for the correct unique val */
1081 sort_opts_vals.sflag = true;
1082 break;
1083 case 'z':
1084 sort_opts_vals.zflag = true;
1085 break;
1086 case SORT_OPT:
1087 if (optarg) {
1088 if (!strcmp(optarg, "general-numeric"))
1089 set_sort_modifier(sm, 'g');
1090 else if (!strcmp(optarg, "human-numeric"))
1091 set_sort_modifier(sm, 'h');
1092 else if (!strcmp(optarg, "numeric"))
1093 set_sort_modifier(sm, 'n');
1094 else if (!strcmp(optarg, "month"))
1095 set_sort_modifier(sm, 'M');
1096 else if (!strcmp(optarg, "random"))
1097 set_sort_modifier(sm, 'R');
1098 else
1099 unknown(optarg);
1100 }
1101 break;
1102 #if defined(SORT_THREADS)
1103 case PARALLEL_OPT:
1104 nthreads = (size_t)(atoi(optarg));
1105 if (nthreads < 1)
1106 nthreads = 1;
1107 if (nthreads > 1024)
1108 nthreads = 1024;
1109 break;
1110 #endif
1111 case QSORT_OPT:
1112 sort_opts_vals.sort_method = SORT_QSORT;
1113 break;
1114 case MERGESORT_OPT:
1115 sort_opts_vals.sort_method = SORT_MERGESORT;
1116 break;
1117 case MMAP_OPT:
1118 use_mmap = true;
1119 break;
1120 case HEAPSORT_OPT:
1121 sort_opts_vals.sort_method = SORT_HEAPSORT;
1122 break;
1123 case RADIXSORT_OPT:
1124 sort_opts_vals.sort_method = SORT_RADIXSORT;
1125 break;
1126 case RANDOMSOURCE_OPT:
1127 random_source = strdup(optarg);
1128 break;
1129 case COMPRESSPROGRAM_OPT:
1130 compress_program = strdup(optarg);
1131 break;
1132 case FF_OPT:
1133 read_fns_from_file0(optarg);
1134 break;
1135 case BS_OPT:
1136 {
1137 errno = 0;
1138 long mof = strtol(optarg, NULL, 10);
1139 if (errno != 0)
1140 err(2, "--batch-size");
1141 if (mof >= 2)
1142 max_open_files = (size_t) mof + 1;
1143 }
1144 break;
1145 case VERSION_OPT:
1146 printf("%s\n", VERSION);
1147 exit(EXIT_SUCCESS);
1148 /* NOTREACHED */
1149 break;
1150 case DEBUG_OPT:
1151 debug_sort = true;
1152 break;
1153 case HELP_OPT:
1154 usage(false);
1155 /* NOTREACHED */
1156 break;
1157 default:
1158 usage(true);
1159 /* NOTREACHED */
1160 }
1161 }
1162 }
1163
1164 argc -= optind;
1165 argv += optind;
1166
1167 if (argv_from_file0) {
1168 argc = argc_from_file0;
1169 argv = argv_from_file0;
1170 }
1171
1172 #ifndef WITHOUT_NLS
1173 catalog = catopen("sort", NL_CAT_LOCALE);
1174 #endif
1175
1176 if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1177 errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1178
1179 #ifndef WITHOUT_NLS
1180 catclose(catalog);
1181 #endif
1182
1183 if (keys_num == 0) {
1184 keys_num = 1;
1185 keys = sort_realloc(keys, sizeof(struct key_specs));
1186 memset(&(keys[0]), 0, sizeof(struct key_specs));
1187 keys[0].c1 = 1;
1188 keys[0].pos1b = default_sort_mods->bflag;
1189 keys[0].pos2b = default_sort_mods->bflag;
1190 memcpy(&(keys[0].sm), default_sort_mods,
1191 sizeof(struct sort_mods));
1192 }
1193
1194 for (size_t i = 0; i < keys_num; i++) {
1195 struct key_specs *ks;
1196
1197 ks = &(keys[i]);
1198
1199 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1200 !(ks->pos2b)) {
1201 ks->pos1b = sm->bflag;
1202 ks->pos2b = sm->bflag;
1203 memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1204 }
1205
1206 ks->sm.func = get_sort_func(&(ks->sm));
1207 }
1208
1209 if (debug_sort) {
1210 printf("Memory to be used for sorting: %llu\n",available_free_memory);
1211 #if defined(SORT_THREADS)
1212 printf("Number of CPUs: %d\n",(int)ncpu);
1213 nthreads = 1;
1214 #endif
1215 printf("Using collate rules of %s locale\n",
1216 setlocale(LC_COLLATE, NULL));
1217 if (byte_sort)
1218 printf("Byte sort is used\n");
1219 if (print_symbols_on_debug) {
1220 printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1221 if (symbol_thousands_sep)
1222 printf("Thousands separator: <%lc>\n",
1223 symbol_thousands_sep);
1224 printf("Positive sign: <%lc>\n", symbol_positive_sign);
1225 printf("Negative sign: <%lc>\n", symbol_negative_sign);
1226 }
1227 }
1228
1229 set_random_seed();
1230
1231 /* Case when the outfile equals one of the input files: */
1232 if (strcmp(outfile, "-")) {
1233
1234 for(int i = 0; i < argc; ++i) {
1235 if (strcmp(argv[i], outfile) == 0) {
1236 real_outfile = sort_strdup(outfile);
1237 for(;;) {
1238 char* tmp = sort_malloc(strlen(outfile) +
1239 strlen(".tmp") + 1);
1240
1241 strcpy(tmp, outfile);
1242 strcpy(tmp + strlen(tmp), ".tmp");
1243 sort_free(outfile);
1244 outfile = tmp;
1245 if (access(outfile, F_OK) < 0)
1246 break;
1247 }
1248 tmp_file_atexit(outfile);
1249 }
1250 }
1251 }
1252
1253 #if defined(SORT_THREADS)
1254 if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1255 nthreads = 1;
1256 #endif
1257
1258 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1259 struct file_list fl;
1260 struct sort_list list;
1261
1262 sort_list_init(&list);
1263 file_list_init(&fl, true);
1264
1265 if (argc < 1)
1266 procfile("-", &list, &fl);
1267 else {
1268 while (argc > 0) {
1269 procfile(*argv, &list, &fl);
1270 --argc;
1271 ++argv;
1272 }
1273 }
1274
1275 if (fl.count < 1)
1276 sort_list_to_file(&list, outfile);
1277 else {
1278 if (list.count > 0) {
1279 char *flast = new_tmp_file_name();
1280
1281 sort_list_to_file(&list, flast);
1282 file_list_add(&fl, flast, false);
1283 }
1284 merge_files(&fl, outfile);
1285 }
1286
1287 file_list_clean(&fl);
1288
1289 /*
1290 * We are about to exit the program, so we can ignore
1291 * the clean-up for speed
1292 *
1293 * sort_list_clean(&list);
1294 */
1295
1296 } else if (sort_opts_vals.cflag) {
1297 result = (argc == 0) ? (check("-")) : (check(*argv));
1298 } else if (sort_opts_vals.mflag) {
1299 struct file_list fl;
1300
1301 file_list_init(&fl, false);
1302 /* No file arguments remaining means "read from stdin." */
1303 if (argc == 0)
1304 file_list_add(&fl, "-", true);
1305 else
1306 file_list_populate(&fl, argc, argv, true);
1307 merge_files(&fl, outfile);
1308 file_list_clean(&fl);
1309 }
1310
1311 if (real_outfile) {
1312 unlink(real_outfile);
1313 if (rename(outfile, real_outfile) < 0)
1314 err(2, NULL);
1315 sort_free(real_outfile);
1316 }
1317
1318 sort_free(outfile);
1319
1320 return (result);
1321 }
1322