1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (C) 2009 Gabor Kovesdan <[email protected]>
5 * Copyright (C) 2012 Oleg Moskalenko <[email protected]>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/stat.h>
34 #include <sys/sysctl.h>
35 #include <sys/types.h>
36
37 #include <err.h>
38 #include <errno.h>
39 #include <fcntl.h>
40 #include <getopt.h>
41 #include <limits.h>
42 #include <locale.h>
43 #include <md5.h>
44 #include <regex.h>
45 #include <signal.h>
46 #include <stdbool.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <wchar.h>
52 #include <wctype.h>
53
54 #include "coll.h"
55 #include "file.h"
56 #include "sort.h"
57
#ifndef WITHOUT_NLS
#include <nl_types.h>
/*
 * Message catalogue descriptor.  Holds (nl_catd)-1 until main() assigns
 * the result of catopen(); main() closes it again before returning if it
 * is not (nl_catd)-1.
 */
nl_catd catalog = (nl_catd)-1;
#endif
62
/* Short option string handed to getopt_long() in main(). */
#define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz"

/* Set by set_sort_modifier() for 'R'; main() then calls get_random_seed(). */
static bool need_random;

/* MD5 context that get_random_seed() initialises and feeds with the seed. */
MD5_CTX md5_ctx;
68
/*
 * Default messages to use when NLS is disabled or no catalogue
 * is found.
 *
 * The numbered comments give each string's array index; index 0 is an
 * empty placeholder.  This file refers to the messages by those numbers
 * through getstr(), e.g. getstr(12) in usage().
 */
const char *nlsstr[] = { "",
/* 1*/"mutually exclusive flags",
/* 2*/"extra argument not allowed with -c",
/* 3*/"Unknown feature",
/* 4*/"Wrong memory buffer specification",
/* 5*/"0 field in key specs",
/* 6*/"0 column in key specs",
/* 7*/"Wrong file mode",
/* 8*/"Cannot open file for reading",
/* 9*/"Radix sort cannot be used with these sort options",
/*10*/"The chosen sort method cannot be used with stable and/or unique sort",
/*11*/"Invalid key position",
/*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
      "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
      "[-o outfile] [--batch-size size] [--files0-from file] "
      "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
      "[--mmap] "
#if defined(SORT_THREADS)
      "[--parallel thread_no] "
#endif
      "[--human-numeric-sort] "
      "[--version-sort] [--random-sort [--random-source file]] "
      "[--compress-program program] [file ...]\n" };
96
/* Global sort options; zeroed by set_sort_opts() and filled in by main(). */
struct sort_opts sort_opts_vals;

/* Set by the --debug option. */
bool debug_sort;
/* Set by set_sort_modifier() for the g, R, M, n and h modifiers. */
bool need_hint;

/* MB_CUR_MAX as sampled by set_locale() after setlocale(LC_ALL, ""). */
size_t mb_cur_max;

#if defined(SORT_THREADS)
/* CPU count determined by set_hw_params(); never more than 32. */
unsigned int ncpu = 1;
/* Thread count: set to ncpu by set_hw_params(), adjustable via --parallel. */
size_t nthreads = 1;
#endif

/* Set by set_locale() when GNUSORT_NUMERIC_COMPATIBILITY is in the environment. */
static bool gnusort_numeric_compatibility;

/*
 * Modifiers given as global options.  main() copies them into every key
 * that has no modifiers of its own.
 */
static struct sort_mods default_sort_mods_object;
struct sort_mods * const default_sort_mods = &default_sort_mods_object;

/* Set for the n and h modifiers; makes --debug print the numeric symbols. */
static bool print_symbols_on_debug;

/*
 * Arguments read from a file (when the --files0-from option is used).
 * argc_from_file0 stays (size_t)-1 until the first name has been stored.
 */
static size_t argc_from_file0 = (size_t)-1;
static char **argv_from_file0;
121
/*
 * Placeholder symbols for options which have no single-character equivalent.
 * Numbering starts at CHAR_MAX + 1, so none of these values can equal an
 * option letter.
 */
enum
{
	SORT_OPT = CHAR_MAX + 1,	/* --sort */
	HELP_OPT,			/* --help */
	FF_OPT,				/* --files0-from */
	BS_OPT,				/* --batch-size */
	VERSION_OPT,			/* --version */
	DEBUG_OPT,			/* --debug */
#if defined(SORT_THREADS)
	PARALLEL_OPT,			/* --parallel */
#endif
	RANDOMSOURCE_OPT,		/* --random-source */
	COMPRESSPROGRAM_OPT,		/* --compress-program */
	QSORT_OPT,			/* --qsort */
	MERGESORT_OPT,			/* --mergesort */
	HEAPSORT_OPT,			/* --heapsort */
	RADIXSORT_OPT,			/* --radixsort */
	MMAP_OPT			/* --mmap */
};
144
/*
 * Option letters of which at most one may be given.  The bool arrays passed
 * to check_mutually_exclusive_flags() are indexed in parallel with this one.
 */
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
147
/*
 * Long option table for getopt_long().  Entries whose last member is a
 * character share the handling of the corresponding short option in main();
 * the others return one of the *_OPT placeholder values.
 */
static struct option long_options[] = {
	{ "batch-size", required_argument, NULL, BS_OPT },
	{ "buffer-size", required_argument, NULL, 'S' },
	{ "check", optional_argument, NULL, 'c' },
	/*
	 * NOTE(review): this name contains '='; it reads like documentation
	 * of the argument of --check rather than a name that can be matched
	 * on the command line -- confirm.
	 */
	{ "check=silent|quiet", optional_argument, NULL, 'C' },
	{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
	{ "debug", no_argument, NULL, DEBUG_OPT },
	{ "dictionary-order", no_argument, NULL, 'd' },
	{ "field-separator", required_argument, NULL, 't' },
	{ "files0-from", required_argument, NULL, FF_OPT },
	{ "general-numeric-sort", no_argument, NULL, 'g' },
	{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
	{ "help",no_argument, NULL, HELP_OPT },
	{ "human-numeric-sort", no_argument, NULL, 'h' },
	{ "ignore-leading-blanks", no_argument, NULL, 'b' },
	{ "ignore-case", no_argument, NULL, 'f' },
	{ "ignore-nonprinting", no_argument, NULL, 'i' },
	{ "key", required_argument, NULL, 'k' },
	{ "merge", no_argument, NULL, 'm' },
	{ "mergesort", no_argument, NULL, MERGESORT_OPT },
	{ "mmap", no_argument, NULL, MMAP_OPT },
	{ "month-sort", no_argument, NULL, 'M' },
	{ "numeric-sort", no_argument, NULL, 'n' },
	{ "output", required_argument, NULL, 'o' },
#if defined(SORT_THREADS)
	{ "parallel", required_argument, NULL, PARALLEL_OPT },
#endif
	{ "qsort", no_argument, NULL, QSORT_OPT },
	{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
	{ "random-sort", no_argument, NULL, 'R' },
	{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
	{ "reverse", no_argument, NULL, 'r' },
	{ "sort", required_argument, NULL, SORT_OPT },
	{ "stable", no_argument, NULL, 's' },
	{ "temporary-directory",required_argument, NULL, 'T' },
	{ "unique", no_argument, NULL, 'u' },
	{ "version", no_argument, NULL, VERSION_OPT },
	{ "version-sort",no_argument, NULL, 'V' },
	{ "zero-terminated", no_argument, NULL, 'z' },
	/* all-zero terminator required by getopt_long() */
	{ NULL, no_argument, NULL, 0 }
};
189
/* Non-static, so it gets its own prototype; the definition is further down. */
void fix_obsolete_keys(int *argc, char **argv);
191
192 /*
193 * Check where sort modifier is present
194 */
195 static bool
sort_modifier_empty(struct sort_mods * sm)196 sort_modifier_empty(struct sort_mods *sm)
197 {
198
199 if (sm == NULL)
200 return (true);
201 return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
202 sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
203 }
204
/*
 * Print the usage text and terminate the program.
 *
 * When opt_err is true the text goes to stderr and the exit status is 2
 * (the command line was wrong); otherwise it goes to stdout and the exit
 * status is 0 (help was asked for).
 */
static void
usage(bool opt_err)
{

	if (opt_err) {
		fprintf(stderr, getstr(12), getprogname());
		exit(2);
	}

	fprintf(stdout, getstr(12), getprogname());
	exit(0);
}
220
221 /*
222 * Read input file names from a file (file0-from option).
223 */
224 static void
read_fns_from_file0(const char * fn)225 read_fns_from_file0(const char *fn)
226 {
227 FILE *f;
228 char *line = NULL;
229 size_t linesize = 0;
230 ssize_t linelen;
231
232 if (fn == NULL)
233 return;
234
235 f = fopen(fn, "r");
236 if (f == NULL)
237 err(2, "%s", fn);
238
239 while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
240 if (*line != '\0') {
241 if (argc_from_file0 == (size_t) - 1)
242 argc_from_file0 = 0;
243 ++argc_from_file0;
244 argv_from_file0 = sort_realloc(argv_from_file0,
245 argc_from_file0 * sizeof(char *));
246 if (argv_from_file0 == NULL)
247 err(2, NULL);
248 argv_from_file0[argc_from_file0 - 1] = line;
249 } else {
250 free(line);
251 }
252 line = NULL;
253 linesize = 0;
254 }
255 if (ferror(f))
256 err(2, "%s: getdelim", fn);
257
258 closefile(f, fn);
259 }
260
261 /*
262 * Check how much RAM is available for the sort.
263 */
264 static void
set_hw_params(void)265 set_hw_params(void)
266 {
267 long pages, psize;
268
269 #if defined(SORT_THREADS)
270 ncpu = 1;
271 #endif
272
273 pages = sysconf(_SC_PHYS_PAGES);
274 if (pages < 1) {
275 perror("sysconf pages");
276 pages = 1;
277 }
278 psize = sysconf(_SC_PAGESIZE);
279 if (psize < 1) {
280 perror("sysconf psize");
281 psize = 4096;
282 }
283 #if defined(SORT_THREADS)
284 ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
285 if (ncpu < 1)
286 ncpu = 1;
287 else if(ncpu > 32)
288 ncpu = 32;
289
290 nthreads = ncpu;
291 #endif
292
293 free_memory = (unsigned long long) pages * (unsigned long long) psize;
294 available_free_memory = free_memory / 2;
295
296 if (available_free_memory < 1024)
297 available_free_memory = 1024;
298 }
299
300 /*
301 * Convert "plain" symbol to wide symbol, with default value.
302 */
303 static void
conv_mbtowc(wchar_t * wc,const char * c,const wchar_t def)304 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
305 {
306
307 if (wc && c) {
308 int res;
309
310 res = mbtowc(wc, c, mb_cur_max);
311 if (res < 1)
312 *wc = def;
313 }
314 }
315
316 /*
317 * Set current locale symbols.
318 */
319 static void
set_locale(void)320 set_locale(void)
321 {
322 struct lconv *lc;
323 const char *locale;
324
325 setlocale(LC_ALL, "");
326
327 mb_cur_max = MB_CUR_MAX;
328
329 lc = localeconv();
330
331 if (lc) {
332 /* obtain LC_NUMERIC info */
333 /* Convert to wide char form */
334 conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
335 symbol_decimal_point);
336 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
337 symbol_thousands_sep);
338 conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
339 symbol_positive_sign);
340 conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
341 symbol_negative_sign);
342 }
343
344 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
345 gnusort_numeric_compatibility = true;
346
347 locale = setlocale(LC_COLLATE, NULL);
348
349 if (locale) {
350 char *tmpl;
351 const char *cclocale;
352
353 tmpl = sort_strdup(locale);
354 cclocale = setlocale(LC_COLLATE, "C");
355 if (cclocale && !strcmp(cclocale, tmpl))
356 byte_sort = true;
357 else {
358 const char *pclocale;
359
360 pclocale = setlocale(LC_COLLATE, "POSIX");
361 if (pclocale && !strcmp(pclocale, tmpl))
362 byte_sort = true;
363 }
364 setlocale(LC_COLLATE, tmpl);
365 sort_free(tmpl);
366 }
367 }
368
369 /*
370 * Set directory temporary files.
371 */
372 static void
set_tmpdir(void)373 set_tmpdir(void)
374 {
375 char *td;
376
377 td = getenv("TMPDIR");
378 if (td != NULL)
379 tmpdir = sort_strdup(td);
380 }
381
382 /*
383 * Parse -S option.
384 */
385 static unsigned long long
parse_memory_buffer_value(const char * value)386 parse_memory_buffer_value(const char *value)
387 {
388
389 if (value == NULL)
390 return (available_free_memory);
391 else {
392 char *endptr;
393 unsigned long long membuf;
394
395 endptr = NULL;
396 errno = 0;
397 membuf = strtoll(value, &endptr, 10);
398
399 if (errno != 0) {
400 warn("%s",getstr(4));
401 membuf = available_free_memory;
402 } else {
403 switch (*endptr){
404 case 'Y':
405 membuf *= 1024;
406 /* FALLTHROUGH */
407 case 'Z':
408 membuf *= 1024;
409 /* FALLTHROUGH */
410 case 'E':
411 membuf *= 1024;
412 /* FALLTHROUGH */
413 case 'P':
414 membuf *= 1024;
415 /* FALLTHROUGH */
416 case 'T':
417 membuf *= 1024;
418 /* FALLTHROUGH */
419 case 'G':
420 membuf *= 1024;
421 /* FALLTHROUGH */
422 case 'M':
423 membuf *= 1024;
424 /* FALLTHROUGH */
425 case '\0':
426 case 'K':
427 membuf *= 1024;
428 /* FALLTHROUGH */
429 case 'b':
430 break;
431 case '%':
432 membuf = (available_free_memory * membuf) /
433 100;
434 break;
435 default:
436 warnc(EINVAL, "%s", optarg);
437 membuf = available_free_memory;
438 }
439 }
440 return (membuf);
441 }
442 }
443
/*
 * Signal handler that clears the temporary files and then terminates the
 * program through exit(-1).  None of the three arguments is used.
 *
 * NOTE(review): exit() is not async-signal-safe, and clear_tmp_files()
 * (defined elsewhere) may not be either -- confirm whether _exit() is
 * wanted here.
 */
static void
sig_handler(int sig __unused, siginfo_t *siginfo __unused,
    void *context __unused)
{

	clear_tmp_files();
	exit(-1);
}
455
456 /*
457 * Set signal handler on panic signals.
458 */
459 static void
set_signal_handler(void)460 set_signal_handler(void)
461 {
462 struct sigaction sa;
463
464 memset(&sa, 0, sizeof(sa));
465 sa.sa_sigaction = &sig_handler;
466 sa.sa_flags = SA_SIGINFO;
467
468 if (sigaction(SIGTERM, &sa, NULL) < 0) {
469 perror("sigaction");
470 return;
471 }
472 if (sigaction(SIGHUP, &sa, NULL) < 0) {
473 perror("sigaction");
474 return;
475 }
476 if (sigaction(SIGINT, &sa, NULL) < 0) {
477 perror("sigaction");
478 return;
479 }
480 if (sigaction(SIGQUIT, &sa, NULL) < 0) {
481 perror("sigaction");
482 return;
483 }
484 if (sigaction(SIGABRT, &sa, NULL) < 0) {
485 perror("sigaction");
486 return;
487 }
488 if (sigaction(SIGBUS, &sa, NULL) < 0) {
489 perror("sigaction");
490 return;
491 }
492 if (sigaction(SIGSEGV, &sa, NULL) < 0) {
493 perror("sigaction");
494 return;
495 }
496 if (sigaction(SIGUSR1, &sa, NULL) < 0) {
497 perror("sigaction");
498 return;
499 }
500 if (sigaction(SIGUSR2, &sa, NULL) < 0) {
501 perror("sigaction");
502 return;
503 }
504 }
505
/*
 * Print message 3 ("Unknown feature" by default) followed by the
 * offending text in what, and exit with status 2.  Does not return.
 */
static void
unknown(const char *what)
{

	errx(2, "%s: %s", getstr(3), what);
}
515
516 /*
517 * Check whether contradictory input options are used.
518 */
519 static void
check_mutually_exclusive_flags(char c,bool * mef_flags)520 check_mutually_exclusive_flags(char c, bool *mef_flags)
521 {
522 int fo_index, mec;
523 bool found_others, found_this;
524
525 found_others = found_this = false;
526 fo_index = 0;
527
528 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
529 mec = mutually_exclusive_flags[i];
530
531 if (mec != c) {
532 if (mef_flags[i]) {
533 if (found_this)
534 errx(1, "%c:%c: %s", c, mec, getstr(1));
535 found_others = true;
536 fo_index = i;
537 }
538 } else {
539 if (found_others)
540 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
541 mef_flags[i] = true;
542 found_this = true;
543 }
544 }
545 }
546
547 /*
548 * Initialise sort opts data.
549 */
550 static void
set_sort_opts(void)551 set_sort_opts(void)
552 {
553
554 memset(&default_sort_mods_object, 0,
555 sizeof(default_sort_mods_object));
556 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
557 default_sort_mods_object.func =
558 get_sort_func(&default_sort_mods_object);
559 }
560
561 /*
562 * Set a sort modifier on a sort modifiers object.
563 */
564 static bool
set_sort_modifier(struct sort_mods * sm,int c)565 set_sort_modifier(struct sort_mods *sm, int c)
566 {
567
568 if (sm == NULL)
569 return (true);
570
571 switch (c){
572 case 'b':
573 sm->bflag = true;
574 break;
575 case 'd':
576 sm->dflag = true;
577 break;
578 case 'f':
579 sm->fflag = true;
580 break;
581 case 'g':
582 sm->gflag = true;
583 need_hint = true;
584 break;
585 case 'i':
586 sm->iflag = true;
587 break;
588 case 'R':
589 sm->Rflag = true;
590 need_hint = true;
591 need_random = true;
592 break;
593 case 'M':
594 initialise_months();
595 sm->Mflag = true;
596 need_hint = true;
597 break;
598 case 'n':
599 sm->nflag = true;
600 need_hint = true;
601 print_symbols_on_debug = true;
602 break;
603 case 'r':
604 sm->rflag = true;
605 break;
606 case 'V':
607 sm->Vflag = true;
608 break;
609 case 'h':
610 sm->hflag = true;
611 need_hint = true;
612 print_symbols_on_debug = true;
613 break;
614 default:
615 return (false);
616 }
617
618 sort_opts_vals.complex_sort = true;
619 sm->func = get_sort_func(sm);
620 return (true);
621 }
622
623 /*
624 * Parse POS in -k option.
625 */
626 static int
parse_pos(const char * s,struct key_specs * ks,bool * mef_flags,bool second)627 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
628 {
629 regmatch_t pmatch[4];
630 regex_t re;
631 char *c, *f;
632 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
633 size_t len, nmatch;
634 int ret;
635
636 ret = -1;
637 nmatch = 4;
638 c = f = NULL;
639
640 if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
641 return (-1);
642
643 if (regexec(&re, s, nmatch, pmatch, 0) != 0)
644 goto end;
645
646 if (pmatch[0].rm_eo <= pmatch[0].rm_so)
647 goto end;
648
649 if (pmatch[1].rm_eo <= pmatch[1].rm_so)
650 goto end;
651
652 len = pmatch[1].rm_eo - pmatch[1].rm_so;
653 f = sort_malloc((len + 1) * sizeof(char));
654
655 strncpy(f, s + pmatch[1].rm_so, len);
656 f[len] = '\0';
657
658 if (second) {
659 errno = 0;
660 ks->f2 = (size_t) strtoul(f, NULL, 10);
661 if (errno != 0)
662 err(2, "-k");
663 if (ks->f2 == 0) {
664 warn("%s",getstr(5));
665 goto end;
666 }
667 } else {
668 errno = 0;
669 ks->f1 = (size_t) strtoul(f, NULL, 10);
670 if (errno != 0)
671 err(2, "-k");
672 if (ks->f1 == 0) {
673 warn("%s",getstr(5));
674 goto end;
675 }
676 }
677
678 if (pmatch[2].rm_eo > pmatch[2].rm_so) {
679 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
680 c = sort_malloc((len + 1) * sizeof(char));
681
682 strncpy(c, s + pmatch[2].rm_so + 1, len);
683 c[len] = '\0';
684
685 if (second) {
686 errno = 0;
687 ks->c2 = (size_t) strtoul(c, NULL, 10);
688 if (errno != 0)
689 err(2, "-k");
690 } else {
691 errno = 0;
692 ks->c1 = (size_t) strtoul(c, NULL, 10);
693 if (errno != 0)
694 err(2, "-k");
695 if (ks->c1 == 0) {
696 warn("%s",getstr(6));
697 goto end;
698 }
699 }
700 } else {
701 if (second)
702 ks->c2 = 0;
703 else
704 ks->c1 = 1;
705 }
706
707 if (pmatch[3].rm_eo > pmatch[3].rm_so) {
708 regoff_t i = 0;
709
710 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
711 check_mutually_exclusive_flags(s[i], mef_flags);
712 if (s[i] == 'b') {
713 if (second)
714 ks->pos2b = true;
715 else
716 ks->pos1b = true;
717 } else if (!set_sort_modifier(&(ks->sm), s[i]))
718 goto end;
719 }
720 }
721
722 ret = 0;
723
724 end:
725
726 if (c)
727 sort_free(c);
728 if (f)
729 sort_free(f);
730 regfree(&re);
731
732 return (ret);
733 }
734
735 /*
736 * Parse -k option value.
737 */
738 static int
parse_k(const char * s,struct key_specs * ks)739 parse_k(const char *s, struct key_specs *ks)
740 {
741 int ret = -1;
742 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
743 { false, false, false, false, false, false };
744
745 if (s && *s) {
746 char *sptr;
747
748 sptr = strchr(s, ',');
749 if (sptr) {
750 size_t size1;
751 char *pos1, *pos2;
752
753 size1 = sptr - s;
754
755 if (size1 < 1)
756 return (-1);
757 pos1 = sort_malloc((size1 + 1) * sizeof(char));
758
759 strncpy(pos1, s, size1);
760 pos1[size1] = '\0';
761
762 ret = parse_pos(pos1, ks, mef_flags, false);
763
764 sort_free(pos1);
765 if (ret < 0)
766 return (ret);
767
768 pos2 = sort_strdup(sptr + 1);
769 ret = parse_pos(pos2, ks, mef_flags, true);
770 sort_free(pos2);
771 } else
772 ret = parse_pos(s, ks, mef_flags, false);
773 }
774
775 return (ret);
776 }
777
/*
 * Size, including the terminating NUL, of the buffers that receive the
 * modifier letters of an obsolete +POS / -POS argument.
 */
#define OBS_SOPTS_SIZE 128

/*
 * Parse POS in +POS -POS option.
 *
 * s has the form FIELD[.COLUMN][LETTERS].  On success *nf and *nc receive
 * the field and column numbers (0 when the column is absent) and the
 * letters, if any, are copied to sopts, which must point to at least
 * OBS_SOPTS_SIZE bytes and is left untouched when there are no letters.
 *
 * Returns 0 on success and -1 if s does not have that form or carries more
 * letters than fit in sopts.  A number too large for the caller to
 * increment as an int terminates the program with status 2.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	regex_t re;
	regmatch_t pmatch[4];
	char *c, *f;
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	int ret;
	size_t len, nmatch;
	unsigned long num;

	ret = -1;
	nmatch = 4;
	c = f = NULL;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/*
	 * Check the modifier letters first so that an over-long string is
	 * refused before anything is written to the caller's buffer.
	 */
	if (pmatch[3].rm_eo > pmatch[3].rm_so &&
	    (size_t)(pmatch[3].rm_eo - pmatch[3].rm_so) >= OBS_SOPTS_SIZE)
		goto end;

	/* field number: sub-match 1 */
	len = pmatch[1].rm_eo - pmatch[1].rm_so;
	f = sort_malloc((len + 1) * sizeof(char));

	memcpy(f, s + pmatch[1].rm_so, len);
	f[len] = '\0';

	/* INT_MAX - 1: the caller adds 1 to turn this into a -k position */
	errno = 0;
	num = strtoul(f, NULL, 10);
	if (errno != 0 || num > (unsigned long)(INT_MAX - 1))
		errx(2, "%s", getstr(11));
	*nf = (int)num;

	/* column number: sub-match 2, which starts with the '.' */
	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
		c = sort_malloc((len + 1) * sizeof(char));

		memcpy(c, s + pmatch[2].rm_so + 1, len);
		c[len] = '\0';

		errno = 0;
		num = strtoul(c, NULL, 10);
		if (errno != 0 || num > (unsigned long)(INT_MAX - 1))
			errx(2, "%s", getstr(11));
		*nc = (int)num;
	}

	/* modifier letters: sub-match 3, length verified above */
	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	if (c)
		sort_free(c);
	if (f)
		sort_free(f);
	regfree(&re);

	return (ret);
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax
 *
 * Every argument before "--" that parses as +POS1 is replaced by a newly
 * allocated "-k..." string.  If the argument after it parses as -POS2 it
 * is folded into the same key and removed from argv, and *argc is
 * decremented.  The obsolete positions count from 0 and -k counts from 1,
 * hence the increments.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/* "-k", four numbers, two dots, a comma and both letter strings */
	char sopt[2 * OBS_SOPTS_SIZE + 64];

	for (int i = 1; i < *argc; i++) {
		char sopts1[OBS_SOPTS_SIZE];
		char *arg1, *arg2;
		int c1, f1;

		arg1 = argv[i];

		if (strcmp(arg1, "--") == 0) {
			/* Following arguments are treated as filenames. */
			break;
		}

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = '\0';
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		arg2 = (i + 1 < *argc) ? argv[i + 1] : NULL;
		if (arg2 != NULL && strlen(arg2) > 1 && arg2[0] == '-') {
			char sopts2[OBS_SOPTS_SIZE];
			int c2, f2;

			sopts2[0] = '\0';
			c2 = f2 = 0;

			if (parse_pos_obs(arg2 + 1, &f2, &c2, sopts2) >= 0) {
				if (c2 > 0)
					f2 += 1;
				snprintf(sopt, sizeof(sopt),
				    "-k%d.%d%s,%d.%d%s",
				    f1, c1, sopts1, f2, c2, sopts2);
				argv[i] = sort_strdup(sopt);

				/* close the gap left by the -POS2 argument */
				for (int j = i + 1; j + 1 < *argc; j++)
					argv[j] = argv[j + 1];
				*argc -= 1;
				/* keep the vector NULL-terminated */
				argv[*argc] = NULL;
				continue;
			}
		}

		/* +POS1 on its own */
		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
913
914 /*
915 * Seed random sort
916 */
917 static void
get_random_seed(const char * random_source)918 get_random_seed(const char *random_source)
919 {
920 char randseed[32];
921 struct stat fsb, rsb;
922 ssize_t rd;
923 int rsfd;
924
925 rsfd = -1;
926 rd = sizeof(randseed);
927
928 if (random_source == NULL) {
929 if (getentropy(randseed, sizeof(randseed)) < 0)
930 err(EX_SOFTWARE, "getentropy");
931 goto out;
932 }
933
934 rsfd = open(random_source, O_RDONLY | O_CLOEXEC);
935 if (rsfd < 0)
936 err(EX_NOINPUT, "open: %s", random_source);
937
938 if (fstat(rsfd, &fsb) != 0)
939 err(EX_SOFTWARE, "fstat");
940
941 if (!S_ISREG(fsb.st_mode) && !S_ISCHR(fsb.st_mode))
942 err(EX_USAGE,
943 "random seed isn't a regular file or /dev/random");
944
945 /*
946 * Regular files: read up to maximum seed size and explicitly
947 * reject longer files.
948 */
949 if (S_ISREG(fsb.st_mode)) {
950 if (fsb.st_size > (off_t)sizeof(randseed))
951 errx(EX_USAGE, "random seed is too large (%jd >"
952 " %zu)!", (intmax_t)fsb.st_size,
953 sizeof(randseed));
954 else if (fsb.st_size < 1)
955 errx(EX_USAGE, "random seed is too small ("
956 "0 bytes)");
957
958 memset(randseed, 0, sizeof(randseed));
959
960 rd = read(rsfd, randseed, fsb.st_size);
961 if (rd < 0)
962 err(EX_SOFTWARE, "reading random seed file %s",
963 random_source);
964 if (rd < (ssize_t)fsb.st_size)
965 errx(EX_SOFTWARE, "short read from %s", random_source);
966 } else if (S_ISCHR(fsb.st_mode)) {
967 if (stat("/dev/random", &rsb) < 0)
968 err(EX_SOFTWARE, "stat");
969
970 if (fsb.st_dev != rsb.st_dev ||
971 fsb.st_ino != rsb.st_ino)
972 errx(EX_USAGE, "random seed is a character "
973 "device other than /dev/random");
974
975 if (getentropy(randseed, sizeof(randseed)) < 0)
976 err(EX_SOFTWARE, "getentropy");
977 }
978
979 out:
980 if (rsfd >= 0)
981 close(rsfd);
982
983 MD5Init(&md5_ctx);
984 MD5Update(&md5_ctx, randseed, rd);
985 }
986
987 /*
988 * Main function.
989 */
990 int
main(int argc,char ** argv)991 main(int argc, char **argv)
992 {
993 char *outfile, *real_outfile;
994 char *random_source = NULL;
995 int c, result;
996 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
997 { false, false, false, false, false, false };
998
999 result = 0;
1000 outfile = sort_strdup("-");
1001 real_outfile = NULL;
1002
1003 struct sort_mods *sm = &default_sort_mods_object;
1004
1005 init_tmp_files();
1006
1007 set_signal_handler();
1008
1009 set_hw_params();
1010 set_locale();
1011 set_tmpdir();
1012 set_sort_opts();
1013
1014 #ifndef WITHOUT_NLS
1015 catalog = catopen("sort", NL_CAT_LOCALE);
1016 #endif
1017
1018 fix_obsolete_keys(&argc, argv);
1019
1020 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1021 != -1)) {
1022
1023 check_mutually_exclusive_flags(c, mef_flags);
1024
1025 if (!set_sort_modifier(sm, c)) {
1026
1027 switch (c) {
1028 case 'c':
1029 sort_opts_vals.cflag = true;
1030 if (optarg) {
1031 if (!strcmp(optarg, "diagnose-first"))
1032 ;
1033 else if (!strcmp(optarg, "silent") ||
1034 !strcmp(optarg, "quiet"))
1035 sort_opts_vals.csilentflag = true;
1036 else if (*optarg)
1037 unknown(optarg);
1038 }
1039 break;
1040 case 'C':
1041 sort_opts_vals.cflag = true;
1042 sort_opts_vals.csilentflag = true;
1043 break;
1044 case 'k':
1045 {
1046 sort_opts_vals.complex_sort = true;
1047 sort_opts_vals.kflag = true;
1048
1049 keys_num++;
1050 keys = sort_realloc(keys, keys_num *
1051 sizeof(struct key_specs));
1052 memset(&(keys[keys_num - 1]), 0,
1053 sizeof(struct key_specs));
1054
1055 if (parse_k(optarg, &(keys[keys_num - 1]))
1056 < 0) {
1057 errc(2, EINVAL, "-k %s", optarg);
1058 }
1059
1060 break;
1061 }
1062 case 'm':
1063 sort_opts_vals.mflag = true;
1064 break;
1065 case 'o':
1066 outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1067 strcpy(outfile, optarg);
1068 break;
1069 case 's':
1070 sort_opts_vals.sflag = true;
1071 break;
1072 case 'S':
1073 available_free_memory =
1074 parse_memory_buffer_value(optarg);
1075 break;
1076 case 'T':
1077 tmpdir = sort_strdup(optarg);
1078 break;
1079 case 't':
1080 while (strlen(optarg) > 1) {
1081 if (optarg[0] != '\\') {
1082 errc(2, EINVAL, "%s", optarg);
1083 }
1084 optarg += 1;
1085 if (*optarg == '0') {
1086 *optarg = 0;
1087 break;
1088 }
1089 }
1090 sort_opts_vals.tflag = true;
1091 sort_opts_vals.field_sep = btowc(optarg[0]);
1092 if (sort_opts_vals.field_sep == WEOF) {
1093 errno = EINVAL;
1094 err(2, NULL);
1095 }
1096 if (!gnusort_numeric_compatibility) {
1097 if (symbol_decimal_point == sort_opts_vals.field_sep)
1098 symbol_decimal_point = WEOF;
1099 if (symbol_thousands_sep == sort_opts_vals.field_sep)
1100 symbol_thousands_sep = WEOF;
1101 if (symbol_negative_sign == sort_opts_vals.field_sep)
1102 symbol_negative_sign = WEOF;
1103 if (symbol_positive_sign == sort_opts_vals.field_sep)
1104 symbol_positive_sign = WEOF;
1105 }
1106 break;
1107 case 'u':
1108 sort_opts_vals.uflag = true;
1109 /* stable sort for the correct unique val */
1110 sort_opts_vals.sflag = true;
1111 break;
1112 case 'z':
1113 sort_opts_vals.zflag = true;
1114 break;
1115 case SORT_OPT:
1116 if (optarg) {
1117 if (!strcmp(optarg, "general-numeric"))
1118 set_sort_modifier(sm, 'g');
1119 else if (!strcmp(optarg, "human-numeric"))
1120 set_sort_modifier(sm, 'h');
1121 else if (!strcmp(optarg, "numeric"))
1122 set_sort_modifier(sm, 'n');
1123 else if (!strcmp(optarg, "month"))
1124 set_sort_modifier(sm, 'M');
1125 else if (!strcmp(optarg, "random"))
1126 set_sort_modifier(sm, 'R');
1127 else
1128 unknown(optarg);
1129 }
1130 break;
1131 #if defined(SORT_THREADS)
1132 case PARALLEL_OPT:
1133 nthreads = (size_t)(atoi(optarg));
1134 if (nthreads < 1)
1135 nthreads = 1;
1136 if (nthreads > 1024)
1137 nthreads = 1024;
1138 break;
1139 #endif
1140 case QSORT_OPT:
1141 sort_opts_vals.sort_method = SORT_QSORT;
1142 break;
1143 case MERGESORT_OPT:
1144 sort_opts_vals.sort_method = SORT_MERGESORT;
1145 break;
1146 case MMAP_OPT:
1147 use_mmap = true;
1148 break;
1149 case HEAPSORT_OPT:
1150 sort_opts_vals.sort_method = SORT_HEAPSORT;
1151 break;
1152 case RADIXSORT_OPT:
1153 sort_opts_vals.sort_method = SORT_RADIXSORT;
1154 break;
1155 case RANDOMSOURCE_OPT:
1156 random_source = strdup(optarg);
1157 break;
1158 case COMPRESSPROGRAM_OPT:
1159 compress_program = strdup(optarg);
1160 break;
1161 case FF_OPT:
1162 read_fns_from_file0(optarg);
1163 break;
1164 case BS_OPT:
1165 {
1166 errno = 0;
1167 long mof = strtol(optarg, NULL, 10);
1168 if (errno != 0)
1169 err(2, "--batch-size");
1170 if (mof >= 2)
1171 max_open_files = (size_t) mof + 1;
1172 }
1173 break;
1174 case VERSION_OPT:
1175 printf("%s\n", VERSION);
1176 exit(EXIT_SUCCESS);
1177 /* NOTREACHED */
1178 break;
1179 case DEBUG_OPT:
1180 debug_sort = true;
1181 break;
1182 case HELP_OPT:
1183 usage(false);
1184 /* NOTREACHED */
1185 break;
1186 default:
1187 usage(true);
1188 /* NOTREACHED */
1189 }
1190 }
1191 }
1192
1193 argc -= optind;
1194 argv += optind;
1195
1196 if (argv_from_file0) {
1197 argc = argc_from_file0;
1198 argv = argv_from_file0;
1199 }
1200
1201 if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1202 errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1203
1204 if (keys_num == 0) {
1205 keys_num = 1;
1206 keys = sort_realloc(keys, sizeof(struct key_specs));
1207 memset(&(keys[0]), 0, sizeof(struct key_specs));
1208 keys[0].c1 = 1;
1209 keys[0].pos1b = default_sort_mods->bflag;
1210 keys[0].pos2b = default_sort_mods->bflag;
1211 memcpy(&(keys[0].sm), default_sort_mods,
1212 sizeof(struct sort_mods));
1213 }
1214
1215 for (size_t i = 0; i < keys_num; i++) {
1216 struct key_specs *ks;
1217
1218 ks = &(keys[i]);
1219
1220 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1221 !(ks->pos2b)) {
1222 ks->pos1b = sm->bflag;
1223 ks->pos2b = sm->bflag;
1224 memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1225 }
1226
1227 ks->sm.func = get_sort_func(&(ks->sm));
1228 }
1229
1230 if (debug_sort) {
1231 printf("Memory to be used for sorting: %llu\n",available_free_memory);
1232 #if defined(SORT_THREADS)
1233 printf("Number of CPUs: %d\n",(int)ncpu);
1234 nthreads = 1;
1235 #endif
1236 printf("Using collate rules of %s locale\n",
1237 setlocale(LC_COLLATE, NULL));
1238 if (byte_sort)
1239 printf("Byte sort is used\n");
1240 if (print_symbols_on_debug) {
1241 printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1242 if (symbol_thousands_sep)
1243 printf("Thousands separator: <%lc>\n",
1244 symbol_thousands_sep);
1245 printf("Positive sign: <%lc>\n", symbol_positive_sign);
1246 printf("Negative sign: <%lc>\n", symbol_negative_sign);
1247 }
1248 }
1249
1250 if (need_random)
1251 get_random_seed(random_source);
1252
1253 /* Case when the outfile equals one of the input files: */
1254 if (strcmp(outfile, "-")) {
1255
1256 for(int i = 0; i < argc; ++i) {
1257 if (strcmp(argv[i], outfile) == 0) {
1258 real_outfile = sort_strdup(outfile);
1259 for(;;) {
1260 char* tmp = sort_malloc(strlen(outfile) +
1261 strlen(".tmp") + 1);
1262
1263 strcpy(tmp, outfile);
1264 strcpy(tmp + strlen(tmp), ".tmp");
1265 sort_free(outfile);
1266 outfile = tmp;
1267 if (access(outfile, F_OK) < 0)
1268 break;
1269 }
1270 tmp_file_atexit(outfile);
1271 }
1272 }
1273 }
1274
1275 #if defined(SORT_THREADS)
1276 if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1277 nthreads = 1;
1278 #endif
1279
1280 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1281 struct file_list fl;
1282 struct sort_list list;
1283
1284 sort_list_init(&list);
1285 file_list_init(&fl, true);
1286
1287 if (argc < 1)
1288 procfile("-", &list, &fl);
1289 else {
1290 while (argc > 0) {
1291 procfile(*argv, &list, &fl);
1292 --argc;
1293 ++argv;
1294 }
1295 }
1296
1297 if (fl.count < 1)
1298 sort_list_to_file(&list, outfile);
1299 else {
1300 if (list.count > 0) {
1301 char *flast = new_tmp_file_name();
1302
1303 sort_list_to_file(&list, flast);
1304 file_list_add(&fl, flast, false);
1305 }
1306 merge_files(&fl, outfile);
1307 }
1308
1309 file_list_clean(&fl);
1310
1311 /*
1312 * We are about to exit the program, so we can ignore
1313 * the clean-up for speed
1314 *
1315 * sort_list_clean(&list);
1316 */
1317
1318 } else if (sort_opts_vals.cflag) {
1319 result = (argc == 0) ? (check("-")) : (check(*argv));
1320 } else if (sort_opts_vals.mflag) {
1321 struct file_list fl;
1322
1323 file_list_init(&fl, false);
1324 /* No file arguments remaining means "read from stdin." */
1325 if (argc == 0)
1326 file_list_add(&fl, "-", true);
1327 else
1328 file_list_populate(&fl, argc, argv, true);
1329 merge_files(&fl, outfile);
1330 file_list_clean(&fl);
1331 }
1332
1333 if (real_outfile) {
1334 unlink(real_outfile);
1335 if (rename(outfile, real_outfile) < 0)
1336 err(2, NULL);
1337 sort_free(real_outfile);
1338 }
1339
1340 sort_free(outfile);
1341
1342 #ifndef WITHOUT_NLS
1343 if (catalog != (nl_catd)-1)
1344 catclose(catalog);
1345 #endif
1346
1347 return (result);
1348 }
1349