1 /*-
2 * Copyright (c) 2007 S.Sam Arun Raj
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/stat.h>
28 #include <sys/types.h>
29
30 #include <ctype.h>
31 #include <err.h>
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <getopt.h>
35 #include <inttypes.h>
36 #include <stdint.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <sysexits.h>
41 #include <unistd.h>
42
43 #include <libelf.h>
44 #include <libelftc.h>
45 #include <gelf.h>
46
47 #include "_elftc.h"
48
49 ELFTC_VCSID("$Id: strings.c 3648 2018-11-22 23:26:43Z emaste $");
50
/* Numeric base used when printing the offset of each string (-o, -t). */
enum radix_style {
	RADIX_DECIMAL = 0,	/* -t d */
	RADIX_HEX,		/* -t x */
	RADIX_OCTAL		/* -t o, -o */
};
56
/* Character encodings selectable with -e. */
enum encoding_style {
	ENCODING_7BIT = 0,	/* -e s: single byte, 7-bit */
	ENCODING_8BIT,		/* -e S: single byte, 8-bit */
	ENCODING_16BIT_BIG,	/* -e b: two bytes, big-endian */
	ENCODING_16BIT_LITTLE,	/* -e l: two bytes, little-endian */
	ENCODING_32BIT_BIG,	/* -e B: four bytes, big-endian */
	ENCODING_32BIT_LITTLE	/* -e L: four bytes, little-endian */
};
65
/*
 * True when 'c' counts as part of a string: it must lie in 0..255 and be
 * either a tab, an isprint() character, or (only when the 8-bit encoding
 * is selected) any value above 127.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define	PRINTABLE(c)						\
	(((c) >= 0 && (c) <= 255) &&				\
	 (isprint((c)) || (c) == '\t' ||			\
	  ((c) > 127 && encoding == ENCODING_8BIT)))
70
71 static int encoding_size, entire_file, show_filename, show_loc;
72 static enum encoding_style encoding;
73 static enum radix_style radix;
74 static intmax_t min_len;
75
/* Long options; each maps onto the short option handled in main(). */
static struct option strings_longopts[] = {
	{ .name = "all", .has_arg = no_argument,
	  .flag = NULL, .val = 'a' },
	{ .name = "bytes", .has_arg = required_argument,
	  .flag = NULL, .val = 'n' },
	{ .name = "encoding", .has_arg = required_argument,
	  .flag = NULL, .val = 'e' },
	{ .name = "help", .has_arg = no_argument,
	  .flag = NULL, .val = 'h' },
	{ .name = "print-file-name", .has_arg = no_argument,
	  .flag = NULL, .val = 'f' },
	{ .name = "radix", .has_arg = required_argument,
	  .flag = NULL, .val = 't' },
	{ .name = "version", .has_arg = no_argument,
	  .flag = NULL, .val = 'v' },
	/* Terminating all-zero entry. */
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
86
/* Forward declarations for the functions defined below. */
long	getcharacter(void);
int	handle_file(const char *name);
int	handle_elf(const char *name, int fd);
int	handle_binary(const char *name, int fd);
int	find_strings(const char *name, off_t offset, off_t size);
void	show_version(void);
void	usage(void);
94
95 /*
96 * strings(1) extracts text(contiguous printable characters)
97 * from elf and binary files.
98 */
99 int
main(int argc,char ** argv)100 main(int argc, char **argv)
101 {
102 int ch, rc;
103
104 rc = 0;
105 min_len = 0;
106 encoding_size = 1;
107 if (elf_version(EV_CURRENT) == EV_NONE)
108 errx(EXIT_FAILURE, "ELF library initialization failed: %s",
109 elf_errmsg(-1));
110
111 while ((ch = getopt_long(argc, argv, "1234567890ae:fhn:ot:Vv",
112 strings_longopts, NULL)) != -1)
113 switch((char)ch) {
114 case 'a':
115 entire_file = 1;
116 break;
117 case 'e':
118 if (*optarg == 's') {
119 encoding = ENCODING_7BIT;
120 } else if (*optarg == 'S') {
121 encoding = ENCODING_8BIT;
122 } else if (*optarg == 'b') {
123 encoding = ENCODING_16BIT_BIG;
124 encoding_size = 2;
125 } else if (*optarg == 'B') {
126 encoding = ENCODING_32BIT_BIG;
127 encoding_size = 4;
128 } else if (*optarg == 'l') {
129 encoding = ENCODING_16BIT_LITTLE;
130 encoding_size = 2;
131 } else if (*optarg == 'L') {
132 encoding = ENCODING_32BIT_LITTLE;
133 encoding_size = 4;
134 } else
135 usage();
136 /* NOTREACHED */
137 break;
138 case 'f':
139 show_filename = 1;
140 break;
141 case 'n':
142 min_len = strtoimax(optarg, (char**)NULL, 10);
143 if (min_len <= 0)
144 errx(EX_USAGE, "option -n should specify a "
145 "positive decimal integer.");
146 break;
147 case 'o':
148 show_loc = 1;
149 radix = RADIX_OCTAL;
150 break;
151 case 't':
152 show_loc = 1;
153 if (*optarg == 'd')
154 radix = RADIX_DECIMAL;
155 else if (*optarg == 'o')
156 radix = RADIX_OCTAL;
157 else if (*optarg == 'x')
158 radix = RADIX_HEX;
159 else
160 usage();
161 /* NOTREACHED */
162 break;
163 case 'v':
164 case 'V':
165 show_version();
166 /* NOTREACHED */
167 case '0':
168 case '1':
169 case '2':
170 case '3':
171 case '4':
172 case '5':
173 case '6':
174 case '7':
175 case '8':
176 case '9':
177 min_len *= 10;
178 min_len += ch - '0';
179 break;
180 case 'h':
181 case '?':
182 default:
183 usage();
184 /* NOTREACHED */
185 }
186 argc -= optind;
187 argv += optind;
188
189 if (!min_len)
190 min_len = 4;
191 if (!*argv)
192 rc = find_strings("{standard input}", 0, 0);
193 else while (*argv) {
194 if (handle_file(*argv) != 0)
195 rc = 1;
196 argv++;
197 }
198 return (rc);
199 }
200
/*
 * Re-open stdin on the named file and hand it to handle_elf().
 * Returns 1 when the name is NULL, the file cannot be opened or no
 * descriptor can be obtained for it; otherwise returns whatever
 * handle_elf() returns.
 */
int
handle_file(const char *name)
{
	int input_fd;

	if (name == NULL)
		return (1);

	/* All later reads go through stdin, so rebind it to this file. */
	if (freopen(name, "rb", stdin) == NULL) {
		warnx("'%s': %s", name, strerror(errno));
		return (1);
	}

	if ((input_fd = fileno(stdin)) < 0)
		return (1);

	return (handle_elf(name, input_fd));
}
219
/*
 * Files that handle_elf() does not process as ELF are passed here and
 * treated as flat binary files; this includes text files and core dumps.
 * The descriptor is rewound and the size reported by fstat() is used as
 * the scan length.  Returns 1 if fstat() fails, otherwise the result of
 * find_strings().
 */
int
handle_binary(const char *name, int fd)
{
	struct stat sb;

	memset(&sb, 0, sizeof(sb));
	(void) lseek(fd, (off_t)0, SEEK_SET);
	if (fstat(fd, &sb) != 0)
		return (1);
	return (find_strings(name, (off_t)0, sb.st_size));
}
235
236 /*
237 * Will analyse a file to see if it ELF, other files including ar(1),
238 * core dumps are passed off and treated as flat binary files. Unlike
239 * GNU size in FreeBSD this routine will not treat ELF object from
240 * different archs as flat binary files(has to overridden using -a).
241 */
242 int
handle_elf(const char * name,int fd)243 handle_elf(const char *name, int fd)
244 {
245 GElf_Ehdr elfhdr;
246 GElf_Shdr shdr;
247 Elf *elf;
248 Elf_Scn *scn;
249 int rc;
250
251 rc = 0;
252 /* If entire file is chosen, treat it as a binary file */
253 if (entire_file)
254 return (handle_binary(name, fd));
255
256 (void) lseek(fd, (off_t)0, SEEK_SET);
257 elf = elf_begin(fd, ELF_C_READ, NULL);
258 if (elf_kind(elf) != ELF_K_ELF) {
259 (void) elf_end(elf);
260 return (handle_binary(name, fd));
261 }
262
263 if (gelf_getehdr(elf, &elfhdr) == NULL) {
264 (void) elf_end(elf);
265 warnx("%s: ELF file could not be processed", name);
266 return (1);
267 }
268
269 if (elfhdr.e_shnum == 0 && elfhdr.e_type == ET_CORE) {
270 (void) elf_end(elf);
271 return (handle_binary(name, fd));
272 } else {
273 scn = NULL;
274 while ((scn = elf_nextscn(elf, scn)) != NULL) {
275 if (gelf_getshdr(scn, &shdr) == NULL)
276 continue;
277 if (shdr.sh_type != SHT_NOBITS &&
278 (shdr.sh_flags & SHF_ALLOC) != 0) {
279 rc = find_strings(name, shdr.sh_offset,
280 shdr.sh_size);
281 }
282 }
283 }
284 (void) elf_end(elf);
285 return (rc);
286 }
287
288 /*
289 * Retrieves a character from input stream based on the encoding
290 * type requested.
291 */
292 long
getcharacter(void)293 getcharacter(void)
294 {
295 long rt;
296 int i;
297 char buf[4], c;
298
299 rt = EOF;
300 for(i = 0; i < encoding_size; i++) {
301 c = getc(stdin);
302 if (feof(stdin))
303 return (EOF);
304 buf[i] = c;
305 }
306
307 switch(encoding) {
308 case ENCODING_7BIT:
309 case ENCODING_8BIT:
310 rt = buf[0];
311 break;
312 case ENCODING_16BIT_BIG:
313 rt = (buf[0] << 8) | buf[1];
314 break;
315 case ENCODING_16BIT_LITTLE:
316 rt = buf[0] | (buf[1] << 8);
317 break;
318 case ENCODING_32BIT_BIG:
319 rt = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
320 ((long) buf[2] << 8) | buf[3];
321 break;
322 case ENCODING_32BIT_LITTLE:
323 rt = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
324 ((long) buf[3] << 24);
325 break;
326 }
327 return (rt);
328 }
329
330 /*
331 * Input stream stdin is read until the end of file is reached or until
332 * the section size is reached in case of ELF files. Contiguous
333 * characters of >= min_size(default 4) will be displayed.
334 */
335 int
find_strings(const char * name,off_t offset,off_t size)336 find_strings(const char *name, off_t offset, off_t size)
337 {
338 off_t cur_off, start_off;
339 char *obuf;
340 long c;
341 int i;
342
343 if ((obuf = (char*)calloc(1, min_len + 1)) == NULL) {
344 (void) fprintf(stderr, "Unable to allocate memory: %s\n",
345 strerror(errno));
346 return (1);
347 }
348
349 (void) fseeko(stdin, offset, SEEK_SET);
350 cur_off = offset;
351 start_off = 0;
352 while(1) {
353 if ((offset + size) && (cur_off >= offset + size))
354 break;
355 start_off = cur_off;
356 memset(obuf, 0, min_len+1);
357 for(i = 0; i < min_len; i++) {
358 c = getcharacter();
359 if (c == EOF && feof(stdin))
360 goto _exit1;
361 if (PRINTABLE(c)) {
362 obuf[i] = c;
363 obuf[i+1] = 0;
364 cur_off += encoding_size;
365 } else {
366 if (encoding == ENCODING_8BIT &&
367 (uint8_t)c > 127) {
368 obuf[i] = c;
369 obuf[i+1] = 0;
370 cur_off += encoding_size;
371 continue;
372 }
373 cur_off += encoding_size;
374 break;
375 }
376 }
377
378 if (i >= min_len && ((cur_off <= offset + size) ||
379 !(offset + size))) {
380 if (show_filename)
381 printf ("%s: ", name);
382 if (show_loc) {
383 switch(radix) {
384 case RADIX_DECIMAL:
385 (void) printf("%7ju ",
386 (uintmax_t)start_off);
387 break;
388 case RADIX_HEX:
389 (void) printf("%7jx ",
390 (uintmax_t)start_off);
391 break;
392 case RADIX_OCTAL:
393 (void) printf("%7jo ",
394 (uintmax_t)start_off);
395 break;
396 }
397 }
398 printf("%s", obuf);
399
400 while(1) {
401 if ((offset + size) &&
402 (cur_off >= offset + size))
403 break;
404 c = getcharacter();
405 cur_off += encoding_size;
406 if (encoding == ENCODING_8BIT &&
407 (uint8_t)c > 127) {
408 putchar(c);
409 continue;
410 }
411 if (!PRINTABLE(c) || c == EOF)
412 break;
413 putchar(c);
414 }
415 putchar('\n');
416 }
417 }
418 _exit1:
419 free(obuf);
420 return (0);
421 }
422
/* printf-style usage text; the single %s is the program name. */
#define	USAGE_MESSAGE							\
	"Usage: %s [options] [file...]\n"				\
	"Print contiguous sequences of printable characters.\n\n"	\
	"Options:\n"							\
	"-a | --all Scan the entire file for strings.\n"		\
	"-e ENC | --encoding=ENC Select the character encoding to use.\n" \
	"-f | --print-file-name Print the file name before each string.\n" \
	"-h | --help Print a help message and exit.\n"			\
	"-n N | --bytes=N | -N Print sequences with 'N' or more characters.\n" \
	"-o Print offsets in octal.\n"					\
	"-t R | --radix=R Print offsets using the radix named by 'R'.\n" \
	"-v | --version Print a version identifier and exit.\n"
435
436 void
usage(void)437 usage(void)
438 {
439 (void) fprintf(stderr, USAGE_MESSAGE, ELFTC_GETPROGNAME());
440 exit(EXIT_FAILURE);
441 }
442
/* Print the program name and library version, then exit successfully. */
void
show_version(void)
{
	const char *progname = ELFTC_GETPROGNAME();

	(void) printf("%s (%s)\n", progname, elftc_version());
	exit(EXIT_SUCCESS);
}
449