1 /*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice immediately at the beginning of the file, without modification,
11 * this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28 /*
29 * apprentice - make one pass through /etc/magic, learning its secrets.
30 */
31
32 #include "file.h"
33
34 #ifndef lint
35 FILE_RCSID("@(#)$File: apprentice.c,v 1.309 2021/09/24 13:59:19 christos Exp $")
36 #endif /* lint */
37
38 #include "magic.h"
39 #include <stdlib.h>
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>
42 #endif
43 #include <stddef.h>
44 #include <string.h>
45 #include <assert.h>
46 #include <ctype.h>
47 #include <fcntl.h>
48 #ifdef QUICK
49 #include <sys/mman.h>
50 #endif
51 #include <dirent.h>
52 #include <limits.h>
53
54
/*
 * EATAB: step the caller's cursor `l' over a run of ASCII whitespace.
 * Expands to a braced block and relies on a pointer variable named `l'
 * being in scope where the macro is used.
 */
#define	EATAB {while (isascii(CAST(unsigned char, *l)) && \
		      isspace(CAST(unsigned char, *l)))  l++;}
/*
 * LOWCASE: map an upper-case character to lower case and yield any
 * other value unchanged.  The argument is evaluated more than once.
 */
#define LOWCASE(c) (isupper(CAST(unsigned char, c)) ? \
			tolower(CAST(unsigned char, c)) : (c))
59 /*
60 * Work around a bug in headers on Digital Unix.
61 * At least confirmed for: OSF1 V4.0 878
62 */
#if defined(__osf__) && defined(__DECC)
# ifdef MAP_FAILED
#  undef MAP_FAILED
# endif
#endif

/* Provide the mmap(2) names below when the system headers do not. */
#ifndef MAP_FAILED
# define MAP_FAILED (void *) -1
#endif

#ifndef MAP_FILE
# define MAP_FILE 0
#endif

/*
 * Allocation step sizes.  ALLOC_INCR is the number of slots addentry()
 * adds each time a magic_entry_set is full.
 */
#define ALLOC_CHUNK	CAST(size_t, 10)
#define ALLOC_INCR	CAST(size_t, 200)

/*
 * Values for magic_map.type; apprentice_unmap() uses them to decide how
 * the storage behind a map is released:
 *   USER   - nothing but the map structure itself is released
 *   MALLOC - the buffers are released with free()
 *   MMAP   - the region is released with munmap()
 */
#define MAP_TYPE_USER	0
#define MAP_TYPE_MALLOC	1
#define MAP_TYPE_MMAP	2
83
/*
 * The entry currently being assembled by the parser.  load_1() treats
 * mp == NULL as "no current entry".
 */
struct magic_entry {
	struct magic *mp;	/* magic records belonging to this entry */
	uint32_t cont_count;	/* presumably records in use - confirm in parse() */
	uint32_t max_count;	/* presumably records allocated - confirm in parse() */
};
89
/*
 * Growable array of magic entries; addentry() appends to it and grows
 * it by ALLOC_INCR slots whenever count reaches max.
 */
struct magic_entry_set {
	struct magic_entry *me;	/* the entries */
	uint32_t count;		/* slots in use */
	uint32_t max;		/* slots allocated */
};
95
96 struct magic_map {
97 void *p;
98 size_t len;
99 int type;
100 struct magic *magic[MAGIC_SETS];
101 uint32_t nmagic[MAGIC_SETS];
102 };
103
104 int file_formats[FILE_NAMES_SIZE];
105 const size_t file_nformats = FILE_NAMES_SIZE;
106 const char *file_names[FILE_NAMES_SIZE];
107 const size_t file_nnames = FILE_NAMES_SIZE;
108
109 private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
110 private int hextoint(int);
111 private const char *getstr(struct magic_set *, struct magic *, const char *,
112 int);
113 private int parse(struct magic_set *, struct magic_entry *, const char *,
114 size_t, int);
115 private void eatsize(const char **);
116 private int apprentice_1(struct magic_set *, const char *, int);
117 private size_t apprentice_magic_strength(const struct magic *);
118 private int apprentice_sort(const void *, const void *);
119 private void apprentice_list(struct mlist *, int );
120 private struct magic_map *apprentice_load(struct magic_set *,
121 const char *, int);
122 private struct mlist *mlist_alloc(void);
123 private void mlist_free_all(struct magic_set *);
124 private void mlist_free(struct mlist *);
125 private void byteswap(struct magic *, uint32_t);
126 private void bs1(struct magic *);
127 private uint16_t swap2(uint16_t);
128 private uint32_t swap4(uint32_t);
129 private uint64_t swap8(uint64_t);
130 private char *mkdbname(struct magic_set *, const char *, int);
131 private struct magic_map *apprentice_buf(struct magic_set *, struct magic *,
132 size_t);
133 private struct magic_map *apprentice_map(struct magic_set *, const char *);
134 private int check_buffer(struct magic_set *, struct magic_map *, const char *);
135 private void apprentice_unmap(struct magic_map *);
136 private int apprentice_compile(struct magic_set *, struct magic_map *,
137 const char *);
138 private int check_format_type(const char *, int, const char **);
139 private int check_format(struct magic_set *, struct magic *);
140 private int get_op(char);
141 private int parse_mime(struct magic_set *, struct magic_entry *, const char *,
142 size_t);
143 private int parse_strength(struct magic_set *, struct magic_entry *,
144 const char *, size_t);
145 private int parse_apple(struct magic_set *, struct magic_entry *, const char *,
146 size_t);
147 private int parse_ext(struct magic_set *, struct magic_entry *, const char *,
148 size_t);
149
150
151 private size_t magicsize = sizeof(struct magic);
152
153 private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
154
155 private struct {
156 const char *name;
157 size_t len;
158 int (*fun)(struct magic_set *, struct magic_entry *, const char *,
159 size_t);
160 } bang[] = {
161 #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
162 DECLARE_FIELD(mime),
163 DECLARE_FIELD(apple),
164 DECLARE_FIELD(ext),
165 DECLARE_FIELD(strength),
166 #undef DECLARE_FIELD
167 { NULL, 0, NULL }
168 };
169
#ifdef COMPILE_ONLY

int main(int, char *[]);

/*
 * Stand-alone magic compiler.  Expects exactly one argument naming the
 * magic source and compiles it with magic_compile(); problems are
 * reported on stderr.  Exit status is 0 on success and 1 otherwise.
 */
int
main(int argc, char *argv[])
{
	struct magic_set *ms;
	char *progname;
	int ret = 0;

	/* Diagnostics use the last path component of argv[0]. */
	progname = strrchr(argv[0], '/');
	if (progname == NULL)
		progname = argv[0];
	else
		progname++;

	if (argc != 2) {
		(void)fprintf(stderr, "Usage: %s file\n", progname);
		return 1;
	}

	ms = magic_open(MAGIC_CHECK);
	if (ms == NULL) {
		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
		return 1;
	}

	if (magic_compile(ms, argv[1]) == -1) {
		ret = 1;
		(void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
	}
	magic_close(ms);
	return ret;
}
#endif /* COMPILE_ONLY */
202
/* One row of a keyword table searched by get_type(). */
struct type_tbl_s {
	const char name[16];	/* keyword text */
	const size_t len;	/* length of name; 0 marks the last row */
	const int type;		/* FILE_* type code */
	const int format;	/* FILE_FMT_* class stored in file_formats[] */
};
209
210 /*
211 * XXX - the actual Single UNIX Specification says that "long" means "long",
212 * as in the C data type, but we treat it as meaning "4-byte integer".
213 * Given that the OS X version of file 5.04 did the same, I guess that passes
214 * the actual test; having "long" be dependent on how big a "long" is on
215 * the machine running "file" is silly.
216 */
217 static const struct type_tbl_s type_tbl[] = {
218 # define XX(s) s, (sizeof(s) - 1)
219 # define XX_NULL "", 0
220 { XX("invalid"), FILE_INVALID, FILE_FMT_NONE },
221 { XX("byte"), FILE_BYTE, FILE_FMT_NUM },
222 { XX("short"), FILE_SHORT, FILE_FMT_NUM },
223 { XX("default"), FILE_DEFAULT, FILE_FMT_NONE },
224 { XX("long"), FILE_LONG, FILE_FMT_NUM },
225 { XX("string"), FILE_STRING, FILE_FMT_STR },
226 { XX("date"), FILE_DATE, FILE_FMT_STR },
227 { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM },
228 { XX("belong"), FILE_BELONG, FILE_FMT_NUM },
229 { XX("bedate"), FILE_BEDATE, FILE_FMT_STR },
230 { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM },
231 { XX("lelong"), FILE_LELONG, FILE_FMT_NUM },
232 { XX("ledate"), FILE_LEDATE, FILE_FMT_STR },
233 { XX("pstring"), FILE_PSTRING, FILE_FMT_STR },
234 { XX("ldate"), FILE_LDATE, FILE_FMT_STR },
235 { XX("beldate"), FILE_BELDATE, FILE_FMT_STR },
236 { XX("leldate"), FILE_LELDATE, FILE_FMT_STR },
237 { XX("regex"), FILE_REGEX, FILE_FMT_STR },
238 { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR },
239 { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR },
240 { XX("search"), FILE_SEARCH, FILE_FMT_STR },
241 { XX("medate"), FILE_MEDATE, FILE_FMT_STR },
242 { XX("meldate"), FILE_MELDATE, FILE_FMT_STR },
243 { XX("melong"), FILE_MELONG, FILE_FMT_NUM },
244 { XX("quad"), FILE_QUAD, FILE_FMT_QUAD },
245 { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD },
246 { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD },
247 { XX("qdate"), FILE_QDATE, FILE_FMT_STR },
248 { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR },
249 { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR },
250 { XX("qldate"), FILE_QLDATE, FILE_FMT_STR },
251 { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR },
252 { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR },
253 { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT },
254 { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT },
255 { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT },
256 { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE },
257 { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE },
258 { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE },
259 { XX("leid3"), FILE_LEID3, FILE_FMT_NUM },
260 { XX("beid3"), FILE_BEID3, FILE_FMT_NUM },
261 { XX("indirect"), FILE_INDIRECT, FILE_FMT_NUM },
262 { XX("qwdate"), FILE_QWDATE, FILE_FMT_STR },
263 { XX("leqwdate"), FILE_LEQWDATE, FILE_FMT_STR },
264 { XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR },
265 { XX("name"), FILE_NAME, FILE_FMT_NONE },
266 { XX("use"), FILE_USE, FILE_FMT_NONE },
267 { XX("clear"), FILE_CLEAR, FILE_FMT_NONE },
268 { XX("der"), FILE_DER, FILE_FMT_STR },
269 { XX("guid"), FILE_GUID, FILE_FMT_STR },
270 { XX("offset"), FILE_OFFSET, FILE_FMT_QUAD },
271 { XX("bevarint"), FILE_BEVARINT, FILE_FMT_STR },
272 { XX("levarint"), FILE_LEVARINT, FILE_FMT_STR },
273 { XX_NULL, FILE_INVALID, FILE_FMT_NONE },
274 };
275
276 /*
277 * These are not types, and cannot be preceded by "u" to make them
278 * unsigned.
279 */
280 static const struct type_tbl_s special_tbl[] = {
281 { XX("der"), FILE_DER, FILE_FMT_STR },
282 { XX("name"), FILE_NAME, FILE_FMT_STR },
283 { XX("use"), FILE_USE, FILE_FMT_STR },
284 { XX_NULL, FILE_INVALID, FILE_FMT_NONE },
285 };
286 # undef XX
287 # undef XX_NULL
288
289 private int
get_type(const struct type_tbl_s * tbl,const char * l,const char ** t)290 get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
291 {
292 const struct type_tbl_s *p;
293
294 for (p = tbl; p->len; p++) {
295 if (strncmp(l, p->name, p->len) == 0) {
296 if (t)
297 *t = l + p->len;
298 break;
299 }
300 }
301 return p->type;
302 }
303
304 private off_t
maxoff_t(void)305 maxoff_t(void) {
306 if (/*CONSTCOND*/sizeof(off_t) == sizeof(int))
307 return CAST(off_t, INT_MAX);
308 if (/*CONSTCOND*/sizeof(off_t) == sizeof(long))
309 return CAST(off_t, LONG_MAX);
310 return 0x7fffffff;
311 }
312
313 private int
get_standard_integer_type(const char * l,const char ** t)314 get_standard_integer_type(const char *l, const char **t)
315 {
316 int type;
317
318 if (isalpha(CAST(unsigned char, l[1]))) {
319 switch (l[1]) {
320 case 'C':
321 /* "dC" and "uC" */
322 type = FILE_BYTE;
323 break;
324 case 'S':
325 /* "dS" and "uS" */
326 type = FILE_SHORT;
327 break;
328 case 'I':
329 case 'L':
330 /*
331 * "dI", "dL", "uI", and "uL".
332 *
333 * XXX - the actual Single UNIX Specification says
334 * that "L" means "long", as in the C data type,
335 * but we treat it as meaning "4-byte integer".
336 * Given that the OS X version of file 5.04 did
337 * the same, I guess that passes the actual SUS
338 * validation suite; having "dL" be dependent on
339 * how big a "long" is on the machine running
340 * "file" is silly.
341 */
342 type = FILE_LONG;
343 break;
344 case 'Q':
345 /* "dQ" and "uQ" */
346 type = FILE_QUAD;
347 break;
348 default:
349 /* "d{anything else}", "u{anything else}" */
350 return FILE_INVALID;
351 }
352 l += 2;
353 } else if (isdigit(CAST(unsigned char, l[1]))) {
354 /*
355 * "d{num}" and "u{num}"; we only support {num} values
356 * of 1, 2, 4, and 8 - the Single UNIX Specification
357 * doesn't say anything about whether arbitrary
358 * values should be supported, but both the Solaris 10
359 * and OS X Mountain Lion versions of file passed the
360 * Single UNIX Specification validation suite, and
361 * neither of them support values bigger than 8 or
362 * non-power-of-2 values.
363 */
364 if (isdigit(CAST(unsigned char, l[2]))) {
365 /* Multi-digit, so > 9 */
366 return FILE_INVALID;
367 }
368 switch (l[1]) {
369 case '1':
370 type = FILE_BYTE;
371 break;
372 case '2':
373 type = FILE_SHORT;
374 break;
375 case '4':
376 type = FILE_LONG;
377 break;
378 case '8':
379 type = FILE_QUAD;
380 break;
381 default:
382 /* XXX - what about 3, 5, 6, or 7? */
383 return FILE_INVALID;
384 }
385 l += 2;
386 } else {
387 /*
388 * "d" or "u" by itself.
389 */
390 type = FILE_LONG;
391 ++l;
392 }
393 if (t)
394 *t = l;
395 return type;
396 }
397
398 private void
init_file_tables(void)399 init_file_tables(void)
400 {
401 static int done = 0;
402 const struct type_tbl_s *p;
403
404 if (done)
405 return;
406 done++;
407
408 for (p = type_tbl; p->len; p++) {
409 assert(p->type < FILE_NAMES_SIZE);
410 file_names[p->type] = p->name;
411 file_formats[p->type] = p->format;
412 }
413 assert(p - type_tbl == FILE_NAMES_SIZE);
414 }
415
416 private int
add_mlist(struct mlist * mlp,struct magic_map * map,size_t idx)417 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
418 {
419 struct mlist *ml;
420
421 mlp->map = NULL;
422 if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL)
423 return -1;
424
425 ml->map = idx == 0 ? map : NULL;
426 ml->magic = map->magic[idx];
427 ml->nmagic = map->nmagic[idx];
428
429 mlp->prev->next = ml;
430 ml->prev = mlp->prev;
431 ml->next = mlp;
432 mlp->prev = ml;
433 return 0;
434 }
435
436 /*
437 * Handle one file or directory.
438 */
439 private int
apprentice_1(struct magic_set * ms,const char * fn,int action)440 apprentice_1(struct magic_set *ms, const char *fn, int action)
441 {
442 struct magic_map *map;
443 #ifndef COMPILE_ONLY
444 struct mlist *ml;
445 size_t i;
446 #endif
447
448 if (magicsize != FILE_MAGICSIZE) {
449 file_error(ms, 0, "magic element size %lu != %lu",
450 CAST(unsigned long, sizeof(*map->magic[0])),
451 CAST(unsigned long, FILE_MAGICSIZE));
452 return -1;
453 }
454
455 if (action == FILE_COMPILE) {
456 map = apprentice_load(ms, fn, action);
457 if (map == NULL)
458 return -1;
459 return apprentice_compile(ms, map, fn);
460 }
461
462 #ifndef COMPILE_ONLY
463 map = apprentice_map(ms, fn);
464 if (map == NULL) {
465 if (ms->flags & MAGIC_CHECK)
466 file_magwarn(ms, "using regular magic file `%s'", fn);
467 map = apprentice_load(ms, fn, action);
468 if (map == NULL)
469 return -1;
470 }
471
472 for (i = 0; i < MAGIC_SETS; i++) {
473 if (add_mlist(ms->mlist[i], map, i) == -1) {
474 /* failed to add to any list, free explicitly */
475 if (i == 0)
476 apprentice_unmap(map);
477 else
478 mlist_free_all(ms);
479 file_oomem(ms, sizeof(*ml));
480 return -1;
481 }
482 }
483
484 if (action == FILE_LIST) {
485 for (i = 0; i < MAGIC_SETS; i++) {
486 printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n",
487 i);
488 apprentice_list(ms->mlist[i], BINTEST);
489 printf("Text patterns:\n");
490 apprentice_list(ms->mlist[i], TEXTTEST);
491 }
492 }
493 return 0;
494 #else
495 return 0;
496 #endif /* COMPILE_ONLY */
497 }
498
499 protected void
file_ms_free(struct magic_set * ms)500 file_ms_free(struct magic_set *ms)
501 {
502 size_t i;
503 if (ms == NULL)
504 return;
505 for (i = 0; i < MAGIC_SETS; i++)
506 mlist_free(ms->mlist[i]);
507 free(ms->o.pbuf);
508 free(ms->o.buf);
509 free(ms->c.li);
510 free(ms);
511 }
512
513 protected struct magic_set *
file_ms_alloc(int flags)514 file_ms_alloc(int flags)
515 {
516 struct magic_set *ms;
517 size_t i, len;
518
519 if ((ms = CAST(struct magic_set *, calloc(CAST(size_t, 1u),
520 sizeof(struct magic_set)))) == NULL)
521 return NULL;
522
523 if (magic_setflags(ms, flags) == -1) {
524 errno = EINVAL;
525 goto free;
526 }
527
528 ms->o.buf = ms->o.pbuf = NULL;
529 ms->o.blen = 0;
530 len = (ms->c.len = 10) * sizeof(*ms->c.li);
531
532 if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL)
533 goto free;
534
535 ms->event_flags = 0;
536 ms->error = -1;
537 for (i = 0; i < MAGIC_SETS; i++)
538 ms->mlist[i] = NULL;
539 ms->file = "unknown";
540 ms->line = 0;
541 ms->indir_max = FILE_INDIR_MAX;
542 ms->name_max = FILE_NAME_MAX;
543 ms->elf_shnum_max = FILE_ELF_SHNUM_MAX;
544 ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
545 ms->elf_notes_max = FILE_ELF_NOTES_MAX;
546 ms->regex_max = FILE_REGEX_MAX;
547 ms->bytes_max = FILE_BYTES_MAX;
548 ms->encoding_max = FILE_ENCODING_MAX;
549 return ms;
550 free:
551 free(ms);
552 return NULL;
553 }
554
555 private void
apprentice_unmap(struct magic_map * map)556 apprentice_unmap(struct magic_map *map)
557 {
558 size_t i;
559 char *p;
560 if (map == NULL)
561 return;
562
563 switch (map->type) {
564 case MAP_TYPE_USER:
565 break;
566 case MAP_TYPE_MALLOC:
567 p = CAST(char *, map->p);
568 for (i = 0; i < MAGIC_SETS; i++) {
569 char *b = RCAST(char *, map->magic[i]);
570 if (p != NULL && b >= p && b <= p + map->len)
571 continue;
572 free(b);
573 }
574 free(p);
575 break;
576 #ifdef QUICK
577 case MAP_TYPE_MMAP:
578 if (map->p && map->p != MAP_FAILED)
579 (void)munmap(map->p, map->len);
580 break;
581 #endif
582 default:
583 abort();
584 }
585 free(map);
586 }
587
588 private struct mlist *
mlist_alloc(void)589 mlist_alloc(void)
590 {
591 struct mlist *mlist;
592 if ((mlist = CAST(struct mlist *, calloc(1, sizeof(*mlist)))) == NULL) {
593 return NULL;
594 }
595 mlist->next = mlist->prev = mlist;
596 return mlist;
597 }
598
599 private void
mlist_free_all(struct magic_set * ms)600 mlist_free_all(struct magic_set *ms)
601 {
602 size_t i;
603
604 for (i = 0; i < MAGIC_SETS; i++) {
605 mlist_free(ms->mlist[i]);
606 ms->mlist[i] = NULL;
607 }
608 }
609
610 private void
mlist_free_one(struct mlist * ml)611 mlist_free_one(struct mlist *ml)
612 {
613 if (ml->map)
614 apprentice_unmap(CAST(struct magic_map *, ml->map));
615 free(ml);
616 }
617
618 private void
mlist_free(struct mlist * mlist)619 mlist_free(struct mlist *mlist)
620 {
621 struct mlist *ml, *next;
622
623 if (mlist == NULL)
624 return;
625
626 for (ml = mlist->next; ml != mlist;) {
627 next = ml->next;
628 mlist_free_one(ml);
629 ml = next;
630 }
631 mlist_free_one(mlist);
632 }
633
634 #ifndef COMPILE_ONLY
635 /* void **bufs: an array of compiled magic files */
636 protected int
buffer_apprentice(struct magic_set * ms,struct magic ** bufs,size_t * sizes,size_t nbufs)637 buffer_apprentice(struct magic_set *ms, struct magic **bufs,
638 size_t *sizes, size_t nbufs)
639 {
640 size_t i, j;
641 struct mlist *ml;
642 struct magic_map *map;
643
644 if (nbufs == 0)
645 return -1;
646
647 (void)file_reset(ms, 0);
648
649 init_file_tables();
650
651 for (i = 0; i < MAGIC_SETS; i++) {
652 mlist_free(ms->mlist[i]);
653 if ((ms->mlist[i] = mlist_alloc()) == NULL) {
654 file_oomem(ms, sizeof(*ms->mlist[i]));
655 goto fail;
656 }
657 }
658
659 for (i = 0; i < nbufs; i++) {
660 map = apprentice_buf(ms, bufs[i], sizes[i]);
661 if (map == NULL)
662 goto fail;
663
664 for (j = 0; j < MAGIC_SETS; j++) {
665 if (add_mlist(ms->mlist[j], map, j) == -1) {
666 file_oomem(ms, sizeof(*ml));
667 goto fail;
668 }
669 }
670 }
671
672 return 0;
673 fail:
674 mlist_free_all(ms);
675 return -1;
676 }
677 #endif
678
679 /* const char *fn: list of magic files and directories */
680 protected int
file_apprentice(struct magic_set * ms,const char * fn,int action)681 file_apprentice(struct magic_set *ms, const char *fn, int action)
682 {
683 char *p, *mfn;
684 int fileerr, errs = -1;
685 size_t i, j;
686
687 (void)file_reset(ms, 0);
688
689 if ((fn = magic_getpath(fn, action)) == NULL)
690 return -1;
691
692 init_file_tables();
693
694 if ((mfn = strdup(fn)) == NULL) {
695 file_oomem(ms, strlen(fn));
696 return -1;
697 }
698
699 for (i = 0; i < MAGIC_SETS; i++) {
700 mlist_free(ms->mlist[i]);
701 if ((ms->mlist[i] = mlist_alloc()) == NULL) {
702 file_oomem(ms, sizeof(*ms->mlist[i]));
703 for (j = 0; j < i; j++) {
704 mlist_free(ms->mlist[j]);
705 ms->mlist[j] = NULL;
706 }
707 free(mfn);
708 return -1;
709 }
710 }
711 fn = mfn;
712
713 while (fn) {
714 p = strchr(fn, PATHSEP);
715 if (p)
716 *p++ = '\0';
717 if (*fn == '\0')
718 break;
719 fileerr = apprentice_1(ms, fn, action);
720 errs = MAX(errs, fileerr);
721 fn = p;
722 }
723
724 free(mfn);
725
726 if (errs == -1) {
727 for (i = 0; i < MAGIC_SETS; i++) {
728 mlist_free(ms->mlist[i]);
729 ms->mlist[i] = NULL;
730 }
731 file_error(ms, 0, "could not find any valid magic files!");
732 return -1;
733 }
734
735 #if 0
736 /*
737 * Always leave the database loaded
738 */
739 if (action == FILE_LOAD)
740 return 0;
741
742 for (i = 0; i < MAGIC_SETS; i++) {
743 mlist_free(ms->mlist[i]);
744 ms->mlist[i] = NULL;
745 }
746 #endif
747
748 switch (action) {
749 case FILE_LOAD:
750 case FILE_COMPILE:
751 case FILE_CHECK:
752 case FILE_LIST:
753 return 0;
754 default:
755 file_error(ms, 0, "Invalid action %d", action);
756 return -1;
757 }
758 }
759
760 /*
761 * Compute the real length of a magic expression, for the purposes
762 * of determining how "strong" a magic expression is (approximating
763 * how specific its matches are):
764 * - magic characters count 0 unless escaped.
765 * - [] expressions count 1
766 * - {} expressions count 0
767 * - regular characters or escaped magic characters count 1
768 * - 0 length expressions count as one
769 */
770 private size_t
nonmagic(const char * str)771 nonmagic(const char *str)
772 {
773 const char *p;
774 size_t rv = 0;
775
776 for (p = str; *p; p++)
777 switch (*p) {
778 case '\\': /* Escaped anything counts 1 */
779 if (!*++p)
780 p--;
781 rv++;
782 continue;
783 case '?': /* Magic characters count 0 */
784 case '*':
785 case '.':
786 case '+':
787 case '^':
788 case '$':
789 continue;
790 case '[': /* Bracketed expressions count 1 the ']' */
791 while (*p && *p != ']')
792 p++;
793 p--;
794 continue;
795 case '{': /* Braced expressions count 0 */
796 while (*p && *p != '}')
797 p++;
798 if (!*p)
799 p--;
800 continue;
801 default: /* Anything else counts 1 */
802 rv++;
803 continue;
804 }
805
806 return rv == 0 ? 1 : rv; /* Return at least 1 */
807 }
808
809
810 private size_t
typesize(int type)811 typesize(int type)
812 {
813 switch (type) {
814 case FILE_BYTE:
815 return 1;
816
817 case FILE_SHORT:
818 case FILE_LESHORT:
819 case FILE_BESHORT:
820 return 2;
821
822 case FILE_LONG:
823 case FILE_LELONG:
824 case FILE_BELONG:
825 case FILE_MELONG:
826 return 4;
827
828 case FILE_DATE:
829 case FILE_LEDATE:
830 case FILE_BEDATE:
831 case FILE_MEDATE:
832 case FILE_LDATE:
833 case FILE_LELDATE:
834 case FILE_BELDATE:
835 case FILE_MELDATE:
836 case FILE_FLOAT:
837 case FILE_BEFLOAT:
838 case FILE_LEFLOAT:
839 return 4;
840
841 case FILE_QUAD:
842 case FILE_BEQUAD:
843 case FILE_LEQUAD:
844 case FILE_QDATE:
845 case FILE_LEQDATE:
846 case FILE_BEQDATE:
847 case FILE_QLDATE:
848 case FILE_LEQLDATE:
849 case FILE_BEQLDATE:
850 case FILE_QWDATE:
851 case FILE_LEQWDATE:
852 case FILE_BEQWDATE:
853 case FILE_DOUBLE:
854 case FILE_BEDOUBLE:
855 case FILE_LEDOUBLE:
856 case FILE_OFFSET:
857 case FILE_BEVARINT:
858 case FILE_LEVARINT:
859 return 8;
860
861 case FILE_GUID:
862 return 16;
863
864 default:
865 return FILE_BADSIZE;
866 }
867 }
868
869 /*
870 * Get weight of this magic entry, for sorting purposes.
871 */
872 private size_t
apprentice_magic_strength(const struct magic * m)873 apprentice_magic_strength(const struct magic *m)
874 {
875 #define MULT 10U
876 size_t ts, v;
877 ssize_t val = 2 * MULT; /* baseline strength */
878
879 switch (m->type) {
880 case FILE_DEFAULT: /* make sure this sorts last */
881 if (m->factor_op != FILE_FACTOR_OP_NONE)
882 abort();
883 return 0;
884
885 case FILE_BYTE:
886 case FILE_SHORT:
887 case FILE_LESHORT:
888 case FILE_BESHORT:
889 case FILE_LONG:
890 case FILE_LELONG:
891 case FILE_BELONG:
892 case FILE_MELONG:
893 case FILE_DATE:
894 case FILE_LEDATE:
895 case FILE_BEDATE:
896 case FILE_MEDATE:
897 case FILE_LDATE:
898 case FILE_LELDATE:
899 case FILE_BELDATE:
900 case FILE_MELDATE:
901 case FILE_FLOAT:
902 case FILE_BEFLOAT:
903 case FILE_LEFLOAT:
904 case FILE_QUAD:
905 case FILE_BEQUAD:
906 case FILE_LEQUAD:
907 case FILE_QDATE:
908 case FILE_LEQDATE:
909 case FILE_BEQDATE:
910 case FILE_QLDATE:
911 case FILE_LEQLDATE:
912 case FILE_BEQLDATE:
913 case FILE_QWDATE:
914 case FILE_LEQWDATE:
915 case FILE_BEQWDATE:
916 case FILE_DOUBLE:
917 case FILE_BEDOUBLE:
918 case FILE_LEDOUBLE:
919 case FILE_BEVARINT:
920 case FILE_LEVARINT:
921 case FILE_GUID:
922 case FILE_OFFSET:
923 ts = typesize(m->type);
924 if (ts == FILE_BADSIZE)
925 abort();
926 val += ts * MULT;
927 break;
928
929 case FILE_PSTRING:
930 case FILE_STRING:
931 val += m->vallen * MULT;
932 break;
933
934 case FILE_BESTRING16:
935 case FILE_LESTRING16:
936 val += m->vallen * MULT / 2;
937 break;
938
939 case FILE_SEARCH:
940 if (m->vallen == 0)
941 break;
942 val += m->vallen * MAX(MULT / m->vallen, 1);
943 break;
944
945 case FILE_REGEX:
946 v = nonmagic(m->value.s);
947 val += v * MAX(MULT / v, 1);
948 break;
949
950 case FILE_INDIRECT:
951 case FILE_NAME:
952 case FILE_USE:
953 break;
954
955 case FILE_DER:
956 val += MULT;
957 break;
958
959 default:
960 (void)fprintf(stderr, "Bad type %d\n", m->type);
961 abort();
962 }
963
964 switch (m->reln) {
965 case 'x': /* matches anything penalize */
966 case '!': /* matches almost anything penalize */
967 val = 0;
968 break;
969
970 case '=': /* Exact match, prefer */
971 val += MULT;
972 break;
973
974 case '>':
975 case '<': /* comparison match reduce strength */
976 val -= 2 * MULT;
977 break;
978
979 case '^':
980 case '&': /* masking bits, we could count them too */
981 val -= MULT;
982 break;
983
984 default:
985 (void)fprintf(stderr, "Bad relation %c\n", m->reln);
986 abort();
987 }
988
989 switch (m->factor_op) {
990 case FILE_FACTOR_OP_NONE:
991 break;
992 case FILE_FACTOR_OP_PLUS:
993 val += m->factor;
994 break;
995 case FILE_FACTOR_OP_MINUS:
996 val -= m->factor;
997 break;
998 case FILE_FACTOR_OP_TIMES:
999 val *= m->factor;
1000 break;
1001 case FILE_FACTOR_OP_DIV:
1002 val /= m->factor;
1003 break;
1004 default:
1005 abort();
1006 }
1007
1008 if (val <= 0) /* ensure we only return 0 for FILE_DEFAULT */
1009 val = 1;
1010
1011 /*
1012 * Magic entries with no description get a bonus because they depend
1013 * on subsequent magic entries to print something.
1014 */
1015 if (m->desc[0] == '\0')
1016 val++;
1017 return val;
1018 }
1019
1020 /*
1021 * Sort callback for sorting entries by "strength" (basically length)
1022 */
1023 private int
apprentice_sort(const void * a,const void * b)1024 apprentice_sort(const void *a, const void *b)
1025 {
1026 const struct magic_entry *ma = CAST(const struct magic_entry *, a);
1027 const struct magic_entry *mb = CAST(const struct magic_entry *, b);
1028 size_t sa = apprentice_magic_strength(ma->mp);
1029 size_t sb = apprentice_magic_strength(mb->mp);
1030 if (sa == sb)
1031 return 0;
1032 else if (sa > sb)
1033 return -1;
1034 else
1035 return 1;
1036 }
1037
1038 /*
1039 * Shows sorted patterns list in the order which is used for the matching
1040 */
1041 private void
apprentice_list(struct mlist * mlist,int mode)1042 apprentice_list(struct mlist *mlist, int mode)
1043 {
1044 uint32_t magindex = 0;
1045 struct mlist *ml;
1046 for (ml = mlist->next; ml != mlist; ml = ml->next) {
1047 for (magindex = 0; magindex < ml->nmagic; magindex++) {
1048 struct magic *m = &ml->magic[magindex];
1049 if ((m->flag & mode) != mode) {
1050 /* Skip sub-tests */
1051 while (magindex + 1 < ml->nmagic &&
1052 ml->magic[magindex + 1].cont_level != 0)
1053 ++magindex;
1054 continue; /* Skip to next top-level test*/
1055 }
1056
1057 /*
1058 * Try to iterate over the tree until we find item with
1059 * description/mimetype.
1060 */
1061 while (magindex + 1 < ml->nmagic &&
1062 ml->magic[magindex + 1].cont_level != 0 &&
1063 *ml->magic[magindex].desc == '\0' &&
1064 *ml->magic[magindex].mimetype == '\0')
1065 magindex++;
1066
1067 printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n",
1068 apprentice_magic_strength(m),
1069 ml->magic[magindex].lineno,
1070 ml->magic[magindex].desc,
1071 ml->magic[magindex].mimetype);
1072 }
1073 }
1074 }
1075
1076 private void
set_test_type(struct magic * mstart,struct magic * m)1077 set_test_type(struct magic *mstart, struct magic *m)
1078 {
1079 switch (m->type) {
1080 case FILE_BYTE:
1081 case FILE_SHORT:
1082 case FILE_LONG:
1083 case FILE_DATE:
1084 case FILE_BESHORT:
1085 case FILE_BELONG:
1086 case FILE_BEDATE:
1087 case FILE_LESHORT:
1088 case FILE_LELONG:
1089 case FILE_LEDATE:
1090 case FILE_LDATE:
1091 case FILE_BELDATE:
1092 case FILE_LELDATE:
1093 case FILE_MEDATE:
1094 case FILE_MELDATE:
1095 case FILE_MELONG:
1096 case FILE_QUAD:
1097 case FILE_LEQUAD:
1098 case FILE_BEQUAD:
1099 case FILE_QDATE:
1100 case FILE_LEQDATE:
1101 case FILE_BEQDATE:
1102 case FILE_QLDATE:
1103 case FILE_LEQLDATE:
1104 case FILE_BEQLDATE:
1105 case FILE_QWDATE:
1106 case FILE_LEQWDATE:
1107 case FILE_BEQWDATE:
1108 case FILE_FLOAT:
1109 case FILE_BEFLOAT:
1110 case FILE_LEFLOAT:
1111 case FILE_DOUBLE:
1112 case FILE_BEDOUBLE:
1113 case FILE_LEDOUBLE:
1114 case FILE_BEVARINT:
1115 case FILE_LEVARINT:
1116 case FILE_DER:
1117 case FILE_GUID:
1118 case FILE_OFFSET:
1119 mstart->flag |= BINTEST;
1120 break;
1121 case FILE_STRING:
1122 case FILE_PSTRING:
1123 case FILE_BESTRING16:
1124 case FILE_LESTRING16:
1125 /* Allow text overrides */
1126 if (mstart->str_flags & STRING_TEXTTEST)
1127 mstart->flag |= TEXTTEST;
1128 else
1129 mstart->flag |= BINTEST;
1130 break;
1131 case FILE_REGEX:
1132 case FILE_SEARCH:
1133 /* Check for override */
1134 if (mstart->str_flags & STRING_BINTEST)
1135 mstart->flag |= BINTEST;
1136 if (mstart->str_flags & STRING_TEXTTEST)
1137 mstart->flag |= TEXTTEST;
1138
1139 if (mstart->flag & (TEXTTEST|BINTEST))
1140 break;
1141
1142 /* binary test if pattern is not text */
1143 if (file_looks_utf8(m->value.us, CAST(size_t, m->vallen), NULL,
1144 NULL) <= 0)
1145 mstart->flag |= BINTEST;
1146 else
1147 mstart->flag |= TEXTTEST;
1148 break;
1149 case FILE_DEFAULT:
1150 /* can't deduce anything; we shouldn't see this at the
1151 top level anyway */
1152 break;
1153 case FILE_INVALID:
1154 default:
1155 /* invalid search type, but no need to complain here */
1156 break;
1157 }
1158 }
1159
1160 private int
addentry(struct magic_set * ms,struct magic_entry * me,struct magic_entry_set * mset)1161 addentry(struct magic_set *ms, struct magic_entry *me,
1162 struct magic_entry_set *mset)
1163 {
1164 size_t i = me->mp->type == FILE_NAME ? 1 : 0;
1165 if (mset[i].count == mset[i].max) {
1166 struct magic_entry *mp;
1167
1168 mset[i].max += ALLOC_INCR;
1169 if ((mp = CAST(struct magic_entry *,
1170 realloc(mset[i].me, sizeof(*mp) * mset[i].max))) ==
1171 NULL) {
1172 file_oomem(ms, sizeof(*mp) * mset[i].max);
1173 return -1;
1174 }
1175 (void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
1176 ALLOC_INCR);
1177 mset[i].me = mp;
1178 }
1179 mset[i].me[mset[i].count++] = *me;
1180 memset(me, 0, sizeof(*me));
1181 return 0;
1182 }
1183
1184 /*
1185 * Load and parse one file.
1186 */
1187 private void
load_1(struct magic_set * ms,int action,const char * fn,int * errs,struct magic_entry_set * mset)1188 load_1(struct magic_set *ms, int action, const char *fn, int *errs,
1189 struct magic_entry_set *mset)
1190 {
1191 size_t lineno = 0, llen = 0;
1192 char *line = NULL;
1193 ssize_t len;
1194 struct magic_entry me;
1195
1196 FILE *f = fopen(ms->file = fn, "r");
1197 if (f == NULL) {
1198 if (errno != ENOENT)
1199 file_error(ms, errno, "cannot read magic file `%s'",
1200 fn);
1201 (*errs)++;
1202 return;
1203 }
1204
1205 memset(&me, 0, sizeof(me));
1206 /* read and parse this file */
1207 for (ms->line = 1; (len = getline(&line, &llen, f)) != -1;
1208 ms->line++) {
1209 if (len == 0) /* null line, garbage, etc */
1210 continue;
1211 if (line[len - 1] == '\n') {
1212 lineno++;
1213 line[len - 1] = '\0'; /* delete newline */
1214 }
1215 switch (line[0]) {
1216 case '\0': /* empty, do not parse */
1217 case '#': /* comment, do not parse */
1218 continue;
1219 case '!':
1220 if (line[1] == ':') {
1221 size_t i;
1222
1223 for (i = 0; bang[i].name != NULL; i++) {
1224 if (CAST(size_t, len - 2) > bang[i].len &&
1225 memcmp(bang[i].name, line + 2,
1226 bang[i].len) == 0)
1227 break;
1228 }
1229 if (bang[i].name == NULL) {
1230 file_error(ms, 0,
1231 "Unknown !: entry `%s'", line);
1232 (*errs)++;
1233 continue;
1234 }
1235 if (me.mp == NULL) {
1236 file_error(ms, 0,
1237 "No current entry for :!%s type",
1238 bang[i].name);
1239 (*errs)++;
1240 continue;
1241 }
1242 if ((*bang[i].fun)(ms, &me,
1243 line + bang[i].len + 2,
1244 len - bang[i].len - 2) != 0) {
1245 (*errs)++;
1246 continue;
1247 }
1248 continue;
1249 }
1250 /*FALLTHROUGH*/
1251 default:
1252 again:
1253 switch (parse(ms, &me, line, lineno, action)) {
1254 case 0:
1255 continue;
1256 case 1:
1257 (void)addentry(ms, &me, mset);
1258 goto again;
1259 default:
1260 (*errs)++;
1261 break;
1262 }
1263 }
1264 }
1265 if (me.mp)
1266 (void)addentry(ms, &me, mset);
1267 free(line);
1268 (void)fclose(f);
1269 }
1270
1271 /*
1272 * parse a file or directory of files
1273 * const char *fn: name of magic file or directory
1274 */
1275 private int
cmpstrp(const void * p1,const void * p2)1276 cmpstrp(const void *p1, const void *p2)
1277 {
1278 return strcmp(*RCAST(char *const *, p1), *RCAST(char *const *, p2));
1279 }
1280
1281
1282 private uint32_t
set_text_binary(struct magic_set * ms,struct magic_entry * me,uint32_t nme,uint32_t starttest)1283 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1284 uint32_t starttest)
1285 {
1286 static const char text[] = "text";
1287 static const char binary[] = "binary";
1288 static const size_t len = sizeof(text);
1289
1290 uint32_t i = starttest;
1291
1292 do {
1293 set_test_type(me[starttest].mp, me[i].mp);
1294 if ((ms->flags & MAGIC_DEBUG) == 0)
1295 continue;
1296 (void)fprintf(stderr, "%s%s%s: %s\n",
1297 me[i].mp->mimetype,
1298 me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1299 me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1300 me[i].mp->flag & BINTEST ? binary : text);
1301 if (me[i].mp->flag & BINTEST) {
1302 char *p = strstr(me[i].mp->desc, text);
1303 if (p && (p == me[i].mp->desc ||
1304 isspace(CAST(unsigned char, p[-1]))) &&
1305 (p + len - me[i].mp->desc == MAXstring
1306 || (p[len] == '\0' ||
1307 isspace(CAST(unsigned char, p[len])))))
1308 (void)fprintf(stderr, "*** Possible "
1309 "binary test for text type\n");
1310 }
1311 } while (++i < nme && me[i].mp->cont_level != 0);
1312 return i;
1313 }
1314
1315 private void
set_last_default(struct magic_set * ms,struct magic_entry * me,uint32_t nme)1316 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1317 {
1318 uint32_t i;
1319 for (i = 0; i < nme; i++) {
1320 if (me[i].mp->cont_level == 0 &&
1321 me[i].mp->type == FILE_DEFAULT) {
1322 while (++i < nme)
1323 if (me[i].mp->cont_level == 0)
1324 break;
1325 if (i != nme) {
1326 /* XXX - Ugh! */
1327 ms->line = me[i].mp->lineno;
1328 file_magwarn(ms,
1329 "level 0 \"default\" did not sort last");
1330 }
1331 return;
1332 }
1333 }
1334 }
1335
1336 private int
coalesce_entries(struct magic_set * ms,struct magic_entry * me,uint32_t nme,struct magic ** ma,uint32_t * nma)1337 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1338 struct magic **ma, uint32_t *nma)
1339 {
1340 uint32_t i, mentrycount = 0;
1341 size_t slen;
1342
1343 for (i = 0; i < nme; i++)
1344 mentrycount += me[i].cont_count;
1345
1346 if (mentrycount == 0) {
1347 *ma = NULL;
1348 *nma = 0;
1349 return 0;
1350 }
1351
1352 slen = sizeof(**ma) * mentrycount;
1353 if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) {
1354 file_oomem(ms, slen);
1355 return -1;
1356 }
1357
1358 mentrycount = 0;
1359 for (i = 0; i < nme; i++) {
1360 (void)memcpy(*ma + mentrycount, me[i].mp,
1361 me[i].cont_count * sizeof(**ma));
1362 mentrycount += me[i].cont_count;
1363 }
1364 *nma = mentrycount;
1365 return 0;
1366 }
1367
1368 private void
magic_entry_free(struct magic_entry * me,uint32_t nme)1369 magic_entry_free(struct magic_entry *me, uint32_t nme)
1370 {
1371 uint32_t i;
1372 if (me == NULL)
1373 return;
1374 for (i = 0; i < nme; i++)
1375 free(me[i].mp);
1376 free(me);
1377 }
1378
1379 private struct magic_map *
apprentice_load(struct magic_set * ms,const char * fn,int action)1380 apprentice_load(struct magic_set *ms, const char *fn, int action)
1381 {
1382 int errs = 0;
1383 uint32_t i, j;
1384 size_t files = 0, maxfiles = 0;
1385 char **filearr = NULL, *mfn;
1386 struct stat st;
1387 struct magic_map *map;
1388 struct magic_entry_set mset[MAGIC_SETS];
1389 DIR *dir;
1390 struct dirent *d;
1391
1392 memset(mset, 0, sizeof(mset));
1393 ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */
1394
1395
1396 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL)
1397 {
1398 file_oomem(ms, sizeof(*map));
1399 return NULL;
1400 }
1401 map->type = MAP_TYPE_MALLOC;
1402
1403 /* print silly verbose header for USG compat. */
1404 if (action == FILE_CHECK)
1405 (void)fprintf(stderr, "%s\n", usg_hdr);
1406
1407 /* load directory or file */
1408 if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1409 dir = opendir(fn);
1410 if (!dir) {
1411 errs++;
1412 goto out;
1413 }
1414 while ((d = readdir(dir)) != NULL) {
1415 if (d->d_name[0] == '.')
1416 continue;
1417 if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) {
1418 file_oomem(ms,
1419 strlen(fn) + strlen(d->d_name) + 2);
1420 errs++;
1421 closedir(dir);
1422 goto out;
1423 }
1424 if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1425 free(mfn);
1426 continue;
1427 }
1428 if (files >= maxfiles) {
1429 size_t mlen;
1430 char **nfilearr;
1431 maxfiles = (maxfiles + 1) * 2;
1432 mlen = maxfiles * sizeof(*filearr);
1433 if ((nfilearr = CAST(char **,
1434 realloc(filearr, mlen))) == NULL) {
1435 file_oomem(ms, mlen);
1436 free(mfn);
1437 closedir(dir);
1438 errs++;
1439 goto out;
1440 }
1441 filearr = nfilearr;
1442 }
1443 filearr[files++] = mfn;
1444 }
1445 closedir(dir);
1446 if (filearr) {
1447 qsort(filearr, files, sizeof(*filearr), cmpstrp);
1448 for (i = 0; i < files; i++) {
1449 load_1(ms, action, filearr[i], &errs, mset);
1450 free(filearr[i]);
1451 }
1452 free(filearr);
1453 filearr = NULL;
1454 }
1455 } else
1456 load_1(ms, action, fn, &errs, mset);
1457 if (errs)
1458 goto out;
1459
1460 for (j = 0; j < MAGIC_SETS; j++) {
1461 /* Set types of tests */
1462 for (i = 0; i < mset[j].count; ) {
1463 if (mset[j].me[i].mp->cont_level != 0) {
1464 i++;
1465 continue;
1466 }
1467 i = set_text_binary(ms, mset[j].me, mset[j].count, i);
1468 }
1469 if (mset[j].me)
1470 qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me),
1471 apprentice_sort);
1472
1473 /*
1474 * Make sure that any level 0 "default" line is last
1475 * (if one exists).
1476 */
1477 set_last_default(ms, mset[j].me, mset[j].count);
1478
1479 /* coalesce per file arrays into a single one, if needed */
1480 if (mset[j].count == 0)
1481 continue;
1482
1483 if (coalesce_entries(ms, mset[j].me, mset[j].count,
1484 &map->magic[j], &map->nmagic[j]) == -1) {
1485 errs++;
1486 goto out;
1487 }
1488 }
1489
1490 out:
1491 free(filearr);
1492 for (j = 0; j < MAGIC_SETS; j++)
1493 magic_entry_free(mset[j].me, mset[j].count);
1494
1495 if (errs) {
1496 apprentice_unmap(map);
1497 return NULL;
1498 }
1499 return map;
1500 }
1501
1502 /*
1503 * extend the sign bit if the comparison is to be signed
1504 */
1505 protected uint64_t
file_signextend(struct magic_set * ms,struct magic * m,uint64_t v)1506 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1507 {
1508 if (!(m->flag & UNSIGNED)) {
1509 switch(m->type) {
1510 /*
1511 * Do not remove the casts below. They are
1512 * vital. When later compared with the data,
1513 * the sign extension must have happened.
1514 */
1515 case FILE_BYTE:
1516 v = CAST(signed char, v);
1517 break;
1518 case FILE_SHORT:
1519 case FILE_BESHORT:
1520 case FILE_LESHORT:
1521 v = CAST(short, v);
1522 break;
1523 case FILE_DATE:
1524 case FILE_BEDATE:
1525 case FILE_LEDATE:
1526 case FILE_MEDATE:
1527 case FILE_LDATE:
1528 case FILE_BELDATE:
1529 case FILE_LELDATE:
1530 case FILE_MELDATE:
1531 case FILE_LONG:
1532 case FILE_BELONG:
1533 case FILE_LELONG:
1534 case FILE_MELONG:
1535 case FILE_FLOAT:
1536 case FILE_BEFLOAT:
1537 case FILE_LEFLOAT:
1538 v = CAST(int32_t, v);
1539 break;
1540 case FILE_QUAD:
1541 case FILE_BEQUAD:
1542 case FILE_LEQUAD:
1543 case FILE_QDATE:
1544 case FILE_QLDATE:
1545 case FILE_QWDATE:
1546 case FILE_BEQDATE:
1547 case FILE_BEQLDATE:
1548 case FILE_BEQWDATE:
1549 case FILE_LEQDATE:
1550 case FILE_LEQLDATE:
1551 case FILE_LEQWDATE:
1552 case FILE_DOUBLE:
1553 case FILE_BEDOUBLE:
1554 case FILE_LEDOUBLE:
1555 case FILE_OFFSET:
1556 case FILE_BEVARINT:
1557 case FILE_LEVARINT:
1558 v = CAST(int64_t, v);
1559 break;
1560 case FILE_STRING:
1561 case FILE_PSTRING:
1562 case FILE_BESTRING16:
1563 case FILE_LESTRING16:
1564 case FILE_REGEX:
1565 case FILE_SEARCH:
1566 case FILE_DEFAULT:
1567 case FILE_INDIRECT:
1568 case FILE_NAME:
1569 case FILE_USE:
1570 case FILE_CLEAR:
1571 case FILE_DER:
1572 case FILE_GUID:
1573 break;
1574 default:
1575 if (ms->flags & MAGIC_CHECK)
1576 file_magwarn(ms, "cannot happen: m->type=%d\n",
1577 m->type);
1578 return FILE_BADSIZE;
1579 }
1580 }
1581 return v;
1582 }
1583
1584 private int
string_modifier_check(struct magic_set * ms,struct magic * m)1585 string_modifier_check(struct magic_set *ms, struct magic *m)
1586 {
1587 if ((ms->flags & MAGIC_CHECK) == 0)
1588 return 0;
1589
1590 if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
1591 (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
1592 file_magwarn(ms,
1593 "'/BHhLl' modifiers are only allowed for pascal strings\n");
1594 return -1;
1595 }
1596 switch (m->type) {
1597 case FILE_BESTRING16:
1598 case FILE_LESTRING16:
1599 if (m->str_flags != 0) {
1600 file_magwarn(ms,
1601 "no modifiers allowed for 16-bit strings\n");
1602 return -1;
1603 }
1604 break;
1605 case FILE_STRING:
1606 case FILE_PSTRING:
1607 if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1608 file_magwarn(ms,
1609 "'/%c' only allowed on regex and search\n",
1610 CHAR_REGEX_OFFSET_START);
1611 return -1;
1612 }
1613 break;
1614 case FILE_SEARCH:
1615 if (m->str_range == 0) {
1616 file_magwarn(ms,
1617 "missing range; defaulting to %d\n",
1618 STRING_DEFAULT_RANGE);
1619 m->str_range = STRING_DEFAULT_RANGE;
1620 return -1;
1621 }
1622 break;
1623 case FILE_REGEX:
1624 if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1625 file_magwarn(ms, "'/%c' not allowed on regex\n",
1626 CHAR_COMPACT_WHITESPACE);
1627 return -1;
1628 }
1629 if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1630 file_magwarn(ms, "'/%c' not allowed on regex\n",
1631 CHAR_COMPACT_OPTIONAL_WHITESPACE);
1632 return -1;
1633 }
1634 break;
1635 default:
1636 file_magwarn(ms, "coding error: m->type=%d\n",
1637 m->type);
1638 return -1;
1639 }
1640 return 0;
1641 }
1642
1643 private int
get_op(char c)1644 get_op(char c)
1645 {
1646 switch (c) {
1647 case '&':
1648 return FILE_OPAND;
1649 case '|':
1650 return FILE_OPOR;
1651 case '^':
1652 return FILE_OPXOR;
1653 case '+':
1654 return FILE_OPADD;
1655 case '-':
1656 return FILE_OPMINUS;
1657 case '*':
1658 return FILE_OPMULTIPLY;
1659 case '/':
1660 return FILE_OPDIVIDE;
1661 case '%':
1662 return FILE_OPMODULO;
1663 default:
1664 return -1;
1665 }
1666 }
1667
1668 #ifdef ENABLE_CONDITIONALS
1669 private int
get_cond(const char * l,const char ** t)1670 get_cond(const char *l, const char **t)
1671 {
1672 static const struct cond_tbl_s {
1673 char name[8];
1674 size_t len;
1675 int cond;
1676 } cond_tbl[] = {
1677 { "if", 2, COND_IF },
1678 { "elif", 4, COND_ELIF },
1679 { "else", 4, COND_ELSE },
1680 { "", 0, COND_NONE },
1681 };
1682 const struct cond_tbl_s *p;
1683
1684 for (p = cond_tbl; p->len; p++) {
1685 if (strncmp(l, p->name, p->len) == 0 &&
1686 isspace(CAST(unsigned char, l[p->len]))) {
1687 if (t)
1688 *t = l + p->len;
1689 break;
1690 }
1691 }
1692 return p->cond;
1693 }
1694
1695 private int
check_cond(struct magic_set * ms,int cond,uint32_t cont_level)1696 check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1697 {
1698 int last_cond;
1699 last_cond = ms->c.li[cont_level].last_cond;
1700
1701 switch (cond) {
1702 case COND_IF:
1703 if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1704 if (ms->flags & MAGIC_CHECK)
1705 file_magwarn(ms, "syntax error: `if'");
1706 return -1;
1707 }
1708 last_cond = COND_IF;
1709 break;
1710
1711 case COND_ELIF:
1712 if (last_cond != COND_IF && last_cond != COND_ELIF) {
1713 if (ms->flags & MAGIC_CHECK)
1714 file_magwarn(ms, "syntax error: `elif'");
1715 return -1;
1716 }
1717 last_cond = COND_ELIF;
1718 break;
1719
1720 case COND_ELSE:
1721 if (last_cond != COND_IF && last_cond != COND_ELIF) {
1722 if (ms->flags & MAGIC_CHECK)
1723 file_magwarn(ms, "syntax error: `else'");
1724 return -1;
1725 }
1726 last_cond = COND_NONE;
1727 break;
1728
1729 case COND_NONE:
1730 last_cond = COND_NONE;
1731 break;
1732 }
1733
1734 ms->c.li[cont_level].last_cond = last_cond;
1735 return 0;
1736 }
1737 #endif /* ENABLE_CONDITIONALS */
1738
1739 private int
parse_indirect_modifier(struct magic_set * ms,struct magic * m,const char ** lp)1740 parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1741 {
1742 const char *l = *lp;
1743
1744 while (!isspace(CAST(unsigned char, *++l)))
1745 switch (*l) {
1746 case CHAR_INDIRECT_RELATIVE:
1747 m->str_flags |= INDIRECT_RELATIVE;
1748 break;
1749 default:
1750 if (ms->flags & MAGIC_CHECK)
1751 file_magwarn(ms, "indirect modifier `%c' "
1752 "invalid", *l);
1753 *lp = l;
1754 return -1;
1755 }
1756 *lp = l;
1757 return 0;
1758 }
1759
1760 private void
parse_op_modifier(struct magic_set * ms,struct magic * m,const char ** lp,int op)1761 parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp,
1762 int op)
1763 {
1764 const char *l = *lp;
1765 char *t;
1766 uint64_t val;
1767
1768 ++l;
1769 m->mask_op |= op;
1770 val = CAST(uint64_t, strtoull(l, &t, 0));
1771 l = t;
1772 m->num_mask = file_signextend(ms, m, val);
1773 eatsize(&l);
1774 *lp = l;
1775 }
1776
1777 private int
parse_string_modifier(struct magic_set * ms,struct magic * m,const char ** lp)1778 parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1779 {
1780 const char *l = *lp;
1781 char *t;
1782 int have_range = 0;
1783
1784 while (!isspace(CAST(unsigned char, *++l))) {
1785 switch (*l) {
1786 case '0': case '1': case '2':
1787 case '3': case '4': case '5':
1788 case '6': case '7': case '8':
1789 case '9':
1790 if (have_range && (ms->flags & MAGIC_CHECK))
1791 file_magwarn(ms, "multiple ranges");
1792 have_range = 1;
1793 m->str_range = CAST(uint32_t, strtoul(l, &t, 0));
1794 if (m->str_range == 0)
1795 file_magwarn(ms, "zero range");
1796 l = t - 1;
1797 break;
1798 case CHAR_COMPACT_WHITESPACE:
1799 m->str_flags |= STRING_COMPACT_WHITESPACE;
1800 break;
1801 case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1802 m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE;
1803 break;
1804 case CHAR_IGNORE_LOWERCASE:
1805 m->str_flags |= STRING_IGNORE_LOWERCASE;
1806 break;
1807 case CHAR_IGNORE_UPPERCASE:
1808 m->str_flags |= STRING_IGNORE_UPPERCASE;
1809 break;
1810 case CHAR_REGEX_OFFSET_START:
1811 m->str_flags |= REGEX_OFFSET_START;
1812 break;
1813 case CHAR_BINTEST:
1814 m->str_flags |= STRING_BINTEST;
1815 break;
1816 case CHAR_TEXTTEST:
1817 m->str_flags |= STRING_TEXTTEST;
1818 break;
1819 case CHAR_TRIM:
1820 m->str_flags |= STRING_TRIM;
1821 break;
1822 case CHAR_FULL_WORD:
1823 m->str_flags |= STRING_FULL_WORD;
1824 break;
1825 case CHAR_PSTRING_1_LE:
1826 #define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
1827 if (m->type != FILE_PSTRING)
1828 goto bad;
1829 SET_LENGTH(PSTRING_1_LE);
1830 break;
1831 case CHAR_PSTRING_2_BE:
1832 if (m->type != FILE_PSTRING)
1833 goto bad;
1834 SET_LENGTH(PSTRING_2_BE);
1835 break;
1836 case CHAR_PSTRING_2_LE:
1837 if (m->type != FILE_PSTRING)
1838 goto bad;
1839 SET_LENGTH(PSTRING_2_LE);
1840 break;
1841 case CHAR_PSTRING_4_BE:
1842 if (m->type != FILE_PSTRING)
1843 goto bad;
1844 SET_LENGTH(PSTRING_4_BE);
1845 break;
1846 case CHAR_PSTRING_4_LE:
1847 switch (m->type) {
1848 case FILE_PSTRING:
1849 case FILE_REGEX:
1850 break;
1851 default:
1852 goto bad;
1853 }
1854 SET_LENGTH(PSTRING_4_LE);
1855 break;
1856 case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
1857 if (m->type != FILE_PSTRING)
1858 goto bad;
1859 m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
1860 break;
1861 default:
1862 bad:
1863 if (ms->flags & MAGIC_CHECK)
1864 file_magwarn(ms, "string modifier `%c' "
1865 "invalid", *l);
1866 goto out;
1867 }
1868 /* allow multiple '/' for readability */
1869 if (l[1] == '/' && !isspace(CAST(unsigned char, l[2])))
1870 l++;
1871 }
1872 if (string_modifier_check(ms, m) == -1)
1873 goto out;
1874 *lp = l;
1875 return 0;
1876 out:
1877 *lp = l;
1878 return -1;
1879 }
1880
1881 /*
1882 * parse one line from magic file, put into magic[index++] if valid
1883 */
1884 private int
parse(struct magic_set * ms,struct magic_entry * me,const char * line,size_t lineno,int action)1885 parse(struct magic_set *ms, struct magic_entry *me, const char *line,
1886 size_t lineno, int action)
1887 {
1888 #ifdef ENABLE_CONDITIONALS
1889 static uint32_t last_cont_level = 0;
1890 #endif
1891 size_t i;
1892 struct magic *m;
1893 const char *l = line;
1894 char *t;
1895 int op;
1896 uint32_t cont_level;
1897 int32_t diff;
1898
1899 cont_level = 0;
1900
1901 /*
1902 * Parse the offset.
1903 */
1904 while (*l == '>') {
1905 ++l; /* step over */
1906 cont_level++;
1907 }
1908 #ifdef ENABLE_CONDITIONALS
1909 if (cont_level == 0 || cont_level > last_cont_level)
1910 if (file_check_mem(ms, cont_level) == -1)
1911 return -1;
1912 last_cont_level = cont_level;
1913 #endif
1914 if (cont_level != 0) {
1915 if (me->mp == NULL) {
1916 file_magerror(ms, "No current entry for continuation");
1917 return -1;
1918 }
1919 if (me->cont_count == 0) {
1920 file_magerror(ms, "Continuations present with 0 count");
1921 return -1;
1922 }
1923 m = &me->mp[me->cont_count - 1];
1924 diff = CAST(int32_t, cont_level) - CAST(int32_t, m->cont_level);
1925 if (diff > 1)
1926 file_magwarn(ms, "New continuation level %u is more "
1927 "than one larger than current level %u", cont_level,
1928 m->cont_level);
1929 if (me->cont_count == me->max_count) {
1930 struct magic *nm;
1931 size_t cnt = me->max_count + ALLOC_CHUNK;
1932 if ((nm = CAST(struct magic *, realloc(me->mp,
1933 sizeof(*nm) * cnt))) == NULL) {
1934 file_oomem(ms, sizeof(*nm) * cnt);
1935 return -1;
1936 }
1937 me->mp = nm;
1938 me->max_count = CAST(uint32_t, cnt);
1939 }
1940 m = &me->mp[me->cont_count++];
1941 (void)memset(m, 0, sizeof(*m));
1942 m->cont_level = cont_level;
1943 } else {
1944 static const size_t len = sizeof(*m) * ALLOC_CHUNK;
1945 if (me->mp != NULL)
1946 return 1;
1947 if ((m = CAST(struct magic *, malloc(len))) == NULL) {
1948 file_oomem(ms, len);
1949 return -1;
1950 }
1951 me->mp = m;
1952 me->max_count = ALLOC_CHUNK;
1953 (void)memset(m, 0, sizeof(*m));
1954 m->factor_op = FILE_FACTOR_OP_NONE;
1955 m->cont_level = 0;
1956 me->cont_count = 1;
1957 }
1958 m->lineno = CAST(uint32_t, lineno);
1959
1960 if (*l == '&') { /* m->cont_level == 0 checked below. */
1961 ++l; /* step over */
1962 m->flag |= OFFADD;
1963 }
1964 if (*l == '(') {
1965 ++l; /* step over */
1966 m->flag |= INDIR;
1967 if (m->flag & OFFADD)
1968 m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
1969
1970 if (*l == '&') { /* m->cont_level == 0 checked below */
1971 ++l; /* step over */
1972 m->flag |= OFFADD;
1973 }
1974 }
1975 /* Indirect offsets are not valid at level 0. */
1976 if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) {
1977 if (ms->flags & MAGIC_CHECK)
1978 file_magwarn(ms, "relative offset at level 0");
1979 return -1;
1980 }
1981
1982 /* get offset, then skip over it */
1983 if (*l == '-') {
1984 ++l; /* step over */
1985 m->flag |= OFFNEGATIVE;
1986 }
1987 m->offset = CAST(int32_t, strtol(l, &t, 0));
1988 if (l == t) {
1989 if (ms->flags & MAGIC_CHECK)
1990 file_magwarn(ms, "offset `%s' invalid", l);
1991 return -1;
1992 }
1993
1994 l = t;
1995
1996 if (m->flag & INDIR) {
1997 m->in_type = FILE_LONG;
1998 m->in_offset = 0;
1999 m->in_op = 0;
2000 /*
2001 * read [.,lbs][+-]nnnnn)
2002 */
2003 if (*l == '.' || *l == ',') {
2004 if (*l == ',')
2005 m->in_op |= FILE_OPSIGNED;
2006 l++;
2007 switch (*l) {
2008 case 'l':
2009 m->in_type = FILE_LELONG;
2010 break;
2011 case 'L':
2012 m->in_type = FILE_BELONG;
2013 break;
2014 case 'm':
2015 m->in_type = FILE_MELONG;
2016 break;
2017 case 'h':
2018 case 's':
2019 m->in_type = FILE_LESHORT;
2020 break;
2021 case 'H':
2022 case 'S':
2023 m->in_type = FILE_BESHORT;
2024 break;
2025 case 'c':
2026 case 'b':
2027 case 'C':
2028 case 'B':
2029 m->in_type = FILE_BYTE;
2030 break;
2031 case 'e':
2032 case 'f':
2033 case 'g':
2034 m->in_type = FILE_LEDOUBLE;
2035 break;
2036 case 'E':
2037 case 'F':
2038 case 'G':
2039 m->in_type = FILE_BEDOUBLE;
2040 break;
2041 case 'i':
2042 m->in_type = FILE_LEID3;
2043 break;
2044 case 'I':
2045 m->in_type = FILE_BEID3;
2046 break;
2047 case 'q':
2048 m->in_type = FILE_LEQUAD;
2049 break;
2050 case 'Q':
2051 m->in_type = FILE_BEQUAD;
2052 break;
2053 default:
2054 if (ms->flags & MAGIC_CHECK)
2055 file_magwarn(ms,
2056 "indirect offset type `%c' invalid",
2057 *l);
2058 return -1;
2059 }
2060 l++;
2061 }
2062
2063 if (*l == '~') {
2064 m->in_op |= FILE_OPINVERSE;
2065 l++;
2066 }
2067 if ((op = get_op(*l)) != -1) {
2068 m->in_op |= op;
2069 l++;
2070 }
2071 if (*l == '(') {
2072 m->in_op |= FILE_OPINDIRECT;
2073 l++;
2074 }
2075 if (isdigit(CAST(unsigned char, *l)) || *l == '-') {
2076 m->in_offset = CAST(int32_t, strtol(l, &t, 0));
2077 if (l == t) {
2078 if (ms->flags & MAGIC_CHECK)
2079 file_magwarn(ms,
2080 "in_offset `%s' invalid", l);
2081 return -1;
2082 }
2083 l = t;
2084 }
2085 if (*l++ != ')' ||
2086 ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) {
2087 if (ms->flags & MAGIC_CHECK)
2088 file_magwarn(ms,
2089 "missing ')' in indirect offset");
2090 return -1;
2091 }
2092 }
2093 EATAB;
2094
2095 #ifdef ENABLE_CONDITIONALS
2096 m->cond = get_cond(l, &l);
2097 if (check_cond(ms, m->cond, cont_level) == -1)
2098 return -1;
2099
2100 EATAB;
2101 #endif
2102
2103 /*
2104 * Parse the type.
2105 */
2106 if (*l == 'u') {
2107 /*
2108 * Try it as a keyword type prefixed by "u"; match what
2109 * follows the "u". If that fails, try it as an SUS
2110 * integer type.
2111 */
2112 m->type = get_type(type_tbl, l + 1, &l);
2113 if (m->type == FILE_INVALID) {
2114 /*
2115 * Not a keyword type; parse it as an SUS type,
2116 * 'u' possibly followed by a number or C/S/L.
2117 */
2118 m->type = get_standard_integer_type(l, &l);
2119 }
2120 /* It's unsigned. */
2121 if (m->type != FILE_INVALID)
2122 m->flag |= UNSIGNED;
2123 } else {
2124 /*
2125 * Try it as a keyword type. If that fails, try it as
2126 * an SUS integer type if it begins with "d" or as an
2127 * SUS string type if it begins with "s". In any case,
2128 * it's not unsigned.
2129 */
2130 m->type = get_type(type_tbl, l, &l);
2131 if (m->type == FILE_INVALID) {
2132 /*
2133 * Not a keyword type; parse it as an SUS type,
2134 * either 'd' possibly followed by a number or
2135 * C/S/L, or just 's'.
2136 */
2137 if (*l == 'd')
2138 m->type = get_standard_integer_type(l, &l);
2139 else if (*l == 's'
2140 && !isalpha(CAST(unsigned char, l[1]))) {
2141 m->type = FILE_STRING;
2142 ++l;
2143 }
2144 }
2145 }
2146
2147 if (m->type == FILE_INVALID) {
2148 /* Not found - try it as a special keyword. */
2149 m->type = get_type(special_tbl, l, &l);
2150 }
2151
2152 if (m->type == FILE_INVALID) {
2153 if (ms->flags & MAGIC_CHECK)
2154 file_magwarn(ms, "type `%s' invalid", l);
2155 return -1;
2156 }
2157
2158 if (m->type == FILE_NAME && cont_level != 0) {
2159 if (ms->flags & MAGIC_CHECK)
2160 file_magwarn(ms, "`name%s' entries can only be "
2161 "declared at top level", l);
2162 return -1;
2163 }
2164
2165 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
2166 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
2167
2168 m->mask_op = 0;
2169 if (*l == '~') {
2170 if (!IS_STRING(m->type))
2171 m->mask_op |= FILE_OPINVERSE;
2172 else if (ms->flags & MAGIC_CHECK)
2173 file_magwarn(ms, "'~' invalid for string types");
2174 ++l;
2175 }
2176 m->str_range = 0;
2177 m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
2178 if ((op = get_op(*l)) != -1) {
2179 if (IS_STRING(m->type)) {
2180 int r;
2181
2182 if (op != FILE_OPDIVIDE) {
2183 if (ms->flags & MAGIC_CHECK)
2184 file_magwarn(ms,
2185 "invalid string/indirect op: "
2186 "`%c'", *t);
2187 return -1;
2188 }
2189
2190 if (m->type == FILE_INDIRECT)
2191 r = parse_indirect_modifier(ms, m, &l);
2192 else
2193 r = parse_string_modifier(ms, m, &l);
2194 if (r == -1)
2195 return -1;
2196 } else
2197 parse_op_modifier(ms, m, &l, op);
2198 }
2199
2200 /*
2201 * We used to set mask to all 1's here, instead let's just not do
2202 * anything if mask = 0 (unless you have a better idea)
2203 */
2204 EATAB;
2205
2206 switch (*l) {
2207 case '>':
2208 case '<':
2209 m->reln = *l;
2210 ++l;
2211 if (*l == '=') {
2212 if (ms->flags & MAGIC_CHECK) {
2213 file_magwarn(ms, "%c= not supported",
2214 m->reln);
2215 return -1;
2216 }
2217 ++l;
2218 }
2219 break;
2220 /* Old-style anding: "0 byte &0x80 dynamically linked" */
2221 case '&':
2222 case '^':
2223 case '=':
2224 m->reln = *l;
2225 ++l;
2226 if (*l == '=') {
2227 /* HP compat: ignore &= etc. */
2228 ++l;
2229 }
2230 break;
2231 case '!':
2232 m->reln = *l;
2233 ++l;
2234 break;
2235 default:
2236 m->reln = '='; /* the default relation */
2237 if (*l == 'x' && ((isascii(CAST(unsigned char, l[1])) &&
2238 isspace(CAST(unsigned char, l[1]))) || !l[1])) {
2239 m->reln = *l;
2240 ++l;
2241 }
2242 break;
2243 }
2244 /*
2245 * Grab the value part, except for an 'x' reln.
2246 */
2247 if (m->reln != 'x' && getvalue(ms, m, &l, action))
2248 return -1;
2249
2250 /*
2251 * TODO finish this macro and start using it!
2252 * #define offsetcheck {if (offset > ms->bytes_max -1)
2253 * magwarn("offset too big"); }
2254 */
2255
2256 /*
2257 * Now get last part - the description
2258 */
2259 EATAB;
2260 if (l[0] == '\b') {
2261 ++l;
2262 m->flag |= NOSPACE;
2263 } else if ((l[0] == '\\') && (l[1] == 'b')) {
2264 ++l;
2265 ++l;
2266 m->flag |= NOSPACE;
2267 }
2268 for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
2269 continue;
2270 if (i == sizeof(m->desc)) {
2271 m->desc[sizeof(m->desc) - 1] = '\0';
2272 if (ms->flags & MAGIC_CHECK)
2273 file_magwarn(ms, "description `%s' truncated", m->desc);
2274 }
2275
2276 /*
2277 * We only do this check while compiling, or if any of the magic
2278 * files were not compiled.
2279 */
2280 if (ms->flags & MAGIC_CHECK) {
2281 if (check_format(ms, m) == -1)
2282 return -1;
2283 }
2284 #ifndef COMPILE_ONLY
2285 if (action == FILE_CHECK) {
2286 file_mdump(m);
2287 }
2288 #endif
2289 m->mimetype[0] = '\0'; /* initialise MIME type to none */
2290 return 0;
2291 }
2292
2293 /*
2294 * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
2295 * if valid
2296 */
2297 private int
parse_strength(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2298 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line,
2299 size_t len __attribute__((__unused__)))
2300 {
2301 const char *l = line;
2302 char *el;
2303 unsigned long factor;
2304 struct magic *m = &me->mp[0];
2305
2306 if (m->factor_op != FILE_FACTOR_OP_NONE) {
2307 file_magwarn(ms,
2308 "Current entry already has a strength type: %c %d",
2309 m->factor_op, m->factor);
2310 return -1;
2311 }
2312 if (m->type == FILE_NAME) {
2313 file_magwarn(ms, "%s: Strength setting is not supported in "
2314 "\"name\" magic entries", m->value.s);
2315 return -1;
2316 }
2317 EATAB;
2318 switch (*l) {
2319 case FILE_FACTOR_OP_NONE:
2320 case FILE_FACTOR_OP_PLUS:
2321 case FILE_FACTOR_OP_MINUS:
2322 case FILE_FACTOR_OP_TIMES:
2323 case FILE_FACTOR_OP_DIV:
2324 m->factor_op = *l++;
2325 break;
2326 default:
2327 file_magwarn(ms, "Unknown factor op `%c'", *l);
2328 return -1;
2329 }
2330 EATAB;
2331 factor = strtoul(l, &el, 0);
2332 if (factor > 255) {
2333 file_magwarn(ms, "Too large factor `%lu'", factor);
2334 goto out;
2335 }
2336 if (*el && !isspace(CAST(unsigned char, *el))) {
2337 file_magwarn(ms, "Bad factor `%s'", l);
2338 goto out;
2339 }
2340 m->factor = CAST(uint8_t, factor);
2341 if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
2342 file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2343 m->factor_op, m->factor);
2344 goto out;
2345 }
2346 return 0;
2347 out:
2348 m->factor_op = FILE_FACTOR_OP_NONE;
2349 m->factor = 0;
2350 return -1;
2351 }
2352
2353 private int
goodchar(unsigned char x,const char * extra)2354 goodchar(unsigned char x, const char *extra)
2355 {
2356 return (isascii(x) && isalnum(x)) || strchr(extra, x);
2357 }
2358
2359 private int
parse_extra(struct magic_set * ms,struct magic_entry * me,const char * line,size_t llen,off_t off,size_t len,const char * name,const char * extra,int nt)2360 parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
2361 size_t llen, off_t off, size_t len, const char *name, const char *extra,
2362 int nt)
2363 {
2364 size_t i;
2365 const char *l = line;
2366 struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2367 char *buf = CAST(char *, CAST(void *, m)) + off;
2368
2369 if (buf[0] != '\0') {
2370 len = nt ? strlen(buf) : len;
2371 file_magwarn(ms, "Current entry already has a %s type "
2372 "`%.*s', new type `%s'", name, CAST(int, len), buf, l);
2373 return -1;
2374 }
2375
2376 if (*m->desc == '\0') {
2377 file_magwarn(ms, "Current entry does not yet have a "
2378 "description for adding a %s type", name);
2379 return -1;
2380 }
2381
2382 EATAB;
2383 for (i = 0; *l && i < llen && i < len && goodchar(*l, extra);
2384 buf[i++] = *l++)
2385 continue;
2386
2387 if (i == len && *l) {
2388 if (nt)
2389 buf[len - 1] = '\0';
2390 if (ms->flags & MAGIC_CHECK)
2391 file_magwarn(ms, "%s type `%s' truncated %"
2392 SIZE_T_FORMAT "u", name, line, i);
2393 } else {
2394 if (!isspace(CAST(unsigned char, *l)) && !goodchar(*l, extra))
2395 file_magwarn(ms, "%s type `%s' has bad char '%c'",
2396 name, line, *l);
2397 if (nt)
2398 buf[i] = '\0';
2399 }
2400
2401 if (i > 0)
2402 return 0;
2403
2404 file_magerror(ms, "Bad magic entry '%s'", line);
2405 return -1;
2406 }
2407
2408 /*
2409 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2410 * magic[index - 1]
2411 */
2412 private int
parse_apple(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2413 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line,
2414 size_t len)
2415 {
2416 struct magic *m = &me->mp[0];
2417
2418 return parse_extra(ms, me, line, len,
2419 CAST(off_t, offsetof(struct magic, apple)),
2420 sizeof(m->apple), "APPLE", "!+-./?", 0);
2421 }
2422
2423 /*
2424 * Parse a comma-separated list of extensions
2425 */
2426 private int
parse_ext(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2427 parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line,
2428 size_t len)
2429 {
2430 struct magic *m = &me->mp[0];
2431
2432 return parse_extra(ms, me, line, len,
2433 CAST(off_t, offsetof(struct magic, ext)),
2434 sizeof(m->ext), "EXTENSION", ",!+-/@?_$&", 0); /* & for b&w */
2435 }
2436
2437 /*
2438 * parse a MIME annotation line from magic file, put into magic[index - 1]
2439 * if valid
2440 */
2441 private int
parse_mime(struct magic_set * ms,struct magic_entry * me,const char * line,size_t len)2442 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line,
2443 size_t len)
2444 {
2445 struct magic *m = &me->mp[0];
2446
2447 return parse_extra(ms, me, line, len,
2448 CAST(off_t, offsetof(struct magic, mimetype)),
2449 sizeof(m->mimetype), "MIME", "+-/.$?:{}", 1);
2450 }
2451
2452 private int
check_format_type(const char * ptr,int type,const char ** estr)2453 check_format_type(const char *ptr, int type, const char **estr)
2454 {
2455 int quad = 0, h;
2456 size_t len, cnt;
2457 if (*ptr == '\0') {
2458 /* Missing format string; bad */
2459 *estr = "missing format spec";
2460 return -1;
2461 }
2462
2463 switch (file_formats[type]) {
2464 case FILE_FMT_QUAD:
2465 quad = 1;
2466 /*FALLTHROUGH*/
2467 case FILE_FMT_NUM:
2468 if (quad == 0) {
2469 switch (type) {
2470 case FILE_BYTE:
2471 h = 2;
2472 break;
2473 case FILE_SHORT:
2474 case FILE_BESHORT:
2475 case FILE_LESHORT:
2476 h = 1;
2477 break;
2478 case FILE_LONG:
2479 case FILE_BELONG:
2480 case FILE_LELONG:
2481 case FILE_MELONG:
2482 case FILE_LEID3:
2483 case FILE_BEID3:
2484 case FILE_INDIRECT:
2485 h = 0;
2486 break;
2487 default:
2488 abort();
2489 }
2490 } else
2491 h = 0;
2492 while (*ptr && strchr("-.#", *ptr) != NULL)
2493 ptr++;
2494 #define CHECKLEN() do { \
2495 for (len = cnt = 0; isdigit(CAST(unsigned char, *ptr)); ptr++, cnt++) \
2496 len = len * 10 + (*ptr - '0'); \
2497 if (cnt > 5 || len > 1024) \
2498 goto toolong; \
2499 } while (/*CONSTCOND*/0)
2500
2501 CHECKLEN();
2502 if (*ptr == '.')
2503 ptr++;
2504 CHECKLEN();
2505 if (quad) {
2506 if (*ptr++ != 'l')
2507 goto invalid;
2508 if (*ptr++ != 'l')
2509 goto invalid;
2510 }
2511
2512 switch (*ptr++) {
2513 #ifdef STRICT_FORMAT /* "long" formats are int formats for us */
2514 /* so don't accept the 'l' modifier */
2515 case 'l':
2516 switch (*ptr++) {
2517 case 'i':
2518 case 'd':
2519 case 'u':
2520 case 'o':
2521 case 'x':
2522 case 'X':
2523 if (h == 0)
2524 return 0;
2525 /*FALLTHROUGH*/
2526 default:
2527 goto invalid;
2528 }
2529
2530 /*
2531 * Don't accept h and hh modifiers. They make writing
2532 * magic entries more complicated, for very little benefit
2533 */
2534 case 'h':
2535 if (h-- <= 0)
2536 goto invalid;
2537 switch (*ptr++) {
2538 case 'h':
2539 if (h-- <= 0)
2540 goto invalid;
2541 switch (*ptr++) {
2542 case 'i':
2543 case 'd':
2544 case 'u':
2545 case 'o':
2546 case 'x':
2547 case 'X':
2548 return 0;
2549 default:
2550 goto invalid;
2551 }
2552 case 'i':
2553 case 'd':
2554 case 'u':
2555 case 'o':
2556 case 'x':
2557 case 'X':
2558 if (h == 0)
2559 return 0;
2560 /*FALLTHROUGH*/
2561 default:
2562 goto invalid;
2563 }
2564 #endif
2565 case 'c':
2566 if (h == 2)
2567 return 0;
2568 goto invalid;
2569 case 'i':
2570 case 'd':
2571 case 'u':
2572 case 'o':
2573 case 'x':
2574 case 'X':
2575 #ifdef STRICT_FORMAT
2576 if (h == 0)
2577 return 0;
2578 /*FALLTHROUGH*/
2579 #else
2580 return 0;
2581 #endif
2582 default:
2583 goto invalid;
2584 }
2585
2586 case FILE_FMT_FLOAT:
2587 case FILE_FMT_DOUBLE:
2588 if (*ptr == '-')
2589 ptr++;
2590 if (*ptr == '.')
2591 ptr++;
2592 CHECKLEN();
2593 if (*ptr == '.')
2594 ptr++;
2595 CHECKLEN();
2596 switch (*ptr++) {
2597 case 'e':
2598 case 'E':
2599 case 'f':
2600 case 'F':
2601 case 'g':
2602 case 'G':
2603 return 0;
2604
2605 default:
2606 goto invalid;
2607 }
2608
2609
2610 case FILE_FMT_STR:
2611 if (*ptr == '-')
2612 ptr++;
2613 while (isdigit(CAST(unsigned char, *ptr)))
2614 ptr++;
2615 if (*ptr == '.') {
2616 ptr++;
2617 while (isdigit(CAST(unsigned char , *ptr)))
2618 ptr++;
2619 }
2620
2621 switch (*ptr++) {
2622 case 's':
2623 return 0;
2624 default:
2625 goto invalid;
2626 }
2627
2628 default:
2629 /* internal error */
2630 abort();
2631 }
2632 invalid:
2633 *estr = "not valid";
2634 toolong:
2635 *estr = "too long";
2636 return -1;
2637 }
2638
2639 /*
2640 * Check that the optional printf format in description matches
2641 * the type of the magic.
2642 */
2643 private int
check_format(struct magic_set * ms,struct magic * m)2644 check_format(struct magic_set *ms, struct magic *m)
2645 {
2646 char *ptr;
2647 const char *estr;
2648
2649 for (ptr = m->desc; *ptr; ptr++)
2650 if (*ptr == '%')
2651 break;
2652 if (*ptr == '\0') {
2653 /* No format string; ok */
2654 return 1;
2655 }
2656
2657 assert(file_nformats == file_nnames);
2658
2659 if (m->type >= file_nformats) {
2660 file_magwarn(ms, "Internal error inconsistency between "
2661 "m->type and format strings");
2662 return -1;
2663 }
2664 if (file_formats[m->type] == FILE_FMT_NONE) {
2665 file_magwarn(ms, "No format string for `%s' with description "
2666 "`%s'", m->desc, file_names[m->type]);
2667 return -1;
2668 }
2669
2670 ptr++;
2671 if (check_format_type(ptr, m->type, &estr) == -1) {
2672 /*
2673 * TODO: this error message is unhelpful if the format
2674 * string is not one character long
2675 */
2676 file_magwarn(ms, "Printf format is %s for type "
2677 "`%s' in description `%s'", estr,
2678 file_names[m->type], m->desc);
2679 return -1;
2680 }
2681
2682 for (; *ptr; ptr++) {
2683 if (*ptr == '%') {
2684 file_magwarn(ms,
2685 "Too many format strings (should have at most one) "
2686 "for `%s' with description `%s'",
2687 file_names[m->type], m->desc);
2688 return -1;
2689 }
2690 }
2691 return 0;
2692 }
2693
2694 /*
2695 * Read a numeric value from a pointer, into the value union of a magic
2696 * pointer, according to the magic type. Update the string pointer to point
2697 * just after the number read. Return 0 for success, non-zero for failure.
2698 */
2699 private int
getvalue(struct magic_set * ms,struct magic * m,const char ** p,int action)2700 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2701 {
2702 char *ep;
2703 uint64_t ull;
2704
2705 switch (m->type) {
2706 case FILE_BESTRING16:
2707 case FILE_LESTRING16:
2708 case FILE_STRING:
2709 case FILE_PSTRING:
2710 case FILE_REGEX:
2711 case FILE_SEARCH:
2712 case FILE_NAME:
2713 case FILE_USE:
2714 case FILE_DER:
2715 *p = getstr(ms, m, *p, action == FILE_COMPILE);
2716 if (*p == NULL) {
2717 if (ms->flags & MAGIC_CHECK)
2718 file_magwarn(ms, "cannot get string from `%s'",
2719 m->value.s);
2720 return -1;
2721 }
2722 if (m->type == FILE_REGEX) {
2723 file_regex_t rx;
2724 int rc = file_regcomp(&rx, m->value.s, REG_EXTENDED);
2725 if (rc) {
2726 if (ms->flags & MAGIC_CHECK)
2727 file_regerror(&rx, rc, ms);
2728 }
2729 file_regfree(&rx);
2730 return rc ? -1 : 0;
2731 }
2732 return 0;
2733 default:
2734 if (m->reln == 'x')
2735 return 0;
2736 break;
2737 }
2738
2739 switch (m->type) {
2740 case FILE_FLOAT:
2741 case FILE_BEFLOAT:
2742 case FILE_LEFLOAT:
2743 errno = 0;
2744 #ifdef HAVE_STRTOF
2745 m->value.f = strtof(*p, &ep);
2746 #else
2747 m->value.f = (float)strtod(*p, &ep);
2748 #endif
2749 if (errno == 0)
2750 *p = ep;
2751 return 0;
2752 case FILE_DOUBLE:
2753 case FILE_BEDOUBLE:
2754 case FILE_LEDOUBLE:
2755 errno = 0;
2756 m->value.d = strtod(*p, &ep);
2757 if (errno == 0)
2758 *p = ep;
2759 return 0;
2760 case FILE_GUID:
2761 if (file_parse_guid(*p, m->value.guid) == -1)
2762 return -1;
2763 *p += FILE_GUID_SIZE - 1;
2764 return 0;
2765 default:
2766 errno = 0;
2767 ull = CAST(uint64_t, strtoull(*p, &ep, 0));
2768 m->value.q = file_signextend(ms, m, ull);
2769 if (*p == ep) {
2770 file_magwarn(ms, "Unparsable number `%s'", *p);
2771 } else {
2772 size_t ts = typesize(m->type);
2773 uint64_t x;
2774 const char *q;
2775
2776 if (ts == FILE_BADSIZE) {
2777 file_magwarn(ms,
2778 "Expected numeric type got `%s'",
2779 type_tbl[m->type].name);
2780 }
2781 for (q = *p; isspace(CAST(unsigned char, *q)); q++)
2782 continue;
2783 if (*q == '-')
2784 ull = -CAST(int64_t, ull);
2785 switch (ts) {
2786 case 1:
2787 x = CAST(uint64_t, ull & ~0xffULL);
2788 break;
2789 case 2:
2790 x = CAST(uint64_t, ull & ~0xffffULL);
2791 break;
2792 case 4:
2793 x = CAST(uint64_t, ull & ~0xffffffffULL);
2794 break;
2795 case 8:
2796 x = 0;
2797 break;
2798 default:
2799 abort();
2800 }
2801 if (x) {
2802 file_magwarn(ms, "Overflow for numeric"
2803 " type `%s' value %#" PRIx64,
2804 type_tbl[m->type].name, ull);
2805 }
2806 }
2807 if (errno == 0) {
2808 *p = ep;
2809 eatsize(p);
2810 }
2811 return 0;
2812 }
2813 }
2814
2815 /*
2816 * Convert a string containing C character escapes. Stop at an unescaped
2817 * space or tab.
2818 * Copy the converted version to "m->value.s", and the length in m->vallen.
2819 * Return updated scan pointer as function result. Warn if set.
2820 */
2821 private const char *
getstr(struct magic_set * ms,struct magic * m,const char * s,int warn)2822 getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2823 {
2824 const char *origs = s;
2825 char *p = m->value.s;
2826 size_t plen = sizeof(m->value.s);
2827 char *origp = p;
2828 char *pmax = p + plen - 1;
2829 int c;
2830 int val;
2831
2832 while ((c = *s++) != '\0') {
2833 if (isspace(CAST(unsigned char, c)))
2834 break;
2835 if (p >= pmax) {
2836 file_error(ms, 0, "string too long: `%s'", origs);
2837 return NULL;
2838 }
2839 if (c == '\\') {
2840 switch(c = *s++) {
2841
2842 case '\0':
2843 if (warn)
2844 file_magwarn(ms, "incomplete escape");
2845 s--;
2846 goto out;
2847
2848 case '\t':
2849 if (warn) {
2850 file_magwarn(ms,
2851 "escaped tab found, use \\t instead");
2852 warn = 0; /* already did */
2853 }
2854 /*FALLTHROUGH*/
2855 default:
2856 if (warn) {
2857 if (isprint(CAST(unsigned char, c))) {
2858 /* Allow escaping of
2859 * ``relations'' */
2860 if (strchr("<>&^=!", c) == NULL
2861 && (m->type != FILE_REGEX ||
2862 strchr("[]().*?^$|{}", c)
2863 == NULL)) {
2864 file_magwarn(ms, "no "
2865 "need to escape "
2866 "`%c'", c);
2867 }
2868 } else {
2869 file_magwarn(ms,
2870 "unknown escape sequence: "
2871 "\\%03o", c);
2872 }
2873 }
2874 /*FALLTHROUGH*/
2875 /* space, perhaps force people to use \040? */
2876 case ' ':
2877 #if 0
2878 /*
2879 * Other things people escape, but shouldn't need to,
2880 * so we disallow them
2881 */
2882 case '\'':
2883 case '"':
2884 case '?':
2885 #endif
2886 /* Relations */
2887 case '>':
2888 case '<':
2889 case '&':
2890 case '^':
2891 case '=':
2892 case '!':
2893 /* and baskslash itself */
2894 case '\\':
2895 *p++ = CAST(char, c);
2896 break;
2897
2898 case 'a':
2899 *p++ = '\a';
2900 break;
2901
2902 case 'b':
2903 *p++ = '\b';
2904 break;
2905
2906 case 'f':
2907 *p++ = '\f';
2908 break;
2909
2910 case 'n':
2911 *p++ = '\n';
2912 break;
2913
2914 case 'r':
2915 *p++ = '\r';
2916 break;
2917
2918 case 't':
2919 *p++ = '\t';
2920 break;
2921
2922 case 'v':
2923 *p++ = '\v';
2924 break;
2925
2926 /* \ and up to 3 octal digits */
2927 case '0':
2928 case '1':
2929 case '2':
2930 case '3':
2931 case '4':
2932 case '5':
2933 case '6':
2934 case '7':
2935 val = c - '0';
2936 c = *s++; /* try for 2 */
2937 if (c >= '0' && c <= '7') {
2938 val = (val << 3) | (c - '0');
2939 c = *s++; /* try for 3 */
2940 if (c >= '0' && c <= '7')
2941 val = (val << 3) | (c-'0');
2942 else
2943 --s;
2944 }
2945 else
2946 --s;
2947 *p++ = CAST(char, val);
2948 break;
2949
2950 /* \x and up to 2 hex digits */
2951 case 'x':
2952 val = 'x'; /* Default if no digits */
2953 c = hextoint(*s++); /* Get next char */
2954 if (c >= 0) {
2955 val = c;
2956 c = hextoint(*s++);
2957 if (c >= 0)
2958 val = (val << 4) + c;
2959 else
2960 --s;
2961 } else
2962 --s;
2963 *p++ = CAST(char, val);
2964 break;
2965 }
2966 } else
2967 *p++ = CAST(char, c);
2968 }
2969 --s;
2970 out:
2971 *p = '\0';
2972 m->vallen = CAST(unsigned char, (p - origp));
2973 if (m->type == FILE_PSTRING) {
2974 size_t l = file_pstring_length_size(ms, m);
2975 if (l == FILE_BADSIZE)
2976 return NULL;
2977 m->vallen += CAST(unsigned char, l);
2978 }
2979 return s;
2980 }
2981
2982
/*
 * Convert a single hexadecimal digit character to its value.
 * Returns 0..15 for a hex digit and -1 for anything else.  The
 * classification looks at the low byte of c only.
 */
private int
hextoint(int c)
{
	const unsigned char uc = CAST(unsigned char, c);

	/* anything outside 7-bit ASCII cannot be a hex digit */
	if ((uc & ~0x7f) != 0)
		return -1;
	if (isdigit(uc))
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
2997
2998
2999 /*
3000 * Print a string containing C character escapes.
3001 */
3002 protected void
file_showstr(FILE * fp,const char * s,size_t len)3003 file_showstr(FILE *fp, const char *s, size_t len)
3004 {
3005 char c;
3006
3007 for (;;) {
3008 if (len == FILE_BADSIZE) {
3009 c = *s++;
3010 if (c == '\0')
3011 break;
3012 }
3013 else {
3014 if (len-- == 0)
3015 break;
3016 c = *s++;
3017 }
3018 if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */
3019 (void) fputc(c, fp);
3020 else {
3021 (void) fputc('\\', fp);
3022 switch (c) {
3023 case '\a':
3024 (void) fputc('a', fp);
3025 break;
3026
3027 case '\b':
3028 (void) fputc('b', fp);
3029 break;
3030
3031 case '\f':
3032 (void) fputc('f', fp);
3033 break;
3034
3035 case '\n':
3036 (void) fputc('n', fp);
3037 break;
3038
3039 case '\r':
3040 (void) fputc('r', fp);
3041 break;
3042
3043 case '\t':
3044 (void) fputc('t', fp);
3045 break;
3046
3047 case '\v':
3048 (void) fputc('v', fp);
3049 break;
3050
3051 default:
3052 (void) fprintf(fp, "%.3o", c & 0377);
3053 break;
3054 }
3055 }
3056 }
3057 }
3058
3059 /*
3060 * eatsize(): Eat the size spec from a number [eg. 10UL]
3061 */
3062 private void
eatsize(const char ** p)3063 eatsize(const char **p)
3064 {
3065 const char *l = *p;
3066
3067 if (LOWCASE(*l) == 'u')
3068 l++;
3069
3070 switch (LOWCASE(*l)) {
3071 case 'l': /* long */
3072 case 's': /* short */
3073 case 'h': /* short */
3074 case 'b': /* char/byte */
3075 case 'c': /* char/byte */
3076 l++;
3077 /*FALLTHROUGH*/
3078 default:
3079 break;
3080 }
3081
3082 *p = l;
3083 }
3084
3085 /*
3086 * handle a buffer containing a compiled file.
3087 */
3088 private struct magic_map *
apprentice_buf(struct magic_set * ms,struct magic * buf,size_t len)3089 apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len)
3090 {
3091 struct magic_map *map;
3092
3093 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
3094 file_oomem(ms, sizeof(*map));
3095 return NULL;
3096 }
3097 map->len = len;
3098 map->p = buf;
3099 map->type = MAP_TYPE_USER;
3100 if (check_buffer(ms, map, "buffer") != 0) {
3101 apprentice_unmap(map);
3102 return NULL;
3103 }
3104 return map;
3105 }
3106
3107 /*
3108 * handle a compiled file.
3109 */
3110
3111 private struct magic_map *
apprentice_map(struct magic_set * ms,const char * fn)3112 apprentice_map(struct magic_set *ms, const char *fn)
3113 {
3114 int fd;
3115 struct stat st;
3116 char *dbname = NULL;
3117 struct magic_map *map;
3118 struct magic_map *rv = NULL;
3119
3120 fd = -1;
3121 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
3122 file_oomem(ms, sizeof(*map));
3123 goto error;
3124 }
3125 map->type = MAP_TYPE_USER; /* unspecified */
3126
3127 dbname = mkdbname(ms, fn, 0);
3128 if (dbname == NULL)
3129 goto error;
3130
3131 if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1)
3132 goto error;
3133
3134 if (fstat(fd, &st) == -1) {
3135 file_error(ms, errno, "cannot stat `%s'", dbname);
3136 goto error;
3137 }
3138 if (st.st_size < 8 || st.st_size > maxoff_t()) {
3139 file_error(ms, 0, "file `%s' is too %s", dbname,
3140 st.st_size < 8 ? "small" : "large");
3141 goto error;
3142 }
3143
3144 map->len = CAST(size_t, st.st_size);
3145 #ifdef QUICK
3146 map->type = MAP_TYPE_MMAP;
3147 if ((map->p = mmap(0, CAST(size_t, st.st_size), PROT_READ|PROT_WRITE,
3148 MAP_PRIVATE|MAP_FILE, fd, CAST(off_t, 0))) == MAP_FAILED) {
3149 file_error(ms, errno, "cannot map `%s'", dbname);
3150 goto error;
3151 }
3152 #else
3153 map->type = MAP_TYPE_MALLOC;
3154 if ((map->p = CAST(void *, malloc(map->len))) == NULL) {
3155 file_oomem(ms, map->len);
3156 goto error;
3157 }
3158 if (read(fd, map->p, map->len) != (ssize_t)map->len) {
3159 file_badread(ms);
3160 goto error;
3161 }
3162 #endif
3163 (void)close(fd);
3164 fd = -1;
3165
3166 if (check_buffer(ms, map, dbname) != 0) {
3167 goto error;
3168 }
3169 #ifdef QUICK
3170 if (mprotect(map->p, CAST(size_t, st.st_size), PROT_READ) == -1) {
3171 file_error(ms, errno, "cannot mprotect `%s'", dbname);
3172 goto error;
3173 }
3174 #endif
3175
3176 free(dbname);
3177 return map;
3178
3179 error:
3180 if (fd != -1)
3181 (void)close(fd);
3182 apprentice_unmap(map);
3183 free(dbname);
3184 return rv;
3185 }
3186
3187 private int
check_buffer(struct magic_set * ms,struct magic_map * map,const char * dbname)3188 check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname)
3189 {
3190 uint32_t *ptr;
3191 uint32_t entries, nentries;
3192 uint32_t version;
3193 int i, needsbyteswap;
3194
3195 ptr = CAST(uint32_t *, map->p);
3196 if (*ptr != MAGICNO) {
3197 if (swap4(*ptr) != MAGICNO) {
3198 file_error(ms, 0, "bad magic in `%s'", dbname);
3199 return -1;
3200 }
3201 needsbyteswap = 1;
3202 } else
3203 needsbyteswap = 0;
3204 if (needsbyteswap)
3205 version = swap4(ptr[1]);
3206 else
3207 version = ptr[1];
3208 if (version != VERSIONNO) {
3209 file_error(ms, 0, "File %s supports only version %d magic "
3210 "files. `%s' is version %d", VERSION,
3211 VERSIONNO, dbname, version);
3212 return -1;
3213 }
3214 entries = CAST(uint32_t, map->len / sizeof(struct magic));
3215 if ((entries * sizeof(struct magic)) != map->len) {
3216 file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not "
3217 "a multiple of %" SIZE_T_FORMAT "u",
3218 dbname, map->len, sizeof(struct magic));
3219 return -1;
3220 }
3221 map->magic[0] = CAST(struct magic *, map->p) + 1;
3222 nentries = 0;
3223 for (i = 0; i < MAGIC_SETS; i++) {
3224 if (needsbyteswap)
3225 map->nmagic[i] = swap4(ptr[i + 2]);
3226 else
3227 map->nmagic[i] = ptr[i + 2];
3228 if (i != MAGIC_SETS - 1)
3229 map->magic[i + 1] = map->magic[i] + map->nmagic[i];
3230 nentries += map->nmagic[i];
3231 }
3232 if (entries != nentries + 1) {
3233 file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
3234 dbname, entries, nentries + 1);
3235 return -1;
3236 }
3237 if (needsbyteswap)
3238 for (i = 0; i < MAGIC_SETS; i++)
3239 byteswap(map->magic[i], map->nmagic[i]);
3240 return 0;
3241 }
3242
3243 /*
3244 * handle an mmaped file.
3245 */
3246 private int
apprentice_compile(struct magic_set * ms,struct magic_map * map,const char * fn)3247 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
3248 {
3249 static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
3250 static const size_t m = sizeof(**map->magic);
3251 int fd = -1;
3252 size_t len;
3253 char *dbname;
3254 int rv = -1;
3255 uint32_t i;
3256 union {
3257 struct magic m;
3258 uint32_t h[2 + MAGIC_SETS];
3259 } hdr;
3260
3261 dbname = mkdbname(ms, fn, 1);
3262
3263 if (dbname == NULL)
3264 goto out;
3265
3266 if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1)
3267 {
3268 file_error(ms, errno, "cannot open `%s'", dbname);
3269 goto out;
3270 }
3271 memset(&hdr, 0, sizeof(hdr));
3272 hdr.h[0] = MAGICNO;
3273 hdr.h[1] = VERSIONNO;
3274 memcpy(hdr.h + 2, map->nmagic, nm);
3275
3276 if (write(fd, &hdr, sizeof(hdr)) != CAST(ssize_t, sizeof(hdr))) {
3277 file_error(ms, errno, "error writing `%s'", dbname);
3278 goto out2;
3279 }
3280
3281 for (i = 0; i < MAGIC_SETS; i++) {
3282 len = m * map->nmagic[i];
3283 if (write(fd, map->magic[i], len) != CAST(ssize_t, len)) {
3284 file_error(ms, errno, "error writing `%s'", dbname);
3285 goto out2;
3286 }
3287 }
3288
3289 rv = 0;
3290 out2:
3291 if (fd != -1)
3292 (void)close(fd);
3293 out:
3294 apprentice_unmap(map);
3295 free(dbname);
3296 return rv;
3297 }
3298
3299 private const char ext[] = ".mgc";
3300 /*
3301 * make a dbname
3302 */
3303 private char *
mkdbname(struct magic_set * ms,const char * fn,int strip)3304 mkdbname(struct magic_set *ms, const char *fn, int strip)
3305 {
3306 const char *p, *q;
3307 char *buf;
3308
3309 if (strip) {
3310 if ((p = strrchr(fn, '/')) != NULL)
3311 fn = ++p;
3312 }
3313
3314 for (q = fn; *q; q++)
3315 continue;
3316 /* Look for .mgc */
3317 for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
3318 if (*p != *q)
3319 break;
3320
3321 /* Did not find .mgc, restore q */
3322 if (p >= ext)
3323 while (*q)
3324 q++;
3325
3326 q++;
3327 /* Compatibility with old code that looked in .mime */
3328 if (ms->flags & MAGIC_MIME) {
3329 if (asprintf(&buf, "%.*s.mime%s", CAST(int, q - fn), fn, ext)
3330 < 0)
3331 return NULL;
3332 if (access(buf, R_OK) != -1) {
3333 ms->flags &= MAGIC_MIME_TYPE;
3334 return buf;
3335 }
3336 free(buf);
3337 }
3338 if (asprintf(&buf, "%.*s%s", CAST(int, q - fn), fn, ext) < 0)
3339 return NULL;
3340
3341 /* Compatibility with old code that looked in .mime */
3342 if (strstr(fn, ".mime") != NULL)
3343 ms->flags &= MAGIC_MIME_TYPE;
3344 return buf;
3345 }
3346
3347 /*
3348 * Byteswap an mmap'ed file if needed
3349 */
3350 private void
byteswap(struct magic * magic,uint32_t nmagic)3351 byteswap(struct magic *magic, uint32_t nmagic)
3352 {
3353 uint32_t i;
3354 for (i = 0; i < nmagic; i++)
3355 bs1(&magic[i]);
3356 }
3357
/*
 * swap a short: return sv with its two bytes exchanged
 */
private uint16_t
swap2(uint16_t sv)
{
	return CAST(uint16_t, ((sv & 0x00ffU) << 8) | ((sv >> 8) & 0x00ffU));
}
3371
/*
 * swap an int: return sv with the order of its four bytes reversed
 */
private uint32_t
swap4(uint32_t sv)
{
	return CAST(uint32_t,
	    ((sv & 0x000000ffU) << 24) |
	    ((sv & 0x0000ff00U) << 8) |
	    ((sv >> 8) & 0x0000ff00U) |
	    ((sv >> 24) & 0x000000ffU));
}
3387
/*
 * swap a quad: return sv with the order of its eight bytes reversed
 */
private uint64_t
swap8(uint64_t sv)
{
	uint64_t rv = 0;
	int i;

	/* move the lowest byte of sv to the bottom of rv, eight times */
	for (i = 0; i < 8; i++) {
		rv = (rv << 8) | (sv & 0xffU);
		sv >>= 8;
	}
	return rv;
}
3418
/*
 * Decode a variable-length integer: a sequence of bytes that each carry
 * 7 value bits, where a set high bit (0x80) means another byte follows.
 *
 * us	start of the encoded bytes; there is no length argument, so the
 *	caller must guarantee that a byte without the high bit is present.
 * t	FILE_LEVARINT when the first byte holds the least significant
 *	group; any other value decodes with the first byte as the most
 *	significant group.
 * l	when not NULL, receives the number of bytes the encoding occupies.
 *
 * Returns the decoded value; groups that do not fit in uintmax_t are
 * shifted out.
 */
protected uintmax_t
file_varint2uintmax_t(const unsigned char *us, int t, size_t *l)
{
	uintmax_t x = 0;
	size_t n, i;

	/* the encoding ends with the first byte that has no high bit */
	for (n = 0; (us[n] & 0x80) != 0; n++)
		continue;
	n++;
	if (l)
		*l = n;

	if (t == FILE_LEVARINT) {
		/*
		 * Walk from the last (most significant) group down to the
		 * first.  Shifting before or-ing avoids the extra shift
		 * after the final group that used to scale the result by
		 * 128, and indexing avoids stepping a pointer below us.
		 */
		for (i = n; i-- > 0;)
			x = (x << 7) | (us[i] & 0x7f);
	} else {
		for (i = 0; i < n; i++)
			x = (x << 7) | (us[i] & 0x7f);
	}
	return x;
}
3447
3448
3449 /*
3450 * byteswap a single magic entry
3451 */
3452 private void
bs1(struct magic * m)3453 bs1(struct magic *m)
3454 {
3455 m->cont_level = swap2(m->cont_level);
3456 m->offset = swap4(CAST(uint32_t, m->offset));
3457 m->in_offset = swap4(CAST(uint32_t, m->in_offset));
3458 m->lineno = swap4(CAST(uint32_t, m->lineno));
3459 if (IS_STRING(m->type)) {
3460 m->str_range = swap4(m->str_range);
3461 m->str_flags = swap4(m->str_flags);
3462 }
3463 else {
3464 m->value.q = swap8(m->value.q);
3465 m->num_mask = swap8(m->num_mask);
3466 }
3467 }
3468
3469 protected size_t
file_pstring_length_size(struct magic_set * ms,const struct magic * m)3470 file_pstring_length_size(struct magic_set *ms, const struct magic *m)
3471 {
3472 switch (m->str_flags & PSTRING_LEN) {
3473 case PSTRING_1_LE:
3474 return 1;
3475 case PSTRING_2_LE:
3476 case PSTRING_2_BE:
3477 return 2;
3478 case PSTRING_4_LE:
3479 case PSTRING_4_BE:
3480 return 4;
3481 default:
3482 file_error(ms, 0, "corrupt magic file "
3483 "(bad pascal string length %d)",
3484 m->str_flags & PSTRING_LEN);
3485 return FILE_BADSIZE;
3486 }
3487 }
3488 protected size_t
file_pstring_get_length(struct magic_set * ms,const struct magic * m,const char * ss)3489 file_pstring_get_length(struct magic_set *ms, const struct magic *m,
3490 const char *ss)
3491 {
3492 size_t len = 0;
3493 const unsigned char *s = RCAST(const unsigned char *, ss);
3494 unsigned int s3, s2, s1, s0;
3495
3496 switch (m->str_flags & PSTRING_LEN) {
3497 case PSTRING_1_LE:
3498 len = *s;
3499 break;
3500 case PSTRING_2_LE:
3501 s0 = s[0];
3502 s1 = s[1];
3503 len = (s1 << 8) | s0;
3504 break;
3505 case PSTRING_2_BE:
3506 s0 = s[0];
3507 s1 = s[1];
3508 len = (s0 << 8) | s1;
3509 break;
3510 case PSTRING_4_LE:
3511 s0 = s[0];
3512 s1 = s[1];
3513 s2 = s[2];
3514 s3 = s[3];
3515 len = (s3 << 24) | (s2 << 16) | (s1 << 8) | s0;
3516 break;
3517 case PSTRING_4_BE:
3518 s0 = s[0];
3519 s1 = s[1];
3520 s2 = s[2];
3521 s3 = s[3];
3522 len = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3;
3523 break;
3524 default:
3525 file_error(ms, 0, "corrupt magic file "
3526 "(bad pascal string length %d)",
3527 m->str_flags & PSTRING_LEN);
3528 return FILE_BADSIZE;
3529 }
3530
3531 if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) {
3532 size_t l = file_pstring_length_size(ms, m);
3533 if (l == FILE_BADSIZE)
3534 return l;
3535 len -= l;
3536 }
3537
3538 return len;
3539 }
3540
3541 protected int
file_magicfind(struct magic_set * ms,const char * name,struct mlist * v)3542 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3543 {
3544 uint32_t i, j;
3545 struct mlist *mlist, *ml;
3546
3547 mlist = ms->mlist[1];
3548
3549 for (ml = mlist->next; ml != mlist; ml = ml->next) {
3550 struct magic *ma = ml->magic;
3551 uint32_t nma = ml->nmagic;
3552 for (i = 0; i < nma; i++) {
3553 if (ma[i].type != FILE_NAME)
3554 continue;
3555 if (strcmp(ma[i].value.s, name) == 0) {
3556 v->magic = &ma[i];
3557 for (j = i + 1; j < nma; j++)
3558 if (ma[j].cont_level == 0)
3559 break;
3560 v->nmagic = j - i;
3561 return 0;
3562 }
3563 }
3564 }
3565 return -1;
3566 }
3567