2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice immediately at the beginning of the file, without modification,
11 * this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/*
 * apprentice - make one pass through /etc/magic, learning its secrets.
 */
35 FILE_RCSID("@(#)$File: apprentice.c,v 1.262 2017/08/28 13:39:18 christos Exp $")
54 #if defined(HAVE_LIMITS_H)
59 #define MAXMAGIC_SIZE ((ssize_t)0x7fffffff)
61 #define MAXMAGIC_SIZE SSIZE_MAX
/*
 * EATAB - advance the pointer named `l' (which must be in scope at the
 * point of use) past any run of ASCII whitespace.  Expands to a braced
 * statement, not an expression.
 */
#define	EATAB {while (isascii((unsigned char) *l) && \
		      isspace((unsigned char) *l))  ++l;}
/*
 * LOWCASE(l) - yield the lower-case form of `l' when it is an upper-case
 * letter and `l' itself otherwise.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))
/*
 * Work around a bug in headers on Digital Unix.
 * At least confirmed for: OSF1 V4.0 878
 */
72 #if defined(__osf__) && defined(__DECC)
79 #define MAP_FAILED (void *) -1
/*
 * Growth steps for dynamically sized arrays: parse() grows an entry's
 * magic array by ALLOC_CHUNK elements, addentry() grows an entry set by
 * ALLOC_INCR elements.
 */
#define ALLOC_CHUNK	(size_t)10
#define ALLOC_INCR	(size_t)200

/*
 * Values stored in a magic map's `type' field.  apprentice_load() tags
 * the maps it builds with MAP_TYPE_MALLOC, and apprentice_unmap()
 * switches on the field.
 */
#define MAP_TYPE_USER	0
#define MAP_TYPE_MALLOC	1
#define MAP_TYPE_MMAP	2
99 struct magic_entry_set
{
100 struct magic_entry
*me
;
109 struct magic
*magic
[MAGIC_SETS
];
110 uint32_t nmagic
[MAGIC_SETS
];
113 int file_formats
[FILE_NAMES_SIZE
];
114 const size_t file_nformats
= FILE_NAMES_SIZE
;
115 const char *file_names
[FILE_NAMES_SIZE
];
116 const size_t file_nnames
= FILE_NAMES_SIZE
;
118 private int getvalue(struct magic_set
*ms
, struct magic
*, const char **, int);
119 private int hextoint(int);
120 private const char *getstr(struct magic_set
*, struct magic
*, const char *,
122 private int parse(struct magic_set
*, struct magic_entry
*, const char *,
124 private void eatsize(const char **);
125 private int apprentice_1(struct magic_set
*, const char *, int);
126 private size_t apprentice_magic_strength(const struct magic
*);
127 private int apprentice_sort(const void *, const void *);
128 private void apprentice_list(struct mlist
*, int );
129 private struct magic_map
*apprentice_load(struct magic_set
*,
131 private struct mlist
*mlist_alloc(void);
132 private void mlist_free(struct mlist
*);
133 private void byteswap(struct magic
*, uint32_t);
134 private void bs1(struct magic
*);
135 private uint16_t swap2(uint16_t);
136 private uint32_t swap4(uint32_t);
137 private uint64_t swap8(uint64_t);
138 private char *mkdbname(struct magic_set
*, const char *, int);
139 private struct magic_map
*apprentice_buf(struct magic_set
*, struct magic
*,
141 private struct magic_map
*apprentice_map(struct magic_set
*, const char *);
142 private int check_buffer(struct magic_set
*, struct magic_map
*, const char *);
143 private void apprentice_unmap(struct magic_map
*);
144 private int apprentice_compile(struct magic_set
*, struct magic_map
*,
146 private int check_format_type(const char *, int, const char **);
147 private int check_format(struct magic_set
*, struct magic
*);
148 private int get_op(char);
149 private int parse_mime(struct magic_set
*, struct magic_entry
*, const char *);
150 private int parse_strength(struct magic_set
*, struct magic_entry
*, const char *);
151 private int parse_apple(struct magic_set
*, struct magic_entry
*, const char *);
152 private int parse_ext(struct magic_set
*, struct magic_entry
*, const char *);
155 private size_t magicsize
= sizeof(struct magic
);
157 private const char usg_hdr
[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
162 int (*fun
)(struct magic_set
*, struct magic_entry
*, const char *);
164 #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
166 DECLARE_FIELD(apple
),
168 DECLARE_FIELD(strength
),
175 int main(int, char *[]);
178 main(int argc
, char *argv
[])
181 struct magic_set
*ms
;
184 if ((progname
= strrchr(argv
[0], '/')) != NULL
)
190 (void)fprintf(stderr
, "Usage: %s file\n", progname
);
194 if ((ms
= magic_open(MAGIC_CHECK
)) == NULL
) {
195 (void)fprintf(stderr
, "%s: %s\n", progname
, strerror(errno
));
198 ret
= magic_compile(ms
, argv
[1]) == -1 ? 1 : 0;
200 (void)fprintf(stderr
, "%s: %s\n", progname
, magic_error(ms
));
204 #endif /* COMPILE_ONLY */
/*
 * XXX - the actual Single UNIX Specification says that "long" means "long",
 * as in the C data type, but we treat it as meaning "4-byte integer".
 * Given that the OS X version of file 5.04 did the same, I guess that passes
 * the actual test; having "long" be dependent on how big a "long" is on
 * the machine running "file" is silly.
 */
220 static const struct type_tbl_s type_tbl
[] = {
221 # define XX(s) s, (sizeof(s) - 1)
222 # define XX_NULL "", 0
223 { XX("invalid"), FILE_INVALID
, FILE_FMT_NONE
},
224 { XX("byte"), FILE_BYTE
, FILE_FMT_NUM
},
225 { XX("short"), FILE_SHORT
, FILE_FMT_NUM
},
226 { XX("default"), FILE_DEFAULT
, FILE_FMT_NONE
},
227 { XX("long"), FILE_LONG
, FILE_FMT_NUM
},
228 { XX("string"), FILE_STRING
, FILE_FMT_STR
},
229 { XX("date"), FILE_DATE
, FILE_FMT_STR
},
230 { XX("beshort"), FILE_BESHORT
, FILE_FMT_NUM
},
231 { XX("belong"), FILE_BELONG
, FILE_FMT_NUM
},
232 { XX("bedate"), FILE_BEDATE
, FILE_FMT_STR
},
233 { XX("leshort"), FILE_LESHORT
, FILE_FMT_NUM
},
234 { XX("lelong"), FILE_LELONG
, FILE_FMT_NUM
},
235 { XX("ledate"), FILE_LEDATE
, FILE_FMT_STR
},
236 { XX("pstring"), FILE_PSTRING
, FILE_FMT_STR
},
237 { XX("ldate"), FILE_LDATE
, FILE_FMT_STR
},
238 { XX("beldate"), FILE_BELDATE
, FILE_FMT_STR
},
239 { XX("leldate"), FILE_LELDATE
, FILE_FMT_STR
},
240 { XX("regex"), FILE_REGEX
, FILE_FMT_STR
},
241 { XX("bestring16"), FILE_BESTRING16
, FILE_FMT_STR
},
242 { XX("lestring16"), FILE_LESTRING16
, FILE_FMT_STR
},
243 { XX("search"), FILE_SEARCH
, FILE_FMT_STR
},
244 { XX("medate"), FILE_MEDATE
, FILE_FMT_STR
},
245 { XX("meldate"), FILE_MELDATE
, FILE_FMT_STR
},
246 { XX("melong"), FILE_MELONG
, FILE_FMT_NUM
},
247 { XX("quad"), FILE_QUAD
, FILE_FMT_QUAD
},
248 { XX("lequad"), FILE_LEQUAD
, FILE_FMT_QUAD
},
249 { XX("bequad"), FILE_BEQUAD
, FILE_FMT_QUAD
},
250 { XX("qdate"), FILE_QDATE
, FILE_FMT_STR
},
251 { XX("leqdate"), FILE_LEQDATE
, FILE_FMT_STR
},
252 { XX("beqdate"), FILE_BEQDATE
, FILE_FMT_STR
},
253 { XX("qldate"), FILE_QLDATE
, FILE_FMT_STR
},
254 { XX("leqldate"), FILE_LEQLDATE
, FILE_FMT_STR
},
255 { XX("beqldate"), FILE_BEQLDATE
, FILE_FMT_STR
},
256 { XX("float"), FILE_FLOAT
, FILE_FMT_FLOAT
},
257 { XX("befloat"), FILE_BEFLOAT
, FILE_FMT_FLOAT
},
258 { XX("lefloat"), FILE_LEFLOAT
, FILE_FMT_FLOAT
},
259 { XX("double"), FILE_DOUBLE
, FILE_FMT_DOUBLE
},
260 { XX("bedouble"), FILE_BEDOUBLE
, FILE_FMT_DOUBLE
},
261 { XX("ledouble"), FILE_LEDOUBLE
, FILE_FMT_DOUBLE
},
262 { XX("leid3"), FILE_LEID3
, FILE_FMT_NUM
},
263 { XX("beid3"), FILE_BEID3
, FILE_FMT_NUM
},
264 { XX("indirect"), FILE_INDIRECT
, FILE_FMT_NUM
},
265 { XX("qwdate"), FILE_QWDATE
, FILE_FMT_STR
},
266 { XX("leqwdate"), FILE_LEQWDATE
, FILE_FMT_STR
},
267 { XX("beqwdate"), FILE_BEQWDATE
, FILE_FMT_STR
},
268 { XX("name"), FILE_NAME
, FILE_FMT_NONE
},
269 { XX("use"), FILE_USE
, FILE_FMT_NONE
},
270 { XX("clear"), FILE_CLEAR
, FILE_FMT_NONE
},
271 { XX("der"), FILE_DER
, FILE_FMT_STR
},
272 { XX_NULL
, FILE_INVALID
, FILE_FMT_NONE
},
276 * These are not types, and cannot be preceded by "u" to make them
279 static const struct type_tbl_s special_tbl
[] = {
280 { XX("der"), FILE_DER
, FILE_FMT_STR
},
281 { XX("name"), FILE_NAME
, FILE_FMT_STR
},
282 { XX("use"), FILE_USE
, FILE_FMT_STR
},
283 { XX_NULL
, FILE_INVALID
, FILE_FMT_NONE
},
289 get_type(const struct type_tbl_s
*tbl
, const char *l
, const char **t
)
291 const struct type_tbl_s
*p
;
293 for (p
= tbl
; p
->len
; p
++) {
294 if (strncmp(l
, p
->name
, p
->len
) == 0) {
304 get_standard_integer_type(const char *l
, const char **t
)
308 if (isalpha((unsigned char)l
[1])) {
321 * "dI", "dL", "uI", and "uL".
323 * XXX - the actual Single UNIX Specification says
324 * that "L" means "long", as in the C data type,
325 * but we treat it as meaning "4-byte integer".
326 * Given that the OS X version of file 5.04 did
327 * the same, I guess that passes the actual SUS
328 * validation suite; having "dL" be dependent on
329 * how big a "long" is on the machine running
339 /* "d{anything else}", "u{anything else}" */
343 } else if (isdigit((unsigned char)l
[1])) {
345 * "d{num}" and "u{num}"; we only support {num} values
346 * of 1, 2, 4, and 8 - the Single UNIX Specification
347 * doesn't say anything about whether arbitrary
348 * values should be supported, but both the Solaris 10
349 * and OS X Mountain Lion versions of file passed the
350 * Single UNIX Specification validation suite, and
351 * neither of them support values bigger than 8 or
352 * non-power-of-2 values.
354 if (isdigit((unsigned char)l
[2])) {
355 /* Multi-digit, so > 9 */
372 /* XXX - what about 3, 5, 6, or 7? */
378 * "d" or "u" by itself.
389 init_file_tables(void)
392 const struct type_tbl_s
*p
;
398 for (p
= type_tbl
; p
->len
; p
++) {
399 assert(p
->type
< FILE_NAMES_SIZE
);
400 file_names
[p
->type
] = p
->name
;
401 file_formats
[p
->type
] = p
->format
;
403 assert(p
- type_tbl
== FILE_NAMES_SIZE
);
407 add_mlist(struct mlist
*mlp
, struct magic_map
*map
, size_t idx
)
412 if ((ml
= CAST(struct mlist
*, malloc(sizeof(*ml
)))) == NULL
)
415 ml
->map
= idx
== 0 ? map
: NULL
;
416 ml
->magic
= map
->magic
[idx
];
417 ml
->nmagic
= map
->nmagic
[idx
];
419 mlp
->prev
->next
= ml
;
420 ml
->prev
= mlp
->prev
;
427 * Handle one file or directory.
430 apprentice_1(struct magic_set
*ms
, const char *fn
, int action
)
432 struct magic_map
*map
;
438 if (magicsize
!= FILE_MAGICSIZE
) {
439 file_error(ms
, 0, "magic element size %lu != %lu",
440 (unsigned long)sizeof(*map
->magic
[0]),
441 (unsigned long)FILE_MAGICSIZE
);
445 if (action
== FILE_COMPILE
) {
446 map
= apprentice_load(ms
, fn
, action
);
449 return apprentice_compile(ms
, map
, fn
);
453 map
= apprentice_map(ms
, fn
);
454 if (map
== (struct magic_map
*)-1)
457 if (ms
->flags
& MAGIC_CHECK
)
458 file_magwarn(ms
, "using regular magic file `%s'", fn
);
459 map
= apprentice_load(ms
, fn
, action
);
464 for (i
= 0; i
< MAGIC_SETS
; i
++) {
465 if (add_mlist(ms
->mlist
[i
], map
, i
) == -1) {
466 file_oomem(ms
, sizeof(*ml
));
471 if (action
== FILE_LIST
) {
472 for (i
= 0; i
< MAGIC_SETS
; i
++) {
473 printf("Set %" SIZE_T_FORMAT
"u:\nBinary patterns:\n",
475 apprentice_list(ms
->mlist
[i
], BINTEST
);
476 printf("Text patterns:\n");
477 apprentice_list(ms
->mlist
[i
], TEXTTEST
);
483 #endif /* COMPILE_ONLY */
487 file_ms_free(struct magic_set
*ms
)
492 for (i
= 0; i
< MAGIC_SETS
; i
++)
493 mlist_free(ms
->mlist
[i
]);
500 protected struct magic_set
*
501 file_ms_alloc(int flags
)
503 struct magic_set
*ms
;
506 if ((ms
= CAST(struct magic_set
*, calloc((size_t)1,
507 sizeof(struct magic_set
)))) == NULL
)
510 if (magic_setflags(ms
, flags
) == -1) {
515 ms
->o
.buf
= ms
->o
.pbuf
= NULL
;
516 len
= (ms
->c
.len
= 10) * sizeof(*ms
->c
.li
);
518 if ((ms
->c
.li
= CAST(struct level_info
*, malloc(len
))) == NULL
)
523 for (i
= 0; i
< MAGIC_SETS
; i
++)
525 ms
->file
= "unknown";
527 ms
->indir_max
= FILE_INDIR_MAX
;
528 ms
->name_max
= FILE_NAME_MAX
;
529 ms
->elf_shnum_max
= FILE_ELF_SHNUM_MAX
;
530 ms
->elf_phnum_max
= FILE_ELF_PHNUM_MAX
;
531 ms
->elf_notes_max
= FILE_ELF_NOTES_MAX
;
532 ms
->regex_max
= FILE_REGEX_MAX
;
533 ms
->bytes_max
= FILE_BYTES_MAX
;
541 apprentice_unmap(struct magic_map
*map
)
550 case MAP_TYPE_MALLOC
:
551 for (i
= 0; i
< MAGIC_SETS
; i
++) {
552 void *b
= map
->magic
[i
];
554 if (CAST(char *, b
) >= CAST(char *, p
) &&
555 CAST(char *, b
) <= CAST(char *, p
) + map
->len
)
563 if (map
->p
&& map
->p
!= MAP_FAILED
)
564 (void)munmap(map
->p
, map
->len
);
573 private struct mlist
*
577 if ((mlist
= CAST(struct mlist
*, calloc(1, sizeof(*mlist
)))) == NULL
) {
580 mlist
->next
= mlist
->prev
= mlist
;
585 mlist_free(struct mlist
*mlist
)
587 struct mlist
*ml
, *next
;
593 for (ml
= mlist
->next
; (next
= ml
->next
) != NULL
; ml
= next
) {
595 apprentice_unmap(CAST(struct magic_map
*, ml
->map
));
603 /* void **bufs: an array of compiled magic files */
605 buffer_apprentice(struct magic_set
*ms
, struct magic
**bufs
,
606 size_t *sizes
, size_t nbufs
)
610 struct magic_map
*map
;
615 (void)file_reset(ms
, 0);
619 for (i
= 0; i
< MAGIC_SETS
; i
++) {
620 mlist_free(ms
->mlist
[i
]);
621 if ((ms
->mlist
[i
] = mlist_alloc()) == NULL
) {
622 file_oomem(ms
, sizeof(*ms
->mlist
[i
]));
627 for (i
= 0; i
< nbufs
; i
++) {
628 map
= apprentice_buf(ms
, bufs
[i
], sizes
[i
]);
632 for (j
= 0; j
< MAGIC_SETS
; j
++) {
633 if (add_mlist(ms
->mlist
[j
], map
, j
) == -1) {
634 file_oomem(ms
, sizeof(*ml
));
642 for (i
= 0; i
< MAGIC_SETS
; i
++) {
643 mlist_free(ms
->mlist
[i
]);
650 /* const char *fn: list of magic files and directories */
652 file_apprentice(struct magic_set
*ms
, const char *fn
, int action
)
655 int file_err
, errs
= -1;
658 (void)file_reset(ms
, 0);
660 if ((fn
= magic_getpath(fn
, action
)) == NULL
)
665 if ((mfn
= strdup(fn
)) == NULL
) {
666 file_oomem(ms
, strlen(fn
));
670 for (i
= 0; i
< MAGIC_SETS
; i
++) {
671 mlist_free(ms
->mlist
[i
]);
672 if ((ms
->mlist
[i
] = mlist_alloc()) == NULL
) {
673 file_oomem(ms
, sizeof(*ms
->mlist
[i
]));
675 mlist_free(ms
->mlist
[i
]);
685 p
= strchr(fn
, PATHSEP
);
690 file_err
= apprentice_1(ms
, fn
, action
);
691 errs
= MAX(errs
, file_err
);
698 for (i
= 0; i
< MAGIC_SETS
; i
++) {
699 mlist_free(ms
->mlist
[i
]);
702 file_error(ms
, 0, "could not find any valid magic files!");
708 * Always leave the database loaded
710 if (action
== FILE_LOAD
)
713 for (i
= 0; i
< MAGIC_SETS
; i
++) {
714 mlist_free(ms
->mlist
[i
]);
726 file_error(ms
, 0, "Invalid action %d", action
);
/*
 * Compute the real length of a magic expression, for the purposes
 * of determining how "strong" a magic expression is (approximating
 * how specific its matches are):
 *	- magic characters count 0 unless escaped.
 *	- [] expressions count 1
 *	- {} expressions count 0
 *	- regular characters or escaped magic characters count 1
 *	- 0 length expressions count as one
 */
742 nonmagic(const char *str
)
747 for (p
= str
; *p
; p
++)
749 case '\\': /* Escaped anything counts 1 */
754 case '?': /* Magic characters count 0 */
761 case '[': /* Bracketed expressions count 1 the ']' */
762 while (*p
&& *p
!= ']')
766 case '{': /* Braced expressions count 0 */
767 while (*p
&& *p
!= '}')
772 default: /* Anything else counts 1 */
777 return rv
== 0 ? 1 : rv
; /* Return at least 1 */
834 * Get weight of this magic entry, for sorting purposes.
837 apprentice_magic_strength(const struct magic
*m
)
840 size_t ts
, v
, val
= 2 * MULT
; /* baseline strength */
843 case FILE_DEFAULT
: /* make sure this sorts last */
844 if (m
->factor_op
!= FILE_FACTOR_OP_NONE
)
882 ts
= typesize(m
->type
);
883 if (ts
== (size_t)~0)
890 val
+= m
->vallen
* MULT
;
893 case FILE_BESTRING16
:
894 case FILE_LESTRING16
:
895 val
+= m
->vallen
* MULT
/ 2;
899 val
+= m
->vallen
* MAX(MULT
/ m
->vallen
, 1);
903 v
= nonmagic(m
->value
.s
);
904 val
+= v
* MAX(MULT
/ v
, 1);
917 (void)fprintf(stderr
, "Bad type %d\n", m
->type
);
922 case 'x': /* matches anything penalize */
923 case '!': /* matches almost anything penalize */
927 case '=': /* Exact match, prefer */
932 case '<': /* comparison match reduce strength */
937 case '&': /* masking bits, we could count them too */
942 (void)fprintf(stderr
, "Bad relation %c\n", m
->reln
);
946 if (val
== 0) /* ensure we only return 0 for FILE_DEFAULT */
949 switch (m
->factor_op
) {
950 case FILE_FACTOR_OP_NONE
:
952 case FILE_FACTOR_OP_PLUS
:
955 case FILE_FACTOR_OP_MINUS
:
958 case FILE_FACTOR_OP_TIMES
:
961 case FILE_FACTOR_OP_DIV
:
969 * Magic entries with no description get a bonus because they depend
970 * on subsequent magic entries to print something.
972 if (m
->desc
[0] == '\0')
978 * Sort callback for sorting entries by "strength" (basically length)
981 apprentice_sort(const void *a
, const void *b
)
983 const struct magic_entry
*ma
= CAST(const struct magic_entry
*, a
);
984 const struct magic_entry
*mb
= CAST(const struct magic_entry
*, b
);
985 size_t sa
= apprentice_magic_strength(ma
->mp
);
986 size_t sb
= apprentice_magic_strength(mb
->mp
);
/*
 * Show the sorted pattern list in the order that is used for matching.
 */
999 apprentice_list(struct mlist
*mlist
, int mode
)
1001 uint32_t magindex
= 0;
1003 for (ml
= mlist
->next
; ml
!= mlist
; ml
= ml
->next
) {
1004 for (magindex
= 0; magindex
< ml
->nmagic
; magindex
++) {
1005 struct magic
*m
= &ml
->magic
[magindex
];
1006 if ((m
->flag
& mode
) != mode
) {
1007 /* Skip sub-tests */
1008 while (magindex
+ 1 < ml
->nmagic
&&
1009 ml
->magic
[magindex
+ 1].cont_level
!= 0)
1011 continue; /* Skip to next top-level test*/
1015 * Try to iterate over the tree until we find item with
1016 * description/mimetype.
1018 while (magindex
+ 1 < ml
->nmagic
&&
1019 ml
->magic
[magindex
+ 1].cont_level
!= 0 &&
1020 *ml
->magic
[magindex
].desc
== '\0' &&
1021 *ml
->magic
[magindex
].mimetype
== '\0')
1024 printf("Strength = %3" SIZE_T_FORMAT
"u@%u: %s [%s]\n",
1025 apprentice_magic_strength(m
),
1026 ml
->magic
[magindex
].lineno
,
1027 ml
->magic
[magindex
].desc
,
1028 ml
->magic
[magindex
].mimetype
);
1034 set_test_type(struct magic
*mstart
, struct magic
*m
)
1072 mstart
->flag
|= BINTEST
;
1076 case FILE_BESTRING16
:
1077 case FILE_LESTRING16
:
1078 /* Allow text overrides */
1079 if (mstart
->str_flags
& STRING_TEXTTEST
)
1080 mstart
->flag
|= TEXTTEST
;
1082 mstart
->flag
|= BINTEST
;
1086 /* Check for override */
1087 if (mstart
->str_flags
& STRING_BINTEST
)
1088 mstart
->flag
|= BINTEST
;
1089 if (mstart
->str_flags
& STRING_TEXTTEST
)
1090 mstart
->flag
|= TEXTTEST
;
1092 if (mstart
->flag
& (TEXTTEST
|BINTEST
))
1095 /* binary test if pattern is not text */
1096 if (file_looks_utf8(m
->value
.us
, (size_t)m
->vallen
, NULL
,
1098 mstart
->flag
|= BINTEST
;
1100 mstart
->flag
|= TEXTTEST
;
1103 /* can't deduce anything; we shouldn't see this at the
1108 /* invalid search type, but no need to complain here */
1114 addentry(struct magic_set
*ms
, struct magic_entry
*me
,
1115 struct magic_entry_set
*mset
)
1117 size_t i
= me
->mp
->type
== FILE_NAME
? 1 : 0;
1118 if (mset
[i
].count
== mset
[i
].max
) {
1119 struct magic_entry
*mp
;
1121 mset
[i
].max
+= ALLOC_INCR
;
1122 if ((mp
= CAST(struct magic_entry
*,
1123 realloc(mset
[i
].me
, sizeof(*mp
) * mset
[i
].max
))) ==
1125 file_oomem(ms
, sizeof(*mp
) * mset
[i
].max
);
1128 (void)memset(&mp
[mset
[i
].count
], 0, sizeof(*mp
) *
1132 mset
[i
].me
[mset
[i
].count
++] = *me
;
1133 memset(me
, 0, sizeof(*me
));
1138 * Load and parse one file.
1141 load_1(struct magic_set
*ms
, int action
, const char *fn
, int *errs
,
1142 struct magic_entry_set
*mset
)
1144 size_t lineno
= 0, llen
= 0;
1147 struct magic_entry me
;
1149 FILE *f
= fopen(ms
->file
= fn
, "r");
1151 if (errno
!= ENOENT
)
1152 file_error(ms
, errno
, "cannot read magic file `%s'",
1158 memset(&me
, 0, sizeof(me
));
1159 /* read and parse this file */
1160 for (ms
->line
= 1; (len
= getline(&line
, &llen
, f
)) != -1;
1162 if (len
== 0) /* null line, garbage, etc */
1164 if (line
[len
- 1] == '\n') {
1166 line
[len
- 1] = '\0'; /* delete newline */
1169 case '\0': /* empty, do not parse */
1170 case '#': /* comment, do not parse */
1173 if (line
[1] == ':') {
1176 for (i
= 0; bang
[i
].name
!= NULL
; i
++) {
1177 if ((size_t)(len
- 2) > bang
[i
].len
&&
1178 memcmp(bang
[i
].name
, line
+ 2,
1182 if (bang
[i
].name
== NULL
) {
1184 "Unknown !: entry `%s'", line
);
1188 if (me
.mp
== NULL
) {
1190 "No current entry for :!%s type",
1195 if ((*bang
[i
].fun
)(ms
, &me
,
1196 line
+ bang
[i
].len
+ 2) != 0) {
1205 switch (parse(ms
, &me
, line
, lineno
, action
)) {
1209 (void)addentry(ms
, &me
, mset
);
1218 (void)addentry(ms
, &me
, mset
);
/*
 * parse a file or directory of files
 * const char *fn: name of magic file or directory
 */
1228 cmpstrp(const void *p1
, const void *p2
)
1230 return strcmp(*(char *const *)p1
, *(char *const *)p2
);
1235 set_text_binary(struct magic_set
*ms
, struct magic_entry
*me
, uint32_t nme
,
1238 static const char text
[] = "text";
1239 static const char binary
[] = "binary";
1240 static const size_t len
= sizeof(text
);
1242 uint32_t i
= starttest
;
1245 set_test_type(me
[starttest
].mp
, me
[i
].mp
);
1246 if ((ms
->flags
& MAGIC_DEBUG
) == 0)
1248 (void)fprintf(stderr
, "%s%s%s: %s\n",
1250 me
[i
].mp
->mimetype
[0] == '\0' ? "" : "; ",
1251 me
[i
].mp
->desc
[0] ? me
[i
].mp
->desc
: "(no description)",
1252 me
[i
].mp
->flag
& BINTEST
? binary
: text
);
1253 if (me
[i
].mp
->flag
& BINTEST
) {
1254 char *p
= strstr(me
[i
].mp
->desc
, text
);
1255 if (p
&& (p
== me
[i
].mp
->desc
||
1256 isspace((unsigned char)p
[-1])) &&
1257 (p
+ len
- me
[i
].mp
->desc
== MAXstring
1258 || (p
[len
] == '\0' ||
1259 isspace((unsigned char)p
[len
]))))
1260 (void)fprintf(stderr
, "*** Possible "
1261 "binary test for text type\n");
1263 } while (++i
< nme
&& me
[i
].mp
->cont_level
!= 0);
1268 set_last_default(struct magic_set
*ms
, struct magic_entry
*me
, uint32_t nme
)
1271 for (i
= 0; i
< nme
; i
++) {
1272 if (me
[i
].mp
->cont_level
== 0 &&
1273 me
[i
].mp
->type
== FILE_DEFAULT
) {
1275 if (me
[i
].mp
->cont_level
== 0)
1279 ms
->line
= me
[i
].mp
->lineno
;
1281 "level 0 \"default\" did not sort last");
1289 coalesce_entries(struct magic_set
*ms
, struct magic_entry
*me
, uint32_t nme
,
1290 struct magic
**ma
, uint32_t *nma
)
1292 uint32_t i
, mentrycount
= 0;
1295 for (i
= 0; i
< nme
; i
++)
1296 mentrycount
+= me
[i
].cont_count
;
1298 slen
= sizeof(**ma
) * mentrycount
;
1299 if ((*ma
= CAST(struct magic
*, malloc(slen
))) == NULL
) {
1300 file_oomem(ms
, slen
);
1305 for (i
= 0; i
< nme
; i
++) {
1306 (void)memcpy(*ma
+ mentrycount
, me
[i
].mp
,
1307 me
[i
].cont_count
* sizeof(**ma
));
1308 mentrycount
+= me
[i
].cont_count
;
1315 magic_entry_free(struct magic_entry
*me
, uint32_t nme
)
1320 for (i
= 0; i
< nme
; i
++)
1325 private struct magic_map
*
1326 apprentice_load(struct magic_set
*ms
, const char *fn
, int action
)
1330 size_t files
= 0, maxfiles
= 0;
1331 char **filearr
= NULL
, *mfn
;
1333 struct magic_map
*map
;
1334 struct magic_entry_set mset
[MAGIC_SETS
];
1338 memset(mset
, 0, sizeof(mset
));
1339 ms
->flags
|= MAGIC_CHECK
; /* Enable checks for parsed files */
1342 if ((map
= CAST(struct magic_map
*, calloc(1, sizeof(*map
)))) == NULL
)
1344 file_oomem(ms
, sizeof(*map
));
1347 map
->type
= MAP_TYPE_MALLOC
;
1349 /* print silly verbose header for USG compat. */
1350 if (action
== FILE_CHECK
)
1351 (void)fprintf(stderr
, "%s\n", usg_hdr
);
1353 /* load directory or file */
1354 if (stat(fn
, &st
) == 0 && S_ISDIR(st
.st_mode
)) {
1360 while ((d
= readdir(dir
)) != NULL
) {
1361 if (d
->d_name
[0] == '.')
1363 if (asprintf(&mfn
, "%s/%s", fn
, d
->d_name
) < 0) {
1365 strlen(fn
) + strlen(d
->d_name
) + 2);
1370 if (stat(mfn
, &st
) == -1 || !S_ISREG(st
.st_mode
)) {
1374 if (files
>= maxfiles
) {
1376 maxfiles
= (maxfiles
+ 1) * 2;
1377 mlen
= maxfiles
* sizeof(*filearr
);
1378 if ((filearr
= CAST(char **,
1379 realloc(filearr
, mlen
))) == NULL
) {
1380 file_oomem(ms
, mlen
);
1387 filearr
[files
++] = mfn
;
1390 qsort(filearr
, files
, sizeof(*filearr
), cmpstrp
);
1391 for (i
= 0; i
< files
; i
++) {
1392 load_1(ms
, action
, filearr
[i
], &errs
, mset
);
1397 load_1(ms
, action
, fn
, &errs
, mset
);
1401 for (j
= 0; j
< MAGIC_SETS
; j
++) {
1402 /* Set types of tests */
1403 for (i
= 0; i
< mset
[j
].count
; ) {
1404 if (mset
[j
].me
[i
].mp
->cont_level
!= 0) {
1408 i
= set_text_binary(ms
, mset
[j
].me
, mset
[j
].count
, i
);
1411 qsort(mset
[j
].me
, mset
[j
].count
, sizeof(*mset
[j
].me
),
1415 * Make sure that any level 0 "default" line is last
1418 set_last_default(ms
, mset
[j
].me
, mset
[j
].count
);
1420 /* coalesce per file arrays into a single one */
1421 if (coalesce_entries(ms
, mset
[j
].me
, mset
[j
].count
,
1422 &map
->magic
[j
], &map
->nmagic
[j
]) == -1) {
1429 for (j
= 0; j
< MAGIC_SETS
; j
++)
1430 magic_entry_free(mset
[j
].me
, mset
[j
].count
);
1433 apprentice_unmap(map
);
1440 * extend the sign bit if the comparison is to be signed
1443 file_signextend(struct magic_set
*ms
, struct magic
*m
, uint64_t v
)
1445 if (!(m
->flag
& UNSIGNED
)) {
1448 * Do not remove the casts below. They are
1449 * vital. When later compared with the data,
1450 * the sign extension must have happened.
1453 v
= (signed char) v
;
1496 case FILE_BESTRING16
:
1497 case FILE_LESTRING16
:
1508 if (ms
->flags
& MAGIC_CHECK
)
1509 file_magwarn(ms
, "cannot happen: m->type=%d\n",
1518 string_modifier_check(struct magic_set
*ms
, struct magic
*m
)
1520 if ((ms
->flags
& MAGIC_CHECK
) == 0)
1523 if ((m
->type
!= FILE_REGEX
|| (m
->str_flags
& REGEX_LINE_COUNT
) == 0) &&
1524 (m
->type
!= FILE_PSTRING
&& (m
->str_flags
& PSTRING_LEN
) != 0)) {
1526 "'/BHhLl' modifiers are only allowed for pascal strings\n");
1530 case FILE_BESTRING16
:
1531 case FILE_LESTRING16
:
1532 if (m
->str_flags
!= 0) {
1534 "no modifiers allowed for 16-bit strings\n");
1540 if ((m
->str_flags
& REGEX_OFFSET_START
) != 0) {
1542 "'/%c' only allowed on regex and search\n",
1543 CHAR_REGEX_OFFSET_START
);
1548 if (m
->str_range
== 0) {
1550 "missing range; defaulting to %d\n",
1551 STRING_DEFAULT_RANGE
);
1552 m
->str_range
= STRING_DEFAULT_RANGE
;
1557 if ((m
->str_flags
& STRING_COMPACT_WHITESPACE
) != 0) {
1558 file_magwarn(ms
, "'/%c' not allowed on regex\n",
1559 CHAR_COMPACT_WHITESPACE
);
1562 if ((m
->str_flags
& STRING_COMPACT_OPTIONAL_WHITESPACE
) != 0) {
1563 file_magwarn(ms
, "'/%c' not allowed on regex\n",
1564 CHAR_COMPACT_OPTIONAL_WHITESPACE
);
1569 file_magwarn(ms
, "coding error: m->type=%d\n",
1589 return FILE_OPMINUS
;
1591 return FILE_OPMULTIPLY
;
1593 return FILE_OPDIVIDE
;
1595 return FILE_OPMODULO
;
1601 #ifdef ENABLE_CONDITIONALS
1603 get_cond(const char *l
, const char **t
)
1605 static const struct cond_tbl_s
{
1610 { "if", 2, COND_IF
},
1611 { "elif", 4, COND_ELIF
},
1612 { "else", 4, COND_ELSE
},
1613 { "", 0, COND_NONE
},
1615 const struct cond_tbl_s
*p
;
1617 for (p
= cond_tbl
; p
->len
; p
++) {
1618 if (strncmp(l
, p
->name
, p
->len
) == 0 &&
1619 isspace((unsigned char)l
[p
->len
])) {
1629 check_cond(struct magic_set
*ms
, int cond
, uint32_t cont_level
)
1632 last_cond
= ms
->c
.li
[cont_level
].last_cond
;
1636 if (last_cond
!= COND_NONE
&& last_cond
!= COND_ELIF
) {
1637 if (ms
->flags
& MAGIC_CHECK
)
1638 file_magwarn(ms
, "syntax error: `if'");
1641 last_cond
= COND_IF
;
1645 if (last_cond
!= COND_IF
&& last_cond
!= COND_ELIF
) {
1646 if (ms
->flags
& MAGIC_CHECK
)
1647 file_magwarn(ms
, "syntax error: `elif'");
1650 last_cond
= COND_ELIF
;
1654 if (last_cond
!= COND_IF
&& last_cond
!= COND_ELIF
) {
1655 if (ms
->flags
& MAGIC_CHECK
)
1656 file_magwarn(ms
, "syntax error: `else'");
1659 last_cond
= COND_NONE
;
1663 last_cond
= COND_NONE
;
1667 ms
->c
.li
[cont_level
].last_cond
= last_cond
;
1670 #endif /* ENABLE_CONDITIONALS */
1673 parse_indirect_modifier(struct magic_set
*ms
, struct magic
*m
, const char **lp
)
1675 const char *l
= *lp
;
1677 while (!isspace((unsigned char)*++l
))
1679 case CHAR_INDIRECT_RELATIVE
:
1680 m
->str_flags
|= INDIRECT_RELATIVE
;
1683 if (ms
->flags
& MAGIC_CHECK
)
1684 file_magwarn(ms
, "indirect modifier `%c' "
1694 parse_op_modifier(struct magic_set
*ms
, struct magic
*m
, const char **lp
,
1697 const char *l
= *lp
;
1703 val
= (uint64_t)strtoull(l
, &t
, 0);
1705 m
->num_mask
= file_signextend(ms
, m
, val
);
1711 parse_string_modifier(struct magic_set
*ms
, struct magic
*m
, const char **lp
)
1713 const char *l
= *lp
;
1717 while (!isspace((unsigned char)*++l
)) {
1719 case '0': case '1': case '2':
1720 case '3': case '4': case '5':
1721 case '6': case '7': case '8':
1723 if (have_range
&& (ms
->flags
& MAGIC_CHECK
))
1724 file_magwarn(ms
, "multiple ranges");
1726 m
->str_range
= CAST(uint32_t, strtoul(l
, &t
, 0));
1727 if (m
->str_range
== 0)
1728 file_magwarn(ms
, "zero range");
1731 case CHAR_COMPACT_WHITESPACE
:
1732 m
->str_flags
|= STRING_COMPACT_WHITESPACE
;
1734 case CHAR_COMPACT_OPTIONAL_WHITESPACE
:
1735 m
->str_flags
|= STRING_COMPACT_OPTIONAL_WHITESPACE
;
1737 case CHAR_IGNORE_LOWERCASE
:
1738 m
->str_flags
|= STRING_IGNORE_LOWERCASE
;
1740 case CHAR_IGNORE_UPPERCASE
:
1741 m
->str_flags
|= STRING_IGNORE_UPPERCASE
;
1743 case CHAR_REGEX_OFFSET_START
:
1744 m
->str_flags
|= REGEX_OFFSET_START
;
1747 m
->str_flags
|= STRING_BINTEST
;
1750 m
->str_flags
|= STRING_TEXTTEST
;
1753 m
->str_flags
|= STRING_TRIM
;
1755 case CHAR_PSTRING_1_LE
:
1756 #define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
1757 if (m
->type
!= FILE_PSTRING
)
1759 SET_LENGTH(PSTRING_1_LE
);
1761 case CHAR_PSTRING_2_BE
:
1762 if (m
->type
!= FILE_PSTRING
)
1764 SET_LENGTH(PSTRING_2_BE
);
1766 case CHAR_PSTRING_2_LE
:
1767 if (m
->type
!= FILE_PSTRING
)
1769 SET_LENGTH(PSTRING_2_LE
);
1771 case CHAR_PSTRING_4_BE
:
1772 if (m
->type
!= FILE_PSTRING
)
1774 SET_LENGTH(PSTRING_4_BE
);
1776 case CHAR_PSTRING_4_LE
:
1784 SET_LENGTH(PSTRING_4_LE
);
1786 case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF
:
1787 if (m
->type
!= FILE_PSTRING
)
1789 m
->str_flags
|= PSTRING_LENGTH_INCLUDES_ITSELF
;
1793 if (ms
->flags
& MAGIC_CHECK
)
1794 file_magwarn(ms
, "string modifier `%c' "
1798 /* allow multiple '/' for readability */
1799 if (l
[1] == '/' && !isspace((unsigned char)l
[2]))
1802 if (string_modifier_check(ms
, m
) == -1)
1812 * parse one line from magic file, put into magic[index++] if valid
1815 parse(struct magic_set
*ms
, struct magic_entry
*me
, const char *line
,
1816 size_t lineno
, int action
)
1818 #ifdef ENABLE_CONDITIONALS
1819 static uint32_t last_cont_level
= 0;
1823 const char *l
= line
;
1826 uint32_t cont_level
;
1835 ++l
; /* step over */
1838 #ifdef ENABLE_CONDITIONALS
1839 if (cont_level
== 0 || cont_level
> last_cont_level
)
1840 if (file_check_mem(ms
, cont_level
) == -1)
1842 last_cont_level
= cont_level
;
1844 if (cont_level
!= 0) {
1845 if (me
->mp
== NULL
) {
1846 file_magerror(ms
, "No current entry for continuation");
1849 if (me
->cont_count
== 0) {
1850 file_magerror(ms
, "Continuations present with 0 count");
1853 m
= &me
->mp
[me
->cont_count
- 1];
1854 diff
= (int32_t)cont_level
- (int32_t)m
->cont_level
;
1856 file_magwarn(ms
, "New continuation level %u is more "
1857 "than one larger than current level %u", cont_level
,
1859 if (me
->cont_count
== me
->max_count
) {
1861 size_t cnt
= me
->max_count
+ ALLOC_CHUNK
;
1862 if ((nm
= CAST(struct magic
*, realloc(me
->mp
,
1863 sizeof(*nm
) * cnt
))) == NULL
) {
1864 file_oomem(ms
, sizeof(*nm
) * cnt
);
1868 me
->max_count
= CAST(uint32_t, cnt
);
1870 m
= &me
->mp
[me
->cont_count
++];
1871 (void)memset(m
, 0, sizeof(*m
));
1872 m
->cont_level
= cont_level
;
1874 static const size_t len
= sizeof(*m
) * ALLOC_CHUNK
;
1877 if ((m
= CAST(struct magic
*, malloc(len
))) == NULL
) {
1878 file_oomem(ms
, len
);
1882 me
->max_count
= ALLOC_CHUNK
;
1883 (void)memset(m
, 0, sizeof(*m
));
1884 m
->factor_op
= FILE_FACTOR_OP_NONE
;
1888 m
->lineno
= CAST(uint32_t, lineno
);
1890 if (*l
== '&') { /* m->cont_level == 0 checked below. */
1891 ++l
; /* step over */
1895 ++l
; /* step over */
1897 if (m
->flag
& OFFADD
)
1898 m
->flag
= (m
->flag
& ~OFFADD
) | INDIROFFADD
;
1900 if (*l
== '&') { /* m->cont_level == 0 checked below */
1901 ++l
; /* step over */
1905 /* Indirect offsets are not valid at level 0. */
1906 if (m
->cont_level
== 0 && (m
->flag
& (OFFADD
| INDIROFFADD
))) {
1907 if (ms
->flags
& MAGIC_CHECK
)
1908 file_magwarn(ms
, "relative offset at level 0");
1912 /* get offset, then skip over it */
1913 m
->offset
= (uint32_t)strtoul(l
, &t
, 0);
1915 if (ms
->flags
& MAGIC_CHECK
)
1916 file_magwarn(ms
, "offset `%s' invalid", l
);
1921 if (m
->flag
& INDIR
) {
1922 m
->in_type
= FILE_LONG
;
1926 * read [.,lbs][+-]nnnnn)
1928 if (*l
== '.' || *l
== ',') {
1930 m
->in_op
|= FILE_OPSIGNED
;
1934 m
->in_type
= FILE_LELONG
;
1937 m
->in_type
= FILE_BELONG
;
1940 m
->in_type
= FILE_MELONG
;
1944 m
->in_type
= FILE_LESHORT
;
1948 m
->in_type
= FILE_BESHORT
;
1954 m
->in_type
= FILE_BYTE
;
1959 m
->in_type
= FILE_LEDOUBLE
;
1964 m
->in_type
= FILE_BEDOUBLE
;
1967 m
->in_type
= FILE_LEID3
;
1970 m
->in_type
= FILE_BEID3
;
1973 if (ms
->flags
& MAGIC_CHECK
)
1975 "indirect offset type `%c' invalid",
1983 m
->in_op
|= FILE_OPINVERSE
;
1986 if ((op
= get_op(*l
)) != -1) {
1991 m
->in_op
|= FILE_OPINDIRECT
;
1994 if (isdigit((unsigned char)*l
) || *l
== '-') {
1995 m
->in_offset
= (int32_t)strtol(l
, &t
, 0);
1997 if (ms
->flags
& MAGIC_CHECK
)
1999 "in_offset `%s' invalid", l
);
2005 ((m
->in_op
& FILE_OPINDIRECT
) && *l
++ != ')')) {
2006 if (ms
->flags
& MAGIC_CHECK
)
2008 "missing ')' in indirect offset");
2014 #ifdef ENABLE_CONDITIONALS
2015 m
->cond
= get_cond(l
, &l
);
2016 if (check_cond(ms
, m
->cond
, cont_level
) == -1)
2027 * Try it as a keyword type prefixed by "u"; match what
2028 * follows the "u". If that fails, try it as an SUS
2031 m
->type
= get_type(type_tbl
, l
+ 1, &l
);
2032 if (m
->type
== FILE_INVALID
) {
2034 * Not a keyword type; parse it as an SUS type,
2035 * 'u' possibly followed by a number or C/S/L.
2037 m
->type
= get_standard_integer_type(l
, &l
);
2039 /* It's unsigned. */
2040 if (m
->type
!= FILE_INVALID
)
2041 m
->flag
|= UNSIGNED
;
2044 * Try it as a keyword type. If that fails, try it as
2045 * an SUS integer type if it begins with "d" or as an
2046 * SUS string type if it begins with "s". In any case,
2047 * it's not unsigned.
2049 m
->type
= get_type(type_tbl
, l
, &l
);
2050 if (m
->type
== FILE_INVALID
) {
2052 * Not a keyword type; parse it as an SUS type,
2053 * either 'd' possibly followed by a number or
2054 * C/S/L, or just 's'.
2057 m
->type
= get_standard_integer_type(l
, &l
);
2058 else if (*l
== 's' && !isalpha((unsigned char)l
[1])) {
2059 m
->type
= FILE_STRING
;
2065 if (m
->type
== FILE_INVALID
) {
2066 /* Not found - try it as a special keyword. */
2067 m
->type
= get_type(special_tbl
, l
, &l
);
2070 if (m
->type
== FILE_INVALID
) {
2071 if (ms
->flags
& MAGIC_CHECK
)
2072 file_magwarn(ms
, "type `%s' invalid", l
);
2076 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
2077 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
2081 if (!IS_STRING(m
->type
))
2082 m
->mask_op
|= FILE_OPINVERSE
;
2083 else if (ms
->flags
& MAGIC_CHECK
)
2084 file_magwarn(ms
, "'~' invalid for string types");
2088 m
->str_flags
= m
->type
== FILE_PSTRING
? PSTRING_1_LE
: 0;
2089 if ((op
= get_op(*l
)) != -1) {
2090 if (IS_STRING(m
->type
)) {
2093 if (op
!= FILE_OPDIVIDE
) {
2094 if (ms
->flags
& MAGIC_CHECK
)
2096 "invalid string/indirect op: "
2101 if (m
->type
== FILE_INDIRECT
)
2102 r
= parse_indirect_modifier(ms
, m
, &l
);
2104 r
= parse_string_modifier(ms
, m
, &l
);
2108 parse_op_modifier(ms
, m
, &l
, op
);
2112 * We used to set mask to all 1's here, instead let's just not do
2113 * anything if mask = 0 (unless you have a better idea)
2123 if (ms
->flags
& MAGIC_CHECK
) {
2124 file_magwarn(ms
, "%c= not supported",
2131 /* Old-style anding: "0 byte &0x80 dynamically linked" */
2138 /* HP compat: ignore &= etc. */
2147 m
->reln
= '='; /* the default relation */
2148 if (*l
== 'x' && ((isascii((unsigned char)l
[1]) &&
2149 isspace((unsigned char)l
[1])) || !l
[1])) {
2156 * Grab the value part, except for an 'x' reln.
2158 if (m
->reln
!= 'x' && getvalue(ms
, m
, &l
, action
))
2162 * TODO finish this macro and start using it!
2163 * #define offsetcheck {if (offset > ms->bytes_max -1)
2164 * magwarn("offset too big"); }
2168 * Now get last part - the description
2174 } else if ((l
[0] == '\\') && (l
[1] == 'b')) {
2179 for (i
= 0; (m
->desc
[i
++] = *l
++) != '\0' && i
< sizeof(m
->desc
); )
2181 if (i
== sizeof(m
->desc
)) {
2182 m
->desc
[sizeof(m
->desc
) - 1] = '\0';
2183 if (ms
->flags
& MAGIC_CHECK
)
2184 file_magwarn(ms
, "description `%s' truncated", m
->desc
);
2188 * We only do this check while compiling, or if any of the magic
2189 * files were not compiled.
2191 if (ms
->flags
& MAGIC_CHECK
) {
2192 if (check_format(ms
, m
) == -1)
2195 #ifndef COMPILE_ONLY
2196 if (action
== FILE_CHECK
) {
2200 m
->mimetype
[0] = '\0'; /* initialise MIME type to none */
2205 * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
2209 parse_strength(struct magic_set
*ms
, struct magic_entry
*me
, const char *line
)
2211 const char *l
= line
;
2213 unsigned long factor
;
2214 struct magic
*m
= &me
->mp
[0];
2216 if (m
->factor_op
!= FILE_FACTOR_OP_NONE
) {
2218 "Current entry already has a strength type: %c %d",
2219 m
->factor_op
, m
->factor
);
2222 if (m
->type
== FILE_NAME
) {
2223 file_magwarn(ms
, "%s: Strength setting is not supported in "
2224 "\"name\" magic entries", m
->value
.s
);
2229 case FILE_FACTOR_OP_NONE
:
2230 case FILE_FACTOR_OP_PLUS
:
2231 case FILE_FACTOR_OP_MINUS
:
2232 case FILE_FACTOR_OP_TIMES
:
2233 case FILE_FACTOR_OP_DIV
:
2234 m
->factor_op
= *l
++;
2237 file_magwarn(ms
, "Unknown factor op `%c'", *l
);
2241 factor
= strtoul(l
, &el
, 0);
2243 file_magwarn(ms
, "Too large factor `%lu'", factor
);
2246 if (*el
&& !isspace((unsigned char)*el
)) {
2247 file_magwarn(ms
, "Bad factor `%s'", l
);
2250 m
->factor
= (uint8_t)factor
;
2251 if (m
->factor
== 0 && m
->factor_op
== FILE_FACTOR_OP_DIV
) {
2252 file_magwarn(ms
, "Cannot have factor op `%c' and factor %u",
2253 m
->factor_op
, m
->factor
);
2258 m
->factor_op
= FILE_FACTOR_OP_NONE
;
2264 goodchar(unsigned char x
, const char *extra
)
2266 return (isascii(x
) && isalnum(x
)) || strchr(extra
, x
);
2270 parse_extra(struct magic_set
*ms
, struct magic_entry
*me
, const char *line
,
2271 off_t off
, size_t len
, const char *name
, const char *extra
, int nt
)
2274 const char *l
= line
;
2275 struct magic
*m
= &me
->mp
[me
->cont_count
== 0 ? 0 : me
->cont_count
- 1];
2276 char *buf
= CAST(char *, CAST(void *, m
)) + off
;
2278 if (buf
[0] != '\0') {
2279 len
= nt
? strlen(buf
) : len
;
2280 file_magwarn(ms
, "Current entry already has a %s type "
2281 "`%.*s', new type `%s'", name
, (int)len
, buf
, l
);
2285 if (*m
->desc
== '\0') {
2286 file_magwarn(ms
, "Current entry does not yet have a "
2287 "description for adding a %s type", name
);
2292 for (i
= 0; *l
&& i
< len
&& goodchar(*l
, extra
); buf
[i
++] = *l
++)
2295 if (i
== len
&& *l
) {
2297 buf
[len
- 1] = '\0';
2298 if (ms
->flags
& MAGIC_CHECK
)
2299 file_magwarn(ms
, "%s type `%s' truncated %"
2300 SIZE_T_FORMAT
"u", name
, line
, i
);
2302 if (!isspace((unsigned char)*l
) && !goodchar(*l
, extra
))
2303 file_magwarn(ms
, "%s type `%s' has bad char '%c'",
2312 file_magerror(ms
, "Bad magic entry '%s'", line
);
2317 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2321 parse_apple(struct magic_set
*ms
, struct magic_entry
*me
, const char *line
)
2323 struct magic
*m
= &me
->mp
[0];
2325 return parse_extra(ms
, me
, line
,
2326 CAST(off_t
, offsetof(struct magic
, apple
)),
2327 sizeof(m
->apple
), "APPLE", "!+-./?", 0);
2331 * Parse a comma-separated list of extensions
2334 parse_ext(struct magic_set
*ms
, struct magic_entry
*me
, const char *line
)
2336 struct magic
*m
= &me
->mp
[0];
2338 return parse_extra(ms
, me
, line
,
2339 CAST(off_t
, offsetof(struct magic
, ext
)),
2340 sizeof(m
->ext
), "EXTENSION", ",!+-/@", 0);
2344 * parse a MIME annotation line from magic file, put into magic[index - 1]
2348 parse_mime(struct magic_set
*ms
, struct magic_entry
*me
, const char *line
)
2350 struct magic
*m
= &me
->mp
[0];
2352 return parse_extra(ms
, me
, line
,
2353 CAST(off_t
, offsetof(struct magic
, mimetype
)),
2354 sizeof(m
->mimetype
), "MIME", "+-/.", 1);
2358 check_format_type(const char *ptr
, int type
, const char **estr
)
2363 /* Missing format string; bad */
2364 *estr
= "missing format spec";
2368 switch (file_formats
[type
]) {
2403 #define CHECKLEN() do { \
2404 for (len = cnt = 0; isdigit((unsigned char)*ptr); ptr++, cnt++) \
2405 len = len * 10 + (*ptr - '0'); \
2406 if (cnt > 5 || len > 1024) \
2408 } while (/*CONSTCOND*/0)
2422 #ifdef STRICT_FORMAT /* "long" formats are int formats for us */
2423 /* so don't accept the 'l' modifier */
2440 * Don't accept h and hh modifiers. They make writing
2441 * magic entries more complicated, for very little benefit
2484 #ifdef STRICT_FORMAT
2495 case FILE_FMT_FLOAT
:
2496 case FILE_FMT_DOUBLE
:
2522 while (isdigit((unsigned char )*ptr
))
2526 while (isdigit((unsigned char )*ptr
))
2538 /* internal error */
2542 *estr
= "not valid";
2549 * Check that the optional printf format in description matches
2550 * the type of the magic.
2553 check_format(struct magic_set
*ms
, struct magic
*m
)
2558 for (ptr
= m
->desc
; *ptr
; ptr
++)
2562 /* No format string; ok */
2566 assert(file_nformats
== file_nnames
);
2568 if (m
->type
>= file_nformats
) {
2569 file_magwarn(ms
, "Internal error inconsistency between "
2570 "m->type and format strings");
2573 if (file_formats
[m
->type
] == FILE_FMT_NONE
) {
2574 file_magwarn(ms
, "No format string for `%s' with description "
2575 "`%s'", m
->desc
, file_names
[m
->type
]);
2580 if (check_format_type(ptr
, m
->type
, &estr
) == -1) {
2582 * TODO: this error message is unhelpful if the format
2583 * string is not one character long
2585 file_magwarn(ms
, "Printf format is %s for type "
2586 "`%s' in description `%s'", estr
,
2587 file_names
[m
->type
], m
->desc
);
2591 for (; *ptr
; ptr
++) {
2594 "Too many format strings (should have at most one) "
2595 "for `%s' with description `%s'",
2596 file_names
[m
->type
], m
->desc
);
2604 * Read a numeric value from a pointer, into the value union of a magic
2605 * pointer, according to the magic type. Update the string pointer to point
2606 * just after the number read. Return 0 for success, non-zero for failure.
2609 getvalue(struct magic_set
*ms
, struct magic
*m
, const char **p
, int action
)
2612 case FILE_BESTRING16
:
2613 case FILE_LESTRING16
:
2621 *p
= getstr(ms
, m
, *p
, action
== FILE_COMPILE
);
2623 if (ms
->flags
& MAGIC_CHECK
)
2624 file_magwarn(ms
, "cannot get string from `%s'",
2628 if (m
->type
== FILE_REGEX
) {
2630 int rc
= file_regcomp(&rx
, m
->value
.s
, REG_EXTENDED
);
2632 if (ms
->flags
& MAGIC_CHECK
)
2633 file_regerror(&rx
, rc
, ms
);
2642 if (m
->reln
!= 'x') {
2646 m
->value
.f
= strtof(*p
, &ep
);
2648 m
->value
.f
= (float)strtod(*p
, &ep
);
2657 if (m
->reln
!= 'x') {
2660 m
->value
.d
= strtod(*p
, &ep
);
2666 if (m
->reln
!= 'x') {
2670 ull
= (uint64_t)strtoull(*p
, &ep
, 0);
2671 m
->value
.q
= file_signextend(ms
, m
, ull
);
2673 file_magwarn(ms
, "Unparseable number `%s'", *p
);
2675 size_t ts
= typesize(m
->type
);
2679 if (ts
== (size_t)~0) {
2680 file_magwarn(ms
, "Expected numeric type got `%s'",
2681 type_tbl
[m
->type
].name
);
2683 for (q
= *p
; isspace((unsigned char)*q
); q
++)
2686 ull
= -(int64_t)ull
;
2692 x
= ull
& ~0xffffULL
;
2695 x
= ull
& ~0xffffffffULL
;
2704 file_magwarn(ms
, "Overflow for numeric type `%s' value %#" PRIx64
,
2705 type_tbl
[m
->type
].name
, ull
);
2718 * Convert a string containing C character escapes. Stop at an unescaped
2720 * Copy the converted version to "m->value.s", and the length in m->vallen.
2721 * Return updated scan pointer as function result. Warn if set.
2723 private const char *
2724 getstr(struct magic_set
*ms
, struct magic
*m
, const char *s
, int warn
)
2726 const char *origs
= s
;
2727 char *p
= m
->value
.s
;
2728 size_t plen
= sizeof(m
->value
.s
);
2730 char *pmax
= p
+ plen
- 1;
2734 while ((c
= *s
++) != '\0') {
2735 if (isspace((unsigned char) c
))
2738 file_error(ms
, 0, "string too long: `%s'", origs
);
2746 file_magwarn(ms
, "incomplete escape");
2753 "escaped tab found, use \\t instead");
2754 warn
= 0; /* already did */
2759 if (isprint((unsigned char)c
)) {
2760 /* Allow escaping of
2762 if (strchr("<>&^=!", c
) == NULL
2763 && (m
->type
!= FILE_REGEX
||
2764 strchr("[]().*?^$|{}", c
)
2766 file_magwarn(ms
, "no "
2772 "unknown escape sequence: "
2777 /* space, perhaps force people to use \040? */
2781 * Other things people escape, but shouldn't need to,
2782 * so we disallow them
2795 /* and baskslash itself */
2828 /* \ and up to 3 octal digits */
2838 c
= *s
++; /* try for 2 */
2839 if (c
>= '0' && c
<= '7') {
2840 val
= (val
<< 3) | (c
- '0');
2841 c
= *s
++; /* try for 3 */
2842 if (c
>= '0' && c
<= '7')
2843 val
= (val
<< 3) | (c
-'0');
2852 /* \x and up to 2 hex digits */
2854 val
= 'x'; /* Default if no digits */
2855 c
= hextoint(*s
++); /* Get next char */
2860 val
= (val
<< 4) + c
;
2874 m
->vallen
= CAST(unsigned char, (p
- origp
));
2875 if (m
->type
== FILE_PSTRING
)
2876 m
->vallen
+= (unsigned char)file_pstring_length_size(m
);
2881 /* Single hex char to int; -1 if not a hex char. */
2885 if (!isascii((unsigned char) c
))
2887 if (isdigit((unsigned char) c
))
2889 if ((c
>= 'a') && (c
<= 'f'))
2890 return c
+ 10 - 'a';
2891 if (( c
>= 'A') && (c
<= 'F'))
2892 return c
+ 10 - 'A';
2898 * Print a string containing C character escapes.
2901 file_showstr(FILE *fp
, const char *s
, size_t len
)
2916 if (c
>= 040 && c
<= 0176) /* TODO isprint && !iscntrl */
2917 (void) fputc(c
, fp
);
2919 (void) fputc('\\', fp
);
2922 (void) fputc('a', fp
);
2926 (void) fputc('b', fp
);
2930 (void) fputc('f', fp
);
2934 (void) fputc('n', fp
);
2938 (void) fputc('r', fp
);
2942 (void) fputc('t', fp
);
2946 (void) fputc('v', fp
);
2950 (void) fprintf(fp
, "%.3o", c
& 0377);
2958 * eatsize(): Eat the size spec from a number [eg. 10UL]
2961 eatsize(const char **p
)
2965 if (LOWCASE(*l
) == 'u')
2968 switch (LOWCASE(*l
)) {
2969 case 'l': /* long */
2970 case 's': /* short */
2971 case 'h': /* short */
2972 case 'b': /* char/byte */
2973 case 'c': /* char/byte */
2984 * handle a buffer containing a compiled file.
2986 private struct magic_map
*
2987 apprentice_buf(struct magic_set
*ms
, struct magic
*buf
, size_t len
)
2989 struct magic_map
*map
;
2991 if ((map
= CAST(struct magic_map
*, calloc(1, sizeof(*map
)))) == NULL
) {
2992 file_oomem(ms
, sizeof(*map
));
2997 map
->type
= MAP_TYPE_USER
;
2998 if (check_buffer(ms
, map
, "buffer") != 0) {
2999 apprentice_unmap(map
);
3006 * handle a compiled file.
3009 private struct magic_map
*
3010 apprentice_map(struct magic_set
*ms
, const char *fn
)
3014 char *dbname
= NULL
;
3015 struct magic_map
*map
;
3016 struct magic_map
*rv
= NULL
;
3019 if ((map
= CAST(struct magic_map
*, calloc(1, sizeof(*map
)))) == NULL
) {
3020 file_oomem(ms
, sizeof(*map
));
3023 map
->type
= MAP_TYPE_USER
; /* unspecified */
3025 dbname
= mkdbname(ms
, fn
, 0);
3029 if ((fd
= open(dbname
, O_RDONLY
|O_BINARY
)) == -1)
3032 if (fstat(fd
, &st
) == -1) {
3033 file_error(ms
, errno
, "cannot stat `%s'", dbname
);
3036 if (st
.st_size
< 8 || st
.st_size
> MAXMAGIC_SIZE
) {
3037 file_error(ms
, 0, "file `%s' is too %s", dbname
,
3038 st
.st_size
< 8 ? "small" : "large");
3042 map
->len
= (size_t)st
.st_size
;
3044 map
->type
= MAP_TYPE_MMAP
;
3045 if ((map
->p
= mmap(0, (size_t)st
.st_size
, PROT_READ
|PROT_WRITE
,
3046 MAP_PRIVATE
|MAP_FILE
, fd
, (off_t
)0)) == MAP_FAILED
) {
3047 file_error(ms
, errno
, "cannot map `%s'", dbname
);
3051 map
->type
= MAP_TYPE_MALLOC
;
3052 if ((map
->p
= CAST(void *, malloc(map
->len
))) == NULL
) {
3053 file_oomem(ms
, map
->len
);
3056 if (read(fd
, map
->p
, map
->len
) != (ssize_t
)map
->len
) {
3065 if (check_buffer(ms
, map
, dbname
) != 0) {
3066 rv
= (struct magic_map
*)-1;
3070 if (mprotect(map
->p
, (size_t)st
.st_size
, PROT_READ
) == -1) {
3071 file_error(ms
, errno
, "cannot mprotect `%s'", dbname
);
3082 apprentice_unmap(map
);
3088 check_buffer(struct magic_set
*ms
, struct magic_map
*map
, const char *dbname
)
3091 uint32_t entries
, nentries
;
3093 int i
, needsbyteswap
;
3095 ptr
= CAST(uint32_t *, map
->p
);
3096 if (*ptr
!= MAGICNO
) {
3097 if (swap4(*ptr
) != MAGICNO
) {
3098 file_error(ms
, 0, "bad magic in `%s'", dbname
);
3105 version
= swap4(ptr
[1]);
3108 if (version
!= VERSIONNO
) {
3109 file_error(ms
, 0, "File %s supports only version %d magic "
3110 "files. `%s' is version %d", VERSION
,
3111 VERSIONNO
, dbname
, version
);
3114 entries
= (uint32_t)(map
->len
/ sizeof(struct magic
));
3115 if ((entries
* sizeof(struct magic
)) != map
->len
) {
3116 file_error(ms
, 0, "Size of `%s' %" SIZE_T_FORMAT
"u is not "
3117 "a multiple of %" SIZE_T_FORMAT
"u",
3118 dbname
, map
->len
, sizeof(struct magic
));
3121 map
->magic
[0] = CAST(struct magic
*, map
->p
) + 1;
3123 for (i
= 0; i
< MAGIC_SETS
; i
++) {
3125 map
->nmagic
[i
] = swap4(ptr
[i
+ 2]);
3127 map
->nmagic
[i
] = ptr
[i
+ 2];
3128 if (i
!= MAGIC_SETS
- 1)
3129 map
->magic
[i
+ 1] = map
->magic
[i
] + map
->nmagic
[i
];
3130 nentries
+= map
->nmagic
[i
];
3132 if (entries
!= nentries
+ 1) {
3133 file_error(ms
, 0, "Inconsistent entries in `%s' %u != %u",
3134 dbname
, entries
, nentries
+ 1);
3138 for (i
= 0; i
< MAGIC_SETS
; i
++)
3139 byteswap(map
->magic
[i
], map
->nmagic
[i
]);
3144 * handle an mmaped file.
3147 apprentice_compile(struct magic_set
*ms
, struct magic_map
*map
, const char *fn
)
3149 static const size_t nm
= sizeof(*map
->nmagic
) * MAGIC_SETS
;
3150 static const size_t m
= sizeof(**map
->magic
);
3158 uint32_t h
[2 + MAGIC_SETS
];
3161 dbname
= mkdbname(ms
, fn
, 1);
3166 if ((fd
= open(dbname
, O_WRONLY
|O_CREAT
|O_TRUNC
|O_BINARY
, 0644)) == -1)
3168 file_error(ms
, errno
, "cannot open `%s'", dbname
);
3171 memset(&hdr
, 0, sizeof(hdr
));
3173 hdr
.h
[1] = VERSIONNO
;
3174 memcpy(hdr
.h
+ 2, map
->nmagic
, nm
);
3176 if (write(fd
, &hdr
, sizeof(hdr
)) != (ssize_t
)sizeof(hdr
)) {
3177 file_error(ms
, errno
, "error writing `%s'", dbname
);
3181 for (i
= 0; i
< MAGIC_SETS
; i
++) {
3182 len
= m
* map
->nmagic
[i
];
3183 if (write(fd
, map
->magic
[i
], len
) != (ssize_t
)len
) {
3184 file_error(ms
, errno
, "error writing `%s'", dbname
);
3193 apprentice_unmap(map
);
3198 private const char ext
[] = ".mgc";
3203 mkdbname(struct magic_set
*ms
, const char *fn
, int strip
)
3209 if ((p
= strrchr(fn
, '/')) != NULL
)
3213 for (q
= fn
; *q
; q
++)
3216 for (p
= ext
+ sizeof(ext
) - 1; p
>= ext
&& q
>= fn
; p
--, q
--)
3220 /* Did not find .mgc, restore q */
3226 /* Compatibility with old code that looked in .mime */
3227 if (ms
->flags
& MAGIC_MIME
) {
3228 if (asprintf(&buf
, "%.*s.mime%s", (int)(q
- fn
), fn
, ext
) < 0)
3230 if (access(buf
, R_OK
) != -1) {
3231 ms
->flags
&= MAGIC_MIME_TYPE
;
3236 if (asprintf(&buf
, "%.*s%s", (int)(q
- fn
), fn
, ext
) < 0)
3239 /* Compatibility with old code that looked in .mime */
3240 if (strstr(fn
, ".mime") != NULL
)
3241 ms
->flags
&= MAGIC_MIME_TYPE
;
3246 * Byteswap an mmap'ed file if needed
3249 byteswap(struct magic
*magic
, uint32_t nmagic
)
3252 for (i
= 0; i
< nmagic
; i
++)
3263 uint8_t *s
= (uint8_t *)(void *)&sv
;
3264 uint8_t *d
= (uint8_t *)(void *)&rv
;
3277 uint8_t *s
= (uint8_t *)(void *)&sv
;
3278 uint8_t *d
= (uint8_t *)(void *)&rv
;
3293 uint8_t *s
= (uint8_t *)(void *)&sv
;
3294 uint8_t *d
= (uint8_t *)(void *)&rv
;
3318 * byteswap a single magic entry
3321 bs1(struct magic
*m
)
3323 m
->cont_level
= swap2(m
->cont_level
);
3324 m
->offset
= swap4((uint32_t)m
->offset
);
3325 m
->in_offset
= swap4((uint32_t)m
->in_offset
);
3326 m
->lineno
= swap4((uint32_t)m
->lineno
);
3327 if (IS_STRING(m
->type
)) {
3328 m
->str_range
= swap4(m
->str_range
);
3329 m
->str_flags
= swap4(m
->str_flags
);
3332 m
->value
.q
= swap8(m
->value
.q
);
3333 m
->num_mask
= swap8(m
->num_mask
);
3338 file_pstring_length_size(const struct magic
*m
)
3340 switch (m
->str_flags
& PSTRING_LEN
) {
3350 abort(); /* Impossible */
3355 file_pstring_get_length(const struct magic
*m
, const char *ss
)
3358 const unsigned char *s
= (const unsigned char *)ss
;
3359 unsigned int s3
, s2
, s1
, s0
;
3361 switch (m
->str_flags
& PSTRING_LEN
) {
3368 len
= (s1
<< 8) | s0
;
3373 len
= (s0
<< 8) | s1
;
3380 len
= (s3
<< 24) | (s2
<< 16) | (s1
<< 8) | s0
;
3387 len
= (s0
<< 24) | (s1
<< 16) | (s2
<< 8) | s3
;
3390 abort(); /* Impossible */
3393 if (m
->str_flags
& PSTRING_LENGTH_INCLUDES_ITSELF
)
3394 len
-= file_pstring_length_size(m
);
3400 file_magicfind(struct magic_set
*ms
, const char *name
, struct mlist
*v
)
3403 struct mlist
*mlist
, *ml
;
3405 mlist
= ms
->mlist
[1];
3407 for (ml
= mlist
->next
; ml
!= mlist
; ml
= ml
->next
) {
3408 struct magic
*ma
= ml
->magic
;
3409 uint32_t nma
= ml
->nmagic
;
3410 for (i
= 0; i
< nma
; i
++) {
3411 if (ma
[i
].type
!= FILE_NAME
)
3413 if (strcmp(ma
[i
].value
.s
, name
) == 0) {
3415 for (j
= i
+ 1; j
< nma
; j
++)
3416 if (ma
[j
].cont_level
== 0)