2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice immediately at the beginning of the file, without modification,
11 * this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * softmagic - interpret variable magic from MAGIC
42 FILE_RCSID("@(#)$Id: softmagic.c,v 1.72 2004/11/24 17:38:25 christos Exp $")
/*
 * Prototypes for the helper functions that are defined further down in
 * this file: match, mget, mcheck, mprint, mdebug, mcopy, mconvert and
 * check_mem.
 */
45 private int match(struct magic_set
*, struct magic
*, uint32_t,
46 const unsigned char *, size_t);
47 private int mget(struct magic_set
*, union VALUETYPE
*, const unsigned char *,
48 struct magic
*, size_t);
49 private int mcheck(struct magic_set
*, union VALUETYPE
*, struct magic
*);
50 private int32_t mprint(struct magic_set
*, union VALUETYPE
*, struct magic
*);
51 private void mdebug(uint32_t, const char *, size_t);
52 private int mcopy(struct magic_set
*, union VALUETYPE
*, int, int,
53 const unsigned char *, size_t, size_t);
54 private int mconvert(struct magic_set
*, union VALUETYPE
*, struct magic
*);
55 private int check_mem(struct magic_set
*, unsigned int);
58 * softmagic - lookup one file in database
59 * (already read from MAGIC by apprentice.c).
60 * Passed the magic set plus a buffer (buf, nbytes) holding the data to be typed.
62 /*ARGSUSED1*/ /* nbytes passed for regularity, maybe need later */
/*
 * file_softmagic - try every magic list held in ms against the buffer.
 *
 * NOTE(review): this listing is a partial extraction; the return type, the
 * declaration of ml and the return statements are not visible here.
 *
 * Visible behaviour: the walk starts at ms->mlist->next, follows ->next and
 * stops when it is back at ms->mlist.  For each node match() is called with
 * that node's magic array, its nmagic count, buf and nbytes.  What is done
 * when match() returns non-zero is not shown.
 */
64 file_softmagic(struct magic_set
*ms
, const unsigned char *buf
, size_t nbytes
)
67 for (ml
= ms
->mlist
->next
; ml
!= ms
->mlist
; ml
= ml
->next
)
68 if (match(ms
, ml
->magic
, ml
->nmagic
, buf
, nbytes
))
75 * Go through the whole list, stopping if you find a match. Process all
76 * the continuations of that match before returning.
78 * We support multi-level continuations:
80 * At any time when processing a successful top-level match, there is a
81 * current continuation level; it represents the level of the last
82 * successfully matched continuation.
84 * Continuations above that level are skipped as, if we see one, it
85 * means that the continuation that controls them - i.e., the
86 * lower-level continuation preceding them - failed to match.
88 * Continuations below that level are processed as, if we see one,
89 * it means we've finished processing or skipping higher-level
90 * continuations under the control of a successful or unsuccessful
91 * lower-level continuation, and are now seeing the next lower-level
92 * continuation and should process it. The current continuation
93 * level reverts to the level of the one we're seeing.
95 * Continuations at the current level are processed as, if we see
96 * one, there's no lower-level continuation that may have failed.
98 * If a continuation matches, we bump the current continuation level
99 * so that higher-level continuations are processed.
/*
 * match - run one magic list against the buffer and print what matches.
 *
 * NOTE(review): this listing is a partial extraction of the function; the
 * return type, some declarations (p, oldoff), the case labels and most
 * simple statements are not visible, so only the visible fragments are
 * described here.
 *
 * ms      magic set; ms->c.off[] (sized through check_mem) receives the
 *         value returned by mprint for the current continuation level.
 * magic   array of nmagic entries.
 * s       data buffer, handed to mget together with nbytes.
 *
 * Visible returns: 1 when MAGIC_CONTINUE is clear in ms->flags (going by
 * the neighbouring comments this point is reached after a match has been
 * printed - confirm), and returnval at the very end.
 *
 * Each entry is fetched with mget and tested with mcheck.  When a main
 * entry fails, the following entries with a non-zero cont_level are passed
 * over; that loop tests magindex < nmagic - 1 before it looks at
 * magic[magindex + 1].
 *
 * NOTE(review): the loop that walks the continuations of a matched entry
 * evaluates magic[magindex+1].cont_level before the ++magindex < nmagic
 * bound test.  When the current entry is the last of the nmagic entries
 * this reads one element past them.  Confirm whether the array is
 * guaranteed a terminating element; otherwise test the bound first, as
 * the flush loop does.
 *
 * NOTE(review): for entries flagged OFFADD the shared magic[magindex].offset
 * is modified in place (ms->c.off[cont_level-1] is added) and later set
 * back from oldoff.  Whether every path between the two reaches the
 * restore is not visible in this listing.
 */
102 match(struct magic_set
*ms
, struct magic
*magic
, uint32_t nmagic
,
103 const unsigned char *s
, size_t nbytes
)
105 uint32_t magindex
= 0;
106 unsigned int cont_level
= 0;
107 int need_separator
= 0;
110 int returnval
= 0; /* if a match is found it is set to 1*/
111 int firstline
= 1; /* a flag to print X\n X\n- X */
113 if (check_mem(ms
, cont_level
) == -1)
116 for (magindex
= 0; magindex
< nmagic
; magindex
++) {
117 /* if main entry matches, print it... */
118 int flush
= !mget(ms
, &p
, s
, &magic
[magindex
], nbytes
);
119 switch (mcheck(ms
, &p
, &magic
[magindex
])) {
130 * main entry didn't match,
131 * flush its continuations
133 while (magindex
< nmagic
- 1 &&
134 magic
[magindex
+ 1].cont_level
!= 0)
139 if (!firstline
) { /* we found another match */
140 /* put a newline and '-' to do some simple formatting*/
141 if (file_printf(ms
, "\n- ") == -1)
145 if ((ms
->c
.off
[cont_level
] = mprint(ms
, &p
, &magic
[magindex
]))
149 * If we printed something, we'll need to print
150 * a blank before we print something else.
152 if (magic
[magindex
].desc
[0])
154 /* and any continuations that match */
155 if (check_mem(ms
, ++cont_level
) == -1)
158 while (magic
[magindex
+1].cont_level
!= 0 &&
159 ++magindex
< nmagic
) {
160 if (cont_level
< magic
[magindex
].cont_level
)
162 if (cont_level
> magic
[magindex
].cont_level
) {
164 * We're at the end of the level
165 * "cont_level" continuations.
167 cont_level
= magic
[magindex
].cont_level
;
169 if (magic
[magindex
].flag
& OFFADD
) {
170 oldoff
=magic
[magindex
].offset
;
171 magic
[magindex
].offset
+= ms
->c
.off
[cont_level
-1];
173 if (!mget(ms
, &p
, s
, &magic
[magindex
], nbytes
))
176 switch (mcheck(ms
, &p
, &magic
[magindex
])) {
183 * This continuation matched.
184 * Print its message, with
185 * a blank before it if
186 * the previous item printed
187 * and this item isn't empty.
189 /* space if previous printed */
191 && (magic
[magindex
].nospflag
== 0)
192 && (magic
[magindex
].desc
[0] != '\0')) {
193 if (file_printf(ms
, " ") == -1)
197 if ((ms
->c
.off
[cont_level
] = mprint(ms
, &p
,
198 &magic
[magindex
])) == -1)
200 if (magic
[magindex
].desc
[0])
204 * If we see any continuations
208 if (check_mem(ms
, ++cont_level
) == -1)
212 if (magic
[magindex
].flag
& OFFADD
) {
213 magic
[magindex
].offset
= oldoff
;
218 if ((ms
->flags
& MAGIC_CONTINUE
) == 0) {
219 return 1; /* don't keep searching */
222 return returnval
; /* This is hit if -k is set or there is no match */
/*
 * check_mem - make sure ms->c.off can be indexed with "level".
 *
 * NOTE(review): partial extraction - the return type, the declaration of
 * len and all return statements are not visible here.  Callers in this
 * file treat a result of -1 as failure.
 *
 * Visible behaviour: the first test is level < ms->c.len (its body is not
 * shown; presumably an early success return).  Otherwise ms->c.len is
 * raised by 20, len becomes the new element count times
 * sizeof(*ms->c.off), and ms->c.off is obtained from malloc when it is
 * still NULL or from realloc when it is not.  The result is then tested
 * against NULL.
 *
 * NOTE(review): the realloc result is stored straight into ms->c.off, so a
 * failed realloc loses the only pointer to the old block, and ms->c.len
 * has already been increased by then.  Unless the hidden failure path
 * compensates, keep the result in a temporary and update ms->c.off and
 * ms->c.len only on success.
 *
 * NOTE(review): the size is raised by one step of 20 only; a level more
 * than 20 past the old ms->c.len would still be out of range.  The calls
 * visible in this file pass 0 or ++cont_level.
 */
226 check_mem(struct magic_set
*ms
, unsigned int level
)
230 if (level
< ms
->c
.len
)
233 len
= (ms
->c
.len
+= 20) * sizeof(*ms
->c
.off
);
234 ms
->c
.off
= (ms
->c
.off
== NULL
) ? malloc(len
) : realloc(ms
->c
.off
, len
);
235 if (ms
->c
.off
!= NULL
)
/*
 * mprint - print the description of a matched entry.
 *
 * NOTE(review): partial extraction; the return type, the switch on
 * m->type, most case labels, the declarations of v and t and the return
 * statements are not visible.
 *
 * Visible behaviour, in source order:
 *  - p->b, p->h and p->l: the value is passed through file_signextend and
 *    printed with file_printf using m->desc as the format; t becomes
 *    m->offset plus sizeof(char), sizeof(short) or sizeof(int32_t).
 *  - a string case shared with FILE_BESTRING16 and FILE_LESTRING16: when
 *    m->reln is '=' the entry's own m->value.s is printed and t becomes
 *    m->offset + strlen(m->value.s); otherwise p->s is printed and t
 *    becomes m->offset + strlen(p->s).  When m->value.s is empty a newline
 *    is searched for in p->s with strchr first; what is done with the
 *    result is not shown.
 *  - two date forms: file_fmttime(p->l, 1) or file_fmttime(p->l, 0) is
 *    printed and t becomes m->offset + sizeof(time_t).
 *  - one more case prints p->s and sets t to m->offset + strlen(p->s).
 *  - an unhandled type is reported through file_error.
 *
 * Every file_printf result is compared with -1.  match() in this file
 * stores the result of mprint in ms->c.off[cont_level] and compares it
 * with -1; presumably t is what is returned on success - confirm.
 *
 * NOTE(review): the date cases format p->l but step t by sizeof(time_t);
 * the two sizes need not agree.  Confirm which width the offset should
 * advance by.
 * NOTE(review): m->desc is used directly as a printf-style format, so the
 * conversion in each description has to agree with the argument type
 * passed here.
 */
242 mprint(struct magic_set
*ms
, union VALUETYPE
*p
, struct magic
*m
)
250 v
= file_signextend(ms
, m
, (size_t)p
->b
);
251 if (file_printf(ms
, m
->desc
, (unsigned char) v
) == -1)
253 t
= m
->offset
+ sizeof(char);
259 v
= file_signextend(ms
, m
, (size_t)p
->h
);
260 if (file_printf(ms
, m
->desc
, (unsigned short) v
) == -1)
262 t
= m
->offset
+ sizeof(short);
268 v
= file_signextend(ms
, m
, p
->l
);
269 if (file_printf(ms
, m
->desc
, (uint32_t) v
) == -1)
271 t
= m
->offset
+ sizeof(int32_t);
276 case FILE_BESTRING16
:
277 case FILE_LESTRING16
:
278 if (m
->reln
== '=') {
279 if (file_printf(ms
, m
->desc
, m
->value
.s
) == -1)
281 t
= m
->offset
+ strlen(m
->value
.s
);
284 if (*m
->value
.s
== '\0') {
285 char *cp
= strchr(p
->s
,'\n');
289 if (file_printf(ms
, m
->desc
, p
->s
) == -1)
291 t
= m
->offset
+ strlen(p
->s
);
298 if (file_printf(ms
, m
->desc
, file_fmttime(p
->l
, 1)) == -1)
300 t
= m
->offset
+ sizeof(time_t);
306 if (file_printf(ms
, m
->desc
, file_fmttime(p
->l
, 0)) == -1)
308 t
= m
->offset
+ sizeof(time_t);
311 if (file_printf(ms
, m
->desc
, p
->s
) == -1)
313 t
= m
->offset
+ strlen(p
->s
);
317 file_error(ms
, 0, "invalid m->type (%d) in mprint()", m
->type
);
324 * Convert the byte order of the data we are looking at
325 * While we're here, let's apply the mask operation
326 * (unless you have a better idea)
/*
 * mconvert - bring the fetched value into the form mcheck expects and
 * apply the mask operation.
 *
 * NOTE(review): partial extraction; the return type, the outer switch on
 * m->type, most case labels, the arithmetic inside the mask switches and
 * the return statements are not visible.
 *
 * Visible behaviour:
 *  - each numeric kind has a switch on m->mask_op & 0x7F (FILE_OPMULTIPLY
 *    is the only label shown) followed by a test of
 *    m->mask_op & FILE_OPINVERSE;
 *  - a string case shared by FILE_BESTRING16 and FILE_LESTRING16 stores a
 *    NUL at p->s[sizeof(p->s) - 1] and then looks for a newline at
 *    p->s[len] after decrementing len;
 *  - a further string form sets ptr1 to p->s and ptr2 to p->s + 1 and
 *    limits len to sizeof(p->s) - 1 before the same newline test -
 *    presumably it shifts out a leading length byte, TODO confirm;
 *  - p->h is rebuilt from p->hs with index 0 as the high byte in one case
 *    and index 1 as the high byte in another; 32-bit values are rebuilt
 *    from p->hl[0..3] in either order the same way;
 *  - an unhandled type is reported through file_error.
 *
 * mget in this file tests the result with !mconvert(...).
 *
 * NOTE(review): p->hl[n]<<24 is evaluated after integer promotion; if hl
 * holds unsigned char, a byte of 0x80 or more shifted by 24 overflows int.
 * The element type is not visible here - confirm, and convert to uint32_t
 * before shifting if so.
 */
329 mconvert(struct magic_set
*ms
, union VALUETYPE
*p
, struct magic
*m
)
334 switch (m
->mask_op
&0x7F) {
350 case FILE_OPMULTIPLY
:
360 if (m
->mask_op
& FILE_OPINVERSE
)
365 switch (m
->mask_op
&0x7F) {
381 case FILE_OPMULTIPLY
:
391 if (m
->mask_op
& FILE_OPINVERSE
)
398 switch (m
->mask_op
&0x7F) {
414 case FILE_OPMULTIPLY
:
424 if (m
->mask_op
& FILE_OPINVERSE
)
428 case FILE_BESTRING16
:
429 case FILE_LESTRING16
:
433 /* Null terminate and eat *trailing* return */
434 p
->s
[sizeof(p
->s
) - 1] = '\0';
436 if (len
-- && p
->s
[len
] == '\n')
442 char *ptr1
= p
->s
, *ptr2
= ptr1
+ 1;
444 if (len
>= sizeof(p
->s
))
445 len
= sizeof(p
->s
) - 1;
450 if (len
-- && p
->s
[len
] == '\n')
455 p
->h
= (short)((p
->hs
[0]<<8)|(p
->hs
[1]));
457 switch (m
->mask_op
&0x7F) {
473 case FILE_OPMULTIPLY
:
483 if (m
->mask_op
& FILE_OPINVERSE
)
490 ((p
->hl
[0]<<24)|(p
->hl
[1]<<16)|(p
->hl
[2]<<8)|(p
->hl
[3]));
492 switch (m
->mask_op
&0x7F) {
508 case FILE_OPMULTIPLY
:
518 if (m
->mask_op
& FILE_OPINVERSE
)
522 p
->h
= (short)((p
->hs
[1]<<8)|(p
->hs
[0]));
524 switch (m
->mask_op
&0x7F) {
540 case FILE_OPMULTIPLY
:
550 if (m
->mask_op
& FILE_OPINVERSE
)
557 ((p
->hl
[3]<<24)|(p
->hl
[2]<<16)|(p
->hl
[1]<<8)|(p
->hl
[0]));
559 switch (m
->mask_op
&0x7F) {
575 case FILE_OPMULTIPLY
:
585 if (m
->mask_op
& FILE_OPINVERSE
)
591 file_error(ms
, 0, "invalid type %d in mconvert()", m
->type
);
598 mdebug(uint32_t offset
, const char *str
, size_t len
)
600 (void) fprintf(stderr
, "mget @%d: ", offset
);
601 file_showstr(stderr
, str
, len
);
602 (void) fputc('\n', stderr
);
603 (void) fputc('\n', stderr
);
/*
 * mcopy - copy the data found at "offset" in s into *p.
 *
 * NOTE(review): partial extraction; the return type, the return
 * statements, some braces and several statements are not visible.  mget
 * in this file compares the result with -1.
 *
 * Visible behaviour:
 *  - type == FILE_REGEX with indir == 0: s is duplicated with strdup into
 *    p->buf (a NULL result is tested) and, per the existing comment,
 *    offset counts lines rather than bytes while newlines are located with
 *    strchr.
 *  - indir == 0 with FILE_BESTRING16 or FILE_LESTRING16: src runs from
 *    s + offset up to esrc = s + nbytes while dst advances through p->s;
 *    edst is set to &p->s[sizeof(p->s) - 1], how it is used is not shown.
 *  - otherwise: when offset >= nbytes *p is zero-filled.  Else nbytes is
 *    reduced to nbytes - offset when that is smaller than sizeof(*p) (the
 *    other branch is not shown), nbytes bytes are copied from s + offset
 *    into *p, and the remainder of *p is zero-filled when
 *    nbytes < sizeof(*p).
 *
 * NOTE(review): strdup and strchr need a NUL-terminated s, but s arrives
 * with an explicit length (nbytes); confirm that the caller guarantees a
 * terminator.  Who releases p->buf is not visible here.
 * NOTE(review): src and esrc are const char * initialised from a
 * const unsigned char * without a cast; compilers warn about the
 * pointer signedness.
 */
607 mcopy(struct magic_set
*ms
, union VALUETYPE
*p
, int type
, int indir
,
608 const unsigned char *s
, size_t offset
, size_t nbytes
)
610 if (type
== FILE_REGEX
&& indir
== 0) {
612 * offset is interpreted as last line to search,
613 * (starting at 1), not as bytes-from start-of-file
615 unsigned char *b
, *last
= NULL
;
616 if ((p
->buf
= strdup((const char *)s
)) == NULL
) {
620 for (b
= (unsigned char *)p
->buf
; offset
&&
621 (b
= (unsigned char *)strchr((char *)b
, '\n')) != NULL
;
629 if (indir
== 0 && (type
== FILE_BESTRING16
|| type
== FILE_LESTRING16
))
631 const char *src
= s
+ offset
;
632 const char *esrc
= s
+ nbytes
;
633 char *dst
= p
->s
, *edst
= &p
->s
[sizeof(p
->s
) - 1];
635 if (type
== FILE_BESTRING16
)
638 for (;src
< esrc
; src
++, dst
++) {
650 if (offset
>= nbytes
) {
651 (void)memset(p
, '\0', sizeof(*p
));
654 if (nbytes
- offset
< sizeof(*p
))
655 nbytes
= nbytes
- offset
;
659 (void)memcpy(p
, s
+ offset
, nbytes
);
662 * the usefulness of padding with zeroes eludes me, it
663 * might even cause problems
665 if (nbytes
< sizeof(*p
))
666 (void)memset(((char *)p
) + nbytes
, '\0', sizeof(*p
) - nbytes
);
/*
 * mget - fetch the value a magic entry refers to into *p.
 *
 * NOTE(review): partial extraction; the return type, the return
 * statements, the case labels and many continuation lines are not visible.
 * match() in this file tests the result with !mget(...).
 *
 * Visible steps, in source order:
 *  1. mcopy from m->offset, passing m->flag & INDIR as the indir argument
 *     (a result of -1 is tested);
 *  2. nbytes is compared against offset + 1, offset + 2, offset + 4 and
 *     offset + m->vallen (what selects each test is not shown);
 *  3. with MAGIC_DEBUG set in ms->flags, *p is dumped through mdebug;
 *  4. when m->flag has INDIR: a switch on m->in_type, and inside it
 *     switches on m->in_op & 0x7F, compute a new offset from the fetched
 *     value (p->b, p->h, p->l, or bytes of p->hs and p->hl assembled with
 *     either index as the high byte) combined with m->in_offset by and,
 *     or, xor, add, subtract, multiply, divide or modulo; m->in_op is
 *     tested for FILE_OPINVERSE after each group; then mcopy is called
 *     again at the new offset with indir 0, and *p is dumped again when
 *     debugging;
 *  5. mconvert is called and its result tested with !.
 *
 * NOTE(review): the divide and modulo forms use m->in_offset as divisor;
 * nothing visible here rules out zero.  Confirm that it is rejected where
 * the magic entry is parsed.
 * NOTE(review): offset is uint32_t, so offset + 1, + 2 and + 4 in the
 * length checks can wrap for offsets close to the 32-bit maximum.
 */
671 mget(struct magic_set
*ms
, union VALUETYPE
*p
, const unsigned char *s
,
672 struct magic
*m
, size_t nbytes
)
674 uint32_t offset
= m
->offset
;
676 if (mcopy(ms
, p
, m
->type
, m
->flag
& INDIR
, s
, offset
, nbytes
) == -1)
679 /* Verify we have enough data to match magic type */
682 if (nbytes
< (offset
+ 1)) /* should always be true */
689 if (nbytes
< (offset
+ 2))
702 if (nbytes
< (offset
+ 4))
708 if (nbytes
< (offset
+ m
->vallen
))
713 if ((ms
->flags
& MAGIC_DEBUG
) != 0) {
714 mdebug(offset
, (char *)(void *)p
, sizeof(union VALUETYPE
));
718 if (m
->flag
& INDIR
) {
719 switch (m
->in_type
) {
722 switch (m
->in_op
&0x7F) {
724 offset
= p
->b
& m
->in_offset
;
727 offset
= p
->b
| m
->in_offset
;
730 offset
= p
->b
^ m
->in_offset
;
733 offset
= p
->b
+ m
->in_offset
;
736 offset
= p
->b
- m
->in_offset
;
738 case FILE_OPMULTIPLY
:
739 offset
= p
->b
* m
->in_offset
;
742 offset
= p
->b
/ m
->in_offset
;
745 offset
= p
->b
% m
->in_offset
;
750 if (m
->in_op
& FILE_OPINVERSE
)
755 switch (m
->in_op
& 0x7F) {
757 offset
= (short)((p
->hs
[0]<<8)|
762 offset
= (short)((p
->hs
[0]<<8)|
767 offset
= (short)((p
->hs
[0]<<8)|
772 offset
= (short)((p
->hs
[0]<<8)|
777 offset
= (short)((p
->hs
[0]<<8)|
781 case FILE_OPMULTIPLY
:
782 offset
= (short)((p
->hs
[0]<<8)|
787 offset
= (short)((p
->hs
[0]<<8)|
792 offset
= (short)((p
->hs
[0]<<8)|
798 offset
= (short)((p
->hs
[0]<<8)|
800 if (m
->in_op
& FILE_OPINVERSE
)
805 switch (m
->in_op
& 0x7F) {
807 offset
= (short)((p
->hs
[1]<<8)|
812 offset
= (short)((p
->hs
[1]<<8)|
817 offset
= (short)((p
->hs
[1]<<8)|
822 offset
= (short)((p
->hs
[1]<<8)|
827 offset
= (short)((p
->hs
[1]<<8)|
831 case FILE_OPMULTIPLY
:
832 offset
= (short)((p
->hs
[1]<<8)|
837 offset
= (short)((p
->hs
[1]<<8)|
842 offset
= (short)((p
->hs
[1]<<8)|
848 offset
= (short)((p
->hs
[1]<<8)|
850 if (m
->in_op
& FILE_OPINVERSE
)
855 switch (m
->in_op
& 0x7F) {
857 offset
= p
->h
& m
->in_offset
;
860 offset
= p
->h
| m
->in_offset
;
863 offset
= p
->h
^ m
->in_offset
;
866 offset
= p
->h
+ m
->in_offset
;
869 offset
= p
->h
- m
->in_offset
;
871 case FILE_OPMULTIPLY
:
872 offset
= p
->h
* m
->in_offset
;
875 offset
= p
->h
/ m
->in_offset
;
878 offset
= p
->h
% m
->in_offset
;
884 if (m
->in_op
& FILE_OPINVERSE
)
889 switch (m
->in_op
& 0x7F) {
891 offset
= (int32_t)((p
->hl
[0]<<24)|
898 offset
= (int32_t)((p
->hl
[0]<<24)|
905 offset
= (int32_t)((p
->hl
[0]<<24)|
912 offset
= (int32_t)((p
->hl
[0]<<24)|
919 offset
= (int32_t)((p
->hl
[0]<<24)|
925 case FILE_OPMULTIPLY
:
926 offset
= (int32_t)((p
->hl
[0]<<24)|
933 offset
= (int32_t)((p
->hl
[0]<<24)|
940 offset
= (int32_t)((p
->hl
[0]<<24)|
948 offset
= (int32_t)((p
->hl
[0]<<24)|
952 if (m
->in_op
& FILE_OPINVERSE
)
957 switch (m
->in_op
& 0x7F) {
959 offset
= (int32_t)((p
->hl
[3]<<24)|
966 offset
= (int32_t)((p
->hl
[3]<<24)|
973 offset
= (int32_t)((p
->hl
[3]<<24)|
980 offset
= (int32_t)((p
->hl
[3]<<24)|
987 offset
= (int32_t)((p
->hl
[3]<<24)|
993 case FILE_OPMULTIPLY
:
994 offset
= (int32_t)((p
->hl
[3]<<24)|
1001 offset
= (int32_t)((p
->hl
[3]<<24)|
1008 offset
= (int32_t)((p
->hl
[3]<<24)|
1016 offset
= (int32_t)((p
->hl
[3]<<24)|
1020 if (m
->in_op
& FILE_OPINVERSE
)
1025 switch (m
->in_op
& 0x7F) {
1027 offset
= p
->l
& m
->in_offset
;
1030 offset
= p
->l
| m
->in_offset
;
1033 offset
= p
->l
^ m
->in_offset
;
1036 offset
= p
->l
+ m
->in_offset
;
1039 offset
= p
->l
- m
->in_offset
;
1041 case FILE_OPMULTIPLY
:
1042 offset
= p
->l
* m
->in_offset
;
1045 offset
= p
->l
/ m
->in_offset
;
1048 offset
= p
->l
% m
->in_offset
;
1050 /* case TOOMANYSWITCHBLOCKS:
1051 * ugh = p->eye % m->strain;
1054 * off = p->tab & m->in_gest;
1060 if (m
->in_op
& FILE_OPINVERSE
)
1065 if (mcopy(ms
, p
, m
->type
, 0, s
, offset
, nbytes
) == -1)
1068 if ((ms
->flags
& MAGIC_DEBUG
) != 0) {
1069 mdebug(offset
, (char *)(void *)p
,
1070 sizeof(union VALUETYPE
));
1074 if (!mconvert(ms
, p
, m
))
1080 mcheck(struct magic_set
*ms
, union VALUETYPE
*p
, struct magic
*m
)
1082 uint32_t l
= m
->value
.l
;
1086 if ( (m
->value
.s
[0] == 'x') && (m
->value
.s
[1] == '\0') ) {
1115 case FILE_BESTRING16
:
1116 case FILE_LESTRING16
:
1120 * What we want here is:
1121 * v = strncmp(m->value.s, p->s, m->vallen);
1122 * but ignoring any nulls. bcmp doesn't give -/+/0
1123 * and isn't universally available anyway.
1125 unsigned char *a
= (unsigned char*)m
->value
.s
;
1126 unsigned char *b
= (unsigned char*)p
->s
;
1127 int len
= m
->vallen
;
1130 if (0L == m
->mask
) { /* normal string: do it fast */
1132 if ((v
= *b
++ - *a
++) != '\0')
1134 } else { /* combine the others */
1135 while (--len
>= 0) {
1136 if ((m
->mask
& STRING_IGNORE_LOWERCASE
) &&
1138 if ((v
= tolower(*b
++) - *a
++) != '\0')
1140 } else if ((m
->mask
& STRING_COMPACT_BLANK
) &&
1143 if (isspace(*b
++)) {
1150 } else if (isspace(*a
) &&
1151 (m
->mask
& STRING_COMPACT_OPTIONAL_BLANK
)) {
1156 if ((v
= *b
++ - *a
++) != '\0')
1169 rc
= regcomp(&rx
, m
->value
.s
, REG_EXTENDED
|REG_NOSUB
);
1172 regerror(rc
, &rx
, errmsg
, sizeof(errmsg
));
1173 file_error(ms
, 0, "regex error %d, (%s)", rc
, errmsg
);
1176 rc
= regexec(&rx
, p
->buf
, 0, 0, 0);
1183 file_error(ms
, 0, "invalid type %d in mcheck()", m
->type
);
1187 if (m
->type
!= FILE_STRING
&& m
->type
!= FILE_PSTRING
)
1188 v
= file_signextend(ms
, m
, v
);
1192 if ((ms
->flags
& MAGIC_DEBUG
) != 0)
1193 (void) fprintf(stderr
, "%u == *any* = 1\n", v
);
1199 if ((ms
->flags
& MAGIC_DEBUG
) != 0)
1200 (void) fprintf(stderr
, "%u != %u = %d\n",
1206 if ((ms
->flags
& MAGIC_DEBUG
) != 0)
1207 (void) fprintf(stderr
, "%u == %u = %d\n",
1212 if (m
->flag
& UNSIGNED
) {
1214 if ((ms
->flags
& MAGIC_DEBUG
) != 0)
1215 (void) fprintf(stderr
, "%u > %u = %d\n",
1219 matched
= (int32_t) v
> (int32_t) l
;
1220 if ((ms
->flags
& MAGIC_DEBUG
) != 0)
1221 (void) fprintf(stderr
, "%d > %d = %d\n",
1227 if (m
->flag
& UNSIGNED
) {
1229 if ((ms
->flags
& MAGIC_DEBUG
) != 0)
1230 (void) fprintf(stderr
, "%u < %u = %d\n",
1234 matched
= (int32_t) v
< (int32_t) l
;
1235 if ((ms
->flags
& MAGIC_DEBUG
) != 0)
1236 (void) fprintf(stderr
, "%d < %d = %d\n",
1242 matched
= (v
& l
) == l
;
1243 if ((ms
->flags
& MAGIC_DEBUG
) != 0)
1244 (void) fprintf(stderr
, "((%x & %x) == %x) = %d\n",
1249 matched
= (v
& l
) != l
;
1250 if ((ms
->flags
& MAGIC_DEBUG
) != 0)
1251 (void) fprintf(stderr
, "((%x & %x) != %x) = %d\n",
1257 file_error(ms
, 0, "cannot happen: invalid relation `%c'",