1 ;;; bindat.el --- binary data structure packing and unpacking.
3 ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
5 ;; Author: Kim F. Storm <storm@cua.dk>
6 ;; Assignment name: struct.el
7 ;; Keywords: comm data processes
9 ;; This file is part of GNU Emacs.
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 3, or (at your option)
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24 ;; Boston, MA 02110-1301, USA.
28 ;; Packing and unpacking of (binary) data structures.
30 ;; The data formats used in binary files and network protocols are
31 ;; often structured data which can be described by a C-style structure
32 ;; such as the one shown below. Using the bindat package, decoding
33 ;; and encoding binary data formats like these is made simple using a
34 ;; structure specification which closely resembles the C style
35 ;; structure declarations.
37 ;; Encoded (binary) data is stored in a unibyte string or vector,
38 ;; while the decoded data is stored in an alist with (FIELD . VALUE)
43 ;; Consider the following C structures:
46 ;; unsigned long dest_ip;
47 ;; unsigned long src_ip;
48 ;; unsigned short dest_port;
49 ;; unsigned short src_port;
53 ;; unsigned char type;
54 ;; unsigned char opcode;
55 ;; unsigned long length; /* In little endian order */
56 ;; unsigned char id[8]; /* nul-terminated string */
57 ;; unsigned char data[/* (length + 3) & ~3 */];
61 ;; struct header header;
62 ;; unsigned char items;
63 ;; unsigned char filler[3];
64 ;; struct data item[/* items */];
67 ;; The corresponding Lisp bindat specification looks like this:
69 ;; (setq header-bindat-spec
75 ;; (setq data-bindat-spec
78 ;; (length u16r) ;; little endian order
80 ;; (data vec (length))
83 ;; (setq packet-bindat-spec
84 ;; '((header struct header-bindat-spec)
87 ;; (item repeat (items)
88 ;; (struct data-bindat-spec))))
91 ;; A binary data representation may look like
92 ;; [ 192 168 1 100 192 168 1 101 01 28 21 32 2 0 0 0
93 ;; 2 3 5 0 ?A ?B ?C ?D ?E ?F 0 0 1 2 3 4 5 0 0 0
94 ;; 1 4 7 0 ?B ?C ?D ?E ?F ?G 0 0 6 7 8 9 10 11 12 0 ]
96 ;; The corresponding decoded structure looks like
99 ;; (dest-ip . [192 168 1 100])
100 ;; (src-ip . [192 168 1 101])
102 ;; (src-port . 5408))
104 ;; (item ((data . [1 2 3 4 5])
109 ;; ((data . [6 7 8 9 10 11 12])
115 ;; To access a specific value in this structure, use the function
116 ;; bindat-get-field with the structure as first arg followed by a list
117 ;; of field names and array indexes, e.g. using the data above,
118 ;; (bindat-get-field decoded-structure 'item 1 'id)
121 ;; Binary Data Structure Specification Format
122 ;; ------------------------------------------
124 ;; We recommend using names that end in `-bindat-spec'; such names
125 ;; are recognized automatically as "risky" variables.
127 ;; The data specification is formatted as follows:
129 ;; SPEC ::= ( ITEM... )
131 ;; ITEM ::= ( [FIELD] TYPE )
132 ;; | ( [FIELD] eval FORM ) -- eval FORM for side-effect only
133 ;; | ( [FIELD] fill LEN ) -- skip LEN bytes
134 ;; | ( [FIELD] align LEN ) -- skip to next multiple of LEN bytes
135 ;; | ( [FIELD] struct SPEC_NAME )
136 ;; | ( [FIELD] union TAG_VAL (TAG SPEC)... [(t SPEC)] )
137 ;; | ( [FIELD] repeat COUNT ITEM... )
139 ;; -- In (eval EXPR), the value of the last field is available in
140 ;; the dynamically bound variable `last'.
142 ;; TYPE ::= ( eval EXPR ) -- interpret result as TYPE
143 ;; | u8 | byte -- length 1
144 ;; | u16 | word | short -- length 2, network byte order
145 ;; | u24 -- 3-byte value
146 ;; | u32 | dword | long -- length 4, network byte order
147 ;; | u16r | u24r | u32r -- little endian byte order.
148 ;; | str LEN -- LEN byte string
149 ;; | strz LEN -- LEN byte (zero-terminated) string
150 ;; | vec LEN [TYPE] -- vector of LEN items of TYPE (default: u8)
151 ;; | ip -- 4 byte vector
152 ;; | bits LEN -- List with bits set in LEN bytes.
154 ;; -- Note: 32 bit values may be limited by emacs' INTEGER
155 ;; implementation limits.
157 ;; -- Example: `bits 2' will unpack 0x28 0x1c to (2 3 4 11 13)
158 ;; and 0x1c 0x28 to (3 5 10 11 12).
160 ;; FIELD ::= ( eval EXPR ) -- use result as NAME
164 ;; | <omitted> | nil -- LEN = 1
169 ;; TAG ::= LISP_CONSTANT
170 ;; | ( eval EXPR ) -- return non-nil if tag match;
171 ;; current TAG_VAL in `tag'.
173 ;; ARG ::= ( eval EXPR ) -- interpret result as ARG
174 ;; | INTEGER_CONSTANT
177 ;; DEREF ::= ( [NAME | INTEGER]... ) -- Field NAME or Array index relative
178 ;; to current structure spec.
179 ;; -- see bindat-get-field
181 ;; A `union' specification
182 ;; ([FIELD] union TAG_VAL (TAG SPEC) ... [(t SPEC)])
183 ;; is interpreted by evalling TAG_VAL and then comparing that to
184 ;; each TAG using equal; if a match is found, the corresponding SPEC
186 ;; If TAG is a form (eval EXPR), EXPR is evalled with `tag' bound to the
187 ;; value of TAG_VAL; the corresponding SPEC is used if the result is non-nil.
188 ;; Finally, if TAG is t, the corresponding SPEC is used unconditionally.
190 ;; An `eval' specification
191 ;; ([FIELD] eval FORM)
192 ;; is interpreted by evaluating FORM; without a FIELD only its side effects matter.
193 ;; If FIELD is specified, the value is bound to that field.
194 ;; The FORM may access and update `bindat-raw' and `bindat-idx' (see `bindat-unpack').
198 ;; Helper functions for structure unpacking.
199 ;; Relies on dynamic binding of BINDAT-RAW and BINDAT-IDX
(defun bindat--unpack-u8 ()
  "Return the byte at `bindat-idx' in `bindat-raw' and advance `bindat-idx' by one.
Both variables are expected to be dynamically bound by the caller."
  ;; `prog1' keeps the byte that was read as the return value; without it
  ;; the trailing `setq' would hand the caller the new index instead.
  (prog1
      (aref bindat-raw bindat-idx)
    (setq bindat-idx (1+ bindat-idx))))
(defun bindat--unpack-u16 ()
  "Read two bytes via `bindat--unpack-u8' and combine them, first byte most significant."
  (let* ((high (bindat--unpack-u8))
         (low (bindat--unpack-u8)))
    (logior (lsh high 8) low)))
(defun bindat--unpack-u24 ()
  "Read a 16-bit value followed by one byte and combine them, first part most significant."
  (let* ((upper (bindat--unpack-u16))
         (lowest (bindat--unpack-u8)))
    (logior (lsh upper 8) lowest)))
(defun bindat--unpack-u32 ()
  "Read two consecutive 16-bit values and combine them, first value most significant."
  (let* ((upper (bindat--unpack-u16))
         (lower (bindat--unpack-u16)))
    (logior (lsh upper 16) lower)))
(defun bindat--unpack-u16r ()
  "Read two bytes via `bindat--unpack-u8' and combine them, first byte least significant."
  (let* ((low (bindat--unpack-u8))
         (high (bindat--unpack-u8)))
    (logior low (lsh high 8))))
(defun bindat--unpack-u24r ()
  "Read a little-endian 16-bit value followed by one byte; the trailing byte is most significant."
  (let* ((lower (bindat--unpack-u16r))
         (top (bindat--unpack-u8)))
    (logior lower (lsh top 16))))
(defun bindat--unpack-u32r ()
  "Read two consecutive little-endian 16-bit values; the second one is most significant."
  (let* ((lower (bindat--unpack-u16r))
         (upper (bindat--unpack-u16r)))
    (logior lower (lsh upper 16))))
;; bindat--unpack-item: decode one item of TYPE from `bindat-raw' at
;; `bindat-idx'.  LEN is the item's length argument and VECTYPE the element
;; type handed down when a vector's elements are unpacked recursively.
;; NOTE(review): the embedded numbering skips (227, 229, 231, ...), i.e.
;; several lines of this definition -- including most of the tests that
;; select a branch -- are absent from this copy, and the remaining text is
;; not a balanced form.  Restore it from a pristine copy before loading.
227 (defun bindat--unpack-item (type len
&optional vectype
)
;; Retarget the item to a 4-element `vec'.  Presumably this is the `ip'
;; type ("4 byte vector" in the header); the guarding test is not visible.
229 (setq type
'vec len
4))
231 ((memq type
'(u8 byte
))
233 ((memq type
'(u16 word short
))
234 (bindat--unpack-u16))
236 (bindat--unpack-u24))
237 ((memq type
'(u32 dword long
))
238 (bindat--unpack-u32))
240 (bindat--unpack-u16r))
242 (bindat--unpack-u24r))
244 (bindat--unpack-u32r))
;; Bit list: BNUM starts at the highest bit number (8*LEN - 1); each set
;; bit's number is consed onto BITS, and a zero byte skips 8 numbers.
246 (let ((bits nil
) (bnum (1- (* 8 len
))) j m
)
248 (if (= (setq m
(bindat--unpack-u8)) 0)
249 (setq bnum
(- bnum
8))
252 (if (/= 0 (logand m j
))
253 (setq bits
(cons bnum bits
)))
;; Take LEN bytes starting at bindat-idx, then step bindat-idx past them.
258 (let ((s (substring bindat-raw bindat-idx
(+ bindat-idx len
))))
259 (setq bindat-idx
(+ bindat-idx len
))
261 (string-make-unibyte (concat s
)))))
;; Scan at most LEN bytes for a zero byte; the result is cut at that
;; position, but bindat-idx still advances by the full LEN.
264 (while (and (< i len
) (/= (aref bindat-raw
(+ bindat-idx i
)) 0))
266 (setq s
(substring bindat-raw bindat-idx
(+ bindat-idx i
)))
267 (setq bindat-idx
(+ bindat-idx len
))
269 (string-make-unibyte (concat s
)))))
;; Vector: allocate LEN slots and fill each one by a recursive call.  When
;; VECTYPE supplies them, its second and third elements become the element
;; length and the nested element type; otherwise the element type is
;; VECTYPE itself, or `u8' when VECTYPE is nil.
271 (let ((v (make-vector len
0)) (i 0) (vlen 1))
273 (setq vlen
(nth 1 vectype
)
274 vectype
(nth 2 vectype
))
275 (setq type
(or vectype
'u8
)
278 (aset v i
(bindat--unpack-item type vlen vectype
))
;; bindat--unpack-group: process the items of SPEC one after another,
;; decoding from `bindat-raw' at `bindat-idx' and collecting the results
;; on the list `struct'.
;; NOTE(review): the embedded numbering skips (283, 286, 290, ...), so
;; several lines -- among them the bindings of `field', `type', `len',
;; `tail', `data' and the tests selecting each branch -- are absent from
;; this copy and the text is not a balanced form.  Restore before loading.
283 (defun bindat--unpack-group (spec)
286 (let* ((item (car spec
))
290 (vectype (and (eq type
'vec
) (nth 3 item
)))
293 (setq spec
(cdr spec
))
;; FIELD, TYPE and LEN may each be written as (eval EXPR); replace such a
;; form by the value of EXPR.
294 (if (and (consp field
) (eq (car field
) 'eval
))
295 (setq field
(eval (car (cdr field
)))))
296 (if (and type
(consp type
) (eq (car type
) 'eval
))
297 (setq type
(eval (car (cdr type
)))))
298 (if (and len
(consp len
) (eq (car len
) 'eval
))
299 (setq len
(eval (car (cdr len
)))))
;; An item whose first element is one of these keywords has no FIELD name
;; (the adjustment performed in that case is not visible in this copy).
300 (if (memq field
'(eval fill align struct union
))
;; A list-valued LEN is a path of field names/indexes into the data decoded
;; so far; look it up with `bindat-get-field'.
305 (if (and (consp len
) (not (eq type
'eval
)))
306 (setq len
(apply 'bindat-get-field struct len
)))
312 (setq data
(eval len
))
;; Skip LEN bytes.
315 (setq bindat-idx
(+ bindat-idx len
)))
;; Advance bindat-idx to the next multiple of LEN.
317 (while (/= (% bindat-idx len
) 0)
318 (setq bindat-idx
(1+ bindat-idx
))))
;; Nested structure: LEN holds a form whose value is the sub-spec.
320 (setq data
(bindat--unpack-group (eval len
))))
;; Unpack the remaining elements of ITEM as a group COUNT times; results
;; are consed in reverse and put back in order by `nreverse'.
322 (let ((index 0) (count len
))
323 (while (< index count
)
324 (setq data
(cons (bindat--unpack-group (nthcdr tail item
)) data
))
325 (setq index
(1+ index
)))
326 (setq data
(nreverse data
))))
;; Choose among CASES: a case is taken when its tag CC is `equal' to TAG,
;; is t, or is a form that evaluates to non-nil.
328 (let ((tag len
) (cases (nthcdr tail item
)) case cc
)
330 (setq case
(car cases
)
333 (if (or (equal cc tag
) (equal cc t
)
334 (and (consp cc
) (eval cc
)))
335 (setq data
(bindat--unpack-group (cdr case
))
338 (setq data
(bindat--unpack-item type len vectype
)
;; Record the result: either as a (FIELD . DATA) pair, or by splicing DATA
;; into the result list (presumably when there is no FIELD name).
342 (setq struct
(cons (cons field data
) struct
))
343 (setq struct
(append data struct
))))))
(defun bindat-unpack (spec bindat-raw &optional bindat-idx)
  "Decode the binary data in BINDAT-RAW as described by SPEC.
BINDAT-RAW is a unibyte string or a vector; a multibyte string signals
an error.  Decoding starts at offset BINDAT-IDX, or at offset 0 when
that optional third argument is omitted.
The return value is whatever `bindat--unpack-group' yields for SPEC."
  (if (multibyte-string-p bindat-raw)
      (error "String is multibyte"))
  ;; A missing start offset means "from the beginning".
  (setq bindat-idx (or bindat-idx 0))
  (bindat--unpack-group spec))
(defun bindat-get-field (struct &rest field)
  "In structured data STRUCT, return value of field named FIELD.
If multiple field names are specified, use the field names to
lookup nested sub-structures in STRUCT, corresponding to the
C-language syntax STRUCT.FIELD1.FIELD2.FIELD3...
An integer value in the field list is taken as an array index,
e.g. corresponding to STRUCT.FIELD1[INDEX2].FIELD3...
Return nil as soon as one of the lookups finds nothing."
  (while (and struct field)
    (let ((key (car field)))
      (setq struct
            (if (integerp key)
                ;; Integer: index into a list of repeated items.
                (nth key struct)
              ;; Symbol: look the name up in the alist.
              (let ((val (assq key struct)))
                (if (consp val) (cdr val))))))
    (setq field (cdr field)))
  ;; `while' always evaluates to nil, so the value that was found has to
  ;; be returned explicitly.
  struct)
371 ;; Calculate bindat-raw length of structured data
(defvar bindat--fixed-length-alist
  '((u8 . 1) (byte . 1)
    (u16 . 2) (u16r . 2) (word . 2) (short . 2)
    ;; The 3-byte types and `ip' come from the TYPE table in the file
    ;; header ("u24 -- 3-byte value", "ip -- 4 byte vector").
    (u24 . 3) (u24r . 3)
    (u32 . 4) (u32r . 4) (dword . 4) (long . 4)
    (ip . 4))
  "Alist mapping each fixed-size bindat TYPE to its size in bytes.")
;; bindat--length-group: walk SPEC alongside the decoded data STRUCT and
;; add to `bindat-idx' the number of bytes each item takes up.
;; NOTE(review): the embedded numbering skips (380, 383, 387, ...), so
;; several lines -- among them the bindings of `field', `type', `len',
;; `tail', `last' and the tests selecting each branch -- are absent from
;; this copy and the text is not a balanced form.  Restore before loading.
380 (defun bindat--length-group (struct spec
)
383 (let* ((item (car spec
))
387 (vectype (and (eq type
'vec
) (nth 3 item
)))
389 (setq spec
(cdr spec
))
;; FIELD, TYPE and LEN may each be written as (eval EXPR); replace such a
;; form by the value of EXPR.
390 (if (and (consp field
) (eq (car field
) 'eval
))
391 (setq field
(eval (car (cdr field
)))))
392 (if (and type
(consp type
) (eq (car type
) 'eval
))
393 (setq type
(eval (car (cdr type
)))))
394 (if (and len
(consp len
) (eq (car len
) 'eval
))
395 (setq len
(eval (car (cdr len
)))))
396 (if (memq field
'(eval fill align struct union
))
;; A list-valued LEN is a path of field names/indexes into STRUCT.
401 (if (and (consp len
) (not (eq type
'eval
)))
402 (setq len
(apply 'bindat-get-field struct len
)))
;; Flatten nested vector types: multiply LEN by each inner vector length
;; until TYPE is no longer `vec'.
405 (while (eq type
'vec
)
408 (setq len
(* len
(nth 1 vectype
))
409 type
(nth 2 vectype
))
410 (setq type
(or vectype
'u8
)
;; Push (FIELD . value-of-LEN-form) onto STRUCT.
415 (setq struct
(cons (cons field
(eval len
)) struct
))
;; Skip LEN bytes.
418 (setq bindat-idx
(+ bindat-idx len
)))
;; Advance bindat-idx to the next multiple of LEN.
420 (while (/= (% bindat-idx len
) 0)
421 (setq bindat-idx
(1+ bindat-idx
))))
;; Nested structure: recurse on the sub-structure stored under FIELD (or on
;; STRUCT itself when there is no FIELD) with the spec LEN evaluates to.
423 (bindat--length-group
424 (if field
(bindat-get-field struct field
) struct
) (eval len
)))
;; Measure each of the COUNT elements stored under FIELD.
426 (let ((index 0) (count len
))
427 (while (< index count
)
428 (bindat--length-group
429 (nth index
(bindat-get-field struct field
))
431 (setq index
(1+ index
)))))
;; Choose among CASES: a case is taken when its tag CC is `equal' to TAG,
;; is t, or is a form that evaluates to non-nil; clearing CASES ends the
;; search after the first match.
433 (let ((tag len
) (cases (nthcdr tail item
)) case cc
)
435 (setq case
(car cases
)
438 (if (or (equal cc tag
) (equal cc t
)
439 (and (consp cc
) (eval cc
)))
441 (bindat--length-group struct
(cdr case
))
442 (setq cases nil
))))))
;; For fixed-size types, scale LEN by the per-item byte count found in
;; `bindat--fixed-length-alist'.
444 (if (setq type
(assq type bindat--fixed-length-alist
))
445 (setq len
(* len
(cdr type
))))
447 (setq last
(bindat-get-field struct field
)))
448 (setq bindat-idx
(+ bindat-idx len
))))))))
(defun bindat-length (spec struct)
  "Calculate bindat-raw length for STRUCT according to bindat SPEC.
The result is the offset reached by `bindat--length-group' when it
starts counting at 0."
  (let ((bindat-idx 0))
    ;; The helper only advances the dynamically bound `bindat-idx'; its own
    ;; return value is not the length.
    (bindat--length-group struct spec)
    bindat-idx))
457 ;; Pack structured data into bindat-raw
(defun bindat--pack-u8 (v)
  "Store the low 8 bits of V at `bindat-idx' in `bindat-raw'.
Afterwards advance `bindat-idx' by one and return the new index."
  (let ((byte (logand v 255)))
    (aset bindat-raw bindat-idx byte))
  (setq bindat-idx (1+ bindat-idx)))
(defun bindat--pack-u16 (v)
  "Store the low 16 bits of V in `bindat-raw', most significant byte first.
The two bytes go to `bindat-idx' and the following position;
`bindat-idx' is then advanced by two."
  ;; The shift count must be the single token -8 (a right shift by one
  ;; byte); written as `- 8' it would be two separate arguments.
  (aset bindat-raw bindat-idx (logand (lsh v -8) 255))
  (aset bindat-raw (1+ bindat-idx) (logand v 255))
  (setq bindat-idx (+ bindat-idx 2)))
(defun bindat--pack-u24 (v)
  "Store the low 24 bits of V in `bindat-raw', most significant byte first.
The top byte is written with `bindat--pack-u8', the remaining two with
`bindat--pack-u16'; both advance `bindat-idx'."
  ;; -16 must be one token: a right shift by two bytes.
  (bindat--pack-u8 (lsh v -16))
  (bindat--pack-u16 v))
(defun bindat--pack-u32 (v)
  "Store the low 32 bits of V in `bindat-raw', most significant byte first.
Both halves are written with `bindat--pack-u16', which advances
`bindat-idx'."
  ;; -16 must be one token: a right shift that exposes the upper half.
  (bindat--pack-u16 (lsh v -16))
  (bindat--pack-u16 v))
(defun bindat--pack-u16r (v)
  "Store the low 16 bits of V in `bindat-raw', least significant byte first.
The two bytes go to `bindat-idx' and the following position;
`bindat-idx' is then advanced by two."
  ;; The shift count must be the single token -8 (a right shift by one
  ;; byte); written as `- 8' it would be two separate arguments.
  (aset bindat-raw (1+ bindat-idx) (logand (lsh v -8) 255))
  (aset bindat-raw bindat-idx (logand v 255))
  (setq bindat-idx (+ bindat-idx 2)))
(defun bindat--pack-u24r (v)
  "Store the low 24 bits of V in `bindat-raw', least significant byte first.
The low two bytes are written with `bindat--pack-u16r', the top byte
with `bindat--pack-u8'; both advance `bindat-idx'."
  (bindat--pack-u16r v)
  ;; -16 must be one token: a right shift by two bytes.
  (bindat--pack-u8 (lsh v -16)))
(defun bindat--pack-u32r (v)
  "Store the low 32 bits of V in `bindat-raw', least significant byte first.
Both halves are written with `bindat--pack-u16r', which advances
`bindat-idx'."
  (bindat--pack-u16r v)
  ;; -16 must be one token: a right shift that exposes the upper half.
  (bindat--pack-u16r (lsh v -16)))
;; bindat--pack-item: encode the value V as an item of TYPE into
;; `bindat-raw' at `bindat-idx'.  LEN is the item's length argument and
;; VECTYPE the element type handed down when a vector's elements are
;; packed recursively.
;; NOTE(review): the embedded numbering skips (489, 491, 494, ...), so
;; several lines -- including most of the tests that select a branch and
;; the loop heads -- are absent from this copy and the text is not a
;; balanced form.  Restore it from a pristine copy before loading.
489 (defun bindat--pack-item (v type len
&optional vectype
)
;; Retarget the item to a 4-element `vec'.  Presumably this is the `ip'
;; type ("4 byte vector" in the header); the guarding test is not visible.
491 (setq type
'vec len
4))
;; Advance bindat-idx by LEN without writing anything (the condition for
;; this branch is not visible in this copy).
494 (setq bindat-idx
(+ bindat-idx len
)))
495 ((memq type
'(u8 byte
))
497 ((memq type
'(u16 word short
))
498 (bindat--pack-u16 v
))
500 (bindat--pack-u24 v
))
501 ((memq type
'(u32 dword long
))
502 (bindat--pack-u32 v
))
504 (bindat--pack-u16r v
))
506 (bindat--pack-u24r v
))
508 (bindat--pack-u32r v
))
;; Bit list: BNUM starts at the highest bit number (8*LEN - 1); bit masks J
;; are OR-ed into M, which is written out with `bindat--pack-u8'.
510 (let ((bnum (1- (* 8 len
))) j m
)
514 (setq bnum
(- bnum
8))
518 (setq m
(logior m j
)))
521 (bindat--pack-u8 m
))))
;; Strings: copy elements of V into bindat-raw, with the count L clamped to
;; at most LEN, then advance bindat-idx by the full LEN.
522 ((memq type
'(str strz
))
523 (let ((l (length v
)) (i 0))
524 (if (> l len
) (setq l len
))
526 (aset bindat-raw
(+ bindat-idx i
) (aref v i
))
528 (setq bindat-idx
(+ bindat-idx len
))))
;; Vector: pack elements of V by recursive calls, with the count L clamped
;; to at most LEN.  When VECTYPE supplies them, its second and third
;; elements become the element length and the nested element type;
;; otherwise the element type is VECTYPE itself, or `u8' when it is nil.
530 (let ((l (length v
)) (i 0) (vlen 1))
532 (setq vlen
(nth 1 vectype
)
533 vectype
(nth 2 vectype
))
534 (setq type
(or vectype
'u8
)
536 (if (> l len
) (setq l len
))
538 (bindat--pack-item (aref v i
) type vlen vectype
)
541 (setq bindat-idx
(+ bindat-idx len
)))))
;; bindat--pack-group: walk SPEC item by item and encode the matching
;; values of STRUCT into `bindat-raw' at `bindat-idx'.
;; NOTE(review): the embedded numbering skips (543, 546, 550, ...), so
;; several lines -- among them the bindings of `field', `type', `len',
;; `tail', `last' and the tests selecting each branch -- are absent from
;; this copy and the text is not a balanced form.  Restore before loading.
543 (defun bindat--pack-group (struct spec
)
546 (let* ((item (car spec
))
550 (vectype (and (eq type
'vec
) (nth 3 item
)))
552 (setq spec
(cdr spec
))
;; FIELD, TYPE and LEN may each be written as (eval EXPR); replace such a
;; form by the value of EXPR.
553 (if (and (consp field
) (eq (car field
) 'eval
))
554 (setq field
(eval (car (cdr field
)))))
555 (if (and type
(consp type
) (eq (car type
) 'eval
))
556 (setq type
(eval (car (cdr type
)))))
557 (if (and len
(consp len
) (eq (car len
) 'eval
))
558 (setq len
(eval (car (cdr len
)))))
559 (if (memq field
'(eval fill align struct union
))
;; A list-valued LEN is a path of field names/indexes into STRUCT.
564 (if (and (consp len
) (not (eq type
'eval
)))
565 (setq len
(apply 'bindat-get-field struct len
)))
;; Push (FIELD . value-of-LEN-form) onto STRUCT.
571 (setq struct
(cons (cons field
(eval len
)) struct
))
;; Skip LEN bytes.
574 (setq bindat-idx
(+ bindat-idx len
)))
;; Advance bindat-idx to the next multiple of LEN.
576 (while (/= (% bindat-idx len
) 0)
577 (setq bindat-idx
(1+ bindat-idx
))))
;; Arguments of a call whose head is missing from this copy: the
;; sub-structure stored under FIELD (or STRUCT itself when there is no
;; FIELD), and the spec that LEN evaluates to.
580 (if field
(bindat-get-field struct field
) struct
) (eval len
)))
;; Loop over the COUNT elements stored under FIELD.
582 (let ((index 0) (count len
))
583 (while (< index count
)
585 (nth index
(bindat-get-field struct field
))
587 (setq index
(1+ index
)))))
;; Choose among CASES: a case is taken when its tag CC is `equal' to TAG,
;; is t, or is a form that evaluates to non-nil; clearing CASES ends the
;; search after the first match.
589 (let ((tag len
) (cases (nthcdr tail item
)) case cc
)
591 (setq case
(car cases
)
594 (if (or (equal cc tag
) (equal cc t
)
595 (and (consp cc
) (eval cc
)))
597 (bindat--pack-group struct
(cdr case
))
598 (setq cases nil
))))))
;; Plain item: fetch FIELD's value from STRUCT, remember it in `last', and
;; encode it.
600 (setq last
(bindat-get-field struct field
))
601 (bindat--pack-item last type len vectype
)
(defun bindat-pack (spec struct &optional bindat-raw bindat-idx)
  "Return binary data packed according to SPEC for structured data STRUCT.
Optional third arg BINDAT-RAW is a pre-allocated unibyte string or vector to
pack into.
Optional fourth arg BINDAT-IDX is the starting offset into BINDAT-RAW.
When BINDAT-RAW is supplied, the data is stored into it and the return
value is nil; otherwise a freshly built string is returned."
  (when (multibyte-string-p bindat-raw)
    (error "Pre-allocated string is multibyte"))
  ;; Remember whether the caller supplied the buffer: in that case the
  ;; result lives in the caller's object and nothing is returned.
  (let ((no-return bindat-raw))
    (unless bindat-idx (setq bindat-idx 0))
    ;; Allocate a buffer only when none was passed in, so a pre-allocated
    ;; BINDAT-RAW is never replaced.
    (unless bindat-raw
      (setq bindat-raw (make-vector (+ bindat-idx (bindat-length spec struct)) 0)))
    (bindat--pack-group struct spec)
    (if no-return nil
      (concat bindat-raw))))
619 ;; Misc. format conversions
(defun bindat-format-vector (vect fmt sep &optional len)
  "Format vector VECT using element format FMT and separator SEP.
Result is a string with each element of VECT formatted using FMT and
separated by the string SEP.  If optional fourth arg LEN is given, use
only that many elements from VECT."
  ;; Only fall back to the full length when the caller gave no LEN.
  (unless len
    (setq len (length vect)))
  (let ((i len) (fmt2 (concat sep fmt)) (s nil))
    ;; Walk backwards so that consing leaves the pieces in order; every
    ;; element but the first is formatted with SEP prepended (FMT2).
    (while (> i 0)
      (setq i (1- i)
            s (cons (format (if (= i 0) fmt fmt2) (aref vect i)) s)))
    (apply 'concat s)))
(defun bindat-vector-to-dec (vect &optional sep)
  "Return the elements of VECT as decimal numbers joined by dots.
When the optional second argument SEP is a string, it is used as the
separator instead of the dot; any other value is ignored."
  (let ((separator (if (stringp sep) sep ".")))
    (bindat-format-vector vect "%d" separator)))
(defun bindat-vector-to-hex (vect &optional sep)
  "Format vector VECT in hex format separated by colons.
Each element is printed as at least two hex digits.
If optional second arg SEP is a string, use that as separator."
  (bindat-format-vector vect "%02x" (if (stringp sep) sep ":")))
(defun bindat-ip-to-string (ip)
  "Format vector IP as an ip address in dotted notation.
The port (if any) is omitted.  IP can be a string, as well."
  ;; Vectors are handed to `format-network-address' (second argument t
  ;; omits the port); anything else, e.g. a string, is formatted from its
  ;; first four elements.
  (if (vectorp ip)
      (format-network-address ip t)
    (format "%d.%d.%d.%d"
            (aref ip 0) (aref ip 1) (aref ip 2) (aref ip 3))))
654 ;;; arch-tag: 5e6708c3-03e2-4ad7-9885-5041b779c3fb
655 ;;; bindat.el ends here