1 ;;;; a simple huffman encoder/decoder, used to compress unicode
4 ;;;; This software is part of the SBCL system. See the README file for
7 ;;;; This software is derived from the CMU CL system, which was
8 ;;;; written at Carnegie Mellon University and released into the
9 ;;;; public domain. The software is in the public domain and is
10 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
11 ;;;; files for more information.
13 (in-package "SB!IMPL")
15 (defstruct (huffman-node (:constructor make-huffman-node
(key weight
)))
18 (defstruct (huffman-pair
19 (:include huffman-node
)
20 (:constructor make-huffman-pair
22 (key (concatenate 'string
23 (huffman-node-key left
)
24 (huffman-node-key right
)))
25 (weight (+ (huffman-node-weight left
)
26 (huffman-node-weight right
))))))
29 (defun huffman-weights (corpus)
30 (let ((weight-table (make-hash-table :test
#'equal
)))
31 (loop for string in corpus
32 do
(loop for char across string
33 do
(incf (gethash char weight-table
0))))
35 (maphash (lambda (char weight
)
36 (push (make-huffman-node (string char
) weight
) alist
))
40 (defun make-huffman-tree (corpus)
41 (labels ((merge-table (table)
42 (setf table
(sort table
#'< :key
#'huffman-node-weight
))
43 (push (make-huffman-pair (pop table
) (pop table
))
49 (if (huffman-pair-p tree
)
50 (list (huffman-node-key tree
)
51 (finish-tree (huffman-pair-left tree
))
52 (finish-tree (huffman-pair-right tree
)))
53 (huffman-node-key tree
))))
54 (finish-tree (merge-table (huffman-weights corpus
)))))
56 (defun huffman-decode (code tree
)
57 (let ((original code
))
59 (let* ((bits (integer-length code
))
60 (bit (ldb (byte 1 (- bits
2)) code
)))
61 (setf code
(dpb 1 (byte 1 (- bits
2))
62 (ldb (byte (- bits
1) 0) code
)))
65 (destructuring-bind (key left right
) branch
66 (declare (ignore key
))
72 (error "Invalid Huffman-code: ~S" original
))
73 (let ((next (choose branch
)))
77 (concatenate 'string next
(decode tree
)))
82 (defun huffman-match (char node
)
84 (find char
(the string
(car node
)) :test
#'equal
)
85 (eql char
(character node
))))
87 (defun huffman-encode (string tree
)
89 (labels ((encode (bit char tree
)
91 (setf code
(+ (ash code
1) bit
)))
93 (destructuring-bind (key left right
) tree
94 (declare (ignore key
))
95 (cond ((huffman-match char left
)
97 ((huffman-match char right
)
98 (encode 1 char right
))
101 (return-from huffman-encode nil
))))
102 (unless (huffman-match char tree
)
103 (error "Error encoding ~S (bad tree)." char
)))))
104 (loop for char across string
105 do
(encode nil char tree
))