1 ;;;; a simple huffman encoder/decoder, used to compress unicode
4 ;;;; This software is part of the SBCL system. See the README file for
7 ;;;; This software is derived from the CMU CL system, which was
8 ;;;; written at Carnegie Mellon University and released into the
9 ;;;; public domain. The software is in the public domain and is
10 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
11 ;;;; files for more information.
13 (in-package "SB!IMPL")
15 (defstruct (huffman-node (:constructor make-huffman-node
(key weight
))
19 (defstruct (huffman-pair
20 (:include huffman-node
)
22 (:constructor make-huffman-pair
24 (key (concatenate 'string
25 (huffman-node-key left
)
26 (huffman-node-key right
)))
27 (weight (+ (huffman-node-weight left
)
28 (huffman-node-weight right
))))))
31 (defun huffman-weights (corpus)
32 (let ((weight-table (make-hash-table :test
#'equal
)))
33 (loop for string in corpus
34 do
(loop for char across string
35 do
(incf (gethash char weight-table
0))))
37 (maphash (lambda (char weight
)
38 (push (make-huffman-node (string char
) weight
) alist
))
40 (sort alist
#'string
< :key
#'huffman-node-key
))))
42 (defun make-huffman-tree (corpus)
43 (labels ((merge-table (table)
44 (setf table
(stable-sort table
#'< :key
#'huffman-node-weight
))
45 (push (make-huffman-pair (pop table
) (pop table
))
51 (if (huffman-pair-p tree
)
52 (list (huffman-node-key tree
)
53 (finish-tree (huffman-pair-left tree
))
54 (finish-tree (huffman-pair-right tree
)))
55 (huffman-node-key tree
))))
56 (finish-tree (merge-table (huffman-weights corpus
)))))
58 (defun huffman-decode (code tree
)
59 (let ((original code
))
61 (let* ((bits (integer-length code
))
62 (bit (ldb (byte 1 (- bits
2)) code
)))
63 (setf code
(dpb 1 (byte 1 (- bits
2))
64 (ldb (byte (- bits
1) 0) code
)))
67 (destructuring-bind (key left right
) branch
68 (declare (ignore key
))
74 (error "Invalid Huffman-code: ~S" original
))
75 (let ((next (choose branch
)))
79 (concatenate 'string next
(decode tree
)))
84 (defun huffman-match (char node
)
86 (find char
(the string
(car node
)) :test
#'equal
)
87 (eql char
(character node
))))
89 (defun huffman-encode (string tree
)
91 (labels ((encode (bit char tree
)
93 (setf code
(+ (ash code
1) bit
)))
95 (destructuring-bind (key left right
) tree
96 (declare (ignore key
))
97 (cond ((huffman-match char left
)
99 ((huffman-match char right
)
100 (encode 1 char right
))
103 (return-from huffman-encode nil
))))
104 (unless (huffman-match char tree
)
105 (error "Error encoding ~S (bad tree)." char
)))))
106 (loop for char across string
107 do
(encode nil char tree
))