1.0.19.33: Improved interrupt handling on darwin/x86[-64]
[sbcl/eslaughter.git] / src / code / huffman.lisp
blob51f088f56513532f717161328997bc72e19bd07f
1 ;;;; a simple huffman encoder/decoder, used to compress unicode
2 ;;;; character names.
4 ;;;; This software is part of the SBCL system. See the README file for
5 ;;;; more information.
6 ;;;;
7 ;;;; This software is derived from the CMU CL system, which was
8 ;;;; written at Carnegie Mellon University and released into the
9 ;;;; public domain. The software is in the public domain and is
10 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
11 ;;;; files for more information.
13 (in-package "SB!IMPL")
15 (defstruct (huffman-node (:constructor make-huffman-node (key weight)))
16 key weight)
18 (defstruct (huffman-pair
19 (:include huffman-node)
20 (:constructor make-huffman-pair
21 (left right &aux
22 (key (concatenate 'string
23 (huffman-node-key left)
24 (huffman-node-key right)))
25 (weight (+ (huffman-node-weight left)
26 (huffman-node-weight right))))))
27 left right)
29 (defun huffman-weights (corpus)
30 (let ((weight-table (make-hash-table :test #'equal)))
31 (loop for string in corpus
32 do (loop for char across string
33 do (incf (gethash char weight-table 0))))
34 (let (alist)
35 (maphash (lambda (char weight)
36 (push (make-huffman-node (string char) weight) alist))
37 weight-table)
38 alist)))
40 (defun make-huffman-tree (corpus)
41 (labels ((merge-table (table)
42 (setf table (sort table #'< :key #'huffman-node-weight))
43 (push (make-huffman-pair (pop table) (pop table))
44 table)
45 (if (second table)
46 (merge-table table)
47 (car table)))
48 (finish-tree (tree)
49 (if (huffman-pair-p tree)
50 (list (huffman-node-key tree)
51 (finish-tree (huffman-pair-left tree))
52 (finish-tree (huffman-pair-right tree)))
53 (huffman-node-key tree))))
54 (finish-tree (merge-table (huffman-weights corpus)))))
56 (defun huffman-decode (code tree)
57 (let ((original code))
58 (labels ((pop-bit ()
59 (let* ((bits (integer-length code))
60 (bit (ldb (byte 1 (- bits 2)) code)))
61 (setf code (dpb 1 (byte 1 (- bits 2))
62 (ldb (byte (- bits 1) 0) code)))
63 bit))
64 (choose (branch)
65 (destructuring-bind (key left right) branch
66 (declare (ignore key))
67 (if (zerop (pop-bit))
68 left
69 right)))
70 (decode (branch)
71 (when (zerop code)
72 (error "Invalid Huffman-code: ~S" original))
73 (let ((next (choose branch)))
74 (cond ((consp next)
75 (decode next))
76 ((< 1 code)
77 (concatenate 'string next (decode tree)))
79 next)))))
80 (decode tree))))
82 (defun huffman-match (char node)
83 (if (consp node)
84 (find char (the string (car node)) :test #'equal)
85 (eql char (character node))))
87 (defun huffman-encode (string tree)
88 (let ((code 1))
89 (labels ((encode (bit char tree)
90 (when bit
91 (setf code (+ (ash code 1) bit)))
92 (if (consp tree)
93 (destructuring-bind (key left right) tree
94 (declare (ignore key))
95 (cond ((huffman-match char left)
96 (encode 0 char left))
97 ((huffman-match char right)
98 (encode 1 char right))
100 ;; unknown
101 (return-from huffman-encode nil))))
102 (unless (huffman-match char tree)
103 (error "Error encoding ~S (bad tree)." char)))))
104 (loop for char across string
105 do (encode nil char tree))
106 code)))