x86-64: Treat more symbols as having immediate storage class
[sbcl.git] / src / code / huffman.lisp
blobe897c688974cc4acb17db00e0707a21b59d89cfd
1 ;;;; a simple huffman encoder/decoder, used to compress unicode
2 ;;;; character names.
4 ;;;; This software is part of the SBCL system. See the README file for
5 ;;;; more information.
6 ;;;;
7 ;;;; This software is derived from the CMU CL system, which was
8 ;;;; written at Carnegie Mellon University and released into the
9 ;;;; public domain. The software is in the public domain and is
10 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
11 ;;;; files for more information.
13 (in-package "SB!IMPL")
15 (defstruct (huffman-node (:constructor make-huffman-node (key weight))
16 (:copier nil))
17 key weight)
19 (defstruct (huffman-pair
20 (:include huffman-node)
21 (:copier nil)
22 (:constructor make-huffman-pair
23 (left right &aux
24 (key (concatenate 'string
25 (huffman-node-key left)
26 (huffman-node-key right)))
27 (weight (+ (huffman-node-weight left)
28 (huffman-node-weight right))))))
29 left right)
31 (defun huffman-weights (corpus)
32 (let ((weight-table (make-hash-table :test #'equal)))
33 (loop for string in corpus
34 do (loop for char across string
35 do (incf (gethash char weight-table 0))))
36 (let (alist)
37 (maphash (lambda (char weight)
38 (push (make-huffman-node (string char) weight) alist))
39 weight-table)
40 (sort alist #'string< :key #'huffman-node-key))))
42 (defun make-huffman-tree (corpus)
43 (labels ((merge-table (table)
44 (setf table (stable-sort table #'< :key #'huffman-node-weight))
45 (push (make-huffman-pair (pop table) (pop table))
46 table)
47 (if (second table)
48 (merge-table table)
49 (car table)))
50 (finish-tree (tree)
51 (if (huffman-pair-p tree)
52 (list (huffman-node-key tree)
53 (finish-tree (huffman-pair-left tree))
54 (finish-tree (huffman-pair-right tree)))
55 (huffman-node-key tree))))
56 (finish-tree (merge-table (huffman-weights corpus)))))
58 (defun huffman-decode (code tree)
59 (let ((original code))
60 (labels ((pop-bit ()
61 (let* ((bits (integer-length code))
62 (bit (ldb (byte 1 (- bits 2)) code)))
63 (setf code (dpb 1 (byte 1 (- bits 2))
64 (ldb (byte (- bits 1) 0) code)))
65 bit))
66 (choose (branch)
67 (destructuring-bind (key left right) branch
68 (declare (ignore key))
69 (if (zerop (pop-bit))
70 left
71 right)))
72 (decode (branch)
73 (when (zerop code)
74 (error "Invalid Huffman-code: ~S" original))
75 (let ((next (choose branch)))
76 (cond ((consp next)
77 (decode next))
78 ((< 1 code)
79 (concatenate 'string next (decode tree)))
81 next)))))
82 (decode tree))))
84 (defun huffman-match (char node)
85 (if (consp node)
86 (find char (the string (car node)) :test #'equal)
87 (eql char (character node))))
89 (defun huffman-encode (string tree)
90 (let ((code 1))
91 (labels ((encode (bit char tree)
92 (when bit
93 (setf code (+ (ash code 1) bit)))
94 (if (consp tree)
95 (destructuring-bind (key left right) tree
96 (declare (ignore key))
97 (cond ((huffman-match char left)
98 (encode 0 char left))
99 ((huffman-match char right)
100 (encode 1 char right))
102 ;; unknown
103 (return-from huffman-encode nil))))
104 (unless (huffman-match char tree)
105 (error "Error encoding ~S (bad tree)." char)))))
106 (loop for char across string
107 do (encode nil char tree))
108 code)))