Updated version to 1.0.2.
[zpb-ttf.git] / cmap.lisp
blob85d6e95aa3c5ecb5f2f9e993b418f9241d8d0578
1 ;;; Copyright (c) 2006 Zachary Beane, All Rights Reserved
2 ;;;
3 ;;; Redistribution and use in source and binary forms, with or without
4 ;;; modification, are permitted provided that the following conditions
5 ;;; are met:
6 ;;;
7 ;;; * Redistributions of source code must retain the above copyright
8 ;;; notice, this list of conditions and the following disclaimer.
9 ;;;
10 ;;; * Redistributions in binary form must reproduce the above
11 ;;; copyright notice, this list of conditions and the following
12 ;;; disclaimer in the documentation and/or other materials
13 ;;; provided with the distribution.
14 ;;;
15 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
16 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
21 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 ;;;
27 ;;; Loading data from the "cmap" table.
28 ;;;
29 ;;; http://www.microsoft.com/OpenType/OTSpec/cmap.htm
30 ;;; http://developer.apple.com/fonts/TTRefMan/RM06/Chap6cmap.html
31 ;;;
32 ;;; $Id: cmap.lisp,v 1.15 2006/03/23 22:23:32 xach Exp $
34 (in-package #:zpb-ttf)
36 (deftype cmap-value-table ()
37 `(array (unsigned-byte 16) (*)))
39 ;;; FIXME: "unicode-cmap" is actually a format 4 character map that
40 ;;; happens to currently be loaded from a Unicode-compatible
41 ;;; subtable. However, other character maps (like Microsoft's Symbol
42 ;;; encoding) also use format 4 and could be loaded with these
43 ;;; "unicode" objects and functions.
45 (defclass unicode-cmap ()
46 ((segment-count :initarg :segment-count :reader segment-count)
47 (end-codes :initarg :end-codes :reader end-codes)
48 (start-codes :initarg :start-codes :reader start-codes)
49 (id-deltas :initarg :id-deltas :reader id-deltas)
50 (id-range-offsets :initarg :id-range-offsets :reader id-range-offsets)
51 (glyph-indexes :initarg :glyph-indexes :accessor glyph-indexes)))
53 (defun load-unicode-cmap (stream)
54 "Load a Unicode character map of type 4 from STREAM starting at the
55 current offset."
56 (let ((format (read-uint16 stream)))
57 (when (/= format 4)
58 (error 'unsupported-format
59 :location "\"cmap\" subtable"
60 :actual-value format
61 :expected-values (list 4))))
62 (let ((table-start (- (file-position stream) 2))
63 (subtable-length (read-uint16 stream))
64 (language-code (read-uint16 stream))
65 (segment-count (/ (read-uint16 stream) 2))
66 (search-range (read-uint16 stream))
67 (entry-selector (read-uint16 stream))
68 (range-shift (read-uint16 stream)))
69 (declare (ignore language-code search-range entry-selector range-shift))
70 (flet ((make-and-load-array (&optional (size segment-count))
71 (loop with array = (make-array size
72 :element-type '(unsigned-byte 16)
73 :initial-element 0)
74 for i below size
75 do (setf (aref array i) (read-uint16 stream))
76 finally (return array)))
77 (make-signed (i)
78 (if (logbitp 15 i)
79 (1- (- (logandc2 #xFFFF i)))
80 i)))
81 (let ((end-codes (make-and-load-array))
82 (pad (read-uint16 stream))
83 (start-codes (make-and-load-array))
84 (id-deltas (make-and-load-array))
85 (id-range-offsets (make-and-load-array))
86 (glyph-index-array-size (/ (- subtable-length
87 (- (file-position stream)
88 table-start))
89 2)))
90 (declare (ignore pad))
91 (make-instance 'unicode-cmap
92 :segment-count segment-count
93 :end-codes end-codes
94 :start-codes start-codes
95 ;; these are really signed, so sign them
96 :id-deltas (map 'vector #'make-signed id-deltas)
97 :id-range-offsets id-range-offsets
98 :glyph-indexes (make-and-load-array glyph-index-array-size))))))
101 (defmethod invert-character-map (font-loader)
102 "Return a vector mapping font indexes to code points."
103 (with-slots (start-codes end-codes)
104 (character-map font-loader)
105 (declare (type cmap-value-table start-codes end-codes))
106 (let ((points (make-array (glyph-count font-loader) :initial-element -1)))
107 (dotimes (i (1- (length end-codes)) points)
108 (loop for j from (aref start-codes i) to (aref end-codes i)
109 for font-index = (code-point-font-index j font-loader)
110 when (minusp (svref points font-index)) do
111 (setf (svref points font-index) j))))))
114 (defgeneric code-point-font-index (code-point font-loader)
115 (:documentation "Return the index of the Unicode CODE-POINT in
116 FONT-LOADER, if present, otherwise NIL.")
117 (:method (code-point font-loader)
118 (let ((cmap (character-map font-loader)))
119 (with-slots (end-codes start-codes
120 id-deltas id-range-offsets
121 glyph-indexes)
122 cmap
123 (declare (type cmap-value-table
124 end-codes start-codes
125 id-range-offsets
126 glyph-indexes))
127 (dotimes (i (segment-count cmap) 1)
128 (when (<= code-point (aref end-codes i))
129 (return
130 (let ((start-code (aref start-codes i))
131 (id-range-offset (aref id-range-offsets i))
132 (id-delta (aref id-deltas i)))
133 (cond ((< code-point start-code)
135 ((zerop id-range-offset)
136 (logand #xFFFF (+ code-point id-delta)))
138 (let* ((glyph-index-offset (- (+ i
139 (ash id-range-offset -1)
140 (- code-point start-code))
141 (segment-count cmap)))
142 (glyph-index (aref (glyph-indexes cmap)
143 glyph-index-offset)))
144 (logand #xFFFF
145 (+ glyph-index id-delta)))))))))))))
147 (defgeneric font-index-code-point (glyph-index font-loader)
148 (:documentation "Return the code-point for a given glyph index.")
149 (:method (glyph-index font-loader)
150 (let ((point (aref (inverse-character-map font-loader) glyph-index)))
151 (if (plusp point)
152 point
153 0))))
155 (defmethod load-cmap-info ((font-loader font-loader))
156 (seek-to-table "cmap" font-loader)
157 (with-slots (input-stream)
158 font-loader
159 (let ((start-pos (file-position input-stream))
160 (version-number (read-uint16 input-stream))
161 (subtable-count (read-uint16 input-stream))
162 (foundp nil))
163 (declare (ignore version-number))
164 (loop repeat subtable-count
165 for platform-id = (read-uint16 input-stream)
166 for platform-specific-id = (read-uint16 input-stream)
167 for offset = (+ start-pos (read-uint32 input-stream))
168 when (and (= platform-id
169 +microsoft-platform-id+)
170 (= platform-specific-id
171 +microsoft-unicode-bmp-encoding-id+))
173 (file-position input-stream offset)
174 (setf (character-map font-loader) (load-unicode-cmap input-stream))
175 (setf (inverse-character-map font-loader)
176 (invert-character-map font-loader)
177 foundp t)
178 (return))
179 (unless foundp
180 (error "Could not find supported character map in font file")))))
182 (defun available-character-maps (loader)
183 (seek-to-table "cmap" loader)
184 (let ((stream (input-stream loader)))
185 (let ((start-pos (file-position stream))
186 (version-number (read-uint16 stream))
187 (subtable-count (read-uint16 stream)))
188 (declare (ignore start-pos))
189 (assert (zerop version-number))
190 (dotimes (i subtable-count)
191 (let ((platform-id (read-uint16 stream))
192 (encoding-id (read-uint16 stream))
193 (offset (read-uint32 stream)))
194 (declare (ignore offset))
195 (format t "~D (~A) - ~D (~A)~%"
196 platform-id (platform-id-name platform-id)
197 encoding-id (encoding-id-name platform-id encoding-id)))))))