Update release date
[zpb-ttf.git] / cmap.lisp
blob4a5bc23444b0716e91d75852c5334f3d3c262e0d
1 ;;; Copyright (c) 2006 Zachary Beane, All Rights Reserved
2 ;;;
3 ;;; Redistribution and use in source and binary forms, with or without
4 ;;; modification, are permitted provided that the following conditions
5 ;;; are met:
6 ;;;
7 ;;; * Redistributions of source code must retain the above copyright
8 ;;; notice, this list of conditions and the following disclaimer.
9 ;;;
10 ;;; * Redistributions in binary form must reproduce the above
11 ;;; copyright notice, this list of conditions and the following
12 ;;; disclaimer in the documentation and/or other materials
13 ;;; provided with the distribution.
14 ;;;
15 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
16 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
21 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 ;;;
27 ;;; Loading data from the "cmap" table.
28 ;;;
29 ;;; https://docs.microsoft.com/en-us/typography/opentype/spec/cmap
30 ;;; http://developer.apple.com/fonts/TTRefMan/RM06/Chap6cmap.html
31 ;;;
32 ;;; $Id: cmap.lisp,v 1.15 2006/03/23 22:23:32 xach Exp $
34 (in-package #:zpb-ttf)
36 (deftype cmap-value-table ()
37 `(array (unsigned-byte 16) (*)))
39 ;;; FIXME: "unicode-cmap" is actually a format 4 character map that
40 ;;; happens to currently be loaded from a Unicode-compatible
41 ;;; subtable. However, other character maps (like Microsoft's Symbol
42 ;;; encoding) also use format 4 and could be loaded with these
43 ;;; "unicode" objects and functions.
45 (defclass unicode-cmap ()
46 ((segment-count :initarg :segment-count :reader segment-count)
47 (end-codes :initarg :end-codes :reader end-codes)
48 (start-codes :initarg :start-codes :reader start-codes)
49 (id-deltas :initarg :id-deltas :reader id-deltas)
50 (id-range-offsets :initarg :id-range-offsets :reader id-range-offsets)
51 (glyph-indexes :initarg :glyph-indexes :accessor glyph-indexes)))
53 (defclass format-12-cmap ()
54 ((group-count :initarg :group-count :reader group-count)
55 (start-codes :initarg :start-codes :reader start-codes)
56 (end-codes :initarg :end-codes :reader end-codes)
57 (glyph-starts :initarg :glyph-starts :accessor glyph-starts)))
59 (defun load-unicode-cmap-format12 (stream)
60 "Load a Unicode character map of type 12 from STREAM starting at the
61 current offset. Assumes format is already read and checked."
62 (let* ((reserved (read-uint16 stream))
63 (subtable-length (read-uint32 stream))
64 (language-code (read-uint32 stream))
65 (group-count (read-uint32 stream))
66 (start-codes (make-array group-count
67 :element-type '(unsigned-byte 32)
68 :initial-element 0))
69 (end-codes (make-array group-count
70 :element-type '(unsigned-byte 32)
71 :initial-element 0))
72 (glyph-starts (make-array group-count
73 :element-type '(unsigned-byte 32)
74 :initial-element 0)))
75 (declare (ignore reserved language-code subtable-length))
76 (loop for i below group-count
77 do (setf (aref start-codes i) (read-uint32 stream)
78 (aref end-codes i) (read-uint32 stream)
79 (aref glyph-starts i) (read-uint32 stream)))
80 (make-instance 'format-12-cmap
81 :group-count group-count
82 :start-codes start-codes
83 :end-codes end-codes
84 :glyph-starts glyph-starts)))
86 (defun load-unicode-cmap (stream)
87 "Load a Unicode character map of type 4 or 12 from STREAM starting at
88 the current offset."
89 (let ((format (read-uint16 stream)))
90 (when (= format 12)
91 (return-from load-unicode-cmap (load-unicode-cmap-format12 stream)))
92 (when (/= format 4)
93 (error 'unsupported-format
94 :location "\"cmap\" subtable"
95 :actual-value format
96 :expected-values (list 4))))
97 (let ((table-start (- (file-position stream) 2))
98 (subtable-length (read-uint16 stream))
99 (language-code (read-uint16 stream))
100 (segment-count (/ (read-uint16 stream) 2))
101 (search-range (read-uint16 stream))
102 (entry-selector (read-uint16 stream))
103 (range-shift (read-uint16 stream)))
104 (declare (ignore language-code search-range entry-selector range-shift))
105 (flet ((make-and-load-array (&optional (size segment-count))
106 (loop with array = (make-array size
107 :element-type '(unsigned-byte 16)
108 :initial-element 0)
109 for i below size
110 do (setf (aref array i) (read-uint16 stream))
111 finally (return array)))
112 (make-signed (i)
113 (if (logbitp 15 i)
114 (1- (- (logandc2 #xFFFF i)))
115 i)))
116 (let ((end-codes (make-and-load-array))
117 (pad (read-uint16 stream))
118 (start-codes (make-and-load-array))
119 (id-deltas (make-and-load-array))
120 (id-range-offsets (make-and-load-array))
121 (glyph-index-array-size (/ (- subtable-length
122 (- (file-position stream)
123 table-start))
124 2)))
125 (declare (ignore pad))
126 (make-instance 'unicode-cmap
127 :segment-count segment-count
128 :end-codes end-codes
129 :start-codes start-codes
130 ;; these are really signed, so sign them
131 :id-deltas (map 'vector #'make-signed id-deltas)
132 :id-range-offsets id-range-offsets
133 :glyph-indexes (make-and-load-array glyph-index-array-size))))))
136 (defun %decode-format-4-cmap-code-point-index (code-point cmap index)
137 "Return the index of the Unicode CODE-POINT in a format 4 CMAP, if
138 present, otherwise NIL. Assumes INDEX points to the element of the
139 CMAP arrays (END-CODES etc) corresponding to code-point."
140 (with-slots (end-codes start-codes
141 id-deltas id-range-offsets
142 glyph-indexes)
143 cmap
144 (declare (type cmap-value-table
145 end-codes start-codes
146 id-range-offsets
147 glyph-indexes))
148 (let ((start-code (aref start-codes index))
149 (end-code (aref end-codes index))
150 (id-range-offset (aref id-range-offsets index))
151 (id-delta (aref id-deltas index)))
152 (cond
153 ((< code-point start-code)
155 ;; ignore empty final segment
156 ((and (= 65535 start-code end-code))
158 ((zerop id-range-offset)
159 (logand #xFFFF (+ code-point id-delta)))
161 (let* ((glyph-index-offset (- (+ index
162 (ash id-range-offset -1)
163 (- code-point start-code))
164 (segment-count cmap)))
165 (glyph-index (aref (glyph-indexes cmap)
166 glyph-index-offset)))
167 (logand #xFFFF
168 (+ glyph-index id-delta))))))))
170 (defun %decode-format-12-cmap-code-point-index (code-point cmap index)
171 "Return the index of the Unicode CODE-POINT in a format 12 CMAP, if
172 present, otherwise NIL. Assumes INDEX points to the element of the
173 CMAP arrays (END-CODES etc) corresponding to code-point."
174 (with-slots (end-codes start-codes glyph-starts)
175 cmap
176 (declare (type (simple-array (unsigned-byte 32))
177 end-codes start-codes glyph-starts))
178 (let ((start-code (aref start-codes index))
179 (start-glyph-id (aref glyph-starts index)))
180 (if (< code-point start-code)
182 (+ start-glyph-id (- code-point start-code))))))
184 (defgeneric code-point-font-index-from-cmap (code-point cmap)
185 (:documentation "Return the index of the Unicode CODE-POINT in
186 CMAP, if present, otherwise NIL.")
187 (:method (code-point (cmap unicode-cmap))
188 (with-slots (end-codes)
189 cmap
190 (declare (type cmap-value-table end-codes))
191 (dotimes (i (segment-count cmap) 1)
192 (when (<= code-point (aref end-codes i))
193 (return (%decode-format-4-cmap-code-point-index code-point cmap i))))))
194 (:method (code-point (cmap format-12-cmap))
195 (with-slots (end-codes)
196 cmap
197 (declare (type (simple-array (unsigned-byte 32)) end-codes))
198 (dotimes (i (group-count cmap) 1)
199 (when (<= code-point (aref end-codes i))
200 (return
201 (%decode-format-12-cmap-code-point-index code-point cmap i)))))))
203 (defmethod invert-character-map (font-loader)
204 "Return a vector mapping font indexes to code points."
205 (with-slots (start-codes end-codes)
206 (character-map font-loader)
207 (let ((points (make-array (glyph-count font-loader) :initial-element -1))
208 (cmap (character-map font-loader)))
209 (dotimes (i (length end-codes) points)
210 (loop for j from (aref start-codes i) to (aref end-codes i)
211 for font-index
212 = (typecase cmap
213 (unicode-cmap
214 (%decode-format-4-cmap-code-point-index j cmap i))
215 (format-12-cmap
216 (%decode-format-12-cmap-code-point-index j cmap i))
218 (code-point-font-index-from-cmap j cmap)))
219 when (minusp (svref points font-index))
220 do (setf (svref points font-index) j))))))
223 (defgeneric code-point-font-index (code-point font-loader)
224 (:documentation "Return the index of the Unicode CODE-POINT in
225 FONT-LOADER, if present, otherwise NIL.")
226 (:method (code-point font-loader)
227 (code-point-font-index-from-cmap code-point (character-map font-loader))))
229 (defgeneric font-index-code-point (glyph-index font-loader)
230 (:documentation "Return the code-point for a given glyph index.")
231 (:method (glyph-index font-loader)
232 (let ((point (aref (inverse-character-map font-loader) glyph-index)))
233 (if (plusp point)
234 point
235 0))))
237 (defun %load-cmap-info (font-loader platform specific)
238 (seek-to-table "cmap" font-loader)
239 (with-slots (input-stream)
240 font-loader
241 (let ((start-pos (file-position input-stream))
242 (version-number (read-uint16 input-stream))
243 (subtable-count (read-uint16 input-stream))
244 (foundp nil))
245 (declare (ignore version-number))
246 (loop repeat subtable-count
247 for platform-id = (read-uint16 input-stream)
248 for platform-specific-id = (read-uint16 input-stream)
249 for offset = (+ start-pos (read-uint32 input-stream))
250 when (and (= platform-id platform)
251 (or (eql platform-specific-id specific)
252 (and (consp specific)
253 (member platform-specific-id specific))))
255 (file-position input-stream offset)
256 (setf (character-map font-loader) (load-unicode-cmap input-stream))
257 (setf (inverse-character-map font-loader)
258 (invert-character-map font-loader)
259 foundp t)
260 (return))
261 foundp)))
263 (defun %unknown-cmap-error (font-loader)
264 (seek-to-table "cmap" font-loader)
265 (with-slots (input-stream)
266 font-loader
267 (let ((start-pos (file-position input-stream))
268 (version-number (read-uint16 input-stream))
269 (subtable-count (read-uint16 input-stream))
270 (cmaps nil))
271 (declare (ignore version-number))
272 (loop repeat subtable-count
273 for platform-id = (read-uint16 input-stream)
274 for platform-specific-id = (read-uint16 input-stream)
275 for offset = (+ start-pos (read-uint32 input-stream))
276 for pos = (file-position input-stream)
277 do (file-position input-stream offset)
278 (push (list (platform-id-name platform-id)
279 (encoding-id-name platform-id platform-specific-id)
280 :type (read-uint16 input-stream))
281 cmaps)
282 (file-position input-stream pos))
283 (error "Could not find supported character map in font file~% available cmap tables = ~s"
284 cmaps))))
286 (defmethod load-cmap-info ((font-loader font-loader))
287 (or (%load-cmap-info font-loader +unicode-platform-id+
288 +unicode-2.0-full-encoding-id+) ;; full unicode
289 (%load-cmap-info font-loader +microsoft-platform-id+
290 +microsoft-unicode-ucs4-encoding-id+) ;; full unicode
291 (%load-cmap-info font-loader +microsoft-platform-id+
292 +microsoft-unicode-bmp-encoding-id+) ;; bmp
293 (%load-cmap-info font-loader +unicode-platform-id+
294 +unicode-2.0-encoding-id+) ;; bmp
295 (%load-cmap-info font-loader +unicode-platform-id+
296 '(0 1 2 3 4)) ;; all except variation and last-resort
297 (%load-cmap-info font-loader +microsoft-platform-id+
298 +microsoft-symbol-encoding-id+) ;; ms symbol
299 (%unknown-cmap-error font-loader)))
301 (defun available-character-maps (loader)
302 (seek-to-table "cmap" loader)
303 (let ((stream (input-stream loader)))
304 (let ((start-pos (file-position stream))
305 (version-number (read-uint16 stream))
306 (subtable-count (read-uint16 stream)))
307 (declare (ignore start-pos))
308 (assert (zerop version-number))
309 (dotimes (i subtable-count)
310 (let ((platform-id (read-uint16 stream))
311 (encoding-id (read-uint16 stream))
312 (offset (read-uint32 stream)))
313 (declare (ignore offset))
314 (format t "~D (~A) - ~D (~A)~%"
315 platform-id (platform-id-name platform-id)
316 encoding-id (encoding-id-name platform-id encoding-id)))))))