1 ;;; uvs.el --- utility for UVS (format 14) cmap subtables in OpenType fonts.
3 ;; Copyright (C) 2014-2017 Free Software Foundation, Inc.
5 ;; Author: YAMAMOTO Mitsuharu <mituharu@math.s.chiba-u.ac.jp>
7 ;; This file is part of GNU Emacs.
9 ;; GNU Emacs is free software: you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation, either version 3 of the License, or
12 ;; (at your option) any later version.
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
24 ;; To extract a C array definition of a UVS table for the Adobe-Japan1
25 ;; character collection from an IVD Sequences file, execute
26 ;; $ emacs -batch -l uvs.el \
27 ;; --eval '(uvs-print-table-ivd "IVD_Sequences.txt" "Adobe-Japan1")' \
32 (defun uvs-fields-total-size (fields)
33 (apply '+ (mapcar (lambda (field) (get field
'uvs-field-size
)) fields
)))
35 ;;; Fields in Format 14 header.
36 (defconst uvs-format-14-header-fields
37 '(format length num-var-selector-records
))
38 (put 'format
'uvs-field-size
2)
39 (put 'length
'uvs-field-size
4)
40 (put 'num-var-selector-records
'uvs-field-size
4)
41 (defconst uvs-format-14-header-size
42 (uvs-fields-total-size uvs-format-14-header-fields
))
44 ;;; Fields in Variation Selector Record.
45 (defconst uvs-variation-selector-record-fields
46 '(var-selector default-uvs-offset non-default-uvs-offset
))
47 (put 'var-selector
'uvs-field-size
3)
48 (put 'default-uvs-offset
'uvs-field-size
4)
49 (put 'non-default-uvs-offset
'uvs-field-size
4)
50 (defconst uvs-variation-selector-record-size
51 (uvs-fields-total-size uvs-variation-selector-record-fields
))
53 ;;; Fields in Non-Default UVS Table.
54 (defconst uvs-non-default-uvs-table-header-fields
'(num-uvs-mappings))
55 (put 'num-uvs-mappings
'uvs-field-size
4)
56 (defconst uvs-non-default-uvs-table-header-size
57 (uvs-fields-total-size uvs-non-default-uvs-table-header-fields
))
59 ;;; Fields in UVS Mapping.
60 (defconst uvs-uvs-mapping-fields
'(unicode-value glyph-id
))
61 (put 'unicode-value
'uvs-field-size
3)
62 (put 'glyph-id
'uvs-field-size
2)
63 (defconst uvs-uvs-mapping-size
64 (uvs-fields-total-size uvs-uvs-mapping-fields
))
66 (defun uvs-alist-from-ivd (collection-id sequence-id-to-glyph-function
)
67 "Create UVS alist from IVD Sequences and COLLECTION-ID.
68 The IVD (Ideographic Variation Database) Sequences are obtained
69 from the contents of the current buffer, and should be in the
70 form of IVD_Sequences.txt specified in Unicode Technical Standard
71 #37. COLLECTION-ID is a string specifying the identifier of the
72 collection to extract (e.g., \"Adobe-Japan1\").
73 SEQUENCE-ID-TO-GLYPH-FUNC is a function to convert an identifier
74 string of the sequence to a glyph number. UVS alist is of the
76 ((SELECTOR1 . ((BASE11 . GLYPH11) (BASE12 . GLYPH12) ...))
77 (SELECTOR2 . ((BASE21 . GLYPH21) (BASE22 . GLYPH22) ...)) ...),
78 where selectors and bases are sorted in ascending order."
80 (goto-char (point-min))
81 (while (re-search-forward
82 (concat "^[[:blank:]]*"
83 "\\([[:xdigit:]]+\\) \\([[:xdigit:]]+\\)"
84 "[[:blank:]]*;[[:blank:]]*"
85 "\\(?:" (regexp-quote collection-id
) "\\)"
86 "[[:blank:]]*;[[:blank:]]*"
87 "\\([^\n[:blank:]]+\\)"
88 "[[:blank:]]*$") nil t
)
89 (let* ((base (string-to-number (match-string 1) 16))
90 (selector (string-to-number (match-string 2) 16))
91 (sequence-id (match-string 3))
92 (glyph (funcall sequence-id-to-glyph-function sequence-id
)))
93 (let ((selector-bgs (assq selector uvs-alist
))
94 (base-glyph (cons base glyph
)))
96 (setcdr selector-bgs
(cons base-glyph
(cdr selector-bgs
)))
97 (push (cons selector
(list base-glyph
)) uvs-alist
)))))
98 (dolist (selector-bgs uvs-alist
)
100 (sort (cdr selector-bgs
)
101 (lambda (bg1 bg2
) (< (car bg1
) (car bg2
))))))
102 (sort uvs-alist
(lambda (sb1 sb2
) (< (car sb1
) (car sb2
))))))
104 (defun uvs-int-to-bytes (value size
)
105 "Convert integer VALUE to a list of SIZE bytes.
106 The most significant byte comes first."
109 (push (logand value
#xff
) result
)
110 (setq value
(lsh value -
8)))
113 (defun uvs-insert-fields-as-bytes (fields &rest values
)
114 "Insert VALUES for FIELDS as a sequence of bytes to the current buffer.
115 VALUES and FIELDS are lists of integers and field symbols,
116 respectively. Byte length of each value is determined by the
117 `uvs-field-size' property of the corresponding field."
119 (let ((field (car fields
))
120 (value (car values
)))
121 (insert (apply 'unibyte-string
122 (uvs-int-to-bytes value
(get field
'uvs-field-size
))))
123 (setq fields
(cdr fields
) values
(cdr values
)))))
125 (defun uvs-insert-alist-as-bytes (uvs-alist)
126 "Insert UVS-ALIST as a sequence of bytes to the current buffer."
127 (let* ((nrecords (length uvs-alist
)) ; # of selectors
130 (lambda (selector-bgs) (length (cdr selector-bgs
)))
133 (+ uvs-format-14-header-size
134 (* uvs-variation-selector-record-size nrecords
))))
135 (uvs-insert-fields-as-bytes uvs-format-14-header-fields
137 (+ uvs-format-14-header-size
138 (* uvs-variation-selector-record-size
140 (* uvs-non-default-uvs-table-header-size
142 (* uvs-uvs-mapping-size total-nmappings
))
144 (dolist (selector-bgs uvs-alist
)
145 (uvs-insert-fields-as-bytes uvs-variation-selector-record-fields
147 0 ; No Default UVS Tables.
149 (setq non-default-offset
150 (+ non-default-offset
151 uvs-non-default-uvs-table-header-size
152 (* (length (cdr selector-bgs
)) uvs-uvs-mapping-size
))))
153 (dolist (selector-bgs uvs-alist
)
154 (uvs-insert-fields-as-bytes uvs-non-default-uvs-table-header-fields
155 (length (cdr selector-bgs
)))
156 (dolist (base-glyph (cdr selector-bgs
))
157 (uvs-insert-fields-as-bytes uvs-uvs-mapping-fields
159 (cdr base-glyph
))))))
161 (defun uvs-dump (&optional bytes-per-line separator separator-eol line-prefix
)
162 "Print the current buffer as in representation of C array contents."
163 (or bytes-per-line
(setq bytes-per-line
8))
164 (or separator
(setq separator
", "))
165 (or separator-eol
(setq separator-eol
","))
166 (or line-prefix
(setq line-prefix
" "))
167 (goto-char (point-min))
168 (while (> (- (point-max) (point)) bytes-per-line
)
170 (princ (mapconcat (lambda (byte) (format "0x%02x" byte
))
172 (buffer-substring (point) (+ (point) bytes-per-line
)))
174 (princ separator-eol
)
176 (forward-char bytes-per-line
))
178 (princ (mapconcat (lambda (byte) (format "0x%02x" byte
))
180 (buffer-substring (point) (point-max)))
184 (defun uvs-print-table-ivd (filename collection-id
185 &optional sequence-id-to-glyph-func
)
186 "Print a C array definition of a UVS table for IVD Sequences.
187 FILENAME specifies the IVD Sequences file. COLLECTION-ID is a
188 string specifying the identifier of the collection to
189 extract (e.g., \"Adobe-Japan1\"). SEQUENCE-ID-TO-GLYPH-FUNC is a
190 function to convert an identifier string of the sequence to a
191 glyph number, and nil means to convert \"CID\\+[0-9]+\" to the
192 corresponding number."
193 (or sequence-id-to-glyph-func
194 (setq sequence-id-to-glyph-func
195 (lambda (sequence-id)
196 (string-match "\\`CID\\+\\([[:digit:]]+\\)\\'" sequence-id
)
197 (string-to-number (match-string 1 sequence-id
)))))
200 (insert-file-contents filename
)
201 (uvs-alist-from-ivd collection-id
202 sequence-id-to-glyph-func
))))
203 (set-binary-mode 'stdout t
)
204 (princ "/* Automatically generated by uvs.el. */\n")
206 (format "static const unsigned char mac_uvs_table_%s_bytes[] =\n {\n"
207 (replace-regexp-in-string "[^_[:alnum:]]" "_"
208 (downcase collection-id
))))
210 (set-buffer-multibyte nil
)
211 (uvs-insert-alist-as-bytes uvs-alist
)