1 ;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp -*-
3 ;; This code is free software; you can redistribute it and/or
4 ;; modify it under the terms of the version 2.1 of
5 ;; the GNU Lesser General Public License as published by
6 ;; the Free Software Foundation, as clarified by the
7 ;; preamble found here:
8 ;; http://opensource.franz.com/preamble.html
10 ;; This program is distributed in the hope that it will be useful,
11 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ;; GNU General Public License for more details.
15 ;; You should have received a copy of the GNU Lesser General
16 ;; Public License along with this library; if not, write to the
17 ;; Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
18 ;; Boston, MA 02110-1301, USA
20 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
21 (asdf:oos
'asdf
:load-op
:split-sequence
))
23 (defpackage :parse-encodings
24 (:use
:cl
:split-sequence
)
25 (:export
#:parse-unicode-mapping
))
27 (in-package :parse-encodings
)
29 (defvar +unicode-replacement-char-code
+ #xFFFD
)
31 (defun parse-unicode-mapping (file &optional
(array-size 256))
32 (flet ((parse-hex (str)
33 (assert (string-equal "0x" (string-downcase (subseq str
0 2))))
34 (parse-integer (subseq str
2) :radix
16))
37 (string-left-trim '(#\space
)
38 (nsubstitute #\space
#\tab line
)))))
39 (let ((arr (make-array array-size
:element-type
'(unsigned-byte 32)
40 :initial-element
+unicode-replacement-char-code
+)))
41 (with-open-file (fin file
)
42 (loop :for line
= (sanitize-line (read-line fin nil nil
))
44 (when (and (plusp (length line
)) (char= #\
0 (char line
0)))
45 (let* ((split (split-sequence #\space line
:remove-empty-subseqs t
))
46 (index (parse-hex (first split
)))
47 (code (parse-hex (second split
))))
48 (setf (aref arr index
) code
)))))