1 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; While most of SBCL is derived from the CMU CL system, the test
5 ;;;; files (like this one) were written from scratch after the fork
8 ;;;; This software is in the public domain and is provided with
9 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
10 ;;;; more information.
12 (use-package :sb-unicode
)
14 (defun split-string (string delimiter
)
15 (loop for begin
= 0 then
(1+ end
)
16 for end
= (position delimiter string
) then
(position delimiter string
:start begin
)
17 collect
(subseq string begin end
)
20 ;; Taken straight out of the common lisp cookbook
21 (defun replace-all (part replacement string
&key
(test #'char
=))
22 "Returns a new string in which all the occurences of the part
23 is replaced with replacement."
24 (with-output-to-string (out)
25 (loop with part-length
= (length part
)
26 for old-pos
= 0 then
(+ pos part-length
)
27 for pos
= (search part string
30 do
(write-string string out
32 :end
(or pos
(length string
)))
33 when pos do
(write-string replacement out
)
36 (defun test-line (line)
37 (destructuring-bind (%cp %name %gc ccc %bidi decomp-map
38 %decimal-digit %digit %numeric
39 %bidi-mirrored %old-name old-comment
40 simple-up simple-down simple-title
)
41 (split-string line
#\
;)
42 (declare (ignore decomp-map old-comment simple-up
43 simple-down simple-title
))
44 (let* ((cp (parse-integer %cp
:radix
16))
45 (char #+sb-unicode
(code-char cp
)
49 (return-from test-line t
)))
50 (gc (intern (string-upcase %gc
) "KEYWORD"))
51 (bidi (intern (string-upcase %bidi
) "KEYWORD"))
52 ;; See `normalize-character-name` in ucd.lisp for a discussion
53 ;; of U+1F5CF (PAGE) and the attendant standards-compliance issues2
54 (name (unless (or (position #\
< %name
) (= cp
#x1F5CF
))
55 (substitute #\_
#\Space %name
)))
56 (old-name (unless (string= %old-name
"")
57 (substitute #\_
#\Space %old-name
)))
58 (char-from-name (name-char name
))
60 ;; Exclude all the renaming conflicts, including the mass-rename of Georgian
65 "LATIN_CAPITAL_LETTER_YOGH"
66 "LATIN_SMALL_LETTER_YOGH"
67 "CYRILLIC_CAPITAL_LETTER_E"
68 "CYRILLIC_CAPITAL_LETTER_I"
69 "CYRILLIC_SMALL_LETTER_E"
70 "CYRILLIC_SMALL_LETTER_I"
73 "HANGUL_LETTER_KIYEOK"
75 "PARENTHESIZED_HANGUL_CIEUC"
76 "PARENTHESIZED_HANGUL_KIYEOK"
77 "PARENTHESIZED_HANGUL_PIEUP"
78 "CIRCLED_HANGUL_CIEUC"
79 "CIRCLED_HANGUL_KIYEOK"
80 "CIRCLED_HANGUL_PIEUP"
81 "HALFWIDTH_HANGUL_LETTER_CIEUC"
82 "HALFWIDTH_HANGUL_LETTER_KIYEOK"
83 "HALFWIDTH_HANGUL_LETTER_PIEUP"
86 (or (< (length old-name
) 14)
87 (string/= old-name
"GEORGIAN_SMALL" :end1
14)))
88 (name-char old-name
)))
89 (decimal-digit (parse-integer %decimal-digit
:junk-allowed t
))
90 (digit (parse-integer %digit
:junk-allowed t
))
91 (numeric (if (string= %numeric
"") nil
(read-from-string %numeric
)))
92 (bidi-mirrored (string= %bidi-mirrored
"Y")))
94 (assert (char= char char-from-name
)))
95 (when char-from-old-name
96 (assert (char= char char-from-old-name
)))
97 (assert (eql gc
(general-category char
)))
98 (assert (= (parse-integer ccc
) (combining-class char
)))
99 (assert (eql bidi
(bidi-class char
)))
100 (assert (eql decimal-digit
(decimal-value char
)))
101 (assert (eql digit
(digit-value char
)))
102 (assert (eql numeric
(numeric-value char
)))
103 (assert (eql bidi-mirrored
(mirrored-p char
)))
104 (assert (string= old-name
(unicode-1-name char
))))))
106 (defun test-property-reads ()
107 (declare (optimize (debug 2)))
108 (with-open-file (s (merge-pathnames
110 :directory
'(:relative
:up
"tools-for-build")
111 :name
"UnicodeData" :type
"txt")
112 (or *load-truename
* *compile-file-truename
*)))
113 (with-test (:name
(:unicode-properties
))
114 (loop for line
= (read-line s nil nil
)
116 do
(test-line line
)))))
118 (test-property-reads)
120 (defun codepoint-or-range (string)
124 #-sb-unicode
(< i
256))
125 (flet ((parse (str) (parse-integer str
:radix
16 :junk-allowed t
)))
126 (let ((parts (remove "" (split-string string
#\.
) :test
#'string
=)))
128 (loop for i from
(parse (car parts
)) to
(parse (cadr parts
)) collect i
)
129 (mapcar #'parse parts
))))))
131 (defun test-property-line (fn line
)
132 (destructuring-bind (%codepoints value
) (split-string line
#\
;)
133 (let* ((codepoints (codepoint-or-range %codepoints
))
134 (property (remove #\Space value
))
137 (subseq property
0 (position #\
# property
)))
140 (loop for i in codepoints do
141 (unless (eql expected
(funcall fn
(code-char i
)))
142 (error "Character ~S has the wrong value for the tested property.
144 (code-char i
) expected
(funcall fn
(code-char i
))))))))
146 (defun test-bidi-class ()
147 (declare (optimize (debug 2)))
148 (with-open-file (s "data/DerivedBidiClass.txt" :external-format
:ascii
)
149 (with-test (:name
(:bidi-class
))
150 (loop for line
= (read-line s nil nil
)
152 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
153 do
(test-property-line #'bidi-class line
)))))
157 (defun test-hangul-syllable-type ()
158 (declare (optimize (debug 2)))
159 (with-open-file (s "data/HangulSyllableType.txt" :external-format
:ascii
)
160 (with-test (:name
(:hangul-syllable-type
))
161 (loop for line
= (read-line s nil nil
)
163 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
164 do
(test-property-line #'hangul-syllable-type line
)))))
166 (test-hangul-syllable-type)
168 (defun test-east-asian-width ()
169 (declare (optimize (debug 2)))
170 (with-open-file (s "../tools-for-build/EastAsianWidth.txt"
171 :external-format
:ascii
)
172 (with-test (:name
(:east-asian-width
))
173 (loop for line
= (read-line s nil nil
)
175 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
176 do
(test-property-line #'east-asian-width line
)))))
178 (test-east-asian-width)
180 (defun test-script ()
181 (declare (optimize (debug 2)))
182 (with-open-file (s "../tools-for-build/Scripts.txt"
183 :external-format
:ascii
)
184 (with-test (:name
(:script
))
185 (loop for line
= (read-line s nil nil
)
187 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
188 do
(test-property-line #'script
(substitute #\-
#\_ line
))))))
193 (declare (optimize (debug 2)))
194 (with-open-file (s "../tools-for-build/Blocks.txt"
195 :external-format
:ascii
)
196 (with-test (:name
(:block
))
197 (loop for line
= (read-line s nil nil
)
199 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
200 do
(test-property-line
203 (substitute #\-
#\Space line
)
205 :start1
(position #\
; line)))))))
209 (defun test-proplist ()
210 (declare (optimize (debug 2)))
211 (with-open-file (s "../tools-for-build/PropList.txt"
212 :external-format
:ascii
)
213 (with-test (:name
(:proplist
))
214 (loop for line
= (read-line s nil nil
)
216 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
218 (destructuring-bind (%codepoints value
) (split-string line
#\
;)
219 (let* ((codepoints (codepoint-or-range %codepoints
))
221 (intern (string-upcase
224 (subseq (remove #\Space value
) 0
225 (position #\
# (remove #\Space value
)))))
227 (loop for i in codepoints do
228 (unless (proplist-p (code-char i
) property
)
229 (error "Character ~S should be ~S, but isn't."
230 (code-char i
) property
)))))))))
234 (defun test-bidi-mirroring-glyph ()
235 (declare (optimize (debug 2)))
236 (with-open-file (s "../tools-for-build/BidiMirroring.txt"
237 :external-format
:ascii
)
238 (with-test (:name
(:bidi-mirroring-glyph
)
239 :skipped-on
'(not :sb-unicode
))
240 (loop for line
= (read-line s nil nil
)
242 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
245 (split-string (subseq line
0 (position #\
# line
)) #\
;))
248 #'(lambda (s) (code-char (parse-integer s
:radix
16)))
250 (unless (char= (bidi-mirroring-glyph (first chars
)) (second chars
))
251 (error "The mirroring glyph of ~S is not ~S, but ~S"
252 (first chars
) (second chars
)
253 (bidi-mirroring-glyph (first chars
)))))))))
255 (test-bidi-mirroring-glyph)
258 (declare (optimize (debug 2)))
259 (with-open-file (s "../tools-for-build/DerivedAge.txt"
260 :external-format
:ascii
)
261 (with-test (:name
(:age
))
262 (loop for line
= (read-line s nil nil
)
264 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
266 (destructuring-bind (%codepoints %age
)
267 (split-string (subseq line
0 (position #\
# line
)) #\
;)
268 (let* ((range (codepoint-or-range %codepoints
))
269 (expected (mapcar #'parse-integer
(split-string %age
#\.
))))
271 for char
= (code-char i
)
275 (multiple-value-list (age char
)))
276 (error "Character ~S should have age ~S, but has ~S instead."
277 char expected
(multiple-value-list (age char
)))))))))))
281 (defun test-grapheme-break-class ()
282 (declare (optimize (debug 2)))
283 (with-open-file (s "data/GraphemeBreakProperty.txt"
284 :external-format
:ascii
)
285 (with-test (:name
(:grapheme-break-class
))
286 (loop for line
= (read-line s nil nil
)
288 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
289 do
(test-property-line #'grapheme-break-class
290 (replace-all "SpacingMark" "SPACING-MARK"
291 (substitute #\-
#\_ line
)))))))
293 (test-grapheme-break-class)
295 (defun test-word-break-class ()
296 (declare (optimize (debug 2)))
297 (with-open-file (s "data/WordBreakProperty.txt"
298 :external-format
:ascii
)
299 (with-test (:name
(:word-break-class
))
300 (loop for line
= (read-line s nil nil
)
302 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
303 do
(test-property-line #'word-break-class
304 (substitute #\-
#\_ line
))))))
306 (test-word-break-class)
308 (defun test-sentence-break-class ()
309 (declare (optimize (debug 2)))
310 (with-open-file (s "data/SentenceBreakProperty.txt"
311 :external-format
:ascii
)
312 (with-test (:name
(:sentence-break-class
))
313 (loop for line
= (read-line s nil nil
)
315 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
316 do
(test-property-line #'sentence-break-class line
)))))
318 (test-sentence-break-class)
320 (defun test-line-break-class ()
321 (declare (optimize (debug 2)))
322 (with-open-file (s "../tools-for-build/LineBreakProperty.txt"
323 :external-format
:ascii
)
324 (with-test (:name
(:line-break-class
))
325 (loop for line
= (read-line s nil nil
)
327 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
328 do
(test-property-line #'line-break-class line
)))))
330 (test-line-break-class)