1 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; While most of SBCL is derived from the CMU CL system, the test
5 ;;;; files (like this one) were written from scratch after the fork
8 ;;;; This software is in the public domain and is provided with
9 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
10 ;;;; more information.
12 (use-package :sb-unicode
)
14 ;; Taken straight out of the common lisp cookbook
15 (defun replace-all (part replacement string
&key
(test #'char
=))
16 "Returns a new string in which all the occurences of the part
17 is replaced with replacement."
18 (with-output-to-string (out)
19 (loop with part-length
= (length part
)
20 for old-pos
= 0 then
(+ pos part-length
)
21 for pos
= (search part string
24 do
(write-string string out
26 :end
(or pos
(length string
)))
27 when pos do
(write-string replacement out
)
30 (defun test-line (line)
31 (destructuring-bind (%cp %name %gc ccc %bidi decomp-map
32 %decimal-digit %digit %numeric
33 %bidi-mirrored %old-name old-comment
34 simple-up simple-down simple-title
)
35 (split-string line
#\
;)
36 (declare (ignore decomp-map old-comment simple-up
37 simple-down simple-title
))
38 (let* ((cp (parse-integer %cp
:radix
16))
39 (char #+sb-unicode
(code-char cp
)
43 (return-from test-line t
)))
44 (gc (intern (string-upcase %gc
) "KEYWORD"))
45 (bidi (intern (string-upcase %bidi
) "KEYWORD"))
46 ;; See `normalize-character-name` in ucd.lisp for a discussion
47 ;; of U+1F5CF (PAGE) and the attendant standards-compliance issues2
48 (name (unless (or (position #\
< %name
) (= cp
#x1F5CF
))
49 (substitute #\_
#\Space %name
)))
50 (old-name (unless (string= %old-name
"")
51 (substitute #\_
#\Space %old-name
)))
52 (char-from-name (name-char name
))
54 ;; Exclude all the renaming conflicts, including the mass-rename of Georgian
59 "LATIN_CAPITAL_LETTER_YOGH"
60 "LATIN_SMALL_LETTER_YOGH"
61 "CYRILLIC_CAPITAL_LETTER_E"
62 "CYRILLIC_CAPITAL_LETTER_I"
63 "CYRILLIC_SMALL_LETTER_E"
64 "CYRILLIC_SMALL_LETTER_I"
67 "HANGUL_LETTER_KIYEOK"
69 "PARENTHESIZED_HANGUL_CIEUC"
70 "PARENTHESIZED_HANGUL_KIYEOK"
71 "PARENTHESIZED_HANGUL_PIEUP"
72 "CIRCLED_HANGUL_CIEUC"
73 "CIRCLED_HANGUL_KIYEOK"
74 "CIRCLED_HANGUL_PIEUP"
75 "HALFWIDTH_HANGUL_LETTER_CIEUC"
76 "HALFWIDTH_HANGUL_LETTER_KIYEOK"
77 "HALFWIDTH_HANGUL_LETTER_PIEUP"
80 (or (< (length old-name
) 14)
81 (string/= old-name
"GEORGIAN_SMALL" :end1
14)))
82 (name-char old-name
)))
83 (decimal-digit (parse-integer %decimal-digit
:junk-allowed t
))
84 (digit (parse-integer %digit
:junk-allowed t
))
85 (numeric (if (string= %numeric
"") nil
(read-from-string %numeric
)))
86 (bidi-mirrored (string= %bidi-mirrored
"Y")))
88 (assert (char= char char-from-name
)))
89 (when char-from-old-name
90 (assert (char= char char-from-old-name
)))
91 (assert (eql gc
(general-category char
)))
92 (assert (= (parse-integer ccc
) (combining-class char
)))
93 (assert (eql bidi
(bidi-class char
)))
94 (assert (eql decimal-digit
(decimal-value char
)))
95 (assert (eql digit
(digit-value char
)))
96 (assert (eql numeric
(numeric-value char
)))
97 (assert (eql bidi-mirrored
(mirrored-p char
)))
98 (assert (string= old-name
(unicode-1-name char
))))))
100 (defun test-property-reads ()
101 (declare (optimize (debug 2)))
102 (with-open-file (s (merge-pathnames
104 :directory
'(:relative
:up
"tools-for-build")
105 :name
"UnicodeData" :type
"txt")
106 (or *load-pathname
* *compile-file-pathname
*)))
107 (with-test (:name
(:unicode-properties
))
108 (loop for line
= (read-line s nil nil
)
110 do
(test-line line
)))))
112 (test-property-reads)
114 (defun codepoint-or-range (string)
118 #-sb-unicode
(< i
256))
119 (flet ((parse (str) (parse-integer str
:radix
16 :junk-allowed t
)))
120 (let ((parts (remove "" (split-string string
#\.
) :test
#'string
=)))
122 (loop for i from
(parse (car parts
)) to
(parse (cadr parts
)) collect i
)
123 (mapcar #'parse parts
))))))
125 (defun line-codepoints (line)
126 (codepoint-or-range (car (split-string line
#\
;))))
128 (defun test-property-line (fn line
)
129 (destructuring-bind (%codepoints value
) (split-string line
#\
;)
130 (let* ((codepoints (codepoint-or-range %codepoints
))
131 (property (remove #\Space value
))
134 (subseq property
0 (position #\
# property
)))
136 (loop for i in codepoints
137 unless
(eql expected
(funcall fn
(code-char i
)))
138 do
(error "Character ~S has the wrong value for the tested property. Wanted ~S, got ~S."
139 (code-char i
) expected
(funcall fn
(code-char i
)))))))
141 (defun test-unallocated-bidi-class (code class missing
)
142 (loop for
((start . end
) . expected
) in missing
143 if
(and (<= start code
) (<= code end
))
144 do
(assert (eql class expected
))
147 (defun test-bidi-class ()
148 (declare (optimize (debug 2)))
149 (with-open-file (s "data/DerivedBidiClass.txt" :external-format
:utf-8
)
150 (let ((missing-prefix "# @missing: ")
152 '(("Left_To_Right" .
:L
)
153 ("Right_To_Left" .
:R
)
154 ("Arabic_Letter" .
:AL
)
155 ("European_Terminator" .
:ET
)))
156 (tested (make-array #x110000
:initial-element
0 :element-type
'bit
))
158 (with-test (:name
(:bidi-class
:assigned
))
159 (loop for line
= (read-line s nil nil
)
161 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
162 do
(test-property-line #'bidi-class line
)
163 and do
(loop for code in
(line-codepoints line
) do
(setf (aref tested code
) 1))
164 when
(eql (mismatch missing-prefix line
) (length missing-prefix
))
165 do
(let* ((semipos (position #\
; line))
166 (rangetext (subseq line
(length missing-prefix
) semipos
))
167 (first-dot (position #\. rangetext
))
168 (last-dot (position #\. rangetext
:from-end t
))
169 (start (parse-integer rangetext
:end first-dot
:radix
16))
170 (end (parse-integer rangetext
:start
(1+ last-dot
) :radix
16))
171 (range (cons start end
))
172 (class-name (subseq line
(+ semipos
2)))
173 (class (cdr (assoc class-name bidi-class-table
:test
'string
=))))
174 (push (cons range class
) missing
))))
175 (with-test (:name
(:bidi-class
:missing-data
))
176 ;; Consistency check: the @missing lines in the data file have
177 ;; "Left_To_Right" first and overrides following it, and we've
178 ;; pushed in reverse order; test (:BIDI-CLASS :MISSING)
179 ;; depends on this order.
180 (let ((last (car (last missing
))))
181 (assert (equal last
'((0 .
#x10FFFF
) .
:L
)))))
182 (with-test (:name
(:bidi-class
:missing
))
183 (loop for code from
0 to
(min (1- char-code-limit
) #x10FFFF
)
184 for char
= (code-char code
)
185 if
(= (aref tested code
) 0)
186 do
(test-unallocated-bidi-class code
(bidi-class char
) missing
))))))
190 (defun test-hangul-syllable-type ()
191 (declare (optimize (debug 2)))
192 (with-open-file (s "data/HangulSyllableType.txt" :external-format
:utf-8
)
193 (with-test (:name
(:hangul-syllable-type
))
194 (loop for line
= (read-line s nil nil
)
196 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
197 do
(test-property-line #'hangul-syllable-type line
)))))
199 (test-hangul-syllable-type)
201 (defun test-east-asian-width ()
202 (declare (optimize (debug 2)))
203 (with-open-file (s "../tools-for-build/EastAsianWidth.txt"
204 :external-format
:utf-8
)
205 (with-test (:name
(:east-asian-width
))
206 (loop for line
= (read-line s nil nil
)
208 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
209 do
(test-property-line #'east-asian-width line
)))))
211 (test-east-asian-width)
213 (defun test-script ()
214 (declare (optimize (debug 2)))
215 (with-open-file (s "../tools-for-build/Scripts.txt"
216 :external-format
:utf-8
)
217 (with-test (:name
(:script
))
218 (loop for line
= (read-line s nil nil
)
220 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
221 do
(test-property-line #'script
(substitute #\-
#\_ line
))))))
226 (declare (optimize (debug 2)))
227 (with-open-file (s "../tools-for-build/Blocks.txt"
228 :external-format
:utf-8
)
229 (with-test (:name
(:block
))
230 (loop for line
= (read-line s nil nil
)
232 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
233 do
(test-property-line
236 (substitute #\-
#\Space line
)
238 :start1
(position #\
; line)))))))
242 (defun test-proplist ()
243 (declare (optimize (debug 2)))
244 (with-open-file (s "../tools-for-build/PropList.txt"
245 :external-format
:utf-8
)
246 (with-test (:name
(:proplist
))
247 (loop for line
= (read-line s nil nil
)
249 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
251 (destructuring-bind (%codepoints value
) (split-string line
#\
;)
252 (let* ((codepoints (codepoint-or-range %codepoints
))
254 (intern (string-upcase
257 (subseq (remove #\Space value
) 0
258 (position #\
# (remove #\Space value
)))))
260 (loop for i in codepoints do
261 (unless (proplist-p (code-char i
) property
)
262 (error "Character ~S should be ~S, but isn't."
263 (code-char i
) property
)))))))))
267 (defun test-bidi-mirroring-glyph ()
268 (declare (optimize (debug 2)))
269 (with-open-file (s "../tools-for-build/BidiMirroring.txt"
270 :external-format
:utf-8
)
271 (with-test (:name
(:bidi-mirroring-glyph
)
272 :skipped-on
(not :sb-unicode
))
273 (loop for line
= (read-line s nil nil
)
275 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
278 (split-string (subseq line
0 (position #\
# line
)) #\
;))
281 #'(lambda (s) (code-char (parse-integer s
:radix
16)))
283 (unless (char= (bidi-mirroring-glyph (first chars
)) (second chars
))
284 (error "The mirroring glyph of ~S is not ~S, but ~S"
285 (first chars
) (second chars
)
286 (bidi-mirroring-glyph (first chars
)))))))))
288 (test-bidi-mirroring-glyph)
291 (declare (optimize (debug 2)))
292 (with-open-file (s "../tools-for-build/DerivedAge.txt"
293 :external-format
:utf-8
)
294 (with-test (:name
(:age
))
295 (loop for line
= (read-line s nil nil
)
297 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
299 (destructuring-bind (%codepoints %age
)
300 (split-string (subseq line
0 (position #\
# line
)) #\
;)
301 (let* ((range (codepoint-or-range %codepoints
))
302 (expected (mapcar #'parse-integer
(split-string %age
#\.
))))
304 for char
= (code-char i
)
308 (multiple-value-list (age char
)))
309 (error "Character ~S should have age ~S, but has ~S instead."
310 char expected
(multiple-value-list (age char
)))))))))))
314 (defun test-grapheme-break-class ()
315 (declare (optimize (debug 2)))
316 (with-open-file (s "data/GraphemeBreakProperty.txt"
317 :external-format
:utf-8
)
318 (with-test (:name
(:grapheme-break-class
))
319 (loop for line
= (read-line s nil nil
)
321 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
322 do
(test-property-line #'grapheme-break-class
323 (replace-all "SpacingMark" "SPACING-MARK"
324 (substitute #\-
#\_ line
)))))))
326 (test-grapheme-break-class)
328 (defun test-word-break-class ()
329 (declare (optimize (debug 2)))
330 (with-open-file (s "data/WordBreakProperty.txt"
331 :external-format
:utf-8
)
332 (with-test (:name
(:word-break-class
))
333 (loop for line
= (read-line s nil nil
)
335 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
336 do
(test-property-line #'word-break-class
337 (substitute #\-
#\_ line
))))))
339 (test-word-break-class)
341 (defun test-sentence-break-class ()
342 (declare (optimize (debug 2)))
343 (with-open-file (s "data/SentenceBreakProperty.txt"
344 :external-format
:utf-8
)
345 (with-test (:name
(:sentence-break-class
))
346 (loop for line
= (read-line s nil nil
)
348 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
349 do
(test-property-line #'sentence-break-class line
)))))
351 (test-sentence-break-class)
353 (defun test-line-break-class ()
354 (declare (optimize (debug 2)))
355 (with-open-file (s "../tools-for-build/LineBreak.txt"
356 :external-format
:utf-8
)
357 (with-test (:name
(:line-break-class
))
358 (loop for line
= (read-line s nil nil
)
360 unless
(or (string= "" line
) (eql 0 (position #\
# line
)))
361 do
(test-property-line #'line-break-class line
)))))
363 (test-line-break-class)