Fix subtle bug in wipe_nonpinned_words()
[sbcl.git] / tests / unicode-properties.impure.lisp
blobbfb15bbbd606d2488728ce3a622aa37be099146e
1 ;;;; This software is part of the SBCL system. See the README file for
2 ;;;; more information.
3 ;;;;
4 ;;;; While most of SBCL is derived from the CMU CL system, the test
5 ;;;; files (like this one) were written from scratch after the fork
6 ;;;; from CMU CL.
7 ;;;;
8 ;;;; This software is in the public domain and is provided with
9 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
10 ;;;; more information.
12 (use-package :sb-unicode)
14 (defun split-string (string delimiter)
15 (loop for begin = 0 then (1+ end)
16 for end = (position delimiter string) then (position delimiter string :start begin)
17 collect (subseq string begin end)
18 while end))
20 ;; Taken straight out of the common lisp cookbook
21 (defun replace-all (part replacement string &key (test #'char=))
22 "Returns a new string in which all the occurences of the part
23 is replaced with replacement."
24 (with-output-to-string (out)
25 (loop with part-length = (length part)
26 for old-pos = 0 then (+ pos part-length)
27 for pos = (search part string
28 :start2 old-pos
29 :test test)
30 do (write-string string out
31 :start old-pos
32 :end (or pos (length string)))
33 when pos do (write-string replacement out)
34 while pos)))
36 (defun test-line (line)
37 (destructuring-bind (%cp %name %gc ccc %bidi decomp-map
38 %decimal-digit %digit %numeric
39 %bidi-mirrored %old-name old-comment
40 simple-up simple-down simple-title)
41 (split-string line #\;)
42 (declare (ignore decomp-map old-comment simple-up
43 simple-down simple-title))
44 (let* ((cp (parse-integer %cp :radix 16))
45 (char #+sb-unicode (code-char cp)
46 #-sb-unicode
47 (if (< cp 256)
48 (code-char cp)
49 (return-from test-line t)))
50 (gc (intern (string-upcase %gc) "KEYWORD"))
51 (bidi (intern (string-upcase %bidi) "KEYWORD"))
52 ;; See `normalize-character-name` in ucd.lisp for a discussion
53 ;; of U+1F5CF (PAGE) and the attendant standards-compliance issues2
54 (name (unless (or (position #\< %name) (= cp #x1F5CF))
55 (substitute #\_ #\Space %name)))
56 (old-name (unless (string= %old-name "")
57 (substitute #\_ #\Space %old-name)))
58 (char-from-name (name-char name))
59 (char-from-old-name
60 ;; Exclude all the renaming conflicts, including the mass-rename of Georgian
61 (when (and old-name
62 (not
63 (member old-name
64 '("BELL"
65 "LATIN_CAPITAL_LETTER_YOGH"
66 "LATIN_SMALL_LETTER_YOGH"
67 "CYRILLIC_CAPITAL_LETTER_E"
68 "CYRILLIC_CAPITAL_LETTER_I"
69 "CYRILLIC_SMALL_LETTER_E"
70 "CYRILLIC_SMALL_LETTER_I"
71 "DOUBLE_VERTICAL_BAR"
72 "HANGUL_LETTER_CIEUC"
73 "HANGUL_LETTER_KIYEOK"
74 "HANGUL_LETTER_PIEUP"
75 "PARENTHESIZED_HANGUL_CIEUC"
76 "PARENTHESIZED_HANGUL_KIYEOK"
77 "PARENTHESIZED_HANGUL_PIEUP"
78 "CIRCLED_HANGUL_CIEUC"
79 "CIRCLED_HANGUL_KIYEOK"
80 "CIRCLED_HANGUL_PIEUP"
81 "HALFWIDTH_HANGUL_LETTER_CIEUC"
82 "HALFWIDTH_HANGUL_LETTER_KIYEOK"
83 "HALFWIDTH_HANGUL_LETTER_PIEUP"
84 "SQUARED_MV")
85 :test #'string=))
86 (or (< (length old-name) 14)
87 (string/= old-name "GEORGIAN_SMALL" :end1 14)))
88 (name-char old-name)))
89 (decimal-digit (parse-integer %decimal-digit :junk-allowed t))
90 (digit (parse-integer %digit :junk-allowed t))
91 (numeric (if (string= %numeric "") nil (read-from-string %numeric)))
92 (bidi-mirrored (string= %bidi-mirrored "Y")))
93 (when char-from-name
94 (assert (char= char char-from-name)))
95 (when char-from-old-name
96 (assert (char= char char-from-old-name)))
97 (assert (eql gc (general-category char)))
98 (assert (= (parse-integer ccc) (combining-class char)))
99 (assert (eql bidi (bidi-class char)))
100 (assert (eql decimal-digit (decimal-value char)))
101 (assert (eql digit (digit-value char)))
102 (assert (eql numeric (numeric-value char)))
103 (assert (eql bidi-mirrored (mirrored-p char)))
104 (assert (string= old-name (unicode-1-name char))))))
106 (defun test-property-reads ()
107 (declare (optimize (debug 2)))
108 (with-open-file (s (merge-pathnames
109 (make-pathname
110 :directory '(:relative :up "tools-for-build")
111 :name "UnicodeData" :type "txt")
112 (or *load-truename* *compile-file-truename*)))
113 (with-test (:name (:unicode-properties))
114 (loop for line = (read-line s nil nil)
115 while line
116 do (test-line line)))))
118 (test-property-reads)
120 (defun codepoint-or-range (string)
121 (remove-if-not
122 #'(lambda (i)
123 #+sb-unicode i
124 #-sb-unicode (< i 256))
125 (flet ((parse (str) (parse-integer str :radix 16 :junk-allowed t)))
126 (let ((parts (remove "" (split-string string #\.) :test #'string=)))
127 (if (cdr parts)
128 (loop for i from (parse (car parts)) to (parse (cadr parts)) collect i)
129 (mapcar #'parse parts))))))
131 (defun test-property-line (fn line)
132 (destructuring-bind (%codepoints value) (split-string line #\;)
133 (let* ((codepoints (codepoint-or-range %codepoints))
134 (property (remove #\Space value))
135 (expected (intern
136 (string-upcase
137 (subseq property 0 (position #\# property)))
138 "KEYWORD")))
140 (loop for i in codepoints do
141 (unless (eql expected (funcall fn (code-char i)))
142 (error "Character ~S has the wrong value for the tested property.
143 Wanted ~S, got ~S."
144 (code-char i) expected (funcall fn (code-char i))))))))
146 (defun test-bidi-class ()
147 (declare (optimize (debug 2)))
148 (with-open-file (s "data/DerivedBidiClass.txt" :external-format :ascii)
149 (with-test (:name (:bidi-class))
150 (loop for line = (read-line s nil nil)
151 while line
152 unless (or (string= "" line) (eql 0 (position #\# line)))
153 do (test-property-line #'bidi-class line)))))
155 (test-bidi-class)
157 (defun test-hangul-syllable-type ()
158 (declare (optimize (debug 2)))
159 (with-open-file (s "data/HangulSyllableType.txt" :external-format :ascii)
160 (with-test (:name (:hangul-syllable-type))
161 (loop for line = (read-line s nil nil)
162 while line
163 unless (or (string= "" line) (eql 0 (position #\# line)))
164 do (test-property-line #'hangul-syllable-type line)))))
166 (test-hangul-syllable-type)
168 (defun test-east-asian-width ()
169 (declare (optimize (debug 2)))
170 (with-open-file (s "../tools-for-build/EastAsianWidth.txt"
171 :external-format :ascii)
172 (with-test (:name (:east-asian-width))
173 (loop for line = (read-line s nil nil)
174 while line
175 unless (or (string= "" line) (eql 0 (position #\# line)))
176 do (test-property-line #'east-asian-width line)))))
178 (test-east-asian-width)
180 (defun test-script ()
181 (declare (optimize (debug 2)))
182 (with-open-file (s "../tools-for-build/Scripts.txt"
183 :external-format :ascii)
184 (with-test (:name (:script))
185 (loop for line = (read-line s nil nil)
186 while line
187 unless (or (string= "" line) (eql 0 (position #\# line)))
188 do (test-property-line #'script (substitute #\- #\_ line))))))
190 (test-script)
192 (defun test-block ()
193 (declare (optimize (debug 2)))
194 (with-open-file (s "../tools-for-build/Blocks.txt"
195 :external-format :ascii)
196 (with-test (:name (:block))
197 (loop for line = (read-line s nil nil)
198 while line
199 unless (or (string= "" line) (eql 0 (position #\# line)))
200 do (test-property-line
201 #'char-block
202 (replace
203 (substitute #\- #\Space line)
204 "; "
205 :start1 (position #\; line)))))))
207 (test-block)
209 (defun test-proplist ()
210 (declare (optimize (debug 2)))
211 (with-open-file (s "../tools-for-build/PropList.txt"
212 :external-format :ascii)
213 (with-test (:name (:proplist))
214 (loop for line = (read-line s nil nil)
215 while line
216 unless (or (string= "" line) (eql 0 (position #\# line)))
218 (destructuring-bind (%codepoints value) (split-string line #\;)
219 (let* ((codepoints (codepoint-or-range %codepoints))
220 (property
221 (intern (string-upcase
222 (substitute
223 #\- #\_
224 (subseq (remove #\Space value) 0
225 (position #\# (remove #\Space value)))))
226 "KEYWORD")))
227 (loop for i in codepoints do
228 (unless (proplist-p (code-char i) property)
229 (error "Character ~S should be ~S, but isn't."
230 (code-char i) property)))))))))
232 (test-proplist)
234 (defun test-bidi-mirroring-glyph ()
235 (declare (optimize (debug 2)))
236 (with-open-file (s "../tools-for-build/BidiMirroring.txt"
237 :external-format :ascii)
238 (with-test (:name (:bidi-mirroring-glyph)
239 :skipped-on '(not :sb-unicode))
240 (loop for line = (read-line s nil nil)
241 while line
242 unless (or (string= "" line) (eql 0 (position #\# line)))
244 (let* ((codepoints
245 (split-string (subseq line 0 (position #\# line)) #\;))
246 (chars
247 (mapcar
248 #'(lambda (s) (code-char (parse-integer s :radix 16)))
249 codepoints)))
250 (unless (char= (bidi-mirroring-glyph (first chars)) (second chars))
251 (error "The mirroring glyph of ~S is not ~S, but ~S"
252 (first chars) (second chars)
253 (bidi-mirroring-glyph (first chars)))))))))
255 (test-bidi-mirroring-glyph)
257 (defun test-age ()
258 (declare (optimize (debug 2)))
259 (with-open-file (s "../tools-for-build/DerivedAge.txt"
260 :external-format :ascii)
261 (with-test (:name (:age))
262 (loop for line = (read-line s nil nil)
263 while line
264 unless (or (string= "" line) (eql 0 (position #\# line)))
266 (destructuring-bind (%codepoints %age)
267 (split-string (subseq line 0 (position #\# line)) #\;)
268 (let* ((range (codepoint-or-range %codepoints))
269 (expected (mapcar #'parse-integer (split-string %age #\.))))
270 (loop for i in range
271 for char = (code-char i)
273 (unless (equalp
274 expected
275 (multiple-value-list (age char)))
276 (error "Character ~S should have age ~S, but has ~S instead."
277 char expected (multiple-value-list (age char)))))))))))
279 (test-age)
281 (defun test-grapheme-break-class ()
282 (declare (optimize (debug 2)))
283 (with-open-file (s "data/GraphemeBreakProperty.txt"
284 :external-format :ascii)
285 (with-test (:name (:grapheme-break-class))
286 (loop for line = (read-line s nil nil)
287 while line
288 unless (or (string= "" line) (eql 0 (position #\# line)))
289 do (test-property-line #'grapheme-break-class
290 (replace-all "SpacingMark" "SPACING-MARK"
291 (substitute #\- #\_ line)))))))
293 (test-grapheme-break-class)
295 (defun test-word-break-class ()
296 (declare (optimize (debug 2)))
297 (with-open-file (s "data/WordBreakProperty.txt"
298 :external-format :ascii)
299 (with-test (:name (:word-break-class))
300 (loop for line = (read-line s nil nil)
301 while line
302 unless (or (string= "" line) (eql 0 (position #\# line)))
303 do (test-property-line #'word-break-class
304 (substitute #\- #\_ line))))))
306 (test-word-break-class)
308 (defun test-sentence-break-class ()
309 (declare (optimize (debug 2)))
310 (with-open-file (s "data/SentenceBreakProperty.txt"
311 :external-format :ascii)
312 (with-test (:name (:sentence-break-class))
313 (loop for line = (read-line s nil nil)
314 while line
315 unless (or (string= "" line) (eql 0 (position #\# line)))
316 do (test-property-line #'sentence-break-class line)))))
318 (test-sentence-break-class)
320 (defun test-line-break-class ()
321 (declare (optimize (debug 2)))
322 (with-open-file (s "../tools-for-build/LineBreakProperty.txt"
323 :external-format :ascii)
324 (with-test (:name (:line-break-class))
325 (loop for line = (read-line s nil nil)
326 while line
327 unless (or (string= "" line) (eql 0 (position #\# line)))
328 do (test-property-line #'line-break-class line)))))
330 (test-line-break-class)