Make stuff regarding debug names much less complex.
[sbcl.git] / tests / unicode-properties.pure.lisp
blobbe7c7b8089a465ef7637d43b44e6e94783ab18a8
1 ;;;; This software is part of the SBCL system. See the README file for
2 ;;;; more information.
3 ;;;;
4 ;;;; While most of SBCL is derived from the CMU CL system, the test
5 ;;;; files (like this one) were written from scratch after the fork
6 ;;;; from CMU CL.
7 ;;;;
8 ;;;; This software is in the public domain and is provided with
9 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
10 ;;;; more information.
12 (use-package :sb-unicode)
14 ;; Taken straight out of the common lisp cookbook
15 (defun replace-all (part replacement string &key (test #'char=))
16 "Returns a new string in which all the occurences of the part
17 is replaced with replacement."
18 (with-output-to-string (out)
19 (loop with part-length = (length part)
20 for old-pos = 0 then (+ pos part-length)
21 for pos = (search part string
22 :start2 old-pos
23 :test test)
24 do (write-string string out
25 :start old-pos
26 :end (or pos (length string)))
27 when pos do (write-string replacement out)
28 while pos)))
30 (defun test-line (line)
31 (destructuring-bind (%cp %name %gc ccc %bidi decomp-map
32 %decimal-digit %digit %numeric
33 %bidi-mirrored %old-name old-comment
34 simple-up simple-down simple-title)
35 (split-string line #\;)
36 (declare (ignore decomp-map old-comment simple-up
37 simple-down simple-title))
38 (let* ((cp (parse-integer %cp :radix 16))
39 (char #+sb-unicode (code-char cp)
40 #-sb-unicode
41 (if (< cp 256)
42 (code-char cp)
43 (return-from test-line t)))
44 (gc (intern (string-upcase %gc) "KEYWORD"))
45 (bidi (intern (string-upcase %bidi) "KEYWORD"))
46 ;; See `normalize-character-name` in ucd.lisp for a discussion
47 ;; of U+1F5CF (PAGE) and the attendant standards-compliance issues2
48 (name (unless (or (position #\< %name) (= cp #x1F5CF))
49 (substitute #\_ #\Space %name)))
50 (old-name (unless (string= %old-name "")
51 (substitute #\_ #\Space %old-name)))
52 (char-from-name (name-char name))
53 (char-from-old-name
54 ;; Exclude all the renaming conflicts, including the mass-rename of Georgian
55 (when (and old-name
56 (not
57 (member old-name
58 '("BELL"
59 "LATIN_CAPITAL_LETTER_YOGH"
60 "LATIN_SMALL_LETTER_YOGH"
61 "CYRILLIC_CAPITAL_LETTER_E"
62 "CYRILLIC_CAPITAL_LETTER_I"
63 "CYRILLIC_SMALL_LETTER_E"
64 "CYRILLIC_SMALL_LETTER_I"
65 "DOUBLE_VERTICAL_BAR"
66 "HANGUL_LETTER_CIEUC"
67 "HANGUL_LETTER_KIYEOK"
68 "HANGUL_LETTER_PIEUP"
69 "PARENTHESIZED_HANGUL_CIEUC"
70 "PARENTHESIZED_HANGUL_KIYEOK"
71 "PARENTHESIZED_HANGUL_PIEUP"
72 "CIRCLED_HANGUL_CIEUC"
73 "CIRCLED_HANGUL_KIYEOK"
74 "CIRCLED_HANGUL_PIEUP"
75 "HALFWIDTH_HANGUL_LETTER_CIEUC"
76 "HALFWIDTH_HANGUL_LETTER_KIYEOK"
77 "HALFWIDTH_HANGUL_LETTER_PIEUP"
78 "SQUARED_MV")
79 :test #'string=))
80 (or (< (length old-name) 14)
81 (string/= old-name "GEORGIAN_SMALL" :end1 14)))
82 (name-char old-name)))
83 (decimal-digit (parse-integer %decimal-digit :junk-allowed t))
84 (digit (parse-integer %digit :junk-allowed t))
85 (numeric (if (string= %numeric "") nil (read-from-string %numeric)))
86 (bidi-mirrored (string= %bidi-mirrored "Y")))
87 (when char-from-name
88 (assert (char= char char-from-name)))
89 (when char-from-old-name
90 (assert (char= char char-from-old-name)))
91 (assert (eql gc (general-category char)))
92 (assert (= (parse-integer ccc) (combining-class char)))
93 (assert (eql bidi (bidi-class char)))
94 (assert (eql decimal-digit (decimal-value char)))
95 (assert (eql digit (digit-value char)))
96 (assert (eql numeric (numeric-value char)))
97 (assert (eql bidi-mirrored (mirrored-p char)))
98 (assert (string= old-name (unicode-1-name char))))))
100 (defun test-property-reads ()
101 (declare (optimize (debug 2)))
102 (with-open-file (s (merge-pathnames
103 (make-pathname
104 :directory '(:relative :up "tools-for-build")
105 :name "UnicodeData" :type "txt")
106 (or *load-pathname* *compile-file-pathname*)))
107 (with-test (:name (:unicode-properties))
108 (loop for line = (read-line s nil nil)
109 while line
110 do (test-line line)))))
112 (test-property-reads)
114 (defun codepoint-or-range (string)
115 (remove-if-not
116 #'(lambda (i)
117 #+sb-unicode i
118 #-sb-unicode (< i 256))
119 (flet ((parse (str) (parse-integer str :radix 16 :junk-allowed t)))
120 (let ((parts (remove "" (split-string string #\.) :test #'string=)))
121 (if (cdr parts)
122 (loop for i from (parse (car parts)) to (parse (cadr parts)) collect i)
123 (mapcar #'parse parts))))))
125 (defun line-codepoints (line)
126 (codepoint-or-range (car (split-string line #\;))))
128 (defun test-property-line (fn line)
129 (destructuring-bind (%codepoints value) (split-string line #\;)
130 (let* ((codepoints (codepoint-or-range %codepoints))
131 (property (remove #\Space value))
132 (expected (intern
133 (string-upcase
134 (subseq property 0 (position #\# property)))
135 "KEYWORD")))
136 (loop for i in codepoints
137 unless (eql expected (funcall fn (code-char i)))
138 do (error "Character ~S has the wrong value for the tested property. Wanted ~S, got ~S."
139 (code-char i) expected (funcall fn (code-char i)))))))
141 (defun test-unallocated-bidi-class (code class missing)
142 (loop for ((start . end) . expected) in missing
143 if (and (<= start code) (<= code end))
144 do (assert (eql class expected))
145 and do (return)))
147 (defun test-bidi-class ()
148 (declare (optimize (debug 2)))
149 (with-open-file (s "data/DerivedBidiClass.txt" :external-format :utf-8)
150 (let ((missing-prefix "# @missing: ")
151 (bidi-class-table
152 '(("Left_To_Right" . :L)
153 ("Right_To_Left" . :R)
154 ("Arabic_Letter" . :AL)
155 ("European_Terminator" . :ET)))
156 (tested (make-array #x110000 :initial-element 0 :element-type 'bit))
157 missing)
158 (with-test (:name (:bidi-class :assigned))
159 (loop for line = (read-line s nil nil)
160 while line
161 unless (or (string= "" line) (eql 0 (position #\# line)))
162 do (test-property-line #'bidi-class line)
163 and do (loop for code in (line-codepoints line) do (setf (aref tested code) 1))
164 when (eql (mismatch missing-prefix line) (length missing-prefix))
165 do (let* ((semipos (position #\; line))
166 (rangetext (subseq line (length missing-prefix) semipos))
167 (first-dot (position #\. rangetext))
168 (last-dot (position #\. rangetext :from-end t))
169 (start (parse-integer rangetext :end first-dot :radix 16))
170 (end (parse-integer rangetext :start (1+ last-dot) :radix 16))
171 (range (cons start end))
172 (class-name (subseq line (+ semipos 2)))
173 (class (cdr (assoc class-name bidi-class-table :test 'string=))))
174 (push (cons range class) missing))))
175 (with-test (:name (:bidi-class :missing-data))
176 ;; Consistency check: the @missing lines in the data file have
177 ;; "Left_To_Right" first and overrides following it, and we've
178 ;; pushed in reverse order; test (:BIDI-CLASS :MISSING)
179 ;; depends on this order.
180 (let ((last (car (last missing))))
181 (assert (equal last '((0 . #x10FFFF) . :L)))))
182 (with-test (:name (:bidi-class :missing))
183 (loop for code from 0 to (min (1- char-code-limit) #x10FFFF)
184 for char = (code-char code)
185 if (= (aref tested code) 0)
186 do (test-unallocated-bidi-class code (bidi-class char) missing))))))
188 (test-bidi-class)
190 (defun test-hangul-syllable-type ()
191 (declare (optimize (debug 2)))
192 (with-open-file (s "data/HangulSyllableType.txt" :external-format :utf-8)
193 (with-test (:name (:hangul-syllable-type))
194 (loop for line = (read-line s nil nil)
195 while line
196 unless (or (string= "" line) (eql 0 (position #\# line)))
197 do (test-property-line #'hangul-syllable-type line)))))
199 (test-hangul-syllable-type)
201 (defun test-east-asian-width ()
202 (declare (optimize (debug 2)))
203 (with-open-file (s "../tools-for-build/EastAsianWidth.txt"
204 :external-format :utf-8)
205 (with-test (:name (:east-asian-width))
206 (loop for line = (read-line s nil nil)
207 while line
208 unless (or (string= "" line) (eql 0 (position #\# line)))
209 do (test-property-line #'east-asian-width line)))))
211 (test-east-asian-width)
213 (defun test-script ()
214 (declare (optimize (debug 2)))
215 (with-open-file (s "../tools-for-build/Scripts.txt"
216 :external-format :utf-8)
217 (with-test (:name (:script))
218 (loop for line = (read-line s nil nil)
219 while line
220 unless (or (string= "" line) (eql 0 (position #\# line)))
221 do (test-property-line #'script (substitute #\- #\_ line))))))
223 (test-script)
225 (defun test-block ()
226 (declare (optimize (debug 2)))
227 (with-open-file (s "../tools-for-build/Blocks.txt"
228 :external-format :utf-8)
229 (with-test (:name (:block))
230 (loop for line = (read-line s nil nil)
231 while line
232 unless (or (string= "" line) (eql 0 (position #\# line)))
233 do (test-property-line
234 #'char-block
235 (replace
236 (substitute #\- #\Space line)
237 "; "
238 :start1 (position #\; line)))))))
240 (test-block)
242 (defun test-proplist ()
243 (declare (optimize (debug 2)))
244 (with-open-file (s "../tools-for-build/PropList.txt"
245 :external-format :utf-8)
246 (with-test (:name (:proplist))
247 (loop for line = (read-line s nil nil)
248 while line
249 unless (or (string= "" line) (eql 0 (position #\# line)))
251 (destructuring-bind (%codepoints value) (split-string line #\;)
252 (let* ((codepoints (codepoint-or-range %codepoints))
253 (property
254 (intern (string-upcase
255 (substitute
256 #\- #\_
257 (subseq (remove #\Space value) 0
258 (position #\# (remove #\Space value)))))
259 "KEYWORD")))
260 (loop for i in codepoints do
261 (unless (proplist-p (code-char i) property)
262 (error "Character ~S should be ~S, but isn't."
263 (code-char i) property)))))))))
265 (test-proplist)
267 (defun test-bidi-mirroring-glyph ()
268 (declare (optimize (debug 2)))
269 (with-open-file (s "../tools-for-build/BidiMirroring.txt"
270 :external-format :utf-8)
271 (with-test (:name (:bidi-mirroring-glyph)
272 :skipped-on (not :sb-unicode))
273 (loop for line = (read-line s nil nil)
274 while line
275 unless (or (string= "" line) (eql 0 (position #\# line)))
277 (let* ((codepoints
278 (split-string (subseq line 0 (position #\# line)) #\;))
279 (chars
280 (mapcar
281 #'(lambda (s) (code-char (parse-integer s :radix 16)))
282 codepoints)))
283 (unless (char= (bidi-mirroring-glyph (first chars)) (second chars))
284 (error "The mirroring glyph of ~S is not ~S, but ~S"
285 (first chars) (second chars)
286 (bidi-mirroring-glyph (first chars)))))))))
288 (test-bidi-mirroring-glyph)
290 (defun test-age ()
291 (declare (optimize (debug 2)))
292 (with-open-file (s "../tools-for-build/DerivedAge.txt"
293 :external-format :utf-8)
294 (with-test (:name (:age))
295 (loop for line = (read-line s nil nil)
296 while line
297 unless (or (string= "" line) (eql 0 (position #\# line)))
299 (destructuring-bind (%codepoints %age)
300 (split-string (subseq line 0 (position #\# line)) #\;)
301 (let* ((range (codepoint-or-range %codepoints))
302 (expected (mapcar #'parse-integer (split-string %age #\.))))
303 (loop for i in range
304 for char = (code-char i)
306 (unless (equalp
307 expected
308 (multiple-value-list (age char)))
309 (error "Character ~S should have age ~S, but has ~S instead."
310 char expected (multiple-value-list (age char)))))))))))
312 (test-age)
314 (defun test-grapheme-break-class ()
315 (declare (optimize (debug 2)))
316 (with-open-file (s "data/GraphemeBreakProperty.txt"
317 :external-format :utf-8)
318 (with-test (:name (:grapheme-break-class))
319 (loop for line = (read-line s nil nil)
320 while line
321 unless (or (string= "" line) (eql 0 (position #\# line)))
322 do (test-property-line #'grapheme-break-class
323 (replace-all "SpacingMark" "SPACING-MARK"
324 (substitute #\- #\_ line)))))))
326 (test-grapheme-break-class)
328 (defun test-word-break-class ()
329 (declare (optimize (debug 2)))
330 (with-open-file (s "data/WordBreakProperty.txt"
331 :external-format :utf-8)
332 (with-test (:name (:word-break-class))
333 (loop for line = (read-line s nil nil)
334 while line
335 unless (or (string= "" line) (eql 0 (position #\# line)))
336 do (test-property-line #'word-break-class
337 (substitute #\- #\_ line))))))
339 (test-word-break-class)
341 (defun test-sentence-break-class ()
342 (declare (optimize (debug 2)))
343 (with-open-file (s "data/SentenceBreakProperty.txt"
344 :external-format :utf-8)
345 (with-test (:name (:sentence-break-class))
346 (loop for line = (read-line s nil nil)
347 while line
348 unless (or (string= "" line) (eql 0 (position #\# line)))
349 do (test-property-line #'sentence-break-class line)))))
351 (test-sentence-break-class)
353 (defun test-line-break-class ()
354 (declare (optimize (debug 2)))
355 (with-open-file (s "../tools-for-build/LineBreak.txt"
356 :external-format :utf-8)
357 (with-test (:name (:line-break-class))
358 (loop for line = (read-line s nil nil)
359 while line
360 unless (or (string= "" line) (eql 0 (position #\# line)))
361 do (test-property-line #'line-break-class line)))))
363 (test-line-break-class)