first cut at testing unicode normalization
[sbcl.git] / tests / unicode-normalization.impure.lisp
blob06d555f9096aa1d235f3590321051fa20b4e06b3
1 (import 'sb-impl::normalize-string)
3 (defun parse-one-line (line)
4 (do* ((i 0 (1+ i))
5 (start 0 (1+ end))
6 (end (position #\; line :start start) (position #\; line :start start))
7 result)
8 ((= i 5) (nreverse result))
9 (with-input-from-string (s (subseq line start (1+ end)))
10 (let ((*read-base* 16.))
11 (push (map 'string 'code-char (read-delimited-list #\; s)) result)))))
13 (defmacro assert-all-string= (base &body others)
14 `(progn
15 ,@(loop for test in others
16 collect `(assert (string= ,base ,test)))))
18 (defun test-line (c1 c2 c3 c4 c5)
19 ;; NFC
20 #+nil
21 (assert-all-string= c2
22 (normalize-string c1 :nfc)
23 (normalize-string c2 :nfc)
24 (normalize-string c3 :nfc))
25 #+nil
26 (assert-all-string= c4
27 (normalize-string c4 :nfc)
28 (normalize-string c5 :nfc))
30 ;; NFD
31 (assert-all-string= c3
32 (normalize-string c1 :nfd)
33 (normalize-string c2 :nfd)
34 (normalize-string c3 :nfd))
35 (assert-all-string= c5
36 (normalize-string c4 :nfd)
37 (normalize-string c5 :nfd))
39 ;; NFKC
40 #+nil
41 (assert-all-string= c4
42 (normalize-string c1 :nfkc)
43 (normalize-string c2 :nfkc)
44 (normalize-string c3 :nfkc)
45 (normalize-string c4 :nfkc)
46 (normalize-string c5 :nfkc))
48 ;; NFKD
49 (assert-all-string= c5
50 (normalize-string c1 :nfkd)
51 (normalize-string c2 :nfkd)
52 (normalize-string c3 :nfkd)
53 (normalize-string c4 :nfkd)
54 (normalize-string c5 :nfkd)))
56 (defun test-no-normalization (string)
57 (assert-all-string= string
58 #+nil
59 (normalize-string string :nfc)
60 (normalize-string string :nfd)
61 #+nil
62 (normalize-string string :nfkc)
63 (normalize-string string :nfkd)))
65 (defun test-normalization ()
66 (declare (optimize (debug 2)))
67 (with-open-file (s "data/NormalizationTest.txt" :external-format :latin1)
68 (do ((line (read-line s) (read-line s)))
69 ((char/= #\# (char line 0))
70 (assert (string= "@Part0" line :end2 6))
71 (assert (char= #\# (char (read-line s) 0)))))
72 ;; Part0: specific cases
73 (do ((line (read-line s) (read-line s)))
74 ((char= #\# (char line 0))
75 (assert (string= "@Part1" (read-line s) :end2 6))
76 (assert (char= #\# (char (read-line s) 0)))
77 (assert (char= #\# (char (read-line s) 0))))
78 (destructuring-bind (c1 c2 c3 c4 c5)
79 (parse-one-line line)
80 (write-line line)
81 (test-line c1 c2 c3 c4 c5)))
82 ;; Part1: single characters. (Extra work to check for conformance
83 ;; on unlisted entries)
84 (do ((line (read-line s) (read-line s))
85 (code 0))
86 ((char= #\# (char line 0))
87 (do ((code code (1+ code)))
88 ((= code #x110000))
89 (test-no-normalization (string (code-char code)))))
90 (destructuring-bind (c1 c2 c3 c4 c5)
91 (parse-one-line line)
92 (do ((c code (1+ c)))
93 ((= c (char-code (char c1 0)))
94 (test-line c1 c2 c3 c4 c5)
95 (setf code (1+ c)))
96 (test-no-normalization (string (code-char code))))))))