grovel-headers.c: Put win32 stuff into grovel-headers-win32.h.
[sbcl.git] / tests / unicode-normalization.impure.lisp
blob898b704da00bd9df76f8e7f38240a312cef1543f
1 ;;;; This software is part of the SBCL system. See the README file for
2 ;;;; more information.
3 ;;;;
4 ;;;; While most of SBCL is derived from the CMU CL system, the test
5 ;;;; files (like this one) were written from scratch after the fork
6 ;;;; from CMU CL.
7 ;;;;
8 ;;;; This software is in the public domain and is provided with
9 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
10 ;;;; more information.
12 (use-package :sb-unicode)
14 (defun parse-one-line (line)
15 (do* ((i 0 (1+ i))
16 (start 0 (1+ end))
17 (end (position #\; line :start start) (position #\; line :start start))
18 result)
19 ((= i 5) (nreverse result))
20 (with-input-from-string (s (subseq line start (1+ end)))
21 (let ((*read-base* 16.))
22 (push (map 'string 'code-char (read-delimited-list #\; s)) result)))))
24 (defmacro assert-all-string= (base &body others)
25 `(progn
26 ,@(loop for test in others
27 collect `(assert (string= ,base ,test)))))
29 (defun test-line (c1 c2 c3 c4 c5)
30 ;; NFC
31 (assert-all-string= c2
32 (normalize-string c1 :nfc)
33 (normalize-string c2 :nfc)
34 (normalize-string c3 :nfc))
35 (assert-all-string= c4
36 (normalize-string c4 :nfc)
37 (normalize-string c5 :nfc))
39 ;; NFD
40 (assert-all-string= c3
41 (normalize-string c1 :nfd)
42 (normalize-string c2 :nfd)
43 (normalize-string c3 :nfd))
44 (assert-all-string= c5
45 (normalize-string c4 :nfd)
46 (normalize-string c5 :nfd))
48 ;; NFKC
49 (assert-all-string= c4
50 (normalize-string c1 :nfkc)
51 (normalize-string c2 :nfkc)
52 (normalize-string c3 :nfkc)
53 (normalize-string c4 :nfkc)
54 (normalize-string c5 :nfkc))
56 ;; NFKD
57 (assert-all-string= c5
58 (normalize-string c1 :nfkd)
59 (normalize-string c2 :nfkd)
60 (normalize-string c3 :nfkd)
61 (normalize-string c4 :nfkd)
62 (normalize-string c5 :nfkd)))
64 (defun test-no-normalization (string)
65 (assert-all-string= string
66 (normalize-string string :nfc)
67 (normalize-string string :nfd)
68 (normalize-string string :nfkc)
69 (normalize-string string :nfkd)))
71 (defun test-normalization ()
72 (declare (optimize (debug 2)))
73 (with-open-file (s "data/NormalizationTest.txt" :external-format :latin1)
74 (do ((line (read-line s) (read-line s)))
75 ((char/= #\# (char line 0))
76 (assert (string= "@Part0" line :end2 6))
77 (assert (char= #\# (char (read-line s) 0)))))
78 ;; Part0: specific cases
79 (with-test (:name (:unicode-normalization :part0)
80 :skipped-on '(not :sb-unicode))
81 (do ((line (read-line s) (read-line s)))
82 ((char= #\# (char line 0))
83 (assert (string= "@Part1" (read-line s) :end2 6))
84 (assert (char= #\# (char (read-line s) 0)))
85 (assert (char= #\# (char (read-line s) 0))))
86 (destructuring-bind (c1 c2 c3 c4 c5)
87 (parse-one-line line)
88 (test-line c1 c2 c3 c4 c5))))
89 ;; Part1: single characters. (Extra work to check for conformance
90 ;; on unlisted entries)
91 (with-test (:name (:unicode-normalization :part1)
92 :skipped-on '(not :sb-unicode))
93 (do ((line (read-line s) (read-line s))
94 (code 0))
95 ((char= #\# (char line 0))
96 (do ((code code (1+ code)))
97 ((= code #x110000))
98 (test-no-normalization (string (code-char code))))
99 (assert (string= "@Part2" (read-line s) :end2 6))
100 (assert (char= #\# (char (read-line s) 0))))
101 (destructuring-bind (c1 c2 c3 c4 c5)
102 (parse-one-line line)
103 (do ((c code (1+ c)))
104 ((= c (char-code (char c1 0)))
105 (test-line c1 c2 c3 c4 c5)
106 (setf code (1+ c)))
107 (test-no-normalization (string (code-char code)))))))
108 ;; Part2: Canonical Order Test
109 (with-test (:name (:unicode-normalization :part2)
110 :skipped-on '(not :sb-unicode))
111 (do ((line (read-line s) (read-line s)))
112 ((char= #\# (char line 0))
113 (assert (string= "@Part3" (read-line s) :end2 6))
114 (assert (char= #\# (char (read-line s) 0))))
115 (destructuring-bind (c1 c2 c3 c4 c5)
116 (parse-one-line line)
117 (test-line c1 c2 c3 c4 c5))))
118 ;; Part3: PRI #29 Test
119 (with-test (:name (:unicode-normalization :part3)
120 :skipped-on '(not :sb-unicode))
121 (do ((line (read-line s) (read-line s)))
122 ((char= #\# (char line 0))
123 (assert (char= #\# (char (read-line s) 0)))
124 (assert (null (read-line s nil nil))))
125 (destructuring-bind (c1 c2 c3 c4 c5)
126 (parse-one-line line)
127 (test-line c1 c2 c3 c4 c5))))))
129 (test-normalization)