Fix problem of retaining !LATE-TYPE-COLD-INIT symbol
[sbcl.git] / tests / unicode-breaking.impure.lisp
blob38c8408f2c5016e7a223503f12aa36fdf591e8e2
1 ;;;; This software is part of the SBCL system. See the README file for
2 ;;;; more information.
3 ;;;;
4 ;;;; While most of SBCL is derived from the CMU CL system, the test
5 ;;;; files (like this one) were written from scratch after the fork
6 ;;;; from CMU CL.
7 ;;;;
8 ;;;; This software is in the public domain and is provided with
9 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
10 ;;;; more information.
12 (use-package :sb-unicode)
14 (defconstant +mul+ #+sb-unicode (code-char 215) #-sb-unicode #\*)
15 (defconstant +div+ #+sb-unicode (code-char 247) #-sb-unicode #\/)
17 (defun split-string (string delimiter)
18 (loop for begin = 0 then (1+ end)
19 for end = (position delimiter string) then (position delimiter string :start begin)
20 collect (subseq string begin end)
21 while end))
23 (defun line-to-clusters (line)
24 (let ((codepoints
25 (remove "" (split-string (substitute #\Space +div+ line) #\Space)
26 :test #'string=))
27 clusters cluster (nobreak t))
28 (loop for i in codepoints do
29 (if (string= i (string +mul+)) (setf nobreak t)
30 (progn
31 (unless nobreak
32 (push (nreverse cluster) clusters)
33 (setf cluster nil))
34 (push (code-char (parse-integer i :radix 16)) cluster)
35 (setf nobreak nil))))
36 (when cluster (push (nreverse cluster) clusters))
37 (setf clusters (nreverse (mapcar #'(lambda (x) (coerce x 'string)) clusters)))
38 clusters))
40 (defun parse-codepoints (string &key (singleton-list t))
41 (let ((list (mapcar
42 (lambda (s) (parse-integer s :radix 16))
43 (remove "" (split-string string #\Space) :test #'string=))))
44 (if (not (or (cdr list) singleton-list)) (car list) list)))
46 (defun test-line (fn line)
47 (let ((relevant-portion (subseq line 0 (position #\# line))))
48 (when (string/= relevant-portion "")
49 (let ((string
50 (coerce (mapcar
51 #'code-char
52 (parse-codepoints
53 (remove +mul+ (remove +div+ relevant-portion))))
54 'string)))
55 (assert (equalp (funcall fn string)
56 (line-to-clusters relevant-portion)))))))
58 (defun test-graphemes ()
59 (declare (optimize (debug 2)))
60 (with-test (:name (:grapheme-breaking)
61 :skipped-on '(not :sb-unicode))
62 (with-open-file (s "data/GraphemeBreakTest.txt" :external-format :utf8)
63 (loop for line = (read-line s nil nil)
64 while line
65 do (test-line #'graphemes (remove #\Tab line))))))
67 (test-graphemes)
69 (defun test-words ()
70 (declare (optimize (debug 2)))
71 (with-test (:name (:word-breaking)
72 :skipped-on '(not :sb-unicode))
73 (with-open-file (s "data/WordBreakTest.txt" :external-format :utf8)
74 (loop for line = (read-line s nil nil)
75 while line
76 do (test-line #'words (remove #\Tab line))))))
78 (test-words)
80 (defun test-sentences ()
81 (declare (optimize (debug 2)))
82 (with-test (:name (:sentence-breaking)
83 :skipped-on '(not :sb-unicode))
84 (with-open-file (s "data/SentenceBreakTest.txt" :external-format :utf8)
85 (loop for line = (read-line s nil nil)
86 while line
87 do (test-line #'sentences (remove #\Tab line))))))
89 (test-sentences)
91 (defun process-line-break-line (line)
92 (let ((elements (split-string line #\Space)))
93 (mapcar #'(lambda (e)
94 (cond
95 ((eql (char e 0) +mul+) :cant)
96 ((eql (char e 0) +div+) :can)
97 (t (code-char (parse-integer e :radix 16)))))
98 elements)))
100 (defun string-from-line-break-line (string)
101 (coerce
102 (mapcar
103 #'(lambda (s) (code-char (parse-integer s :radix 16)))
104 (remove
106 (split-string (remove +mul+ (remove +div+ string)) #\Space)
107 :test #'string=)) 'string))
109 (defun test-line-breaking ()
110 (declare (optimize (debug 2)))
111 (with-test (:name (:line-breaking)
112 :skipped-on '(not :sb-unicode))
113 (with-open-file (s "data/LineBreakTest.txt" :external-format :utf8)
114 (loop for line = (read-line s nil nil)
115 while line
116 for string = (subseq line 0 (max 0 (1- (or (position #\# line) 1))))
117 unless (string= string "")
119 (assert (equal
120 (process-line-break-line string)
121 (substitute
122 :can :must
123 (sb-unicode::line-break-annotate
124 (string-from-line-break-line string)))))))))
126 (test-line-breaking)