1 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; While most of SBCL is derived from the CMU CL system, the test
5 ;;;; files (like this one) were written from scratch after the fork
8 ;;;; This software is in the public domain and is provided with
9 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
10 ;;;; more information.
12 (use-package :sb-unicode
)
14 (defconstant +mul
+ #+sb-unicode
(code-char 215) #-sb-unicode
#\
*)
15 (defconstant +div
+ #+sb-unicode
(code-char 247) #-sb-unicode
#\
/)
17 (defun line-to-clusters (line)
19 (remove "" (split-string (substitute #\Space
+div
+ line
) #\Space
)
21 clusters cluster
(nobreak t
))
22 (loop for i in codepoints do
23 (if (string= i
(string +mul
+)) (setf nobreak t
)
26 (push (nreverse cluster
) clusters
)
28 (push (code-char (parse-integer i
:radix
16)) cluster
)
30 (when cluster
(push (nreverse cluster
) clusters
))
31 (setf clusters
(nreverse (mapcar #'(lambda (x) (coerce x
'string
)) clusters
)))
34 (defun parse-codepoints (string &key
(singleton-list t
))
36 (lambda (s) (parse-integer s
:radix
16))
37 (remove "" (split-string string
#\Space
) :test
#'string
=))))
38 (if (not (or (cdr list
) singleton-list
)) (car list
) list
)))
40 (defun test-line (fn line
)
41 (let ((relevant-portion (subseq line
0 (position #\
# line
))))
42 (when (string/= relevant-portion
"")
47 (remove +mul
+ (remove +div
+ relevant-portion
))))
49 (assert (equalp (funcall fn string
)
50 (line-to-clusters relevant-portion
)))))))
52 (defun test-graphemes ()
53 (declare (optimize (debug 2)))
54 (with-test (:name
(:grapheme-breaking
)
55 :skipped-on
'(not :sb-unicode
))
56 (with-open-file (s "data/GraphemeBreakTest.txt" :external-format
:utf8
)
57 (loop for line
= (read-line s nil nil
)
59 do
(test-line #'graphemes
(remove #\Tab line
))))))
64 (declare (optimize (debug 2)))
65 (with-test (:name
(:word-breaking
)
66 :skipped-on
'(not :sb-unicode
))
67 (with-open-file (s "data/WordBreakTest.txt" :external-format
:utf8
)
68 (loop for line
= (read-line s nil nil
)
70 do
(test-line #'words
(remove #\Tab line
))))))
74 (defun test-sentences ()
75 (declare (optimize (debug 2)))
76 (with-test (:name
(:sentence-breaking
)
77 :skipped-on
'(not :sb-unicode
))
78 (with-open-file (s "data/SentenceBreakTest.txt" :external-format
:utf8
)
79 (loop for line
= (read-line s nil nil
)
81 do
(test-line #'sentences
(remove #\Tab line
))))))
85 (defun process-line-break-line (line)
86 (let ((elements (split-string line
#\Space
)))
89 ((eql (char e
0) +mul
+) :cant
)
90 ((eql (char e
0) +div
+) :can
)
91 (t (code-char (parse-integer e
:radix
16)))))
94 (defun string-from-line-break-line (string)
97 #'(lambda (s) (code-char (parse-integer s
:radix
16)))
100 (split-string (remove +mul
+ (remove +div
+ string
)) #\Space
)
101 :test
#'string
=)) 'string
))
103 (defun test-line-breaking ()
104 (declare (optimize (debug 2)))
105 (with-test (:name
(:line-breaking
)
106 :skipped-on
'(not :sb-unicode
))
107 (with-open-file (s "data/LineBreakTest.txt" :external-format
:utf8
)
108 (loop for line
= (read-line s nil nil
)
110 for string
= (subseq line
0 (max 0 (1- (or (position #\
# line
) 1))))
111 unless
(string= string
"")
114 (process-line-break-line string
)
117 (sb-unicode::line-break-annotate
118 (string-from-line-break-line string
)))))))))