1 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; While most of SBCL is derived from the CMU CL system, the test
5 ;;;; files (like this one) were written from scratch after the fork
8 ;;;; This software is in the public domain and is provided with
9 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
10 ;;;; more information.
12 (use-package :sb-unicode
)
14 (defconstant +mul
+ #+sb-unicode
(code-char 215) #-sb-unicode
#\
*)
15 (defconstant +div
+ #+sb-unicode
(code-char 247) #-sb-unicode
#\
/)
17 (defun split-string (string delimiter
)
18 (loop for begin
= 0 then
(1+ end
)
19 for end
= (position delimiter string
) then
(position delimiter string
:start begin
)
20 collect
(subseq string begin end
)
23 (defun line-to-clusters (line)
25 (remove "" (split-string (substitute #\Space
+div
+ line
) #\Space
)
27 clusters cluster
(nobreak t
))
28 (loop for i in codepoints do
29 (if (string= i
(string +mul
+)) (setf nobreak t
)
32 (push (nreverse cluster
) clusters
)
34 (push (code-char (parse-integer i
:radix
16)) cluster
)
36 (when cluster
(push (nreverse cluster
) clusters
))
37 (setf clusters
(nreverse (mapcar #'(lambda (x) (coerce x
'string
)) clusters
)))
40 (defun parse-codepoints (string &key
(singleton-list t
))
42 (lambda (s) (parse-integer s
:radix
16))
43 (remove "" (split-string string
#\Space
) :test
#'string
=))))
44 (if (not (or (cdr list
) singleton-list
)) (car list
) list
)))
46 (defun test-line (fn line
)
47 (let ((relevant-portion (subseq line
0 (position #\
# line
))))
48 (when (string/= relevant-portion
"")
53 (remove +mul
+ (remove +div
+ relevant-portion
))))
55 (assert (equalp (funcall fn string
)
56 (line-to-clusters relevant-portion
)))))))
58 (defun test-graphemes ()
59 (declare (optimize (debug 2)))
60 (with-test (:name
(:grapheme-breaking
)
61 :skipped-on
'(not :sb-unicode
))
62 (with-open-file (s "data/GraphemeBreakTest.txt" :external-format
:utf8
)
63 (loop for line
= (read-line s nil nil
)
65 do
(test-line #'graphemes
(remove #\Tab line
))))))
70 (declare (optimize (debug 2)))
71 (with-test (:name
(:word-breaking
)
72 :skipped-on
'(not :sb-unicode
))
73 (with-open-file (s "data/WordBreakTest.txt" :external-format
:utf8
)
74 (loop for line
= (read-line s nil nil
)
76 do
(test-line #'words
(remove #\Tab line
))))))
80 (defun test-sentences ()
81 (declare (optimize (debug 2)))
82 (with-test (:name
(:sentence-breaking
)
83 :skipped-on
'(not :sb-unicode
))
84 (with-open-file (s "data/SentenceBreakTest.txt" :external-format
:utf8
)
85 (loop for line
= (read-line s nil nil
)
87 do
(test-line #'sentences
(remove #\Tab line
))))))
91 (defun process-line-break-line (line)
92 (let ((elements (split-string line
#\Space
)))
95 ((eql (char e
0) +mul
+) :cant
)
96 ((eql (char e
0) +div
+) :can
)
97 (t (code-char (parse-integer e
:radix
16)))))
100 (defun string-from-line-break-line (string)
103 #'(lambda (s) (code-char (parse-integer s
:radix
16)))
106 (split-string (remove +mul
+ (remove +div
+ string
)) #\Space
)
107 :test
#'string
=)) 'string
))
109 (defun test-line-breaking ()
110 (declare (optimize (debug 2)))
111 (with-test (:name
(:line-breaking
)
112 :skipped-on
'(not :sb-unicode
))
113 (with-open-file (s "data/LineBreakTest.txt" :external-format
:utf8
)
114 (loop for line
= (read-line s nil nil
)
116 for string
= (subseq line
0 (max 0 (1- (or (position #\
# line
) 1))))
117 unless
(string= string
"")
120 (process-line-break-line string
)
123 (sb-unicode::line-break-annotate
124 (string-from-line-break-line string
)))))))))