Add tests for heapsort
[sbcl.git] / tests / unicode-breaking.impure.lisp
blob728cd58e37b77012315bae5089255e88e92aea9d
1 ;;;; This software is part of the SBCL system. See the README file for
2 ;;;; more information.
3 ;;;;
4 ;;;; While most of SBCL is derived from the CMU CL system, the test
5 ;;;; files (like this one) were written from scratch after the fork
6 ;;;; from CMU CL.
7 ;;;;
8 ;;;; This software is in the public domain and is provided with
9 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
10 ;;;; more information.
12 (use-package :sb-unicode)
14 (defconstant +mul+ #+sb-unicode (code-char 215) #-sb-unicode #\*)
15 (defconstant +div+ #+sb-unicode (code-char 247) #-sb-unicode #\/)
17 (defun line-to-clusters (line)
18 (let ((codepoints
19 (remove "" (split-string (substitute #\Space +div+ line) #\Space)
20 :test #'string=))
21 clusters cluster (nobreak t))
22 (loop for i in codepoints do
23 (if (string= i (string +mul+)) (setf nobreak t)
24 (progn
25 (unless nobreak
26 (push (nreverse cluster) clusters)
27 (setf cluster nil))
28 (push (code-char (parse-integer i :radix 16)) cluster)
29 (setf nobreak nil))))
30 (when cluster (push (nreverse cluster) clusters))
31 (setf clusters (nreverse (mapcar #'(lambda (x) (coerce x 'string)) clusters)))
32 clusters))
34 (defun parse-codepoints (string &key (singleton-list t))
35 (let ((list (mapcar
36 (lambda (s) (parse-integer s :radix 16))
37 (remove "" (split-string string #\Space) :test #'string=))))
38 (if (not (or (cdr list) singleton-list)) (car list) list)))
40 (defun test-line (fn line)
41 (let ((relevant-portion (subseq line 0 (position #\# line))))
42 (when (string/= relevant-portion "")
43 (let ((string
44 (coerce (mapcar
45 #'code-char
46 (parse-codepoints
47 (remove +mul+ (remove +div+ relevant-portion))))
48 'string)))
49 (assert (equalp (funcall fn string)
50 (line-to-clusters relevant-portion)))))))
52 (defun test-graphemes ()
53 (declare (optimize (debug 2)))
54 (with-test (:name (:grapheme-breaking)
55 :skipped-on '(not :sb-unicode))
56 (with-open-file (s "data/GraphemeBreakTest.txt" :external-format :utf8)
57 (loop for line = (read-line s nil nil)
58 while line
59 do (test-line #'graphemes (remove #\Tab line))))))
61 (test-graphemes)
63 (defun test-words ()
64 (declare (optimize (debug 2)))
65 (with-test (:name (:word-breaking)
66 :skipped-on '(not :sb-unicode))
67 (with-open-file (s "data/WordBreakTest.txt" :external-format :utf8)
68 (loop for line = (read-line s nil nil)
69 while line
70 do (test-line #'words (remove #\Tab line))))))
72 (test-words)
74 (defun test-sentences ()
75 (declare (optimize (debug 2)))
76 (with-test (:name (:sentence-breaking)
77 :skipped-on '(not :sb-unicode))
78 (with-open-file (s "data/SentenceBreakTest.txt" :external-format :utf8)
79 (loop for line = (read-line s nil nil)
80 while line
81 do (test-line #'sentences (remove #\Tab line))))))
83 (test-sentences)
85 (defun process-line-break-line (line)
86 (let ((elements (split-string line #\Space)))
87 (mapcar #'(lambda (e)
88 (cond
89 ((eql (char e 0) +mul+) :cant)
90 ((eql (char e 0) +div+) :can)
91 (t (code-char (parse-integer e :radix 16)))))
92 elements)))
94 (defun string-from-line-break-line (string)
95 (coerce
96 (mapcar
97 #'(lambda (s) (code-char (parse-integer s :radix 16)))
98 (remove
100 (split-string (remove +mul+ (remove +div+ string)) #\Space)
101 :test #'string=)) 'string))
103 (defun test-line-breaking ()
104 (declare (optimize (debug 2)))
105 (with-test (:name (:line-breaking)
106 :skipped-on '(not :sb-unicode))
107 (with-open-file (s "data/LineBreakTest.txt" :external-format :utf8)
108 (loop for line = (read-line s nil nil)
109 while line
110 for string = (subseq line 0 (max 0 (1- (or (position #\# line) 1))))
111 unless (string= string "")
113 (assert (equal
114 (process-line-break-line string)
115 (substitute
116 :can :must
117 (sb-unicode::line-break-annotate
118 (string-from-line-break-line string)))))))))
120 (test-line-breaking)