prehash-for-perfect-hash: add truly-thes.
[sbcl.git] / tests / utf-16be.pure.lisp
bloba5cf0f2c2bf183c4cbdb5bf98c225654f036edb2
1 ;;;; This file is for testing external-format functionality for
2 ;;;; big-endian UTF-16, using test machinery which does not have side
3 ;;;; effects. Note that the tests here reach into unexported
4 ;;;; functionality, and should not be used as a guide for users.
6 ;;;; This software is part of the SBCL system. See the README file for
7 ;;;; more information.
8 ;;;;
9 ;;;; While most of SBCL is derived from the CMU CL system, the test
10 ;;;; files (like this one) were written from scratch after the fork
11 ;;;; from CMU CL.
12 ;;;;
13 ;;;; This software is in the public domain and is provided with
14 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
15 ;;;; more information.
17 #-sb-unicode (invoke-restart 'run-tests::skip-file)
19 (defvar *test-path* (scratch-file-name))
21 (macrolet ((input-test (inxf expected &environment env)
22 `(progn
23 (with-test (:name (,(macroexpand 'name env) :file ,inxf))
24 (with-open-file (s *test-path* :external-format ',inxf)
25 (handler-bind ((sb-int:character-decoding-error
26 (lambda (c) (use-value "" c))))
27 (let* ((string (make-string 20))
28 (count (read-sequence string s)))
29 (assert (equal (map 'list 'identity (subseq string 0 count)) ,expected))))))
30 (with-test (:name (,(macroexpand 'name env) :octets ,inxf))
31 (handler-bind ((sb-int:character-decoding-error
32 (lambda (c) (use-value "" c))))
33 (let ((octets (coerce bytes '(simple-array (unsigned-byte 8) 1))))
34 (assert (equal (sb-ext:octets-to-string octets :external-format ',inxf)
35 (coerce ,expected 'string))))))))
36 (with-input-bytes ((id bytes) &body body)
37 `(let ((bytes ,bytes))
38 (with-open-file (s *test-path* :element-type '(unsigned-byte 8)
39 :direction :output :if-exists :supersede)
40 (dolist (byte bytes)
41 (write-byte byte s)))
42 (symbol-macrolet ((name ,id))
43 (macrolet ((test (inxf expected)
44 `(input-test ,inxf ,expected)))
45 ,@body)))))
46 (with-input-bytes ((:input :invalid) (list #x00 #x35 #xff #xff #x00 #x37))
47 (test :utf-16be '(#\5 #\7))
48 (test (:utf-16be :replacement #\?) '(#\5 #\? #\7)))
49 (with-input-bytes ((:input :multiple-invalid) (list #x00 #x35 #xff #xff #xff #xff #x00 #x37))
50 (test :utf-16be '(#\5 #\7))
51 (test (:utf-16be :replacement #\?) '(#\5 #\? #\? #\7)))
52 (with-input-bytes ((:input :invalid-units) (list #x00 #x35 #x00))
53 (test :utf-16be '(#\5))
54 (test (:utf-16be :replacement #\?) '(#\5 #\?)))
55 (with-input-bytes ((:input :invalid-then-invalid-units) (list #xff #xff #x00))
56 (test :utf-16be '())
57 (test (:utf-16be :replacement #\?) '(#\? #\?))))
59 (macrolet ((output-test (chars outxf expected &environment env)
60 `(progn
61 (with-test (:name (,(macroexpand 'name env) file-string-length ,outxf))
62 (let ((string (coerce ,chars 'string)))
63 (with-open-file (s *test-path* :element-type 'character
64 :external-format ',outxf
65 :direction :output :if-exists :supersede)
66 (handler-bind ((sb-int:character-encoding-error
67 (lambda (c) (use-value "" c))))
68 (let ((pos (file-position s))
69 (len (file-string-length s string)))
70 (let ((actual
71 (loop for index from 0 below (length string)
72 for char = (char string index)
73 for thislen = (file-string-length s char)
74 for thisstringlen = (file-string-length s (subseq string index))
75 if (null thisstringlen) do (assert (some 'null (subseq ,expected index))) else do (assert (notany 'null (subseq ,expected index)))
76 collect thislen
77 if (and (null len) thisstringlen) do (setf len (+ pos thisstringlen))
78 if thisstringlen do (assert (= (+ pos thisstringlen) len))
79 do (write-char char s)
80 if thislen do (assert (= (+ pos thislen) (file-position s)))
81 do (setf pos (file-position s)))))
82 (assert (equal actual ,expected))))))))))
83 (with-output-characters ((id chars) &body body)
84 `(let ((chars ,chars))
85 (symbol-macrolet ((name ,id))
86 (macrolet ((test (outxf expected)
87 `(output-test chars ,outxf ,expected)))
88 ,@body)))))
89 (with-output-characters ((:output :lf) (list #\5 #\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Linefeed #\7))
90 (test :utf-16be '(2 2 2 2)))
91 (with-output-characters ((:output :invalid :lf) (list #\5 (code-char #xdb00) (code-char #x12345) #\Linefeed #\7))
92 ;; A sufficiently-smart streams implementation could statically determine the lengths
93 ;; of replacement characters given as part of the external format
94 (test :utf-16be '(2 nil 4 2 2))
95 (test (:utf-16be :replacement #\?) '(2 nil 4 2 2))))