1 ;;; decoder-tests.el --- test for text decoder
3 ;; Copyright (C) 2013 Free Software Foundation, Inc.
5 ;; Author: Kenichi Handa <handa@gnu.org>
7 ;; This file is part of GNU Emacs.
9 ;; GNU Emacs is free software: you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation, either version 3 of the License, or
12 ;; (at your option) any later version.
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
26 ;;; Check ASCII optimizing decoder
28 ;; Directory to hold test data files.
29 (defvar decoder-tests-workdir
30 (expand-file-name "decoder-tests" temporary-file-directory
))
32 ;; Return the contents (specified by CONTENT-TYPE; ascii, latin, or
33 ;; binary) of a test file.
34 (defun decoder-tests-file-contents (content-type)
35 (let* ((ascii "ABCDEFGHIJKLMNOPQRSTUVWXYZ\n")
36 (latin (concat ascii
"ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ\n"))
37 (binary (string-to-multibyte
38 (concat (string-as-unibyte latin
)
39 (unibyte-string #xC0
#xC1 ?
\n)))))
40 (cond ((eq content-type
'ascii
) ascii
)
41 ((eq content-type
'latin
) latin
)
42 ((eq content-type
'binary
) binary
)
44 (error "Invalid file content type: %s" content-type
)))))
46 ;; Return the name of test file whose contents specified by
47 ;; CONTENT-TYPE and whose encoding specified by CODING-SYSTEM.
48 (defun decoder-tests-filename (content-type coding-system
)
49 (expand-file-name (format "%s-%s" content-type coding-system
)
50 decoder-tests-workdir
))
52 ;; Generate a test file whose contents specified by CONTENT-TYPE and
53 ;; whose encoding specified by CODING-SYSTEM.
54 (defun decoder-tests-gen-file (content-type coding-system
)
55 (or (file-directory-p decoder-tests-workdir
)
56 (mkdir decoder-tests-workdir t
))
57 (let ((file (decoder-tests-filename content-type coding-system
)))
59 (set-buffer-file-coding-system coding-system
)
60 (insert (decoder-tests-file-contents content-type
)))))
62 ;; Remove all generated test files.
63 (defun decoder-tests-remove-files ()
64 (delete-directory decoder-tests-workdir t
))
66 ;;; The following three functions are filters for contents of a test
69 ;; Convert all LFs to CR LF sequences in the string STR.
70 (defun decoder-tests-lf-to-crlf (str)
73 (goto-char (point-min))
74 (while (search-forward "\n" nil t
)
79 ;; Convert all LFs to CRs in the string STR.
80 (defun decoder-tests-lf-to-cr (str)
83 (subst-char-in-region (point-min) (point-max) ?
\n ?
\r)
86 ;; Convert all LFs to LF LF sequences in the string STR.
87 (defun decoder-tests-lf-to-lflf (str)
90 (goto-char (point-min))
91 (while (search-forward "\n" nil t
)
95 ;; Prepend the UTF-8 BOM to STR.
96 (defun decoder-tests-add-bom (str)
97 (concat "\xfeff" str
))
99 ;; Test the decoding of a file whose contents and encoding are
100 ;; specified by CONTENT-TYPE and WRITE-CODING. The test passes if the
101 ;; file is read by READ-CODING and detected as DETECTED-CODING and the
102 ;; contents is correctly decoded.
103 ;; Optional 5th arg TRANSLATOR is a function to translate the original
104 ;; file contents to match with the expected result of decoding. For
105 ;; instance, when a file of dos eol-type is read by unix eol-type,
106 ;; `decode-test-lf-to-crlf' must be specified.
108 (defun decoder-tests (content-type write-coding read-coding detected-coding
109 &optional translator
)
110 (prefer-coding-system 'utf-8-auto
)
111 (let ((filename (decoder-tests-filename content-type write-coding
)))
113 (let ((coding-system-for-read read-coding
)
114 (contents (decoder-tests-file-contents content-type
))
115 (disable-ascii-optimization nil
))
117 (setq contents
(funcall translator contents
)))
118 (insert-file-contents filename
)
119 (if (and (coding-system-equal buffer-file-coding-system detected-coding
)
120 (string= (buffer-string) contents
))
122 (list buffer-file-coding-system
123 (string-to-list (buffer-string))
124 (string-to-list contents
)))))))
126 (ert-deftest ert-test-decoder-ascii
()
129 (dolist (eol-type '(unix dos mac
))
130 (decoder-tests-gen-file 'ascii eol-type
))
131 (should-not (decoder-tests 'ascii
'unix
'undecided
'unix
))
132 (should-not (decoder-tests 'ascii
'dos
'undecided
'dos
))
133 (should-not (decoder-tests 'ascii
'dos
'dos
'dos
))
134 (should-not (decoder-tests 'ascii
'mac
'undecided
'mac
))
135 (should-not (decoder-tests 'ascii
'mac
'mac
'mac
))
136 (should-not (decoder-tests 'ascii
'dos
'utf-8
'utf-8-dos
))
137 (should-not (decoder-tests 'ascii
'dos
'unix
'unix
138 'decoder-tests-lf-to-crlf
))
139 (should-not (decoder-tests 'ascii
'mac
'dos
'dos
140 'decoder-tests-lf-to-cr
))
141 (should-not (decoder-tests 'ascii
'dos
'mac
'mac
142 'decoder-tests-lf-to-lflf
)))
143 (decoder-tests-remove-files)))
145 (ert-deftest ert-test-decoder-latin
()
148 (dolist (coding '("utf-8" "utf-8-with-signature"))
149 (dolist (eol-type '("unix" "dos" "mac"))
150 (decoder-tests-gen-file 'latin
151 (intern (concat coding
"-" eol-type
)))))
152 (should-not (decoder-tests 'latin
'utf-8-unix
'undecided
'utf-8-unix
))
153 (should-not (decoder-tests 'latin
'utf-8-unix
'utf-8-unix
'utf-8-unix
))
154 (should-not (decoder-tests 'latin
'utf-8-dos
'undecided
'utf-8-dos
))
155 (should-not (decoder-tests 'latin
'utf-8-dos
'utf-8-dos
'utf-8-dos
))
156 (should-not (decoder-tests 'latin
'utf-8-mac
'undecided
'utf-8-mac
))
157 (should-not (decoder-tests 'latin
'utf-8-mac
'utf-8-mac
'utf-8-mac
))
158 (should-not (decoder-tests 'latin
'utf-8-dos
'unix
'utf-8-unix
159 'decoder-tests-lf-to-crlf
))
160 (should-not (decoder-tests 'latin
'utf-8-mac
'dos
'utf-8-dos
161 'decoder-tests-lf-to-cr
))
162 (should-not (decoder-tests 'latin
'utf-8-dos
'mac
'utf-8-mac
163 'decoder-tests-lf-to-lflf
))
164 (should-not (decoder-tests 'latin
'utf-8-with-signature-unix
'undecided
165 'utf-8-with-signature-unix
))
166 (should-not (decoder-tests 'latin
'utf-8-with-signature-unix
'utf-8-auto
167 'utf-8-with-signature-unix
))
168 (should-not (decoder-tests 'latin
'utf-8-with-signature-dos
'undecided
169 'utf-8-with-signature-dos
))
170 (should-not (decoder-tests 'latin
'utf-8-with-signature-unix
'utf-8
171 'utf-8-unix
'decoder-tests-add-bom
))
172 (should-not (decoder-tests 'latin
'utf-8-with-signature-unix
'utf-8
173 'utf-8-unix
'decoder-tests-add-bom
)))
174 (decoder-tests-remove-files)))
176 (ert-deftest ert-test-decoder-binary
()
179 (dolist (eol-type '("unix" "dos" "mac"))
180 (decoder-tests-gen-file 'binary
181 (intern (concat "raw-text" "-" eol-type
))))
182 (should-not (decoder-tests 'binary
'raw-text-unix
'undecided
184 (should-not (decoder-tests 'binary
'raw-text-dos
'undecided
186 (should-not (decoder-tests 'binary
'raw-text-mac
'undecided
188 (should-not (decoder-tests 'binary
'raw-text-dos
'unix
189 'raw-text-unix
'decoder-tests-lf-to-crlf
))
190 (should-not (decoder-tests 'binary
'raw-text-mac
'dos
191 'raw-text-dos
'decoder-tests-lf-to-cr
))
192 (should-not (decoder-tests 'binary
'raw-text-dos
'mac
193 'raw-text-mac
'decoder-tests-lf-to-lflf
)))
194 (decoder-tests-remove-files)))
198 ;;; The following is for benchmark testing of the new optimized
199 ;;; decoder, not for regression testing.
201 (defun generate-ascii-file ()
206 (defun generate-rarely-nonascii-file ()
214 (defun generate-mostly-nonascii-file ()
226 (defvar test-file-list
227 '((generate-ascii-file
228 ("~/ascii-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" unix
)
229 ("~/ascii-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" unix
)
230 ("~/ascii-tag-none.unix" "" unix
)
231 ("~/ascii-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" dos
)
232 ("~/ascii-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" dos
)
233 ("~/ascii-tag-none.dos" "" dos
))
234 (generate-rarely-nonascii-file
235 ("~/utf-8-r-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" utf-8-unix
)
236 ("~/utf-8-r-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" utf-8-unix
)
237 ("~/utf-8-r-tag-none.unix" "" utf-8-unix
)
238 ("~/utf-8-r-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" utf-8-dos
)
239 ("~/utf-8-r-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" utf-8-dos
)
240 ("~/utf-8-r-tag-none.dos" "" utf-8-dos
))
241 (generate-mostly-nonascii-file
242 ("~/utf-8-m-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" utf-8-unix
)
243 ("~/utf-8-m-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" utf-8-unix
)
244 ("~/utf-8-m-tag-none.unix" "" utf-8-unix
)
245 ("~/utf-8-m-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" utf-8-dos
)
246 ("~/utf-8-m-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" utf-8-dos
)
247 ("~/utf-8-m-tag-none.dos" "" utf-8-dos
))))
249 (defun generate-benchmark-test-file ()
252 (message "Generating data...")
253 (dolist (files test-file-list
)
254 (delete-region (point-min) (point-max))
255 (funcall (car files
))
256 (dolist (file (cdr files
))
257 (message "Writing %s..." (car file
))
258 (goto-char (point-min))
259 (insert (nth 1 file
) "\n")
260 (let ((coding-system-for-write (nth 2 file
)))
261 (write-region (point-min) (point-max) (car file
)))
262 (delete-region (point-min) (point))))))
264 (defun benchmark-decoder ()
265 (let ((gc-cons-threshold 4000000))
266 (insert "Without optimization:\n")
267 (dolist (files test-file-list
)
268 (dolist (file (cdr files
))
269 (let* ((disable-ascii-optimization t
)
270 (result (benchmark-run 10
271 (with-temp-buffer (insert-file-contents (car file
))))))
272 (insert (format "%s: %s\n" (car file
) result
)))))
273 (insert "With optimization:\n")
274 (dolist (files test-file-list
)
275 (dolist (file (cdr files
))
276 (let* ((disable-ascii-optimization nil
)
277 (result (benchmark-run 10
278 (with-temp-buffer (insert-file-contents (car file
))))))
279 (insert (format "%s: %s\n" (car file
) result
)))))))