lisp/window.el: Fix previous change (2013-06-25T15:08:47Z!lekktu@gmail.com).
[emacs.git] / test / automated / decoder-tests.el
blob055626491c012f256ef6f94682012574343db5ea
1 ;;; decoder-tests.el --- test for text decoder
3 ;; Copyright (C) 2013 Free Software Foundation, Inc.
5 ;; Author: Kenichi Handa <handa@gnu.org>
7 ;; This file is part of GNU Emacs.
9 ;; GNU Emacs is free software: you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation, either version 3 of the License, or
12 ;; (at your option) any later version.
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
22 ;;; Code:
24 (require 'ert)
26 ;;; Check ASCII optimizing decoder
28 ;; Directory to hold test data files.
29 (defvar decoder-tests-workdir
30 (expand-file-name "decoder-tests" temporary-file-directory))
32 ;; Return the contents (specified by CONTENT-TYPE; ascii, latin, or
33 ;; binary) of a test file.
34 (defun decoder-tests-file-contents (content-type)
35 (let* ((ascii "ABCDEFGHIJKLMNOPQRSTUVWXYZ\n")
36 (latin (concat ascii "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ\n"))
37 (binary (string-to-multibyte
38 (concat (string-as-unibyte latin)
39 (unibyte-string #xC0 #xC1 ?\n)))))
40 (cond ((eq content-type 'ascii) ascii)
41 ((eq content-type 'latin) latin)
42 ((eq content-type 'binary) binary)
44 (error "Invalid file content type: %s" content-type)))))
46 ;; Return the name of test file whose contents specified by
47 ;; CONTENT-TYPE and whose encoding specified by CODING-SYSTEM.
48 (defun decoder-tests-filename (content-type coding-system)
49 (expand-file-name (format "%s-%s" content-type coding-system)
50 decoder-tests-workdir))
52 ;; Generate a test file whose contents specified by CONTENT-TYPE and
53 ;; whose encoding specified by CODING-SYSTEM.
54 (defun decoder-tests-gen-file (content-type coding-system)
55 (or (file-directory-p decoder-tests-workdir)
56 (mkdir decoder-tests-workdir t))
57 (let ((file (decoder-tests-filename content-type coding-system)))
58 (with-temp-file file
59 (set-buffer-file-coding-system coding-system)
60 (insert (decoder-tests-file-contents content-type)))))
62 ;; Remove all generated test files.
63 (defun decoder-tests-remove-files ()
64 (delete-directory decoder-tests-workdir t))
66 ;;; The following three functions are filters for contents of a test
67 ;;; file.
69 ;; Convert all LFs to CR LF sequences in the string STR.
70 (defun decoder-tests-lf-to-crlf (str)
71 (with-temp-buffer
72 (insert str)
73 (goto-char (point-min))
74 (while (search-forward "\n" nil t)
75 (delete-char -1)
76 (insert "\r\n"))
77 (buffer-string)))
79 ;; Convert all LFs to CRs in the string STR.
80 (defun decoder-tests-lf-to-cr (str)
81 (with-temp-buffer
82 (insert str)
83 (subst-char-in-region (point-min) (point-max) ?\n ?\r)
84 (buffer-string)))
86 ;; Convert all LFs to LF LF sequences in the string STR.
87 (defun decoder-tests-lf-to-lflf (str)
88 (with-temp-buffer
89 (insert str)
90 (goto-char (point-min))
91 (while (search-forward "\n" nil t)
92 (insert "\n"))
93 (buffer-string)))
95 ;; Prepend the UTF-8 BOM to STR.
96 (defun decoder-tests-add-bom (str)
97 (concat "\xfeff" str))
99 ;; Test the decoding of a file whose contents and encoding are
100 ;; specified by CONTENT-TYPE and WRITE-CODING. The test passes if the
101 ;; file is read by READ-CODING and detected as DETECTED-CODING and the
102 ;; contents is correctly decoded.
103 ;; Optional 5th arg TRANSLATOR is a function to translate the original
104 ;; file contents to match with the expected result of decoding. For
105 ;; instance, when a file of dos eol-type is read by unix eol-type,
106 ;; `decode-test-lf-to-crlf' must be specified.
108 (defun decoder-tests (content-type write-coding read-coding detected-coding
109 &optional translator)
110 (prefer-coding-system 'utf-8-auto)
111 (let ((filename (decoder-tests-filename content-type write-coding)))
112 (with-temp-buffer
113 (let ((coding-system-for-read read-coding)
114 (contents (decoder-tests-file-contents content-type))
115 (disable-ascii-optimization nil))
116 (if translator
117 (setq contents (funcall translator contents)))
118 (insert-file-contents filename)
119 (if (and (coding-system-equal buffer-file-coding-system detected-coding)
120 (string= (buffer-string) contents))
122 (list buffer-file-coding-system
123 (string-to-list (buffer-string))
124 (string-to-list contents)))))))
126 (ert-deftest ert-test-decoder-ascii ()
127 (unwind-protect
128 (progn
129 (dolist (eol-type '(unix dos mac))
130 (decoder-tests-gen-file 'ascii eol-type))
131 (should-not (decoder-tests 'ascii 'unix 'undecided 'unix))
132 (should-not (decoder-tests 'ascii 'dos 'undecided 'dos))
133 (should-not (decoder-tests 'ascii 'dos 'dos 'dos))
134 (should-not (decoder-tests 'ascii 'mac 'undecided 'mac))
135 (should-not (decoder-tests 'ascii 'mac 'mac 'mac))
136 (should-not (decoder-tests 'ascii 'dos 'utf-8 'utf-8-dos))
137 (should-not (decoder-tests 'ascii 'dos 'unix 'unix
138 'decoder-tests-lf-to-crlf))
139 (should-not (decoder-tests 'ascii 'mac 'dos 'dos
140 'decoder-tests-lf-to-cr))
141 (should-not (decoder-tests 'ascii 'dos 'mac 'mac
142 'decoder-tests-lf-to-lflf)))
143 (decoder-tests-remove-files)))
145 (ert-deftest ert-test-decoder-latin ()
146 (unwind-protect
147 (progn
148 (dolist (coding '("utf-8" "utf-8-with-signature"))
149 (dolist (eol-type '("unix" "dos" "mac"))
150 (decoder-tests-gen-file 'latin
151 (intern (concat coding "-" eol-type)))))
152 (should-not (decoder-tests 'latin 'utf-8-unix 'undecided 'utf-8-unix))
153 (should-not (decoder-tests 'latin 'utf-8-unix 'utf-8-unix 'utf-8-unix))
154 (should-not (decoder-tests 'latin 'utf-8-dos 'undecided 'utf-8-dos))
155 (should-not (decoder-tests 'latin 'utf-8-dos 'utf-8-dos 'utf-8-dos))
156 (should-not (decoder-tests 'latin 'utf-8-mac 'undecided 'utf-8-mac))
157 (should-not (decoder-tests 'latin 'utf-8-mac 'utf-8-mac 'utf-8-mac))
158 (should-not (decoder-tests 'latin 'utf-8-dos 'unix 'utf-8-unix
159 'decoder-tests-lf-to-crlf))
160 (should-not (decoder-tests 'latin 'utf-8-mac 'dos 'utf-8-dos
161 'decoder-tests-lf-to-cr))
162 (should-not (decoder-tests 'latin 'utf-8-dos 'mac 'utf-8-mac
163 'decoder-tests-lf-to-lflf))
164 (should-not (decoder-tests 'latin 'utf-8-with-signature-unix 'undecided
165 'utf-8-with-signature-unix))
166 (should-not (decoder-tests 'latin 'utf-8-with-signature-unix 'utf-8-auto
167 'utf-8-with-signature-unix))
168 (should-not (decoder-tests 'latin 'utf-8-with-signature-dos 'undecided
169 'utf-8-with-signature-dos))
170 (should-not (decoder-tests 'latin 'utf-8-with-signature-unix 'utf-8
171 'utf-8-unix 'decoder-tests-add-bom))
172 (should-not (decoder-tests 'latin 'utf-8-with-signature-unix 'utf-8
173 'utf-8-unix 'decoder-tests-add-bom)))
174 (decoder-tests-remove-files)))
176 (ert-deftest ert-test-decoder-binary ()
177 (unwind-protect
178 (progn
179 (dolist (eol-type '("unix" "dos" "mac"))
180 (decoder-tests-gen-file 'binary
181 (intern (concat "raw-text" "-" eol-type))))
182 (should-not (decoder-tests 'binary 'raw-text-unix 'undecided
183 'raw-text-unix))
184 (should-not (decoder-tests 'binary 'raw-text-dos 'undecided
185 'raw-text-dos))
186 (should-not (decoder-tests 'binary 'raw-text-mac 'undecided
187 'raw-text-mac))
188 (should-not (decoder-tests 'binary 'raw-text-dos 'unix
189 'raw-text-unix 'decoder-tests-lf-to-crlf))
190 (should-not (decoder-tests 'binary 'raw-text-mac 'dos
191 'raw-text-dos 'decoder-tests-lf-to-cr))
192 (should-not (decoder-tests 'binary 'raw-text-dos 'mac
193 'raw-text-mac 'decoder-tests-lf-to-lflf)))
194 (decoder-tests-remove-files)))
198 ;;; The following is for benchmark testing of the new optimized
199 ;;; decoder, not for regression testing.
201 (defun generate-ascii-file ()
202 (dotimes (i 100000)
203 (insert-char ?a 80)
204 (insert "\n")))
206 (defun generate-rarely-nonascii-file ()
207 (dotimes (i 100000)
208 (if (/= i 50000)
209 (insert-char ?a 80)
210 (insert)
211 (insert-char ?a 79))
212 (insert "\n")))
214 (defun generate-mostly-nonascii-file ()
215 (dotimes (i 30000)
216 (insert-char ?a 80)
217 (insert "\n"))
218 (dotimes (i 20000)
219 (insert-char80)
220 (insert "\n"))
221 (dotimes (i 10000)
222 (insert-char ?あ 80)
223 (insert "\n")))
226 (defvar test-file-list
227 '((generate-ascii-file
228 ("~/ascii-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" unix)
229 ("~/ascii-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" unix)
230 ("~/ascii-tag-none.unix" "" unix)
231 ("~/ascii-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" dos)
232 ("~/ascii-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" dos)
233 ("~/ascii-tag-none.dos" "" dos))
234 (generate-rarely-nonascii-file
235 ("~/utf-8-r-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" utf-8-unix)
236 ("~/utf-8-r-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" utf-8-unix)
237 ("~/utf-8-r-tag-none.unix" "" utf-8-unix)
238 ("~/utf-8-r-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" utf-8-dos)
239 ("~/utf-8-r-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" utf-8-dos)
240 ("~/utf-8-r-tag-none.dos" "" utf-8-dos))
241 (generate-mostly-nonascii-file
242 ("~/utf-8-m-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" utf-8-unix)
243 ("~/utf-8-m-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" utf-8-unix)
244 ("~/utf-8-m-tag-none.unix" "" utf-8-unix)
245 ("~/utf-8-m-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" utf-8-dos)
246 ("~/utf-8-m-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" utf-8-dos)
247 ("~/utf-8-m-tag-none.dos" "" utf-8-dos))))
249 (defun generate-benchmark-test-file ()
250 (interactive)
251 (with-temp-buffer
252 (message "Generating data...")
253 (dolist (files test-file-list)
254 (delete-region (point-min) (point-max))
255 (funcall (car files))
256 (dolist (file (cdr files))
257 (message "Writing %s..." (car file))
258 (goto-char (point-min))
259 (insert (nth 1 file) "\n")
260 (let ((coding-system-for-write (nth 2 file)))
261 (write-region (point-min) (point-max) (car file)))
262 (delete-region (point-min) (point))))))
264 (defun benchmark-decoder ()
265 (let ((gc-cons-threshold 4000000))
266 (insert "Without optimization:\n")
267 (dolist (files test-file-list)
268 (dolist (file (cdr files))
269 (let* ((disable-ascii-optimization t)
270 (result (benchmark-run 10
271 (with-temp-buffer (insert-file-contents (car file))))))
272 (insert (format "%s: %s\n" (car file) result)))))
273 (insert "With optimization:\n")
274 (dolist (files test-file-list)
275 (dolist (file (cdr files))
276 (let* ((disable-ascii-optimization nil)
277 (result (benchmark-run 10
278 (with-temp-buffer (insert-file-contents (car file))))))
279 (insert (format "%s: %s\n" (car file) result)))))))