From 75476d1d19ac3d7239c8963a498c447a8368433d Mon Sep 17 00:00:00 2001 From: Christophe Rhodes Date: Wed, 11 Nov 2009 13:21:37 +0000 Subject: [PATCH] 1.0.32.16: external-format restart enhancements * OUTPUT-REPLACEMENT restart for fd-stream external-formats, taking a string designator argument and attempting to encode that instead of the erroneous output; * fixes for the FORCE-END-OF-FILE fd-stream external-format restart, using a somewhat involved call / return protocol for communicating out-of-band information between output routines and drivers; * INPUT-REPLACEMENT restart for fd-stream external-formats, again with complicated out-of-band information communication. This also interacts with UNREAD-CHAR; * fix the ATTEMPT-RESYNC restart (and similar) at or near the end of file, ensuring that there is always a valid CATCH tag to be THROWN to; * fix a double-error case in the USE-VALUE restart for unibyte octet conversions; * bandage fix for mb-util decoding-error USE-VALUE restart -- there's more factoring to be done, but this fixes lp #314939 --- NEWS | 10 + src/code/external-formats/mb-util.lisp | 7 +- src/code/fd-stream.lisp | 349 +++++++++++++++++---------------- src/code/octets.lisp | 31 +-- src/code/stream.lisp | 35 ++-- tests/external-format.impure.lisp | 99 ++++++---- tests/octets.pure.lisp | 52 +++-- version.lisp-expr | 2 +- 8 files changed, 330 insertions(+), 255 deletions(-) diff --git a/NEWS b/NEWS index 90dfc8af5..db5163167 100644 --- a/NEWS +++ b/NEWS @@ -12,9 +12,19 @@ changes relative to sbcl-1.0.32: Unicode 5.2 standard, giving names and properties to a number of new characters, and providing a few extra characters with case transformations. + ** improvement: restarts for providing replacement input/output on coding + errors for fd-stream external formats. ** fix a typo preventing conversion of strings into octet vectors in the latin-2 encoding. (reported by Attila Lendvai; launchpad bug #471689) + ** fix a bug in the octet multibyte handling of decoding errors and the + USE-VALUE restart. (launchpad bug #314939) + ** fix the bug underlying the expected failure in the FORCE-END-OF-FILE + restart on fd-stream decoding errors. + ** fix a bug in the ATTEMPT-RESYNC fd-stream decoding restart when the + error is near the end of file + ** fix a double-error case in unibyte octet conversions, when the first + use of USE-VALUE is ignored. * bug fix: uses of slot accessors on specialized method parameters within the bodies of SLOT-VALUE-USING-CLASS methods no longer triggers a type error while finalizing the class. This fix may cause classes with slot diff --git a/src/code/external-formats/mb-util.lisp b/src/code/external-formats/mb-util.lisp index 0cb711db4..c76bc423f 100644 --- a/src/code/external-formats/mb-util.lisp +++ b/src/code/external-formats/mb-util.lisp @@ -150,7 +150,12 @@ (declare (type (or null string) invalid)) (cond ((null invalid) - (vector-push-extend (,simple-get-mb-char array pos bytes) string)) + (let ((thing (,simple-get-mb-char array pos bytes))) + (typecase thing + (character (vector-push-extend thing string)) + (string + (dotimes (i (length thing)) + (vector-push-extend (char thing i) string)))))) (t (dotimes (i (length invalid)) (vector-push-extend (char invalid i) string)))) diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp index 655ee6410..5daab69d3 100644 --- a/src/code/fd-stream.lisp +++ b/src/code/fd-stream.lisp @@ -171,8 +171,9 @@ (listen nil :type (member nil t :eof)) ;; the input buffer - (unread nil) + (instead (make-array 0 :element-type 'character :adjustable t :fill-pointer t) :type (array character (*))) (ibuf nil :type (or buffer null)) + (eof-forced-p nil :type (member t nil)) ;; the output buffer (obuf nil :type (or buffer null)) @@ -462,7 +463,24 @@ (force-end-of-file () :report (lambda (stream) (format stream "~@")) - t))) + (setf (fd-stream-eof-forced-p stream) t)) + (input-replacement (string) + :report (lambda (stream) + (format stream "~@")) + :interactive (lambda () + (format *query-io* "~@") + (finish-output *query-io*) + (list (read *query-io*))) + (let ((string (reverse (string string))) + (instead (fd-stream-instead stream))) + (dotimes (i (length string)) + (vector-push-extend (char string i) instead)) + (fd-stream-resync stream) + (when (> (length string) 0) + (setf (fd-stream-listen stream) t))) + nil))) (defun stream-encoding-error-and-handle (stream code) (restart-case @@ -470,6 +488,16 @@ (output-nothing () :report (lambda (stream) (format stream "~@")) + (throw 'output-nothing nil)) + (output-replacement (string) + :report (lambda (stream) + (format stream "~@")) + :interactive (lambda () + (format *query-io* "~@") + (finish-output *query-io*) + (list (read *query-io*))) + (let ((string (string string))) + (fd-sout stream (string string) 0 (length string))) (throw 'output-nothing nil)))) (defun external-format-encoding-error (stream code) @@ -979,45 +1007,54 @@ `(let* ((,stream-var ,stream) (ibuf (fd-stream-ibuf ,stream-var)) (size nil)) - (if (fd-stream-unread ,stream-var) - (prog1 - (fd-stream-unread ,stream-var) - (setf (fd-stream-unread ,stream-var) nil) - (setf (fd-stream-listen ,stream-var) nil)) - (let ((,element-var nil) - (decode-break-reason nil)) - (do ((,retry-var t)) - ((not ,retry-var)) - (unless - (catch 'eof-input-catcher - (setf decode-break-reason - (block decode-break-reason - (input-at-least ,stream-var 1) - (let* ((byte (sap-ref-8 (buffer-sap ibuf) - (buffer-head ibuf)))) - (declare (ignorable byte)) - (setq size ,bytes) - (input-at-least ,stream-var size) - (setq ,element-var (locally ,@read-forms)) - (setq ,retry-var nil)) - nil)) - (when decode-break-reason - (stream-decoding-error-and-handle stream - decode-break-reason)) - t) - (let ((octet-count (- (buffer-tail ibuf) - (buffer-head ibuf)))) - (when (or (zerop octet-count) - (and (not ,element-var) - (not decode-break-reason) - (stream-decoding-error-and-handle - stream octet-count))) - (setq ,retry-var nil))))) - (cond (,element-var - (incf (buffer-head ibuf) size) - ,element-var) - (t - (eof-or-lose ,stream-var ,eof-error ,eof-value)))))))) + (block use-instead + (when (fd-stream-eof-forced-p ,stream-var) + (setf (fd-stream-eof-forced-p ,stream-var) nil) + (return-from use-instead + (eof-or-lose ,stream-var ,eof-error ,eof-value))) + (let ((,element-var nil) + (decode-break-reason nil)) + (do ((,retry-var t)) + ((not ,retry-var)) + (if (> (length (fd-stream-instead ,stream-var)) 0) + (let* ((instead (fd-stream-instead ,stream-var)) + (result (vector-pop instead)) + (pointer (fill-pointer instead))) + (when (= pointer 0) + (setf (fd-stream-listen ,stream-var) nil)) + (return-from use-instead result)) + (unless + (catch 'eof-input-catcher + (setf decode-break-reason + (block decode-break-reason + (input-at-least ,stream-var 1) + (let* ((byte (sap-ref-8 (buffer-sap ibuf) + (buffer-head ibuf)))) + (declare (ignorable byte)) + (setq size ,bytes) + (input-at-least ,stream-var size) + (setq ,element-var (locally ,@read-forms)) + (setq ,retry-var nil)) + nil)) + (when decode-break-reason + (when (stream-decoding-error-and-handle + stream decode-break-reason) + (setq ,retry-var nil) + (throw 'eof-input-catcher nil))) + t) + (let ((octet-count (- (buffer-tail ibuf) + (buffer-head ibuf)))) + (when (or (zerop octet-count) + (and (not ,element-var) + (not decode-break-reason) + (stream-decoding-error-and-handle + stream octet-count))) + (setq ,retry-var nil)))))) + (cond (,element-var + (incf (buffer-head ibuf) size) + ,element-var) + (t + (eof-or-lose ,stream-var ,eof-error ,eof-value)))))))) ;;; a macro to wrap around all input routines to handle EOF-ERROR noise (defmacro input-wrapper ((stream bytes eof-error eof-value) &body read-forms) @@ -1025,11 +1062,8 @@ (element-var (gensym "ELT"))) `(let* ((,stream-var ,stream) (ibuf (fd-stream-ibuf ,stream-var))) - (if (fd-stream-unread ,stream-var) - (prog1 - (fd-stream-unread ,stream-var) - (setf (fd-stream-unread ,stream-var) nil) - (setf (fd-stream-listen ,stream-var) nil)) + (if (> (length (fd-stream-instead ,stream-var)) 0) + (bug "INSTEAD not empty in INPUT-WRAPPER for ~S" ,stream-var) (let ((,element-var (catch 'eof-input-catcher (input-at-least ,stream-var ,bytes) @@ -1178,22 +1212,7 @@ &aux (total-copied 0)) (declare (type fd-stream stream)) (declare (type index start requested total-copied)) - (let ((unread (fd-stream-unread stream))) - (when unread - ;; AVERs designed to fail when we have more complicated - ;; character representations. - (aver (typep unread 'base-char)) - (aver (= (fd-stream-element-size stream) 1)) - ;; KLUDGE: this is a slightly-unrolled-and-inlined version of - ;; %BYTE-BLT - (etypecase buffer - (system-area-pointer - (setf (sap-ref-8 buffer start) (char-code unread))) - ((simple-unboxed-array (*)) - (setf (aref buffer start) unread))) - (setf (fd-stream-unread stream) nil) - (setf (fd-stream-listen stream) nil) - (incf total-copied))) + (aver (= (length (fd-stream-instead stream)) 0)) (do () (nil) (let* ((remaining-request (- requested total-copied)) @@ -1280,35 +1299,34 @@ (do () ((= end start)) (let ((obuf (fd-stream-obuf stream))) - (setf (buffer-tail obuf) - (string-dispatch (simple-base-string - #!+sb-unicode - (simple-array character (*)) - string) - string - (let ((sap (buffer-sap obuf)) - (len (buffer-length obuf)) - ;; FIXME: rename - (tail (buffer-tail obuf))) - (declare (type index tail) - ;; STRING bounds have already been checked. - (optimize (safety 0))) - (loop - (,@(if output-restart - `(catch 'output-nothing) - `(progn)) - (do* () - ((or (= start end) (< (- len tail) 4))) - (let* ((byte (aref string start)) - (bits (char-code byte))) - ,out-expr - (incf tail ,size) - (incf start))) - ;; Exited from the loop normally - (return tail)) - ;; Exited via CATCH. Skip the current character - ;; and try the inner loop again. - (incf start)))))) + (string-dispatch (simple-base-string + #!+sb-unicode + (simple-array character (*)) + string) + string + (let ((sap (buffer-sap obuf)) + (len (buffer-length obuf)) + ;; FIXME: rename + (tail (buffer-tail obuf))) + (declare (type index tail) + ;; STRING bounds have already been checked. + (optimize (safety 0))) + (,@(if output-restart + `(catch 'output-nothing) + `(progn)) + (do* () + ((or (= start end) (< (- len tail) 4))) + (let* ((byte (aref string start)) + (bits (char-code byte))) + ,out-expr + (incf tail ,size) + (setf (buffer-tail obuf) tail) + (incf start))) + ;; Exited from the loop normally + (go flush)) + ;; Exited via CATCH. Skip the current character. + (incf start)))) + flush (when (< start end) (flush-output-buffer stream))) (when flush-p @@ -1334,12 +1352,16 @@ (type (simple-array character (#.+ansi-stream-in-buffer-length+)) buffer)) - (let ((unread (fd-stream-unread stream))) - (when unread - (setf (aref buffer index) unread) - (setf (fd-stream-unread stream) nil) - (setf (fd-stream-listen stream) nil) - (incf index))) + (when (fd-stream-eof-forced-p stream) + (setf (fd-stream-eof-forced-p stream) nil) + (return-from ,in-function 0)) + (do ((instead (fd-stream-instead stream))) + ((= (fill-pointer instead) 0) + (setf (fd-stream-listen stream) nil)) + (setf (aref buffer index) (vector-pop instead)) + (incf index) + (when (= index end) + (return-from ,in-function (- index start)))) (do () (nil) (let* ((ibuf (fd-stream-ibuf stream)) @@ -1467,36 +1489,33 @@ (do () ((= end start)) (let ((obuf (fd-stream-obuf stream))) - (setf (buffer-tail obuf) - (string-dispatch (simple-base-string - #!+sb-unicode - (simple-array character (*)) - string) - string - (let ((len (buffer-length obuf)) - (sap (buffer-sap obuf)) - ;; FIXME: Rename - (tail (buffer-tail obuf))) - (declare (type index tail) - ;; STRING bounds have already been checked. - (optimize (safety 0))) - (loop - (,@(if output-restart - `(catch 'output-nothing) - `(progn)) - (do* () - ((or (= start end) (< (- len tail) 4))) - (let* ((byte (aref string start)) - (bits (char-code byte)) - (size ,out-size-expr)) - ,out-expr - (incf tail size) - (incf start))) - ;; Exited from the loop normally - (return tail)) - ;; Exited via CATCH. Skip the current character - ;; and try the inner loop again. - (incf start)))))) + (string-dispatch (simple-base-string + #!+sb-unicode (simple-array character (*)) + string) + string + (let ((len (buffer-length obuf)) + (sap (buffer-sap obuf)) + ;; FIXME: Rename + (tail (buffer-tail obuf))) + (declare (type index tail) + ;; STRING bounds have already been checked. + (optimize (safety 0))) + (,@(if output-restart + `(catch 'output-nothing) + `(progn)) + (do* () + ((or (= start end) (< (- len tail) 4))) + (let* ((byte (aref string start)) + (bits (char-code byte)) + (size ,out-size-expr)) + ,out-expr + (incf tail size) + (setf (buffer-tail obuf) tail) + (incf start))) + (go flush)) + ;; Exited via CATCH: skip the current character. + (incf start)))) + flush (when (< start end) (flush-output-buffer stream))) (when flush-p @@ -1522,12 +1541,16 @@ (type (simple-array character (#.+ansi-stream-in-buffer-length+)) buffer)) - (let ((unread (fd-stream-unread stream))) - (when unread - (setf (aref buffer start) unread) - (setf (fd-stream-unread stream) nil) - (setf (fd-stream-listen stream) nil) - (incf total-copied))) + (when (fd-stream-eof-forced-p stream) + (setf (fd-stream-eof-forced-p stream) nil) + (return-from ,in-function 0)) + (do ((instead (fd-stream-instead stream))) + ((= (fill-pointer instead) 0) + (setf (fd-stream-listen stream) nil)) + (setf (aref buffer (+ start total-copied)) (vector-pop instead)) + (incf total-copied) + (when (= requested total-copied) + (return-from ,in-function total-copied))) (do () (nil) (let* ((ibuf (fd-stream-ibuf stream)) @@ -1565,8 +1588,10 @@ (if eof-error-p (error 'end-of-file :stream stream) (return-from ,in-function total-copied))) - (setf head (buffer-head ibuf)) - (setf tail (buffer-tail ibuf)))) + ;; we might have been given stuff to use instead, so + ;; we have to return (and trust our caller to know + ;; what to do about TOTAL-COPIED being 0). + (return-from ,in-function total-copied))) (setf (buffer-head ibuf) head) ;; Maybe we need to refill the stream buffer. (cond ( ;; If there were enough data in the stream buffer, we're done. @@ -1591,20 +1616,21 @@ ,in-expr)) (defun ,resync-function (stream) (let ((ibuf (fd-stream-ibuf stream))) - (loop - (input-at-least stream 2) - (incf (buffer-head ibuf)) - (unless (block decode-break-reason - (let* ((sap (buffer-sap ibuf)) - (head (buffer-head ibuf)) - (byte (sap-ref-8 sap head)) - (size ,in-size-expr)) - (declare (ignorable byte)) - (input-at-least stream size) - (setf head (buffer-head ibuf)) - ,in-expr) - nil) - (return))))) + (catch 'eof-input-catcher + (loop + (incf (buffer-head ibuf)) + (input-at-least stream 1) + (unless (block decode-break-reason + (let* ((sap (buffer-sap ibuf)) + (head (buffer-head ibuf)) + (byte (sap-ref-8 sap head)) + (size ,in-size-expr)) + (declare (ignorable byte)) + (input-at-least stream size) + (setf head (buffer-head ibuf)) + ,in-expr) + nil) + (return)))))) (defun ,read-c-string-function (sap element-type) (declare (type system-area-pointer sap)) (locally @@ -1895,13 +1921,12 @@ ;; we're still safe: buffers have finalizers of their own. (release-fd-stream-buffers fd-stream)) -;;; Flushes the current input buffer and unread chatacter, and returns -;;; the input buffer, and the amount of of flushed input in bytes. +;;; Flushes the current input buffer and any supplied replacements, +;;; and returns the input buffer, and the amount of of flushed input +;;; in bytes. (defun flush-input-buffer (stream) - (let ((unread (if (fd-stream-unread stream) - 1 - 0))) - (setf (fd-stream-unread stream) nil) + (let ((unread (length (fd-stream-instead stream)))) + (setf (fill-pointer (fd-stream-instead stream)) 0) (let ((ibuf (fd-stream-ibuf stream))) (if ibuf (let ((head (buffer-head ibuf)) @@ -1959,16 +1984,8 @@ (do-listen))))))) (do-listen))) (:unread - ;; If the stream is bivalent, the user might follow an - ;; unread-char with a read-byte. In this case, the bookkeeping - ;; is simpler if we adjust the buffer head by the number of code - ;; units in the character. - ;; FIXME: there has to be a proper way to check for bivalence, - ;; right? - (if (fd-stream-bivalent-p fd-stream) - (decf (buffer-head (fd-stream-ibuf fd-stream)) - (fd-stream-character-size fd-stream arg1)) - (setf (fd-stream-unread fd-stream) arg1)) + (decf (buffer-head (fd-stream-ibuf fd-stream)) + (fd-stream-character-size fd-stream arg1)) (setf (fd-stream-listen fd-stream) t)) (:close ;; Drop input buffers @@ -2135,8 +2152,6 @@ (let ((ibuf (fd-stream-ibuf stream))) (when ibuf (decf posn (- (buffer-tail ibuf) (buffer-head ibuf))))) - (when (fd-stream-unread stream) - (decf posn)) ;; Divide bytes by element size. (truncate posn (fd-stream-element-size stream)))))) diff --git a/src/code/octets.lisp b/src/code/octets.lisp index ce032f6c3..d4d58171b 100644 --- a/src/code/octets.lisp +++ b/src/code/octets.lisp @@ -260,7 +260,8 @@ one-past-the-end" :initial-element 0 :element-type '(unsigned-byte 8))) (index 0) - (error-position 0)) + (error-position 0) + (error-replacement)) (tagbody :no-error (loop for pos of-type index from sstart below send @@ -273,30 +274,32 @@ one-past-the-end" ;; KLUDGE: We ran into encoding errors. Bail and do ;; things the slow way (does anybody actually use this ;; functionality besides our own test suite?). - (setf error-position pos) + (setf error-position pos error-replacement byte) (go :error))) (incf index)) finally (return-from string->latin% octets)) :error - ;; We have encoded INDEX octets so far and we ran into an encoding - ;; error at ERROR-POSITION. + ;; We have encoded INDEX octets so far and we ran into an + ;; encoding error at ERROR-POSITION; the user has asked us to + ;; replace the expected output with ERROR-REPLACEMENT. (let ((new-octets (make-array (* index 2) :element-type '(unsigned-byte 8) :adjustable t :fill-pointer index))) (replace new-octets octets) - (loop for pos of-type index from error-position below send - do (let ((thing (funcall get-bytes string pos))) + (flet ((extend (thing) (typecase thing - ((unsigned-byte 8) - (vector-push-extend thing new-octets)) + ((unsigned-byte 8) (vector-push-extend thing new-octets)) ((simple-array (unsigned-byte 8) (*)) (dotimes (i (length thing)) - (vector-push-extend (aref thing i) new-octets))))) - finally (return-from string->latin% - (progn - (unless (zerop null-padding) - (vector-push-extend 0 new-octets)) - (copy-seq new-octets)))))))) + (vector-push-extend (aref thing i) new-octets)))))) + (extend error-replacement) + (loop for pos of-type index from (1+ error-position) below send + do (extend (funcall get-bytes string pos)) + finally (return-from string->latin% + (progn + (unless (zerop null-padding) + (vector-push-extend 0 new-octets)) + (copy-seq new-octets))))))))) ;;;; to-string conversions diff --git a/src/code/stream.lisp b/src/code/stream.lisp index 9003e426f..a0a0cb457 100644 --- a/src/code/stream.lisp +++ b/src/code/stream.lisp @@ -146,7 +146,7 @@ (declare (type stream stream)) (declare (type (or index (alien sb!unix:off-t) (member nil :start :end)) position)) - ;; FIXME: It woud be good to comment on the stuff that is done here... + ;; FIXME: It would be good to comment on the stuff that is done here... ;; FIXME: This doesn't look interrupt safe. (cond (position @@ -548,22 +548,27 @@ ;; An empty count does not necessarily mean that we reached ;; the EOF, it's also possible that it's e.g. due to a ;; invalid octet sequence in a multibyte stream. To handle - ;; the resyncing case correctly we need to call the - ;; single-character reading function and check whether an - ;; EOF was really reached. If not, we can just fill the - ;; buffer by one character, and hope that the next refill - ;; will not need to resync. - (let* ((value (funcall (ansi-stream-in stream) stream nil :eof)) - (index (1- +ansi-stream-in-buffer-length+))) - (case value - ((:eof) - ;; Mark buffer as empty. + ;; the resyncing case correctly we need to call the reading + ;; function and check whether an EOF was really reached. If + ;; not, we can just fill the buffer by one character, and + ;; hope that the next refill will not need to resync. + ;; + ;; KLUDGE: we can't use FD-STREAM functions (which are the + ;; only ones which will give us decoding errors) here, + ;; because this code is generic. We can't call the N-BIN + ;; function, because near the end of a real file that can + ;; legitimately bounce us to the IN function. So we have + ;; to call ANSI-STREAM-IN. + (let* ((index (1- +ansi-stream-in-buffer-length+)) + (value (funcall (ansi-stream-in stream) stream nil :eof))) + (cond + ((eql value :eof) + ;; definitely EOF now (setf (ansi-stream-in-index stream) +ansi-stream-in-buffer-length+) - ;; EOF. Redo the read, this time with the real eof parameters. - (values t (funcall (ansi-stream-in stream) - stream eof-error-p eof-value))) - (otherwise + (values t (eof-or-lose stream eof-error-p eof-value))) + ;; we resynced or were given something instead + (t (setf (aref ibuf index) value) (values nil (setf (ansi-stream-in-index stream) index)))))) (t diff --git a/tests/external-format.impure.lisp b/tests/external-format.impure.lisp index 78285f71b..0845e6bb2 100644 --- a/tests/external-format.impure.lisp +++ b/tests/external-format.impure.lisp @@ -78,22 +78,29 @@ (write-byte 67 s)) (with-open-file (s *test-path* :direction :input :external-format :utf-8) - (handler-bind - ((sb-int:character-decoding-error #'(lambda (decoding-error) - (declare (ignore decoding-error)) - (invoke-restart - 'sb-int:attempt-resync)))) - (assert (equal (read-line s nil s) "ABC")) - (assert (equal (read-line s nil s) s)))) + (let ((count 0)) + (handler-bind + ((sb-int:character-decoding-error #'(lambda (decoding-error) + (declare (ignore decoding-error)) + (when (> (incf count) 1) + (error "too many errors")) + (invoke-restart + 'sb-int:attempt-resync)))) + (assert (equal (read-line s nil s) "ABC")) + (assert (equal (read-line s nil s) s))))) (with-open-file (s *test-path* :direction :input :external-format :utf-8) - (handler-bind - ((sb-int:character-decoding-error #'(lambda (decoding-error) - (declare (ignore decoding-error)) - (invoke-restart - 'sb-int:force-end-of-file)))) - (assert (equal (read-line s nil s) "AB")) - (assert (equal (read-line s nil s) s)))) + (let ((count 0)) + (handler-bind + ((sb-int:character-decoding-error #'(lambda (decoding-error) + (declare (ignore decoding-error)) + (when (> (incf count) 1) + (error "too many errors")) + (invoke-restart + 'sb-int:force-end-of-file)))) + (assert (equal (read-line s nil s) "AB")) + (setf count 0) + (assert (equal (read-line s nil s) s))))) ;;; And again with more data to account for buffering (this was briefly) ;;; broken in early 0.9.6. @@ -112,40 +119,41 @@ (with-test (:name (:character-decode-large :attempt-resync)) (with-open-file (s *test-path* :direction :input :external-format :utf-8) - (handler-bind - ((sb-int:character-decoding-error #'(lambda (decoding-error) + (let ((count 0)) + (handler-bind + ((sb-int:character-decoding-error (lambda (decoding-error) (declare (ignore decoding-error)) + (when (> (incf count) 1) + (error "too many errors")) (invoke-restart 'sb-int:attempt-resync))) - ;; The failure mode is an infinite loop, add a timeout to detetct it. - (sb-ext:timeout (lambda () (error "Timeout")))) - (sb-ext:with-timeout 5 - (dotimes (i 80) - (assert (equal (read-line s nil s) - "1234567890123456789012345678901234567890123456789"))))))) + ;; The failure mode is an infinite loop, add a timeout to + ;; detetct it. + (sb-ext:timeout (lambda () (error "Timeout")))) + (sb-ext:with-timeout 5 + (dotimes (i 80) + (assert (equal (read-line s nil s) + "1234567890123456789012345678901234567890123456789")))))))) -(with-test (:name (:character-decode-large :force-end-of-file) - :fails-on :sbcl) - (error "We can't reliably test this due to WITH-TIMEOUT race condition") - ;; This test will currently fail. But sometimes it will fail in - ;; ungracefully due to the WITH-TIMEOUT race mentioned above. This - ;; rightfully confuses some people, so we'll skip running the code - ;; for now. -- JES, 2006-01-27 - #+nil +(with-test (:name (:character-decode-large :force-end-of-file)) (with-open-file (s *test-path* :direction :input :external-format :utf-8) - (handler-bind - ((sb-int:character-decoding-error #'(lambda (decoding-error) + (let ((count 0)) + (handler-bind + ((sb-int:character-decoding-error (lambda (decoding-error) (declare (ignore decoding-error)) + (when (> (incf count) 1) + (error "too many errors")) (invoke-restart 'sb-int:force-end-of-file))) - ;; The failure mode is an infinite loop, add a timeout to detetct it. - (sb-ext:timeout (lambda () (error "Timeout")))) - (sb-ext:with-timeout 5 - (dotimes (i 80) - (assert (equal (read-line s nil s) - "1234567890123456789012345678901234567890123456789"))) - (assert (equal (read-line s nil s) s)))))) + ;; The failure mode is an infinite loop, add a timeout to detetct it. + (sb-ext:timeout (lambda () (error "Timeout")))) + (sb-ext:with-timeout 5 + (dotimes (i 40) + (assert (equal (read-line s nil s) + "1234567890123456789012345678901234567890123456789"))) + (setf count 0) + (assert (equal (read-line s nil s) s))))))) ;;; Test character encode restarts. (with-open-file (s *test-path* :direction :output @@ -345,4 +353,17 @@ (str (c-string :external-format :ebcdic-us))) (assert (typep (strdup "foo") 'simple-base-string))) +(with-test (:name (:input-replacement :at-end-of-file)) + (dotimes (i 256) + (with-open-file (s *test-path* :direction :output :if-exists :supersede :element-type '(unsigned-byte 8)) + (write-byte i s)) + (handler-bind ((sb-int:character-decoding-error + (lambda (c) + (invoke-restart 'sb-impl::input-replacement #\?)))) + (with-open-file (s *test-path* :external-format :utf-8) + (cond + ((char= (read-char s) #\?) + (assert (or (= i (char-code #\?)) (> i 127)))) + (t (assert (and (not (= i (char-code #\?))) (< i 128))))))))) + ;;;; success diff --git a/tests/octets.pure.lisp b/tests/octets.pure.lisp index 192f0ee6e..4bf7f18b5 100644 --- a/tests/octets.pure.lisp +++ b/tests/octets.pure.lisp @@ -95,24 +95,30 @@ l9c)))) (ensure-roundtrip-utf8) - (let ((non-ascii-bytes (make-array 128 - :element-type '(unsigned-byte 8) - :initial-contents (loop for i from 128 below 256 - collect i)))) - (handler-bind ((sb-int:character-decoding-error - (lambda (c) - (use-value "??" c)))) - (assert (string= (octets-to-string non-ascii-bytes :external-format :ascii) - (make-string 256 :initial-element #\?))))) - (let ((non-ascii-chars (make-array 128 - :element-type 'character - :initial-contents (loop for i from 128 below 256 - collect (code-char i))))) - (handler-bind ((sb-int:character-encoding-error - (lambda (c) - (use-value "??" c)))) - (assert (equalp (string-to-octets non-ascii-chars :external-format :ascii) - (make-array 256 :initial-element (char-code #\?)))))) + (with-test (:name (:ascii :decoding-error use-value)) + (let ((non-ascii-bytes (make-array 128 + :element-type '(unsigned-byte 8) + :initial-contents (loop for i from 128 below 256 collect i))) + (error-count 0)) + (handler-bind ((sb-int:character-decoding-error + (lambda (c) + (incf error-count) + (use-value "??" c)))) + (assert (string= (octets-to-string non-ascii-bytes :external-format :ascii) + (make-string 256 :initial-element #\?))) + (assert (= error-count 128))))) + (with-test (:name (:ascii :encoding-error use-value)) + (let ((non-ascii-chars (make-array 128 + :element-type 'character + :initial-contents (loop for i from 128 below 256 collect (code-char i)))) + (error-count 0)) + (handler-bind ((sb-int:character-encoding-error + (lambda (c) + (incf error-count) + (use-value "??" c)))) + (assert (equalp (string-to-octets non-ascii-chars :external-format :ascii) + (make-array 256 :initial-element (char-code #\?)))) + (assert (= error-count 128))))) ;; From Markus Kuhn's UTF-8 test file: ;; http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt @@ -235,3 +241,13 @@ #+sb-unicode (assert (equalp #(251) (string-to-octets (string (code-char 369)) :external-format :latin-2))) + +#+sb-unicode +(with-test (:name (:euc-jp :decoding-errors)) + (handler-bind ((sb-int:character-decoding-error + (lambda (c) (use-value #\? c)))) + (assert (string= "?{?" + (octets-to-string + (coerce #(182 123 253 238) '(vector (unsigned-byte 8))) + :external-format :euc-jp))))) + diff --git a/version.lisp-expr b/version.lisp-expr index f0c64af5e..d484356bb 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.32.15" +"1.0.32.16" -- 2.11.4.GIT