From 8ad30632f7a37735a05cdf70d0904fa33616f6b0 Mon Sep 17 00:00:00 2001 From: Christophe Rhodes Date: Wed, 11 Nov 2009 18:08:31 +0000 Subject: [PATCH] 1.0.32.23: use :replacement in the external format for standard IO streams For *terminal-io*, a bidirectional stream, we have to make an arbitrary choice on Windows, where in theory the input and output code pages can differ. We arbitrarily choose the output format; I have no idea whether this matters. --- NEWS | 5 +++++ src/code/external-formats/enc-basic.lisp | 1 + src/code/external-formats/mb-util.lisp | 6 ++++++ src/code/external-formats/ucs-2.lisp | 2 ++ src/code/fd-stream.lisp | 30 +++++++++++++++++++----------- version.lisp-expr | 2 +- 6 files changed, 34 insertions(+), 12 deletions(-) diff --git a/NEWS b/NEWS index 55377ca06..b49af397b 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,11 @@ changes relative to sbcl-1.0.32: (: :replacement ) as an external format which will automatically substitute on encoding or decoding errors for streams and for STRING-TO-OCTETS and its inverse. + ** improvement: the file streams underlying the standard streams (such as + *STANDARD-INPUT*, *TERMINAL-IO*) are opened with an external format + which uses the replacement mechanism to handle encoding errors, + preventing various infinite error chains and unrecoverable I/O + confusion. ** minor incompatible change: the utf-8 external format now correctly refuses to encode Lisp characters in the surrogate range (char-codes between #xd800 and #xdfff). diff --git a/src/code/external-formats/enc-basic.lisp b/src/code/external-formats/enc-basic.lisp index 9ae5271ce..819879f8b 100644 --- a/src/code/external-formats/enc-basic.lisp +++ b/src/code/external-formats/enc-basic.lisp @@ -383,6 +383,7 @@ (instantiate-octets-definition define-utf8->string) (define-external-format/variable-width (:utf-8 :utf8) t + #!+sb-unicode (code-char #xfffd) #!-sb-unicode #\? (let ((bits (char-code byte))) (cond ((< bits #x80) 1) ((< bits #x800) 2) diff --git a/src/code/external-formats/mb-util.lisp b/src/code/external-formats/mb-util.lisp index c76bc423f..f0ff16d5c 100644 --- a/src/code/external-formats/mb-util.lisp +++ b/src/code/external-formats/mb-util.lisp @@ -248,6 +248,12 @@ ;; for fd-stream.lisp (define-external-format/variable-width ,aliases t + ;; KLUDGE: it so happens that at present (2009-10-22) none of + ;; the external formats defined with + ;; define-multibyte-encoding can encode the unicode + ;; replacement character, so we hardcode the preferred + ;; replacement here. + #\? (mb-char-len (or (,ucs-to-mb (char-code byte)) -1)) (let ((mb (,ucs-to-mb bits))) (if (null mb) diff --git a/src/code/external-formats/ucs-2.lisp b/src/code/external-formats/ucs-2.lisp index def9d96a1..1d15d10ef 100644 --- a/src/code/external-formats/ucs-2.lisp +++ b/src/code/external-formats/ucs-2.lisp @@ -190,6 +190,7 @@ (instantiate-octets-definition define-ucs-2->string) (define-external-format/variable-width (:ucs-2le :ucs2le #!+win32 :ucs2 #!+win32 :ucs-2) t + (code-char #xfffd) 2 (if (< bits #x10000) (setf (sap-ref-16le sap tail) bits) @@ -200,6 +201,7 @@ string->ucs-2le) (define-external-format/variable-width (:ucs-2be :ucs2be) t + (code-char #xfffd) 2 (if (< bits #x10000) (setf (sap-ref-16be sap tail) bits) diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp index 076145656..020840c78 100644 --- a/src/code/fd-stream.lisp +++ b/src/code/fd-stream.lisp @@ -754,6 +754,7 @@ ;; All the names that can refer to this external format. The first ;; one is the canonical name. (names (missing-arg) :type list :read-only t) + (default-replacement-character (missing-arg) :type character) (read-n-chars-fun (missing-arg) :type function) (read-char-fun (missing-arg) :type function) (write-n-bytes-fun (missing-arg) :type function) @@ -1388,7 +1389,7 @@ (canonical-name (&rest other-names) out-form in-form octets-to-string-symbol string-to-octets-symbol) `(define-external-format/variable-width (,canonical-name ,@other-names) - t 1 + t #\? 1 ,out-form 1 ,in-form @@ -1396,8 +1397,8 @@ ,string-to-octets-symbol)) (defmacro define-external-format/variable-width - (external-format output-restart out-size-expr - out-expr in-size-expr in-expr + (external-format output-restart replacement-character + out-size-expr out-expr in-size-expr in-expr octets-to-string-sym string-to-octets-sym) (let* ((name (first external-format)) (out-function (symbolicate "OUTPUT-BYTES/" name)) @@ -1646,6 +1647,7 @@ (let ((entry (%make-external-format :names ',external-format + :default-replacement-character ,replacement-character :read-n-chars-fun #',in-function :read-char-fun #',in-char-function :write-n-bytes-fun #',out-function @@ -2455,6 +2457,14 @@ (without-package-locks (makunbound '*available-buffers*)))) +(defun stdstream-external-format (outputp) + (declare (ignorable outputp)) + (let* ((keyword #!+win32 (if outputp (sb!win32::console-output-codepage) (sb!win32::console-input-codepage)) + #!-win32 (default-external-format)) + (ef (get-external-format keyword)) + (replacement (ef-default-replacement-character ef))) + `(,keyword :replacement ,replacement))) + ;;; This is called whenever a saved core is restarted. (defun stream-reinit (&optional init-buffers-p) (when init-buffers-p @@ -2464,22 +2474,20 @@ (with-output-to-string (*error-output*) (setf *stdin* (make-fd-stream 0 :name "standard input" :input t :buffering :line - #!+win32 :external-format #!+win32 (sb!win32::console-input-codepage))) + :external-format (stdstream-external-format nil))) (setf *stdout* (make-fd-stream 1 :name "standard output" :output t :buffering :line - #!+win32 :external-format #!+win32 (sb!win32::console-output-codepage))) + :external-format (stdstream-external-format t))) (setf *stderr* (make-fd-stream 2 :name "standard error" :output t :buffering :line - #!+win32 :external-format #!+win32 (sb!win32::console-output-codepage))) + :external-format (stdstream-external-format t))) (let* ((ttyname #.(coerce "/dev/tty" 'simple-base-string)) (tty (sb!unix:unix-open ttyname sb!unix:o_rdwr #o666))) (if tty (setf *tty* - (make-fd-stream tty - :name "the terminal" - :input t - :output t - :buffering :line + (make-fd-stream tty :name "the terminal" + :input t :output t :buffering :line + :external-format (stdstream-external-format t) :auto-close t)) (setf *tty* (make-two-way-stream *stdin* *stdout*)))) (princ (get-output-stream-string *error-output*) *stderr*)) diff --git a/version.lisp-expr b/version.lisp-expr index b0940a872..fcb1f1020 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.32.22" +"1.0.32.23" -- 2.11.4.GIT