From e9d44718e19ae59e7b9c859869dde775f550f395 Mon Sep 17 00:00:00 2001 From: dlichteblau Date: Sun, 27 Nov 2005 17:19:12 +0000 Subject: [PATCH] encoding-fehler resignalisieren --- encodings-data.lisp | 2 +- encodings.lisp | 30 ++++++++++++++++++------------ package.lisp | 3 ++- xstream.lisp | 18 ++++++++++-------- 4 files changed, 31 insertions(+), 22 deletions(-) diff --git a/encodings-data.lisp b/encodings-data.lisp index e29a683..c10131a 100644 --- a/encodings-data.lisp +++ b/encodings-data.lisp @@ -1,4 +1,4 @@ -(in-package :encoding) +(in-package :runes-encoding) (progn (add-name :us-ascii "ANSI_X3.4-1968") diff --git a/encodings.lisp b/encodings.lisp index 0982caa..04ddd93 100644 --- a/encodings.lisp +++ b/encodings.lisp @@ -1,4 +1,9 @@ -(in-package :encoding) +(in-package :runes-encoding) + +(define-condition encoding-error (simple-error) ()) + +(defun xerror (fmt &rest args) + (error 'encoding-error :format-control fmt :format-arguments args)) ;;;; --------------------------------------------------------------------------- ;;;; Encoding names @@ -115,6 +120,9 @@ (let ((hi (aref in rptr)) (lo (aref in (%+ 1 rptr)))) (setf rptr (%+ 2 rptr)) + ;; FIXME: Wenn wir hier ein Surrogate sehen, muessen wir das naechste + ;; Zeichen abwarten und nachgucken, dass nicht etwa die andere + ;; Haelfte fehlt! (setf (aref out wptr) (logior (ash hi 8) lo)) (setf wptr (%+ 1 wptr)))) (values wptr rptr))) @@ -132,6 +140,9 @@ (let ((lo (aref in (%+ 0 rptr))) (hi (aref in (%+ 1 rptr)))) (setf rptr (%+ 2 rptr)) + ;; FIXME: Wenn wir hier ein Surrogate sehen, muessen wir das naechste + ;; Zeichen abwarten und nachgucken, dass nicht etwa die andere + ;; Haelfte fehlt! (setf (aref out wptr) (logior (ash hi 8) lo)) (setf wptr (%+ 1 wptr)))) (values wptr rptr))) @@ -147,13 +158,9 @@ byte0) (macrolet ((put (x) `((lambda (x) - (cond ((or (<= #xD800 x #xDBFF) - (<= #xDC00 x #xDFFF)) - (error "Encoding UTF-16 in UTF-8? : #x~x." x))) - '(unless (data-char-p x) - (error "#x~x is not a data character." x)) - ;;(fresh-line) - ;;(prin1 x) (princ "-> ") + (when (or (<= #xD800 x #xDBFF) + (<= #xDC00 x #xDFFF)) + (xerror "surrogate encoded in UTF-8: #x~x." x)) (cond ((%> x #xFFFF) (setf (aref out (%+ 0 wptr)) (%+ #xD7C0 (ash x -10)) (aref out (%+ 1 wptr)) (%ior #xDC00 (%and x #x3FF))) @@ -196,7 +203,7 @@ (setf rptr (%+ rptr 1))) ((%<= #|#b10000000|# byte0 #b10111111) - (error "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0) + (xerror "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0) (setf rptr (%+ rptr 1))) ((%<= #|#b11000000|# byte0 #b11011111) @@ -260,7 +267,7 @@ (return)))) (t - (error "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0)) ) )) + (xerror "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0)) ) )) (values wptr rptr)) ) (defmethod encoding-p ((object (eql :utf-16-little-endian))) t) @@ -343,5 +350,4 @@ (defun find-charset (name) (or (gethash name *charsets*) - (error "There is no character set named ~S." name))) - + (xerror "There is no character set named ~S." name))) diff --git a/package.lisp b/package.lisp index d92ed60..cfb822d 100644 --- a/package.lisp +++ b/package.lisp @@ -61,8 +61,9 @@ #:set-to-full-speed #:xstream-name)) -(defpackage :encoding +(defpackage :runes-encoding (:use :cl :runes) (:export + #:encoding-error #:find-encoding #:decode-sequence)) diff --git a/xstream.lisp b/xstream.lisp index fe818ca..ea5049e 100644 --- a/xstream.lisp +++ b/xstream.lisp @@ -1,4 +1,4 @@ -;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: runes; readtable: runes; Encoding: utf-8; -*- +;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*- ;;; --------------------------------------------------------------------------- ;;; Title: Fast streams ;;; Created: 1999-07-17 @@ -66,9 +66,7 @@ ;; (eval-when (:compile-toplevel :load-toplevel :execute) - (defparameter *fast* '(optimize (speed 3) (safety 0))) - ;;(defparameter *fast* '(optimize (speed 2) (safety 3))) - ) + (defparameter *fast* '(optimize (speed 3) (safety 0)))) ;; Let us first define fast fixnum arithmetric get rid of type ;; checks. (After all we know what we do here). @@ -277,10 +275,14 @@ :end2 (xstream-os-left-end input)) ;; then we take care that the buffer is large enough to carry at ;; least 100 bytes (a random number) + ;; + ;; david: was heisst da random? ich nehme an, dass 100 einfach + ;; ausreichend sein soll, um die laengste utf-8 bytesequenz oder die + ;; beiden utf-16 surrogates zu halten? dann ist 100 ja wohl dicke + ;; ausreichend und koennte in make-xstream ordentlich geprueft werden. + ;; oder was geht hier vor? (unless (>= (length (xstream-os-buffer input)) 100) - (error "You lost") - ;; todo: enlarge buffer - )) + (error "You lost"))) (setf n (read-octets (xstream-os-buffer input) (xstream-os-stream input) m (min (1- (length (xstream-os-buffer input))) @@ -292,7 +294,7 @@ :eof) (t (multiple-value-bind (fnw fnr) - (encoding:decode-sequence + (runes-encoding:decode-sequence (xstream-encoding input) (xstream-os-buffer input) 0 n (xstream-buffer input) 0 (1- (length (xstream-buffer input))) -- 2.11.4.GIT