From 0029e696436d7a3842e182d999c40e2d354362c7 Mon Sep 17 00:00:00 2001 From: David Lichteblau Date: Sat, 10 May 2008 01:09:08 +0200 Subject: [PATCH] output encoding fixes --- src/parse/unparse.lisp | 77 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/src/parse/unparse.lisp b/src/parse/unparse.lisp index 33abfb6..dc303c8 100644 --- a/src/parse/unparse.lisp +++ b/src/parse/unparse.lisp @@ -15,7 +15,8 @@ (defclass sink (hax:abstract-handler) ((ystream :initarg :ystream :accessor sink-ystream) - (stack :initform nil :accessor stack))) + (stack :initform nil :accessor stack) + (encoding :initarg :encoding :reader sink-encoding))) #-rune-is-character (defmethod hax:%want-strings-p ((handler sink)) @@ -24,11 +25,17 @@ ;; bisschen unschoen hier SCHON WIEDER die ganze api zu duplizieren, aber die ;; ystreams sind noch undokumentiert (macrolet ((define-maker (make-sink make-ystream &rest args) - `(defun ,make-sink (,@args &rest initargs) - (apply #'make-instance + `(defun ,make-sink (,@args &rest initargs + &key encoding &allow-other-keys) + (let* ((encoding (or encoding "UTF-8")) + (ystream (,make-ystream ,@args))) + (setf (ystream-encoding ystream) + (cxml::find-output-encoding encoding)) + (apply #'make-instance 'sink - :ystream (,make-ystream ,@args) - initargs)))) + :ystream ystream + :encoding encoding + initargs))))) (define-maker make-octet-vector-sink make-octet-vector-ystream) (define-maker make-octet-stream-sink make-octet-stream-ystream stream) (define-maker make-rod-sink make-rod-ystream) @@ -47,26 +54,34 @@ #+rune-is-character (defun make-string-sink (&rest args) (apply #'make-rod-sink args)) +(defmethod initialize-instance :after ((instance sink) &key) + ;; not sure about this. We do it for XML, but the HTML parser doesn't + ;; currently look for it. +;;; (when (let ((encoding (ystream-encoding (sink-ystream instance)))) +;;; (and (not (symbolp encoding)) +;;; (eq (babel-encodings:enc-name encoding) :utf-16))) +;;; (sink-write-rune #/U+FEFF instance)) + ) ;;;; Events (defmethod hax:start-document ((sink sink) name public-id system-id) (when (plusp (length system-id)) - (%write-rod #"" sink) - (%write-rune #/U+000A sink))) + (sink-write-rod #"\"" sink))) + (sink-write-rod #">" sink) + (sink-write-rune #/U+000A sink))) (defmethod hax:end-document ((sink sink)) (close-ystream (sink-ystream sink))) @@ -77,24 +92,24 @@ (and key (sgml::find-element closure-html::*html-dtd* key nil nil))) (attlist (and elt (sgml::element-attlist elt)))) (push (cons name elt) (stack sink)) - (%write-rune #/< sink) - (%write-rod name sink) + (sink-write-rune #/< sink) + (sink-write-rod name sink) (dolist (a attributes) (let* ((aname (hax:attribute-name a)) (akey (find-symbol (string-upcase (string-rod aname)) :keyword)) (att (and akey (assoc akey attlist))) (values (second att))) - (%write-rune #/space sink) - (%write-rod aname sink) + (sink-write-rune #/space sink) + (sink-write-rod aname sink) (unless (and att (listp values) (eq (car att) (car values))) - (%write-rune #/= sink) - (%write-rune #/\" sink) + (sink-write-rune #/= sink) + (sink-write-rune #/\" sink) (let ((value (hax:attribute-value a))) (when (uri-attribute-p name aname) (setf value (escape-uri-attribute value))) (unparse-attribute-string value sink)) - (%write-rune #/\" sink)))) - (%write-rune #/> sink))) + (sink-write-rune #/\" sink)))) + (sink-write-rune #/> sink))) ;;; everything written as %URI in the DTD: ;;; @@ -142,9 +157,9 @@ (error "output does not nest: expected ~A but got ~A" name prev-name)) (unless (and elt (null (sgml::element-include elt))) - (%write-rod '#.(string-rod "") sink)))) + (sink-write-rod '#.(string-rod "") sink)))) (defmethod hax:characters ((sink sink) data) (let ((y (sink-ystream sink))) @@ -153,13 +168,13 @@ (loop for c across data do (unparse-datachar-readable c y))))) (defmethod hax:unescaped ((sink sink) data) - (%write-rod data sink)) + (sink-write-rod data sink)) (defmethod hax:comment ((sink sink) data) ;; XXX signal error if body is unprintable? - (%write-rod #"" sink)) + (sink-write-rod #"" sink)) (defun unparse-string (str sink) (let ((y (sink-ystream sink))) @@ -217,10 +232,10 @@ (t (ystream-write-escapable-rune c ystream)))) -(defun %write-rune (c sink) +(defun sink-write-rune (c sink) (ystream-write-rune c (sink-ystream sink))) -(defun %write-rod (r sink) +(defun sink-write-rod (r sink) (ystream-write-rod r (sink-ystream sink))) -- 2.11.4.GIT