From c23ceb6a4eeec2e091386ed58fbf12f12324b757 Mon Sep 17 00:00:00 2001 From: David Lichteblau Date: Sun, 14 Oct 2007 22:21:00 +0200 Subject: [PATCH] Attribute parsing issues. * src/parse/html-parser.lisp (PARSE-XSTREAM): enable *unmungle-attribute-case*. (SERIALIZE-PT-ATTRIBUTES): New. (SERIALIZE-PT): Use serialize-pt-attributes. * src/parse/sgml-dtd.lisp (SET-EQUAL): Restore from glisp. * src/parse/sgml-parse.lisp (*UNMUNGLE-ATTRIBUTE-CASE*): New. (MUNGLE-ATTLIST): Downcase attributes if *u-a-c* is enabled. --- src/parse/html-parser.lisp | 17 +++++++++++++++-- src/parse/sgml-dtd.lisp | 3 +++ src/parse/sgml-parse.lisp | 10 +++++++++- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/parse/html-parser.lisp b/src/parse/html-parser.lisp index 38c973a..3e956b0 100644 --- a/src/parse/html-parser.lisp +++ b/src/parse/html-parser.lisp @@ -57,6 +57,7 @@ (make-array #.(* 2 4096) :element-type 'runes:rune)) (sgml::setup-code-vector input :utf-8) (let* ((dtd *html-dtd*) + (sgml::*unmungle-attribute-case* t) (r (sgml:sgml-parse dtd input)) (pt (sgml::post-mortem-heuristic dtd r))) (if handler @@ -90,6 +91,17 @@ ;;; (merge-pathnames (or pathname (pathname input)))))) (parse-xstream xstream handler))))) +(defun serialize-pt-attributes (plist) + (loop + for (name value) on plist by #'cddr + collect + (let ((n (coerce (symbol-name name) 'rod)) + (v (etypecase value + (symbol (coerce (string-downcase (symbol-name value)) 'rod)) + (rod value) + (string (coerce value 'rod))))) + (hax:make-attribute n v t)))) + (defun serialize-pt (document handler &key (name "HTML") public-id system-id) (hax:start-document handler name public-id system-id) (labels ((recurse (pt) @@ -97,8 +109,9 @@ ((eq (gi pt) :pcdata) (hax:characters handler (pt-attrs pt))) (t - (let ((name (coerce (symbol-name (pt-name pt)) 'rod))) - (hax:start-element handler name (pt-attrs pt)) + (let ((name (coerce (symbol-name (pt-name pt)) 'rod)) + (attrs (serialize-pt-attributes (pt-attrs pt)))) + (hax:start-element handler name attrs) (mapc #'recurse (pt-children pt)) (hax:end-element handler name)))))) (recurse document)) diff --git a/src/parse/sgml-dtd.lisp b/src/parse/sgml-dtd.lisp index ccc4e91..27dc074 100644 --- a/src/parse/sgml-dtd.lisp +++ b/src/parse/sgml-dtd.lisp @@ -610,6 +610,9 @@ ;;; ------------------------------------------------------------------------------------------- +(defun set-equal (x y &rest options) + (null (apply #'set-exclusive-or x y options))) + (defun elms-eqv (dtd x y) ;; zwei elms sind genau dann aequivalent, wenn inclusion und surclusion gleich sind. (and (set-equal (elm-inclusion dtd x) (elm-inclusion dtd y)) diff --git a/src/parse/sgml-parse.lisp b/src/parse/sgml-parse.lisp index 5dc9007..b51beb1 100644 --- a/src/parse/sgml-parse.lisp +++ b/src/parse/sgml-parse.lisp @@ -967,6 +967,12 @@ (t (error "foofoo: Hmm ~S ?!" r)))) +;;; The renderer might depend on upper-case attribute values, so let's leave +;;; this off by default. For the benefit of html <-> xml conversions we +;;; don't want to check the DTD every time we convert an attribute though, +;;; so we need this mode for lower-case attribute values. +(defvar *unmungle-attribute-case* nil) + (defun mungle-attlist (dtd tag atts) (mapcan (lambda (x) (cond ((atom x) @@ -974,7 +980,9 @@ (multiple-value-bind (slot value) (sgml::find-slot-value-pair nil dtd tag (mungle x)) (when value - (setf value (foofoo value))) + (setf value (foofoo value)) + (when *unmungle-attribute-case* + (setf value (rod-downcase value)))) (and slot (list slot value)))) (t -- 2.11.4.GIT