1 (in-package :closure-html
)
3 ;;; FIXME: I liked the old SLURP-CATALOG code better than the LOOP below.
4 ;;; (Except for the use of NETLIB and URI, which we don't have here.)
8 (defun slurp-catalog (catalog-url)
9 ;; Really dirty implementation
10 (setf *simple-catalog
* nil
)
11 (multiple-value-bind (io header
) (netlib::open-document-2 catalog-url
)
12 (declare (ignore header
))
14 (let ((str (glisp::gstream-as-string io
)))
15 (with-input-from-string (input str
)
16 (do ((x (read input nil nil
) (read input nil nil
)))
18 (assert (equal (symbol-name x
) "PUBLIC"))
19 (let ((name (read input
))
21 (assert (stringp name
))
22 (assert (stringp file
))
23 (push (cons name
(url:merge-url
(url:parse-url file
) catalog-url
))
27 (format T
"~&;; Parsing DTD~% ")
28 (sgml:slurp-catalog
(url:parse-url
"file://closure/resources/dtd/catalog"))
29 (setf cl-user
::*html-dtd
* (sgml:parse-dtd
'(:public
"-//W3C//DTD HTML 4.0 Frameset//EN")))
30 (format T
"~&;; done~%")
34 (defparameter sgml
::*simple-catalog
*
39 :defaults
(merge-pathnames
41 (asdf:component-relative-pathname
42 (asdf:find-system
:closure-html
))))))
44 :for
(name . filename
)
45 :in
'(("-//W3O//DTD W3 HTML 3.0//EN" .
"dtd/HTML-3.0")
46 ("NETSCAPE-Bookmark-file-1" .
"dtd/NETSCAPE-Bookmark-file-1")
47 ("-//W3C//ENTITIES Special//EN//HTML" .
"dtd/Entities-Special")
48 ("-//W3C//ENTITIES Symbols//EN//HTML" .
"dtd/Entities-Symbols")
49 ("-//W3C//ENTITIES Latin1//EN//HTML" .
"dtd/Entities-Latin1")
50 ("-//W3C//DTD HTML 4.0 Frameset//EN" .
"dtd/DTD-HTML-4.0-Frameset")
51 ("-//W3C//DTD HTML 4.0//EN" .
"dtd/DTD-HTML-4.0")
52 ("-//W3C//DTD HTML 4.0 Transitional//EN" .
"dtd/DTD-HTML-4.0-Transitional"))
53 :collect
(cons name
(merge-pathnames filename base
)))))
55 (defparameter *html-dtd
*
56 (sgml:parse-dtd
'(:public
"-//W3C//DTD HTML 4.0 Frameset//EN")))
58 (defun parse-xstream (input handler
)
59 (setf (sgml::a-stream-scratch input
)
60 (make-array #.
(* 2 4096) :element-type
'runes
:rune
))
61 (sgml::setup-code-vector input
:utf-8
)
62 (let* ((dtd *html-dtd
*)
63 (sgml::*unmungle-attribute-case
* t
)
64 (r (sgml:sgml-parse dtd input
))
65 (pt (sgml::post-mortem-heuristic dtd r
)))
67 (serialize-pt pt handler
)
70 (defun parse (input handler
)
73 (parse-xstream input handler
))
75 #-rune-is-integer
(setf input
(string-rod input
))
76 (let ((xstream (make-rod-xstream input
)))
77 ;;; (setf (xstream-name xstream)
79 ;;; :entity-name "main document"
80 ;;; :entity-kind :main
82 (parse-xstream xstream handler
)))
83 ((and array
(not string
))
84 (parse (make-octet-input-stream input
) handler
))
88 (make-array (length input
) :element-type
'(unsigned-byte 8))))
89 (map-into bytes
#'char-code input
)
90 (parse bytes handler
)))
92 (with-open-file (s input
:element-type
'(unsigned-byte 8))
95 (let ((xstream (make-xstream input
:speed
8192)))
96 ;;; (setf (xstream-name xstream)
98 ;;; :entity-name "main document"
99 ;;; :entity-kind :main
100 ;;; :uri (pathname-to-uri
101 ;;; (merge-pathnames (or pathname (pathname input))))))
102 (parse-xstream xstream handler
)))))
104 (defun serialize-pt-attributes (plist recode
)
106 for
(name value
) on plist by
#'cddr
108 ;; better don't emit as HAX what would be bogus as SAX anyway
109 (string-equal name
"xmlns")
111 (let* ((n #+rune-is-character
(coerce (symbol-name name
) 'rod
)
112 #-rune-is-character
(symbol-name name
))
114 (symbol (coerce (string-downcase (symbol-name value
)) 'rod
))
115 (rod (funcall recode value
))
116 (string (coerce value
'rod
)))))
117 (hax:make-attribute n v t
))))
119 (defun serialize-pt (document handler
120 &key
(name "HTML") public-id system-id
(documentp t
))
121 (let* ((recodep (or #+rune-is-integer
(hax:%want-strings-p handler
)))
126 (rod-to-utf8-string rod
)
130 (hax:start-document handler name public-id system-id
))
131 (labels ((recurse (pt)
133 ((eq (gi pt
) :pcdata
)
134 (hax:characters handler
(funcall recode
(pt-attrs pt
))))
136 (let* ((name (symbol-name (pt-name pt
)))
138 #+rune-is-character
(coerce name
'rod
)
140 (if recodep name
(string-rod name
)))
142 (serialize-pt-attributes (pt-attrs pt
) recode
)))
143 (hax:start-element handler name attrs
)
144 (mapc #'recurse
(pt-children pt
))
145 (hax:end-element handler name
))))))
148 (hax:end-document handler
))))
150 (defclass pt-builder
(hax:abstract-handler
)
151 ((current :initform nil
:accessor current
)
152 (root :initform nil
:accessor root
)))
155 (defmethod hax:%want-strings-p
((handler pt-builder
))
158 (defun make-pt-builder ()
159 (make-instance 'pt-builder
))
161 (defmethod hax:start-document
((handler pt-builder
) name pubid sysid
)
162 (declare (ignore name pubid sysid
))
165 (defun unserialize-pt-attributes (attrs)
168 collect
(intern (string-upcase (hax:attribute-name a
)) :keyword
)
169 collect
(hax:attribute-value a
)))
171 (defmethod hax:start-element
((handler pt-builder
) name attrs
)
172 (let* ((parent (current handler
))
173 (this (sgml::make-pt
/low
174 :name
(intern (string-upcase name
) :keyword
)
175 :attrs
(unserialize-pt-attributes attrs
)
177 (setf (current handler
) this
)
179 (push this
(pt-children parent
))
180 (setf (root handler
) this
))))
182 (defmethod hax:characters
((handler pt-builder
) data
)
183 (push (sgml::make-pt
/low
186 :parent
(current handler
))
187 (pt-children (current handler
))))
189 (defmethod hax:comment
((handler pt-builder
) data
)
190 ;; zzz haven't found out what the representation of comments is...
193 (defmethod hax:end-element
((handler pt-builder
) name
)
194 (let ((current (current handler
)))
195 (setf (pt-children current
) (nreverse (pt-children current
)))
196 (setf (current handler
) (pt-parent current
))))
198 (defmethod hax:end-document
((handler pt-builder
))