More integration work.
[closure-html.git] / src / parse / html-parser.lisp
blobeb952c186753b27f116d8a51511fa451328d619e
1 (in-package :closure-html)
;;; Pathname of this source file, used to locate resources relative to it.
;;; The value is computed once via LOAD-TIME-VALUE.
(defparameter *this-file*
  (load-time-value
   (or
    ;; Read-time evaluation: when the file is being compiled, the reader
    ;; splices in the value of *COMPILE-FILE-PATHNAME* as a literal.
    #.*compile-file-pathname*
    ;; Otherwise (the spliced value was NIL) fall back to the pathname
    ;; of the file currently being loaded.
    *load-pathname*)))
;;; Directory containing this source file, as a pathname with no name,
;;; type or version component.  Resource files are merged against it.
(defparameter *this-directory*
  ;; Strip only the file-specific components and inherit everything else
  ;; (host, device, directory) from *THIS-FILE*.  Copying the directory
  ;; component alone would drop the host and device (e.g. a drive letter),
  ;; so the result could point at the wrong volume.
  (make-pathname :name nil :type nil :version nil
                 :defaults *this-file*))
;;; Catalog as a list of (public-identifier . pathname) pairs.  Each
;;; relative file name below is merged against *THIS-DIRECTORY*.
(defparameter sgml::*simple-catalog*
  (mapcar (lambda (entry)
            (destructuring-bind (public-id . relative-path) entry
              (cons public-id
                    (merge-pathnames relative-path *this-directory*))))
          '(("-//W3O//DTD W3 HTML 3.0//EN" . "dtd/HTML-3.0")
            ("NETSCAPE-Bookmark-file-1" . "dtd/NETSCAPE-Bookmark-file-1")
            ("-//W3C//ENTITIES Special//EN//HTML" . "dtd/Entities-Special")
            ("-//W3C//ENTITIES Symbols//EN//HTML" . "dtd/Entities-Symbols")
            ("-//W3C//ENTITIES Latin1//EN//HTML" . "dtd/Entities-Latin1")
            ("-//W3C//DTD HTML 4.0 Frameset//EN" . "dtd/DTD-HTML-4.0-Frameset")
            ("-//W3C//DTD HTML 4.0//EN" . "dtd/DTD-HTML-4.0")
            ("-//W3C//DTD HTML 4.0 Transitional//EN" . "dtd/DTD-HTML-4.0-Transitional"))))
;;; Result of SGML:PARSE-DTD for the HTML 4.0 Frameset public identifier;
;;; PARSE uses this value as its DTD.
(defparameter *html-dtd*
  (sgml:parse-dtd
   '(:public "-//W3C//DTD HTML 4.0 Frameset//EN")))
(defun parse (inputstr)
  "Parse the string INPUTSTR using *HTML-DTD* and return the toplevel
parse tree node (a sgml:pt).

INPUTSTR is encoded to UTF-8 octets and read back through an xstream.
The result of SGML:SGML-PARSE is handed to SGML::POST-MORTEM-HEURISTIC,
whose value is returned."
  (let* ((dtd *html-dtd*)
         (octets (flexi-streams:string-to-octets
                  inputstr
                  :external-format (flexi-streams:make-external-format :utf-8)))
         (xstream (runes:make-xstream
                   (flexi-streams:make-in-memory-input-stream octets)))
         ;; Scratch buffer of runes installed on the stream before parsing.
         (scratch (make-array #.(* 2 4096) :element-type 'runes:rune)))
    (setf (sgml::a-stream-scratch xstream) scratch)
    ;; Set up the stream's code vector for the same encoding as the octets.
    (sgml::setup-code-vector xstream :utf-8)
    (sgml::post-mortem-heuristic dtd (sgml:sgml-parse dtd xstream))))