LHTML serialization.
[closure-html.git] / src / parse / lhtml.lisp
bloba3a5d39cc81d7890f7e9b9af3d0e3155fb37b513
1 (in-package :closure-html)
4 ;;;; Parsing into LHTML
6 (defclass lhtml-builder (hax:abstract-handler)
7 ((stack :initform nil :accessor stack)
8 (root :initform nil :accessor root)))
10 (defun make-lhtml-builder ()
11 (make-instance 'lhtml-builder))
13 (defmethod hax:start-document ((handler lhtml-builder) name pubid sysid)
14 (declare (ignore name pubid sysid))
15 nil)
17 (defun pt-attributes-to-lhtml (attrs)
18 (mapcar (lambda (a)
19 (list (intern (string-upcase (hax:attribute-name a)) :keyword)
20 (hax:attribute-value a)))
21 attrs))
23 (defmethod hax:start-element ((handler lhtml-builder) name attrs)
24 (let* ((parent (car (stack handler)))
25 (this (list (intern (string-upcase name) :keyword)
26 (pt-attributes-to-lhtml attrs))))
27 (push this (stack handler))
28 (if parent
29 (push this (cddr parent))
30 (setf (root handler) this))))
32 (defmethod hax:characters ((handler lhtml-builder) data)
33 (push data (cddar (stack handler))))
35 (defmethod hax:comment ((handler lhtml-builder) data)
36 ;; zzz haven't found out what the representation of comments is...
37 data)
39 (defmethod hax:end-element ((handler lhtml-builder) name)
40 (let ((current (pop (stack handler))))
41 (setf (cddr current) (nreverse (cddr current)))))
43 (defmethod hax:end-document ((handler lhtml-builder))
44 (root handler))
48 ;;;; Serializing LHTML
50 (defun serialize-lhtml-attributes (alist)
51 (loop
52 for (name value) in alist
53 collect
54 (let ((n (coerce (symbol-name name) 'rod))
55 (v (etypecase value
56 (symbol (coerce (string-downcase (symbol-name value)) 'rod))
57 (rod value)
58 (string (coerce value 'rod)))))
59 (hax:make-attribute n v t))))
61 (defun serialize-lhtml
62 (document handler &key (name "HTML") public-id system-id)
63 (hax:start-document handler name public-id system-id)
64 (labels ((recurse (x)
65 (typecase x
66 ((or rod string)
67 (hax:characters handler x))
69 (destructuring-bind (name attrs &rest children) x
70 (let ((name (coerce (symbol-name name) 'rod))
71 (attrs (serialize-lhtml-attributes attrs)))
72 (hax:start-element handler name attrs)
73 (mapc #'recurse children)
74 (hax:end-element handler name)))))))
75 (recurse document))
76 (hax:end-document handler))
79 ;;;; old stuff
82 ;;; brauchen wir hier alles das noch?
84 (defun unbreak-utf8 (arr &key (start 0))
85 "given an utf-8 string, fix a common trouble with it:
86 namely broken non-breaking-space sequences not being prefixed by 194"
87 (when (> (length arr) start)
88 (let* ((pos (position 160 arr :start start))
89 (rest-fixed (when pos (unbreak-utf8 arr :start (1+ pos)))))
90 (if pos
91 (concatenate 'vector (subseq arr start pos) #(194 160) rest-fixed)
92 (subseq arr start)))))
94 (defun cxml-pt-to-lhtml (pt)
95 "given a sgml:pt, produce a lispified parse tree composed of lists of form:
96 (tag property-list children)"
97 (labels ((f (x)
98 (cond
99 ((null x) nil)
100 ((stringp x) x)
101 ((> (length x) 0)
102 (let ((r (flexi-streams:octets-to-string x :external-format (flexi-streams:make-external-format :utf-8 :little-endian t))))
103 (unless r
104 (f (unbreak-utf8 x)))
106 (t (format t "impossible happened: ~S~%" x))))
107 (iterate (pt)
108 (let* ((attrs (if (listp (sgml:pt-attrs pt))
109 (loop :for (name val) :on (sgml:pt-attrs pt) :by #'cddr
110 :collect (list name (f val)))
111 (f (sgml:pt-attrs pt)))))
112 (if (eq (sgml:pt-name pt) :pcdata)
113 (f (sgml:pt-cdata pt))
114 (cons
115 (sgml:pt-name pt)
116 (cons
117 attrs
118 (loop :for n :in (sgml:pt-children pt)
119 :when n :do (if (arrayp n) (f n))
120 :nconc (if (arrayp n)
121 (list (f n))
122 (list (iterate n))))))))))
123 (iterate pt)))
125 (defun parse-html-to-lhtml (html)
126 (cxml-pt-to-lhtml (parse html nil)))
129 (defun walk-lhtml (lhtml tag-callback text-callback)
130 (if (stringp lhtml)
131 (funcall text-callback lhtml)
132 (destructuring-bind (tag &rest body)
133 (if (consp lhtml) lhtml (list lhtml))
134 (destructuring-bind (tag-name &rest attributes)
135 (if (consp tag) tag (list tag))
136 (funcall tag-callback tag-name attributes body)))))
139 ;;;; Old reader stuff:
141 (defun lhtml->pt (lhtml)
142 (walk-lhtml lhtml
143 ;; tag callback
144 (lambda (tag-name attributes body)
145 (sgml::make-pt :name tag-name
146 :attrs (loop :for (key value) :on attributes :by #'cddr
147 :collect key
148 :collect (etypecase value
149 (string (runes:string-rod value))
150 (sgml::rod value)))
151 :children (mapcar #'lhtml->pt body)))
152 ;; text callback
153 (lambda (string)
154 (assert (stringp string))
155 (sgml::make-pt :name :pcdata :attrs (runes:string-rod string)))))
157 (defun lhtml-reader (stream subchar arg)
158 (declare (ignore subchar arg))
159 `(lhtml->pt
160 ,(funcall (get-macro-character #\`) stream nil)))
162 (set-dispatch-macro-character #\# #\T 'lhtml-reader)