1 (in-package :closure-html
)
3 (defun unbreak-utf8 (arr &key
(start 0))
4 "given an utf-8 string, fix a common trouble with it:
5 namely broken non-breaking-space sequences not being prefixed by 194"
6 (when (> (length arr
) start
)
7 (let* ((pos (position 160 arr
:start start
))
8 (rest-fixed (when pos
(unbreak-utf8 arr
:start
(1+ pos
)))))
10 (concatenate 'vector
(subseq arr start pos
) #(194 160) rest-fixed
)
11 (subseq arr start
)))))
13 (defun cxml-pt-to-lhtml (pt)
14 "given a sgml:pt, produce a lispified parse tree composed of lists of form:
15 (tag property-list children)"
21 (let ((r (flexi-streams:octets-to-string x
:external-format
(flexi-streams:make-external-format
:utf-8
:little-endian t
))))
25 (t (format t
"impossible happened: ~S~%" x
))))
27 (let* ((attrs (if (listp (sgml:pt-attrs pt
))
28 (loop :for
(name val
) :on
(sgml:pt-attrs pt
) :by
#'cddr
29 :collect
(list name
(f val
)))
30 (f (sgml:pt-attrs pt
)))))
31 (if (eq (sgml:pt-name pt
) :pcdata
)
32 (f (sgml:pt-cdata pt
))
37 (loop :for n
:in
(sgml:pt-children pt
)
38 :when n
:do
(if (arrayp n
) (f n
))
41 (list (iterate n
))))))))))
44 (defun parse-html-to-lhtml (html)
45 (cxml-pt-to-lhtml (parse html
)))
47 (defun walk-lhtml (lhtml tag-callback text-callback
)
49 (funcall text-callback lhtml
)
50 (destructuring-bind (tag &rest body
)
51 (if (consp lhtml
) lhtml
(list lhtml
))
52 (destructuring-bind (tag-name &rest attributes
)
53 (if (consp tag
) tag
(list tag
))
54 (funcall tag-callback tag-name attributes body
)))))
56 (defun lhtml->pt
(lhtml)
59 (lambda (tag-name attributes body
)
60 (make-pt :name tag-name
61 :attrs
(loop :for
(key value
) :on attributes
:by
#'cddr
63 :collect
(etypecase value
64 (string (runes:string-rod value
))
66 :children
(mapcar #'lhtml-
>pt body
)))
69 (assert (stringp string
))
70 (make-pt :name
:pcdata
:attrs
(runes:string-rod string
)))))
72 (defun lhtml-reader (stream subchar arg
)
73 (declare (ignore subchar arg
))
75 ,(funcall (get-macro-character #\
`) stream nil
)))
77 (set-dispatch-macro-character #\
# #\T
'lhtml-reader
)