Serialization.
[closure-html.git] / src / parse / lhtml.lisp
bloba050016c1a52b4b225c81434de1d2e70395afa9d
1 (in-package :closure-html)
3 (defun unbreak-utf8 (arr &key (start 0))
4 "given an utf-8 string, fix a common trouble with it:
5 namely broken non-breaking-space sequences not being prefixed by 194"
6 (when (> (length arr) start)
7 (let* ((pos (position 160 arr :start start))
8 (rest-fixed (when pos (unbreak-utf8 arr :start (1+ pos)))))
9 (if pos
10 (concatenate 'vector (subseq arr start pos) #(194 160) rest-fixed)
11 (subseq arr start)))))
13 (defun cxml-pt-to-lhtml (pt)
14 "given a sgml:pt, produce a lispified parse tree composed of lists of form:
15 (tag property-list children)"
16 (labels ((f (x)
17 (cond
18 ((null x) nil)
19 ((stringp x) x)
20 ((> (length x) 0)
21 (let ((r (flexi-streams:octets-to-string x :external-format (flexi-streams:make-external-format :utf-8 :little-endian t))))
22 (unless r
23 (f (unbreak-utf8 x)))
24 r))
25 (t (format t "impossible happened: ~S~%" x))))
26 (iterate (pt)
27 (let* ((attrs (if (listp (sgml:pt-attrs pt))
28 (loop :for (name val) :on (sgml:pt-attrs pt) :by #'cddr
29 :collect (list name (f val)))
30 (f (sgml:pt-attrs pt)))))
31 (if (eq (sgml:pt-name pt) :pcdata)
32 (f (sgml:pt-cdata pt))
33 (cons
34 (sgml:pt-name pt)
35 (cons
36 attrs
37 (loop :for n :in (sgml:pt-children pt)
38 :when n :do (if (arrayp n) (f n))
39 :nconc (if (arrayp n)
40 (list (f n))
41 (list (iterate n))))))))))
42 (iterate pt)))
44 (defun parse-html-to-lhtml (html)
45 (cxml-pt-to-lhtml (parse html)))
47 (defun walk-lhtml (lhtml tag-callback text-callback)
48 (if (stringp lhtml)
49 (funcall text-callback lhtml)
50 (destructuring-bind (tag &rest body)
51 (if (consp lhtml) lhtml (list lhtml))
52 (destructuring-bind (tag-name &rest attributes)
53 (if (consp tag) tag (list tag))
54 (funcall tag-callback tag-name attributes body)))))
56 (defun lhtml->pt (lhtml)
57 (walk-lhtml lhtml
58 ;; tag callback
59 (lambda (tag-name attributes body)
60 (make-pt :name tag-name
61 :attrs (loop :for (key value) :on attributes :by #'cddr
62 :collect key
63 :collect (etypecase value
64 (string (runes:string-rod value))
65 (sgml::rod value)))
66 :children (mapcar #'lhtml->pt body)))
67 ;; text callback
68 (lambda (string)
69 (assert (stringp string))
70 (make-pt :name :pcdata :attrs (runes:string-rod string)))))
72 (defun lhtml-reader (stream subchar arg)
73 (declare (ignore subchar arg))
74 `(lhtml->pt
75 ,(funcall (get-macro-character #\`) stream nil)))
77 (set-dispatch-macro-character #\# #\T 'lhtml-reader)