1 (uiop:define-package
#:lw2.clean-html
2 (:use
#:cl
#:alexandria
#:iterate
#:split-sequence
#:lw2.lmdb
#:lw2.links
#:lw2.utils
#:lw2.context
#:lw2.sites
#:lw2.conditions
#:lw2.colors
)
3 (:export
#:*before-clean-hook
* #:*link-hook
* #:url-scanner
#:clean-text
#:clean-text-to-html
#:clean-html
#:clean-html
* #:extract-excerpt
#:extract-excerpt
*)
4 (:unintern
#:*text-clean-regexps
* #:*html-clean-regexps
*))
6 (in-package #:lw2.clean-html
)
8 (setf cl-typesetting-hyphen
::*hyphen-patterns-directory
* (asdf:system-relative-pathname
"lw2-viewer" "data/hyphenation-patterns/"))
9 (setf cl-typesetting-hyphen
::*language-hyphen-file-list
* '((:en-us .
"hyph_en_US")))
10 (cl-typesetting-hyphen:load-language
:en-us
)
11 (setf cl-typesetting
::*default-hyphen-language
* :en-us
)
13 (defvar *before-clean-hook
* nil
)
14 (defvar *link-hook
* nil
)
16 (defun file-get-contents (filename)
17 (with-open-file (stream filename
)
18 (uiop:slurp-stream-string stream
)))
20 (defun grab-from-rts (url)
21 (declare (optimize (speed 0) (space 3)))
22 (let* ((root (plump:parse
(dex:get url
:keep-alive nil
)))
23 (post-body (plump:get-element-by-id root
"wikitext")))
24 (loop for cls in
'("div.nav_menu" "div.imgonly" "div.bottom_nav") do
25 (loop for e across
(clss:select cls post-body
)
26 do
(plump:remove-child e
)))
27 (plump:remove-child
(elt (clss:select
"h1" post-body
) 0))
28 (plump:remove-child
(elt (clss:select
"p" post-body
) 0))
29 (with-open-file (stream (merge-pathnames "./rts-content/" (subseq (quri:uri-path
(quri:uri url
)) 1)) :direction
:output
:if-does-not-exist
:create
:external-format
:utf-8
)
30 (plump:serialize post-body stream
))))
32 (defun rts-to-html (file)
33 (declare (optimize (speed 0) (space 3)))
36 (file-get-contents "./rts-content/rts.css")
38 (file-get-contents (merge-pathnames "./rts-content/" file
))))
40 (defparameter *html-overrides
* (make-hash-table :test
'equal
))
41 (loop for
(id file
) in
'(("XTXWPQSEgoMkAupKt" "An-Intuitive-Explanation-Of-Bayess-Theorem")
42 ("afmj8TKAqH6F2QMfZ" "A-Technical-Explanation-Of-Technical-Explanation")
43 ("7ZqGiPHTpiDMwqMN2" "The-Twelve-Virtues-Of-Rationality"))
44 do
(let ((file* file
)) (setf (gethash id
*html-overrides
*) (lambda () (rts-to-html file
*)))))
46 (defmacro do-with-cleaners
((regexp-list scanner replacement
) &body body
)
47 (declare (optimize (speed 0) (space 3)))
48 `(labels ((fn (,scanner
,replacement
) ,@body
))
49 ,@(loop for
(regex flags replacement
) in
(eval regexp-list
)
50 collecting
`(fn (load-time-value
51 (ppcre:create-scanner
,regex
52 ,@(loop for
(flag sym
) in
'((#\i
:case-insensitive-mode
)
53 (#\m
:multi-line-mode
)
54 (#\s
:single-line-mode
)
56 when
(find flag flags
)
57 append
(list sym t
))))
60 (defmacro define-cleaner
(name regexp-list
)
61 (declare (optimize (speed 0) (space 3)))
63 (do-with-cleaners (,regexp-list scanner replacement
)
64 (setf text
(ppcre:regex-replace-all scanner text replacement
)))))
66 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
67 (defun read-regexp-file (filename)
68 (declare (optimize (speed 0) (space 3)))
69 (let ((data (destructuring-bind (* ((* (* inner
))))
70 (with-open-file (stream (uiop:subpathname
(asdf:system-source-directory
"lw2-viewer") filename
)) (parse-js:parse-js stream
))
72 (loop for input in data
73 collecting
(destructuring-bind (* ((* regex flags
) (* replacement
))) input
74 (list regex flags
(ppcre:regex-replace-all
"\\$(\\d)" replacement
"\\\\\\1")))))))
76 (define-cleaner clean-text
(read-regexp-file "text-clean-regexps.js"))
77 (define-cleaner clean-html-regexps
(read-regexp-file "html-clean-regexps.js"))
79 (declaim (ftype function url-scanner
))
80 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
81 (setf (fdefinition 'url-scanner
) (ppcre:create-scanner
82 "(?:https?://[-a-zA-Z0-9]+\\.[-a-zA-Z0-9.]+|[-a-zA-Z0-9.]+\\.(?:com|edu|gov|mil|net|org|int|biz|info|name|museum|us|ca|uk|io|ly))(?:\\:[0-9]+){0,1}(?:(?:/|\\?(?!(?:$|\\s)))(?:(?:(\\()|\\)(?![.,;:?!]?(?:$|\\s))|[-\\w\\d.,;:?'\\\\+@!&%$#=~–_/])*(?(1)[-\\w\\d\\\\+@&%$#=~_/)]|[-\\w\\d\\\\+@&%$#=~_/]))?)?"
83 :single-line-mode t
)))
85 (defun hyphenate-string (string)
86 (let ((hyphenation-list (cl-typesetting::hyphenate-string string
)))
87 (declare (type string string
)
88 (type list hyphenation-list
))
90 (let ((new-string (make-array (+ (length string
) (length hyphenation-list
)) :element-type
'character
:fill-pointer
0)))
91 (loop for char across string
92 for orig-offset of-type fixnum from
0
93 with current-hyphenation
= hyphenation-list
94 do
(when (and current-hyphenation
(= orig-offset
(the fixnum
(first current-hyphenation
))))
95 (vector-push #\SOFT_HYPHEN new-string
)
96 (setf current-hyphenation
(rest current-hyphenation
)))
97 do
(vector-push char new-string
))
98 (values (coerce new-string
'(simple-array character
1)) hyphenation-list
))
99 (values string nil
))))
101 (defun clean-text-to-html (text &key
(hyphenation t
))
103 (((or plump
:invalid-xml-character plump
:discouraged-xml-character
) #'abort
))
105 (plump:encode-entities
107 (funcall (if hyphenation
#'hyphenate-string
#'identity
) (clean-text text
))
110 (defun rot13-char (char)
111 (let ((char-code (char-code char
)))
112 (labels ((translate-char (base)
113 (code-char (+ base
(mod (+ 13 (- char-code base
)) 26)))))
114 (declare (dynamic-extent #'translate-char
))
116 ((<= (char-code #\A
) char-code
(char-code #\Z
))
117 (translate-char (char-code #\A
)))
118 ((<= (char-code #\a) char-code
(char-code #\z
))
119 (translate-char (char-code #\a)))
122 (declaim (inline letter-index
))
123 (defun letter-index (char)
124 (let ((char-code (char-code char
)))
126 ((<= (char-code #\A
) char-code
(char-code #\Z
))
127 (- char-code
(char-code #\A
)))
128 ((<= (char-code #\a) char-code
(char-code #\z
))
129 (- char-code
(char-code #\a))))))
131 (defparameter *letter-frequencies
*
132 '((#\A .
8.167) (#\B .
1.492) (#\C .
2.782) (#\D .
4.253) (#\E .
12.702)
133 (#\F .
2.228) (#\G .
2.015) (#\H .
6.094) (#\I .
6.966) (#\J .
0.153)
134 (#\K .
0.772) (#\L .
4.025) (#\M .
2.406) (#\N .
6.749) (#\O .
7.507)
135 (#\P .
1.929) (#\Q .
0.095) (#\R .
5.987) (#\S .
6.327) (#\T .
9.056)
136 (#\U .
2.758) (#\V .
0.978) (#\W .
2.36) (#\X .
0.15) (#\Y .
1.974)
139 (defparameter *letter-rot13-log-odds
*
140 (let ((array (make-array 26 :element-type
'single-float
)))
142 for letter-freq in
*letter-frequencies
*
143 for rot13-freq in
(concatenate 'list
(subseq *letter-frequencies
* 13 26) (subseq *letter-frequencies
* 0 13))
145 do
(setf (aref array i
) (log (/ (cdr letter-freq
) (cdr rot13-freq
)) 2)))
148 (defparameter *bigram-rot13-log-odds
*
149 (let ((bigram-data (with-open-file (stream (asdf:system-relative-pathname
"lw2-viewer" "data/bigrams.lisp"))
151 (bigram-table (make-array '(26 26) :element-type
'single-float
:initial-element
1.0))
152 (bigram-log-odds (make-array '(26 26) :element-type
'single-float
)))
154 for
(bigram count
) in bigram-data
155 do
(setf (aref bigram-table
156 (letter-index (aref bigram
0))
157 (letter-index (aref bigram
1)))
158 (float (+ count
1))))
161 (setf (aref bigram-log-odds i j
)
162 (log (/ (aref bigram-table i j
)
169 (defun rot13-text-p (text &optional
(start 0) (end (length text
)))
170 (declare (type simple-string text
))
176 (let ((odds-table (load-time-value *letter-rot13-log-odds
*)))
177 (declare (type (simple-array single-float
(26)) odds-table
))
178 (if-let ((letter-index (letter-index (aref text start
))))
179 (aref odds-table letter-index
)
182 (let ((odds-table (load-time-value *bigram-rot13-log-odds
*)))
183 (declare (type (simple-array single-float
(26 26)) odds-table
))
186 for i from
(1+ start
) to
(1- end
)
187 do
(when-let ((a (letter-index (aref text
(1- i
))))
188 (b (letter-index (aref text i
))))
189 (setf sum
(+ sum
(aref odds-table a b
))))
190 finally
(return sum
)))))))
192 (< log-odds
(log 0.0001 2))
195 (defun rot13-inplace (text &optional
(start 0) (end (1- (length text
))))
196 (declare (type simple-string text
))
198 for i from start to end
199 do
(setf (aref text i
) (rot13-char (aref text i
))))
202 (defun unrot13-by-words (text)
203 (declare (type simple-string text
))
208 when
(or (= i
(1- (length text
)))
209 (position char
",.:;?! "))
211 (when (< (nth-value 1 (rot13-text-p text word-start i
)) (+ 0 (- i word-start
0)))
212 (rot13-inplace text word-start i
))
213 (setf word-start i
)))
216 (defun char-is-whitespace (c)
217 (or (cl-unicode:has-binary-property c
"White_Space")
218 (eql c
#\BRAILLE_PATTERN_BLANK
)))
220 (defun string-is-whitespace (string)
221 (every #'char-is-whitespace string
))
223 (defvar *mathjax-process
* nil
)
225 (defmacro with-mathjax-processor
(() &body body
)
226 `(let ((*mathjax-process
* nil
))
229 (when *mathjax-process
*
230 (uiop:close-streams
*mathjax-process
*)
231 (uiop:wait-process
*mathjax-process
*)))))
233 (defun start-mathjax-process ()
234 (unless *mathjax-process
*
235 (setf *mathjax-process
* (uiop:launch-program
"node js-foreign-lib/mathjax.js"
236 :input
:stream
:output
:stream
237 :element-type
'(unsigned-byte 8))))
238 (values (uiop:process-info-input
*mathjax-process
*)
239 (uiop:process-info-output
*mathjax-process
*)))
241 (defun process-mathjax (tex)
242 (multiple-value-bind (outstream instream
) (start-mathjax-process)
243 (let ((outbuf (babel:string-to-octets tex
)))
244 (nibbles:write-ub32
/le
(length outbuf
) outstream
)
245 (write-sequence outbuf outstream
)
246 (finish-output outstream
))
247 (let ((insize (nibbles:read-ub32
/le instream
)))
249 (let ((inbuf (make-array insize
:element-type
'(unsigned-byte 8))))
250 (read-sequence inbuf instream
)
251 (babel:octets-to-string inbuf
))))))
253 (defun handle-codecogs (node uri
)
255 ("^https?://[a-z.]*codecogs\.com/[a-z.]*\\?(.*)$"
256 (declare (regex-groups-min 1))
257 (let ((math-html (process-mathjax (quri:url-decode
(reg 0)))))
259 (let* ((math-dom (plump:parse math-html
))
260 (children (plump:children math-dom
)))
261 (if (string-is-whitespace (plump:text
(plump:parent node
)))
262 (loop for child across children
263 do
(plump:insert-after node child
)
264 do
(setf (plump:parent child
) (plump:parent node
)))
265 (let ((container (plump:make-element
(plump:parent node
) "span")))
266 (plump:remove-child container
)
267 (plump:insert-after node container
)
268 (setf (plump:parent container
) (plump:parent node
)
269 (plump:children container
) children
)
270 (loop for child across children
271 do
(setf (plump:parent child
) container
))))
272 (plump:remove-child node
))
276 (declaim (ftype (function (plump:node
) list
) class-list
))
278 (defun class-list (node)
279 (let ((class (plump:attribute node
"class")))
280 (and class
(split-sequence #\Space class
))))
282 (declaim (ftype (function (plump:node
&rest simple-string
) boolean
) tag-is class-is-not text-class-is-not
))
284 (defun tag-is (node &rest args
)
285 (declare (type plump
:node node
)
286 (dynamic-extent args
))
287 (when (plump:element-p node
)
288 (let ((tag (plump:tag-name node
)))
290 (some (lambda (x) (string= tag x
))
293 (defun every-ancestor (node test
)
294 (declare (type plump
:node node
)
295 (type function test
))
296 (iter (for target first node then
(plump:parent target
))
297 (cond ((or (null target
) (plump:root-p target
)) (return t
))
298 ((not (funcall test target
)) (return nil
)))))
300 (defun any-ancestor (node test
)
301 (not (every-ancestor node
(complement test
))))
303 (defun class-is-not (node &rest args
)
304 (declare (type plump
:node node
)
305 (dynamic-extent args
))
306 (every-ancestor node
(lambda (n)
312 (defun class-is (node &rest args
)
313 (declare (type plump
:node node
)
314 (dynamic-extent args
))
315 (any-ancestor node
(lambda (n)
321 (defun text-class-is-not (node &rest args
)
322 (declare (type plump
:node node
)
323 (dynamic-extent args
))
324 (apply #'class-is-not
(plump:parent node
) args
))
326 (defun clean-dom-text (root)
328 (((or plump
:invalid-xml-character plump
:discouraged-xml-character
) #'abort
))
331 (and (plump:element-p node
)
332 (ppcre:scan
"^(?:p|div|blockquote|li|h[0-6])$" (plump:tag-name node
))))
334 (and (plump:text-node-p node
)
336 (text-class-is-not node
"mjx-math" "arbital-math")
337 (not (tag-is (plump:parent node
) "code"))))
338 (traverse (node main-fn
&optional recurse-fn
)
339 (when (cleanablep node
) (funcall main-fn node
))
340 (when (plump:nesting-node-p node
)
341 (loop for n across
(plump:children node
)
343 (if recurse-fn
(funcall recurse-fn n
))
344 (traverse n main-fn recurse-fn
))))))
345 (let* ((offset-list nil
)
347 (with-output-to-string (stream)
351 (push (length (the string
(plump:text node
))) offset-list
)
352 (write-string (plump:text node
) stream
))
354 (whole-string-output whole-string-input
))
355 (declare (type string whole-string-output whole-string-input
))
356 (setf offset-list
(nreverse offset-list
))
358 ((call-with-offset-loop (continue-fn loop-fn next-offset-fn offset-adjust-fn
)
359 (loop with current-offset
= offset-list
360 with output-offset of-type
(or null fixnum
) = (first current-offset
)
361 with output-offset-list
= nil
362 with total-offset of-type fixnum
= 0
363 while
(funcall continue-fn
)
365 do
(loop for current-offset-num of-type fixnum
= (first current-offset
)
366 while
(and (rest current-offset
) (< (+ total-offset current-offset-num
) (funcall next-offset-fn
)))
368 (push output-offset output-offset-list
)
369 (setf total-offset
(+ total-offset current-offset-num
)
370 current-offset
(cdr current-offset
)
371 output-offset
(first current-offset
))))
372 do
(setf output-offset
(funcall offset-adjust-fn output-offset
))
374 (push output-offset output-offset-list
)
375 (loop for x in
(rest current-offset
) do
(push x output-offset-list
))
376 (setf offset-list
(nreverse output-offset-list
))))
378 (declare (dynamic-extent (function call-with-offset-loop
))
379 (ftype (function ((function ()) (function ()) (function () fixnum
) (function (fixnum) fixnum
)) (values)) call-with-offset-loop
))
381 ((offset-loop ((list-binding list-form
) (&body loop-body
) (&body next-offset-body
) (&body offset-adjust-body
))
382 (with-gensyms (list-current)
383 `(let ((,list-current
,list-form
)
385 (labels ((continue-fn () (if ,list-current
(setf ,list-binding
(pop ,list-current
))))
386 (loop-fn () ,.loop-body
)
387 (next-offset-fn () ,.next-offset-body
)
388 (offset-adjust-fn ,(first offset-adjust-body
) (declare (type fixnum
,(caar offset-adjust-body
)) (values fixnum
)) ,.
(rest offset-adjust-body
)))
389 (declare (dynamic-extent (function continue-fn
) (function loop-fn
) (function next-offset-fn
) (function offset-adjust-fn
)))
390 (call-with-offset-loop #'continue-fn
#'loop-fn
#'next-offset-fn
#'offset-adjust-fn
))))))
391 (do-with-cleaners ((read-regexp-file "text-clean-regexps.js") scanner replacement
)
392 (let ((replacements 0)
393 (replacement-list nil
)
394 (original-length (length whole-string-output
)))
395 (declare (type fixnum replacements
))
396 (ppcre:do-scans
(match-start match-end reg-starts reg-ends scanner whole-string-output
)
397 (declare (type fixnum match-start match-end
)
398 (type simple-vector reg-starts reg-ends
))
401 (list (if (and (> (length reg-starts
) 0) (eq (aref reg-starts
0) match-start
))
404 (if (and (> (length reg-starts
) 0) (eq (aref reg-ends
(- (length reg-ends
) 1)) match-end
))
405 (aref reg-starts
(- (length reg-starts
) 1))
408 (setf replacement-list
(nreverse replacement-list
))
409 (setf whole-string-output
(ppcre:regex-replace-all scanner whole-string-output replacement
))
410 (let ((length-difference (- (length whole-string-output
) original-length
))
412 (declare (type fixnum length-difference length-change
))
414 (current-replacement replacement-list
)
415 ((setf length-change
(ceiling length-difference replacements
)
416 length-difference
(- length-difference length-change
)
417 replacements
(- replacements
1)))
418 ((destructuring-bind (start end
) current-replacement
419 (declare (type fixnum start end
))
420 (ceiling (+ start end
) 2)))
421 ((output-offset) (max 0 (+ output-offset length-change
)))))))
422 (multiple-value-bind (hyphenated-string hyphenation-list
) (hyphenate-string whole-string-output
)
423 (setf whole-string-output hyphenated-string
)
425 (current-hyphenation hyphenation-list
)
427 (current-hyphenation)
428 ((output-offset) (1+ output-offset
))))))
429 (let ((current-offset 0))
430 (declare (type (or null fixnum
) current-offset
))
434 (let ((output-length (length whole-string-output
))
435 (next-offset (if offset-list
(+ current-offset
(the fixnum
(first offset-list
))) nil
)))
436 (declare (type (or null fixnum
) next-offset
))
437 (setf (plump:text node
) (subseq whole-string-output
(min current-offset output-length
) (and next-offset
(min next-offset output-length
)))
438 current-offset next-offset
439 offset-list
(cdr offset-list
))))
440 (lambda (node) (declare (ignore node
))))))))
443 ;;;; Dynamic content blocks.
445 (define-cache-database 'lw2.backend-modules
:backend-lmdb-cache
"dynamic-content-blocks")
447 (defparameter *dynamic-content-block-callback
* nil
)
449 (defclass dynamic-element
(plump:element
)
450 ((dynamic-call-form :initarg
:dynamic-call-form
:accessor dynamic-call-form
)))
452 (defmethod plump:serialize-object
:around
((node dynamic-element
))
453 (funcall *dynamic-content-block-callback
* :start node
)
455 (funcall *dynamic-content-block-callback
* :end node
))
457 (defun create-dynamic-call (node function-name
&rest args
)
458 (change-class node
'dynamic-element
:dynamic-call-form
(list* function-name args
)))
460 (defun call-with-dynamic-block-serialization (fn hash output-string
)
461 (let* ((current-start nil
)
464 (flet ((current-octet-pos ()
465 (psetf last-string-pos
(length output-string
)
466 last-octet-pos
(+ last-octet-pos
(babel:string-size-in-octets output-string
:start last-string-pos
)))
468 (declare (dynamic-extent #'current-octet-pos
))
469 (let* ((dynamic-call-list nil
)
470 (*dynamic-content-block-callback
*
473 (:start
(setf current-start
(current-octet-pos)))
474 (:end
(let ((current-end (current-octet-pos)))
475 (push (list* current-start current-end
(dynamic-call-form node
))
476 dynamic-call-list
)))))))
477 (declare (dynamic-extent *dynamic-content-block-callback
*))
478 (multiple-value-prog1
480 (if dynamic-call-list
481 (cache-put "dynamic-content-blocks" hash
(nreverse dynamic-call-list
) :key-type
:byte-vector
:value-type
:lisp
)
482 (cache-del "dynamic-content-blocks" hash
:key-type
:byte-vector
)))))))
484 (defmacro with-dynamic-block-serialization
((hash output-string
) &body body
)
485 `(dynamic-flet ((fn () ,@body
)) (call-with-dynamic-block-serialization #'fn
,hash
,output-string
)))
489 (define-lmdb-memoized extract-excerpt
'lw2.backend-modules
:backend-lmdb-cache
490 (:sources
("src/clean-html.lisp")) (in-html)
491 (let ((root (plump:parse
(string-trim '(#\Space
#\Newline
#\Tab
#\Return
#\Linefeed
#\Page
) in-html
)))
494 (with-output-to-string (out-stream)
499 (when (or (> (length (plump:children node
)) 1)
500 (plump:text-node-p
(plump:first-child node
)))
501 (let ((text (plump:text node
)))
503 (write-char #\Space out-stream
))
504 (write-string text out-stream
)
505 (setf chars
(+ chars
(length text
))
509 :test
(lambda (node) (tag-is node
"p")))))))
511 (define-lmdb-memoized clean-html
'lw2.backend-modules
:backend-lmdb-cache
512 (:sources
("src/clean-html.lisp" "src/links.lisp" "src/colors.lisp" "text-clean-regexps.js" "html-clean-regexps.js" "js-foreign-lib/mathjax.js")) (in-html &key with-toc post-id
)
513 (declare (ftype (function (plump:node
) fixnum
) plump
:child-position
)
514 (ftype (function (plump:node
) (and vector
(not simple-array
))) plump
:family
)
515 (ftype (function (plump:node
) simple-string
) plump
:text plump
:tag-name
))
516 (labels ((only-child-is (node &rest args
)
517 (declare (dynamic-extent args
))
518 (and (= 1 (length (plump:children node
)))
519 (let ((child (plump:first-child node
)))
521 (plump:element-p child
)
522 (apply #'tag-is child args
)))))
523 (is-child-of-tag (node &rest args
)
524 (declare (dynamic-extent args
))
525 (loop for e
= (plump:parent node
) then
(plump:parent e
)
526 while
(not (typep e
'plump
:root
))
527 when
(and (plump:element-p e
) (apply #'tag-is
(cons e args
))) return t
))
528 (add-class (node class
)
529 (declare (type plump
:node node
)
531 (let ((classes (adjoin class
(alexandria:if-let
(attr (plump:attribute node
"class")) (split-sequence #\Space attr
)) :test
#'string
=)))
532 (declare (dynamic-extent classes
))
533 (setf (plump:attribute node
"class") (format nil
"~{~A~^ ~}" classes
)))
535 (remove-attributes (node &rest attrs
)
536 (declare (dynamic-extent attrs
))
538 (plump:remove-attribute node attr
)))
539 (make-element-before (node tag
)
540 (if (plump:text-node-p node
)
541 (make-element-before (plump:parent node
) tag
)
542 (let ((e (plump:make-element
(plump:parent node
) tag
)))
543 (plump:remove-child e
)
544 (plump:insert-before node e
)
545 (setf (plump:parent e
) (plump:parent node
))
547 (wrap-element (node element-name
)
548 (let ((container (make-element-before node element-name
)))
549 (plump:remove-child node
)
550 (plump:append-child container node
)
552 (wrap-children (node element-name
)
553 (let ((new-element (plump:make-element node element-name
)))
554 (plump:remove-child new-element
)
555 (setf (plump:children new-element
) (plump:clone-children node t new-element
)
556 (plump:children node
) (plump:make-child-array
))
557 (plump:append-child node new-element
)))
558 (move-children-out-of-node (node &key keep
)
559 (iterate (for c in-vector
(plump:children node
) downto
0)
560 (setf (plump:parent c
) (plump:parent node
))
561 (plump:insert-after node c
))
563 (setf (plump:children node
) (plump:make-child-array
))
564 (plump:remove-child node
)))
565 (text-node-is-not (node &rest args
)
566 (declare (type plump
:node node
)
567 (dynamic-extent args
))
569 (typep (plump:parent node
) 'plump
:root
)
570 (every (lambda (x) (string/= (plump:tag-name
(plump:parent node
)) x
)) args
)))
571 (adjacent-text-node (node direction
)
572 (multiple-value-bind (get-sibling insert-sibling
)
574 (:previous
(values #'plump
:previous-sibling
#'plump
:insert-before
))
575 (:next
(values #'plump
:next-sibling
#'plump
:insert-after
)))
576 (let ((candidate (funcall get-sibling node
)))
577 (if (plump:text-node-p candidate
)
579 (let ((new-node (plump:make-text-node
(plump:parent node
))))
580 (funcall insert-sibling node new-node
)
583 (let ((text (plump:text node
))
586 (loop for c across text
587 when
(alpha-char-p c
)
589 when
(digit-char-p c
)
591 finally
(return (>= digit alpha
)))))
592 (remove-if-whitespace (node)
593 (when (string-is-whitespace (plump:text node
))
594 (plump:remove-child node
)))
595 (first-non-whitespace-child (node)
596 (loop for e across
(plump:children node
)
597 when
(or (typep e
'plump
:element
) (not (string-is-whitespace (plump:text e
)))) return e
))
598 (find-text-node (node direction
)
599 (let ((iterator #.
`(case direction
600 ,@(loop for d in
'(:first
:last
) collect
602 (iterate (for c in-vector
(plump:children node
) ,@(if (eq d
:last
) '(downto 0)))
603 (funcall fn c
))))))))
604 (declare (dynamic-extent iterator
))
606 ((and (plump:text-node-p node
) (plump:parent node
))
608 ((plump:nesting-node-p node
)
610 (funcall iterator
(lambda (c)
611 (when-let (tn (find-text-node c direction
)) (return tn
)))))))))
612 (vacuum-whitespace (node)
613 (dolist (direction '(:first
:last
))
614 (let ((displaced-text (make-string 0)))
615 (loop for lt
= (find-text-node node direction
) do
618 (return-from vacuum-whitespace node
))
619 ((string-is-whitespace (plump:text lt
))
620 (setf displaced-text
(case direction
621 (:first
(concatenate 'string displaced-text
(plump:text lt
)))
622 (:last
(concatenate 'string
(plump:text lt
) displaced-text
))))
623 (plump:remove-child lt
))
625 (let* ((text (plump:text lt
)))
628 (let ((boundary (loop for i from
0 to
(- (length text
) 1)
629 unless
(char-is-whitespace (aref text i
))
631 (setf displaced-text
(concatenate 'string displaced-text
(subseq text
0 boundary
))
632 (plump:text lt
) (subseq text boundary
))))
634 (let ((boundary (loop for i from
(- (length text
) 1) downto
0
635 unless
(char-is-whitespace (aref text i
))
637 (setf displaced-text
(concatenate 'string
(subseq text
(+ 1 boundary
) (length text
)))
638 (plump:text lt
) (subseq text
0 (+ 1 boundary
)))))))
640 (when (> (length displaced-text
) 0)
641 (let ((atn (adjacent-text-node node
(case direction
(:first
:previous
) (:last
:next
)))))
642 (setf (plump:text atn
) (case direction
643 (:first
(concatenate 'string
(plump:text atn
) displaced-text
))
644 (:last
(concatenate 'string displaced-text
(plump:text atn
))))))))))
645 (add-element-style (node attribute value
)
646 (let ((old-style (plump:attribute node
"style")))
647 (setf (plump:attribute node
"style")
649 (format nil
"~A~:[;~;~] ~A: ~A;" old-style
(ppcre:scan
";\s*$" old-style
) attribute value
)
650 (format nil
"~A: ~A;" attribute value
)))))
651 (style-string-to-alist (string)
652 (let ((rules (ppcre:split
"\\s*;\\s*" string
:sharedp t
)))
653 (iter (for rule in rules
)
654 (let ((parts (ppcre:split
"\\s*:\\s*" rule
:sharedp t
)))
655 (when (= 2 (length parts
))
656 (collect (cons (first parts
) (second parts
))))))))
657 (alist-to-style-string (alist)
658 (with-output-to-string (s)
659 (iter (for item in alist
)
660 (format s
"~A:~A;" (car item
) (cdr item
)))))
661 (remove-style-rules (node &rest rules
)
662 (declare (dynamic-extent rules
))
663 (when-let ((old-style (plump:attribute node
"style")))
664 (setf (plump:attribute node
"style")
665 (alist-to-style-string
666 (remove-if (lambda (x) (member (car x
) rules
:test
#'string-equal
))
667 (style-string-to-alist old-style
))))))
668 (flatten-element (node)
669 (let* ((previous-sibling (plump:previous-sibling node
))
670 (next-sibling (if (plump:text-node-p
(plump:next-sibling node
))
671 (plump:next-sibling node
)))
672 (new-text-node (if (plump:text-node-p previous-sibling
)
674 (plump:insert-before node
675 (plump:remove-child
(plump:make-text-node
(plump:parent node
)))))))
676 (setf (plump:parent new-text-node
) (plump:parent node
)
677 (plump:text new-text-node
) (concatenate 'string
678 (plump:text new-text-node
)
681 (plump:text next-sibling
)
683 (plump:remove-child node
)
684 (when next-sibling
(plump:remove-child next-sibling
))))
685 (scan-for-urls (text-node)
686 (declare (type plump
:text-node text-node
))
687 (let ((text (plump:text text-node
)))
688 (multiple-value-bind (url-start url-end
)
689 (ppcre:scan
#'url-scanner text
)
690 (declare (type simple-string text
)
691 (type (or null fixnum
) url-start url-end
))
693 (let* ((url-raw (subseq text url-start url-end
))
694 (url (if (mismatch "http" url-raw
:end2
4) (concatenate 'string
"http://" url-raw
) url-raw
))
695 (family (plump:family text-node
))
696 (other-children (prog1
697 (subseq family
(1+ (plump:child-position text-node
)))
698 (setf (fill-pointer family
) (1+ (plump:child-position text-node
)))))
699 (new-a (plump:make-element
(plump:parent text-node
) "a"))
700 (new-text (unless (= url-end
(length text
)) (plump:make-text-node
(plump:parent text-node
) (subseq text url-end
)))))
701 (setf (plump:text text-node
) (subseq text
0 url-start
)
702 (plump:attribute new-a
"href") (with-direct-link (presentable-link url
))
703 (plump:attribute new-a
"class") "bare-url")
704 (plump:make-text-node new-a
(clean-text url-raw
))
706 (scan-for-urls new-text
)
707 (setf (plump:text new-text
) (clean-text (plump:text new-text
))))
708 (loop for item across other-children
709 do
(plump:append-child
(plump:parent text-node
) item
))
710 (when (= url-start
0)
711 (plump:remove-child text-node
)))))))
712 (title-to-anchor (text used-anchors
)
713 ;; This should match LW behavior in packages/lesswrong/lib/collections/posts/tableOfContents.js
714 (let* ((chars-to-use "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789")
715 (base-anchor (with-output-to-string (stream)
716 (loop for c across text
717 do
(write-char (if (find c chars-to-use
) c
#\_
) stream
)))))
718 (loop for suffix from
0
719 for anchor
= base-anchor then
(format nil
"~A~A" base-anchor suffix
)
720 when
(not (gethash anchor used-anchors
))
721 return
(progn (setf (gethash anchor used-anchors
) t
)
723 (contents-to-html (contents min-header-level out-stream
)
724 (declare (type cons contents
))
725 (format out-stream
"<nav class=\"contents\"><div class=\"contents-head\">Contents</div><ul class=\"contents-list\">")
726 (loop for
(elem-level text id
) in contents do
727 (format out-stream
"<li class=\"toc-item-~A\"><a href=\"#~A\">~A</a></li>"
728 (- elem-level
(- min-header-level
1)) id
(clean-text-to-html text
)))
729 (format out-stream
"</ul></nav>"))
730 (style-hash-to-html (style-hash out-stream
)
731 (declare (type hash-table style-hash
))
732 (let ((style-list (alexandria:hash-table-keys style-hash
)))
734 (format out-stream
"<style>~{~A~}</style>" style-list
)))))
735 (declare (ftype (function (plump:node
&rest simple-string
) boolean
) only-child-is is-child-of-tag text-node-is-not
))
737 (((or plump
:invalid-xml-character plump
:discouraged-xml-character
) #'abort
))
739 (override (gethash post-id
*html-overrides
*))
741 (let ((root (plump:parse
(string-trim '(#\Space
#\Newline
#\Tab
#\Return
#\Linefeed
#\Page
) in-html
)))
745 (aggressive-deformat nil
)
746 (style-hash (make-hash-table :test
'equal
))
747 (used-colors (make-hash-table :test
'equal
))
748 (used-anchors (make-hash-table :test
'equal
)))
749 (declare (type fixnum section-count min-header-level
))
750 (when *before-clean-hook
*
751 (funcall *before-clean-hook
*))
752 (let ((wayward-li-container nil
))
753 (with-mathjax-processor ()
759 (when-let ((src (plump:attribute node
"src")))
760 (handle-codecogs node src
)))
761 ((not (plump:parent node
)) nil
)
764 ((not (plump:attribute node
"href"))
765 (move-children-out-of-node node
:keep t
))
767 (and (ppcre:scan
"^\s*https?://" (plump:text node
))
768 (not (find #\HORIZONTAL_ELLIPSIS
(plump:text node
))))
769 (notany (lambda (attr) (nonempty-string (plump:attribute node attr
))) '("href" "name" "id")))
770 (flatten-element node
))
772 (let* ((next-sibling (plump:next-sibling node
))
773 (next-text-node (if (plump:text-node-p next-sibling
) next-sibling
))
774 (next-next-sibling (if next-text-node
(plump:next-sibling next-text-node
) next-sibling
))
775 (next-a (if (and next-next-sibling
(tag-is next-next-sibling
"a")) next-next-sibling
)))
777 (or (not next-text-node
) (string-is-whitespace (plump:text next-text-node
)))
778 (string= (plump:attribute node
"href") (plump:attribute next-a
"href")))
780 (plump:remove-child next-text-node
)
781 (plump:append-child node next-text-node
))
782 (loop for c across
(plump:children next-a
)
783 do
(progn (plump:remove-child c
)
784 (plump:append-child node c
)))
785 (plump:remove-child next-a
)
787 ((tag-is node
"ul" "ol")
788 (setf wayward-li-container node
)
789 (let ((new-children (plump:make-child-array
)))
790 (loop for child across
(plump:children node
)
791 do
(if (and (plump:element-p child
) (tag-is child
"li"))
792 (vector-push-extend child new-children
)
793 (unless (and (plump:text-node-p child
) (string-is-whitespace (plump:text child
)))
794 (if (= (length new-children
) 0)
795 (vector-push-extend (plump:make-element node
"li") new-children
))
796 (plump:append-child
(aref new-children
(- (length new-children
) 1)) child
))))
797 (setf (plump:children node
) new-children
)))
799 (unless (is-child-of-tag node
"ul" "ol")
800 (unless wayward-li-container
801 (setf wayward-li-container
(make-element-before node
"ul")))
802 (plump:remove-child node
)
803 (plump:append-child wayward-li-container node
)))
804 ((tag-is node
"p" "blockquote" "div")
805 (setf wayward-li-container nil
))))
806 :test
#'plump
:element-p
)))
807 (loop while
(and (= 1 (length (plump:children root
))) (plump:element-p
(plump:first-child root
)) (tag-is (plump:first-child root
) "div" "html" "body"))
808 do
(setf (plump:children root
) (plump:children
(plump:first-child root
)))
809 do
(loop for c across
(plump:children root
) do
(setf (plump:parent c
) root
))
810 do
(when-let (fc (plump:first-child root
))
811 (when (and (plump:element-p fc
) (tag-is fc
"head"))
812 (loop for c across
(plump:children fc
) do
813 (when (and (plump:element-p c
) (tag-is c
"style"))
814 (setf (plump:parent c
) (plump:parent fc
))
815 (plump:insert-after fc c
)))
816 (plump:remove-child fc
))))
817 (loop for c across
(plump:children root
) do
818 (when (and (plump:element-p c
)
820 (string-is-whitespace (plump:text c
)))
821 (move-children-out-of-node c
)))
822 (loop for lc
= (plump:last-child root
)
823 while
(and (plump:element-p lc
) (tag-is lc
"br"))
824 do
(plump:remove-child lc
))
828 (when (and (plump:text-node-p node
)
830 (text-node-is-not node
"a" "style" "pre"))
831 (scan-for-urls node
))))
835 (when (and (not (plump:root-p node
)) (plump:parent node
))
838 (when (and (text-node-is-not node
"style" "pre" "code")
839 (text-class-is-not node
"mjx-math"))
840 (let ((new-root (plump:parse
(clean-html-regexps (plump:serialize node nil
))))
841 (other-children (prog1
842 (subseq (plump:family node
) (1+ (plump:child-position node
)))
843 (setf (fill-pointer (plump:family node
)) (plump:child-position node
)))))
844 (loop for item across
(plump:children new-root
)
845 do
(plump:append-child
(plump:parent node
) item
))
846 (loop for item across other-children
847 do
(plump:append-child
(plump:parent node
) item
)))))
849 (alexandria:when-let
(style (plump:attribute node
"style"))
850 (let ((style-list (style-string-to-alist style
)))
851 (cond ((or aggressive-deformat
852 (cdr (assoc "font-family" style-list
:test
#'string-equal
))
853 (search "font-style: inherit" style
)
854 (search "MsoNormal" (plump:attribute node
"class")))
855 (setf aggressive-deformat t
)
856 (plump:remove-attribute node
"style"))
857 ((ppcre:scan
"(?:^|;)\\s*(?:line-height:[^;]+in)\\s*(?:;|$)" style
)
858 (plump:remove-attribute node
"style"))
861 (dolist (style-item style-list
)
862 (when (member (car style-item
) '("color" "background" "background-color" "border" "border-color") :test
#'string-equal
)
863 (setf (cdr style-item
)
864 (regex-replace-body (-css-color-scanner- (cdr style-item
))
865 (multiple-value-bind (r g b a
) (decode-css-color (match))
867 (let ((color-name (safe-color-name r g b a
)))
869 (gethash color-name used-colors
) (list r g b a
))
870 (format nil
"var(--user-color-~A)" color-name
))))))))
872 (setf (plump:attribute node
"style") (alist-to-style-string style-list
))))))))
873 (when (and aggressive-deformat
(tag-is node
"div"))
874 (setf (plump:tag-name node
) "p"))
875 (when (let ((parent (plump:parent node
)))
877 (intersection (class-list node
) '("mjx-chtml" "mjx-math" "mjpage") :test
#'string
=)
879 (class-is-not parent
"mjx-chtml" "mjx-math" "mjpage"))))
881 ((let ((mrows (clss:select
".mjx-mrow" node
)))
882 (and (not (zerop (length mrows
)))
883 (every (lambda (mrow)
884 (zerop (length (plump:children mrow
))))
886 (plump:remove-child node
))
889 with full-width
= (or (class-is node
"mjx-full-width")
890 (loop for e across
(plump:children node
)
891 when
(member "MJXc-display" (class-list e
) :test
#'string
=)
893 for current
= node then
(plump:parent current
)
894 for parent
= (plump:parent current
)
896 (loop for s across
(plump:family current
)
897 unless
(or (eq s current
)
898 (and (plump:text-node-p s
) (string-is-whitespace (plump:text s
))))
900 do
(progn (add-class current
(if full-width
901 "mathjax-block-container"
902 "mathjax-inline-container"))
904 when
(or (null parent
)
905 (plump:root-p parent
)
906 (tag-is parent
"p" "blockquote" "div"))
907 do
(progn (add-class current
"mathjax-block-container")
911 (vacuum-whitespace node
)
912 (let ((href (plump:attribute node
"href")))
914 (let* ((href (string-trim '(#\Space
#\Newline
#\Tab
#\Return
#\Linefeed
#\Page
) href
))
915 (href (if (ppcre:scan
"^(?:(?:[a-z]+:)?//|/|#)" href
) href
(format nil
"http://~A" href
)))
916 (href (or (with-direct-link (presentable-link href
)) href
)))
918 (setf (plump:attribute node
"href") href
)
920 (log-and-ignore-errors
921 (funcall *link-hook
* href
))))))))
923 (let ((width (ignore-errors (parse-integer (plump:attribute node
"width"))))
924 (height (ignore-errors (parse-integer (plump:attribute node
"height")))))
925 (if (and width height
(<= width
1) (<= height
1))
926 ;; Remove probable tracking pixel.
927 (plump:remove-child node
)
929 (if (and (tag-is (plump:parent node
) "div" "p" "figure")
930 (only-child-is (plump:parent node
) "img"))
931 (plump:parent node
) ; Should already have imgonly class.
932 (let ((container (wrap-element node
"div")))
933 (add-class container
"imgonly")
935 (when-let ((src (plump:attribute node
"src")))
936 (setf src
(presentable-link src
:image
)
937 (plump:attribute node
"src") src
)
938 (create-dynamic-call container
'lw2.images
::dynamic-image
940 (plump:tag-name container
)
941 (alexandria:hash-table-alist
(plump:attributes container
))
942 (alexandria:hash-table-alist
(plump:attributes node
))))
943 (when (and width height
)
944 ;; Apply responsive image scaling CSS.
945 (setf (plump:attribute container
"style") (format nil
"--aspect-ratio: ~F; max-width: ~Dpx"
949 (remove-attributes node
"style" "class" "width" "height")
950 (setf (plump:attribute node
"loading") "lazy")))))
951 ((tag-is node
"figure")
952 (remove-attributes node
"style" "class" "width" "height"))
953 ((and (tag-is node
"p") (only-child-is node
"figure"))
954 (move-children-out-of-node node
))
955 ((tag-is node
"p" "blockquote" "div" "center")
956 (when (only-child-is node
"center")
957 (unless (string-is-whitespace (plump:text node
))
958 (add-element-style node
"text-align" "center"))
959 (move-children-out-of-node (plump:first-child node
)))
960 (when (tag-is node
"center")
961 (setf (plump:tag-name node
) "p")
962 (add-element-style node
"text-align" "center"))
963 (when-let ((question-id (plump:attribute node
"data-elicit-id")))
964 (when-let* ((question-title (lw2.backend
::get-elicit-question-title question-id
))
965 (links (plump:get-elements-by-tag-name node
"a"))
966 (text-node (plump:first-child
(first links
))))
967 (setf (plump:text text-node
) question-title
))
968 (create-dynamic-call (plump:parent
(plump:parent node
)) 'lw2.elicit-predictions
::render-elicit-block question-id
))
969 (if (string-is-whitespace (plump:text node
))
970 (if (or (plump:get-elements-by-tag-name node
"img")
971 (plump:get-elements-by-tag-name node
"iframe"))
972 (add-class node
"imgonly")
973 (plump:remove-child node
))
974 (if-let (parent (plump:parent node
))
975 (labels ((spoilerp (n)
976 (if-let (a (and (plump:element-p n
) (plump:attribute n
"class")))
977 (ppcre:scan
"(?:^| )spoiler\\S*(?: |$)" a
))))
978 (when (and nil
(tag-is node
"p") ;; FIXME: disabled until we can fix math and code false positives
979 (rot13-text-p (plump:text node
)))
980 (setf (plump:attribute node
"class") "spoiler")
982 (lambda (n) (unrot13-by-words (plump:text n
)))
983 :test
#'plump
:text-node-p
))
985 ((and (tag-is node
"p")
988 (plump:remove-attribute node
"class"))
989 ((and (tag-is node
"div")
991 (setf (plump:attribute node
"class") "spoiler"))
992 ((and (spoilerp node
)
994 (not (spoilerp parent
)))
995 (let ((previous-sibling (plump:previous-sibling node
)))
996 (if (and previous-sibling
(spoilerp previous-sibling
))
997 (progn (plump:remove-child node
)
998 (plump:append-child previous-sibling node
)
999 (plump:remove-attribute node
"class"))
1000 (let ((new-container (plump:make-element parent
"div")))
1001 (setf (plump:attribute new-container
"class") "spoiler")
1002 (plump:remove-child new-container
)
1003 (setf (plump:parent new-container
) (plump:parent node
))
1004 (plump:insert-before node new-container
)
1005 (loop for e
= node then ns
1006 while
(and (plump:element-p e
) (spoilerp e
))
1007 for ns
= (plump:next-sibling e
)
1009 (plump:remove-attribute e
"class")
1010 (plump:remove-child e
)
1011 (plump:append-child new-container e
))))))))))))
1012 ((tag-is node
"table" "tbody" "tr" "td")
1013 (remove-style-rules node
"border-top" "border-bottom" "border-left" "border-right" "padding")
1014 (when (and (tag-is node
"td") (is-numeric node
))
1015 (add-class node
"numeric")))
1017 (let ((parent (plump:parent node
)))
1019 ((and (or (plump:root-p parent
) (and (plump:element-p parent
) (tag-is parent
"p" "blockquote" "div")))
1020 (loop for c across
(plump:children node
) never
(and (plump:element-p c
) (tag-is c
"a"))))
1021 (vacuum-whitespace node
))
1023 (move-children-out-of-node node
)))))
1025 (when-let (old-style (plump:attribute node
"style"))
1026 (setf (plump:attribute node
"style")
1027 (ppcre:regex-replace-all
1028 (load-time-value (ppcre:create-scanner
"list-style-type\\s*:\\s*decimal\\s*;?" :single-line-mode t
:case-insensitive-mode t
))
1031 (when-let (start-string (plump:attribute node
"start"))
1032 (when-let (start (ignore-errors (parse-integer start-string
)))
1033 (plump:remove-attribute node
"start")
1034 (add-element-style node
"counter-reset" (format nil
"ol ~A" (- start
1))))))
1035 ((and (tag-is node
"li") (find "footnote-item" (class-list node
) :test
#'string-equal
))
1036 (unless (find-if (lambda (e) (and (plump:element-p e
) (find "footnote-content" (class-list e
) :test
#'string-equal
)))
1037 (plump:children node
))
1038 ;; footnote-content div does not exist. We must create it and move the footnote contents into place.
1039 (let* ((wrapper (wrap-children node
"div"))
1040 (backrefs (clss:select
".footnote-backref" wrapper
))
1041 (back-links (make-element-before wrapper
"span")))
1042 (setf (plump:attribute wrapper
"class") "footnote-content"
1043 (plump:attribute back-links
"class") "footnote-back-link")
1044 (loop for backref across backrefs do
1045 (progn (plump:remove-child backref
)
1046 (plump:append-child back-links backref
))))))
1048 (when (let ((c (plump:first-child node
))) (and c
(if (plump:text-node-p c
) (not (string-is-whitespace (plump:text c
))) (not (tag-is c
"p" "ul" "ol")))))
1049 (wrap-children node
"p")))
1050 ((tag-is node
"pre")
1051 (let ((nchildren (length (plump:children node
))))
1052 (when (>= nchildren
1)
1053 (remove-if-whitespace (plump:first-child node
))
1054 (when (>= nchildren
2)
1055 (remove-if-whitespace (plump:last-child node
))))))
1056 ((ppcre:scan
"^h[1-6]$" (plump:tag-name node
))
1057 (when (plump:get-elements-by-tag-name node
"p")
1058 (move-children-out-of-node node
))
1060 ((string-is-whitespace (plump:text node
))
1061 (plump:remove-child node
))
1063 (let ((fc (plump:first-child node
))
1064 (lc (plump:last-child node
)))
1065 (when (and (plump:element-p fc
) (tag-is fc
"br")) (plump:remove-child fc
))
1066 (when (and (not (eql fc lc
)) (plump:element-p lc
) (tag-is lc
"br")) (plump:remove-child lc
)))
1068 (incf section-count
)
1069 (unless (plump:attribute node
"id") (setf (plump:attribute node
"id") (format nil
"section-~A" section-count
)))
1070 (let* ((header-level (parse-integer (subseq (plump:tag-name node
) 1)))
1071 (header-text (with-output-to-string (stream)
1072 (plump:traverse node
1076 (when (text-node-is-not n
"style" "script")
1077 (write-string (plump:text n
) stream
))))))))
1078 (anchor-old (or (plump:attribute node
"id") (format nil
"section-~A" section-count
)))
1079 (anchor-new (title-to-anchor header-text used-anchors
))
1080 (wrapper (wrap-children node
"span")))
1081 (setf min-header-level
(min min-header-level header-level
)
1082 (plump:attribute node
"id") anchor-new
1083 (plump:attribute wrapper
"id") anchor-old
)
1084 (push (list header-level
1088 ((and (tag-is node
"span") (find "footnote-back-link" (class-list node
) :test
#'string-equal
))
1089 (plump:traverse node
#'move-children-out-of-node
:test
(lambda (n) (tag-is n
"sup" "strong"))))
1090 ((tag-is node
"style")
1091 (let ((text (plump:text node
)))
1092 (when (search ".mjx-math" text
)
1093 (setf (gethash text style-hash
) t
)))
1094 (plump:remove-child node
))
1095 ((tag-is node
"script")
1096 (plump:remove-child node
))))))))
1097 (clean-dom-text root
)
1098 (let ((with-toc (>= section-count
3))
1099 (out-string (make-array 0 :element-type
'character
:fill-pointer
0 :adjustable t
)))
1100 (with-output-to-string (out-stream out-string
)
1101 (style-hash-to-html style-hash out-stream
)
1102 (when (> (hash-table-count used-colors
) 0)
1103 (format out-stream
"<style>~%:root {~%")
1104 (maphash (lambda (name rgba-list
)
1105 (declare (ignore rgba-list
))
1106 (format out-stream
" --user-color-~A: #~A;~%" name name
))
1108 (flet ((write-inverted-colors (theme)
1109 (format out-stream
"body.theme-~A {~%" theme
)
1110 (maphash (lambda (name rgba-list
)
1111 (format out-stream
" --user-color-~A: ~A;~%" name
1112 (multiple-value-call #'encode-css-color
(apply #'perceptual-invert-rgba rgba-list
))))
1114 (format out-stream
"}~%")))
1115 (format out-stream
"}~%@media (prefers-color-scheme: dark) {~%")
1116 (write-inverted-colors "default")
1117 (format out-stream
"}~%")
1118 (write-inverted-colors "dark"))
1119 (format out-stream
"</style>"))
1120 (with-dynamic-block-serialization (current-memo-hash out-string
)
1121 (loop for c across
(plump:children root
)
1123 (not (or (string-is-whitespace (plump:text c
))
1124 (tag-is c
"figure"))))
1126 (contents-to-html (nreverse contents
) min-header-level out-stream
)
1127 (setf with-toc nil
))
1128 do
(plump:serialize c out-stream
)))