Whitespace fix.
[lw2-viewer.git] / src / clean-html.lisp
blob069d4a394b7fbc2dc984ea4b5d597361a12c10b4
1 (uiop:define-package #:lw2.clean-html
2 (:use #:cl #:alexandria #:iterate #:split-sequence #:lw2.lmdb #:lw2.links #:lw2.utils #:lw2.context #:lw2.sites #:lw2.conditions #:lw2.colors)
3 (:export #:*before-clean-hook* #:*link-hook* #:url-scanner #:clean-text #:clean-text-to-html #:clean-html #:clean-html* #:extract-excerpt #:extract-excerpt*)
4 (:unintern #:*text-clean-regexps* #:*html-clean-regexps*))
6 (in-package #:lw2.clean-html)
8 (setf cl-typesetting-hyphen::*hyphen-patterns-directory* (asdf:system-relative-pathname "lw2-viewer" "data/hyphenation-patterns/"))
9 (setf cl-typesetting-hyphen::*language-hyphen-file-list* '((:en-us . "hyph_en_US")))
10 (cl-typesetting-hyphen:load-language :en-us)
11 (setf cl-typesetting::*default-hyphen-language* :en-us)
13 (defvar *before-clean-hook* nil)
14 (defvar *link-hook* nil)
16 (defun file-get-contents (filename)
17 (with-open-file (stream filename)
18 (uiop:slurp-stream-string stream)))
20 (defun grab-from-rts (url)
21 (declare (optimize (speed 0) (space 3)))
22 (let* ((root (plump:parse (dex:get url :keep-alive nil)))
23 (post-body (plump:get-element-by-id root "wikitext")))
24 (loop for cls in '("div.nav_menu" "div.imgonly" "div.bottom_nav") do
25 (loop for e across (clss:select cls post-body)
26 do (plump:remove-child e)))
27 (plump:remove-child (elt (clss:select "h1" post-body) 0))
28 (plump:remove-child (elt (clss:select "p" post-body) 0))
29 (with-open-file (stream (merge-pathnames "./rts-content/" (subseq (quri:uri-path (quri:uri url)) 1)) :direction :output :if-does-not-exist :create :external-format :utf-8)
30 (plump:serialize post-body stream))))
32 (defun rts-to-html (file)
33 (declare (optimize (speed 0) (space 3)))
34 (concatenate 'string
35 "<style>"
36 (file-get-contents "./rts-content/rts.css")
37 "</style>"
38 (file-get-contents (merge-pathnames "./rts-content/" file))))
40 (defparameter *html-overrides* (make-hash-table :test 'equal))
41 (loop for (id file) in '(("XTXWPQSEgoMkAupKt" "An-Intuitive-Explanation-Of-Bayess-Theorem")
42 ("afmj8TKAqH6F2QMfZ" "A-Technical-Explanation-Of-Technical-Explanation")
43 ("7ZqGiPHTpiDMwqMN2" "The-Twelve-Virtues-Of-Rationality"))
44 do (let ((file* file)) (setf (gethash id *html-overrides*) (lambda () (rts-to-html file*)))))
46 (defmacro do-with-cleaners ((regexp-list scanner replacement) &body body)
47 (declare (optimize (speed 0) (space 3)))
48 `(labels ((fn (,scanner ,replacement) ,@body))
49 ,@(loop for (regex flags replacement) in (eval regexp-list)
50 collecting `(fn (load-time-value
51 (ppcre:create-scanner ,regex
52 ,@(loop for (flag sym) in '((#\i :case-insensitive-mode)
53 (#\m :multi-line-mode)
54 (#\s :single-line-mode)
55 (#\x :extended-mode))
56 when (find flag flags)
57 append (list sym t))))
58 ,replacement))))
60 (defmacro define-cleaner (name regexp-list)
61 (declare (optimize (speed 0) (space 3)))
62 `(defun ,name (text)
63 (do-with-cleaners (,regexp-list scanner replacement)
64 (setf text (ppcre:regex-replace-all scanner text replacement)))))
66 (eval-when (:compile-toplevel :load-toplevel :execute)
67 (defun read-regexp-file (filename)
68 (declare (optimize (speed 0) (space 3)))
69 (let ((data (destructuring-bind (* ((* (* inner))))
70 (with-open-file (stream (uiop:subpathname (asdf:system-source-directory "lw2-viewer") filename)) (parse-js:parse-js stream))
71 inner)))
72 (loop for input in data
73 collecting (destructuring-bind (* ((* regex flags) (* replacement))) input
74 (list regex flags (ppcre:regex-replace-all "\\$(\\d)" replacement "\\\\\\1")))))))
76 (define-cleaner clean-text (read-regexp-file "text-clean-regexps.js"))
77 (define-cleaner clean-html-regexps (read-regexp-file "html-clean-regexps.js"))
79 (declaim (ftype function url-scanner))
80 (eval-when (:compile-toplevel :load-toplevel :execute)
81 (setf (fdefinition 'url-scanner) (ppcre:create-scanner
82 "(?:https?://[-a-zA-Z0-9]+\\.[-a-zA-Z0-9.]+|[-a-zA-Z0-9.]+\\.(?:com|edu|gov|mil|net|org|int|biz|info|name|museum|us|ca|uk|io|ly))(?:\\:[0-9]+){0,1}(?:(?:/|\\?(?!(?:$|\\s)))(?:(?:(\\()|\\)(?![.,;:?!]?(?:$|\\s))|[-\\w\\d.,;:?'\\\\+@!&%$#=~–_/])*(?(1)[-\\w\\d\\\\+@&%$#=~_/)]|[-\\w\\d\\\\+@&%$#=~_/]))?)?"
83 :single-line-mode t)))
85 (defun hyphenate-string (string)
86 (let ((hyphenation-list (cl-typesetting::hyphenate-string string)))
87 (declare (type string string)
88 (type list hyphenation-list))
89 (if hyphenation-list
90 (let ((new-string (make-array (+ (length string) (length hyphenation-list)) :element-type 'character :fill-pointer 0)))
91 (loop for char across string
92 for orig-offset of-type fixnum from 0
93 with current-hyphenation = hyphenation-list
94 do (when (and current-hyphenation (= orig-offset (the fixnum (first current-hyphenation))))
95 (vector-push #\SOFT_HYPHEN new-string)
96 (setf current-hyphenation (rest current-hyphenation)))
97 do (vector-push char new-string))
98 (values (coerce new-string '(simple-array character 1)) hyphenation-list))
99 (values string nil))))
101 (defun clean-text-to-html (text &key (hyphenation t))
102 (handler-bind
103 (((or plump:invalid-xml-character plump:discouraged-xml-character) #'abort))
104 (clean-html-regexps
105 (plump:encode-entities
106 (coerce
107 (funcall (if hyphenation #'hyphenate-string #'identity) (clean-text text))
108 'simple-string)))))
110 (defun rot13-char (char)
111 (let ((char-code (char-code char)))
112 (labels ((translate-char (base)
113 (code-char (+ base (mod (+ 13 (- char-code base)) 26)))))
114 (declare (dynamic-extent #'translate-char))
115 (cond
116 ((<= (char-code #\A) char-code (char-code #\Z))
117 (translate-char (char-code #\A)))
118 ((<= (char-code #\a) char-code (char-code #\z))
119 (translate-char (char-code #\a)))
120 (t char)))))
122 (declaim (inline letter-index))
123 (defun letter-index (char)
124 (let ((char-code (char-code char)))
125 (cond
126 ((<= (char-code #\A) char-code (char-code #\Z))
127 (- char-code (char-code #\A)))
128 ((<= (char-code #\a) char-code (char-code #\z))
129 (- char-code (char-code #\a))))))
131 (defparameter *letter-frequencies*
132 '((#\A . 8.167) (#\B . 1.492) (#\C . 2.782) (#\D . 4.253) (#\E . 12.702)
133 (#\F . 2.228) (#\G . 2.015) (#\H . 6.094) (#\I . 6.966) (#\J . 0.153)
134 (#\K . 0.772) (#\L . 4.025) (#\M . 2.406) (#\N . 6.749) (#\O . 7.507)
135 (#\P . 1.929) (#\Q . 0.095) (#\R . 5.987) (#\S . 6.327) (#\T . 9.056)
136 (#\U . 2.758) (#\V . 0.978) (#\W . 2.36) (#\X . 0.15) (#\Y . 1.974)
137 (#\Z . 0.074)))
139 (defparameter *letter-rot13-log-odds*
140 (let ((array (make-array 26 :element-type 'single-float)))
141 (loop
142 for letter-freq in *letter-frequencies*
143 for rot13-freq in (concatenate 'list (subseq *letter-frequencies* 13 26) (subseq *letter-frequencies* 0 13))
144 for i from 0
145 do (setf (aref array i) (log (/ (cdr letter-freq) (cdr rot13-freq)) 2)))
146 array))
148 (defparameter *bigram-rot13-log-odds*
149 (let ((bigram-data (with-open-file (stream (asdf:system-relative-pathname "lw2-viewer" "data/bigrams.lisp"))
150 (read stream)))
151 (bigram-table (make-array '(26 26) :element-type 'single-float :initial-element 1.0))
152 (bigram-log-odds (make-array '(26 26) :element-type 'single-float)))
153 (loop
154 for (bigram count) in bigram-data
155 do (setf (aref bigram-table
156 (letter-index (aref bigram 0))
157 (letter-index (aref bigram 1)))
158 (float (+ count 1))))
159 (dotimes (i 26)
160 (dotimes (j 26)
161 (setf (aref bigram-log-odds i j)
162 (log (/ (aref bigram-table i j)
163 (aref bigram-table
164 (mod (+ 13 i) 26)
165 (mod (+ 13 j) 26)))
166 2))))
167 bigram-log-odds))
169 (defun rot13-text-p (text &optional (start 0) (end (length text)))
170 (declare (type simple-string text))
171 (let ((log-odds
172 (cond
173 ((= 0 (- end start))
174 0.0)
175 ((= 1 (- end start))
176 (let ((odds-table (load-time-value *letter-rot13-log-odds*)))
177 (declare (type (simple-array single-float (26)) odds-table))
178 (if-let ((letter-index (letter-index (aref text start))))
179 (aref odds-table letter-index)
180 0.0)))
182 (let ((odds-table (load-time-value *bigram-rot13-log-odds*)))
183 (declare (type (simple-array single-float (26 26)) odds-table))
184 (loop
185 with sum = 0.0
186 for i from (1+ start) to (1- end)
187 do (when-let ((a (letter-index (aref text (1- i))))
188 (b (letter-index (aref text i))))
189 (setf sum (+ sum (aref odds-table a b))))
190 finally (return sum)))))))
191 (values
192 (< log-odds (log 0.0001 2))
193 log-odds)))
195 (defun rot13-inplace (text &optional (start 0) (end (1- (length text))))
196 (declare (type simple-string text))
197 (loop
198 for i from start to end
199 do (setf (aref text i) (rot13-char (aref text i))))
200 text)
202 (defun unrot13-by-words (text)
203 (declare (type simple-string text))
204 (loop
205 with word-start = 0
206 for i from 0
207 for char across text
208 when (or (= i (1- (length text)))
209 (position char ",.:;?! "))
210 do (progn
211 (when (< (nth-value 1 (rot13-text-p text word-start i)) (+ 0 (- i word-start 0)))
212 (rot13-inplace text word-start i))
213 (setf word-start i)))
214 text)
216 (defun char-is-whitespace (c)
217 (or (cl-unicode:has-binary-property c "White_Space")
218 (eql c #\BRAILLE_PATTERN_BLANK)))
220 (defun string-is-whitespace (string)
221 (every #'char-is-whitespace string))
223 (defvar *mathjax-process* nil)
225 (defmacro with-mathjax-processor (() &body body)
226 `(let ((*mathjax-process* nil))
227 (unwind-protect
228 (progn ,@body)
229 (when *mathjax-process*
230 (uiop:close-streams *mathjax-process*)
231 (uiop:wait-process *mathjax-process*)))))
233 (defun start-mathjax-process ()
234 (unless *mathjax-process*
235 (setf *mathjax-process* (uiop:launch-program "node js-foreign-lib/mathjax.js"
236 :input :stream :output :stream
237 :element-type '(unsigned-byte 8))))
238 (values (uiop:process-info-input *mathjax-process*)
239 (uiop:process-info-output *mathjax-process*)))
241 (defun process-mathjax (tex)
242 (multiple-value-bind (outstream instream) (start-mathjax-process)
243 (let ((outbuf (babel:string-to-octets tex)))
244 (nibbles:write-ub32/le (length outbuf) outstream)
245 (write-sequence outbuf outstream)
246 (finish-output outstream))
247 (let ((insize (nibbles:read-ub32/le instream)))
248 (when (> insize 0)
249 (let ((inbuf (make-array insize :element-type '(unsigned-byte 8))))
250 (read-sequence inbuf instream)
251 (babel:octets-to-string inbuf))))))
253 (defun handle-codecogs (node uri)
254 (regex-case uri
255 ("^https?://[a-z.]*codecogs\.com/[a-z.]*\\?(.*)$"
256 (declare (regex-groups-min 1))
257 (let ((math-html (process-mathjax (quri:url-decode (reg 0)))))
258 (when math-html
259 (let* ((math-dom (plump:parse math-html))
260 (children (plump:children math-dom)))
261 (if (string-is-whitespace (plump:text (plump:parent node)))
262 (loop for child across children
263 do (plump:insert-after node child)
264 do (setf (plump:parent child) (plump:parent node)))
265 (let ((container (plump:make-element (plump:parent node) "span")))
266 (plump:remove-child container)
267 (plump:insert-after node container)
268 (setf (plump:parent container) (plump:parent node)
269 (plump:children container) children)
270 (loop for child across children
271 do (setf (plump:parent child) container))))
272 (plump:remove-child node))
273 t)))
274 (t nil)))
276 (declaim (ftype (function (plump:node) list) class-list))
278 (defun class-list (node)
279 (let ((class (plump:attribute node "class")))
280 (and class (split-sequence #\Space class))))
282 (declaim (ftype (function (plump:node &rest simple-string) boolean) tag-is class-is-not text-class-is-not))
284 (defun tag-is (node &rest args)
285 (declare (type plump:node node)
286 (dynamic-extent args))
287 (when (plump:element-p node)
288 (let ((tag (plump:tag-name node)))
289 (to-boolean
290 (some (lambda (x) (string= tag x))
291 args)))))
293 (defun every-ancestor (node test)
294 (declare (type plump:node node)
295 (type function test))
296 (iter (for target first node then (plump:parent target))
297 (cond ((or (null target) (plump:root-p target)) (return t))
298 ((not (funcall test target)) (return nil)))))
300 (defun any-ancestor (node test)
301 (not (every-ancestor node (complement test))))
303 (defun class-is-not (node &rest args)
304 (declare (type plump:node node)
305 (dynamic-extent args))
306 (every-ancestor node (lambda (n)
307 (not (intersection
308 (class-list n)
309 args
310 :test #'string=)))))
312 (defun class-is (node &rest args)
313 (declare (type plump:node node)
314 (dynamic-extent args))
315 (any-ancestor node (lambda (n)
316 (intersection
317 (class-list n)
318 args
319 :test #'string=))))
321 (defun text-class-is-not (node &rest args)
322 (declare (type plump:node node)
323 (dynamic-extent args))
324 (apply #'class-is-not (plump:parent node) args))
326 (defun clean-dom-text (root)
327 (handler-bind
328 (((or plump:invalid-xml-character plump:discouraged-xml-character) #'abort))
329 (labels
330 ((recursep (node)
331 (and (plump:element-p node)
332 (ppcre:scan "^(?:p|div|blockquote|li|h[0-6])$" (plump:tag-name node))))
333 (cleanablep (node)
334 (and (plump:text-node-p node)
335 (plump:parent node)
336 (text-class-is-not node "mjx-math" "arbital-math")
337 (not (tag-is (plump:parent node) "code"))))
338 (traverse (node main-fn &optional recurse-fn)
339 (when (cleanablep node) (funcall main-fn node))
340 (when (plump:nesting-node-p node)
341 (loop for n across (plump:children node)
342 do (if (recursep n)
343 (if recurse-fn (funcall recurse-fn n))
344 (traverse n main-fn recurse-fn))))))
345 (let* ((offset-list nil)
346 (whole-string-input
347 (with-output-to-string (stream)
348 (traverse
349 root
350 (lambda (node)
351 (push (length (the string (plump:text node))) offset-list)
352 (write-string (plump:text node) stream))
353 #'clean-dom-text)))
354 (whole-string-output whole-string-input))
355 (declare (type string whole-string-output whole-string-input))
356 (setf offset-list (nreverse offset-list))
357 (labels
358 ((call-with-offset-loop (continue-fn loop-fn next-offset-fn offset-adjust-fn)
359 (loop with current-offset = offset-list
360 with output-offset of-type (or null fixnum) = (first current-offset)
361 with output-offset-list = nil
362 with total-offset of-type fixnum = 0
363 while (funcall continue-fn)
364 do (funcall loop-fn)
365 do (loop for current-offset-num of-type fixnum = (first current-offset)
366 while (and (rest current-offset) (< (+ total-offset current-offset-num) (funcall next-offset-fn)))
367 do (progn
368 (push output-offset output-offset-list)
369 (setf total-offset (+ total-offset current-offset-num)
370 current-offset (cdr current-offset)
371 output-offset (first current-offset))))
372 do (setf output-offset (funcall offset-adjust-fn output-offset))
373 finally (progn
374 (push output-offset output-offset-list)
375 (loop for x in (rest current-offset) do (push x output-offset-list))
376 (setf offset-list (nreverse output-offset-list))))
377 (values)))
378 (declare (dynamic-extent (function call-with-offset-loop))
379 (ftype (function ((function ()) (function ()) (function () fixnum) (function (fixnum) fixnum)) (values)) call-with-offset-loop))
380 (macrolet
381 ((offset-loop ((list-binding list-form) (&body loop-body) (&body next-offset-body) (&body offset-adjust-body))
382 (with-gensyms (list-current)
383 `(let ((,list-current ,list-form)
384 (,list-binding))
385 (labels ((continue-fn () (if ,list-current (setf ,list-binding (pop ,list-current))))
386 (loop-fn () ,.loop-body)
387 (next-offset-fn () ,.next-offset-body)
388 (offset-adjust-fn ,(first offset-adjust-body) (declare (type fixnum ,(caar offset-adjust-body)) (values fixnum)) ,.(rest offset-adjust-body)))
389 (declare (dynamic-extent (function continue-fn) (function loop-fn) (function next-offset-fn) (function offset-adjust-fn)))
390 (call-with-offset-loop #'continue-fn #'loop-fn #'next-offset-fn #'offset-adjust-fn))))))
391 (do-with-cleaners ((read-regexp-file "text-clean-regexps.js") scanner replacement)
392 (let ((replacements 0)
393 (replacement-list nil)
394 (original-length (length whole-string-output)))
395 (declare (type fixnum replacements))
396 (ppcre:do-scans (match-start match-end reg-starts reg-ends scanner whole-string-output)
397 (declare (type fixnum match-start match-end)
398 (type simple-vector reg-starts reg-ends))
399 (incf replacements)
400 (push
401 (list (if (and (> (length reg-starts) 0) (eq (aref reg-starts 0) match-start))
402 (aref reg-ends 0)
403 match-start)
404 (if (and (> (length reg-starts) 0) (eq (aref reg-ends (- (length reg-ends) 1)) match-end))
405 (aref reg-starts (- (length reg-starts) 1))
406 match-end))
407 replacement-list))
408 (setf replacement-list (nreverse replacement-list))
409 (setf whole-string-output (ppcre:regex-replace-all scanner whole-string-output replacement))
410 (let ((length-difference (- (length whole-string-output) original-length))
411 (length-change 0))
412 (declare (type fixnum length-difference length-change))
413 (offset-loop
414 (current-replacement replacement-list)
415 ((setf length-change (ceiling length-difference replacements)
416 length-difference (- length-difference length-change)
417 replacements (- replacements 1)))
418 ((destructuring-bind (start end) current-replacement
419 (declare (type fixnum start end))
420 (ceiling (+ start end) 2)))
421 ((output-offset) (max 0 (+ output-offset length-change)))))))
422 (multiple-value-bind (hyphenated-string hyphenation-list) (hyphenate-string whole-string-output)
423 (setf whole-string-output hyphenated-string)
424 (offset-loop
425 (current-hyphenation hyphenation-list)
427 (current-hyphenation)
428 ((output-offset) (1+ output-offset))))))
429 (let ((current-offset 0))
430 (declare (type (or null fixnum) current-offset))
431 (traverse
432 root
433 (lambda (node)
434 (let ((output-length (length whole-string-output))
435 (next-offset (if offset-list (+ current-offset (the fixnum (first offset-list))) nil)))
436 (declare (type (or null fixnum) next-offset))
437 (setf (plump:text node) (subseq whole-string-output (min current-offset output-length) (and next-offset (min next-offset output-length)))
438 current-offset next-offset
439 offset-list (cdr offset-list))))
440 (lambda (node) (declare (ignore node))))))))
441 root)
443 ;;;; Dynamic content blocks.
445 (define-cache-database 'lw2.backend-modules:backend-lmdb-cache "dynamic-content-blocks")
447 (defparameter *dynamic-content-block-callback* nil)
449 (defclass dynamic-element (plump:element)
450 ((dynamic-call-form :initarg :dynamic-call-form :accessor dynamic-call-form)))
452 (defmethod plump:serialize-object :around ((node dynamic-element))
453 (funcall *dynamic-content-block-callback* :start node)
454 (call-next-method)
455 (funcall *dynamic-content-block-callback* :end node))
457 (defun create-dynamic-call (node function-name &rest args)
458 (change-class node 'dynamic-element :dynamic-call-form (list* function-name args)))
460 (defun call-with-dynamic-block-serialization (fn hash output-string)
461 (let* ((current-start nil)
462 (last-string-pos 0)
463 (last-octet-pos 0))
464 (flet ((current-octet-pos ()
465 (psetf last-string-pos (length output-string)
466 last-octet-pos (+ last-octet-pos (babel:string-size-in-octets output-string :start last-string-pos)))
467 last-octet-pos))
468 (declare (dynamic-extent #'current-octet-pos))
469 (let* ((dynamic-call-list nil)
470 (*dynamic-content-block-callback*
471 (lambda (phase node)
472 (case phase
473 (:start (setf current-start (current-octet-pos)))
474 (:end (let ((current-end (current-octet-pos)))
475 (push (list* current-start current-end (dynamic-call-form node))
476 dynamic-call-list)))))))
477 (declare (dynamic-extent *dynamic-content-block-callback*))
478 (multiple-value-prog1
479 (funcall fn)
480 (if dynamic-call-list
481 (cache-put "dynamic-content-blocks" hash (nreverse dynamic-call-list) :key-type :byte-vector :value-type :lisp)
482 (cache-del "dynamic-content-blocks" hash :key-type :byte-vector)))))))
484 (defmacro with-dynamic-block-serialization ((hash output-string) &body body)
485 `(dynamic-flet ((fn () ,@body)) (call-with-dynamic-block-serialization #'fn ,hash ,output-string)))
487 ;;;;
489 (define-lmdb-memoized extract-excerpt 'lw2.backend-modules:backend-lmdb-cache
490 (:sources ("src/clean-html.lisp")) (in-html)
491 (let ((root (plump:parse (string-trim '(#\Space #\Newline #\Tab #\Return #\Linefeed #\Page) in-html)))
492 (chars 0)
493 (need-space nil))
494 (with-output-to-string (out-stream)
495 (block nil
496 (plump:traverse
497 root
498 (lambda (node)
499 (when (or (> (length (plump:children node)) 1)
500 (plump:text-node-p (plump:first-child node)))
501 (let ((text (plump:text node)))
502 (when need-space
503 (write-char #\Space out-stream))
504 (write-string text out-stream)
505 (setf chars (+ chars (length text))
506 need-space t)
507 (when (> chars 480)
508 (return nil)))))
509 :test (lambda (node) (tag-is node "p")))))))
511 (define-lmdb-memoized clean-html 'lw2.backend-modules:backend-lmdb-cache
512 (:sources ("src/clean-html.lisp" "src/links.lisp" "src/colors.lisp" "text-clean-regexps.js" "html-clean-regexps.js" "js-foreign-lib/mathjax.js")) (in-html &key with-toc post-id)
513 (declare (ftype (function (plump:node) fixnum) plump:child-position)
514 (ftype (function (plump:node) (and vector (not simple-array))) plump:family)
515 (ftype (function (plump:node) simple-string) plump:text plump:tag-name))
516 (labels ((only-child-is (node &rest args)
517 (declare (dynamic-extent args))
518 (and (= 1 (length (plump:children node)))
519 (let ((child (plump:first-child node)))
520 (and
521 (plump:element-p child)
522 (apply #'tag-is child args)))))
523 (is-child-of-tag (node &rest args)
524 (declare (dynamic-extent args))
525 (loop for e = (plump:parent node) then (plump:parent e)
526 while (not (typep e 'plump:root))
527 when (and (plump:element-p e) (apply #'tag-is (cons e args))) return t))
528 (add-class (node class)
529 (declare (type plump:node node)
530 (type string class))
531 (let ((classes (adjoin class (alexandria:if-let (attr (plump:attribute node "class")) (split-sequence #\Space attr)) :test #'string=)))
532 (declare (dynamic-extent classes))
533 (setf (plump:attribute node "class") (format nil "~{~A~^ ~}" classes)))
534 node)
535 (remove-attributes (node &rest attrs)
536 (declare (dynamic-extent attrs))
537 (dolist (attr attrs)
538 (plump:remove-attribute node attr)))
539 (make-element-before (node tag)
540 (if (plump:text-node-p node)
541 (make-element-before (plump:parent node) tag)
542 (let ((e (plump:make-element (plump:parent node) tag)))
543 (plump:remove-child e)
544 (plump:insert-before node e)
545 (setf (plump:parent e) (plump:parent node))
546 e)))
547 (wrap-element (node element-name)
548 (let ((container (make-element-before node element-name)))
549 (plump:remove-child node)
550 (plump:append-child container node)
551 container))
552 (wrap-children (node element-name)
553 (let ((new-element (plump:make-element node element-name)))
554 (plump:remove-child new-element)
555 (setf (plump:children new-element) (plump:clone-children node t new-element)
556 (plump:children node) (plump:make-child-array))
557 (plump:append-child node new-element)))
558 (move-children-out-of-node (node &key keep)
559 (iterate (for c in-vector (plump:children node) downto 0)
560 (setf (plump:parent c) (plump:parent node))
561 (plump:insert-after node c))
562 (if keep
563 (setf (plump:children node) (plump:make-child-array))
564 (plump:remove-child node)))
565 (text-node-is-not (node &rest args)
566 (declare (type plump:node node)
567 (dynamic-extent args))
569 (typep (plump:parent node) 'plump:root)
570 (every (lambda (x) (string/= (plump:tag-name (plump:parent node)) x)) args)))
571 (adjacent-text-node (node direction)
572 (multiple-value-bind (get-sibling insert-sibling)
573 (ecase direction
574 (:previous (values #'plump:previous-sibling #'plump:insert-before))
575 (:next (values #'plump:next-sibling #'plump:insert-after)))
576 (let ((candidate (funcall get-sibling node)))
577 (if (plump:text-node-p candidate)
578 candidate
579 (let ((new-node (plump:make-text-node (plump:parent node))))
580 (funcall insert-sibling node new-node)
581 new-node)))))
582 (is-numeric (node)
583 (let ((text (plump:text node))
584 (alpha 0)
585 (digit 0))
586 (loop for c across text
587 when (alpha-char-p c)
588 do (incf alpha)
589 when (digit-char-p c)
590 do (incf digit)
591 finally (return (>= digit alpha)))))
592 (remove-if-whitespace (node)
593 (when (string-is-whitespace (plump:text node))
594 (plump:remove-child node)))
595 (first-non-whitespace-child (node)
596 (loop for e across (plump:children node)
597 when (or (typep e 'plump:element) (not (string-is-whitespace (plump:text e)))) return e))
598 (find-text-node (node direction)
599 (let ((iterator #.`(case direction
600 ,@(loop for d in '(:first :last) collect
601 `(,d (lambda (fn)
602 (iterate (for c in-vector (plump:children node) ,@(if (eq d :last) '(downto 0)))
603 (funcall fn c))))))))
604 (declare (dynamic-extent iterator))
605 (cond
606 ((and (plump:text-node-p node) (plump:parent node))
607 node)
608 ((plump:nesting-node-p node)
609 (block nil
610 (funcall iterator (lambda (c)
611 (when-let (tn (find-text-node c direction)) (return tn)))))))))
612 (vacuum-whitespace (node)
613 (dolist (direction '(:first :last))
614 (let ((displaced-text (make-string 0)))
615 (loop for lt = (find-text-node node direction) do
616 (cond
617 ((not lt)
618 (return-from vacuum-whitespace node))
619 ((string-is-whitespace (plump:text lt))
620 (setf displaced-text (case direction
621 (:first (concatenate 'string displaced-text (plump:text lt)))
622 (:last (concatenate 'string (plump:text lt) displaced-text))))
623 (plump:remove-child lt))
625 (let* ((text (plump:text lt)))
626 (case direction
627 (:first
628 (let ((boundary (loop for i from 0 to (- (length text) 1)
629 unless (char-is-whitespace (aref text i))
630 return i)))
631 (setf displaced-text (concatenate 'string displaced-text (subseq text 0 boundary))
632 (plump:text lt) (subseq text boundary))))
633 (:last
634 (let ((boundary (loop for i from (- (length text) 1) downto 0
635 unless (char-is-whitespace (aref text i))
636 return i)))
637 (setf displaced-text (concatenate 'string (subseq text (+ 1 boundary) (length text)))
638 (plump:text lt) (subseq text 0 (+ 1 boundary)))))))
639 (return))))
640 (when (> (length displaced-text) 0)
641 (let ((atn (adjacent-text-node node (case direction (:first :previous) (:last :next)))))
642 (setf (plump:text atn) (case direction
643 (:first (concatenate 'string (plump:text atn) displaced-text))
644 (:last (concatenate 'string displaced-text (plump:text atn))))))))))
645 (add-element-style (node attribute value)
646 (let ((old-style (plump:attribute node "style")))
647 (setf (plump:attribute node "style")
648 (if old-style
649 (format nil "~A~:[;~;~] ~A: ~A;" old-style (ppcre:scan ";\s*$" old-style) attribute value)
650 (format nil "~A: ~A;" attribute value)))))
651 (style-string-to-alist (string)
652 (let ((rules (ppcre:split "\\s*;\\s*" string :sharedp t)))
653 (iter (for rule in rules)
654 (let ((parts (ppcre:split "\\s*:\\s*" rule :sharedp t)))
655 (when (= 2 (length parts))
656 (collect (cons (first parts) (second parts))))))))
657 (alist-to-style-string (alist)
658 (with-output-to-string (s)
659 (iter (for item in alist)
660 (format s "~A:~A;" (car item) (cdr item)))))
661 (remove-style-rules (node &rest rules)
662 (declare (dynamic-extent rules))
663 (when-let ((old-style (plump:attribute node "style")))
664 (setf (plump:attribute node "style")
665 (alist-to-style-string
666 (remove-if (lambda (x) (member (car x) rules :test #'string-equal))
667 (style-string-to-alist old-style))))))
668 (flatten-element (node)
669 (let* ((previous-sibling (plump:previous-sibling node))
670 (next-sibling (if (plump:text-node-p (plump:next-sibling node))
671 (plump:next-sibling node)))
672 (new-text-node (if (plump:text-node-p previous-sibling)
673 previous-sibling
674 (plump:insert-before node
675 (plump:remove-child (plump:make-text-node (plump:parent node)))))))
676 (setf (plump:parent new-text-node) (plump:parent node)
677 (plump:text new-text-node) (concatenate 'string
678 (plump:text new-text-node)
679 (plump:text node)
680 (if next-sibling
681 (plump:text next-sibling)
682 "")))
683 (plump:remove-child node)
684 (when next-sibling (plump:remove-child next-sibling))))
685 (scan-for-urls (text-node)
686 (declare (type plump:text-node text-node))
687 (let ((text (plump:text text-node)))
688 (multiple-value-bind (url-start url-end)
689 (ppcre:scan #'url-scanner text)
690 (declare (type simple-string text)
691 (type (or null fixnum) url-start url-end))
692 (when url-start
693 (let* ((url-raw (subseq text url-start url-end))
694 (url (if (mismatch "http" url-raw :end2 4) (concatenate 'string "http://" url-raw) url-raw))
695 (family (plump:family text-node))
696 (other-children (prog1
697 (subseq family (1+ (plump:child-position text-node)))
698 (setf (fill-pointer family) (1+ (plump:child-position text-node)))))
699 (new-a (plump:make-element (plump:parent text-node) "a"))
700 (new-text (unless (= url-end (length text)) (plump:make-text-node (plump:parent text-node) (subseq text url-end)))))
701 (setf (plump:text text-node) (subseq text 0 url-start)
702 (plump:attribute new-a "href") (with-direct-link (presentable-link url))
703 (plump:attribute new-a "class") "bare-url")
704 (plump:make-text-node new-a (clean-text url-raw))
705 (when new-text
706 (scan-for-urls new-text)
707 (setf (plump:text new-text) (clean-text (plump:text new-text))))
708 (loop for item across other-children
709 do (plump:append-child (plump:parent text-node) item))
710 (when (= url-start 0)
711 (plump:remove-child text-node)))))))
712 (title-to-anchor (text used-anchors)
713 ;; This should match LW behavior in packages/lesswrong/lib/collections/posts/tableOfContents.js
714 (let* ((chars-to-use "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789")
715 (base-anchor (with-output-to-string (stream)
716 (loop for c across text
717 do (write-char (if (find c chars-to-use) c #\_) stream)))))
718 (loop for suffix from 0
719 for anchor = base-anchor then (format nil "~A~A" base-anchor suffix)
720 when (not (gethash anchor used-anchors))
721 return (progn (setf (gethash anchor used-anchors) t)
722 anchor))))
723 (contents-to-html (contents min-header-level out-stream)
724 (declare (type cons contents))
725 (format out-stream "<nav class=\"contents\"><div class=\"contents-head\">Contents</div><ul class=\"contents-list\">")
726 (loop for (elem-level text id) in contents do
727 (format out-stream "<li class=\"toc-item-~A\"><a href=\"#~A\">~A</a></li>"
728 (- elem-level (- min-header-level 1)) id (clean-text-to-html text)))
729 (format out-stream "</ul></nav>"))
730 (style-hash-to-html (style-hash out-stream)
731 (declare (type hash-table style-hash))
732 (let ((style-list (alexandria:hash-table-keys style-hash)))
733 (if style-list
734 (format out-stream "<style>~{~A~}</style>" style-list)))))
735 (declare (ftype (function (plump:node &rest simple-string) boolean) only-child-is is-child-of-tag text-node-is-not))
736 (handler-bind
737 (((or plump:invalid-xml-character plump:discouraged-xml-character) #'abort))
738 (alexandria:if-let
739 (override (gethash post-id *html-overrides*))
740 (funcall override)
741 (let ((root (plump:parse (string-trim '(#\Space #\Newline #\Tab #\Return #\Linefeed #\Page) in-html)))
742 (contents nil)
743 (section-count 0)
744 (min-header-level 6)
745 (aggressive-deformat nil)
746 (style-hash (make-hash-table :test 'equal))
747 (used-colors (make-hash-table :test 'equal))
748 (used-anchors (make-hash-table :test 'equal)))
749 (declare (type fixnum section-count min-header-level))
750 (when *before-clean-hook*
751 (funcall *before-clean-hook*))
752 (let ((wayward-li-container nil))
753 (with-mathjax-processor ()
754 (plump:traverse
755 root
756 (lambda (node)
757 (cond
758 ((tag-is node "img")
759 (when-let ((src (plump:attribute node "src")))
760 (handle-codecogs node src)))
761 ((not (plump:parent node)) nil)
762 ((tag-is node "a")
763 (cond
764 ((not (plump:attribute node "href"))
765 (move-children-out-of-node node :keep t))
766 ((or
767 (and (ppcre:scan "^\s*https?://" (plump:text node))
768 (not (find #\HORIZONTAL_ELLIPSIS (plump:text node))))
769 (notany (lambda (attr) (nonempty-string (plump:attribute node attr))) '("href" "name" "id")))
770 (flatten-element node))
771 (t (tagbody start
772 (let* ((next-sibling (plump:next-sibling node))
773 (next-text-node (if (plump:text-node-p next-sibling) next-sibling))
774 (next-next-sibling (if next-text-node (plump:next-sibling next-text-node) next-sibling))
775 (next-a (if (and next-next-sibling (tag-is next-next-sibling "a")) next-next-sibling)))
776 (when (and next-a
777 (or (not next-text-node) (string-is-whitespace (plump:text next-text-node)))
778 (string= (plump:attribute node "href") (plump:attribute next-a "href")))
779 (when next-text-node
780 (plump:remove-child next-text-node)
781 (plump:append-child node next-text-node))
782 (loop for c across (plump:children next-a)
783 do (progn (plump:remove-child c)
784 (plump:append-child node c)))
785 (plump:remove-child next-a)
786 (go start)))))))
787 ((tag-is node "ul" "ol")
788 (setf wayward-li-container node)
789 (let ((new-children (plump:make-child-array)))
790 (loop for child across (plump:children node)
791 do (if (and (plump:element-p child) (tag-is child "li"))
792 (vector-push-extend child new-children)
793 (unless (and (plump:text-node-p child) (string-is-whitespace (plump:text child)))
794 (if (= (length new-children) 0)
795 (vector-push-extend (plump:make-element node "li") new-children))
796 (plump:append-child (aref new-children (- (length new-children) 1)) child))))
797 (setf (plump:children node) new-children)))
798 ((tag-is node "li")
799 (unless (is-child-of-tag node "ul" "ol")
800 (unless wayward-li-container
801 (setf wayward-li-container (make-element-before node "ul")))
802 (plump:remove-child node)
803 (plump:append-child wayward-li-container node)))
804 ((tag-is node "p" "blockquote" "div")
805 (setf wayward-li-container nil))))
806 :test #'plump:element-p)))
807 (loop while (and (= 1 (length (plump:children root))) (plump:element-p (plump:first-child root)) (tag-is (plump:first-child root) "div" "html" "body"))
808 do (setf (plump:children root) (plump:children (plump:first-child root)))
809 do (loop for c across (plump:children root) do (setf (plump:parent c) root))
810 do (when-let (fc (plump:first-child root))
811 (when (and (plump:element-p fc) (tag-is fc "head"))
812 (loop for c across (plump:children fc) do
813 (when (and (plump:element-p c) (tag-is c "style"))
814 (setf (plump:parent c) (plump:parent fc))
815 (plump:insert-after fc c)))
816 (plump:remove-child fc))))
817 (loop for c across (plump:children root) do
818 (when (and (plump:element-p c)
819 (tag-is c "span")
820 (string-is-whitespace (plump:text c)))
821 (move-children-out-of-node c)))
822 (loop for lc = (plump:last-child root)
823 while (and (plump:element-p lc) (tag-is lc "br"))
824 do (plump:remove-child lc))
825 (plump:traverse
826 root
827 (lambda (node)
828 (when (and (plump:text-node-p node)
829 (plump:parent node)
830 (text-node-is-not node "a" "style" "pre"))
831 (scan-for-urls node))))
832 (plump:traverse
833 root
834 (lambda (node)
835 (when (and (not (plump:root-p node)) (plump:parent node))
836 (typecase node
837 (plump:text-node
838 (when (and (text-node-is-not node "style" "pre" "code")
839 (text-class-is-not node "mjx-math"))
840 (let ((new-root (plump:parse (clean-html-regexps (plump:serialize node nil))))
841 (other-children (prog1
842 (subseq (plump:family node) (1+ (plump:child-position node)))
843 (setf (fill-pointer (plump:family node)) (plump:child-position node)))))
844 (loop for item across (plump:children new-root)
845 do (plump:append-child (plump:parent node) item))
846 (loop for item across other-children
847 do (plump:append-child (plump:parent node) item)))))
848 (plump:element
849 (alexandria:when-let (style (plump:attribute node "style"))
850 (let ((style-list (style-string-to-alist style)))
851 (cond ((or aggressive-deformat
852 (cdr (assoc "font-family" style-list :test #'string-equal))
853 (search "font-style: inherit" style)
854 (search "MsoNormal" (plump:attribute node "class")))
855 (setf aggressive-deformat t)
856 (plump:remove-attribute node "style"))
857 ((ppcre:scan "(?:^|;)\\s*(?:line-height:[^;]+in)\\s*(?:;|$)" style)
858 (plump:remove-attribute node "style"))
860 (let (updated)
861 (dolist (style-item style-list)
862 (when (member (car style-item) '("color" "background" "background-color" "border" "border-color") :test #'string-equal)
863 (setf (cdr style-item)
864 (regex-replace-body ("#[0-9a-fA-F]{3,8}|rgba?\\((?:.*?)\\)|hsla?\\((?:.*?)\\)" (cdr style-item))
865 (multiple-value-bind (r g b a) (decode-css-color (match))
866 (when (and r g b a)
867 (let ((color-name (safe-color-name r g b a)))
868 (setf updated t
869 (gethash color-name used-colors) (list r g b a))
870 (format nil "var(--user-color-~A)" color-name))))))))
871 (when updated
872 (setf (plump:attribute node "style") (alist-to-style-string style-list))))))))
873 (when (and aggressive-deformat (tag-is node "div"))
874 (setf (plump:tag-name node) "p"))
875 (when (let ((parent (plump:parent node)))
876 (and
877 (intersection (class-list node) '("mjx-chtml" "mjx-math" "mjpage") :test #'string=)
878 (and parent
879 (class-is-not parent "mjx-chtml" "mjx-math" "mjpage"))))
880 (cond
881 ((let ((mrows (clss:select ".mjx-mrow" node)))
882 (and (not (zerop (length mrows)))
883 (every (lambda (mrow)
884 (zerop (length (plump:children mrow))))
885 mrows)))
886 (plump:remove-child node))
887 (:otherwise
888 (loop
889 with full-width = (or (class-is node "mjx-full-width")
890 (loop for e across (plump:children node)
891 when (member "MJXc-display" (class-list e) :test #'string=)
892 return t))
893 for current = node then (plump:parent current)
894 for parent = (plump:parent current)
895 when (and parent
896 (loop for s across (plump:family current)
897 unless (or (eq s current)
898 (and (plump:text-node-p s) (string-is-whitespace (plump:text s))))
899 return t))
900 do (progn (add-class current (if full-width
901 "mathjax-block-container"
902 "mathjax-inline-container"))
903 (return))
904 when (or (null parent)
905 (plump:root-p parent)
906 (tag-is parent "p" "blockquote" "div"))
907 do (progn (add-class current "mathjax-block-container")
908 (return))))))
909 (cond
910 ((tag-is node "a")
911 (vacuum-whitespace node)
912 (let ((href (plump:attribute node "href")))
913 (when href
914 (let* ((href (string-trim '(#\Space #\Newline #\Tab #\Return #\Linefeed #\Page) href))
915 (href (if (ppcre:scan "^(?:(?:[a-z]+:)?//|/|#)" href) href (format nil "http://~A" href)))
916 (href (or (with-direct-link (presentable-link href)) href)))
917 (when href
918 (setf (plump:attribute node "href") href)
919 (when *link-hook*
920 (log-and-ignore-errors
921 (funcall *link-hook* href))))))))
922 ((tag-is node "img")
923 (let ((width (ignore-errors (parse-integer (plump:attribute node "width"))))
924 (height (ignore-errors (parse-integer (plump:attribute node "height")))))
925 (if (and width height (<= width 1) (<= height 1))
926 ;; Remove probable tracking pixel.
927 (plump:remove-child node)
928 (let ((container
929 (if (and (tag-is (plump:parent node) "div" "p" "figure")
930 (only-child-is (plump:parent node) "img"))
931 (plump:parent node) ; Should already have imgonly class.
932 (let ((container (wrap-element node "div")))
933 (add-class container "imgonly")
934 container))))
935 (when-let ((src (plump:attribute node "src")))
936 (setf src (presentable-link src :image)
937 (plump:attribute node "src") src)
938 (create-dynamic-call container 'lw2.images::dynamic-image
940 (plump:tag-name container)
941 (alexandria:hash-table-alist (plump:attributes container))
942 (alexandria:hash-table-alist (plump:attributes node))))
943 (when (and width height)
944 ;; Apply responsive image scaling CSS.
945 (setf (plump:attribute container "style") (format nil "--aspect-ratio: ~F; max-width: ~Dpx"
946 (/ (float width)
947 (float height))
948 width)))
949 (remove-attributes node "style" "class" "width" "height")
950 (setf (plump:attribute node "loading") "lazy")))))
951 ((tag-is node "figure")
952 (remove-attributes node "style" "class" "width" "height"))
953 ((and (tag-is node "p") (only-child-is node "figure"))
954 (move-children-out-of-node node))
955 ((tag-is node "p" "blockquote" "div" "center")
956 (when (only-child-is node "center")
957 (unless (string-is-whitespace (plump:text node))
958 (add-element-style node "text-align" "center"))
959 (move-children-out-of-node (plump:first-child node)))
960 (when (tag-is node "center")
961 (setf (plump:tag-name node) "p")
962 (add-element-style node "text-align" "center"))
963 (when-let ((question-id (plump:attribute node "data-elicit-id")))
964 (when-let* ((question-title (lw2.backend::get-elicit-question-title question-id))
965 (links (plump:get-elements-by-tag-name node "a"))
966 (text-node (plump:first-child (first links))))
967 (setf (plump:text text-node) question-title))
968 (create-dynamic-call (plump:parent (plump:parent node)) 'lw2.elicit-predictions::render-elicit-block question-id))
969 (if (string-is-whitespace (plump:text node))
970 (if (or (plump:get-elements-by-tag-name node "img")
971 (plump:get-elements-by-tag-name node "iframe"))
972 (add-class node "imgonly")
973 (plump:remove-child node))
974 (if-let (parent (plump:parent node))
975 (labels ((spoilerp (n)
976 (if-let (a (and (plump:element-p n) (plump:attribute n "class")))
977 (ppcre:scan "(?:^| )spoiler\\S*(?: |$)" a))))
978 (when (and nil (tag-is node "p") ;; FIXME: disabled until we can fix math and code false positives
979 (rot13-text-p (plump:text node)))
980 (setf (plump:attribute node "class") "spoiler")
981 (plump:traverse node
982 (lambda (n) (unrot13-by-words (plump:text n)))
983 :test #'plump:text-node-p))
984 (cond
985 ((and (tag-is node "p")
986 (spoilerp node)
987 (spoilerp parent))
988 (plump:remove-attribute node "class"))
989 ((and (tag-is node "div")
990 (spoilerp node))
991 (setf (plump:attribute node "class") "spoiler"))
992 ((and (spoilerp node)
993 (tag-is node "p")
994 (not (spoilerp parent)))
995 (let ((previous-sibling (plump:previous-sibling node)))
996 (if (and previous-sibling (spoilerp previous-sibling))
997 (progn (plump:remove-child node)
998 (plump:append-child previous-sibling node)
999 (plump:remove-attribute node "class"))
1000 (let ((new-container (plump:make-element parent "div")))
1001 (setf (plump:attribute new-container "class") "spoiler")
1002 (plump:remove-child new-container)
1003 (setf (plump:parent new-container) (plump:parent node))
1004 (plump:insert-before node new-container)
1005 (loop for e = node then ns
1006 while (and (plump:element-p e) (spoilerp e))
1007 for ns = (plump:next-sibling e)
1008 do (progn
1009 (plump:remove-attribute e "class")
1010 (plump:remove-child e)
1011 (plump:append-child new-container e))))))))))))
1012 ((tag-is node "table" "tbody" "tr" "td")
1013 (remove-style-rules node "border-top" "border-bottom" "border-left" "border-right" "padding")
1014 (when (and (tag-is node "td") (is-numeric node))
1015 (add-class node "numeric")))
1016 ((tag-is node "u")
1017 (let ((parent (plump:parent node)))
1018 (cond
1019 ((and (or (plump:root-p parent) (and (plump:element-p parent) (tag-is parent "p" "blockquote" "div")))
1020 (loop for c across (plump:children node) never (and (plump:element-p c) (tag-is c "a"))))
1021 (vacuum-whitespace node))
1023 (move-children-out-of-node node)))))
1024 ((tag-is node "ol")
1025 (when-let (old-style (plump:attribute node "style"))
1026 (setf (plump:attribute node "style")
1027 (ppcre:regex-replace-all
1028 (load-time-value (ppcre:create-scanner "list-style-type\\s*:\\s*decimal\\s*;?" :single-line-mode t :case-insensitive-mode t))
1029 old-style
1030 "")))
1031 (when-let (start-string (plump:attribute node "start"))
1032 (when-let (start (ignore-errors (parse-integer start-string)))
1033 (plump:remove-attribute node "start")
1034 (add-element-style node "counter-reset" (format nil "ol ~A" (- start 1))))))
1035 ((and (tag-is node "li") (find "footnote-item" (class-list node) :test #'string-equal))
1036 (unless (find-if (lambda (e) (and (plump:element-p e) (find "footnote-content" (class-list e) :test #'string-equal)))
1037 (plump:children node))
1038 ;; footnote-content div does not exist. We must create it and move the footnote contents into place.
1039 (let* ((wrapper (wrap-children node "div"))
1040 (backrefs (clss:select ".footnote-backref" wrapper))
1041 (back-links (make-element-before wrapper "span")))
1042 (setf (plump:attribute wrapper "class") "footnote-content"
1043 (plump:attribute back-links "class") "footnote-back-link")
1044 (loop for backref across backrefs do
1045 (progn (plump:remove-child backref)
1046 (plump:append-child back-links backref))))))
1047 ((tag-is node "li")
1048 (when (let ((c (plump:first-child node))) (and c (if (plump:text-node-p c) (not (string-is-whitespace (plump:text c))) (not (tag-is c "p" "ul" "ol")))))
1049 (wrap-children node "p")))
1050 ((tag-is node "pre")
1051 (let ((nchildren (length (plump:children node))))
1052 (when (>= nchildren 1)
1053 (remove-if-whitespace (plump:first-child node))
1054 (when (>= nchildren 2)
1055 (remove-if-whitespace (plump:last-child node))))))
1056 ((ppcre:scan "^h[1-6]$" (plump:tag-name node))
1057 (when (plump:get-elements-by-tag-name node "p")
1058 (move-children-out-of-node node))
1059 (cond
1060 ((string-is-whitespace (plump:text node))
1061 (plump:remove-child node))
1063 (let ((fc (plump:first-child node))
1064 (lc (plump:last-child node)))
1065 (when (and (plump:element-p fc) (tag-is fc "br")) (plump:remove-child fc))
1066 (when (and (not (eql fc lc)) (plump:element-p lc) (tag-is lc "br")) (plump:remove-child lc)))
1067 (when with-toc
1068 (incf section-count)
1069 (unless (plump:attribute node "id") (setf (plump:attribute node "id") (format nil "section-~A" section-count)))
1070 (let* ((header-level (parse-integer (subseq (plump:tag-name node) 1)))
1071 (header-text (with-output-to-string (stream)
1072 (plump:traverse node
1073 (lambda (n)
1074 (typecase n
1075 (plump:text-node
1076 (when (text-node-is-not n "style" "script")
1077 (write-string (plump:text n) stream))))))))
1078 (anchor-old (or (plump:attribute node "id") (format nil "section-~A" section-count)))
1079 (anchor-new (title-to-anchor header-text used-anchors))
1080 (wrapper (wrap-children node "span")))
1081 (setf min-header-level (min min-header-level header-level)
1082 (plump:attribute node "id") anchor-new
1083 (plump:attribute wrapper "id") anchor-old)
1084 (push (list header-level
1085 header-text
1086 anchor-new)
1087 contents))))))
1088 ((and (tag-is node "span") (find "footnote-back-link" (class-list node) :test #'string-equal))
1089 (plump:traverse node #'move-children-out-of-node :test (lambda (n) (tag-is n "sup" "strong"))))
1090 ((tag-is node "style")
1091 (let ((text (plump:text node)))
1092 (when (search ".mjx-math" text)
1093 (setf (gethash text style-hash) t)))
1094 (plump:remove-child node))
1095 ((tag-is node "script")
1096 (plump:remove-child node))))))))
1097 (clean-dom-text root)
1098 (let ((with-toc (>= section-count 3))
1099 (out-string (make-array 0 :element-type 'character :fill-pointer 0 :adjustable t)))
1100 (with-output-to-string (out-stream out-string)
1101 (style-hash-to-html style-hash out-stream)
1102 (when (> (hash-table-count used-colors) 0)
1103 (format out-stream "<style>~%:root {~%")
1104 (maphash (lambda (name rgba-list)
1105 (declare (ignore rgba-list))
1106 (format out-stream " --user-color-~A: #~A;~%" name name))
1107 used-colors)
1108 (flet ((write-inverted-colors (theme)
1109 (format out-stream "body.theme-~A {~%" theme)
1110 (maphash (lambda (name rgba-list)
1111 (format out-stream " --user-color-~A: ~A;~%" name
1112 (multiple-value-call #'encode-css-color (apply #'perceptual-invert-rgba rgba-list))))
1113 used-colors)
1114 (format out-stream "}~%")))
1115 (format out-stream "}~%@media (prefers-color-scheme: dark) {~%")
1116 (write-inverted-colors "default")
1117 (format out-stream "}~%")
1118 (write-inverted-colors "dark"))
1119 (format out-stream "</style>"))
1120 (with-dynamic-block-serialization (current-memo-hash out-string)
1121 (loop for c across (plump:children root)
1122 when (and with-toc
1123 (not (or (string-is-whitespace (plump:text c))
1124 (tag-is c "figure"))))
1125 do (progn
1126 (contents-to-html (nreverse contents) min-header-level out-stream)
1127 (setf with-toc nil))
1128 do (plump:serialize c out-stream)))
1129 out-string)))))))