1 (uiop:define-package
#:lw2.legacy-archive
2 (:use
#:cl
#:lw2.utils
#:lw2.backend
)
3 (:import-from
#:cl-ppcre
#:regex-replace-all
)
4 (:export
#:lw-legacy-url
#:check-wayback-availability
#:wayback-unmodified-url
))
6 (in-package #:lw2.legacy-archive
)
8 ;; Should match the behavior of https://github.com/tricycle/lesswrong/blob/925eb95151c6aaf1e97efe630a25516af493f9e4/r2/r2/lib/utils/utils.py#L1011
9 (defun lw-legacy-slug (title)
10 (let* ((max-length 50)
11 (title (regex-replace-all "\\s+" title
"_"))
12 (title (regex-replace-all "\\W+" title
""))
13 (title (regex-replace-all "_+" title
"_"))
14 (title (string-trim "_" title
))
15 (title (string-downcase title
)))
16 (if (> (length title
) max-length
)
17 (substring title
0 (or (position #\_ title
:end max-length
:from-end t
)
21 (defun lw-legacy-id-string (legacy-id)
22 (format nil
"~(~36R~)"
24 (string (parse-integer legacy-id
))
25 (integer legacy-id
))))
27 (defun lw-legacy-url (legacy-id title
&key
(section :main
))
28 (format nil
"~Alw/~A/~A"
30 (:main
"http://lesswrong.com/")
31 (:discussion
"http://lesswrong.com/r/discussion/")
33 (lw-legacy-id-string legacy-id
)
34 (lw-legacy-slug title
)))
36 (defun check-wayback-availability (url)
37 (let* ((wayback-api-url
38 (quri:make-uri
:defaults
"https://archive.org/wayback/available"
39 :query
(alist "url" url
43 (call-with-http-response #'json
:decode-json wayback-api-url
:want-stream t
:force-string t
)
44 ((assoc :archived--snapshots
46 (assoc :timestamp timestamp
)))
49 (values (parse-integer timestamp
)))))
51 (defun wayback-unmodified-url (url)
52 (let ((timestamp (check-wayback-availability url
)))
54 (format nil
"https://web.archive.org/web/~Aid_/~A" timestamp url
))))