Fix some bugs in previous commit.
[lw2-viewer.git] / src / links.lisp
blob08d2d0963a6c904201549d705e1fc76279f8d344
1 (uiop:define-package #:lw2.links
2 (:use #:cl #:alexandria #:lw2.utils #:lw2.lmdb #:lw2.backend #:lw2.sites #:lw2.context #:lw2-viewer.config)
3 (:export #:sanitize-link
4 #:match-lw1-link #:convert-lw1-link
5 #:match-ea1-link #:convert-ea1-link
6 #:match-overcomingbias-link #:convert-overcomingbias-link
7 #:direct-link #:with-direct-link
8 #:match-lw2-link #:match-lw2-slug-link #:match-lw2-sequence-link #:convert-lw2-link #:convert-lw2-slug-link #:convert-lw2-sequence-link #:convert-lw2-misc-link
9 #:generate-item-link
10 #:convert-any-link* #:convert-any-link #:presentable-link)
11 (:unintern #:generate-post-link #:convert-lw2-user-link))
13 (in-package #:lw2.links)
15 (defun sanitize-link (link)
16 (substitute #\+ #\Space (string-trim " " link)))
18 (defun get-redirect (uri)
19 (multiple-value-bind (body status headers uri)
20 (dex:request uri :method :head :max-redirects 0 :keep-alive nil)
21 (declare (ignore body uri))
22 (let ((location (gethash "location" headers)))
23 (if (and (typep status 'integer) (< 300 status 400) location)
24 location
25 nil))))
27 (defmacro match-values (regex input registers)
28 (with-gensyms (match? strings)
29 (labels ((register-body (x)
30 (typecase x
31 (integer `(elt ,strings ,x))
32 (atom x)
33 (t (cons (register-body (car x)) (register-body (cdr x)))))))
34 `(multiple-value-bind (,match? ,strings) (ppcre:scan-to-strings ,regex ,input)
35 (when ,match?
36 (values ,.(register-body registers)))))))
38 (defmethod link-for-site-p ((s site) link) nil)
40 (defmethod link-for-site-p ((s lesswrong-viewer-site) link)
41 (ppcre:scan "^https?://(?:www\\.)?(?:less(?:er|est)?wrong\\.com|alignmentforum\\.org)" link))
43 (defmethod link-for-site-p ((s ea-forum-viewer-site) link)
44 (ppcre:scan "^https?://(?:www\\.)?(?:effective-altruism\\.com|forum\\.effectivealtruism\\.org)" link))
46 (defmethod link-for-site-p ((s progress-forum-viewer-site) link)
47 (ppcre:scan "^https?://(?:www\\.)?progressforum\\.org" link))
49 (defmethod link-for-site-p ((s arbital-site) link)
50 (ppcre:scan "^https?://(?:www\\.)?(?:arbital\\.com)" link))
52 (defun find-link-site (link)
53 (if (ppcre:scan "^/(?!/)" link)
54 *current-site*
55 (loop for s in *sites*
56 when (link-for-site-p s link) return s)))
58 (defun site-link-prefix (site)
59 (if (eq site *current-site*)
60 "/"
61 (site-uri site)))
63 (defun match-lw1-link (link) (match-values "(?:^https?://(?:www.)?less(?:er|est)?wrong.com|^)(?:/r/discussion|/r/lesswrong|/r/all)?(/lw/.*)" link (0)))
65 (defun match-ea1-link (link) (match-values "^(?:https?://(?:www\\.)?(?:effective-altruism\\.com|forum\\.effectivealtruism\\.org))?(/ea/.*)" link (0)))
67 (defun match-agentfoundations-link (link) (match-values "^(?:https?://(?:www\\.)?agentfoundations\\.org)?(/item\\?id=.*)" link (0)))
69 (defun match-lw2-link (link) (match-values "^(?:https?://[^/]+)?/(post|event)s/([^/]+)(?:/([^/#?]*)(?:/(comment|answer)/([^/#?]+)|/?(?:#(?:comment-)?|\\?commentId=)([^/#]+))?)?" link (1 (or 4 5) 2 3 0)))
71 (defun match-lw2-slug-link (link) (match-values "^(?:https?://(?:www.)?less(?:er|est)?wrong.com)?/(?:codex|hpmor)/([^/#]+)(?:/?#?([^/#]+)?)?" link (0 1)))
73 (defun match-lw2-sequence-link (link) (match-values "^(?:https?://[^/]+)?/s/([^/#]+)(?:/p/([^/#]+))?(?:#([^/#]+)?)?" link (0 1 2)))
75 (defun convert-lw2-misc-link (link)
76 (when-let* ((site (find-link-site link))
77 (matched-link (and (typep site '(or lesswrong-viewer-site ea-forum-viewer-site)) (match-values "^(?:https?://[^/]+)?/((?:users/|tags|tag/|topics/|s/|sequences/|library).*)" link (0)))))
78 (concatenate 'string (site-link-prefix site) matched-link)))
80 (defun convert-arbital-link (link)
81 (when-let* ((site (find-link-site link))
82 (matched-link (and (typep site 'arbital-site) (match-values "^(?:https?://[^/]+)?/(.*)" link (0)))))
83 (concatenate 'string (site-link-prefix site) matched-link)))
85 (defmacro with-direct-link-restart ((direct-link) &body body)
86 (once-only (direct-link)
87 `(restart-case (progn ,@body)
88 (direct-link () :report "Use direct link." ,direct-link))))
90 (defun direct-link (&optional c)
91 (declare (ignore c))
92 (if-let (restart (find-restart 'direct-link))
93 (invoke-restart restart)))
95 (defmacro with-direct-link (&body body)
96 `(handler-bind
97 ((serious-condition #'direct-link))
98 (progn ,@body)))
100 (defun process-redirect-link (link base-uri site-name)
101 (if-let ((location (get-redirect (concatenate 'string base-uri link))))
102 (let ((loc-uri (quri:uri location))) (format nil "~A~@[#comment-~A~]" (quri:uri-path loc-uri) (quri:uri-fragment loc-uri)))
103 (error "<p>Could not retrieve ~A link.</p><p>You may wish to try <a href='~A'>~:*~A</a>" site-name (concatenate 'string base-uri link))))
105 (defun convert-redirect-link (link match-fn get-fn base-uri)
106 (if-let (matched-link (funcall match-fn link))
107 (with-direct-link-restart ((concatenate 'string base-uri matched-link))
108 (merge-uris (funcall get-fn matched-link)
109 (site-uri (find-link-site base-uri))))))
111 (simple-cacheable ("lw1-link" 'backend-lmdb-cache "lw1-link" link :catch-errors nil)
112 (process-redirect-link link "https://www.lesswrong.com" "LessWrong 1.0"))
114 (defun convert-lw1-link (link)
115 (convert-redirect-link link #'match-lw1-link #'get-lw1-link "https://www.lesswrong.com"))
117 (simple-cacheable ("ea1-link" 'backend-lmdb-cache "ea1-link" link :catch-errors nil)
118 (process-redirect-link link "https://forum.effectivealtruism.org" "EA Forum 1.0"))
120 (defun convert-ea1-link (link)
121 (convert-redirect-link link #'match-ea1-link #'get-ea1-link "https://forum.effectivealtruism.org"))
123 (defun match-overcomingbias-link (link)
124 (if (ppcre:scan "^https?://(?:www\\.)?overcomingbias\\.com/" link)
125 link
126 nil))
128 (simple-cacheable ("overcomingbias-link" 'backend-lmdb-cache "overcomingbias-link" link :catch-errors nil)
129 (if-let ((location (get-redirect link)))
130 (match-lw1-link location)
131 ""))
133 (defun convert-overcomingbias-link (link)
134 (when (match-overcomingbias-link link)
135 (with-direct-link-restart (link)
136 (let ((lw1-link (get-overcomingbias-link link)))
137 (if (string= lw1-link "")
139 (convert-lw1-link lw1-link))))))
141 (simple-cacheable ("agentfoundations-link" 'backend-lmdb-cache "agentfoundations-link" link :catch-errors nil)
142 (process-redirect-link link "https://www.lesswrong.com" "Agent Foundations"))
144 (defun convert-agentfoundations-link (link)
145 (convert-redirect-link link #'match-agentfoundations-link #'get-agentfoundations-link "https://www.lesswrong.com"))
147 (defun gen-internal (post-id slug comment-id &optional absolute-uri stream item-subtype)
148 (format stream "~A~As/~A/~A~:[~@[#~A~]~;~@[#comment-~A~]~]" (or absolute-uri "/") (or item-subtype "post") post-id (or slug (get-post-slug post-id) "-") (and comment-id (= (length comment-id) 17)) comment-id))
150 (defun convert-lw2-slug-link (link)
151 (multiple-value-bind (slug comment-id) (match-lw2-slug-link link)
152 (when slug
153 (gen-internal (get-slug-postid slug) slug comment-id))))
155 (defun convert-lw2-sequence-link (link)
156 (if-let (site (find-link-site link))
157 (multiple-value-bind (sequence-id post-id comment-id) (match-lw2-sequence-link link)
158 (cond
159 (post-id (gen-internal post-id (get-post-slug post-id) comment-id (site-link-prefix site)))
160 (sequence-id (format nil "~As/~A" (site-link-prefix site) sequence-id))))))
162 (defun convert-lw2-link (link)
163 (multiple-value-bind (post-id comment-id slug) (match-lw2-link link)
164 (when post-id
165 (if-let (site (find-link-site link))
166 (gen-internal post-id slug comment-id (site-link-prefix site))))))
168 (defun generate-item-link (item-type item-designator &key comment-id absolute stream item-subtype)
169 (let ((absolute (if (eq absolute t) (site-uri *current-site*) absolute)))
170 (ecase item-type
171 (:post
172 (typecase item-designator
173 (string
174 (gen-internal item-designator (get-post-slug item-designator) comment-id absolute stream (or item-subtype "post")))
175 (cons
176 (let ((post-id (cdr (assoc :--id item-designator))))
177 (gen-internal post-id (or (cdr (assoc :slug item-designator)) (get-post-slug post-id)) comment-id absolute stream (or item-subtype "post"))))))
178 (:tag
179 (with-output-to-designator (out stream)
180 (format out "~Atag/~A~@[#comment-~A~]" (or absolute "/") item-designator comment-id))))))
182 (defun convert-any-link* (url)
183 (let ((url (sanitize-link url)))
184 (or (convert-lw2-link url)
185 (convert-lw2-slug-link url)
186 (convert-lw2-sequence-link url)
187 (convert-lw1-link url)
188 (convert-ea1-link url)
189 (convert-agentfoundations-link url)
190 (convert-overcomingbias-link url)
191 (convert-lw2-misc-link url)
192 (convert-arbital-link url))))
194 (defun convert-any-link (url)
195 (or (convert-any-link* url) url))
197 (defun presentable-link (link &optional context)
198 (or (and (ppcre:scan "^#" link) link)
199 (and (not (eq context :image)) (convert-any-link* link))
200 (and (not (eq context :search))
201 (let ((sanitized-link (sanitize-link link)))
202 (handler-case
203 (merge-uris
204 sanitized-link
205 (site-link-base *current-site*))
206 (error () sanitized-link))))))