1 (module reddit mzscheme
2 (require (lib "url.ss" "net")
5 (only (lib "string.ss") regexp-split)
6 (only (lib "13.ss" "srfi") string-contains string-join)
7 (planet "sxml.ss" ("lizorkin" "sxml.plt" 1 4))
8 (planet "ssax.ss" ("lizorkin" "ssax.plt" 1 3))
9 (planet "htmlprag.ss" ("neil" "htmlprag.plt" 1 3)))
13 (require "json/json.ss")
15 (define (subreddit-to-url subreddit)
17 (format "http://~a.reddit.com/" subreddit)
18 "http://reddit.com/"))
21 (let ((r-strip (regexp "^[ \t\r\n]*(.*?)[ \t\r\n]*$")))
23 (cadr (regexp-match r-strip s)))))
25 (define (url->sxml url)
26 (html->sxml (get-pure-port (string->url url))))
28 (define (is-url? string)
29 (string-contains string "http://"))
31 ; Guess time duration from human readable string
32 ; > (guess-time "7 Days 21 hours")
34 (define (guess-time human-readable)
36 (list "([0-9]+)+ Days ([0-9]+)+ hours"
37 (lambda (e) (+ (* 24 60 60 (e 1))
40 (lambda (e) (* 60 60 (e 1)))
42 (lambda (e) (* 24 60 60 (e 1)))
44 (lambda (e) (* 24 60 60)))))
45 (let ((m (regexp-match (car patterns) human-readable))
46 (ext (cadr patterns)))
48 (ext (lambda (i) (string->number (list-ref m i))))
49 (loop (cddr patterns))))))
51 ; (join-url "http://reddit.com/" "/info/foo")
52 ; ==> "http://reddit.com/info/foo"
53 (define (join-url domain path)
54 (string-append (substring domain 0 (- (string-length domain) 1))
57 ; (conver-back-url "http://foo.com/?a=1&b=2&c=3")
58 ; ==> "http://foo.com/?a=1&b=3&c=3
59 (define (convert-back-url quoted-url)
60 (regexp-replace* "&" quoted-url "\\&"))
62 (define-memoize (get-hot-links subreddit)
63 (let ((url (subreddit-to-url subreddit)))
65 ((sxpath "//table[@id='siteTable']/tr[contains(@id, 'thing')]") (url->sxml url))))
66 (if (null? tr-list) '()
67 (let* ((itable ((sxpath "//table/tr") (car tr-list)))
68 (rank* ((sxpath "//td[@class='numbercol']/text()") (car tr-list)))
71 (link* ((sxpath "//td[@colspan = 3]/a/@href/text()") row1))
72 (title* ((sxpath "//td[@colspan = 3]/a/text()") row1))
73 (c-href* ((sxpath "//td[@colspan = 3]/span/a[@class='bylink']/@href/text()") row2))
74 (score* ((sxpath "//td[@colspan = 3]/span[contains(@id, 'score')]/text()") row2))
75 (comments* ((sxpath "//td[@colspan = 3]/span/a[@class='bylink']/text()") row2))
76 (age* ((sxpath "//td[@colspan = 3]/text()") row2))
77 (user* ((sxpath "//td[@colspan = 3]/a[contains(@href, 'user')]/text()") row2))
78 (top* ((sxpath "//td[@rowspan = 3]/span/@class/text()") row1))
79 (rlink (make-hash-table)))
80 ;(display (format "row2: ~a" row2))
81 (map (lambda (args) (apply hash-table-put! (cons rlink args)))
82 `((href ,(convert-back-url (car link*)))
83 (title ,(string-strip (car title*)))
84 (score ,(if (null? score*) #f ; link submitted < 1 hr ?
86 (car (regexp-match "[0-9]+"
88 (comments ,(string->number
89 (car (or (regexp-match "[0-9]+"
92 (comments-href ,(convert-back-url
93 (if (is-url? (car c-href*))
95 (join-url url (car c-href*)))))
96 (age ,(let ((m (regexp-match "posted (.+) ago"
101 (user ,(string-strip (string-join user*)))
102 (top ,(not (null? top*)))
103 (rank ,(string-strip (string-join rank*)))))
105 (loop (cdr tr-list))))))))
112 (display "Go for /api/hot?sub=science")))
115 (let ((GET (get-bindings/get)))
121 (regexp-split "," (extract-binding/single "sub" GET)))
123 (json-write (if (= (length sub-reddits) 1)
124 (get-hot-links (car sub-reddits))
125 (map get-hot-links sub-reddits))))))))))
127 (provide run guess-time get-hot-links))