incorporate wikibooks calls into espri
[objavi2.git] / espri.cgi
blob46ded7ed7cf546538798b9b2b02c9263b45df805
1 #!/usr/bin/python
3 # Part of the Objavi2 package. This script imports e-books into Booki
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 import os, sys
22 import time
23 from urllib2 import urlopen, URLError
24 from urllib import urlencode, unquote
25 from urlparse import urlsplit
26 import traceback, tempfile
27 from subprocess import check_call, CalledProcessError
29 from objavi import epub
30 from objavi.book_utils import log
31 from objavi.cgi_utils import output_blob_and_exit, parse_args, print_template_and_exit, output_blob_and_shut_up
32 from objavi.cgi_utils import is_utf8, is_url, super_bleach
33 from objavi import config
# URL template for an Internet Archive epub download; ia_espri() fills
# both placeholders with the book's identifier.
IA_EPUB_URL = "http://www.archive.org/download/%s/%s.epub"
def print_form_and_exit(booklink):
    """Render the espri form template with *booklink* filled in.

    *booklink* is passed to the template under the key 'booklink'.  All
    output (and, going by its name, the exit) is delegated to
    print_template_and_exit.
    """
    template_values = {'booklink': booklink}
    print_template_and_exit('templates/espri.html', template_values)
def async_start(content, mimetype):
    """Send the http response, then continue in a detached child process.

    *content* and *mimetype* are handed to output_blob_and_shut_up.  The
    process then forks: the parent exits immediately and the child carries
    on with stdout and stdin closed.
    """
    output_blob_and_shut_up(content, mimetype)
    log(sys.stdout, sys.stderr, sys.stdin)
    child_pid = os.fork()
    if child_pid != 0:
        # Parent: the response has been sent, nothing more to do.
        os._exit(0)
    # Child: drop the inherited streams before doing the slow work.
    for stream in (sys.stdout, sys.stdin):
        stream.close()
def async_callback(callback_url, **kwargs):
    """POST *kwargs* to *callback_url* from a forked child process.

    The parent logs the child's pid and returns straight away.  The child
    url-encodes the keyword arguments, sends them as the request body,
    waits two seconds, closes the response and exits.

    The child always terminates with os._exit(0), whatever happens to the
    request, so it can never fall back into the caller's code.
    """
    pid = os.fork()
    if pid:
        log('child %s is doing callback with message %r' % (pid, kwargs, ))
        return
    try:
        try:
            data = urlencode(kwargs)
            f = urlopen(callback_url, data)
            time.sleep(2)
            f.close()
        except URLError:
            # Fetched via sys.exc_info() so the clause parses on every
            # Python version.
            e = sys.exc_info()[1]
            traceback.print_exc()
            # url/code/msg exist only on HTTPError; a plain URLError (DNS
            # failure, refused connection, ...) carries just .reason.
            log("ERROR in callback:\n %r\n %s %s" % (
                getattr(e, 'url', callback_url),
                getattr(e, 'code', ''),
                getattr(e, 'msg', getattr(e, 'reason', ''))))
        except Exception:
            # Anything else is reported too, rather than lost when the
            # child exits below.
            traceback.print_exc()
    finally:
        os._exit(0)
def espri(epuburl, zipurl):
    """Fetch the epub at *epuburl* and write it out as a bookizip at *zipurl*.

    The whole epub is read into memory, loaded into an epub.Epub object,
    its metadata, OPF and NCX are parsed, and make_bookizip(zipurl) is
    called on the result.  Errors from the download or the epub methods
    propagate to the caller.
    """
    log(epuburl, zipurl)
    f = urlopen(epuburl)
    try:
        s = f.read()
    finally:
        # Release the connection even when the read fails part-way.
        f.close()
    e = epub.Epub()
    e.load(s)
    e.parse_meta()
    e.parse_opf()
    e.parse_ncx()
    e.make_bookizip(zipurl)
def ia_espri(book_id):
    """Import the Internet Archive book *book_id* and return the zip path.

    The epub is fetched from the IA_EPUB_URL template and converted by
    espri() into '<config.BOOKI_BOOK_DIR>/<book_id>.zip'.

    Raises ValueError if *book_id* is empty or contains a path component:
    it comes from the request and is used verbatim in the output path, so
    it must not be able to point outside the book directory.
    """
    if not book_id or os.path.basename(book_id) != book_id:
        raise ValueError('%r is not a valid archive.org identifier' % (book_id,))
    epuburl = IA_EPUB_URL % (book_id, book_id)
    log(epuburl)
    zipurl = '%s/%s.zip' % (config.BOOKI_BOOK_DIR, book_id)
    espri(epuburl, zipurl)
    return zipurl
def inet_espri(epuburl):
    """Import the epub at an arbitrary url and return the zip path.

    The zip is named after the last path segment of *epuburl*: the segment
    is url-unquoted, passed through super_bleach, stripped of a trailing
    '-epub' (any case), and suffixed with the current time.
    """
    url_path = urlsplit(epuburl).path
    raw_name = unquote(os.path.basename(url_path))
    stem = super_bleach(raw_name)
    suffix = '-epub'
    if stem.lower().endswith(suffix):
        stem = stem[:-len(suffix)]
    timestamp = time.strftime('%F_%T')
    zipurl = '%s/%s-%s.zip' % (config.BOOKI_BOOK_DIR, stem, timestamp)
    espri(epuburl, zipurl)
    return zipurl
# Settings for the external wikibooks importer run by wikibooks_espri().
# The command line is: TIMEOUT_CMD WIKIBOOKS_TIMEOUT WIKIBOOKS_CMD -i ... -o ...
TIMEOUT_CMD = 'timeout'
# Time limit handed to TIMEOUT_CMD (reported to the user as seconds).
WIKIBOOKS_TIMEOUT = '600'
WIKIBOOKS_CMD = 'wikibooks2epub'
# Exported to the importer through the oxCACHE environment variable.
WIKIBOOKS_CACHE = 'cache/wikibooks'
class TimeoutError(Exception):
    """Signals that the wikibooks conversion exceeded its time limit."""
def wikibooks_espri(wiki_url):
    """Import a wikibook and return the path of the resulting bookizip.

    The wikibooks2epub script by Jan Gerber first converts the wikibook to
    an epub inside a fresh directory under config.TMPDIR; that epub is then
    turned into a bookizip via the espri function.

    Raises TimeoutError if the converter runs past WIKIBOOKS_TIMEOUT, and
    re-raises CalledProcessError for any other non-zero exit status.
    """
    import stat

    os.environ['oxCACHE'] = WIKIBOOKS_CACHE
    tainted_name = unquote(os.path.basename(urlsplit(wiki_url).path))
    filename = "%s-%s" % (super_bleach(tainted_name),
                          time.strftime('%Y.%m.%d-%H.%M.%S'))
    workdir = tempfile.mkdtemp(prefix=filename, dir=config.TMPDIR)
    # mkdtemp creates the directory owner-only; open it up to rwxr-xr-x.
    os.chmod(workdir, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP |
             stat.S_IROTH | stat.S_IXOTH)
    epub_file = os.path.join(workdir, filename + '.epub')
    epub_url = 'file://' + os.path.abspath(epub_file)

    # The wikibooks importer is a separate process, so run that, then
    # collect the epub.
    cmd = [TIMEOUT_CMD, WIKIBOOKS_TIMEOUT,
           WIKIBOOKS_CMD,
           '-i', wiki_url,
           '-o', epub_file,
           ]
    log(cmd)

    # GNU timeout exits with 124 when the limit expires; 137 (128 + SIGKILL)
    # is what shows up when the command had to be killed outright.
    timeout_codes = (124, 137)
    try:
        check_call(cmd)
    except CalledProcessError:
        # Fetched via sys.exc_info() so the clause parses on every Python
        # version.
        e = sys.exc_info()[1]
        if e.returncode in timeout_codes:
            raise TimeoutError('Wikibooks took too long (over %s seconds)' % WIKIBOOKS_TIMEOUT)
        raise

    zipurl = '%s/%s.zip' % (config.BOOKI_BOOK_DIR, filename)
    espri(epub_url, zipurl)
    return zipurl
# Importers selectable through the 'source' argument.  Each entry's
# 'function' is called with the 'book' argument and returns the path of
# the bookizip it wrote.
SOURCES = {
    'archive.org': {'function': ia_espri},
    'url': {'function': inet_espri},
    'wikibooks': {'function': wikibooks_espri},
}
# Accepted CGI arguments, each mapped to a callable that is handed to
# parse_args.  NOTE(review): presumably each callable returns a true value
# for acceptable input -- confirm against cgi_utils.parse_args.
ARG_VALIDATORS = {
    "source": SOURCES.__contains__,
    "book": is_utf8,
    "url": is_url, #obsolete
    'mode': ('zip', 'html', 'callback').__contains__,
    'callback': is_url,
}
def ensure_backwards_compatibility(args):
    """Rewrite *args* in place so requests using the old API still work.

    - a legacy 'url' argument becomes source='url' with the url as 'book';
    - a missing 'source' defaults to 'archive.org';
    - a 'callback' given without a 'mode' implies mode='callback'.
    """
    if 'url' in args:
        args.update(source='url', book=args['url'])
    args.setdefault('source', 'archive.org')
    if 'callback' in args:
        args.setdefault('mode', 'callback')
if __name__ == '__main__':
    # CGI entry point: parse the request, run the selected importer on the
    # requested book, then answer according to 'mode':
    #   callback -- reply at once, import in the background, then POST the
    #               resulting zip path to the callback url;
    #   zip      -- send the bookizip itself;
    #   html     -- show the form page with a download link or an error.

    # Only needed here, for escaping request data placed into HTML.
    from cgi import escape

    args = parse_args(ARG_VALIDATORS)
    ensure_backwards_compatibility(args)
    mode = args.get('mode', 'html')
    book = args.get('book')
    source = args.get('source', 'archive.org')
    source_fn = SOURCES.get(source)['function']

    if mode == 'callback':
        callback_url = args['callback']
        async_start('OK, got it... will call %r when done' % (callback_url,),
                    'text/plain')
    url = None
    if book is not None:
        try:
            url = source_fn(book)
            safe_url = escape(url, True)
            book_link = '<p>Download <a href="%s">%s</a>.</p>' % (safe_url, safe_url)
        except Exception:
            # Fetched via sys.exc_info() so the clause parses on every
            # Python version.
            e = sys.exc_info()[1]
            traceback.print_exc()
            log(e, args)
            # 'book' comes straight from the request and the exception text
            # may echo it, so both are escaped before they reach the page.
            book_link = '<p>Error: <b>%s</b> when trying to get <b>%s</b></p>' % (
                escape('%s' % (e,), True), escape(book, True))
            # Only the html mode has a page to show the error on.
            if mode != 'html':
                raise
    else:
        book_link = ''

    if mode == 'callback':
        async_callback(callback_url, url=url)

    elif mode == 'zip' and url is not None:
        # The bookizip is binary data: read it as such, and close the file
        # even if the read fails.
        f = open(url, 'rb')
        try:
            data = f.read()
        finally:
            f.close()
        output_blob_and_exit(data, config.BOOKIZIP_MIMETYPE,
                             os.path.basename(url))
    else:
        log(book_link)
        print_form_and_exit(book_link)
    log('done!')