3 # Part of the Objavi2 package. This script imports e-books into Booki
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import os
import sys
import time
import traceback
import tempfile
from subprocess import check_call, CalledProcessError
from urllib import urlencode, unquote
from urllib2 import urlopen, URLError
from urlparse import urlsplit

from objavi import config
from objavi import epub
from objavi.book_utils import log
from objavi.cgi_utils import is_utf8, is_url, super_bleach
from objavi.cgi_utils import (output_blob_and_exit, output_blob_and_shut_up,
                              parse_args, print_template_and_exit)
# URL template for an Internet Archive epub download.  It has two %s slots;
# ia_espri fills both with the same book identifier.
IA_EPUB_URL = "http://www.archive.org/download/%s/%s.epub"
def print_form_and_exit(booklink):
    """Render the espri form template with the given book link.

    booklink is handed to print_template_and_exit under the template key
    'booklink', together with the template path 'templates/espri.html'.

    NOTE(review): judging by the helper's name this call does not return;
    confirm against objavi.cgi_utils.
    """
    print_template_and_exit('templates/espri.html',
                            {'booklink': booklink})
def async_start(content, mimetype):
    """Begin (and in many cases, finish) http output.

    In asynchronous modes, fork and close down stdout.

    content and mimetype are passed straight to output_blob_and_shut_up.
    """
    output_blob_and_shut_up(content, mimetype)
    # Log the standard stream objects (debugging aid).
    log(sys.stdout, sys.stderr, sys.stdin)
    # NOTE(review): the docstring talks about forking and closing stdout,
    # but no such statements are visible in this function as seen here.
    # Either output_blob_and_shut_up takes care of it or those lines were
    # lost -- confirm against the complete file before relying on this.
# async_callback(callback_url, **kwargs)
#
# Visible behaviour: the keyword arguments are url-encoded and handed to
# urlopen() as the `data` argument for callback_url (urllib2 sends a POST
# when data is supplied).  A log line mentions a child process id, and an
# error log line reports the url, code and msg attributes of an exception
# object `e`.
#
# NOTE(review): this view of the function is fragmentary.  `pid` and `e`
# are used below but never bound in the visible lines, so the statements
# that create them (and whatever try/except framing surrounds urlopen) are
# presumably missing here.  Confirm against the complete file before
# changing any behaviour.
55 def async_callback(callback_url
, **kwargs
):
56 """Call the callback url with each message."""
59 log('child %s is doing callback with message %r' % (pid
, kwargs
, ))
61 data
= urlencode(kwargs
)
63 f
= urlopen(callback_url
, data
)
# NOTE(review): `e` below looks like a caught URLError/HTTPError (url, code
# and msg are read from it), but the handler clause itself is not visible.
68 log("ERROR in callback:\n %r\n %s %s" % (e
.url
, e
.code
, e
.msg
))
# espri(epuburl, zipurl)
#
# The only visible statement asks an object `e` to write a bookizip to
# zipurl via make_bookizip().
#
# NOTE(review): everything between the signature and that final call is
# missing from this view: `e` is never bound and epuburl is never used in
# the visible lines.  Presumably the epub at epuburl is fetched and loaded
# into an objavi.epub object first -- confirm against the complete file.
72 def espri(epuburl
, zipurl
):
82 e
.make_bookizip(zipurl
)
def ia_espri(book_id):
    """Import the Internet Archive book `book_id`; return the zip path.

    The epub URL is built from IA_EPUB_URL (the identifier fills both
    slots) and espri() is asked to produce
    <config.BOOKI_BOOK_DIR>/<book_id>.zip.
    """
    epuburl = IA_EPUB_URL % (book_id, book_id)
    zipurl = '%s/%s.zip' % (config.BOOKI_BOOK_DIR, book_id)
    espri(epuburl, zipurl)
    # The CGI entry point uses the importer's return value as the download
    # link, so the zip location has to be handed back.
    return zipurl
def inet_espri(epuburl):
    """Import an epub from an arbitrary URL; return the zip path.

    The zip name is derived from the last path component of `epuburl`
    (unquoted, then sanitised with super_bleach) followed by a timestamp,
    and the file is placed in config.BOOKI_BOOK_DIR.
    """
    tainted_name = unquote(os.path.basename(urlsplit(epuburl).path))
    filename = super_bleach(tainted_name)
    # Drop a trailing 5-character '-epub' suffix (presumably what
    # super_bleach leaves of a '.epub' extension -- TODO confirm).
    if filename.lower().endswith('-epub'):
        filename = filename[:-5]
    # Spelled-out equivalent of '%F_%T': those two directives are C-library
    # extensions that are not in Python's documented strftime table and are
    # not available on every platform; the output text is the same.
    timestamp = time.strftime('%Y-%m-%d_%H:%M:%S')
    zipurl = '%s/%s-%s.zip' % (config.BOOKI_BOOK_DIR, filename, timestamp)
    espri(epuburl, zipurl)
    # The CGI entry point uses the importer's return value as the download
    # link, so the zip location has to be handed back.
    return zipurl
# Settings for the Wikibooks importer (see wikibooks_espri).
# TIMEOUT_CMD and WIKIBOOKS_TIMEOUT are the first two elements of the
# command line that wikibooks_espri builds; the timeout value is reported
# as a number of seconds in the TimeoutError message.
TIMEOUT_CMD = 'timeout'
WIKIBOOKS_TIMEOUT = '600'
# Name of the external converter script.
WIKIBOOKS_CMD = 'wikibooks2epub'
# Exported to the environment as 'oxCACHE' before the converter is used.
WIKIBOOKS_CACHE = 'cache/wikibooks'
class TimeoutError(Exception):
    """Raised when the Wikibooks conversion takes longer than allowed."""
# wikibooks_espri(wiki_url)
#
# Visible steps:
#   * export WIKIBOOKS_CACHE to the environment as 'oxCACHE';
#   * build a name from the last path component of wiki_url (unquoted and
#     passed through super_bleach) plus a '%Y.%m.%d-%H.%M.%S' timestamp;
#   * create a working directory under config.TMPDIR with that name as the
#     mkdtemp prefix, and chmod it to 0755;
#   * compute the epub path inside that directory and a file:// URL for it;
#   * start a command list with TIMEOUT_CMD and WIKIBOOKS_TIMEOUT;
#   * on CalledProcessError with return code 137, raise TimeoutError;
#   * finally call espri() with the file:// URL and a zip path in
#     config.BOOKI_BOOK_DIR.
#
# NOTE(review): this view is fragmentary.  The docstring terminator, the
# rest of the command list, the statement that runs the command, the `try:`
# matching the `except`, the handling of other return codes and any return
# statement are not visible.  137 is presumably the exit status `timeout`
# reports after killing the child with SIGKILL (128 + 9) -- confirm.
109 def wikibooks_espri(wiki_url
):
110 """Wikibooks import using the wikibooks2epub script by Jan Gerber
111 to first convert the wikibook to an epub, which can then be turned
112 into a bookizip via the espri function.
114 os
.environ
['oxCACHE'] = WIKIBOOKS_CACHE
115 tainted_name
= unquote(os
.path
.basename(urlsplit(wiki_url
).path
))
116 filename
= "%s-%s" % (super_bleach(tainted_name
),
117 time
.strftime('%Y.%m.%d-%H.%M.%S'))
118 workdir
= tempfile
.mkdtemp(prefix
=filename
, dir=config
.TMPDIR
)
119 os
.chmod(workdir
, 0755)
120 epub_file
= os
.path
.join(workdir
, filename
+ '.epub')
121 epub_url
= 'file://' + os
.path
.abspath(epub_file
)
122 #epub_url = 'http://localhost/' + epub_file
124 #the wikibooks importer is a separate process, so run that, then collect the epub.
125 cmd
= [TIMEOUT_CMD
, WIKIBOOKS_TIMEOUT
,
134 except CalledProcessError
, e
:
135 if e
.returncode
== 137:
136 raise TimeoutError('Wikibooks took too long (over %s seconds)' % WIKIBOOKS_TIMEOUT
)
139 zipurl
= '%s/%s.zip' % (config
.BOOKI_BOOK_DIR
, filename
)
140 espri(epub_url
, zipurl
)
# Maps each accepted value of the 'source' argument to its importer.
# The entry point looks the importer up as SOURCES.get(source)['function'],
# and the 'source' argument validator is SOURCES.__contains__.
SOURCES = {
    'archive.org': {'function': ia_espri},
    'url': {'function': inet_espri},
    'wikibooks': {'function': wikibooks_espri},
}
# Argument-name -> predicate entries:
#   "source" must be a key of SOURCES,
#   "url"    must satisfy is_url (kept only for the obsolete API),
#   "mode"   must be one of 'zip', 'html' or 'callback'.
#
# NOTE(review): the opening and closing lines of the enclosing dict literal
# are not visible here, and neither are entries for the 'book' and
# 'callback' arguments that the entry point reads.  Presumably this is the
# ARG_VALIDATORS mapping passed to parse_args -- confirm against the
# complete file.
152 "source": SOURCES
.__contains
__,
154 "url": is_url
, #obsolete
155 'mode': ('zip', 'html', 'callback').__contains
__,
def ensure_backwards_compatibility(args):
    """Mutate args to match previous API.

    args is the dictionary of parsed request arguments; it is modified in
    place and nothing is returned.

    * An obsolete 'url' argument selects the 'url' source and is copied to
      'book'.
    * A missing 'source' defaults to 'archive.org'.
    * A 'callback' argument without an explicit 'mode' implies callback
      mode.
    """
    # Only translate the obsolete argument when it was actually supplied;
    # doing it unconditionally would raise KeyError for every new-style
    # request and would make the 'archive.org' default below unreachable.
    if 'url' in args:
        args['source'] = 'url'
        args['book'] = args['url']
    if 'source' not in args:
        args['source'] = 'archive.org'
    if 'callback' in args and 'mode' not in args:
        args['mode'] = 'callback'
# CGI entry point.
#
# Visible flow:
#   * parse the request with ARG_VALIDATORS and apply
#     ensure_backwards_compatibility;
#   * read 'mode' (default 'html'), 'book' and 'source' (default
#     'archive.org'), and look up the importer function in SOURCES;
#   * in callback mode, acknowledge the request through async_start;
#   * run the importer on `book` and build an HTML download link from the
#     returned url; a failure path prints the traceback and builds an HTML
#     error message instead;
#   * then: callback mode reports the url via async_callback; zip mode with
#     a non-None url sends `data` through output_blob_and_exit using
#     config.BOOKIZIP_MIMETYPE and the url's basename; the form is rendered
#     with book_link via print_form_and_exit.
#
# NOTE(review): this view is fragmentary.  The try/except/else framing, the
# second argument to async_start, the assignment of `data`, and whatever
# sets url on the failure path are not visible -- confirm against the
# complete file.
# NOTE(review): `url`, `e` and `book` are interpolated into HTML without any
# visible escaping; whether the template escapes 'booklink' cannot be seen
# from here -- worth checking, since `book` comes from the request.
170 if __name__
== '__main__':
171 args
= parse_args(ARG_VALIDATORS
)
172 ensure_backwards_compatibility(args
)
173 mode
= args
.get('mode', 'html')
174 book
= args
.get('book')
175 source
= args
.get('source', 'archive.org')
176 source_fn
= SOURCES
.get(source
)['function']
# Callback mode: answer the HTTP request right away, then carry on working.
178 if mode
== 'callback':
179 callback_url
= args
['callback']
180 async_start('OK, got it... will call %r when done' % (callback_url
,),
185 url
= source_fn(book
)
186 book_link
= '<p>Download <a href="%s">%s</a>.</p>' % (url
, url
)
188 traceback
.print_exc()
190 book_link
= '<p>Error: <b>%s</b> when trying to get <b>%s</b></p>' % (e
, book
)
# Deliver the result in the way the requested mode asks for.
196 if mode
== 'callback':
197 async_callback(callback_url
, url
=url
)
199 elif mode
== 'zip' and url
is not None:
203 output_blob_and_exit(data
, config
.BOOKIZIP_MIMETYPE
,
204 os
.path
.basename(url
))
207 print_form_and_exit(book_link
)