More aggressive cleaning of bad files
[objavi2.git] / htdocs / objavi-async.cgi
blob922bb35fa32b64e4c5b377137f8b2334b986612d
1 #!/usr/bin/python
3 # Part of Objavi2, which turns html manuals into books
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Make a pdf from the specified book."""
22 from __future__ import with_statement
24 import os, sys
25 os.chdir('..')
26 sys.path.insert(0, os.path.abspath('.'))
28 import re, time
29 #import traceback
30 from pprint import pformat
32 from objavi.fmbook import Book, HTTP_HOST, find_archive_urls
33 from objavi import config
34 from objavi import twiki_wrapper
35 from objavi.book_utils import init_log, log, make_book_name
36 from objavi.cgi_utils import parse_args, optionise, listify, get_server_list
37 from objavi.cgi_utils import is_utf8, isfloat, isfloat_or_auto, is_isbn, is_url
38 from objavi.cgi_utils import output_blob_and_exit, output_blob_and_shut_up, output_and_exit
39 from objavi.cgi_utils import get_size_list, get_default_css, font_links
# ARG_VALIDATORS is a mapping between the expected cgi arguments and
# functions to validate their values. (None means no validation).
#
# Each validator is called with the raw string value and its result is
# used for its truthiness.
ARG_VALIDATORS = {
    # can be: BlahBlah/Blah_Blah
    # Slash-separated runs of word characters or hyphens.  Written without
    # nested quantifiers so hostile input cannot trigger exponential
    # backtracking; it accepts the same strings as the older
    # r'^([\w-]+/?)*[\w-]+$'.
    "book": re.compile(r'^[\w-]+(/[\w-]+)*$').match,
    "css": is_utf8, # an url, empty (for default), or css content
    "title": lambda x: len(x) < 999 and is_utf8(x),
    "isbn": is_isbn,
    "license": config.LICENSES.__contains__,
    "server": config.SERVER_DEFAULTS.__contains__,
    "engine": config.ENGINES.__contains__,
    "booksize": config.PAGE_SIZE_DATA.__contains__,
    "page_width": isfloat,
    "page_height": isfloat,
    "gutter": isfloat_or_auto,
    "top_margin": isfloat_or_auto,
    "side_margin": isfloat_or_auto,
    "bottom_margin": isfloat_or_auto,
    "columns": isfloat_or_auto,
    "column_margin": isfloat_or_auto,
    # Exact (case-insensitive) membership: a substring test against
    # '1true0false' would also accept '', 'e', 'ue0f' and the like.
    "cgi-context": lambda x: x.lower() in ('1', 'true', '0', 'false'),
    "mode": config.CGI_MODES.__contains__,
    "pdftype": lambda x: config.CGI_MODES.get(x, [False])[0], #for css mode
    "rotate": u"yes".__eq__,
    "grey_scale": u"yes".__eq__,
    "destination": config.CGI_DESTINATIONS.__contains__,
    "toc_header": is_utf8,
    "max-age": isfloat,
    "method": config.CGI_METHODS.__contains__,
    "callback": is_url,
}

# Advertise the accepted argument names in the usage text that main()
# prints when the script is run outside a cgi context with no arguments.
__doc__ += '\nValid arguments are: %s.\n' % ', '.join(ARG_VALIDATORS.keys())
def get_page_settings(args):
    """Find the size and any optional layout settings.

    args['booksize'] is either a keyword describing a size or
    'custom'.  If it is custom, the form is inspected for specific
    dimensions -- otherwise these are ignored.

    The margins, gutter, number of columns, and column
    margins all set themselves automatically based on the page
    dimensions, but they can be overridden.  Any that are
    collected here.

    args is the mapping of validated cgi arguments (only .get() is used).

    Returns a dict of settings, always including 'engine'.

    Raises KeyError if the booksize is 'custom' but no acceptable
    page_width / page_height was supplied.
    """
    # get all the values including sizes first
    # the sizes are found as 'page_width' and 'page_height',
    # but the Book class expects them as a 'pointsize' tuple, so
    # they are easily ignored.
    settings = {}
    for k, extrema in config.PAGE_EXTREMA.iteritems():
        try:
            v = float(args.get(k))
        except (ValueError, TypeError):
            # missing (None) or not a number: leave this setting alone
            #log("don't like %r as a float value for %s!" % (args.get(k), k))
            continue
        min_val, max_val, multiplier = extrema
        if v < min_val or v > max_val:
            # The tuple must be built before formatting: the previous
            # "fmt % (v,) + extrema" applied % first and raised TypeError
            # for every out-of-range value.
            log('rejecting %s: outside %s' % (v, extrema))
        else:
            log('found %s=%s' % (k, v))
            settings[k] = v * multiplier #convert to points in many cases

    # now if args['booksize'] is not 'custom', the width and height found
    # above are ignored.
    size = args.get('booksize', config.DEFAULT_SIZE)
    settings.update(config.PAGE_SIZE_DATA[size])

    #if args['mode'] is 'newspaper', then the number of columns is
    #automatically determined unless set -- otherwise default is 1.
    if args.get('mode') == 'newspaper' and settings.get('columns') is None:
        settings['columns'] = 'auto'

    if args.get('grey_scale'):
        settings['grey_scale'] = True

    if size == 'custom':
        #will raise KeyError if width, height aren't set
        settings['pointsize'] = (settings['page_width'], settings['page_height'])
        del settings['page_width']
        del settings['page_height']

    settings['engine'] = args.get('engine', config.DEFAULT_ENGINE)
    return settings
@output_and_exit
def mode_booklist(args):
    """Return the books of the requested server as a list of options,
    with args['book'] (if any) as the default."""
    #XXX need to include booki servers
    server = args.get('server', config.DEFAULT_SERVER)
    books = twiki_wrapper.get_book_list(server)
    return optionise(books, default=args.get('book'))
@output_and_exit
def mode_css(args):
    """Return the default css for the requested server and pdf type
    (the pdf type defaults to 'book')."""
    #XX sending as text/html, but it doesn't really matter
    server = args.get('server', config.DEFAULT_SERVER)
    pdftype = args.get('pdftype', 'book')
    return get_default_css(server, pdftype)
@output_and_exit
def mode_form(args):
    """Build the html input form and return the filled-in page template.

    Each entry of config.FORM_INPUTS becomes one form item; its initial
    value is looked up (by the entry's source) in a dict of defaults
    derived from args and config.

    NOTE: the element and wrapper templates are filled from locals(), so
    the local variable names in this function are part of the template
    contract -- do not rename them without checking
    config.FORM_ELEMENT_TYPES.
    """
    # 'with' makes sure the files are closed even if reading fails.
    with open(config.FORM_TEMPLATE) as f:
        template = f.read()
    with open(config.FONT_LIST_INCLUDE) as f:
        font_list = [x.strip() for x in f if x.strip()]
    server = args.get('server', config.DEFAULT_SERVER)
    book = args.get('book')
    size = args.get('booksize', config.DEFAULT_SIZE)
    engine = args.get('engine', config.DEFAULT_ENGINE)
    d = {
        'server_options': optionise(get_server_list(), default=server),
        'book_options': optionise(twiki_wrapper.get_book_list(server), default=book),
        'size_options': optionise(get_size_list(), default=size),
        'engines': optionise(config.ENGINES.keys(), default=engine),
        'pdf_types': optionise(sorted(k for k, v in config.CGI_MODES.iteritems() if v[0])),
        'css': get_default_css(server),
        'font_links': listify(font_links()),
        'font_list': listify(font_list),
        'default_license' : config.DEFAULT_LICENSE,
        'licenses' : optionise(config.LICENSES, default=config.DEFAULT_LICENSE),
        'yes': 'yes',
        None: '',
    }

    form = []
    for id, title, type, source, classes, epilogue in config.FORM_INPUTS:
        # val and e are consumed by the templates through locals()
        val = d.get(source, '')
        e = config.FORM_ELEMENT_TYPES[type] % locals()
        form.append('\n<div id="%(id)s_div" class="form-item %(classes)s">\n'
                    '<div class="input_title">%(title)s</div>\n'
                    '<div class="input_contents"> %(e)s %(epilogue)s\n</div>'
                    '</div>\n' % locals())

    # Debugging aid (always on): log any mismatch between the arguments
    # the cgi accepts and the inputs the form offers.
    if True:
        _valid_inputs = set(ARG_VALIDATORS)
        _form_inputs = set(x[0] for x in config.FORM_INPUTS if x[2] != 'ul')
        log("valid but not used inputs: %s" % (_valid_inputs - _form_inputs))
        log("invalid form inputs: %s" % (_form_inputs - _valid_inputs))

    return template % {'form': ''.join(form)}
class Context(object):
    """Work out what to show the caller.  The method/destination matrix:

    [dest/method]    sync      async     poll
    archive.org      url       id        id
    download         data      .         .
    html             html 1    .         html 2
    nowhere          url       id        id

    'html 1' is dripfed progress reports; 'html 2' polls via
    javascript.  'id' is the book filename.  'url' is a full url
    locating the file on archive.org or the objavi server.  '.'  means
    unimplemented.
    """

    # File that pollee_notifier appends to; opened on first use.
    pollfile = None

    def __init__(self, args):
        """Read the destination/method/mode choices from the cgi
        arguments, work out the book name and urls, then start the http
        output (see start())."""
        self.bookid = args.get('book')
        self.server = args.get('server', config.DEFAULT_SERVER)
        self.mode = args.get('mode', 'book')
        extension = config.CGI_MODES.get(self.mode)[1]
        self.bookname = make_book_name(self.bookid, self.server, extension)
        self.destination = args.get('destination', config.DEFAULT_CGI_DESTINATION)
        self.callback = args.get('callback', None)
        self.method = args.get('method', config.CGI_DESTINATIONS[self.destination]['default'])
        self.template, self.mimetype = config.CGI_DESTINATIONS[self.destination][self.method]
        if HTTP_HOST:
            self.bookurl = "http://%s/books/%s" % (HTTP_HOST, self.bookname,)
        else:
            self.bookurl = "books/%s" % (self.bookname,)

        self.details_url, self.s3url = find_archive_urls(self.bookid, self.bookname)
        self.start()

    def start(self):
        """Begin (and in many cases, finish) http output.

        In asynchronous modes, fork and close down stdout.
        """
        log(self.template, self.mimetype, self.destination, self.method)
        if self.template is not None:
            # Only the progress points that apply to this mode are listed.
            progress_list = ''.join('<li id="%s">%s</li>\n' % x[:2]
                                    for x in config.PROGRESS_POINTS
                                    if self.mode in x[2])
            d = {
                'book': self.bookid,
                'bookname': self.bookname,
                'progress_list': progress_list,
                'details_url': self.details_url,
                's3url': self.s3url,
                'bookurl': self.bookurl,
            }
            # 'with' closes the template even if the formatting fails
            with open(self.template) as f:
                content = f.read() % d
        else:
            content = ''

        if self.method == 'sync':
            print('Content-type: %s\n\n%s' % (self.mimetype, content))
        else:
            output_blob_and_shut_up(content, self.mimetype)
            log(sys.stdout, sys.stderr, sys.stdin)
            # The parent exits straight away; the child carries on with
            # the work, detached from the request's stdout and stdin.
            if os.fork():
                os._exit(0)
            sys.stdout.close()
            sys.stdin.close()
            log(sys.stdout, sys.stderr, sys.stdin)

    def finish(self, book):
        """Print any final http content."""
        if self.destination == 'archive.org':
            book.publish_s3()
        elif self.destination == 'download' and self.method == 'sync':
            with open(book.publish_file) as f:
                data = f.read()
            output_blob_and_exit(data, config.CGI_MODES[self.mode][2], self.bookname)

    def log_notifier(self, message):
        """Send messages to the log only."""
        log('******* got message "%s"' %message)

    def callback_notifier(self, message):
        """Call the callback url with each message.

        The request is made in a forked child so the caller is not held
        up.  The child always terminates here and never returns to the
        caller's code.
        """
        log('in callback_notifier')
        pid = os.fork()
        if pid:
            log('child %s is doing callback with message %r' % (pid, message, ))
            return
        # Child process from here on.  Whatever happens it must end in
        # os._exit(); otherwise an unexpected exception would unwind into
        # the caller's stack and a second process would carry on (and
        # clean up) the parent's job.
        try:
            from urllib2 import urlopen, URLError
            from urllib import urlencode
            data = urlencode({'message': message})
            try:
                f = urlopen(self.callback, data)
                time.sleep(2)
                f.close()
            except URLError:
                e = sys.exc_info()[1]
                #traceback.print_exc()
                # Only HTTPError carries url/code/msg; a plain URLError
                # (e.g. connection refused) has just .reason.
                log("ERROR in callback:\n %r\n %s %s" % (
                    getattr(e, 'url', self.callback),
                    getattr(e, 'code', ''),
                    getattr(e, 'msg', getattr(e, 'reason', e))))
            except Exception:
                log("ERROR in callback: unexpected %r" % (sys.exc_info()[1],))
        finally:
            os._exit(0)

    def javascript_notifier(self, message):
        """Print little bits of javascript which will be appended to
        an unfinished html page."""
        try:
            if message.startswith('ERROR:'):
                log('got an error! %r' % message)
                print('<b class="error-message">'
                      '%s\n'
                      '</b></body></html>' % message)
            else:
                print('<script type="text/javascript">\n'
                      'objavi_show_progress("%s");\n'
                      '</script>' % message)
                if message == config.FINISHED_MESSAGE:
                    print('</body></html>')
            sys.stdout.flush()
        except ValueError:
            # presumably raised when stdout has already been closed
            e = sys.exc_info()[1]
            log("failed to send message %r, got exception %r" % (message, e))

    def pollee_notifier(self, message):
        """Append the message to a file that the remote server can poll"""
        if self.pollfile is None or self.pollfile.closed:
            self.pollfile = open(config.POLL_NOTIFY_PATH % self.bookname, 'a')
        self.pollfile.write('%s\n' % message)
        # flush so a poller sees the message at once; the file is kept
        # open for later messages
        self.pollfile.flush()
        #self.pollfile.close()
        #if message == config.FINISHED_MESSAGE:
        #    self.pollfile.close()

    def get_watchers(self):
        """Based on the CGI arguments, return a likely set of notifier
        methods."""
        log('in get_watchers. method %r, callback %r, destination %r' %
            (self.method, self.callback, self.destination))
        watchers = set()
        if self.method == 'poll':
            watchers.add(self.pollee_notifier)
        if self.method == 'async' and self.callback:
            watchers.add(self.callback_notifier)
        if self.method == 'sync' and self.destination == 'html':
            watchers.add(self.javascript_notifier)
        # everything is logged, whatever else happens
        watchers.add(self.log_notifier)
        log('watchers are %s' % watchers)
        return watchers
def mode_book(args):
    """Make a pdf of the requested book, laid out according to the mode
    ('book', 'web' or 'newspaper'), then publish it and hand over to the
    context for any final output."""
    # so we're making a pdf.
    context = Context(args)
    book_options = dict(
        page_settings=get_page_settings(args),
        watchers=context.get_watchers(),
        isbn=args.get('isbn'),
        license=args.get('license'),
        title=args.get('title'),
        max_age=float(args.get('max-age', -1))
    )

    with Book(context.bookid, context.server, context.bookname,
              **book_options) as book:
        book.spawn_x()

        if 'toc_header' in args:
            book.toc_header = args['toc_header'].decode('utf-8')
        book.load_book()
        book.add_css(args.get('css'), context.mode)
        book.add_section_titles()

        # 'book' gets the full book layout; the other pdf modes share
        # the simpler one.
        mode = context.mode
        if mode == 'book':
            book.make_book_pdf()
        elif mode in ('web', 'newspaper'):
            book.make_simple_pdf(mode)

        if "rotate" in args:
            book.rotate180()

        book.publish_pdf()
        context.finish(book)
# The newspaper and web modes are similar enough to 'book' to be handled
# by the same function (main() looks handlers up as mode_<mode>).
mode_newspaper = mode_web = mode_book
def mode_openoffice(args):
    """Make an openoffice document.  A whole lot of the inputs have no
    effect."""
    context = Context(args)
    book_options = dict(
        watchers=context.get_watchers(),
        isbn=args.get('isbn'),
        license=args.get('license'),
        title=args.get('title'),
        max_age=float(args.get('max-age', -1))
    )
    with Book(context.bookid, context.server, context.bookname,
              **book_options) as book:
        book.spawn_x()
        book.load_book()
        book.add_css(args.get('css'), 'openoffice')
        book.add_section_titles()
        book.make_oo_doc()
        context.finish(book)
def mode_epub(args):
    """Make an epub of the requested book and hand over to the context
    for any final output."""
    log('making epub with\n%s' % pformat(args))
    #XXX need to catch and process lack of necessary arguments.
    context = Context(args)
    book_options = dict(
        watchers=context.get_watchers(),
        title=args.get('title'),
        max_age=float(args.get('max-age', -1))
    )
    with Book(context.bookid, context.server, context.bookname,
              **book_options) as book:
        book.make_epub(use_cache=config.USE_CACHED_IMAGES)
        context.finish(book)
def mode_bookizip(args):
    """Publish the requested book as a bookizip and hand over to the
    context for any final output."""
    log('making bookizip with\n%s' % pformat(args))
    context = Context(args)
    book_options = dict(
        watchers=context.get_watchers(),
        title=args.get('title'),
        max_age=float(args.get('max-age', -1))
    )
    with Book(context.bookid, context.server, context.bookname,
              **book_options) as book:
        book.publish_bookizip()
        context.finish(book)
def main():
    """Parse the cgi arguments and dispatch to the matching mode_*
    function.

    The mode defaults to 'book' when a book is named without a mode; any
    mode with no mode_<mode> function falls back to the input form.
    Run outside a cgi context with no arguments, the module's usage text
    is printed instead.
    """
    global CGI_CONTEXT

    args = parse_args(ARG_VALIDATORS)
    mode = args.get('mode')
    if mode is None and 'book' in args:
        mode = 'book'

    # Exact membership rather than a substring test: "x in '1true'" is
    # also true for '', 't', 'ru', ..., which would switch cgi context on
    # by accident.
    CGI_CONTEXT = ('SERVER_NAME' in os.environ or
                   args.get('cgi-context', 'no').lower() in ('1', 'true'))

    if not args and not CGI_CONTEXT:
        print(__doc__)
        sys.exit()

    output_function = globals().get('mode_%s' % mode, mode_form)
    output_function(args)
if __name__ == '__main__':
    # cgitb error pages are enabled only for remote addresses listed in
    # config.CGITB_DOMAINS (and not at all if that is empty).
    if config.CGITB_DOMAINS:
        if os.environ.get('REMOTE_ADDR') in config.CGITB_DOMAINS:
            import cgitb
            cgitb.enable()
    init_log()
    main()