CGI scripts chdir into objavi root so as to access all those other parts
[objavi2.git] / htdocs / objavi-async.cgi
blob3b4d29bbce12e9fcbc12eed7a60df80c620d83e9
1 #!/usr/bin/python
3 # Part of Objavi2, which turns html manuals into books
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Make a pdf from the specified book."""
22 from __future__ import with_statement
24 import os, sys
25 os.chdir('..')
26 import re, time
27 #import traceback
28 from pprint import pformat
30 from objavi.fmbook import Book, HTTP_HOST, find_archive_urls
31 from objavi import config
32 from objavi import twiki_wrapper
33 from objavi.book_utils import init_log, log, make_book_name
34 from objavi.cgi_utils import parse_args, optionise, listify, get_server_list
35 from objavi.cgi_utils import is_utf8, isfloat, isfloat_or_auto, is_isbn, is_url
36 from objavi.cgi_utils import output_blob_and_exit, output_blob_and_shut_up, output_and_exit
37 from objavi.cgi_utils import get_size_list, get_default_css, font_links
# ARG_VALIDATORS is a mapping between the expected cgi arguments and
# functions to validate their values. (None means no validation).
ARG_VALIDATORS = {
    # can be: BlahBlah/Blah_Blah -- word/hyphen segments joined by single
    # slashes.  The pattern deliberately has no nested quantifiers, so a
    # long invalid value cannot cause exponential regex backtracking.
    "book": re.compile(r'^[\w-]+(?:/[\w-]+)*$').match,
    "css": is_utf8, # an url, empty (for default), or css content
    "title": lambda x: len(x) < 999 and is_utf8(x),
    "isbn": is_isbn,
    "license": config.LICENSES.__contains__,
    "server": config.SERVER_DEFAULTS.__contains__,
    "engine": config.ENGINES.__contains__,
    "booksize": config.PAGE_SIZE_DATA.__contains__,
    "page_width": isfloat,
    "page_height": isfloat,
    "gutter": isfloat_or_auto,
    "top_margin": isfloat_or_auto,
    "side_margin": isfloat_or_auto,
    "bottom_margin": isfloat_or_auto,
    "columns": isfloat_or_auto,
    "column_margin": isfloat_or_auto,
    # exact match against the accepted spellings (a substring test
    # against '1true0false' would also accept '', 'rue', 'e0f', ...)
    "cgi-context": lambda x: x.lower() in ('1', 'true', '0', 'false'),
    "mode": config.CGI_MODES.__contains__,
    # for css mode: valid when the first element of the CGI_MODES entry
    # is true; unknown names are rejected.
    "pdftype": lambda x: config.CGI_MODES.get(x, [False])[0],
    "rotate": u"yes".__eq__,
    "grey_scale": u"yes".__eq__,
    "destination": config.CGI_DESTINATIONS.__contains__,
    "toc_header": is_utf8,
    "max-age": isfloat,
    "method": config.CGI_METHODS.__contains__,
    "callback": is_url,
}

# Sorted so that the help text is the same on every run.
__doc__ += '\nValid arguments are: %s.\n' % ', '.join(sorted(ARG_VALIDATORS))
def get_page_settings(args):
    """Find the size and any optional layout settings.

    args['booksize'] is either a keyword describing a size or
    'custom'.  If it is custom, the form is inspected for specific
    dimensions -- otherwise these are ignored.

    The margins, gutter, number of columns, and column
    margins all set themselves automatically based on the page
    dimensions, but they can be overridden.  Any that are overridden
    are collected here.

    Returns a dict of settings.  Raises KeyError if booksize is
    'custom' but page_width or page_height was not usable, or if the
    booksize is not a key of config.PAGE_SIZE_DATA.
    """
    # get all the values including sizes first
    # the sizes are found as 'page_width' and 'page_height',
    # but the Book class expects them as a 'pointsize' tuple, so
    # they are easily ignored.
    settings = {}
    for k, extrema in config.PAGE_EXTREMA.items():
        try:
            v = float(args.get(k))
        except (ValueError, TypeError):
            # missing (None) or not parseable as a float: skip silently
            continue
        min_val, max_val, multiplier = extrema
        if v < min_val or v > max_val:
            # Both values go in one tuple: '%' binds tighter than '+',
            # so "fmt % (v,) + extrema" raised TypeError here instead
            # of logging and skipping the bad value.
            log('rejecting %s: outside %s' % (v, extrema))
        else:
            log('found %s=%s' % (k, v))
            settings[k] = v * multiplier #convert to points in many cases

    # now if args['booksize'] is not 'custom', the width and height found
    # above are ignored.
    size = args.get('booksize', config.DEFAULT_SIZE)
    settings.update(config.PAGE_SIZE_DATA[size])

    #if args['mode'] is 'newspaper', then the number of columns is
    #automatically determined unless set -- otherwise default is 1.
    if args.get('mode') == 'newspaper' and settings.get('columns') is None:
        settings['columns'] = 'auto'

    if args.get('grey_scale'):
        settings['grey_scale'] = True

    if size == 'custom':
        #will raise KeyError if width, height aren't set
        settings['pointsize'] = (settings['page_width'], settings['page_height'])
        del settings['page_width']
        del settings['page_height']

    settings['engine'] = args.get('engine', config.DEFAULT_ENGINE)
    return settings
@output_and_exit
def mode_booklist(args):
    """Return the book list of the requested (or default) server as
    rendered by optionise, with args['book'] as the default option."""
    #XXX need to include booki servers
    server = args.get('server', config.DEFAULT_SERVER)
    books = twiki_wrapper.get_book_list(server)
    selected = args.get('book')
    return optionise(books, default=selected)
@output_and_exit
def mode_css(args):
    """Return the default css for the requested server and pdf type
    (falling back to the default server and 'book')."""
    #XX sending as text/html, but it doesn't really matter
    server = args.get('server', config.DEFAULT_SERVER)
    pdftype = args.get('pdftype', 'book')
    return get_default_css(server, pdftype)
@output_and_exit
def mode_form(args):
    """Build the html form from config.FORM_TEMPLATE and
    config.FORM_INPUTS, pre-selecting values found in args.

    NOTE: the element templates are filled in with locals(), so the
    local variable names in this function are effectively part of the
    template interface (the templates live in config and are not
    visible here).  Do not rename locals without checking them.
    """
    # 'with' makes sure the files are closed even if reading fails.
    with open(config.FORM_TEMPLATE) as f:
        template = f.read()
    with open(config.FONT_LIST_INCLUDE) as f:
        font_list = [x.strip() for x in f if x.strip()]
    server = args.get('server', config.DEFAULT_SERVER)
    book = args.get('book')
    size = args.get('booksize', config.DEFAULT_SIZE)
    engine = args.get('engine', config.DEFAULT_ENGINE)
    # values that a form input can name as its 'source'
    d = {
        'server_options': optionise(get_server_list(), default=server),
        'book_options': optionise(twiki_wrapper.get_book_list(server), default=book),
        'size_options': optionise(get_size_list(), default=size),
        'engines': optionise(config.ENGINES.keys(), default=engine),
        'pdf_types': optionise(sorted(k for k, v in config.CGI_MODES.items() if v[0])),
        'css': get_default_css(server),
        'font_links': listify(font_links()),
        'font_list': listify(font_list),
        'default_license' : config.DEFAULT_LICENSE,
        'licenses' : optionise(config.LICENSES, default=config.DEFAULT_LICENSE),
        'yes': 'yes',
        None: '',
    }

    form = []
    # id/type shadow builtins, but are kept because the templates are
    # interpolated with locals() (see docstring).
    for id, title, type, source, classes, epilogue in config.FORM_INPUTS:
        val = d.get(source, '')
        e = config.FORM_ELEMENT_TYPES[type] % locals()
        form.append('\n<div id="%(id)s_div" class="form-item %(classes)s">\n'
                    '<div class="input_title">%(title)s</div>\n'
                    '<div class="input_contents"> %(e)s %(epilogue)s\n</div>'
                    '</div>\n' % locals())

    # Debugging aid: log any mismatch between the cgi arguments that
    # validate and the inputs the form offers ('ul' inputs excluded).
    _valid_inputs = set(ARG_VALIDATORS)
    _form_inputs = set(x[0] for x in config.FORM_INPUTS if x[2] != 'ul')
    log("valid but not used inputs: %s" % (_valid_inputs - _form_inputs))
    log("invalid form inputs: %s" % (_form_inputs - _valid_inputs))

    return template % {'form': ''.join(form)}
class Context(object):
    """Work out what to show the caller.  The method/destination matrix:

    [dest/method]   sync        async       poll
    archive.org     url         id          id
    download        data        .           .
    html            html 1      .           html 2
    nowhere         url         id          id

    'html 1' is dripfed progress reports; 'html 2' polls via
    javascript.  'id' is the book filename.  'url' is a full url
    locating the file on archive.org or the objavi server.  '.' means
    unimplemented.
    """

    # file object used by pollee_notifier; opened lazily
    pollfile = None

    def __init__(self, args):
        """Read the relevant cgi arguments, work out names and urls for
        the book, then begin http output via start()."""
        self.bookid = args.get('book')
        self.server = args.get('server', config.DEFAULT_SERVER)
        self.mode = args.get('mode', 'book')
        extension = config.CGI_MODES.get(self.mode)[1]
        self.bookname = make_book_name(self.bookid, self.server, extension)
        self.destination = args.get('destination', config.DEFAULT_CGI_DESTINATION)
        self.callback = args.get('callback', None)
        self.method = args.get('method', config.CGI_DESTINATIONS[self.destination]['default'])
        self.template, self.mimetype = config.CGI_DESTINATIONS[self.destination][self.method]
        if HTTP_HOST:
            self.bookurl = "http://%s/books/%s" % (HTTP_HOST, self.bookname,)
        else:
            self.bookurl = "books/%s" % (self.bookname,)

        self.details_url, self.s3url = find_archive_urls(self.bookid, self.bookname)
        self.start()

    def start(self):
        """Begin (and in many cases, finish) http output.

        In asynchronous modes, fork and close down stdout.
        """
        log(self.template, self.mimetype, self.destination, self.method)
        if self.template is not None:
            progress_list = ''.join('<li id="%s">%s</li>\n' % x[:2] for x in config.PROGRESS_POINTS
                                    if self.mode in x[2])
            d = {
                'book': self.bookid,
                'bookname': self.bookname,
                'progress_list': progress_list,
                'details_url': self.details_url,
                's3url': self.s3url,
                'bookurl': self.bookurl,
            }
            # 'with' closes the template even if interpolation fails
            with open(self.template) as f:
                content = f.read() % d
        else:
            content = ''

        if self.method == 'sync':
            print('Content-type: %s\n\n%s' % (self.mimetype, content))
        else:
            output_blob_and_shut_up(content, self.mimetype)
            log(sys.stdout, sys.stderr, sys.stdin)
            # the parent exits straight away; the child carries on
            # without stdout or stdin.
            if os.fork():
                os._exit(0)
            sys.stdout.close()
            sys.stdin.close()
            log(sys.stdout, sys.stderr, sys.stdin)

    def finish(self, book):
        """Print any final http content."""
        if self.destination == 'archive.org':
            book.publish_s3()
        elif self.destination == 'download' and self.method == 'sync':
            # read as bytes: the published file is sent out as a blob
            with open(book.publish_file, 'rb') as f:
                data = f.read()
            output_blob_and_exit(data, config.CGI_MODES[self.mode][2], self.bookname)

    def log_notifier(self, message):
        """Send messages to the log only."""
        log('******* got message "%s"' % message)

    def callback_notifier(self, message):
        """Call the callback url with each message.

        The request is made in a forked child so the caller is not held
        up.  The child always ends in os._exit(), whatever happens: if
        it ever returned, two processes would carry on doing the
        caller's work.
        """
        log('in callback_notifier')
        pid = os.fork()
        if pid:
            log('child %s is doing callback with message %r' % (pid, message, ))
            return
        # --- child process from here on ---
        try:
            from urllib2 import urlopen
            from urllib import urlencode
            data = urlencode({'message': message})
            f = urlopen(self.callback, data)
            time.sleep(2)
            f.close()
        except Exception:
            # sys.exc_info() rather than "except ... as e", which old
            # python 2 versions lack.  Only HTTPError carries url, code
            # and msg; a plain URLError (e.g. connection refused) does
            # not, so fall back rather than raise AttributeError.
            e = sys.exc_info()[1]
            #traceback.print_exc()
            log("ERROR in callback:\n %r\n %s %s" % (getattr(e, 'url', self.callback),
                                                     getattr(e, 'code', ''),
                                                     getattr(e, 'msg', e)))
        finally:
            os._exit(0)

    def javascript_notifier(self, message):
        """Print little bits of javascript which will be appended to
        an unfinished html page."""
        try:
            if message.startswith('ERROR:'):
                log('got an error! %r' % message)
                print('<b class="error-message">'
                      '%s\n'
                      '</b></body></html>' % message)
            else:
                print('<script type="text/javascript">\n'
                      'objavi_show_progress("%s");\n'
                      '</script>' % message)
                if message == config.FINISHED_MESSAGE:
                    print('</body></html>')
            sys.stdout.flush()
        except ValueError:
            # presumably stdout has been closed -- TODO confirm
            e = sys.exc_info()[1]
            log("failed to send message %r, got exception %r" % (message, e))

    def pollee_notifier(self, message):
        """Append the message to a file that the remote server can poll"""
        if self.pollfile is None or self.pollfile.closed:
            self.pollfile = open(config.POLL_NOTIFY_PATH % self.bookname, 'a')
        self.pollfile.write('%s\n' % message)
        # flush so the message is written out now; the file is kept
        # open for the next message.
        self.pollfile.flush()

    def get_watchers(self):
        """Based on the CGI arguments, return a likely set of notifier
        methods."""
        log('in get_watchers. method %r, callback %r, destination %r' %
            (self.method, self.callback, self.destination))
        watchers = set()
        if self.method == 'poll':
            watchers.add(self.pollee_notifier)
        if self.method == 'async' and self.callback:
            watchers.add(self.callback_notifier)
        if self.method == 'sync' and self.destination == 'html':
            watchers.add(self.javascript_notifier)
        # everything is logged, whatever the method
        watchers.add(self.log_notifier)
        log('watchers are %s' % watchers)
        return watchers
def mode_book(args):
    """Make a pdf of the book named in args, then hand it to the
    Context for final output.  Handles the 'book', 'web' and
    'newspaper' modes."""
    # so we're making a pdf.
    context = Context(args)
    page_settings = get_page_settings(args)

    # Gather the Book arguments first, in the order they appeared in
    # the original call.
    watchers = context.get_watchers()
    isbn = args.get('isbn')
    license = args.get('license')
    title = args.get('title')
    max_age = float(args.get('max-age', -1))

    with Book(context.bookid, context.server, context.bookname,
              page_settings=page_settings, watchers=watchers,
              isbn=isbn, license=license, title=title,
              max_age=max_age) as book:
        book.spawn_x()

        if 'toc_header' in args:
            book.toc_header = args['toc_header'].decode('utf-8')
        book.load_book()
        book.add_css(args.get('css'), context.mode)
        book.add_section_titles()

        pdf_mode = context.mode
        if pdf_mode in ('web', 'newspaper'):
            book.make_simple_pdf(pdf_mode)
        elif pdf_mode == 'book':
            book.make_book_pdf()

        if "rotate" in args:
            book.rotate180()

        book.publish_pdf()
        context.finish(book)

#These ones are similar enough to be handled by the one function
mode_newspaper = mode_book
mode_web = mode_book
def mode_openoffice(args):
    """Make an openoffice document.  A whole lot of the inputs have no
    effect."""
    context = Context(args)

    # Book arguments, gathered in the order of the original call
    watchers = context.get_watchers()
    isbn = args.get('isbn')
    license = args.get('license')
    title = args.get('title')
    max_age = float(args.get('max-age', -1))

    with Book(context.bookid, context.server, context.bookname,
              watchers=watchers, isbn=isbn, license=license,
              title=title, max_age=max_age) as book:
        book.spawn_x()
        book.load_book()
        book.add_css(args.get('css'), 'openoffice')
        book.add_section_titles()
        book.make_oo_doc()
        context.finish(book)
def mode_epub(args):
    """Make an epub of the book named in args and pass it to the
    Context for final output."""
    log('making epub with\n%s' % pformat(args))
    #XXX need to catch and process lack of necessary arguments.
    context = Context(args)

    watchers = context.get_watchers()
    title = args.get('title')
    max_age = float(args.get('max-age', -1))

    with Book(context.bookid, context.server, context.bookname,
              watchers=watchers, title=title, max_age=max_age) as book:
        book.make_epub(use_cache=config.USE_CACHED_IMAGES)
        context.finish(book)
def mode_bookizip(args):
    """Publish the book named in args as a bookizip and pass it to the
    Context for final output."""
    log('making bookizip with\n%s' % pformat(args))
    context = Context(args)

    watchers = context.get_watchers()
    title = args.get('title')
    max_age = float(args.get('max-age', -1))

    with Book(context.bookid, context.server, context.bookname,
              watchers=watchers, title=title, max_age=max_age) as book:
        book.publish_bookizip()
        context.finish(book)
def main():
    """Parse the cgi arguments and dispatch to the matching mode_*
    function, falling back to the form.

    With no arguments and outside a cgi context, print the module
    documentation and exit instead.
    """
    global CGI_CONTEXT
    args = parse_args(ARG_VALIDATORS)
    mode = args.get('mode')
    if mode is None and 'book' in args:
        mode = 'book'

    # Exact membership: a substring test against '1true' would also
    # treat '', 't', 'rue' etc. as true.
    CGI_CONTEXT = ('SERVER_NAME' in os.environ or
                   args.get('cgi-context', 'no').lower() in ('1', 'true'))

    if not args and not CGI_CONTEXT:
        print(__doc__)
        sys.exit()

    # mode_form is used when there is no mode_<mode> function (which
    # includes the case of no mode at all).
    output_function = globals().get('mode_%s' % mode, mode_form)
    output_function(args)
if __name__ == '__main__':
    # cgitb is switched on only for remote addresses listed in
    # config.CGITB_DOMAINS.
    cgitb_domains = config.CGITB_DOMAINS
    if cgitb_domains and os.environ.get('REMOTE_ADDR') in cgitb_domains:
        import cgitb
        cgitb.enable()
    init_log()
    main()