remove unused functions from cgi_utils
[objavi2.git] / objavi-async.cgi
blobb4c32072b3f2dd4506b1ad31dcaad6b953cc2ec6
1 #!/usr/bin/python
3 # Part of Objavi2, which turns html manuals into books
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Make a pdf from the specified book."""
22 from __future__ import with_statement
24 import os, sys
25 import re, time
26 #import traceback
27 from pprint import pformat
29 from objavi.fmbook import Book, HTTP_HOST, find_archive_urls
30 from objavi import config
31 from objavi import twiki_wrapper
32 from objavi.book_utils import init_log, log, make_book_name
33 from objavi.cgi_utils import parse_args, optionise, listify, get_server_list
34 from objavi.cgi_utils import is_utf8, isfloat, isfloat_or_auto, is_isbn, is_url
35 from objavi.cgi_utils import output_blob_and_exit, output_blob_and_shut_up, output_and_exit
36 from objavi.cgi_utils import get_size_list, get_default_css, font_links
# ARG_VALIDATORS is a mapping between the expected cgi arguments and
# functions to validate their values. (None means no validation).
#
# Every validator is called with the raw argument string and its
# result is interpreted as a boolean.
ARG_VALIDATORS = {
    # can be: BlahBlah/Blah_Blah
    # One or more [\w-]+ segments separated by single slashes.  Written
    # without nested quantifiers so a hostile value cannot trigger
    # exponential regex backtracking.
    "book": re.compile(r'^[\w-]+(/[\w-]+)*$').match,
    "css": is_utf8, # an url, empty (for default), or css content
    "title": lambda x: len(x) < 999 and is_utf8(x),
    "isbn": is_isbn,
    "license": config.LICENSES.__contains__,
    "server": config.SERVER_DEFAULTS.__contains__,
    "engine": config.ENGINES.__contains__,
    "booksize": config.PAGE_SIZE_DATA.__contains__,
    "page_width": isfloat,
    "page_height": isfloat,
    "gutter": isfloat_or_auto,
    "top_margin": isfloat_or_auto,
    "side_margin": isfloat_or_auto,
    "bottom_margin": isfloat_or_auto,
    "columns": isfloat_or_auto,
    "column_margin": isfloat_or_auto,
    # Exact match against the allowed spellings (a substring test
    # against '1true0false' would also accept '', 'rue', 'e0f', ...).
    "cgi-context": lambda x: x.lower() in ('1', 'true', '0', 'false'),
    "mode": config.CGI_MODES.__contains__,
    "pdftype": lambda x: config.CGI_MODES.get(x, [False])[0], #for css mode
    "rotate": u"yes".__eq__,
    "grey_scale": u"yes".__eq__,
    "destination": config.CGI_DESTINATIONS.__contains__,
    "toc_header": is_utf8,
    "max-age": isfloat,
    "method": config.CGI_METHODS.__contains__,
    "callback": is_url,
}

# Sorted so that the help text is stable from run to run.
__doc__ += '\nValid arguments are: %s.\n' % ', '.join(sorted(ARG_VALIDATORS))
def get_page_settings(args):
    """Find the size and any optional layout settings.

    args['booksize'] is either a keyword describing a size or
    'custom'.  If it is custom, the form is inspected for specific
    dimensions -- otherwise these are ignored.

    The margins, gutter, number of columns, and column
    margins all set themselves automatically based on the page
    dimensions, but they can be overridden.  Any that are are
    collected here.

    Returns a dict of settings, always containing 'engine'.

    Raises KeyError if booksize is 'custom' but a usable page_width
    or page_height was not supplied, or if booksize is not a key of
    config.PAGE_SIZE_DATA.
    """
    # get all the values including sizes first
    # the sizes are found as 'page_width' and 'page_height',
    # but the Book class expects them as a 'pointsize' tuple, so
    # they are easily ignored.
    settings = {}
    for k, extrema in config.PAGE_EXTREMA.items():
        try:
            v = float(args.get(k))
        except (ValueError, TypeError):
            # missing (None) or not a number: leave this one automatic
            continue
        min_val, max_val, multiplier = extrema
        # The chained comparison is False for NaN, so NaN is rejected
        # rather than slipping past separate < and > tests.
        if min_val <= v <= max_val:
            log('found %s=%s' % (k, v))
            settings[k] = v * multiplier #convert to points in many cases
        else:
            log('rejecting %s: outside %s' % (v, extrema))

    # now if args['booksize'] is not 'custom', the width and height found
    # above are ignored.
    size = args.get('booksize', config.DEFAULT_SIZE)
    settings.update(config.PAGE_SIZE_DATA[size])

    #if args['mode'] is 'newspaper', then the number of columns is
    #automatically determined unless set -- otherwise default is 1.
    if args.get('mode') == 'newspaper' and settings.get('columns') is None:
        settings['columns'] = 'auto'

    if args.get('grey_scale'):
        settings['grey_scale'] = True

    if size == 'custom':
        #will raise KeyError if width, height aren't set
        settings['pointsize'] = (settings['page_width'], settings['page_height'])
        del settings['page_width']
        del settings['page_height']

    settings['engine'] = args.get('engine', config.DEFAULT_ENGINE)
    return settings
@output_and_exit
def mode_booklist(args):
    """Return the book list of the requested (or default) server,
    passed through optionise with args['book'] as the default entry."""
    #XXX need to include booki servers
    server = args.get('server', config.DEFAULT_SERVER)
    books = twiki_wrapper.get_book_list(server)
    selected = args.get('book')
    return optionise(books, default=selected)
@output_and_exit
def mode_css(args):
    """Return the default css for the requested server and pdf type
    (falling back to the default server and 'book')."""
    #XX sending as text/html, but it doesn't really matter
    server = args.get('server', config.DEFAULT_SERVER)
    pdftype = args.get('pdftype', 'book')
    return get_default_css(server, pdftype)
@output_and_exit
def mode_form(args):
    """Build the html request form and return it.

    Each entry of config.FORM_INPUTS is rendered with the matching
    config.FORM_ELEMENT_TYPES template, and the joined result is
    substituted into the template file named by config.FORM_TEMPLATE.

    NOTE: the element templates are filled in from locals(), so the
    local variable names in this function (id, title, type, val, e,
    ...) are effectively part of the template interface and must not
    be renamed.
    """
    # 'with' guarantees the files are closed even if reading fails.
    with open(config.FORM_TEMPLATE) as f:
        template = f.read()
    with open(config.FONT_LIST_INCLUDE) as f:
        # one font per line; blank lines are skipped
        font_list = [x.strip() for x in f if x.strip()]
    server = args.get('server', config.DEFAULT_SERVER)
    book = args.get('book')
    size = args.get('booksize', config.DEFAULT_SIZE)
    engine = args.get('engine', config.DEFAULT_ENGINE)
    # Values available to form elements, looked up by each input's 'source'.
    d = {
        'server_options': optionise(get_server_list(), default=server),
        'book_options': optionise(twiki_wrapper.get_book_list(server), default=book),
        'size_options': optionise(get_size_list(), default=size),
        'engines': optionise(config.ENGINES.keys(), default=engine),
        'pdf_types': optionise(sorted(k for k, v in config.CGI_MODES.items() if v[0])),
        'css': get_default_css(server),
        'font_links': listify(font_links()),
        'font_list': listify(font_list),
        'default_license' : config.DEFAULT_LICENSE,
        'licenses' : optionise(config.LICENSES, default=config.DEFAULT_LICENSE),
        'yes': 'yes',
        None: '',
    }

    form = []
    for id, title, type, source, classes, epilogue in config.FORM_INPUTS:
        val = d.get(source, '')
        e = config.FORM_ELEMENT_TYPES[type] % locals()
        form.append('\n<div id="%(id)s_div" class="form-item %(classes)s">\n'
                    '<div class="input_title">%(title)s</div>\n'
                    '<div class="input_contents"> %(e)s %(epilogue)s\n</div>'
                    '</div>\n' % locals())

    # Debugging aid: report mismatches between the arguments the cgi
    # script validates and the inputs the form offers.
    if True:
        _valid_inputs = set(ARG_VALIDATORS)
        _form_inputs = set(x[0] for x in config.FORM_INPUTS if x[2] != 'ul')
        log("valid but not used inputs: %s" % (_valid_inputs - _form_inputs))
        log("invalid form inputs: %s" % (_form_inputs - _valid_inputs))

    return template % {'form': ''.join(form)}
class Context(object):
    """Work out what to show the caller.  The method/destination matrix:

    [dest/method]   sync    async  poll
    archive.org     url     id     id
    download        data    .      .
    html            html 1  .      html 2
    nowhere         url     id     id

    'html 1' is dripfed progress reports; 'html 2' polls via
    javascript.  'id' is the book filename.  'url' is a full url
    locating the file on archive.org or the objavi server.  '.'  means
    unimplemented.
    """

    # File that poll-mode progress messages are appended to; opened
    # lazily by pollee_notifier.
    pollfile = None

    def __init__(self, args):
        """Read the request out of args (a dict of cgi arguments),
        derive the book name and urls, and start the http response."""
        self.bookid = args.get('book')
        self.server = args.get('server', config.DEFAULT_SERVER)
        self.mode = args.get('mode', 'book')
        extension = config.CGI_MODES.get(self.mode)[1]
        self.bookname = make_book_name(self.bookid, self.server, extension)
        self.destination = args.get('destination', config.DEFAULT_CGI_DESTINATION)
        self.callback = args.get('callback', None)
        self.method = args.get('method', config.CGI_DESTINATIONS[self.destination]['default'])
        self.template, self.mimetype = config.CGI_DESTINATIONS[self.destination][self.method]
        if HTTP_HOST:
            self.bookurl = "http://%s/books/%s" % (HTTP_HOST, self.bookname,)
        else:
            self.bookurl = "books/%s" % (self.bookname,)

        self.details_url, self.s3url = find_archive_urls(self.bookid, self.bookname)
        self.start()

    def start(self):
        """Begin (and in many cases, finish) http output.

        In asynchronous modes, fork and close down stdout.
        """
        log(self.template, self.mimetype, self.destination, self.method)
        if self.template is not None:
            progress_list = ''.join('<li id="%s">%s</li>\n' % x[:2] for x in config.PROGRESS_POINTS
                                    if self.mode in x[2])
            d = {
                'book': self.bookid,
                'bookname': self.bookname,
                'progress_list': progress_list,
                'details_url': self.details_url,
                's3url': self.s3url,
                'bookurl': self.bookurl,
            }
            # 'with' closes the template even if reading fails.
            with open(self.template) as f:
                raw_template = f.read()
            content = raw_template % d
        else:
            content = ''

        if self.method == 'sync':
            print('Content-type: %s\n\n%s' % (self.mimetype, content))
        else:
            output_blob_and_shut_up(content, self.mimetype)
            log(sys.stdout, sys.stderr, sys.stdin)
            # The parent exits straight away; the child carries on
            # with the work, detached from the request's stdin/stdout.
            if os.fork():
                os._exit(0)
            sys.stdout.close()
            sys.stdin.close()
            log(sys.stdout, sys.stderr, sys.stdin)

    def finish(self, book):
        """Print any final http content."""
        if self.destination == 'archive.org':
            book.publish_s3()
        elif self.destination == 'download' and self.method == 'sync':
            # Binary mode: the published file is sent on unmodified.
            with open(book.publish_file, 'rb') as f:
                data = f.read()
            output_blob_and_exit(data, config.CGI_MODES[self.mode][2], self.bookname)

    def log_notifier(self, message):
        """Send messages to the log only."""
        log('******* got message "%s"' %message)

    def callback_notifier(self, message):
        """Call the callback url with each message.

        The request is made in a forked child so the caller is not
        held up; the parent returns immediately.  The child always
        terminates with os._exit() when it is done, whatever happens,
        so it can never fall through and continue running as a second
        copy of the parent.
        """
        log('in callback_notifier')
        pid = os.fork()
        if pid:
            log('child %s is doing callback with message %r' % (pid, message, ))
            return
        try:
            from urllib2 import urlopen, URLError
            from urllib import urlencode
            data = urlencode({'message': message})
            try:
                f = urlopen(self.callback, data)
                time.sleep(2)
                f.close()
            except URLError:
                # sys.exc_info() rather than 'except ... as e' keeps
                # this portable across old and new Python versions.
                e = sys.exc_info()[1]
                # Only HTTPError carries url/code/msg; a plain
                # URLError (e.g. connection refused) has 'reason'.
                log("ERROR in callback:\n %r\n %s %s" % (
                    getattr(e, 'url', self.callback),
                    getattr(e, 'code', ''),
                    getattr(e, 'msg', getattr(e, 'reason', e))))
        except Exception:
            log("ERROR in callback: %r" % (sys.exc_info()[1],))
        finally:
            os._exit(0)

    def javascript_notifier(self, message):
        """Print little bits of javascript which will be appended to
        an unfinished html page."""
        try:
            if message.startswith('ERROR:'):
                log('got an error! %r' % message)
                print('<b class="error-message">'
                      '%s\n'
                      '</b></body></html>' % message)
            else:
                print('<script type="text/javascript">\n'
                      'objavi_show_progress("%s");\n'
                      '</script>' % message)
                if message == config.FINISHED_MESSAGE:
                    print('</body></html>')
            sys.stdout.flush()
        except ValueError:
            e = sys.exc_info()[1]
            log("failed to send message %r, got exception %r" % (message, e))

    def pollee_notifier(self, message):
        """Append the message to a file that the remote server can poll"""
        # The file is opened on first use (or re-opened if it has been
        # closed) and then kept open between messages.
        if self.pollfile is None or self.pollfile.closed:
            self.pollfile = open(config.POLL_NOTIFY_PATH % self.bookname, 'a')
        self.pollfile.write('%s\n' % message)
        self.pollfile.flush()

    def get_watchers(self):
        """Based on the CGI arguments, return a likely set of notifier
        methods."""
        log('in get_watchers. method %r, callback %r, destination %r' %
            (self.method, self.callback, self.destination))
        watchers = set()
        if self.method == 'poll':
            watchers.add(self.pollee_notifier)
        if self.method == 'async' and self.callback:
            watchers.add(self.callback_notifier)
        if self.method == 'sync' and self.destination == 'html':
            watchers.add(self.javascript_notifier)
        # everything is always logged as well
        watchers.add(self.log_notifier)
        log('watchers are %s' % watchers)
        return watchers
def mode_book(args):
    """Make a pdf of the requested book and hand it to the context.

    context.mode selects the layout: 'book' uses make_book_pdf, while
    'web' and 'newspaper' use make_simple_pdf.
    """
    # so we're making a pdf.
    context = Context(args)
    page_settings = get_page_settings(args)
    book_options = dict(
        page_settings=page_settings,
        watchers=context.get_watchers(),
        isbn=args.get('isbn'),
        license=args.get('license'),
        title=args.get('title'),
        max_age=float(args.get('max-age', -1))
    )

    with Book(context.bookid, context.server, context.bookname,
              **book_options) as book:
        book.spawn_x()

        if 'toc_header' in args:
            book.toc_header = args['toc_header'].decode('utf-8')
        book.load_book()
        book.add_css(args.get('css'), context.mode)
        book.add_section_titles()

        pdf_mode = context.mode
        if pdf_mode in ('web', 'newspaper'):
            book.make_simple_pdf(pdf_mode)
        elif pdf_mode == 'book':
            book.make_book_pdf()

        if "rotate" in args:
            book.rotate180()

        book.publish_pdf()
        context.finish(book)

#These ones are similar enough to be handled by the one function
mode_newspaper = mode_web = mode_book
def mode_openoffice(args):
    """Make an openoffice document.  A whole lot of the inputs have no
    effect."""
    context = Context(args)
    book_options = dict(
        watchers=context.get_watchers(),
        isbn=args.get('isbn'),
        license=args.get('license'),
        title=args.get('title'),
        max_age=float(args.get('max-age', -1))
    )
    with Book(context.bookid, context.server, context.bookname,
              **book_options) as book:
        book.spawn_x()
        book.load_book()
        book.add_css(args.get('css'), 'openoffice')
        book.add_section_titles()
        book.make_oo_doc()
        context.finish(book)
def mode_epub(args):
    """Make an epub of the requested book and hand it to the context."""
    log('making epub with\n%s' % pformat(args))
    #XXX need to catch and process lack of necessary arguments.
    context = Context(args)
    book_options = dict(
        watchers=context.get_watchers(),
        title=args.get('title'),
        max_age=float(args.get('max-age', -1))
    )
    with Book(context.bookid, context.server, context.bookname,
              **book_options) as book:
        book.make_epub(use_cache=config.USE_CACHED_IMAGES)
        context.finish(book)
def mode_bookizip(args):
    """Publish the requested book as a bookizip and hand it to the
    context."""
    log('making bookizip with\n%s' % pformat(args))
    context = Context(args)
    book_options = dict(
        watchers=context.get_watchers(),
        title=args.get('title'),
        max_age=float(args.get('max-age', -1))
    )
    with Book(context.bookid, context.server, context.bookname,
              **book_options) as book:
        book.publish_bookizip()
        context.finish(book)
def main():
    """Parse the cgi arguments and dispatch to the matching mode_*
    function.

    The mode is taken from args['mode']; if that is missing but a
    book was named, 'book' is assumed.  Any mode without a mode_<name>
    function in this module (including no mode at all) shows the form.

    When run with no arguments outside of a CGI environment, print
    the module's help text and exit instead.
    """
    global CGI_CONTEXT

    args = parse_args(ARG_VALIDATORS)
    mode = args.get('mode')
    if mode is None and 'book' in args:
        mode = 'book'

    # Compare against the exact allowed spellings.  A substring test
    # against '1true' would also treat '', 'e', 'ru', ... as true.
    CGI_CONTEXT = ('SERVER_NAME' in os.environ or
                   args.get('cgi-context', 'no').lower() in ('1', 'true'))

    if not args and not CGI_CONTEXT:
        print(__doc__)
        sys.exit()

    output_function = globals().get('mode_%s' % mode, mode_form)
    output_function(args)
if __name__ == '__main__':
    # Browser-visible tracebacks are only switched on for requests
    # from the addresses listed in config.CGITB_DOMAINS.
    debug_domains = config.CGITB_DOMAINS
    if debug_domains and os.environ.get('REMOTE_ADDR') in debug_domains:
        import cgitb
        cgitb.enable()
    init_log()
    main()