oops, added wrong robots.txt
[objavi2.git] / htdocs / objavi.cgi
blob366f8cc336dcc1cdbae5cec8a0ce187ad848b573
1 #!/usr/bin/python
3 # Part of Objavi2, which turns html manuals into books
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Make a pdf from the specified book."""
22 from __future__ import with_statement
24 import os, sys
25 os.chdir('..')
26 sys.path.insert(0, os.path.abspath('.'))
28 import re, time
29 #import traceback
30 from pprint import pformat
32 from objavi.fmbook import Book, HTTP_HOST, find_archive_urls
33 from objavi import config
34 from objavi import twiki_wrapper
35 from objavi.book_utils import init_log, log, make_book_name
36 from objavi.cgi_utils import parse_args, optionise, listify, get_server_list
37 from objavi.cgi_utils import is_utf8, isfloat, isfloat_or_auto, is_isbn, is_url
38 from objavi.cgi_utils import output_blob_and_exit, output_blob_and_shut_up, output_and_exit
39 from objavi.cgi_utils import get_size_list, get_default_css, font_links
# ARG_VALIDATORS is a mapping between the expected cgi arguments and
# functions to validate their values. (None means no validation).
#
# Each validator is called with the raw argument string and its result is
# used as a truth value.
ARG_VALIDATORS = {
    # can be: BlahBlah/Blah_Blah
    # One or more runs of [\w-] separated by single slashes (no leading,
    # trailing or doubled slash).  This accepts exactly the same strings as
    # the older '^([\w-]+/?)*[\w-]+$' but has no nested quantifiers, so a
    # hostile value cannot trigger catastrophic regex backtracking.
    "book": re.compile(r'^[\w-]+(?:/[\w-]+)*$').match,
    "css": is_utf8, # an url, empty (for default), or css content
    "title": lambda x: len(x) < 999 and is_utf8(x),
    "isbn": is_isbn,
    "license": config.LICENSES.__contains__,
    "server": config.SERVER_DEFAULTS.__contains__,
    "engine": config.ENGINES.__contains__,
    "booksize": config.PAGE_SIZE_DATA.__contains__,
    "page_width": isfloat,
    "page_height": isfloat,
    "gutter": isfloat_or_auto,
    "top_margin": isfloat_or_auto,
    "side_margin": isfloat_or_auto,
    "bottom_margin": isfloat_or_auto,
    "columns": isfloat_or_auto,
    "column_margin": isfloat_or_auto,
    # Exact token match: a substring test against '1true0false' would also
    # accept '', 'e0', 'rue' and similar junk.
    "cgi-context": lambda x: x.lower() in ('1', 'true', '0', 'false'),
    "mode": config.CGI_MODES.__contains__,
    "pdftype": lambda x: config.CGI_MODES.get(x, [False])[0], #for css mode
    "rotate": u"yes".__eq__,
    "grey_scale": u"yes".__eq__,
    "destination": config.CGI_DESTINATIONS.__contains__,
    "toc_header": is_utf8,
    "max-age": isfloat,
    "method": config.CGI_METHODS.__contains__,
    "callback": is_url,
    "html_template": is_utf8,
    "booki-group": is_utf8,
    "booki-user": is_utf8,
}

# Sorted so that the help text is stable from run to run.
__doc__ += '\nValid arguments are: %s.\n' % ', '.join(sorted(ARG_VALIDATORS))
def get_page_settings(args):
    """Find the size and any optional layout settings.

    args['booksize'] is either a keyword describing a size or
    'custom'.  If it is custom, the form is inspected for specific
    dimensions -- otherwise these are ignored.

    The margins, gutter, number of columns, and column
    margins all set themselves automatically based on the page
    dimensions, but they can be overridden.  Any that are are
    collected here.

    Returns a dict of settings.  It always contains 'engine', plus
    whatever config.PAGE_SIZE_DATA holds for the chosen size and any
    in-range numeric overrides found in args.

    Raises KeyError if the booksize is not in config.PAGE_SIZE_DATA, or
    if it is 'custom' and no usable page_width/page_height was given.
    """
    # get all the values including sizes first
    # the sizes are found as 'page_width' and 'page_height',
    # but the Book class expects them as a 'pointsize' tuple, so
    # they are easily ignored.
    settings = {}
    for k, extrema in config.PAGE_EXTREMA.iteritems():
        try:
            v = float(args.get(k))
        except (ValueError, TypeError):
            # missing (None) or not a number: leave this setting on automatic
            continue
        min_val, max_val, multiplier = extrema
        # Phrased as a positive range test so that NaN, for which every
        # comparison is false, is rejected rather than accepted.
        if not (min_val <= v <= max_val):
            # The arguments need to be one tuple: '%' binds tighter than
            # '+', so "fmt % (v,) + extrema" raised TypeError here.
            log('rejecting %s: outside %s' % (v, extrema))
        else:
            log('found %s=%s' % (k, v))
            settings[k] = v * multiplier #convert to points in many cases

    # now if args['size'] is not 'custom', the width and height found
    # above are ignored.
    size = args.get('booksize', config.DEFAULT_SIZE)
    settings.update(config.PAGE_SIZE_DATA[size])

    #if args['mode'] is 'newspaper', then the number of columns is
    #automatically determined unless set -- otherwise default is 1.
    if args.get('mode') == 'newspaper' and settings.get('columns') is None:
        settings['columns'] = 'auto'

    if args.get('grey_scale'):
        settings['grey_scale'] = True

    if size == 'custom':
        #will raise KeyError if width, height aren't set
        settings['pointsize'] = (settings['page_width'], settings['page_height'])
        del settings['page_width']
        del settings['page_height']

    settings['engine'] = args.get('engine', config.DEFAULT_ENGINE)
    return settings
@output_and_exit
def mode_booklist(args):
    """Return the book list of the requested server (or the default
    server), passed through optionise() with the requested book as the
    default."""
    #XXX need to include booki servers
    server = args.get('server', config.DEFAULT_SERVER)
    books = twiki_wrapper.get_book_list(server)
    return optionise(books, default=args.get('book'))
@output_and_exit
def mode_css(args):
    """Return the default css for the requested server and pdf type
    (falling back to the default server and 'book')."""
    #XX sending as text/html, but it doesn't really matter
    server = args.get('server', config.DEFAULT_SERVER)
    pdftype = args.get('pdftype', 'book')
    return get_default_css(server, pdftype)
@output_and_exit
def mode_form(args):
    """Return the form page, built from config.FORM_TEMPLATE.

    One html snippet is generated per entry in config.FORM_INPUTS and the
    joined result is substituted for 'form' in the template.  Defaults for
    server, book, size and engine are taken from args where present.

    NOTE: the element templates in config.FORM_ELEMENT_TYPES are filled
    from locals(), so the names of the loop variables (id, title, type,
    source, classes, epilogue) and of `val` are effectively part of the
    template interface.  Do not rename them without checking config.
    """
    # `with` closes the files even if reading them raises.
    with open(config.FORM_TEMPLATE) as f:
        template = f.read()
    with open(config.FONT_LIST_INCLUDE) as f:
        # one entry per non-blank line
        font_list = [x.strip() for x in f if x.strip()]
    server = args.get('server', config.DEFAULT_SERVER)
    book = args.get('book')
    size = args.get('booksize', config.DEFAULT_SIZE)
    engine = args.get('engine', config.DEFAULT_ENGINE)
    # Values that a form input can name as its `source`.
    d = {
        'server_options': optionise(get_server_list(), default=server),
        'book_options': optionise(twiki_wrapper.get_book_list(server), default=book),
        'size_options': optionise(get_size_list(), default=size),
        'engines': optionise(config.ENGINES.keys(), default=engine),
        'pdf_types': optionise(sorted(k for k, v in config.CGI_MODES.iteritems() if v[0])),
        'css': get_default_css(server),
        'font_links': listify(font_links()),
        'font_list': listify(font_list),
        'default_license' : config.DEFAULT_LICENSE,
        'licenses' : optionise(config.LICENSES, default=config.DEFAULT_LICENSE),
        'yes': 'yes',
        None: '',
    }

    form = []
    for id, title, type, source, classes, epilogue in config.FORM_INPUTS:
        val = d.get(source, '')
        e = config.FORM_ELEMENT_TYPES[type] % locals()
        form.append('\n<div id="%(id)s_div" class="form-item %(classes)s">\n'
                    '<div class="input_title">%(title)s</div>\n'
                    '<div class="input_contents"> %(e)s %(epilogue)s\n</div>'
                    '</div>\n' % locals())

    # Debugging aid: log any mismatch between the arguments the cgi accepts
    # and the inputs the form offers ('ul' entries are not counted).
    _valid_inputs = set(ARG_VALIDATORS)
    _form_inputs = set(x[0] for x in config.FORM_INPUTS if x[2] != 'ul')
    log("valid but not used inputs: %s" % (_valid_inputs - _form_inputs))
    log("invalid form inputs: %s" % (_form_inputs - _valid_inputs))

    return template % {'form': ''.join(form)}
188 class Context(object):
189 """Work out what to show the caller. The method/destination matrix:
191 [dest/method] sync async poll
192 archive.org url id id
193 download data . .
194 html html 1 . html 2
195 nowhere url id id
197 'html 1' is dripfed progress reports; 'html 2' polls via
198 javascript. 'id' is the book filename. 'url' is a full url
199 locating the file on archive.org or the objavi server. '.' means
200 unimplemented.
203 pollfile = None
204 def __init__(self, args):
205 self.bookid = args.get('book')
206 self.server = args.get('server', config.DEFAULT_SERVER)
207 self.mode = args.get('mode', 'book')
208 extension = config.CGI_MODES.get(self.mode)[1]
209 self.bookname = make_book_name(self.bookid, self.server, extension)
210 self.destination = args.get('destination', config.DEFAULT_CGI_DESTINATION)
211 self.callback = args.get('callback', None)
212 self.method = args.get('method', config.CGI_DESTINATIONS[self.destination]['default'])
213 self.template, self.mimetype = config.CGI_DESTINATIONS[self.destination][self.method]
214 if HTTP_HOST:
215 self.bookurl = "http://%s/books/%s" % (HTTP_HOST, self.bookname,)
216 else:
217 self.bookurl = "books/%s" % (self.bookname,)
219 self.details_url, self.s3url = find_archive_urls(self.bookid, self.bookname)
220 self.booki_group = args.get('booki-group')
221 self.booki_user = args.get('booki-user')
222 self.start()
224 def start(self):
225 """Begin (and in many cases, finish) http output.
227 In asynchronous modes, fork and close down stdout.
229 log(self.template, self.mimetype, self.destination, self.method)
230 if self.template is not None:
231 progress_list = ''.join('<li id="%s">%s</li>\n' % x[:2] for x in config.PROGRESS_POINTS
232 if self.mode in x[2])
233 d = {
234 'book': self.bookid,
235 'bookname': self.bookname,
236 'progress_list': progress_list,
237 'details_url': self.details_url,
238 's3url': self.s3url,
239 'bookurl': self.bookurl,
241 f = open(self.template)
242 content = f.read() % d
243 f.close()
244 else:
245 content = ''
247 if self.method == 'sync':
248 print 'Content-type: %s\n\n%s' %(self.mimetype, content)
249 else:
250 output_blob_and_shut_up(content, self.mimetype)
251 log(sys.stdout, sys.stderr, sys.stdin)
252 if os.fork():
253 os._exit(0)
254 sys.stdout.close()
255 sys.stdin.close()
256 log(sys.stdout, sys.stderr, sys.stdin)
259 def finish(self, book):
260 """Print any final http content."""
261 book.publish_shared(self.booki_group, self.booki_user)
262 if self.destination == 'archive.org':
263 book.publish_s3()
264 elif self.destination == 'download' and self.method == 'sync':
265 f = open(book.publish_file)
266 data = f.read()
267 f.close()
268 output_blob_and_exit(data, config.CGI_MODES[self.mode][2], self.bookname)
271 def log_notifier(self, message):
272 """Send messages to the log only."""
273 log('******* got message "%s"' %message)
275 def callback_notifier(self, message):
276 """Call the callback url with each message."""
277 log('in callback_notifier')
278 pid = os.fork()
279 if pid:
280 log('child %s is doing callback with message %r' % (pid, message, ))
281 return
282 from urllib2 import urlopen, URLError
283 from urllib import urlencode
284 data = urlencode({'message': message})
285 try:
286 f = urlopen(self.callback, data)
287 time.sleep(2)
288 f.close()
289 except URLError, e:
290 #traceback.print_exc()
291 log("ERROR in callback:\n %r\n %s %s" % (e.url, e.code, e.msg))
292 os._exit(0)
294 def javascript_notifier(self, message):
295 """Print little bits of javascript which will be appended to
296 an unfinished html page."""
297 try:
298 if message.startswith('ERROR:'):
299 log('got an error! %r' % message)
300 print ('<b class="error-message">'
301 '%s\n'
302 '</b></body></html>' % message
304 else:
305 print ('<script type="text/javascript">\n'
306 'objavi_show_progress("%s");\n'
307 '</script>' % message
309 if message == config.FINISHED_MESSAGE:
310 print '</body></html>'
311 sys.stdout.flush()
312 except ValueError, e:
313 log("failed to send message %r, got exception %r" % (message, e))
315 def pollee_notifier(self, message):
316 """Append the message to a file that the remote server can poll"""
317 if self.pollfile is None or self.pollfile.closed:
318 self.pollfile = open(config.POLL_NOTIFY_PATH % self.bookname, 'a')
319 self.pollfile.write('%s\n' % message)
320 self.pollfile.flush()
321 #self.pollfile.close()
322 #if message == config.FINISHED_MESSAGE:
323 # self.pollfile.close()
325 def get_watchers(self):
326 """Based on the CGI arguments, return a likely set of notifier
327 methods."""
328 log('in get_watchers. method %r, callback %r, destination %r' %
329 (self.method, self.callback, self.destination))
330 watchers = set()
331 if self.method == 'poll':
332 watchers.add(self.pollee_notifier)
333 if self.method == 'async' and self.callback:
334 watchers.add(self.callback_notifier)
335 if self.method == 'sync' and self.destination == 'html':
336 watchers.add(self.javascript_notifier)
337 watchers.add(self.log_notifier)
338 log('watchers are %s' % watchers)
339 return watchers
def mode_book(args):
    """Make a pdf of the requested book and hand it to the Context.

    Serves the 'book', 'web' and 'newspaper' modes; the last two produce
    a simple pdf rather than a full book pdf.
    """
    # so we're making a pdf.
    context = Context(args)
    book_options = dict(page_settings=get_page_settings(args),
                        watchers=context.get_watchers(),
                        isbn=args.get('isbn'),
                        license=args.get('license'),
                        title=args.get('title'),
                        max_age=float(args.get('max-age', -1)))

    with Book(context.bookid, context.server, context.bookname,
              **book_options) as book:
        book.spawn_x()

        if 'toc_header' in args:
            book.toc_header = args['toc_header'].decode('utf-8')
        book.load_book()
        book.add_css(args.get('css'), context.mode)
        book.add_section_titles()

        pdf_mode = context.mode
        if pdf_mode == 'book':
            book.make_book_pdf()
        elif pdf_mode in ('web', 'newspaper'):
            book.make_simple_pdf(pdf_mode)
        if "rotate" in args:
            book.rotate180()

        book.publish_pdf()
        context.finish(book)

#These ones are similar enough to be handled by the one function
mode_newspaper = mode_web = mode_book
def mode_openoffice(args):
    """Make an openoffice document.  A whole lot of the inputs have no
    effect."""
    context = Context(args)
    book_options = dict(watchers=context.get_watchers(),
                        isbn=args.get('isbn'),
                        license=args.get('license'),
                        title=args.get('title'),
                        max_age=float(args.get('max-age', -1)))

    with Book(context.bookid, context.server, context.bookname,
              **book_options) as book:
        book.spawn_x()
        book.load_book()
        book.add_css(args.get('css'), 'openoffice')
        book.add_section_titles()
        book.make_oo_doc()
        context.finish(book)
def mode_epub(args):
    """Make an epub of the requested book and hand it to the Context."""
    log('making epub with\n%s' % pformat(args))
    #XXX need to catch and process lack of necessary arguments.
    context = Context(args)
    book_options = dict(watchers=context.get_watchers(),
                        title=args.get('title'),
                        max_age=float(args.get('max-age', -1)))

    with Book(context.bookid, context.server, context.bookname,
              **book_options) as book:
        book.make_epub(use_cache=config.USE_CACHED_IMAGES)
        context.finish(book)
def mode_bookizip(args):
    """Publish the requested book as a bookizip and hand it to the
    Context."""
    log('making bookizip with\n%s' % pformat(args))
    context = Context(args)
    book_options = dict(watchers=context.get_watchers(),
                        title=args.get('title'),
                        max_age=float(args.get('max-age', -1)))

    with Book(context.bookid, context.server, context.bookname,
              **book_options) as book:
        book.publish_bookizip()
        context.finish(book)
def mode_templated_html(args):
    """Render the requested book through the 'html_template' argument and
    hand it to the Context."""
    log('making templated html with\n%s' % pformat(args))
    context = Context(args)
    template = args.get('html_template')
    log(template)
    book_options = dict(watchers=context.get_watchers(),
                        title=args.get('title'),
                        max_age=float(args.get('max-age', -1)))

    with Book(context.bookid, context.server, context.bookname,
              **book_options) as book:
        book.make_templated_html(template=template)
        context.finish(book)
def mode_templated_html_zip(args):
    """Unimplemented placeholder: accepts the cgi arguments and does nothing.

    NOTE(review): presumably meant to become a zipped variant of
    mode_templated_html.  As it stands, a request dispatched here produces
    no output at all -- confirm whether that is intended.
    """
    pass
429 def main():
430 args = parse_args(ARG_VALIDATORS)
431 mode = args.get('mode')
432 if mode is None and 'book' in args:
433 mode = 'book'
435 global CGI_CONTEXT
436 CGI_CONTEXT = 'SERVER_NAME' in os.environ or args.get('cgi-context', 'no').lower() in '1true'
438 if not args and not CGI_CONTEXT:
439 print __doc__
440 sys.exit()
442 output_function = globals().get('mode_%s' % mode, mode_form)
443 output_function(args)
if __name__ == '__main__':
    # cgitb tracebacks are only switched on for requests whose REMOTE_ADDR
    # is listed in config.CGITB_DOMAINS.
    if config.CGITB_DOMAINS:
        if os.environ.get('REMOTE_ADDR') in config.CGITB_DOMAINS:
            import cgitb
            cgitb.enable()
    init_log()
    main()