improve documentation and clarity of parse_outline
[objavi2.git] / objavi2.py
blobb360b25a8511df7dfb3a1cce1273eace59a984d3
1 #!/usr/bin/python
3 # Part of Objavi2, which turns html manuals into books
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Make a pdf from the specified book."""
22 from __future__ import with_statement
24 import os, sys
25 import cgi
26 import re, time
27 from urllib2 import urlopen
28 from getopt import gnu_getopt
30 from fmbook import log, Book
32 import config
33 from config import SERVER_DEFAULTS, DEFAULT_SERVER
# HTML templates for the input form and the progress page.  The paths
# are made absolute once, at import time, relative to whatever the
# working directory is at that moment.
FORM_TEMPLATE = os.path.abspath('templates/form.html')
PROGRESS_TEMPLATE = os.path.abspath('templates/progress.html')
def isfloat(s):
    """Return True if float() can parse <s>, otherwise False.

    The test is exactly "does float(s) succeed", so surrounding
    whitespace, signs and exponents are all accepted.  Values that
    float() rejects with a TypeError (None, lists, ...) count as "not
    a float" rather than propagating the exception, so the function
    is safe to use as a validator on any input.
    """
    try:
        float(s)
    except (ValueError, TypeError):
        return False
    return True
def isfloat_or_auto(s):
    """Validator for settings that may be left automatic: accepts
    anything isfloat() accepts, the empty string, or 'auto' in any
    letter case."""
    if isfloat(s):
        return True
    return s.lower() in ('', 'auto')
def is_isbn(s):
    """Return True if <s> looks like an ISBN, otherwise False.

    Hyphens are ignored.  What remains must be 10 or 13 characters (or
    9 or 12 if the check digit is missing): digits only, except that
    the final character may also be 'X', 'x' or '*'.  The check digit
    itself is not verified.
    """
    digits = s.replace('-', '')
    if len(digits) not in (9, 10, 12, 13):
        return False
    # \Z rather than $: $ would also match just before a trailing
    # newline, letting "12345678X\n" through as a 10 character ISBN.
    return re.match(r'^\d+[\dXx*]\Z', digits) is not None
# ARG_VALIDATORS is a mapping between the expected cgi arguments and
# functions to validate their values. (None means no validation).
# A validator receives the raw string value and returns something
# true if the value is acceptable.
ARG_VALIDATORS = {
    # can be: BlahBlah/Blah_Blah.  Written as word(/word)* so that the
    # match is linear; the equivalent (\w+/?)*\w+ backtracks
    # exponentially on long non-matching input.
    "book": re.compile(r'^\w+(/\w+)*$').match,
    "css": None, # an url, empty (for default), or css content
    "title": lambda x: len(x) < 999,
    #"header": None, # header text, UNUSED
    "isbn": is_isbn,
    "license": config.LICENSES.__contains__,
    "server": SERVER_DEFAULTS.__contains__,
    "engine": config.ENGINES.__contains__,
    "booksize": config.PAGE_SIZE_DATA.__contains__,
    "page_width": isfloat,
    "page_height": isfloat,
    "gutter": isfloat_or_auto,
    "top_margin": isfloat_or_auto,
    "side_margin": isfloat_or_auto,
    "bottom_margin": isfloat_or_auto,
    "columns": isfloat_or_auto,
    "column_margin": isfloat_or_auto,
    # Exact spellings only: a substring test against '1true0false'
    # would also accept '', 'rue', 'e0f' and so on.
    "cgi-context": lambda x: x.lower() in ('1', 'true', '0', 'false'),
    "mode": config.CGI_MODES.__contains__,
    # valid when the first element of the mode's CGI_MODES entry is true
    "pdftype": lambda x: config.CGI_MODES.get(x, [False])[0],
    "rotate": u"rotate".__eq__,
}

# Sorted so that the help text lists the arguments in a stable order.
__doc__ += '\nValid arguments are: %s.\n' % ', '.join(sorted(ARG_VALIDATORS))
def parse_args():
    """Collect the arguments named in ARG_VALIDATORS from the CGI
    query and from the command line (given as --title='A Book'), and
    return a dictionary of those that pass validation.

    A value from the CGI query takes precedence over a command line
    option of the same name.  Arguments that are absent are left out
    of the result; arguments that fail their validator are logged and
    left out as well.
    """
    form = cgi.FieldStorage()
    long_options = [name + '=' for name in ARG_VALIDATORS]
    options, args = gnu_getopt(sys.argv[1:], '', long_options)
    options = dict(options)
    log(options)
    data = {}
    for key, validator in ARG_VALIDATORS.items():
        value = form.getfirst(key, options.get('--' + key, None))
        log('%s: %s' % (key, value), debug='STARTUP')
        if value is None:
            continue
        if validator is not None and not validator(value):
            log("argument '%s' is not valid ('%s')" % (key, value))
            continue
        data[key] = value

    log(data, debug='STARTUP')
    return data
def get_server_list():
    """Return the names of the servers in SERVER_DEFAULTS as a sorted
    list."""
    names = SERVER_DEFAULTS.keys()
    return sorted(names)
def get_book_list(server):
    """Ask the server for a list of books.  Floss Manual TWikis keep such a list at
    /bin/view/TWiki/WebLeftBarWebsList?skin=text but it needs a bit of processing

    If BOOK_LIST_CACHE is non-zero, the book list won't be re-fetched
    in that many seconds, rather it will be read from disk.

    Returns a sorted list of book names (the part of each
    /bin/view/<name>/WebHome link found in the page).
    """
    if config.BOOK_LIST_CACHE:
        cache_name = os.path.join(config.BOOK_LIST_CACHE_DIR, '%s.booklist' % server)
        if (os.path.exists(cache_name) and
            os.stat(cache_name).st_mtime + config.BOOK_LIST_CACHE > time.time()):
            # the cache is fresh enough; names are whitespace separated
            with open(cache_name) as f:
                return f.read().split()

    url = 'http://%s/bin/view/TWiki/WebLeftBarWebsList?skin=text' % server
    #XXX should use lxml
    log(url)
    f = urlopen(url)
    # try/finally rather than 'with': the urllib2 response object is
    # not a context manager.
    try:
        s = f.read()
    finally:
        f.close()
    items = sorted(re.findall(r'/bin/view/([\w/]+)/WebHome', s))
    if config.BOOK_LIST_CACHE:
        with open(cache_name, 'w') as f:
            f.write('\n'.join(items))
    return items
def get_size_list():
    """Return (name, class, description) triples for every entry in
    config.PAGE_SIZE_DATA, ordered by increasing page area.

    Entries without a 'pointsize' sort first (with an area of zero)
    and are described by their bare name.
    """
    def describe(name, pointsize, klass):
        # Build a tuple that sorts by area; the area is dropped later.
        if not pointsize:
            return (0, name, klass, name) # presumably 'custom'
        mmx = pointsize[0] * config.POINT_2_MM
        mmy = pointsize[1] * config.POINT_2_MM
        return (mmx * mmy, name, klass,
                '%s (%dmm x %dmm)' % (name, mmx, mmy))

    rows = [describe(k, v.get('pointsize'), v.get('class', ''))
            for k, v in config.PAGE_SIZE_DATA.iteritems()]
    rows.sort()
    return [row[1:] for row in rows]
def optionise(items, default=None):
    """Turn <items> into a newline separated string of html <option>
    elements, as would fit inside <select> tags.

    Each item is a plain string (used as both value and label), a
    (value, name) couple, or a (value, class, name) triple.  The item
    whose value equals <default> is marked as selected.
    """
    # (selected, unselected) templates for each item shape
    couple = ('<option selected="selected" value="%s">%s</option>',
              '<option value="%s">%s</option>')
    triple = ('<option selected="selected" value="%s" class="%s">%s</option>',
              '<option value="%s" class="%s">%s</option>')

    lines = []
    for item in items:
        if isinstance(item, str):
            item = (item, item)
        if len(item) == 2:
            selected, plain = couple
        else:
            selected, plain = triple
        if item[0] == default:
            lines.append(selected % item)
        else:
            lines.append(plain % item)

    return '\n'.join(lines)
def listify(items):
    """Wrap each item in <li> tags and join the results with
    newlines, ready to drop into a <ul> or <ol> element."""
    wrapped = ['<li>%s</li>' % item for item in items]
    return '\n'.join(wrapped)
def get_default_css(server=DEFAULT_SERVER, mode='book'):
    """Get the default CSS text for the selected server.

    The file name is looked up in SERVER_DEFAULTS[server] under the
    key 'css-<mode>'; a KeyError is raised if either the server or
    that key is unknown.  Returns the contents of the file.
    """
    log(server)
    cssfile = SERVER_DEFAULTS[server]['css-%s' % mode]
    log(cssfile)
    # 'with' closes the file even if the read fails
    with open(cssfile) as f:
        return f.read()
def font_links():
    """Return a list of html links to example pdfs, one for each
    entry in config.FONT_EXAMPLE_SCRIPT_DIR.

    Entries whose names are not purely alphanumeric are skipped with
    a logged warning.
    """
    links = []
    for script in os.listdir(config.FONT_EXAMPLE_SCRIPT_DIR):
        if script.isalnum():
            links.append('<a href="%s?script=%s">%s</a>' % (config.FONT_LIST_URL, script, script))
        else:
            log("warning: font-sample %s won't work; skipping" % script)
    return links
def make_progress_page(book, bookname, mode):
    """Print the progress page and return a callback that updates it.

    The page is PROGRESS_TEMPLATE filled in with the book, the output
    file name, and a list of those config.PROGRESS_POINTS whose third
    element contains <mode>.

    The returned function takes a message, prints a <script> element
    that passes it to objavi_show_progress() in the page, and flushes
    stdout so the browser sees it straight away.  The message
    'finished' also closes the html document.
    """
    # 'with' closes the template even if the read fails
    with open(PROGRESS_TEMPLATE) as f:
        template = f.read()
    progress_list = ''.join('<li id="%s">%s</li>\n' % x[:2] for x in config.PROGRESS_POINTS
                            if mode in x[2])

    d = {
        'book': book,
        'bookname': bookname,
        'progress_list': progress_list,
    }
    print(template % d)

    def progress_notifier(message):
        # NOTE(review): message is interpolated into javascript
        # unescaped; presumably it only ever comes from fixed strings
        # in this project -- confirm against the callers.
        print('<script type="text/javascript">\n'
              'objavi_show_progress("%s");\n'
              '</script>' % message)
        if message == 'finished':
            print('</body></html>')
        sys.stdout.flush()
    return progress_notifier
def print_progress(message):
    """Progress callback for non-CGI use: print the message to
    stdout."""
    line = '******* got message "%s"' % message
    print(line)
def make_book_name(book, server):
    """Build the pdf file name: the alphanumeric characters of the
    book name, the server's language, and the current local time.

    An unknown server falls back to the language of DEFAULT_SERVER.
    """
    fallback = SERVER_DEFAULTS[DEFAULT_SERVER]
    lang = SERVER_DEFAULTS.get(server, fallback)['lang']
    safe_name = ''.join([c for c in book if c.isalnum()])
    timestamp = time.strftime('%Y.%m.%d-%H.%M.%S')
    return '%s-%s-%s.pdf' % (safe_name, lang, timestamp)
def get_page_settings(args):
    """Find the size and any optional layout settings.

    args['booksize'] is either a keyword describing a size or
    'custom'.  If it is custom, the form is inspected for specific
    dimensions -- otherwise these are ignored.

    The margins, gutter, number of columns, and column
    margins all set themselves automatically based on the page
    dimensions, but they can be overridden.  Any that are set are
    collected here.

    Returns a dictionary of settings.  Raises KeyError if the size is
    unknown, or if it is 'custom' and no valid page_width and
    page_height were supplied.
    """
    # get all the values including sizes first
    # the sizes are found as 'page_width' and 'page_height',
    # but the Book class expects them as a 'pointsize' tuple, so
    # they are easily ignored.
    settings = {}
    for k, extrema in config.PAGE_EXTREMA.iteritems():
        try:
            v = float(args.get(k))
        except (ValueError, TypeError):
            # missing, 'auto', or otherwise not a number
            continue
        min_val, max_val, multiplier = extrema
        # Written as a positive range test so that NaN, which compares
        # false against everything, is rejected too.
        if not (min_val <= v <= max_val):
            # one tuple with both values: "% (v,) + extrema" would apply
            # % first and fail for want of a second argument.
            log('rejecting %s: outside %s' % (v, extrema))
        else:
            log('found %s=%s' % (k, v))
            settings[k] = v * multiplier #convert to points in many cases

    # now if args['booksize'] is not 'custom', the width and height found
    # above are ignored.

    size = args.get('booksize', config.DEFAULT_SIZE)
    settings.update(config.PAGE_SIZE_DATA[size])

    if size == 'custom':
        #will raise KeyError if width, height aren't set
        settings['pointsize'] = (settings['page_width'], settings['page_height'])
        del settings['page_width']
        del settings['page_height']

    return settings
def output_and_exit(f):
    """Decorator for the mode_* functions.

    The wrapped function first prints the http Content-type header
    (only when CGI_CONTEXT is true), then calls <f> with the argument
    dictionary, then exits the process.
    """
    def output(args):
        if CGI_CONTEXT:
            # the "\n" plus print's own newline give the blank line
            # that ends the http headers
            print("Content-type: text/html; charset=utf-8\n")
        f(args)
        sys.exit()
    return output
@output_and_exit
def mode_booklist(args):
    """Print the book list of the requested (or default) server as
    html <option> elements, with args['book'] selected."""
    server = args.get('server', config.DEFAULT_SERVER)
    books = get_book_list(server)
    print(optionise(books, default=args.get('book')))
@output_and_exit
def mode_css(args):
    """Print the default CSS for the requested (or default) server
    and pdf type."""
    #XX sending as text/html, but it doesn't really matter
    server = args.get('server', config.DEFAULT_SERVER)
    pdftype = args.get('pdftype', 'book')
    print(get_default_css(server, pdftype))
@output_and_exit
def mode_form(args):
    """Print the html input form, built from FORM_TEMPLATE and
    config.FORM_INPUTS, with defaults taken from <args>."""
    f = open(FORM_TEMPLATE)
    template = f.read()
    f.close()
    # one font name per line; blank lines are dropped
    f = open(config.FONT_LIST_INCLUDE)
    font_list = [x.strip() for x in f if x.strip()]
    f.close()
    server = args.get('server', config.DEFAULT_SERVER)
    book = args.get('book')
    size = args.get('booksize', config.DEFAULT_SIZE)
    engine = args.get('engine', config.DEFAULT_ENGINE)
    # Values available to form inputs, keyed by the 'source' field of
    # each config.FORM_INPUTS row.
    d = {
        'server_options': optionise(get_server_list(), default=server),
        'book_options': optionise(get_book_list(server), default=book),
        'size_options': optionise(get_size_list(), default=size),
        'engines': optionise(config.ENGINES.keys(), default=engine),
        'pdf_types': optionise(sorted(k for k, v in config.CGI_MODES.iteritems() if v[0])),
        'css': get_default_css(server),
        'font_links': listify(font_links()),
        'font_list': listify(font_list),
        'default_license' : config.DEFAULT_LICENSE,
        'licenses' : optionise(config.LICENSES, default=config.DEFAULT_LICENSE),
        None: '',  # an input with a source of None gets an empty value
    }

    form = []
    # The element templates are filled in with "% locals()", so they can
    # refer to any local name here (id, title, type, source, classes,
    # epilogue, val, ...).  Renaming a local would break the templates
    # in config.
    for id, title, type, source, classes, epilogue in config.FORM_INPUTS:
        val = d.get(source, '')
        e = config.FORM_ELEMENT_TYPES[type] % locals()
        form.append('\n<div id="%(id)s_div" class="form-item %(classes)s">\n'
                    '<div class="input_title">%(title)s</div>\n'
                    '<div class="input_contents"> %(e)s %(epilogue)s\n</div>'
                    '</div>\n' % locals())

    # Sanity check, always on: log any mismatch between the arguments
    # that are validated and the inputs the form offers.
    if True:
        _valid_inputs = set(ARG_VALIDATORS)
        _form_inputs = set(x[0] for x in config.FORM_INPUTS)
        log("valid but not used inputs: %s" % (_valid_inputs - _form_inputs))
        log("invalid form inputs: %s" % (_form_inputs - _valid_inputs))

    print template % {'form': ''.join(form)}
@output_and_exit
def mode_book(args):
    """Make the pdf described by <args> and publish it.

    Also serves the 'web' and 'newspaper' modes, which differ only in
    which pdf-making method of the Book is called.
    """
    # so we're making a pdf.
    mode = args.get('mode', 'book')
    bookid = args.get('book')
    server = args.get('server', config.DEFAULT_SERVER)
    engine = args.get('engine', config.DEFAULT_ENGINE)
    page_settings = get_page_settings(args)
    bookname = make_book_name(bookid, server)

    # In a CGI context progress goes to the browser via the progress
    # page; otherwise it is simply printed.
    if CGI_CONTEXT:
        progress_bar = make_progress_page(bookid, bookname, mode)
    else:
        progress_bar = print_progress

    with Book(bookid, server, bookname, page_settings=page_settings, engine=engine,
              watcher=progress_bar, isbn=args.get('isbn'),
              license=args.get('license')) as book:
        # NOTE(review): only spawn_x() is taken to be conditional on
        # CGI_CONTEXT; load() and everything after it run in both
        # contexts -- confirm.
        if CGI_CONTEXT:
            book.spawn_x()
        book.load()
        book.set_title(args.get('title'))
        book.add_css(args.get('css'), mode)
        book.add_section_titles()

        if mode == 'book':
            book.make_book_pdf()
        elif mode in ('web', 'newspaper'):
            book.make_simple_pdf(mode)

        # the validator only lets the value "rotate" through, so
        # presence of the key is enough
        if "rotate" in args:
            book.rotate180()

        book.publish_pdf()
        book.notify_watcher('finished')
# These modes are similar enough to 'book' to be handled by the one
# function; main() finds them by the name 'mode_<mode>'.
mode_newspaper = mode_book
mode_web = mode_book
def main():
    """Parse the arguments, work out whether this is a CGI request,
    and hand over to the mode_* function for the requested mode.

    The mode defaults to 'book' if a book was named.  If no mode_*
    function matches (including when no mode is given at all), the
    input form is shown.  With no arguments at all outside of a CGI
    context, the module documentation is printed instead.
    """
    global CGI_CONTEXT

    args = parse_args()
    mode = args.get('mode')
    if mode is None and 'book' in args:
        mode = 'book'

    # Compare against the exact spellings: a substring test against
    # '1true' would also treat '', 'e', 'tr' and so on as true.
    CGI_CONTEXT = ('SERVER_NAME' in os.environ or
                   args.get('cgi-context', 'no').lower() in ('1', 'true'))

    if not args and not CGI_CONTEXT:
        print(__doc__)
        sys.exit()

    output_function = globals().get('mode_%s' % mode, mode_form)
    output_function(args)
if __name__ == '__main__':
    # Browser-visible tracebacks are enabled only when the client's
    # REMOTE_ADDR is listed in config.CGITB_DOMAINS.
    cgitb_domains = config.CGITB_DOMAINS
    if cgitb_domains and os.environ.get('REMOTE_ADDR') in cgitb_domains:
        import cgitb
        cgitb.enable()
    main()