Less strange line wrapping in form
[objavi2.git] / objavi2.py
blob61d7281069a7ee661038f30d6711e32a24448aaf
1 #!/usr/bin/python
3 # Part of Objavi2, which turns html manuals into books
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Make a pdf from the specified book."""
22 from __future__ import with_statement
24 import os, sys
25 import cgi
26 import re, time
27 from urllib2 import urlopen
28 from getopt import gnu_getopt
30 from fmbook import log, Book
32 import config
33 from config import SERVER_DEFAULTS, DEFAULT_SERVER
# Locations of the html templates.  The relative paths are made
# absolute against the working directory in effect at import time.
FORM_TEMPLATE = os.path.abspath('templates/form.html')
PROGRESS_TEMPLATE = os.path.abspath('templates/progress.html')
def isfloat(s):
    """Return True if *s* can be read as a finite floating point number.

    Surrounding whitespace is tolerated, because float() tolerates it.
    Values that float() accepts but which make no sense as a
    measurement ('nan', 'inf', '-inf') are rejected, as is anything
    float() cannot handle at all (e.g. None).
    """
    try:
        value = float(s)
    except (ValueError, TypeError):
        return False
    # Only finite numbers satisfy x - x == 0: inf - inf and nan - nan
    # both give nan, which compares unequal to everything.
    return value - value == 0.0
def isfloat_or_auto(s):
    """Accept whatever isfloat() accepts, and also the empty string
    or the word 'auto' (in any letter case)."""
    if isfloat(s):
        return True
    return s.lower() in ('', 'auto')
def is_isbn(s):
    """Return True if *s* looks like an ISBN, False otherwise.

    Hyphens are ignored.  What remains must be 10 or 13 characters
    long (or 9 or 12 if the check digit is missing), all digits except
    for the last, which may also be 'X', 'x' or '*'.  No checksum is
    calculated.
    """
    digits = s.replace('-', '')
    if len(digits) not in (9, 10, 12, 13):
        return False
    # \Z rather than $, because $ would let a trailing newline through.
    return re.match(r'^\d+[\dXx*]\Z', digits) is not None
# ARG_VALIDATORS is a mapping between the expected cgi arguments and
# functions to validate their values. (None means no validation).
# Each validator is called with the raw string value and should return
# something true if the value is acceptable.
ARG_VALIDATORS = {
    # Slash separated words, e.g. BlahBlah/Blah_Blah.  The pattern has
    # no nested quantifiers, so hostile input cannot provoke
    # exponential backtracking.
    "book": re.compile(r'^\w+(?:/\w+)*$').match,
    "css": None, # an url, empty (for default), or css content
    "title": lambda x: len(x) < 999,
    #"header": None, # header text, UNUSED
    "isbn": is_isbn,
    "license": config.LICENSES.__contains__,
    "server": SERVER_DEFAULTS.__contains__,
    "engine": config.ENGINES.__contains__,
    "booksize": config.PAGE_SIZE_DATA.__contains__,
    "page_width": isfloat,
    "page_height": isfloat,
    "gutter": isfloat_or_auto,
    "top_margin": isfloat_or_auto,
    "side_margin": isfloat_or_auto,
    "bottom_margin": isfloat_or_auto,
    "columns": isfloat_or_auto,
    "column_margin": isfloat_or_auto,
    # Compare against whole words: a substring test would also accept
    # fragments such as '' or 'rue'.
    "cgi-context": lambda x: x.lower() in ('1', 'true', '0', 'false'),
    "mode": config.CGI_MODES.__contains__,
    "pdftype": lambda x: config.CGI_MODES.get(x, [False])[0],
    "rotate": u"yes".__eq__,
    "grey_scale": u"yes".__eq__,
}

__doc__ += '\nValid arguments are: %s.\n' % ', '.join(ARG_VALIDATORS.keys())
def parse_args():
    """Collect the arguments named in ARG_VALIDATORS from the CGI
    query or, failing that, from the command line (where they look
    like --title='A Book').

    Values that fail their validator are logged and dropped.  Returns
    a dictionary of the arguments that were both given and valid.
    """
    form = cgi.FieldStorage()
    long_options = [name + '=' for name in ARG_VALIDATORS]
    parsed, _positional = gnu_getopt(sys.argv[1:], '', long_options)
    cmdline = dict(parsed)
    accepted = {}
    for name, check in ARG_VALIDATORS.items():
        # a CGI value wins over a command line one
        raw = form.getfirst(name, cmdline.get('--' + name, None))
        log('%s: %s' % (name, raw), debug='STARTUP')
        if raw is None:
            continue
        if check is not None and not check(raw):
            log("argument '%s' is not valid ('%s')" % (name, raw))
            continue
        accepted[name] = raw

    log(accepted, debug='STARTUP')
    return accepted
def get_server_list():
    """Return the names of the configured servers in sorted order."""
    names = list(SERVER_DEFAULTS.keys())
    names.sort()
    return names
def get_book_list(server):
    """Ask the server for a list of books.  Floss Manual TWikis keep such a list at
    /bin/view/TWiki/WebLeftBarWebsList?skin=text but it needs a bit of processing.

    If BOOK_LIST_CACHE is non-zero, the book list won't be re-fetched
    in that many seconds, rather it will be read from disk.

    Returns a sorted list of book names.
    """
    if config.BOOK_LIST_CACHE:
        cache_name = os.path.join(config.BOOK_LIST_CACHE_DIR, '%s.booklist' % server)
        if (os.path.exists(cache_name) and
            os.stat(cache_name).st_mtime + config.BOOK_LIST_CACHE > time.time()):
            # the cache is still fresh, so use it
            with open(cache_name) as f:
                return f.read().split()

    url = 'http://%s/bin/view/TWiki/WebLeftBarWebsList?skin=text' % server
    #XXX should use lxml
    log(url)
    f = urlopen(url)
    # urllib2 responses are not context managers, hence try/finally
    try:
        s = f.read()
    finally:
        f.close()
    items = sorted(re.findall(r'/bin/view/([\w/]+)/WebHome', s))
    if config.BOOK_LIST_CACHE:
        with open(cache_name, 'w') as f:
            f.write('\n'.join(items))
    return items
def get_size_list():
    """Return a (name, class, label) tuple for each entry of
    config.PAGE_SIZE_DATA, ordered by increasing page area.

    Sizes with a 'pointsize' get a label that includes the dimensions
    in millimetres; sizes without one (presumably 'custom') count as
    having zero area and are labelled with their bare name.
    """
    rows = []
    for name, spec in config.PAGE_SIZE_DATA.iteritems():
        pointsize = spec.get('pointsize')
        klass = spec.get('class', '')
        if pointsize:
            width_mm = pointsize[0] * config.POINT_2_MM
            height_mm = pointsize[1] * config.POINT_2_MM
            label = '%s (%dmm x %dmm)' % (name, width_mm, height_mm)
            rows.append((width_mm * height_mm, name, klass, label))
        else:
            rows.append((0, name, klass, name))
    # the area comes first in each row, so it dominates the ordering
    rows.sort()
    return [row[1:] for row in rows]
def optionise(items, default=None):
    """Turn a sequence of items into html <option> elements, one per
    line, ready to go between <select> tags.

    An item is a plain string (used as both value and name), a
    (value, name) couple, or a (value, class, name) triple.  The item
    whose value equals *default* is marked as selected.
    """
    # keyed by (item is a couple, item is the default)
    templates = {
        (True, True): '<option selected="selected" value="%s">%s</option>',
        (True, False): '<option value="%s">%s</option>',
        (False, True): '<option selected="selected" value="%s" class="%s">%s</option>',
        (False, False): '<option value="%s" class="%s">%s</option>',
    }
    rendered = []
    for item in items:
        if isinstance(item, str):
            item = (item, item)
        is_couple = len(item) == 2
        is_default = item[0] == default
        rendered.append(templates[(is_couple, is_default)] % item)

    return '\n'.join(rendered)
def listify(items):
    """Wrap each item in <li> tags and join the results with newlines,
    giving something that fits inside a <ul> or <ol> element."""
    rows = ['<li>%s</li>' % item for item in items]
    return '\n'.join(rows)
def get_default_css(server=DEFAULT_SERVER, mode='book'):
    """Get the default CSS text for the selected server.

    The file to read is the 'css-<mode>' entry of the server's
    SERVER_DEFAULTS; an unknown server or mode raises KeyError.
    """
    log(server)
    cssfile = SERVER_DEFAULTS[server]['css-%s' % mode]
    log(cssfile)
    # the with block closes the file even if the read fails
    with open(cssfile) as f:
        return f.read()
def font_links():
    """Return html links to the example pdfs, one for each script
    found in config.FONT_EXAMPLE_SCRIPT_DIR.

    Entries whose names are not purely alphanumeric are skipped with
    a logged warning."""
    anchors = []
    for name in os.listdir(config.FONT_EXAMPLE_SCRIPT_DIR):
        if name.isalnum():
            anchors.append('<a href="%s?script=%s">%s</a>'
                           % (config.FONT_LIST_URL, name, name))
        else:
            log("warning: font-sample %s won't work; skipping" % name)
    return anchors
def make_progress_page(book, bookname, mode):
    """Return a function that will notify the user of progress.  In
    CGI context this means making an html page to display the
    messages, which are then sent as javascript snippets on the same
    connection.

    book and bookname are substituted into the progress template;
    mode selects which of config.PROGRESS_POINTS are listed on the
    page."""
    if not CGI_CONTEXT:
        # Outside CGI the notifier only builds a string and returns it;
        # nothing is printed here.
        return lambda message: '******* got message "%s"' %message

    f = open(PROGRESS_TEMPLATE)
    template = f.read()
    f.close()
    # One <li> per progress point that applies to this mode.  Each
    # point is indexed as x[0], x[1] for the id and text, and x[2] for
    # the collection of modes it applies to.
    progress_list = ''.join('<li id="%s">%s</li>\n' % x[:2] for x in config.PROGRESS_POINTS
                            if mode in x[2])

    d = {
        'book': book,
        'bookname': bookname,
        'progress_list': progress_list,
    }
    # The page is printed immediately; messages follow on the same stream.
    print template % d
    def progress_notifier(message):
        print ('<script type="text/javascript">\n'
               'objavi_show_progress("%s");\n'
               '</script>' % message
               )
        if message == 'finished':
            # the last message also closes the html document
            print '</body></html>'
        # NOTE(review): indentation is lost in this view of the file;
        # the flush is assumed to run for every message -- confirm.
        sys.stdout.flush()
    return progress_notifier
def make_book_name(book, server, suffix='.pdf'):
    """Build an output file name out of the book's alphanumeric
    characters, the server's language, the current local time, and
    *suffix*.  An unknown server falls back to the settings of
    DEFAULT_SERVER."""
    defaults = SERVER_DEFAULTS.get(server, SERVER_DEFAULTS[DEFAULT_SERVER])
    lang = defaults['lang']
    safe_book = ''.join([c for c in book if c.isalnum()])
    stamp = time.strftime('%Y.%m.%d-%H.%M.%S')
    return '%s-%s-%s%s' % (safe_book, lang, stamp, suffix)
def get_page_settings(args):
    """Find the size and any optional layout settings.

    args['booksize'] is either a keyword describing a size or
    'custom'.  If it is custom, the form is inspected for specific
    dimensions -- otherwise these are ignored.

    The margins, gutter, number of columns, and column
    margins all set themselves automatically based on the page
    dimensions, but they can be overridden.  Any that are are
    collected here.

    Returns a dictionary of settings.  Raises KeyError if the size is
    'custom' but no valid page_width and page_height were given.
    """
    # get all the values including sizes first
    # the sizes are found as 'page_width' and 'page_height',
    # but the Book class expects them as a 'pointsize' tuple, so
    # they are easily ignored.
    settings = {}
    for k, extrema in config.PAGE_EXTREMA.iteritems():
        try:
            v = float(args.get(k))
        except (ValueError, TypeError):
            # missing, or not a number (e.g. 'auto'): leave it unset
            continue
        min_val, max_val, multiplier = extrema
        # A positive range test, so that nan (which fails every
        # comparison) is rejected rather than let through.
        if not (min_val <= v <= max_val):
            log('rejecting %s: outside %s' % (v, extrema))
        else:
            log('found %s=%s' % (k, v))
            settings[k] = v * multiplier #convert to points in many cases

    # now if args['booksize'] is not 'custom', the width and height found
    # above are ignored.
    size = args.get('booksize', config.DEFAULT_SIZE)
    settings.update(config.PAGE_SIZE_DATA[size])

    #if args['mode'] is 'newspaper', then the number of columns is
    #automatically determined unless set -- otherwise default is 1.
    #(mode can be absent: main() infers 'book' without storing it.)
    if args.get('mode') == 'newspaper' and settings.get('columns') is None:
        settings['columns'] = 'auto'

    if args.get('grey_scale'):
        settings['grey_scale'] = True

    if size == 'custom':
        #will raise KeyError if width, height aren't set
        settings['pointsize'] = (settings['page_width'], settings['page_height'])
        del settings['page_width']
        del settings['page_height']

    settings['engine'] = args.get('engine', config.DEFAULT_ENGINE)
    return settings
293 def output_and_exit(f):
294 """Decorator: prefix function output with http headers and exit
295 immediately after."""
296 def output(args):
297 if CGI_CONTEXT:
298 print "Content-type: text/html; charset=utf-8\n"
299 f(args)
300 sys.exit()
301 return output
303 @output_and_exit
304 def mode_booklist(args):
305 print optionise(get_book_list(args.get('server', config.DEFAULT_SERVER)),
306 default=args.get('book'))
308 @output_and_exit
309 def mode_css(args):
310 #XX sending as text/html, but it doesn't really matter
311 print get_default_css(args.get('server', config.DEFAULT_SERVER), args.get('pdftype', 'book'))
@output_and_exit
def mode_form(args):
    """Print the html form, filled in with the available servers,
    books, sizes, engines, licenses and fonts.  Arguments already
    given (server, book, booksize, engine) choose the selected
    options."""
    f = open(FORM_TEMPLATE)
    template = f.read()
    f.close()
    f = open(config.FONT_LIST_INCLUDE)
    # keep the non-blank lines, stripped of surrounding whitespace
    font_list = [x.strip() for x in f if x.strip()]
    f.close()
    server = args.get('server', config.DEFAULT_SERVER)
    book = args.get('book')
    size = args.get('booksize', config.DEFAULT_SIZE)
    engine = args.get('engine', config.DEFAULT_ENGINE)
    # Values that form inputs can draw on, looked up by the 'source'
    # field of each config.FORM_INPUTS row.
    d = {
        'server_options': optionise(get_server_list(), default=server),
        'book_options': optionise(get_book_list(server), default=book),
        'size_options': optionise(get_size_list(), default=size),
        'engines': optionise(config.ENGINES.keys(), default=engine),
        'pdf_types': optionise(sorted(k for k, v in config.CGI_MODES.iteritems() if v[0])),
        'css': get_default_css(server),
        'font_links': listify(font_links()),
        'font_list': listify(font_list),
        'default_license' : config.DEFAULT_LICENSE,
        'licenses' : optionise(config.LICENSES, default=config.DEFAULT_LICENSE),
        'yes': 'yes',
        None: '',
    }

    form = []
    for id, title, type, source, classes, epilogue in config.FORM_INPUTS:
        val = d.get(source, '')
        # The element templates are filled from locals(), so the names
        # of the local variables here are part of the contract with
        # config.FORM_ELEMENT_TYPES: do not rename them.
        e = config.FORM_ELEMENT_TYPES[type] % locals()
        form.append('\n<div id="%(id)s_div" class="form-item %(classes)s">\n'
                    '<div class="input_title">%(title)s</div>\n'
                    '<div class="input_contents"> %(e)s %(epilogue)s\n</div>'
                    '</div>\n' % locals())

    if True:
        # sanity check: log any mismatch between the arguments that
        # are validated and the inputs the form actually offers
        _valid_inputs = set(ARG_VALIDATORS)
        _form_inputs = set(x[0] for x in config.FORM_INPUTS if x[2] != 'ul')
        log("valid but not used inputs: %s" % (_valid_inputs - _form_inputs))
        log("invalid form inputs: %s" % (_form_inputs - _valid_inputs))

    print template % {'form': ''.join(form)}
@output_and_exit
def mode_book(args):
    """Make a pdf of the requested book and publish it.

    args['mode'] (default 'book') chooses between the book layout and
    the simpler 'web' and 'newspaper' layouts."""
    # so we're making a pdf.
    mode = args.get('mode', 'book')
    bookid = args.get('book')
    server = args.get('server', config.DEFAULT_SERVER)
    engine = args.get('engine', config.DEFAULT_ENGINE)
    page_settings = get_page_settings(args)
    bookname = make_book_name(bookid, server)
    progress_bar = make_progress_page(bookid, bookname, mode)

    with Book(bookid, server, bookname, page_settings=page_settings, engine=engine,
              watcher=progress_bar, isbn=args.get('isbn'),
              license=args.get('license')) as book:
        if CGI_CONTEXT:
            book.spawn_x()
        book.load()
        book.set_title(args.get('title'))
        book.add_css(args.get('css'), mode)
        book.add_section_titles()

        if mode == 'book':
            book.make_book_pdf()
        elif mode in ('web', 'newspaper'):
            book.make_simple_pdf(mode)
        # NOTE(review): indentation is lost in this view of the file;
        # the rotate check is assumed to apply to every mode -- confirm.
        if "rotate" in args:
            book.rotate180()

        book.publish_pdf()
        book.notify_watcher('finished')

#These ones are similar enough to be handled by the one function
mode_newspaper = mode_book
mode_web = mode_book
@output_and_exit
def mode_openoffice(args):
    """Make an openoffice (.odt) document from the requested book.
    Most of the layout inputs are not consulted at all."""
    book_id = args.get('book')
    host = args.get('server', config.DEFAULT_SERVER)
    #page_settings = get_page_settings(args)
    odt_name = make_book_name(book_id, host, '.odt')
    watcher = make_progress_page(book_id, odt_name, 'openoffice')

    with Book(book_id, host, odt_name,
              watcher=watcher, isbn=args.get('isbn'),
              license=args.get('license')) as doc:
        if CGI_CONTEXT:
            doc.spawn_x()
        doc.load()
        doc.set_title(args.get('title'))
        doc.add_css(args.get('css'), 'openoffice')
        doc.add_section_titles()
        doc.make_oo_doc()
        doc.notify_watcher('finished')
418 def main():
419 args = parse_args()
420 mode = args.get('mode')
421 if mode is None and 'book' in args:
422 mode = 'book'
424 global CGI_CONTEXT
425 CGI_CONTEXT = 'SERVER_NAME' in os.environ or args.get('cgi-context', 'no').lower() in '1true'
427 if not args and not CGI_CONTEXT:
428 print __doc__
429 sys.exit()
431 output_function = globals().get('mode_%s' % mode, mode_form)
432 output_function(args)
if __name__ == '__main__':
    # Turn on cgitb's traceback handling only when CGITB_DOMAINS is set
    # and the request's REMOTE_ADDR is listed in it.
    if config.CGITB_DOMAINS and os.environ.get('REMOTE_ADDR') in config.CGITB_DOMAINS:
        import cgitb
        cgitb.enable()
    main()