# Part of Objavi2, which turns html manuals into books
# Copyright (C) 2009 Douglas Bagnall
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Make a pdf from the specified book."""
22 from __future__
import with_statement
26 sys
.path
.insert(0, os
.path
.abspath('.'))
30 from pprint
import pformat
32 from objavi
.fmbook
import Book
, HTTP_HOST
, find_archive_urls
33 from objavi
import config
34 from objavi
import twiki_wrapper
35 from objavi
.book_utils
import init_log
, log
, make_book_name
36 from objavi
.cgi_utils
import parse_args
, optionise
, listify
, get_server_list
37 from objavi
.cgi_utils
import is_utf8
, isfloat
, isfloat_or_auto
, is_isbn
, is_url
38 from objavi
.cgi_utils
import output_blob_and_exit
, output_blob_and_shut_up
, output_and_exit
39 from objavi
.cgi_utils
import get_size_list
, get_default_css
, font_links
# ARG_VALIDATORS is a mapping between the expected cgi arguments and
# functions to validate their values. (None means no validation).
#
# Each validator is called with the raw argument string and its result is
# used for its truthiness only.
ARG_VALIDATORS = {
    # can be: BlahBlah/Blah_Blah
    # Written as word(/word)* rather than (word/?)*word: both accept exactly
    # the same strings, but the nested-quantifier form backtracks
    # exponentially on long non-matching input, and this value comes
    # straight from the request.
    "book": re.compile(r'^[\w-]+(/[\w-]+)*$').match,
    "css": is_utf8, # an url, empty (for default), or css content
    "title": lambda x: len(x) < 999 and is_utf8(x),
    # NOTE(review): "isbn", "max-age" and "callback" are read with
    # args.get() elsewhere in this file; the validators are chosen to match
    # the is_isbn / isfloat / is_url helpers imported above -- confirm.
    "isbn": is_isbn,
    "license": config.LICENSES.__contains__,
    "server": config.SERVER_DEFAULTS.__contains__,
    "engine": config.ENGINES.__contains__,
    "booksize": config.PAGE_SIZE_DATA.__contains__,
    "page_width": isfloat,
    "page_height": isfloat,
    "gutter": isfloat_or_auto,
    "top_margin": isfloat_or_auto,
    "side_margin": isfloat_or_auto,
    "bottom_margin": isfloat_or_auto,
    "columns": isfloat_or_auto,
    "column_margin": isfloat_or_auto,
    # Exact membership: a substring test against '1true0false' would also
    # accept '', 'ue0f', 'false' fragments and so on.
    "cgi-context": lambda x: x.lower() in ('1', 'true', '0', 'false'),
    "mode": config.CGI_MODES.__contains__,
    "pdftype": lambda x: config.CGI_MODES.get(x, [False])[0], #for css mode
    "rotate": u"yes".__eq__,
    "grey_scale": u"yes".__eq__,
    "destination": config.CGI_DESTINATIONS.__contains__,
    "toc_header": is_utf8,
    "max-age": isfloat,
    "method": config.CGI_METHODS.__contains__,
    "callback": is_url,
    "html_template": is_utf8,
    "booki-group": is_utf8,
    "booki-user": is_utf8,
}

# Advertise the accepted argument names in the module docstring.
__doc__ += '\nValid arguments are: %s.\n' % ', '.join(ARG_VALIDATORS.keys())
def get_page_settings(args):
    """Find the size and any optional layout settings.

    args['booksize'] is either a keyword describing a size or
    'custom'.  If it is custom, the form is inspected for specific
    dimensions -- otherwise these are ignored.

    The margins, gutter, number of columns, and column
    margins all set themselves automatically based on the page
    dimensions, but they can be overridden.  Any that are overridden
    are collected here.

    Returns a dict of settings.  It always contains 'engine'; the other
    keys depend on what was supplied in args and on the entry for the
    chosen size in config.PAGE_SIZE_DATA.

    Raises KeyError if the size is not in config.PAGE_SIZE_DATA, or if a
    custom size was requested without a usable page_width and page_height.
    """
    # get all the values including sizes first
    # the sizes are found as 'page_width' and 'page_height',
    # but the Book class expects them as a 'pointsize' tuple, so
    # they are easily ignored.
    settings = {}
    for k, extrema in config.PAGE_EXTREMA.iteritems():
        try:
            v = float(args.get(k))
        except (ValueError, TypeError):
            # Missing (None) or non-numeric (e.g. 'auto'): leave it unset.
            continue
        min_val, max_val, multiplier = extrema
        # Written as a chained comparison so that NaN, which compares false
        # against everything, is rejected rather than slipping through.
        if not (min_val <= v <= max_val):
            # Format with a 2-tuple: '%' binds tighter than '+', so
            # "fmt % (v,) + extrema" raised TypeError instead of logging.
            log('rejecting %s: outside %s' % (v, extrema))
            continue
        log('found %s=%s' % (k, v))
        settings[k] = v * multiplier #convert to points in many cases

    # now if args['booksize'] is not 'custom', the width and height found
    # above are ignored.
    size = args.get('booksize', config.DEFAULT_SIZE)
    settings.update(config.PAGE_SIZE_DATA[size])

    #if args['mode'] is 'newspaper', then the number of columns is
    #automatically determined unless set -- otherwise default is 1.
    if args.get('mode') == 'newspaper' and settings.get('columns') is None:
        settings['columns'] = 'auto'

    if args.get('grey_scale'):
        settings['grey_scale'] = True

    # NOTE(review): guard reconstructed from the docstring ("if it is
    # custom, the form is inspected for specific dimensions") -- confirm
    # the exact condition against the canonical source.
    if size is None or size == 'custom':
        #will raise KeyError if width, height aren't set
        settings['pointsize'] = (settings['page_width'], settings['page_height'])
        del settings['page_width']
        del settings['page_height']

    settings['engine'] = args.get('engine', config.DEFAULT_ENGINE)
    return settings
def mode_booklist(args):
    """Return the chosen server's book list, passed through optionise().

    The server comes from args['server'] (falling back to
    config.DEFAULT_SERVER); args['book'], if present, is handed to
    optionise() as the default entry.
    """
    #XXX need to include booki servers
    # NOTE(review): the dispatcher in this file discards the return value of
    # mode_* functions, and output_and_exit is imported but unused in the
    # visible code -- check whether a decorator is expected here.
    server = args.get('server', config.DEFAULT_SERVER)
    books = twiki_wrapper.get_book_list(server)
    selected = args.get('book')
    return optionise(books, default=selected)
138 #XX sending as text/html, but it doesn't really matter
139 return get_default_css(args
.get('server', config
.DEFAULT_SERVER
), args
.get('pdftype', 'book'))
144 f
= open(config
.FORM_TEMPLATE
)
147 f
= open(config
.FONT_LIST_INCLUDE
)
148 font_list
= [x
.strip() for x
in f
if x
.strip()]
150 server
= args
.get('server', config
.DEFAULT_SERVER
)
151 book
= args
.get('book')
152 size
= args
.get('booksize', config
.DEFAULT_SIZE
)
153 engine
= args
.get('engine', config
.DEFAULT_ENGINE
)
155 'server_options': optionise(get_server_list(), default
=server
),
156 'book_options': optionise(twiki_wrapper
.get_book_list(server
), default
=book
),
157 'size_options': optionise(get_size_list(), default
=size
),
158 'engines': optionise(config
.ENGINES
.keys(), default
=engine
),
159 'pdf_types': optionise(sorted(k
for k
, v
in config
.CGI_MODES
.iteritems() if v
[0])),
160 'css': get_default_css(server
),
161 'font_links': listify(font_links()),
162 'font_list': listify(font_list
),
163 'default_license' : config
.DEFAULT_LICENSE
,
164 'licenses' : optionise(config
.LICENSES
, default
=config
.DEFAULT_LICENSE
),
170 for id, title
, type, source
, classes
, epilogue
in config
.FORM_INPUTS
:
171 val
= d
.get(source
, '')
172 e
= config
.FORM_ELEMENT_TYPES
[type] % locals()
173 form
.append('\n<div id="%(id)s_div" class="form-item %(classes)s">\n'
174 '<div class="input_title">%(title)s</div>\n'
175 '<div class="input_contents"> %(e)s %(epilogue)s\n</div>'
176 '</div>\n' % locals())
179 _valid_inputs
= set(ARG_VALIDATORS
)
180 _form_inputs
= set(x
[0] for x
in config
.FORM_INPUTS
if x
[2] != 'ul')
181 log("valid but not used inputs: %s" % (_valid_inputs
- _form_inputs
))
182 log("invalid form inputs: %s" % (_form_inputs
- _valid_inputs
))
184 return template
% {'form': ''.join(form
)}
188 class Context(object):
189 """Work out what to show the caller. The method/destination matrix:
191 [dest/method] sync async poll
192 archive.org url id id
197 'html 1' is dripfed progress reports; 'html 2' polls via
198 javascript. 'id' is the book filename. 'url' is a full url
199 locating the file on archive.org or the objavi server. '.' means
204 def __init__(self
, args
):
205 self
.bookid
= args
.get('book')
206 self
.server
= args
.get('server', config
.DEFAULT_SERVER
)
207 self
.mode
= args
.get('mode', 'book')
208 extension
= config
.CGI_MODES
.get(self
.mode
)[1]
209 self
.bookname
= make_book_name(self
.bookid
, self
.server
, extension
)
210 self
.destination
= args
.get('destination', config
.DEFAULT_CGI_DESTINATION
)
211 self
.callback
= args
.get('callback', None)
212 self
.method
= args
.get('method', config
.CGI_DESTINATIONS
[self
.destination
]['default'])
213 self
.template
, self
.mimetype
= config
.CGI_DESTINATIONS
[self
.destination
][self
.method
]
215 self
.bookurl
= "http://%s/books/%s" % (HTTP_HOST
, self
.bookname
,)
217 self
.bookurl
= "books/%s" % (self
.bookname
,)
219 self
.details_url
, self
.s3url
= find_archive_urls(self
.bookid
, self
.bookname
)
220 self
.booki_group
= args
.get('booki-group')
221 self
.booki_user
= args
.get('booki-user')
225 """Begin (and in many cases, finish) http output.
227 In asynchronous modes, fork and close down stdout.
229 log(self
.template
, self
.mimetype
, self
.destination
, self
.method
)
230 if self
.template
is not None:
231 progress_list
= ''.join('<li id="%s">%s</li>\n' % x
[:2] for x
in config
.PROGRESS_POINTS
232 if self
.mode
in x
[2])
235 'bookname': self
.bookname
,
236 'progress_list': progress_list
,
237 'details_url': self
.details_url
,
239 'bookurl': self
.bookurl
,
241 f
= open(self
.template
)
242 content
= f
.read() % d
247 if self
.method
== 'sync':
248 print 'Content-type: %s\n\n%s' %(self
.mimetype
, content
)
250 output_blob_and_shut_up(content
, self
.mimetype
)
251 log(sys
.stdout
, sys
.stderr
, sys
.stdin
)
256 log(sys
.stdout
, sys
.stderr
, sys
.stdin
)
259 def finish(self
, book
):
260 """Print any final http content."""
261 book
.publish_shared(self
.booki_group
, self
.booki_user
)
262 if self
.destination
== 'archive.org':
264 elif self
.destination
== 'download' and self
.method
== 'sync':
265 f
= open(book
.publish_file
)
268 output_blob_and_exit(data
, config
.CGI_MODES
[self
.mode
][2], self
.bookname
)
def log_notifier(self, message):
    """Send messages to the log only.

    Nothing is written to the http response; the message is formatted
    and handed to log().
    """
    entry = '******* got message "%s"' % message
    log(entry)
275 def callback_notifier(self
, message
):
276 """Call the callback url with each message."""
277 log('in callback_notifier')
280 log('child %s is doing callback with message %r' % (pid
, message
, ))
282 from urllib2
import urlopen
, URLError
283 from urllib
import urlencode
284 data
= urlencode({'message': message
})
286 f
= urlopen(self
.callback
, data
)
290 #traceback.print_exc()
291 log("ERROR in callback:\n %r\n %s %s" % (e
.url
, e
.code
, e
.msg
))
294 def javascript_notifier(self
, message
):
295 """Print little bits of javascript which will be appended to
296 an unfinished html page."""
298 if message
.startswith('ERROR:'):
299 log('got an error! %r' % message
)
300 print ('<b class="error-message">'
302 '</b></body></html>' % message
305 print ('<script type="text/javascript">\n'
306 'objavi_show_progress("%s");\n'
307 '</script>' % message
309 if message
== config
.FINISHED_MESSAGE
:
310 print '</body></html>'
312 except ValueError, e
:
313 log("failed to send message %r, got exception %r" % (message
, e
))
def pollee_notifier(self, message):
    """Append the message to a file that the remote server can poll.

    The file (config.POLL_NOTIFY_PATH filled in with the book name) is
    opened in append mode the first time it is needed, or again if it
    has since been closed, and is left open between messages.  Each
    message is written on a line of its own and flushed immediately.
    """
    handle = self.pollfile
    if handle is None or handle.closed:
        handle = open(config.POLL_NOTIFY_PATH % self.bookname, 'a')
        self.pollfile = handle
    handle.write('%s\n' % message)
    # Flush rather than close, so each line is pushed out as it is written.
    handle.flush()
def get_watchers(self):
    """Based on the CGI arguments, return a likely set of notifier
    methods.

    The selection is:
      * method 'poll'                       -> pollee_notifier
      * method 'async' with a callback url  -> callback_notifier
      * method 'sync', destination 'html'   -> javascript_notifier
      * always                              -> log_notifier

    Returns a set of bound methods, each taking a single message.
    """
    log('in get_watchers. method %r, callback %r, destination %r' %
        (self.method, self.callback, self.destination))
    # The set must be created here and returned: callers pass the result
    # straight to Book(watchers=...).
    watchers = set()
    if self.method == 'poll':
        watchers.add(self.pollee_notifier)
    if self.method == 'async' and self.callback:
        watchers.add(self.callback_notifier)
    if self.method == 'sync' and self.destination == 'html':
        watchers.add(self.javascript_notifier)
    watchers.add(self.log_notifier)
    # Wrap in a 1-tuple so the set is always formatted as a single value.
    log('watchers are %s' % (watchers,))
    return watchers
342 # so we're making a pdf.
343 context
= Context(args
)
344 page_settings
= get_page_settings(args
)
346 with
Book(context
.bookid
, context
.server
, context
.bookname
,
347 page_settings
=page_settings
,
348 watchers
=context
.get_watchers(), isbn
=args
.get('isbn'),
349 license
=args
.get('license'), title
=args
.get('title'),
350 max_age
=float(args
.get('max-age', -1))) as book
:
354 if 'toc_header' in args
:
355 book
.toc_header
= args
['toc_header'].decode('utf-8')
357 book
.add_css(args
.get('css'), context
.mode
)
358 book
.add_section_titles()
360 if context
.mode
== 'book':
362 elif context
.mode
in ('web', 'newspaper'):
363 book
.make_simple_pdf(context
.mode
)
370 #These ones are similar enough to be handled by the one function
371 mode_newspaper
= mode_book
375 def mode_openoffice(args
):
376 """Make an openoffice document. A whole lot of the inputs have no
378 context
= Context(args
)
379 with
Book(context
.bookid
, context
.server
, context
.bookname
,
380 watchers
=context
.get_watchers(), isbn
=args
.get('isbn'),
381 license
=args
.get('license'), title
=args
.get('title'),
382 max_age
=float(args
.get('max-age', -1))) as book
:
386 book
.add_css(args
.get('css'), 'openoffice')
387 book
.add_section_titles()
392 log('making epub with\n%s' % pformat(args
))
393 #XXX need to catch and process lack of necessary arguments.
394 context
= Context(args
)
396 with
Book(context
.bookid
, context
.server
, context
.bookname
,
397 watchers
=context
.get_watchers(), title
=args
.get('title'),
398 max_age
=float(args
.get('max-age', -1))) as book
:
400 book
.make_epub(use_cache
=config
.USE_CACHED_IMAGES
)
def mode_bookizip(args):
    """Build the requested book and publish it as a bookizip.

    A Context is derived from the parsed CGI args, a Book is opened as a
    context manager with that context's watchers, and
    book.publish_bookizip() is called on it.
    """
    log('making bookizip with\n%s' % pformat(args))
    context = Context(args)

    # NOTE(review): Context.finish(book) is not called on this path in the
    # visible code -- confirm whether that is intended.
    identity = (context.bookid, context.server, context.bookname)
    book_options = {
        'watchers': context.get_watchers(),
        'title': args.get('title'),
        'max_age': float(args.get('max-age', -1)),
    }
    with Book(*identity, **book_options) as book:
        book.publish_bookizip()
def mode_templated_html(args):
    """Build the requested book as templated html.

    args['html_template'] (None when absent) is passed to
    book.make_templated_html() as the template.
    """
    log('making templated html with\n%s' % pformat(args))
    context = Context(args)
    html_template = args.get('html_template')

    # NOTE(review): Context.finish(book) is not called on this path in the
    # visible code -- confirm whether that is intended.
    identity = (context.bookid, context.server, context.bookname)
    book_options = {
        'watchers': context.get_watchers(),
        'title': args.get('title'),
        'max_age': float(args.get('max-age', -1)),
    }
    with Book(*identity, **book_options) as book:
        book.make_templated_html(template=html_template)
426 def mode_templated_html_zip(args
):
430 args
= parse_args(ARG_VALIDATORS
)
431 mode
= args
.get('mode')
432 if mode
is None and 'book' in args
:
436 CGI_CONTEXT
= 'SERVER_NAME' in os
.environ
or args
.get('cgi-context', 'no').lower() in '1true'
438 if not args
and not CGI_CONTEXT
:
442 output_function
= globals().get('mode_%s' % mode
, mode_form
)
443 output_function(args
)
445 if __name__
== '__main__':
446 if config
.CGITB_DOMAINS
and os
.environ
.get('REMOTE_ADDR') in config
.CGITB_DOMAINS
: