3 # Part of Objavi2, which turns html manuals into books
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Make a pdf from the specified book."""
22 from __future__
import with_statement
28 from pprint
import pformat
30 from objavi
.fmbook
import Book
, HTTP_HOST
, find_archive_urls
31 from objavi
import config
32 from objavi
import twiki_wrapper
33 from objavi
.book_utils
import init_log
, log
, make_book_name
34 from objavi
.cgi_utils
import parse_args
, optionise
, listify
, get_server_list
35 from objavi
.cgi_utils
import is_utf8
, isfloat
, isfloat_or_auto
, is_isbn
, is_url
36 from objavi
.cgi_utils
import output_blob_and_exit
, output_blob_and_shut_up
, output_and_exit
37 from objavi
.cgi_utils
import get_size_list
, get_default_css
, font_links
40 # ARG_VALIDATORS is a mapping between the expected cgi arguments and
41 # functions to validate their values. (None means no validation).
43 "book": re
.compile(r
'^([\w-]+/?)*[\w-]+$').match
, # can be: BlahBlah/Blah_Blah
44 "css": is_utf8
, # an url, empty (for default), or css content
45 "title": lambda x
: len(x
) < 999 and is_utf8(x
),
47 "license": config
.LICENSES
.__contains
__,
48 "server": config
.SERVER_DEFAULTS
.__contains
__,
49 "engine": config
.ENGINES
.__contains
__,
50 "booksize": config
.PAGE_SIZE_DATA
.__contains
__,
51 "page_width": isfloat
,
52 "page_height": isfloat
,
53 "gutter": isfloat_or_auto
,
54 "top_margin": isfloat_or_auto
,
55 "side_margin": isfloat_or_auto
,
56 "bottom_margin": isfloat_or_auto
,
57 "columns": isfloat_or_auto
,
58 "column_margin": isfloat_or_auto
,
59 "cgi-context": lambda x
: x
.lower() in '1true0false',
60 "mode": config
.CGI_MODES
.__contains
__,
61 "pdftype": lambda x
: config
.CGI_MODES
.get(x
, [False])[0], #for css mode
62 "rotate": u
"yes".__eq
__,
63 "grey_scale": u
"yes".__eq
__,
64 "destination": config
.CGI_DESTINATIONS
.__contains
__,
65 "toc_header": is_utf8
,
67 "method": config
.CGI_METHODS
.__contains
__,
71 __doc__
+= '\nValid arguments are: %s.\n' % ', '.join(ARG_VALIDATORS
.keys())
def get_page_settings(args):
    """Find the size and any optional layout settings.

    args['booksize'] is either a keyword describing a size or
    'custom'.  If it is custom, the form is inspected for specific
    dimensions -- otherwise these are ignored.

    The margins, gutter, number of columns, and column margins all
    set themselves automatically based on the page dimensions, but
    they can be overridden.  Any overrides found in args are
    collected here.

    Returns a dict of settings.  The page dimensions are folded into
    a 'pointsize' (width, height) tuple, and 'engine' is taken from
    args, falling back to config.DEFAULT_ENGINE.
    """
    # Get all the values including sizes first.  The sizes are found
    # as 'page_width' and 'page_height', but the Book class expects
    # them as a 'pointsize' tuple, so they are easily ignored.
    settings = {}
    for k, extrema in config.PAGE_EXTREMA.iteritems():
        try:
            v = float(args.get(k))
        except (ValueError, TypeError):
            # Absent (None) or not a number: nothing to override.
            continue
        min_val, max_val, multiplier = extrema
        if v < min_val or v > max_val:
            # The arguments must be one tuple: '%' binds tighter than
            # '+', so "fmt % (v,) + extrema" raised TypeError here
            # instead of logging the rejection.
            log('rejecting %s: outside %s' % (v, extrema))
            continue
        log('found %s=%s' % (k, v))
        settings[k] = v * multiplier  # convert to points in many cases

    # The data for the chosen booksize is layered on top, so whatever
    # keys it defines replace the values found above.
    size = args.get('booksize', config.DEFAULT_SIZE)
    settings.update(config.PAGE_SIZE_DATA[size])

    # If args['mode'] is 'newspaper', then the number of columns is
    # automatically determined unless set -- otherwise default is 1.
    if args.get('mode') == 'newspaper' and settings.get('columns') is None:
        settings['columns'] = 'auto'

    if args.get('grey_scale'):
        settings['grey_scale'] = True

    # Will raise KeyError if width, height aren't set.
    settings['pointsize'] = (settings['page_width'], settings['page_height'])
    del settings['page_width']
    del settings['page_height']

    settings['engine'] = args.get('engine', config.DEFAULT_ENGINE)
    return settings
def mode_booklist(args):
    """Return the book list for the requested server, run through optionise.

    The server comes from args['server'] (config.DEFAULT_SERVER when
    absent); args['book'], if any, is handed to optionise as the
    default entry.
    """
    #XXX need to include booki servers
    server = args.get('server', config.DEFAULT_SERVER)
    books = twiki_wrapper.get_book_list(server)
    return optionise(books, default=args.get('book'))
133 #XX sending as text/html, but it doesn't really matter
134 return get_default_css(args
.get('server', config
.DEFAULT_SERVER
), args
.get('pdftype', 'book'))
139 f
= open(config
.FORM_TEMPLATE
)
142 f
= open(config
.FONT_LIST_INCLUDE
)
143 font_list
= [x
.strip() for x
in f
if x
.strip()]
145 server
= args
.get('server', config
.DEFAULT_SERVER
)
146 book
= args
.get('book')
147 size
= args
.get('booksize', config
.DEFAULT_SIZE
)
148 engine
= args
.get('engine', config
.DEFAULT_ENGINE
)
150 'server_options': optionise(get_server_list(), default
=server
),
151 'book_options': optionise(twiki_wrapper
.get_book_list(server
), default
=book
),
152 'size_options': optionise(get_size_list(), default
=size
),
153 'engines': optionise(config
.ENGINES
.keys(), default
=engine
),
154 'pdf_types': optionise(sorted(k
for k
, v
in config
.CGI_MODES
.iteritems() if v
[0])),
155 'css': get_default_css(server
),
156 'font_links': listify(font_links()),
157 'font_list': listify(font_list
),
158 'default_license' : config
.DEFAULT_LICENSE
,
159 'licenses' : optionise(config
.LICENSES
, default
=config
.DEFAULT_LICENSE
),
165 for id, title
, type, source
, classes
, epilogue
in config
.FORM_INPUTS
:
166 val
= d
.get(source
, '')
167 e
= config
.FORM_ELEMENT_TYPES
[type] % locals()
168 form
.append('\n<div id="%(id)s_div" class="form-item %(classes)s">\n'
169 '<div class="input_title">%(title)s</div>\n'
170 '<div class="input_contents"> %(e)s %(epilogue)s\n</div>'
171 '</div>\n' % locals())
174 _valid_inputs
= set(ARG_VALIDATORS
)
175 _form_inputs
= set(x
[0] for x
in config
.FORM_INPUTS
if x
[2] != 'ul')
176 log("valid but not used inputs: %s" % (_valid_inputs
- _form_inputs
))
177 log("invalid form inputs: %s" % (_form_inputs
- _valid_inputs
))
179 return template
% {'form': ''.join(form
)}
183 class Context(object):
184 """Work out what to show the caller. The method/destination matrix:
186 [dest/method] sync async poll
187 archive.org url id id
192 'html 1' is dripfed progress reports; 'html 2' polls via
193 javascript. 'id' is the book filename. 'url' is a full url
194 locating the file on archive.org or the objavi server. '.' means
199 def __init__(self
, args
):
200 self
.bookid
= args
.get('book')
201 self
.server
= args
.get('server', config
.DEFAULT_SERVER
)
202 self
.mode
= args
.get('mode', 'book')
203 extension
= config
.CGI_MODES
.get(self
.mode
)[1]
204 self
.bookname
= make_book_name(self
.bookid
, self
.server
, extension
)
205 self
.destination
= args
.get('destination', config
.DEFAULT_CGI_DESTINATION
)
206 self
.callback
= args
.get('callback', None)
207 self
.method
= args
.get('method', config
.CGI_DESTINATIONS
[self
.destination
]['default'])
208 self
.template
, self
.mimetype
= config
.CGI_DESTINATIONS
[self
.destination
][self
.method
]
210 self
.bookurl
= "http://%s/books/%s" % (HTTP_HOST
, self
.bookname
,)
212 self
.bookurl
= "books/%s" % (self
.bookname
,)
214 self
.details_url
, self
.s3url
= find_archive_urls(self
.bookid
, self
.bookname
)
218 """Begin (and in many cases, finish) http output.
220 In asynchronous modes, fork and close down stdout.
222 log(self
.template
, self
.mimetype
, self
.destination
, self
.method
)
223 if self
.template
is not None:
224 progress_list
= ''.join('<li id="%s">%s</li>\n' % x
[:2] for x
in config
.PROGRESS_POINTS
225 if self
.mode
in x
[2])
228 'bookname': self
.bookname
,
229 'progress_list': progress_list
,
230 'details_url': self
.details_url
,
232 'bookurl': self
.bookurl
,
234 f
= open(self
.template
)
235 content
= f
.read() % d
240 if self
.method
== 'sync':
241 print 'Content-type: %s\n\n%s' %(self
.mimetype
, content
)
243 output_blob_and_shut_up(content
, self
.mimetype
)
244 log(sys
.stdout
, sys
.stderr
, sys
.stdin
)
249 log(sys
.stdout
, sys
.stderr
, sys
.stdin
)
252 def finish(self
, book
):
253 """Print any final http content."""
254 if self
.destination
== 'archive.org':
256 elif self
.destination
== 'download' and self
.method
== 'sync':
257 f
= open(book
.publish_file
)
260 output_blob_and_exit(data
, config
.CGI_MODES
[self
.mode
][2], self
.bookname
)
def log_notifier(self, message):
    """Record the message in the log and nowhere else."""
    line = '******* got message "%s"' % message
    log(line)
267 def callback_notifier(self
, message
):
268 """Call the callback url with each message."""
269 log('in callback_notifier')
272 log('child %s is doing callback with message %r' % (pid
, message
, ))
274 from urllib2
import urlopen
, URLError
275 from urllib
import urlencode
276 data
= urlencode({'message': message
})
278 f
= urlopen(self
.callback
, data
)
282 #traceback.print_exc()
283 log("ERROR in callback:\n %r\n %s %s" % (e
.url
, e
.code
, e
.msg
))
286 def javascript_notifier(self
, message
):
287 """Print little bits of javascript which will be appended to
288 an unfinished html page."""
290 if message
.startswith('ERROR:'):
291 log('got an error! %r' % message
)
292 print ('<b class="error-message">'
294 '</b></body></html>' % message
297 print ('<script type="text/javascript">\n'
298 'objavi_show_progress("%s");\n'
299 '</script>' % message
301 if message
== config
.FINISHED_MESSAGE
:
302 print '</body></html>'
304 except ValueError, e
:
305 log("failed to send message %r, got exception %r" % (message
, e
))
def pollee_notifier(self, message):
    """Append the message to a file that the remote server can poll.

    The file is opened in append mode on first use (or again if it
    has been closed) and is kept open between calls; each message is
    written on its own line and flushed straight away.
    """
    pollfile = self.pollfile
    if pollfile is None or pollfile.closed:
        pollfile = open(config.POLL_NOTIFY_PATH % self.bookname, 'a')
        self.pollfile = pollfile
    pollfile.write('%s\n' % message)
    pollfile.flush()
def get_watchers(self):
    """Based on the CGI arguments, return a likely set of notifier
    methods.

    The poll, callback and javascript notifiers are each included only
    when the method (and, where relevant, the callback or destination)
    calls for them; the log notifier is always included.
    """
    log('in get_watchers. method %r, callback %r, destination %r' %
        (self.method, self.callback, self.destination))
    candidates = (
        (self.method == 'poll', self.pollee_notifier),
        (self.method == 'async' and self.callback, self.callback_notifier),
        (self.method == 'sync' and self.destination == 'html',
         self.javascript_notifier),
    )
    watchers = set()
    for wanted, notifier in candidates:
        if wanted:
            watchers.add(notifier)
    watchers.add(self.log_notifier)
    log('watchers are %s' % watchers)
    return watchers
337 # so we're making a pdf.
338 context
= Context(args
)
339 page_settings
= get_page_settings(args
)
341 with
Book(context
.bookid
, context
.server
, context
.bookname
,
342 page_settings
=page_settings
,
343 watchers
=context
.get_watchers(), isbn
=args
.get('isbn'),
344 license
=args
.get('license'), title
=args
.get('title'),
345 max_age
=float(args
.get('max-age', -1))) as book
:
349 if 'toc_header' in args
:
350 book
.toc_header
= args
['toc_header'].decode('utf-8')
352 book
.add_css(args
.get('css'), context
.mode
)
353 book
.add_section_titles()
355 if context
.mode
== 'book':
357 elif context
.mode
in ('web', 'newspaper'):
358 book
.make_simple_pdf(context
.mode
)
365 #These ones are similar enough to be handled by the one function
366 mode_newspaper
= mode_book
370 def mode_openoffice(args
):
371 """Make an openoffice document. A whole lot of the inputs have no
373 context
= Context(args
)
374 with
Book(context
.bookid
, context
.server
, context
.bookname
,
375 watchers
=context
.get_watchers(), isbn
=args
.get('isbn'),
376 license
=args
.get('license'), title
=args
.get('title'),
377 max_age
=float(args
.get('max-age', -1))) as book
:
381 book
.add_css(args
.get('css'), 'openoffice')
382 book
.add_section_titles()
387 log('making epub with\n%s' % pformat(args
))
388 #XXX need to catch and process lack of necessary arguments.
389 context
= Context(args
)
391 with
Book(context
.bookid
, context
.server
, context
.bookname
,
392 watchers
=context
.get_watchers(), title
=args
.get('title'),
393 max_age
=float(args
.get('max-age', -1))) as book
:
395 book
.make_epub(use_cache
=config
.USE_CACHED_IMAGES
)
def mode_bookizip(args):
    """Publish the requested book as a bookizip.

    A Context is built from args, and a Book is opened with that
    context's watchers, the optional title, and the 'max-age' argument
    (-1 when absent) converted to a float.
    """
    log('making bookizip with\n%s' % pformat(args))
    context = Context(args)

    book_options = dict(
        watchers=context.get_watchers(),
        title=args.get('title'),
        max_age=float(args.get('max-age', -1)),
    )
    with Book(context.bookid, context.server, context.bookname,
              **book_options) as book:
        book.publish_bookizip()
411 args
= parse_args(ARG_VALIDATORS
)
412 mode
= args
.get('mode')
413 if mode
is None and 'book' in args
:
417 CGI_CONTEXT
= 'SERVER_NAME' in os
.environ
or args
.get('cgi-context', 'no').lower() in '1true'
419 if not args
and not CGI_CONTEXT
:
423 output_function
= globals().get('mode_%s' % mode
, mode_form
)
424 output_function(args
)
426 if __name__
== '__main__':
427 if config
.CGITB_DOMAINS
and os
.environ
.get('REMOTE_ADDR') in config
.CGITB_DOMAINS
: