3 # Part of Objavi2, which turns html manuals into books
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Make a pdf from the specified book."""
22 from __future__
import with_statement
26 sys
.path
.insert(0, os
.path
.abspath('.'))
30 from pprint
import pformat
32 from objavi
.fmbook
import Book
, HTTP_HOST
, find_archive_urls
33 from objavi
import config
34 from objavi
import twiki_wrapper
35 from objavi
.book_utils
import init_log
, log
, make_book_name
36 from objavi
.cgi_utils
import parse_args
, optionise
, listify
, get_server_list
37 from objavi
.cgi_utils
import is_utf8
, isfloat
, isfloat_or_auto
, is_isbn
, is_url
38 from objavi
.cgi_utils
import output_blob_and_exit
, output_blob_and_shut_up
, output_and_exit
39 from objavi
.cgi_utils
import get_size_list
, get_default_css
, font_links
42 # ARG_VALIDATORS is a mapping between the expected cgi arguments and
43 # functions to validate their values. (None means no validation).
45 "book": re
.compile(r
'^([\w-]+/?)*[\w-]+$').match
, # can be: BlahBlah/Blah_Blah
46 "css": is_utf8
, # an url, empty (for default), or css content
47 "title": lambda x
: len(x
) < 999 and is_utf8(x
),
49 "license": config
.LICENSES
.__contains
__,
50 "server": config
.SERVER_DEFAULTS
.__contains
__,
51 "engine": config
.ENGINES
.__contains
__,
52 "booksize": config
.PAGE_SIZE_DATA
.__contains
__,
53 "page_width": isfloat
,
54 "page_height": isfloat
,
55 "gutter": isfloat_or_auto
,
56 "top_margin": isfloat_or_auto
,
57 "side_margin": isfloat_or_auto
,
58 "bottom_margin": isfloat_or_auto
,
59 "columns": isfloat_or_auto
,
60 "column_margin": isfloat_or_auto
,
61 "cgi-context": lambda x
: x
.lower() in '1true0false',
62 "mode": config
.CGI_MODES
.__contains
__,
63 "pdftype": lambda x
: config
.CGI_MODES
.get(x
, [False])[0], #for css mode
64 "rotate": u
"yes".__eq
__,
65 "grey_scale": u
"yes".__eq
__,
66 "destination": config
.CGI_DESTINATIONS
.__contains
__,
67 "toc_header": is_utf8
,
69 "method": config
.CGI_METHODS
.__contains
__,
# Advertise every accepted CGI argument name in the module docstring,
# so the help text cannot drift away from the validator table.
__doc__ += '\nValid arguments are: %s.\n' % (', '.join(ARG_VALIDATORS),)
def get_page_settings(args):
    """Find the size and any optional layout settings.

    args['booksize'] is either a keyword describing a size or
    'custom'.  If it is custom, the form is inspected for specific
    dimensions -- otherwise these are ignored.

    The margins, gutter, number of columns, and column margins all
    set themselves automatically based on the page dimensions, but
    they can be overridden.  Any overrides present in args (and within
    the limits given by config.PAGE_EXTREMA) are included in the result.

    Parameters:
        args -- mapping of CGI argument names to their string values.

    Returns:
        A dict of page settings.  It always contains 'engine'; for a
        'custom' booksize it contains 'pointsize' as a
        (page_width, page_height) tuple in place of the two separate keys.

    Raises:
        KeyError -- if booksize is not a key of config.PAGE_SIZE_DATA, or
        if booksize is 'custom' and a usable page_width/page_height was
        not supplied.
    """
    # Get all the values including sizes first.  The sizes are found as
    # 'page_width' and 'page_height', but the Book class expects them as
    # a 'pointsize' tuple, so they are easily ignored.
    settings = {}
    for k, extrema in config.PAGE_EXTREMA.iteritems():
        try:
            v = float(args.get(k))
        except (ValueError, TypeError):
            # Absent (None) or not a number: leave this setting automatic.
            #log("don't like %r as a float value for %s!" % (args.get(k), k))
            continue
        min_val, max_val, multiplier = extrema
        # Written as a chained comparison so that NaN (for which every
        # comparison is False) is rejected instead of slipping through.
        if not (min_val <= v <= max_val):
            # Both values go in ONE tuple: '%' binds tighter than '+', so
            # the former "% (v,) + extrema" raised TypeError right here.
            log('rejecting %s: outside %s' % (v, extrema))
            continue
        log('found %s=%s' % (k, v))
        settings[k] = v * multiplier  # convert to points in many cases

    # Now if args['booksize'] is not 'custom', the width and height found
    # above are ignored in favour of the named size's own data.
    size = args.get('booksize', config.DEFAULT_SIZE)
    settings.update(config.PAGE_SIZE_DATA[size])

    # If args['mode'] is 'newspaper', then the number of columns is
    # automatically determined unless set -- otherwise default is 1.
    if args.get('mode') == 'newspaper' and settings.get('columns') is None:
        settings['columns'] = 'auto'

    if args.get('grey_scale'):
        settings['grey_scale'] = True

    if size == 'custom':
        # Will raise KeyError if width, height aren't set.
        settings['pointsize'] = (settings['page_width'],
                                 settings['page_height'])
        del settings['page_width']
        del settings['page_height']

    settings['engine'] = args.get('engine', config.DEFAULT_ENGINE)
    return settings
def mode_booklist(args):
    """Return the book list of the requested server, run through optionise.

    The server comes from args['server'] (falling back to
    config.DEFAULT_SERVER); args['book'], if present, is handed to
    optionise as the default entry.
    """
    #XXX need to include booki servers
    server = args.get('server', config.DEFAULT_SERVER)
    books = twiki_wrapper.get_book_list(server)
    return optionise(books, default=args.get('book'))
135 #XX sending as text/html, but it doesn't really matter
136 return get_default_css(args
.get('server', config
.DEFAULT_SERVER
), args
.get('pdftype', 'book'))
141 f
= open(config
.FORM_TEMPLATE
)
144 f
= open(config
.FONT_LIST_INCLUDE
)
145 font_list
= [x
.strip() for x
in f
if x
.strip()]
147 server
= args
.get('server', config
.DEFAULT_SERVER
)
148 book
= args
.get('book')
149 size
= args
.get('booksize', config
.DEFAULT_SIZE
)
150 engine
= args
.get('engine', config
.DEFAULT_ENGINE
)
152 'server_options': optionise(get_server_list(), default
=server
),
153 'book_options': optionise(twiki_wrapper
.get_book_list(server
), default
=book
),
154 'size_options': optionise(get_size_list(), default
=size
),
155 'engines': optionise(config
.ENGINES
.keys(), default
=engine
),
156 'pdf_types': optionise(sorted(k
for k
, v
in config
.CGI_MODES
.iteritems() if v
[0])),
157 'css': get_default_css(server
),
158 'font_links': listify(font_links()),
159 'font_list': listify(font_list
),
160 'default_license' : config
.DEFAULT_LICENSE
,
161 'licenses' : optionise(config
.LICENSES
, default
=config
.DEFAULT_LICENSE
),
167 for id, title
, type, source
, classes
, epilogue
in config
.FORM_INPUTS
:
168 val
= d
.get(source
, '')
169 e
= config
.FORM_ELEMENT_TYPES
[type] % locals()
170 form
.append('\n<div id="%(id)s_div" class="form-item %(classes)s">\n'
171 '<div class="input_title">%(title)s</div>\n'
172 '<div class="input_contents"> %(e)s %(epilogue)s\n</div>'
173 '</div>\n' % locals())
176 _valid_inputs
= set(ARG_VALIDATORS
)
177 _form_inputs
= set(x
[0] for x
in config
.FORM_INPUTS
if x
[2] != 'ul')
178 log("valid but not used inputs: %s" % (_valid_inputs
- _form_inputs
))
179 log("invalid form inputs: %s" % (_form_inputs
- _valid_inputs
))
181 return template
% {'form': ''.join(form
)}
185 class Context(object):
186 """Work out what to show the caller. The method/destination matrix:
188 [dest/method] sync async poll
189 archive.org url id id
194 'html 1' is dripfed progress reports; 'html 2' polls via
195 javascript. 'id' is the book filename. 'url' is a full url
196 locating the file on archive.org or the objavi server. '.' means
201 def __init__(self
, args
):
202 self
.bookid
= args
.get('book')
203 self
.server
= args
.get('server', config
.DEFAULT_SERVER
)
204 self
.mode
= args
.get('mode', 'book')
205 extension
= config
.CGI_MODES
.get(self
.mode
)[1]
206 self
.bookname
= make_book_name(self
.bookid
, self
.server
, extension
)
207 self
.destination
= args
.get('destination', config
.DEFAULT_CGI_DESTINATION
)
208 self
.callback
= args
.get('callback', None)
209 self
.method
= args
.get('method', config
.CGI_DESTINATIONS
[self
.destination
]['default'])
210 self
.template
, self
.mimetype
= config
.CGI_DESTINATIONS
[self
.destination
][self
.method
]
212 self
.bookurl
= "http://%s/books/%s" % (HTTP_HOST
, self
.bookname
,)
214 self
.bookurl
= "books/%s" % (self
.bookname
,)
216 self
.details_url
, self
.s3url
= find_archive_urls(self
.bookid
, self
.bookname
)
220 """Begin (and in many cases, finish) http output.
222 In asynchronous modes, fork and close down stdout.
224 log(self
.template
, self
.mimetype
, self
.destination
, self
.method
)
225 if self
.template
is not None:
226 progress_list
= ''.join('<li id="%s">%s</li>\n' % x
[:2] for x
in config
.PROGRESS_POINTS
227 if self
.mode
in x
[2])
230 'bookname': self
.bookname
,
231 'progress_list': progress_list
,
232 'details_url': self
.details_url
,
234 'bookurl': self
.bookurl
,
236 f
= open(self
.template
)
237 content
= f
.read() % d
242 if self
.method
== 'sync':
243 print 'Content-type: %s\n\n%s' %(self
.mimetype
, content
)
245 output_blob_and_shut_up(content
, self
.mimetype
)
246 log(sys
.stdout
, sys
.stderr
, sys
.stdin
)
251 log(sys
.stdout
, sys
.stderr
, sys
.stdin
)
254 def finish(self
, book
):
255 """Print any final http content."""
256 if self
.destination
== 'archive.org':
258 elif self
.destination
== 'download' and self
.method
== 'sync':
259 f
= open(book
.publish_file
)
262 output_blob_and_exit(data
, config
.CGI_MODES
[self
.mode
][2], self
.bookname
)
def log_notifier(self, message):
    """Notifier that records each progress message in the log and
    does nothing else."""
    entry = '******* got message "%s"' % message
    log(entry)
269 def callback_notifier(self
, message
):
270 """Call the callback url with each message."""
271 log('in callback_notifier')
274 log('child %s is doing callback with message %r' % (pid
, message
, ))
276 from urllib2
import urlopen
, URLError
277 from urllib
import urlencode
278 data
= urlencode({'message': message
})
280 f
= urlopen(self
.callback
, data
)
284 #traceback.print_exc()
285 log("ERROR in callback:\n %r\n %s %s" % (e
.url
, e
.code
, e
.msg
))
288 def javascript_notifier(self
, message
):
289 """Print little bits of javascript which will be appended to
290 an unfinished html page."""
292 if message
.startswith('ERROR:'):
293 log('got an error! %r' % message
)
294 print ('<b class="error-message">'
296 '</b></body></html>' % message
299 print ('<script type="text/javascript">\n'
300 'objavi_show_progress("%s");\n'
301 '</script>' % message
303 if message
== config
.FINISHED_MESSAGE
:
304 print '</body></html>'
306 except ValueError, e
:
307 log("failed to send message %r, got exception %r" % (message
, e
))
def pollee_notifier(self, message):
    """Append the message, as one line, to the file that the remote
    server can poll.

    Expects self.pollfile to exist already (None or a file object).  The
    file at config.POLL_NOTIFY_PATH % self.bookname is opened in append
    mode on first use, or again if the previous handle has been closed.
    The handle is flushed after every message and kept open between calls.
    """
    handle = self.pollfile
    if handle is None or handle.closed:
        handle = open(config.POLL_NOTIFY_PATH % self.bookname, 'a')
        self.pollfile = handle
    handle.write('%s\n' % message)
    # Flush straight away so a poller sees the message without delay.
    handle.flush()
def get_watchers(self):
    """Based on the CGI arguments, return a likely set of notifier
    methods.

    log_notifier is always included.  One more is added depending on
    self.method: pollee_notifier for 'poll', callback_notifier for
    'async' when a callback was supplied, and javascript_notifier for
    'sync' when the destination is 'html'.
    """
    log('in get_watchers. method %r, callback %r, destination %r' %
        (self.method, self.callback, self.destination))
    how = self.method
    chosen = set()
    # The three method values are mutually exclusive, so a single
    # if/elif chain selects at most one extra notifier.
    if how == 'poll':
        chosen.add(self.pollee_notifier)
    elif how == 'async':
        if self.callback:
            chosen.add(self.callback_notifier)
    elif how == 'sync':
        if self.destination == 'html':
            chosen.add(self.javascript_notifier)
    chosen.add(self.log_notifier)
    log('watchers are %s' % chosen)
    return chosen
339 # so we're making a pdf.
340 context
= Context(args
)
341 page_settings
= get_page_settings(args
)
343 with
Book(context
.bookid
, context
.server
, context
.bookname
,
344 page_settings
=page_settings
,
345 watchers
=context
.get_watchers(), isbn
=args
.get('isbn'),
346 license
=args
.get('license'), title
=args
.get('title'),
347 max_age
=float(args
.get('max-age', -1))) as book
:
351 if 'toc_header' in args
:
352 book
.toc_header
= args
['toc_header'].decode('utf-8')
354 book
.add_css(args
.get('css'), context
.mode
)
355 book
.add_section_titles()
357 if context
.mode
== 'book':
359 elif context
.mode
in ('web', 'newspaper'):
360 book
.make_simple_pdf(context
.mode
)
367 #These ones are similar enough to be handled by the one function
368 mode_newspaper
= mode_book
372 def mode_openoffice(args
):
373 """Make an openoffice document. A whole lot of the inputs have no
375 context
= Context(args
)
376 with
Book(context
.bookid
, context
.server
, context
.bookname
,
377 watchers
=context
.get_watchers(), isbn
=args
.get('isbn'),
378 license
=args
.get('license'), title
=args
.get('title'),
379 max_age
=float(args
.get('max-age', -1))) as book
:
383 book
.add_css(args
.get('css'), 'openoffice')
384 book
.add_section_titles()
389 log('making epub with\n%s' % pformat(args
))
390 #XXX need to catch and process lack of necessary arguments.
391 context
= Context(args
)
393 with
Book(context
.bookid
, context
.server
, context
.bookname
,
394 watchers
=context
.get_watchers(), title
=args
.get('title'),
395 max_age
=float(args
.get('max-age', -1))) as book
:
397 book
.make_epub(use_cache
=config
.USE_CACHED_IMAGES
)
def mode_bookizip(args):
    """Set up a Book for the request in args and call its
    publish_bookizip() method.

    The Book is used as a context manager, so its own exit handling
    runs when the block is left.  An absent 'max-age' argument is
    passed on as -1.
    """
    log('making bookizip with\n%s' % pformat(args))
    context = Context(args)

    book_options = {
        'watchers': context.get_watchers(),
        'title': args.get('title'),
        'max_age': float(args.get('max-age', -1)),
    }
    with Book(context.bookid, context.server, context.bookname,
              **book_options) as book:
        book.publish_bookizip()
413 args
= parse_args(ARG_VALIDATORS
)
414 mode
= args
.get('mode')
415 if mode
is None and 'book' in args
:
419 CGI_CONTEXT
= 'SERVER_NAME' in os
.environ
or args
.get('cgi-context', 'no').lower() in '1true'
421 if not args
and not CGI_CONTEXT
:
425 output_function
= globals().get('mode_%s' % mode
, mode_form
)
426 output_function(args
)
428 if __name__
== '__main__':
429 if config
.CGITB_DOMAINS
and os
.environ
.get('REMOTE_ADDR') in config
.CGITB_DOMAINS
: