3 # Part of Objavi2, which turns html manuals into books
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Make a pdf from the specified book."""
22 from __future__
import with_statement
27 from pprint
import pformat
29 from objavi
.fmbook
import Book
, HTTP_HOST
, find_archive_urls
30 from objavi
import config
31 from objavi
import twiki_wrapper
32 from objavi
.book_utils
import init_log
, log
, make_book_name
33 from objavi
.cgi_utils
import parse_args
, optionise
, listify
, get_server_list
34 from objavi
.cgi_utils
import is_utf8
, isfloat
, isfloat_or_auto
, is_isbn
, is_url
35 from objavi
.cgi_utils
import output_blob_and_exit
, output_blob_and_shut_up
, output_and_exit
36 from objavi
.cgi_utils
import get_size_list
, get_default_css
, font_links
39 # ARG_VALIDATORS is a mapping between the expected cgi arguments and
40 # functions to validate their values. (None means no validation).
42 "book": re
.compile(r
'^([\w-]+/?)*[\w-]+$').match
, # can be: BlahBlah/Blah_Blah
43 "css": is_utf8
, # an url, empty (for default), or css content
44 "title": lambda x
: len(x
) < 999 and is_utf8(x
),
46 "license": config
.LICENSES
.__contains
__,
47 "server": config
.SERVER_DEFAULTS
.__contains
__,
48 "engine": config
.ENGINES
.__contains
__,
49 "booksize": config
.PAGE_SIZE_DATA
.__contains
__,
50 "page_width": isfloat
,
51 "page_height": isfloat
,
52 "gutter": isfloat_or_auto
,
53 "top_margin": isfloat_or_auto
,
54 "side_margin": isfloat_or_auto
,
55 "bottom_margin": isfloat_or_auto
,
56 "columns": isfloat_or_auto
,
57 "column_margin": isfloat_or_auto
,
58 "cgi-context": lambda x
: x
.lower() in '1true0false',
59 "mode": config
.CGI_MODES
.__contains
__,
60 "pdftype": lambda x
: config
.CGI_MODES
.get(x
, [False])[0], #for css mode
61 "rotate": u
"yes".__eq
__,
62 "grey_scale": u
"yes".__eq
__,
63 "destination": config
.CGI_DESTINATIONS
.__contains
__,
64 "toc_header": is_utf8
,
66 "method": config
.CGI_METHODS
.__contains
__,
70 __doc__
+= '\nValid arguments are: %s.\n' % ', '.join(ARG_VALIDATORS
.keys())
73 def get_page_settings(args
):
74 """Find the size and any optional layout settings.
76 args['booksize'] is either a keyword describing a size or
77 'custom'. If it is custom, the form is inspected for specific
78 dimensions -- otherwise these are ignored.
80 The margins, gutter, number of columns, and column
81 margins all set themselves automatically based on the page
82 dimensions, but they can be overridden. Any that are are
84 # get all the values including sizes first
85 # the sizes are found as 'page_width' and 'page_height',
86 # but the Book class expects them as a 'pointsize' tuple, so
87 # they are easily ignored.
89 for k
, extrema
in config
.PAGE_EXTREMA
.iteritems():
91 v
= float(args
.get(k
))
92 except (ValueError, TypeError):
93 #log("don't like %r as a float value for %s!" % (args.get(k), k))
95 min_val
, max_val
, multiplier
= extrema
96 if v
< min_val
or v
> max_val
:
97 log('rejecting %s: outside %s' % (v
,) + extrema
)
99 log('found %s=%s' % (k
, v
))
100 settings
[k
] = v
* multiplier
#convert to points in many cases
102 # now if args['booksize'] is not 'custom', the width and height found
104 size
= args
.get('booksize', config
.DEFAULT_SIZE
)
105 settings
.update(config
.PAGE_SIZE_DATA
[size
])
107 #if args['mode'] is 'newspaper', then the number of columns is
108 #automatically determined unless set -- otherwise default is 1.
109 if args
.get('mode') == 'newspaper' and settings
.get('columns') is None:
110 settings
['columns'] = 'auto'
112 if args
.get('grey_scale'):
113 settings
['grey_scale'] = True
116 #will raise KeyError if width, height aren't set
117 settings
['pointsize'] = (settings
['page_width'], settings
['page_height'])
118 del settings
['page_width']
119 del settings
['page_height']
121 settings
['engine'] = args
.get('engine', config
.DEFAULT_ENGINE
)
def mode_booklist(args):
    """Return the book list of the requested server, passed through optionise().

    The server name is read from args['server'] and falls back to
    config.DEFAULT_SERVER.  args['book'] (None when absent) is handed to
    optionise() as its default.
    """
    #XXX need to include booki servers
    server = args.get('server', config.DEFAULT_SERVER)
    books = twiki_wrapper.get_book_list(server)
    preselected = args.get('book')
    return optionise(books, default=preselected)
132 #XX sending as text/html, but it doesn't really matter
133 return get_default_css(args
.get('server', config
.DEFAULT_SERVER
), args
.get('pdftype', 'book'))
138 f
= open(config
.FORM_TEMPLATE
)
141 f
= open(config
.FONT_LIST_INCLUDE
)
142 font_list
= [x
.strip() for x
in f
if x
.strip()]
144 server
= args
.get('server', config
.DEFAULT_SERVER
)
145 book
= args
.get('book')
146 size
= args
.get('booksize', config
.DEFAULT_SIZE
)
147 engine
= args
.get('engine', config
.DEFAULT_ENGINE
)
149 'server_options': optionise(get_server_list(), default
=server
),
150 'book_options': optionise(twiki_wrapper
.get_book_list(server
), default
=book
),
151 'size_options': optionise(get_size_list(), default
=size
),
152 'engines': optionise(config
.ENGINES
.keys(), default
=engine
),
153 'pdf_types': optionise(sorted(k
for k
, v
in config
.CGI_MODES
.iteritems() if v
[0])),
154 'css': get_default_css(server
),
155 'font_links': listify(font_links()),
156 'font_list': listify(font_list
),
157 'default_license' : config
.DEFAULT_LICENSE
,
158 'licenses' : optionise(config
.LICENSES
, default
=config
.DEFAULT_LICENSE
),
164 for id, title
, type, source
, classes
, epilogue
in config
.FORM_INPUTS
:
165 val
= d
.get(source
, '')
166 e
= config
.FORM_ELEMENT_TYPES
[type] % locals()
167 form
.append('\n<div id="%(id)s_div" class="form-item %(classes)s">\n'
168 '<div class="input_title">%(title)s</div>\n'
169 '<div class="input_contents"> %(e)s %(epilogue)s\n</div>'
170 '</div>\n' % locals())
173 _valid_inputs
= set(ARG_VALIDATORS
)
174 _form_inputs
= set(x
[0] for x
in config
.FORM_INPUTS
if x
[2] != 'ul')
175 log("valid but not used inputs: %s" % (_valid_inputs
- _form_inputs
))
176 log("invalid form inputs: %s" % (_form_inputs
- _valid_inputs
))
178 return template
% {'form': ''.join(form
)}
182 class Context(object):
183 """Work out what to show the caller. The method/destination matrix:
185 [dest/method] sync async poll
186 archive.org url id id
191 'html 1' is dripfed progress reports; 'html 2' polls via
192 javascript. 'id' is the book filename. 'url' is a full url
193 locating the file on archive.org or the objavi server. '.' means
198 def __init__(self
, args
):
199 self
.bookid
= args
.get('book')
200 self
.server
= args
.get('server', config
.DEFAULT_SERVER
)
201 self
.mode
= args
.get('mode', 'book')
202 extension
= config
.CGI_MODES
.get(self
.mode
)[1]
203 self
.bookname
= make_book_name(self
.bookid
, self
.server
, extension
)
204 self
.destination
= args
.get('destination', config
.DEFAULT_CGI_DESTINATION
)
205 self
.callback
= args
.get('callback', None)
206 self
.method
= args
.get('method', config
.CGI_DESTINATIONS
[self
.destination
]['default'])
207 self
.template
, self
.mimetype
= config
.CGI_DESTINATIONS
[self
.destination
][self
.method
]
209 self
.bookurl
= "http://%s/books/%s" % (HTTP_HOST
, self
.bookname
,)
211 self
.bookurl
= "books/%s" % (self
.bookname
,)
213 self
.details_url
, self
.s3url
= find_archive_urls(self
.bookid
, self
.bookname
)
217 """Begin (and in many cases, finish) http output.
219 In asynchronous modes, fork and close down stdout.
221 log(self
.template
, self
.mimetype
, self
.destination
, self
.method
)
222 if self
.template
is not None:
223 progress_list
= ''.join('<li id="%s">%s</li>\n' % x
[:2] for x
in config
.PROGRESS_POINTS
224 if self
.mode
in x
[2])
227 'bookname': self
.bookname
,
228 'progress_list': progress_list
,
229 'details_url': self
.details_url
,
231 'bookurl': self
.bookurl
,
233 f
= open(self
.template
)
234 content
= f
.read() % d
239 if self
.method
== 'sync':
240 print 'Content-type: %s\n\n%s' %(self
.mimetype
, content
)
242 output_blob_and_shut_up(content
, self
.mimetype
)
243 log(sys
.stdout
, sys
.stderr
, sys
.stdin
)
248 log(sys
.stdout
, sys
.stderr
, sys
.stdin
)
251 def finish(self
, book
):
252 """Print any final http content."""
253 if self
.destination
== 'archive.org':
255 elif self
.destination
== 'download' and self
.method
== 'sync':
256 f
= open(book
.publish_file
)
259 output_blob_and_exit(data
, config
.CGI_MODES
[self
.mode
][2], self
.bookname
)
def log_notifier(self, message):
    """Write the message to the log; nothing is sent anywhere else."""
    line = '******* got message "%s"' % message
    log(line)
266 def callback_notifier(self
, message
):
267 """Call the callback url with each message."""
268 log('in callback_notifier')
271 log('child %s is doing callback with message %r' % (pid
, message
, ))
273 from urllib2
import urlopen
, URLError
274 from urllib
import urlencode
275 data
= urlencode({'message': message
})
277 f
= urlopen(self
.callback
, data
)
281 #traceback.print_exc()
282 log("ERROR in callback:\n %r\n %s %s" % (e
.url
, e
.code
, e
.msg
))
285 def javascript_notifier(self
, message
):
286 """Print little bits of javascript which will be appended to
287 an unfinished html page."""
289 if message
.startswith('ERROR:'):
290 log('got an error! %r' % message
)
291 print ('<b class="error-message">'
293 '</b></body></html>' % message
296 print ('<script type="text/javascript">\n'
297 'objavi_show_progress("%s");\n'
298 '</script>' % message
300 if message
== config
.FINISHED_MESSAGE
:
301 print '</body></html>'
303 except ValueError, e
:
304 log("failed to send message %r, got exception %r" % (message
, e
))
def pollee_notifier(self, message):
    """Append the message to a file that the remote server can poll.

    The file named by config.POLL_NOTIFY_PATH % self.bookname is opened
    in append mode the first time it is needed (or again if the previous
    handle has been closed) and kept on self.pollfile for later calls.
    Each message is written on its own line and flushed immediately.
    """
    # getattr rather than plain attribute access: nothing guarantees that
    # pollfile has been assigned on this instance before the first message.
    pollfile = getattr(self, 'pollfile', None)
    if pollfile is None or pollfile.closed:
        pollfile = open(config.POLL_NOTIFY_PATH % self.bookname, 'a')
        self.pollfile = pollfile
    pollfile.write('%s\n' % message)
    pollfile.flush()
    # NOTE: the handle is intentionally left open between messages; it is
    # not closed here, not even after config.FINISHED_MESSAGE.
316 def get_watchers(self
):
317 """Based on the CGI arguments, return a likely set of notifier
319 log('in get_watchers. method %r, callback %r, destination %r' %
320 (self
.method
, self
.callback
, self
.destination
))
322 if self
.method
== 'poll':
323 watchers
.add(self
.pollee_notifier
)
324 if self
.method
== 'async' and self
.callback
:
325 watchers
.add(self
.callback_notifier
)
326 if self
.method
== 'sync' and self
.destination
== 'html':
327 watchers
.add(self
.javascript_notifier
)
328 watchers
.add(self
.log_notifier
)
329 log('watchers are %s' % watchers
)
336 # so we're making a pdf.
337 context
= Context(args
)
338 page_settings
= get_page_settings(args
)
340 with
Book(context
.bookid
, context
.server
, context
.bookname
,
341 page_settings
=page_settings
,
342 watchers
=context
.get_watchers(), isbn
=args
.get('isbn'),
343 license
=args
.get('license'), title
=args
.get('title'),
344 max_age
=float(args
.get('max-age', -1))) as book
:
348 if 'toc_header' in args
:
349 book
.toc_header
= args
['toc_header'].decode('utf-8')
351 book
.add_css(args
.get('css'), context
.mode
)
352 book
.add_section_titles()
354 if context
.mode
== 'book':
356 elif context
.mode
in ('web', 'newspaper'):
357 book
.make_simple_pdf(context
.mode
)
364 #These ones are similar enough to be handled by the one function
365 mode_newspaper
= mode_book
369 def mode_openoffice(args
):
370 """Make an openoffice document. A whole lot of the inputs have no
372 context
= Context(args
)
373 with
Book(context
.bookid
, context
.server
, context
.bookname
,
374 watchers
=context
.get_watchers(), isbn
=args
.get('isbn'),
375 license
=args
.get('license'), title
=args
.get('title'),
376 max_age
=float(args
.get('max-age', -1))) as book
:
380 book
.add_css(args
.get('css'), 'openoffice')
381 book
.add_section_titles()
386 log('making epub with\n%s' % pformat(args
))
387 #XXX need to catch and process lack of necessary arguments.
388 context
= Context(args
)
390 with
Book(context
.bookid
, context
.server
, context
.bookname
,
391 watchers
=context
.get_watchers(), title
=args
.get('title'),
392 max_age
=float(args
.get('max-age', -1))) as book
:
394 book
.make_epub(use_cache
=config
.USE_CACHED_IMAGES
)
398 def mode_bookizip(args
):
399 log('making bookizip with\n%s' % pformat(args
))
400 context
= Context(args
)
402 with
Book(context
.bookid
, context
.server
, context
.bookname
,
403 watchers
=context
.get_watchers(), title
=args
.get('title'),
404 max_age
=float(args
.get('max-age', -1))) as book
:
405 book
.publish_bookizip()
410 args
= parse_args(ARG_VALIDATORS
)
411 mode
= args
.get('mode')
412 if mode
is None and 'book' in args
:
416 CGI_CONTEXT
= 'SERVER_NAME' in os
.environ
or args
.get('cgi-context', 'no').lower() in '1true'
418 if not args
and not CGI_CONTEXT
:
422 output_function
= globals().get('mode_%s' % mode
, mode_form
)
423 output_function(args
)
425 if __name__
== '__main__':
426 if config
.CGITB_DOMAINS
and os
.environ
.get('REMOTE_ADDR') in config
.CGITB_DOMAINS
: