added test of len() method for SQLTable
[pygr.git] / doc / tools / mkhowto
blob21cd6fb276c89a8b91f6e775f49ad4df5aac8a72
1 #! /usr/bin/env python
2 # -*- Python -*-
3 """usage: %(program)s [options...] file ...
5 Options specifying formats to build:
6 --html HyperText Markup Language (default)
7 --pdf Portable Document Format
8 --ps PostScript
9 --dvi 'DeVice Indepentent' format from TeX
10 --text ASCII text (requires lynx)
12 More than one output format may be specified, or --all.
14 HTML options:
15 --address, -a Specify an address for page footers.
16 --dir Specify the directory for HTML output.
17 --link Specify the number of levels to include on each page.
18 --split, -s Specify a section level for page splitting, default: %(max_split_depth)s.
19 --iconserver, -i Specify location of icons (default: ./).
20 --image-type Specify the image type to use in HTML output;
21 values: gif, png (default).
22 --numeric Don't rename the HTML files; just keep node#.html for
23 the filenames.
24 --style Specify the CSS file to use for the output (filename,
25 not a URL).
26 --up-link URL to a parent document.
27 --up-title Title of a parent document.
28 --favicon Icon to display in the browsers location bar.
30 Other options:
31 --a4 Format for A4 paper.
32 --letter Format for US letter paper (the default).
33 --help, -H Show this text.
34 --logging, -l Log stdout and stderr to a file (*.how).
35 --debugging, -D Echo commands as they are executed.
36 --keep, -k Keep temporary files around.
37 --quiet, -q Do not print command output to stdout.
38 (stderr is also lost, sorry; see *.how for errors)
39 """
41 import getopt
42 import glob
43 import os
44 import re
45 import shutil
46 import sys
# Directory containing this script, and the documentation tree above it.
MYDIR = os.path.abspath(sys.path[0])
TOPDIR = os.path.dirname(MYDIR)

# Support files located relative to the script / documentation tree.
ISTFILE = os.path.join(TOPDIR, "texinputs", "python.ist")
NODE2LABEL_SCRIPT = os.path.join(MYDIR, "node2label.pl")
L2H_INIT_FILE = os.path.join(TOPDIR, "perl", "l2hinit.perl")

# Names of the external programs; each is resolved through the shell's PATH
# when a command line is executed.
BIBTEX_BINARY = "bibtex"
DVIPS_BINARY = "dvips"
LATEX_BINARY = "latex"
LATEX2HTML_BINARY = "latex2html"
LYNX_BINARY = "lynx"
MAKEINDEX_BINARY = "makeindex"
PDFLATEX_BINARY = "pdflatex"
PERL_BINARY = "perl"
PYTHON_BINARY = "python"
def usage(options, file):
    """Write the usage message to *file*.

    The message is the module docstring with its ``%(name)s`` placeholders
    filled in by looking each name up on *options* (via item access).
    """
    text = __doc__ % options
    file.write(text + "\n")
def error(options, message, err=2):
    """Report a fatal command-line error and terminate the program.

    Writes *message*, a blank line and the usage text to stderr, then exits
    with status *err* (default 2).  *message* may be any object; it is
    converted with str().
    """
    sys.stderr.write("%s\n" % (message,))
    sys.stderr.write("\n")
    usage(options, sys.stderr)
    # Honour the caller-supplied status; it used to be ignored in favour of
    # a hard-coded 2.
    sys.exit(err)
class Options:
    """Settings for a documentation build, filled in from the command line.

    Class attributes hold the defaults; parse() overrides them on the
    instance.  Instances also support ``options[name]`` lookups so that the
    usage message can be formatted with ``__doc__ % options``.
    """

    program = os.path.basename(sys.argv[0])

    address = ''
    builddir = None
    debugging = 0
    discard_temps = 1
    have_temps = 0
    icon_server = "."
    image_type = "png"
    logging = 0
    max_link_depth = 3
    max_split_depth = 6
    paper = "letter"
    quiet = 0
    runs = 0
    numeric = 0
    global_module_index = None
    style_file = os.path.join(TOPDIR, "html", "style.css")
    about_file = os.path.join(TOPDIR, "html", "about.dat")
    up_link = None
    up_title = None
    favicon = None

    # 'dvips_safe' is a weird option.  It is used mostly to make
    # LaTeX2HTML not try to be too smart about protecting the user
    # from a bad version of dvips -- some versions would core dump if
    # the path to the source DVI contained a dot, and it's apparently
    # difficult to determine if the version available has that bug.
    # This option gets set when PostScript output is requested
    # (because we're going to run dvips regardless, and we'll either
    # know it succeeds before LaTeX2HTML is run, or we'll have
    # detected the failure and bailed), or the user asserts that it's
    # safe from the command line.
    #
    # So, why does LaTeX2HTML think it appropriate to protect the user
    # from a dvips that's only potentially going to core dump?  Only
    # because they want to avoid doing a lot of work just to have to
    # bail later with no useful intermediates.  Unfortunately, they
    # bail *before* they know whether dvips will be needed at all.
    # I've gone around the bush a few times with the LaTeX2HTML
    # developers over whether this is appropriate behavior, and they
    # don't seem interested in changing their position.
    #
    dvips_safe = 0

    DEFAULT_FORMATS = ("html",)
    ALL_FORMATS = ("dvi", "html", "pdf", "ps", "text")

    def __init__(self):
        # Per-instance lists so that instances never share mutable state.
        self.formats = []
        self.l2h_init_files = []

    def __getitem__(self, key):
        """Return the attribute named *key*; raise KeyError if it is unset."""
        # This is used when formatting the usage message.
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError(key)

    def parse(self, args):
        """Apply the command-line options in *args* and finish initialization.

        *args* is the argument list without the program name.  Returns the
        positional arguments left over after option processing.  A
        getopt.error raised for an unknown or malformed option propagates to
        the caller; ``--help`` prints the usage text and exits.
        """
        opts, args = getopt.getopt(
            args, "Hi:a:s:lDkqr:",
            ["all", "postscript", "help", "iconserver=",
             "address=", "a4", "letter", "l2h-init=",
             "link=", "split=", "logging", "debugging",
             "keep", "quiet", "runs=", "image-type=",
             "about=", "numeric", "style=", "paper=",
             "up-link=", "up-title=", "dir=",
             "global-module-index=", "dvips-safe",
             "favicon="]
            + list(self.ALL_FORMATS))
        for opt, arg in opts:
            if opt == "--all":
                self.formats = list(self.ALL_FORMATS)
                self.dvips_safe = "ps" in self.formats
            elif opt in ("-H", "--help"):
                usage(self, sys.stdout)
                sys.exit()
            elif opt in ("-i", "--iconserver"):
                # "-i" is declared in the getopt spec and in the usage text;
                # it used to fall through every branch and be ignored.
                self.icon_server = arg
            elif opt in ("-a", "--address"):
                self.address = arg
            elif opt == "--a4":
                self.paper = "a4"
            elif opt == "--letter":
                self.paper = "letter"
            elif opt == "--link":
                self.max_link_depth = int(arg)
            elif opt in ("-s", "--split"):
                self.max_split_depth = int(arg)
            elif opt in ("-l", "--logging"):
                self.logging = self.logging + 1
            elif opt in ("-D", "--debugging"):
                self.debugging = self.debugging + 1
            elif opt in ("-k", "--keep"):
                self.discard_temps = 0
            elif opt in ("-q", "--quiet"):
                self.quiet = 1
            elif opt in ("-r", "--runs"):
                self.runs = int(arg)
            elif opt == "--image-type":
                self.image_type = arg
            elif opt == "--about":
                # always make this absolute:
                self.about_file = os.path.normpath(
                    os.path.abspath(arg))
            elif opt == "--numeric":
                self.numeric = 1
            elif opt == "--style":
                self.style_file = os.path.abspath(arg)
            elif opt == "--l2h-init":
                self.l2h_init_files.append(os.path.abspath(arg))
            elif opt == "--favicon":
                self.favicon = arg
            elif opt == "--up-link":
                self.up_link = arg
            elif opt == "--up-title":
                self.up_title = arg
            elif opt == "--global-module-index":
                self.global_module_index = arg
            elif opt == "--dir":
                if os.sep == "\\":
                    # Accept forward slashes on platforms that use "\".
                    arg = arg.replace("/", "\\")
                self.builddir = os.path.expanduser(arg)
            elif opt == "--paper":
                self.paper = arg
            elif opt == "--dvips-safe":
                self.dvips_safe = 1
            #
            # Format specifiers:
            #
            elif opt[2:] in self.ALL_FORMATS:
                self.add_format(opt[2:])
            elif opt == "--postscript":
                # synonym for --ps
                self.add_format("ps")
        self.initialize()
        #
        # return the args to allow the caller access:
        #
        return args

    def add_format(self, format):
        """Add a format to the formats list if not present."""
        if format not in self.formats:
            if format == "ps":
                # assume this is safe since we're going to run it anyway
                self.dvips_safe = 1
            self.formats.append(format)

    def initialize(self):
        """Complete initialization.  This is needed if parse() isn't used."""
        # add the default format if no formats were specified; copy into a
        # list so that add_format() keeps working afterwards:
        if not self.formats:
            self.formats = list(self.DEFAULT_FORMATS)
        # determine the base set of texinputs directories; split() always
        # returns at least one element, so an unset TEXINPUTS yields ['']:
        texinputs = os.environ.get("TEXINPUTS", "").split(os.pathsep)
        mydirs = [os.path.join(TOPDIR, "paper-" + self.paper),
                  os.path.join(TOPDIR, "texinputs"),
                  ]
        if '' in texinputs:
            # An empty entry is replaced in position: our directories go
            # immediately before it.
            i = texinputs.index('')
            texinputs[i:i] = mydirs
        else:
            texinputs += mydirs
        self.base_texinputs = texinputs
        if self.builddir:
            self.builddir = os.path.abspath(self.builddir)
class Job:
    """Build a single document in each of the requested output formats.

    A Job is created for one LaTeX source file.  External programs are run
    through run(), which appends their output to a per-document ``.how``
    transcript file and terminates the process when a command fails.
    """

    # Number of LaTeX passes that have been run for this document so far.
    latex_runs = 0

    def __init__(self, options, path):
        """Prepare to build *path* according to *options*.

        Creates the build directory when HTML or text output is requested,
        removes a stale transcript, backs up an existing ``<doc>.l2h`` file
        to ``<doc>.l2h~`` and writes a fresh LaTeX2HTML init file.
        """
        self.options = options
        self.doctype = get_doctype(path)
        self.filedir, self.doc = split_pathname(path)
        self.builddir = os.path.abspath(options.builddir or self.doc)
        if "html" in options.formats or "text" in options.formats:
            if not os.path.exists(self.builddir):
                os.mkdir(self.builddir)
            self.log_filename = os.path.join(self.builddir, self.doc + ".how")
        else:
            self.log_filename = os.path.abspath(self.doc + ".how")
        if os.path.exists(self.log_filename):
            os.unlink(self.log_filename)
        l2hconf = self.doc + ".l2h"
        if os.path.exists(l2hconf):
            if os.path.exists(l2hconf + "~"):
                os.unlink(l2hconf + "~")
            os.rename(l2hconf, l2hconf + "~")
        self.l2h_aux_init_file = self.doc + ".l2h"
        self.write_l2h_aux_init_file()

    def build(self):
        """Produce every requested format, then discard temporaries if asked."""
        self.setup_texinputs()
        formats = self.options.formats
        if "dvi" in formats or "ps" in formats:
            self.build_dvi()
        if "pdf" in formats:
            self.build_pdf()
        if "ps" in formats:
            self.build_ps()
        if "html" in formats:
            self.require_temps()
            self.build_html(self.builddir)
            if self.options.icon_server == ".":
                # Icons are served from the output directory itself, so copy
                # the support images next to the generated pages.
                pattern = os.path.join(TOPDIR, "html", "icons",
                                       "*." + self.options.image_type)
                imgs = glob.glob(pattern)
                if not imgs:
                    self.warning(
                        "Could not locate support images of type %s."
                        % repr(self.options.image_type))
                for fn in imgs:
                    new_fn = os.path.join(self.builddir, os.path.basename(fn))
                    shutil.copyfile(fn, new_fn)
        if "text" in formats:
            self.require_temps()
            tempdir = self.doc
            need_html = "html" not in formats
            if self.options.max_split_depth != 1:
                # The text dump is taken from a single HTML page, so force an
                # unsplit HTML build into a scratch directory.
                with open(self.l2h_aux_init_file, "a") as fp:
                    fp.write("# re-hack this file for --text:\n")
                    l2hoption(fp, "MAX_SPLIT_DEPTH", "1")
                    fp.write("1;\n")
                tempdir = self.doc + "-temp-html"
                need_html = 1
            if need_html:
                self.build_html(tempdir, max_split_depth=1)
            self.build_text(tempdir)
        if self.options.discard_temps:
            self.cleanup()

    def setup_texinputs(self):
        """Export TEXINPUTS with the document's directory searched first."""
        texinputs = [self.filedir] + self.options.base_texinputs
        os.environ["TEXINPUTS"] = os.pathsep.join(texinputs)
        self.message("TEXINPUTS=" + os.environ["TEXINPUTS"])

    def build_aux(self, binary=None):
        """Run the first LaTeX pass (with *binary*, default LATEX_BINARY)."""
        if binary is None:
            binary = LATEX_BINARY
        # Start from placeholder index files so the first pass can read them.
        new_index("%s.ind" % self.doc, "genindex")
        new_index("mod%s.ind" % self.doc, "modindex")
        self.run("%s %s" % (binary, self.doc))
        self.use_bibtex = check_for_bibtex(self.doc + ".aux")
        self.latex_runs = 1

    def build_dvi(self):
        """Build the DVI output with latex."""
        self.use_latex(LATEX_BINARY)

    def build_pdf(self):
        """Build the PDF output with pdflatex."""
        self.use_latex(PDFLATEX_BINARY)

    def use_latex(self, binary):
        """Run *binary* and the index/bibliography tools to completion."""
        self.require_temps(binary=binary)
        if self.latex_runs < 2:
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s mod%s.idx" % (MAKEINDEX_BINARY, self.doc))
            use_indfix = 0
            if os.path.isfile(self.doc + ".idx"):
                use_indfix = 1
                # call to Doc/tools/fix_hack omitted; doesn't appear necessary
                self.run("%s %s.idx" % (MAKEINDEX_BINARY, self.doc))
                import indfix
                indfix.process(self.doc + ".ind")
            if self.use_bibtex:
                self.run("%s %s" % (BIBTEX_BINARY, self.doc))
            self.process_synopsis_files()
            self.run("%s %s" % (binary, self.doc))
            self.latex_runs = self.latex_runs + 1
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s -s %s mod%s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
            if use_indfix:
                # indfix was imported above; use_indfix is only set there.
                self.run("%s -s %s %s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
                indfix.process(self.doc + ".ind")
            self.process_synopsis_files()
        #
        # and now finish it off:
        #
        if os.path.isfile(self.doc + ".toc") and binary == PDFLATEX_BINARY:
            import toc2bkm
            if self.doctype == "manual":
                bigpart = "chapter"
            else:
                bigpart = "section"
            toc2bkm.process(self.doc + ".toc", self.doc + ".bkm", bigpart)
        if self.use_bibtex:
            self.run("%s %s" % (BIBTEX_BINARY, self.doc))
        self.run("%s %s" % (binary, self.doc))
        self.latex_runs = self.latex_runs + 1

    def process_synopsis_files(self):
        """Drop the duplicated last line from each ``<doc>*.syn`` file."""
        synopsis_files = glob.glob(self.doc + "*.syn")
        for path in synopsis_files:
            uniqify_module_table(path)

    def build_ps(self):
        """Convert the DVI file to PostScript with dvips."""
        self.run("%s -N0 -o %s.ps %s" % (DVIPS_BINARY, self.doc, self.doc))

    def build_html(self, builddir, max_split_depth=None):
        """Run LaTeX2HTML into *builddir* and post-process its output.

        *max_split_depth* defaults to the value from the options.  Exits with
        status 1 when the ``.tex`` source cannot be found on TEXINPUTS.
        """
        if max_split_depth is None:
            max_split_depth = self.options.max_split_depth
        texfile = None
        for p in os.environ["TEXINPUTS"].split(os.pathsep):
            fn = os.path.join(p, self.doc + ".tex")
            if os.path.isfile(fn):
                texfile = fn
                break
        if not texfile:
            self.warning("Could not locate %s.tex; aborting." % self.doc)
            sys.exit(1)
        # remove leading ./ (or equiv.); might avoid problems w/ dvips
        if texfile[:2] == os.curdir + os.sep:
            texfile = texfile[2:]
        # build the command line and run LaTeX2HTML:
        if not os.path.isdir(builddir):
            os.mkdir(builddir)
        else:
            for fname in glob.glob(os.path.join(builddir, "*.html")):
                os.unlink(fname)
        args = [LATEX2HTML_BINARY,
                "-init_file", self.l2h_aux_init_file,
                "-dir", builddir,
                texfile
                ]
        self.run(" ".join(args))        # XXX need quoting!
        # ... postprocess
        shutil.copyfile(self.options.style_file,
                        os.path.join(builddir, self.doc + ".css"))
        shutil.copyfile(os.path.join(builddir, self.doc + ".html"),
                        os.path.join(builddir, "index.html"))
        if max_split_depth != 1:
            label_file = os.path.join(builddir, "labels.pl")
            target = " = q/about/;\n"
            with open(label_file) as fp:
                for line in fp:
                    if not line.endswith(target):
                        continue
                    # The node file name is on the line after the label.
                    m = re.search(r"\|(node\d+\.[a-z]+)\|", next(fp, ""))
                    if m is None:
                        # Used to fail with AttributeError on m.group().
                        self.warning(
                            "Could not determine the 'about' node from %s."
                            % label_file)
                    else:
                        about_node = m.group(1)
                        shutil.copyfile(os.path.join(builddir, about_node),
                                        os.path.join(builddir, "about.html"))
                    break
            if not self.options.numeric:
                pwd = os.getcwd()
                try:
                    os.chdir(builddir)
                    self.run("%s %s *.html" % (PERL_BINARY, NODE2LABEL_SCRIPT))
                finally:
                    os.chdir(pwd)
        # These files need to be cleaned up here since builddir there
        # can be more than one, so we clean each of them.
        if self.options.discard_temps:
            for fn in ("images.tex", "images.log", "images.aux"):
                safe_unlink(os.path.join(builddir, fn))

    def build_text(self, tempdir=None):
        """Dump ``index.html`` from *tempdir* to ``<doc>.txt`` using lynx."""
        if tempdir is None:
            tempdir = self.doc
        indexfile = os.path.join(tempdir, "index.html")
        self.run("%s -nolist -dump %s >%s.txt"
                 % (LYNX_BINARY, indexfile, self.doc))

    def require_temps(self, binary=None):
        """Make sure the first LaTeX pass has been run."""
        if not self.latex_runs:
            self.build_aux(binary=binary)

    def write_l2h_aux_init_file(self):
        """Write the LaTeX2HTML init file for this document.

        The file is the stock init file, followed by any user-supplied init
        files, followed by variable settings derived from the options.
        """
        options = self.options
        d = string_to_perl(os.path.dirname(L2H_INIT_FILE))
        with open(self.l2h_aux_init_file, "w") as fp:
            fp.write("package main;\n"
                     "push (@INC, '%s');\n"
                     "$mydir = '%s';\n"
                     % (d, d))
            with open(L2H_INIT_FILE) as src:
                fp.write(src.read())
            for filename in options.l2h_init_files:
                fp.write("\n# initialization code incorporated from:\n# ")
                fp.write(filename)
                fp.write("\n")
                with open(filename) as src:
                    fp.write(src.read())
            fp.write("\n"
                     "# auxillary init file for latex2html\n"
                     "# generated by mkhowto\n"
                     "$NO_AUTO_LINK = 1;\n"
                     )
            l2hoption(fp, "ABOUT_FILE", options.about_file)
            l2hoption(fp, "ICONSERVER", options.icon_server)
            l2hoption(fp, "IMAGE_TYPE", options.image_type)
            l2hoption(fp, "ADDRESS", options.address)
            l2hoption(fp, "MAX_LINK_DEPTH", options.max_link_depth)
            l2hoption(fp, "MAX_SPLIT_DEPTH", options.max_split_depth)
            l2hoption(fp, "EXTERNAL_UP_LINK", options.up_link)
            l2hoption(fp, "EXTERNAL_UP_TITLE", options.up_title)
            l2hoption(fp, "FAVORITES_ICON", options.favicon)
            l2hoption(fp, "GLOBAL_MODULE_INDEX", options.global_module_index)
            l2hoption(fp, "DVIPS_SAFE", options.dvips_safe)
            # A Perl file that is require'd must end with a true value.
            fp.write("1;\n")

    def cleanup(self):
        """Remove intermediate files left behind by the build tools."""
        self.__have_temps = 0
        for pattern in ("%s.aux", "%s.log", "%s.out", "%s.toc", "%s.bkm",
                        "%s.idx", "%s.ilg", "%s.ind", "%s.pla",
                        "%s.bbl", "%s.blg",
                        "mod%s.idx", "mod%s.ind", "mod%s.ilg",
                        ):
            safe_unlink(pattern % self.doc)
        # Explicit loops: map() is lazy on Python 3 and would delete nothing.
        for path in glob.glob(self.doc + "*.syn"):
            safe_unlink(path)
        for spec in ("IMG*", "*.pl", "WARNINGS", "index.dat", "modindex.dat"):
            pattern = os.path.join(self.doc, spec)
            for path in glob.glob(pattern):
                safe_unlink(path)
        if "dvi" not in self.options.formats:
            safe_unlink(self.doc + ".dvi")
        if os.path.isdir(self.doc + "-temp-html"):
            shutil.rmtree(self.doc + "-temp-html", ignore_errors=1)
        if not self.options.logging:
            os.unlink(self.log_filename)
        if not self.options.debugging:
            os.unlink(self.l2h_aux_init_file)

    def run(self, command):
        """Run the shell command *command*; exit the program if it fails.

        Except on Windows, the command's stdout and stderr are appended to
        the transcript file.  On failure the relevant part of the transcript
        is echoed to stderr and the process exits with the command's exit
        status (or 1 when that cannot be determined).
        """
        self.message(command)
        if sys.platform.startswith("win"):
            rc = os.system(command)
        else:
            rc = os.system("(%s) </dev/null >>%s 2>&1"
                           % (command, self.log_filename))
        if rc:
            self.warning(
                "Session transcript and error messages are in %s."
                % self.log_filename)
            result = 1
            if hasattr(os, "WIFEXITED"):
                if os.WIFEXITED(rc):
                    result = os.WEXITSTATUS(rc)
                    self.warning("Exited with status %s." % result)
                elif os.WIFSIGNALED(rc):
                    # WTERMSIG, not WSTOPSIG: the process was terminated by
                    # the signal rather than stopped.
                    self.warning("Killed by signal %s." % os.WTERMSIG(rc))
                else:
                    self.warning("Return code: %s" % rc)
            else:
                self.warning("Return code: %s" % rc)
            sys.stderr.write("The relevant lines from the transcript are:\n")
            sys.stderr.write("-" * 72 + "\n")
            sys.stderr.writelines(get_run_transcript(self.log_filename))
            sys.exit(result)

    def message(self, msg):
        """Log *msg* with a "+++ " prefix; also print it unless quiet."""
        msg = "+++ " + msg
        if not self.options.quiet:
            sys.stdout.write(msg + "\n")
        self.log(msg + "\n")

    def warning(self, msg):
        """Write *msg* with a "*** " prefix to stderr and to the transcript."""
        msg = "*** %s\n" % msg
        sys.stderr.write(msg)
        self.log(msg)

    def log(self, msg):
        """Append *msg* verbatim to the transcript file."""
        with open(self.log_filename, "a") as fp:
            fp.write(msg)
def get_run_transcript(filename):
    """Return lines from the transcript file for the most recent run() call.

    The result starts at the last line beginning with "+++ " and runs to the
    end of the file; when no such line exists, every line is returned.
    """
    fp = open(filename)
    transcript = fp.readlines()
    fp.close()
    # Scan backwards for the marker written before the latest command.
    start = 0
    for index in range(len(transcript) - 1, -1, -1):
        if transcript[index][:4] == "+++ ":
            start = index
            break
    return transcript[start:]
def safe_unlink(path):
    """Remove the file *path*, ignoring any OSError raised by the removal.

    This makes it safe to call for files that may not exist.
    """
    try:
        os.unlink(path)
    except OSError:
        # Best-effort removal: a missing file is not an error here.
        return
def split_pathname(path):
    """Return ``(directory, document name)`` for *path*.

    The path is made absolute first; a trailing ".tex" is stripped from the
    final component to give the document name.
    """
    directory, name = os.path.split(os.path.abspath(path))
    if name.endswith(".tex"):
        name = name[:-len(".tex")]
    return directory, name
# Matches a \documentclass declaration (with optional [...] options) at the
# start of a line and captures the class name.
_doctype_rx = re.compile(r"\\documentclass(?:\[[^]]*\])?{([a-zA-Z]*)}")


def get_doctype(path):
    """Return the document class named in the LaTeX file *path*.

    The first line that starts with a ``\\documentclass`` declaration
    decides the result; None is returned when no line matches.
    """
    found = None
    source = open(path)
    for text in source:
        hit = _doctype_rx.match(text)
        if hit is not None:
            found = hit.group(1)
            break
    source.close()
    return found
def main():
    """Parse the command line and build every document named on it.

    With no file arguments, the single ``*.tex`` file in the current
    directory is used; zero or several candidates is a usage error.
    """
    options = Options()
    try:
        args = options.parse(sys.argv[1:])
    except getopt.error as msg:
        # error() terminates the program, so args is always bound below.
        error(options, msg)
    if not args:
        # attempt to locate single .tex file in current directory:
        candidates = glob.glob("*.tex")
        if not candidates:
            error(options, "No file to process.")
        if len(candidates) > 1:
            error(options, "Could not deduce which files should be processed.")
        args = candidates
    #
    # parameters are processed, let's go!
    #
    for path in args:
        job = Job(options, path)
        job.build()
def l2hoption(fp, option, value):
    """Write a Perl assignment ``$option = "value";`` to *fp*.

    Nothing is written when *value* is false (None, 0, empty string).  The
    value is converted with str() and escaped with string_to_perl().
    """
    if not value:
        return
    escaped = string_to_perl(str(value))
    fp.write('$%s = "%s";\n' % (option, escaped))
# Characters that need a backslash in front of them when placed in a Perl
# string by this script; every other character is copied unchanged.
_to_perl = {
    "@": "\\@",
    "$": "\\$",
    '"': '\\"',
}


def string_to_perl(s):
    """Return *s* with ``@``, ``$`` and ``"`` backslash-escaped for Perl.

    All other characters pass through unchanged.  Previously a character
    missing from the lookup table (NUL, or anything above U+00FF) mapped to
    None and made the join raise TypeError.
    """
    return ''.join([_to_perl.get(ch, ch) for ch in s])
def check_for_bibtex(filename):
    """Return True if the file *filename* contains a ``\\bibdata{`` entry."""
    aux = open(filename)
    contents = aux.read()
    aux.close()
    return r"\bibdata{" in contents
def uniqify_module_table(filename):
    """Drop the last line of *filename* when it repeats the line before it.

    Only one trailing duplicate is removed per call.  The file is rewritten
    only when a line was actually dropped, and both file handles are closed
    explicitly instead of being left to the garbage collector.
    """
    with open(filename) as fp:
        lines = fp.readlines()
    if len(lines) > 1 and lines[-1] == lines[-2]:
        del lines[-1]
        with open(filename, "w") as fp:
            fp.writelines(lines)
def new_index(filename, label="genindex"):
    """Create *filename* as an empty ``theindex`` environment labelled *label*.

    Any existing file of that name is overwritten.
    """
    # Raw string: the backslash before the first newline is NOT a line
    # continuation, so the file starts with a line holding a lone backslash.
    stub = r"""\
\begin{theindex}
\label{%s}
\end{theindex}
""" % label
    index_file = open(filename, "w")
    index_file.write(stub)
    index_file.close()
# Run the build only when executed as a script, not when imported.
if __name__ == "__main__":
    main()