3 __version__
= '$Revision: 1.1 $'
10 from xml
.sax
.saxutils
import quoteattr
17 # Hackish way to deal with macros replaced with simple text
18 (re
.compile(r
"\\ABC\b"), "ABC"),
19 (re
.compile(r
"\\ASCII\b"), "ASCII"),
20 (re
.compile(r
"\\Cpp\b"), "C++"),
21 (re
.compile(r
"\\EOF\b"), "EOF"),
22 (re
.compile(r
"\\NULL\b"), "NULL"),
23 (re
.compile(r
"\\POSIX\b"), "POSIX"),
24 (re
.compile(r
"\\UNIX\b"), "Unix"),
25 # deal with turds left over from LaTeX2HTML
26 (re
.compile(r
"<#\d+#>"), ""),
32 def __init__(self
, link
, str, seqno
):
35 for pattern
, replacement
in REPLACEMENTS
:
36 str = pattern
.sub(replacement
, str)
38 self
.text
= split_entry_text(str)
39 self
.key
= split_entry_key(str)
def __cmp__(self, other):
    """Order two nodes, using the sequence number as the final tie-breaker.

    The entry comparison from cmp_entry() decides the order whenever it
    is non-zero; only when it reports no difference are the seqno
    attributes compared.
    """
    entry_order = self.cmp_entry(other)
    if entry_order:
        return entry_order
    return cmp(self.seqno, other.seqno)
46 def cmp_entry(self
, other
):
47 """Comparison 'operator' that ignores sequence number."""
49 for i
in range(min(len(self
.key
), len(other
.key
))):
50 c
= (cmp_part(self
.key
[i
], other
.key
[i
])
51 or cmp_part(self
.text
[i
], other
.text
[i
]))
54 return c
or cmp(self
.key
, other
.key
) or cmp(self
.text
, other
.text
)
57 return "<Node for %s (%s)>" % (bang_join(self
.text
), self
.seqno
)
60 return bang_join(self
.key
)
63 return "%s\1%s###%s\n" \
64 % ("\1".join(self
.links
),
75 minlen
= min(len(s1
), len(s2
))
76 if len(s1
) < len(s2
) and l1
== l2
[:len(s1
)]:
78 elif len(s2
) < len(s1
) and l2
== l1
[:len(s2
)]:
81 result
= cmp(l1
, l2
) or cmp(s1
, s2
)
85 def split_entry(str, which
):
87 parts
= str.split('!')
88 parts
= [part
.split('@') for part
in parts
]
98 _rmtt
= re
.compile(r
"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
100 _rmparens
= re
.compile(r
"\(\)")
102 def split_entry_key(str):
103 parts
= split_entry(str, 1)
104 for i
in range(len(parts
)):
105 m
= _rmtt
.match(parts
[i
])
107 parts
[i
] = null_join(m
.group(1, 2, 3))
109 parts
[i
] = parts
[i
].lower()
110 # remove '()' from the key:
111 parts
[i
] = _rmparens
.sub('', parts
[i
])
112 return map(trim_ignored_letters
, parts
)
115 def split_entry_text(str):
119 str = null_join(m
.group(1, 2, 3))
120 return split_entry(str, 1)
125 rx
= re
.compile("(.*)\1(.*)###(.*)$")
132 link
, str, seqno
= m
.group(1, 2, 3)
133 nodes
.append(Node(link
, str, seqno
))
137 def trim_ignored_letters(s
):
138 # ignore $ to keep environment variables with the
139 # leading letter from the name
140 if s
.startswith("$"):
145 def get_first_letter(s
):
146 if s
.startswith("<tex2html_percent_mark>"):
149 return trim_ignored_letters(s
)[0]
152 def split_letters(nodes
):
156 append
= group
.append
157 letter
= get_first_letter(nodes
[0].text
[0])
158 letter_groups
.append((letter
, group
))
160 nletter
= get_first_letter(node
.text
[0])
161 if letter
!= nletter
:
164 letter_groups
.append((letter
, group
))
165 append
= group
.append
170 def group_symbols(groups
):
172 ident_letters
= string
.ascii_letters
+ "_"
173 while groups
[0][0] not in ident_letters
:
174 entries
+= groups
[0][1]
177 groups
.insert(0, ("Symbols", entries
))
180 # need a function to separate the nodes into columns...
181 def split_columns(nodes
, columns
=1):
184 # This is a rough height; we may have to increase to avoid breaks before
186 colheight
= int(len(nodes
) / columns
)
187 numlong
= int(len(nodes
) % columns
)
189 colheight
= colheight
+ 1
193 for i
in range(numlong
):
194 start
= i
* colheight
195 end
= start
+ colheight
196 cols
.append(nodes
[start
:end
])
198 colheight
= colheight
- 1
200 numshort
= int(len(nodes
) / colheight
)
201 except ZeroDivisionError:
202 cols
= cols
+ (columns
- len(cols
)) * [[]]
204 for i
in range(numshort
):
205 start
= i
* colheight
206 end
= start
+ colheight
207 cols
.append(nodes
[start
:end
])
209 # If items continue across columns, make sure they are marked
210 # as continuations so the user knows to look at the previous column.
212 for i
in range(len(cols
) - 1):
215 next
= cols
[i
+ 1][0]
219 n
= min(len(prev
.key
), len(next
.key
))
221 if prev
.key
[j
] != next
.key
[j
]:
223 next
.continuation
= j
+ 1
227 DL_LEVEL_INDENT
= " "
229 def format_column(nodes
):
230 strings
= ["<dl compact='compact'>"]
231 append
= strings
.append
237 for i
in range(min(len(current
), len(previous
))):
238 if previous
[i
] != current
[i
]:
242 append("<dl compact='compact'>" * (count
- level
) + "\n")
246 append(level
* DL_LEVEL_INDENT
)
247 append("</dl>" * (level
- count
))
249 # else: level == count
250 for i
in range(count
, len(current
) - 1):
253 if node
.continuation
> i
:
254 extra
= " (continued)"
257 append("\n<dt>%s%s\n<dd>\n%s<dl compact='compact'>"
258 % (term
, extra
, level
* DL_LEVEL_INDENT
))
259 append("\n%s<dt>%s%s</a>"
260 % (level
* DL_LEVEL_INDENT
, node
.links
[0], node
.text
[-1]))
261 for link
in node
.links
[1:]:
262 append(",\n%s %s[Link]</a>" % (level
* DL_LEVEL_INDENT
, link
))
265 append("</dl>" * (level
+ 1))
266 return null_join(strings
)
269 def format_nodes(nodes
, columns
=1):
271 append
= strings
.append
273 colnos
= range(columns
)
274 colheight
= int(len(nodes
) / columns
)
275 if len(nodes
) % columns
:
276 colheight
= colheight
+ 1
277 colwidth
= int(100 / columns
)
278 append('<table width="100%"><tr valign="top">')
279 for col
in split_columns(nodes
, columns
):
280 append('<td width="%d%%">\n' % colwidth
)
281 append(format_column(col
))
283 append("\n</tr></table>")
285 append(format_column(nodes
))
286 return null_join(strings
)
289 def format_letter(letter
):
291 lettername
= ". (dot)"
293 lettername
= "_ (underscore)"
295 lettername
= letter
.capitalize()
296 return "\n<hr />\n<h2 id=%s>%s</h2>\n\n" \
297 % (quoteattr("letter-" + letter
), lettername
)
300 def format_html_letters(nodes
, columns
, group_symbol_nodes
):
301 letter_groups
= split_letters(nodes
)
302 if group_symbol_nodes
:
303 group_symbols(letter_groups
)
305 for letter
, nodes
in letter_groups
:
306 s
= "<b><a href=\"#letter-%s\">%s</a></b>" % (letter
, letter
)
308 s
= ["<hr /><center>\n%s</center>\n" % " |\n".join(items
)]
309 for letter
, nodes
in letter_groups
:
310 s
.append(format_letter(letter
))
311 s
.append(format_nodes(nodes
, columns
))
def format_html(nodes, columns):
    """Return the output of format_nodes() for *nodes* and *columns*.

    This is the formatting entry point that does not split the index
    into per-letter sections; it simply delegates to format_nodes().
    """
    rendered = format_nodes(nodes, columns)
    return rendered
319 """Collapse sequences of nodes with matching keys into a single node.
325 while i
< len(nodes
):
327 if not node
.cmp_entry(prev
):
328 prev
.links
.append(node
.links
[0])
337 fp
.write(node
.dump())
340 def process_nodes(nodes
, columns
, letters
=0, group_symbol_nodes
=0):
344 return format_html_letters(nodes
, columns
, group_symbol_nodes
)
346 return format_html(nodes
, columns
)
355 group_symbol_nodes
= 1
356 opts
, args
= getopt
.getopt(sys
.argv
[1:], "c:lo:",
357 ["columns=", "dont-group-symbols",
358 "group-symbols", "letters", "output="])
359 for opt
, val
in opts
:
360 if opt
in ("-o", "--output"):
362 elif opt
in ("-c", "--columns"):
363 columns
= int(val
, 10)
364 elif opt
in ("-l", "--letters"):
366 elif opt
== "--group-symbols":
367 group_symbol_nodes
= 1
368 elif opt
== "--dont-group-symbols":
369 group_symbol_nodes
= 0
374 nodes
= nodes
+ load(open(fn
))
375 num_nodes
= len(nodes
)
376 html
= process_nodes(nodes
, columns
, letters
, group_symbol_nodes
)
377 program
= os
.path
.basename(sys
.argv
[0])
379 sys
.stdout
.write(html
)
380 sys
.stderr
.write("\n%s: %d index nodes" % (program
, num_nodes
))
382 open(ofn
, "w").write(html
)
384 print "%s: %d index nodes" % (program
, num_nodes
)
387 if __name__
== "__main__":