Documentation #1541: fix explanations of Converse Ctv, Cpv, Csv variables
[charm.git] / doc / markupSanitizer.py
blob244638bcc7dc48a614fc55f2ddc11962c87b8dd4
1 #!/usr/bin/env python
3 from bs4 import BeautifulSoup
4 import sys
5 import os
# Python 2/3 compatibility shim for non-ASCII string literals.
# The interpreter is identified through the numeric sys.version_info tuple:
# comparing the sys.version *string* is lexicographic, so a major version
# with two or more digits (e.g. '10.0' < '3') would be misclassified.
if sys.version_info[0] < 3:
    import codecs

    def u(x):
        """Return *x* decoded with the ``unicode_escape`` codec."""
        return codecs.unicode_escape_decode(x)[0]
else:
    def u(x):
        """Return *x* unchanged."""
        return x
# Accept filename as user input
argc = len(sys.argv)
if argc < 2:
    # Same exception type as before, but with a message that tells the
    # caller what argument is missing.
    raise Exception("usage: %s <html-file>" % sys.argv[0])
fileName = sys.argv[1]
# Construct a DOM object.  The markup is consumed while the tree is built,
# so the file handle is closed as soon as the constructor returns.
with open(fileName) as markupFile:
    soup = BeautifulSoup(markupFile, "lxml")
# Replace every <tt> element with a <code> element (this assumes latex2html
# does not emit <tt> tags indiscriminately).  Wrapping in a fresh tag and
# then unwrapping the old one leaves the children inside the new tag.
for ttTag in soup.find_all('tt'):
    codeTag = soup.new_tag('code')
    ttTag.wrap(codeTag)
    ttTag.unwrap()

# Same treatment for <div class="alltt"> blocks, which become <pre> blocks
for allttDiv in soup.find_all('div', 'alltt'):
    preTag = soup.new_tag('pre')
    allttDiv.wrap(preTag)
    allttDiv.unwrap()
# Inside each <pre> section, drop <br> tags entirely and dissolve <span>
# tags while keeping their contents
for preBlock in soup.find_all('pre'):
    for lineBreak in preBlock.find_all('br'):
        lineBreak.extract()
    for innerSpan in preBlock.find_all('span'):
        innerSpan.unwrap()

# Dissolve every useless <span class="arabic"> wrapper, keeping its contents
for arabicSpan in soup.find_all('span', 'arabic'):
    arabicSpan.unwrap()
# Detach the navigation bar (if the page has one) from the document tree
navmenu = soup.find('div', 'navigation')
if navmenu is not None:
    navmenu.extract()

# Unless a maincontainer div already exists, turn the current <body> into
# that div and enclose it in a brand-new <body>
if soup.find('div', id='maincontainer') is None:
    container = soup.body
    container['id'] = 'maincontainer'
    container.name = 'div'
    container.wrap(soup.new_tag('body'))
# NOTE(review): nesting below is reconstructed from a flattened listing; the
# symbol insertion and the re-append are assumed to sit directly under
# `if navmenu:` -- confirm against the repository copy.
if navmenu:
    # If this navmenu doesn't already have a TOC, insert one
    if not navmenu.find('ul', 'manual-toc'):
        # Add a toc within the navmenu; the handle is closed once parsed
        with open("tmp-navmenu.html") as tocFile:
            navmenuTOC = BeautifulSoup(tocFile, "lxml")
        navmenuTOC = navmenuTOC.find('ul', 'manual-toc').extract()
        # Name the parser explicitly, matching every other BeautifulSoup
        # call in this script, instead of letting bs4 guess one.
        navmenuTOC.append(BeautifulSoup("".join([
            '<li><a href="http://charm.cs.illinois.edu">PPL Homepage</a></li>',
            '<li><a href="http://charm.cs.illinois.edu/help">Other Manuals</a></li>',
        ]), "lxml"))
        navmenu.append(navmenuTOC)

    # Insert navigation symbols to prev and next links:
    # a left guillemet goes before the text of the "previous" link ...
    prevsymbol = soup.new_tag('span')
    prevsymbol['class'] = 'navsymbol'
    prevsymbol.string = u('\xab')
    prv = navmenu.find('li', id='nav-prev')
    if prv:
        prv.find('a').insert(0, prevsymbol)

    # ... and a right guillemet goes after the text of the "next" link
    nextsymbol = soup.new_tag('span')
    nextsymbol['class'] = 'navsymbol'
    nextsymbol.string = u('\xbb')
    nxt = navmenu.find('li', id='nav-next')
    if nxt:
        nxt.find('a').append(nextsymbol)

    # Reinsert the navigation bar at the end
    soup.body.append(navmenu)
# Extract the title (looked up before the old head is removed, so it can be
# carried over into the replacement head)
titl = soup.find('title')

# Replace the head section with the user-supplied head markup; the asset
# file handle is closed as soon as it has been parsed
soup.find('head').extract()
with open("../assets/head.html") as headFile:
    newhead = BeautifulSoup(headFile, "lxml")
newhead = newhead.find('head').extract()
newhead.append(titl)
soup.html.body.insert_before(newhead)
# Emit the sanitized document on stdout, pretty-printed with the "html"
# output formatter
cleanedMarkup = soup.prettify(formatter="html")
print(cleanedMarkup)