Apply minor cleanup to mempool
[charm.git] / doc / markupSanitizer.py
blobc02b0b0c9ddd6f5cc7300deebe464a778c5c4b4c
1 #!/usr/bin/env python
3 from bs4 import BeautifulSoup,NavigableString
4 import sys
5 import os
# Pick the implementation of u() for the running interpreter. The major
# version is compared numerically via sys.version_info; comparing the
# sys.version string against '3' is lexical and would misclassify a major
# version of 10 or above.
if sys.version_info[0] < 3:
    import codecs

    def u(x):
        """Return the unicode string obtained by decoding the escapes in x.

        Python 2 only: a plain str literal such as '\\xab' is a byte string,
        so it is run through the unicode_escape codec to get unicode text.
        """
        return codecs.unicode_escape_decode(x)[0]
else:
    def u(x):
        """Return x unchanged; Python 3 str literals are already unicode."""
        return x
# Accept filename as user input. The script needs exactly one thing from the
# command line: the path of the HTML file to clean up.
argc = len(sys.argv)
if argc < 2:
    # Same exception type as before, but now it says what was expected.
    raise Exception("usage: %s <html-file>" % sys.argv[0])
fileName = sys.argv[1]

# Construct a DOM object using the lxml parser. The file is opened in a with
# block so the handle is closed as soon as the markup has been parsed.
with open(fileName) as markup_file:
    soup = BeautifulSoup(markup_file, "lxml")
# Assuming tt tags are not spewed recklessly by latex2html, replace each of
# them with a code tag: wrap the <tt> in a new <code>, fix up its leading
# text, then dissolve the <tt> so its children sit directly in the <code>.
for t in soup('tt'):
    t.wrap(soup.new_tag('code'))

    # Remove the first space after a leading newline to fix wrong
    # indentation. An empty <tt></tt> has no children, so there is nothing
    # to fix (and indexing contents[0] would raise IndexError).
    if t.contents:
        tmp = t.contents[0]
        if isinstance(tmp, NavigableString) and tmp.startswith('\n'):
            tmp.replace_with(soup.new_string(tmp.replace('\n ', '\n', 1)))

    t.unwrap()
# Turn every <div class="alltt"> into a <pre>: a fresh <pre> goes around the
# div, then the div itself is dissolved so only the <pre> and the div's
# former children remain.
for alltt_div in soup.find_all('div', 'alltt'):
    pre_wrapper = soup.new_tag('pre')
    alltt_div.wrap(pre_wrapper)
    alltt_div.unwrap()
# Inside each <pre> section, drop the <br> tags outright and dissolve the
# <span> tags (their contents stay in place).
for pre_section in soup.find_all('pre'):
    for line_break in pre_section.find_all('br'):
        line_break.extract()
    for inner_span in pre_section.find_all('span'):
        inner_span.unwrap()
# The class 'arabic' spans are useless wrappers: dissolve each one and keep
# whatever it contained.
for arabic_span in soup.find_all('span', 'arabic'):
    arabic_span.unwrap()
# Detach the navigation bar (the first <div class="navigation">, if any) from
# the document. navmenu stays None when the page has no navigation bar.
navmenu = soup.find('div', 'navigation')
if navmenu is not None:
    navmenu.extract()
# Wrap the remaining contents within a div, unless a maincontainer div is
# already present. The existing <body> is turned into that div by giving it
# the id and renaming the tag; a new <body> is then wrapped around it.
if soup.find('div', id='maincontainer') is None:
    container = soup.body
    container['id'] = 'maincontainer'
    container.name = 'div'
    new_body = soup.new_tag('body')
    soup.find('div', id='maincontainer').wrap(new_body)
# Decorate the navigation bar that was detached earlier and put it back at
# the end of the body. Nothing happens for pages without a navigation bar.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# that had lost its indentation -- confirm that the symbol insertion is meant
# to run regardless of whether a TOC was just added.
if navmenu:
    # If this navmenu doesn't already have a TOC, insert one
    if not navmenu.find('ul', 'manual-toc'):
        # Add a toc within the navmenu: the <ul class="manual-toc"> is taken
        # from tmp-navmenu.html. The file is read inside a with block so the
        # handle is closed once the markup has been parsed.
        with open("tmp-navmenu.html") as toc_file:
            navmenuTOC = BeautifulSoup(toc_file, "lxml")
        navmenuTOC = navmenuTOC.find('ul', 'manual-toc').extract()
        # Two fixed links are appended after the entries from the file.
        navmenuTOC.append(BeautifulSoup("".join([
            '<li><a href="http://charm.cs.illinois.edu">PPL Homepage</a></li>',
            '<li><a href="http://charm.cs.illinois.edu/help">Other Manuals</a></li>'
        ]), "lxml"))
        navmenu.append(navmenuTOC)

    # Insert navigation symbols to prev and next links. The left guillemet
    # goes in front of the "previous" link text.
    prevsymbol = soup.new_tag('span')
    prevsymbol['class'] = 'navsymbol'
    prevsymbol.string = u('\xab')
    prv = navmenu.find('li', id='nav-prev')
    if prv:
        prevlink = prv.find('a')
        # A nav-prev item without an <a> is left as it is.
        if prevlink is not None:
            prevlink.insert(0, prevsymbol)

    # The right guillemet goes after the "next" link text.
    nextsymbol = soup.new_tag('span')
    nextsymbol['class'] = 'navsymbol'
    nextsymbol.string = u('\xbb')
    nxt = navmenu.find('li', id='nav-next')
    if nxt:
        nextlink = nxt.find('a')
        # A nav-next item without an <a> is left as it is.
        if nextlink is not None:
            nextlink.append(nextsymbol)

    # Reinsert the navigation bar at the end
    soup.body.append(navmenu)
# Extract the title so it can be carried over into the new head.
titl = soup.find('title')

# Replace the head section with the user-supplied head markup. A document
# without a <head> simply has nothing to remove.
oldhead = soup.find('head')
if oldhead is not None:
    oldhead.extract()

# The replacement <head> comes from ../assets/head.html; the with block
# closes the file once it has been parsed.
with open("../assets/head.html") as head_file:
    newhead = BeautifulSoup(head_file, "lxml")
newhead = newhead.find('head').extract()

# Appending moves the original <title> into the new head. A document without
# a title is given the new head as-is (appending None would raise).
if titl is not None:
    newhead.append(titl)
soup.html.body.insert_before(newhead)
# Write the cleaned up markup to stdout as UTF-8 bytes followed by a newline.
# Passing the encoded bytes to print() on Python 3 would emit their repr
# (b'...') rather than the markup, so the bytes are written to the binary
# layer of stdout when it exists; a stdout without a .buffer attribute
# (Python 2) accepts the byte string directly.
markup = soup.encode("utf-8")
stdout_bytes = getattr(sys.stdout, "buffer", sys.stdout)
stdout_bytes.write(markup + b"\n")