tools/c10e-html: strip html produced by xslt
[gtk-doc.git] / tools / c10e-html.py
blob81e8a8df6bbdcc3e3b55665a7dccb31686c08c73
1 #!/usr/bin/python3
2 # canonicalize html dirs to ease comparing them
4 # run as:
5 # ./tools/c10e-html html
7 import argparse
8 import glob
9 import re
10 import os
11 import sys
13 from bs4 import BeautifulSoup
def prettify(filename):
    """Rewrite *filename* in place as pretty-printed, canonicalized markup.

    The file is parsed with BeautifulSoup (``lxml`` parser), re-serialized
    with ``prettify()`` and then a handful of fragments are removed so that
    the remaining diff between two html dirs is smaller.

    The complete output is built in memory before the file is reopened for
    writing, so a failure while parsing or substituting leaves the original
    file untouched instead of truncated.

    Args:
        filename: Path of the file to canonicalize in place.
    """
    # NOTE(review): assumes the generated files are UTF-8 encoded -- confirm.
    with open(filename, 'r', encoding='utf-8') as doc:
        soup = BeautifulSoup(doc.read(), 'lxml')

    html = soup.prettify()
    # strip things that mkhtml2 is not producing to reduce the diff
    html = html.replace('a class="link" href', 'a href')
    html = html.replace('summary="Navigation header" ', '')
    # Drop the "Generated by GTK-Doc" footer. The match tolerates any
    # indentation and any version string so it keeps working when the
    # pretty-printer layout or the GTK-Doc release changes.
    html = re.sub(
        r'[ \t]*<div class="footer">\s*<hr/>\s*'
        r'Generated by GTK-Doc V[^\s<]+\s*</div>[ \t]*\n',
        '', html)
    html = re.sub(r'\s*<meta content="DocBook[^>]*>', '', html)
    html = re.sub(r'\s*<meta content="GTK-Doc[^>]*>', '', html)

    # Only now reopen (and thereby truncate) the file.
    with open(filename, 'w', encoding='utf-8') as doc:
        doc.write(html)
def main(htmldir):
    """Canonicalize every devhelp2 and html file found directly in *htmldir*.

    The ``*.devhelp2`` files are handled first, then the ``*.html`` files;
    each match is passed to ``prettify()``. Files with other extensions and
    files in subdirectories are not touched.

    Args:
        htmldir: Directory that contains the generated documentation.
    """
    for pattern in ('*.devhelp2', '*.html'):
        for path in glob.glob(os.path.join(htmldir, pattern)):
            prettify(path)
if __name__ == '__main__':
    # Command-line entry point: the first positional argument is the html
    # directory to canonicalize; any further arguments are ignored.
    arg_parser = argparse.ArgumentParser(
        description='c10e-html - canonicalize html files for diffing')
    arg_parser.add_argument('args', nargs='*', help='HTML_DIR')

    parsed = arg_parser.parse_args()
    if not parsed.args:
        # Exit with the message on stderr and a non-zero status.
        sys.exit('Too few arguments')

    main(parsed.args[0])