mkhtml2: add handling for testobject on *mediaobjects
[gtk-doc.git] / tools / c10e-html.py
blobf020f679352eda344a936cf483a45a42ee5d519b
1 #!/usr/bin/python3
2 # canonicalize html dirs to ease comparing them
4 # run as:
5 # ./tools/c10e-html html
7 import argparse
8 import glob
9 import re
10 import os
11 import sys
13 from bs4 import BeautifulSoup
def _strip_mkhtml_noise(html):
    """Return *html* without the markup that mkhtml2 is not producing."""
    # Attribute-level differences: plain text substitutions are enough.
    html = html.replace('a class="link" href', 'a href')
    html = html.replace(' target="_top"', '')
    html = html.replace('summary="Navigation header" ', '')
    # Whole elements are dropped together with the whitespace in front of
    # them. Every pattern is whitespace-tolerant so that it matches no matter
    # how deep the prettifier indented the element. The order matters: the
    # anchors are removed before the (then possibly empty) titlepage div.
    for pattern in (
        r'\s*<a name="idx">\s*</a>',
        r'\s*<div class="footer">\s*<hr/>\s*Generated by GTK-Doc V[.0-9]*\s*</div>',
        r'\s*<p>\s*</p>',
        r'\s*<a name="id-[.0-9]+">\s*</a>',
        r'\s*<div class="titlepage">\s*</div>',
        r'\s*<meta content="DocBook[^>]*>',
        r'\s*<meta content="GTK-Doc[^>]*>',
    ):
        html = re.sub(pattern, '', html)
    return html


def prettify(filename, parser='lxml', fixup=False):
    """Rewrite *filename* in place with BeautifulSoup's prettified markup.

    Args:
        filename: path of the file to canonicalize; it is overwritten.
        parser: parser name handed to BeautifulSoup.
        fixup: when true, additionally strip the markup that mkhtml2 is not
            producing, to reduce the diff.
    """
    with open(filename, 'r') as doc:
        soup = BeautifulSoup(doc.read(), parser)

    # Build the complete output before the file is opened for writing:
    # mode 'w' truncates immediately, so a failure while formatting must not
    # be able to leave an empty file behind.
    html = soup.prettify()
    if fixup:
        html = _strip_mkhtml_noise(html)

    with open(filename, 'w') as doc:
        doc.write(html)
def main(htmldir):
    """Canonicalize the devhelp2 and html files found directly in *htmldir*."""
    # (glob pattern, keyword arguments for prettify), handled in this order.
    jobs = (
        ('*.devhelp2', {'parser': 'lxml-xml'}),
        ('*.html', {'fixup': True}),
    )
    for wildcard, kwargs in jobs:
        for path in glob.glob(os.path.join(htmldir, wildcard)):
            prettify(path, **kwargs)
if __name__ == '__main__':
    # Command line entry point: the first positional argument is the
    # directory to canonicalize; any further arguments are ignored.
    cli = argparse.ArgumentParser(
        description='c10e-html - canonicalize html files for diffing')
    cli.add_argument('args', nargs='*', help='HTML_DIR')

    html_dirs = cli.parse_args().args
    if not html_dirs:
        sys.exit('Too few arguments')

    main(html_dirs[0])