fixxref: only add glib types to nolinks if we don't have them
[gtk-doc.git] / tools / c10e-html.py
blobd4a853591525e1c604fd8e07cfb4b25c6cea3786
1 #!/usr/bin/python3
2 # canonicalize html dirs to ease comparing them
4 # run as:
5 # ./tools/c10e-html html
7 import argparse
8 import glob
9 import re
10 import os
11 import sys
13 from bs4 import BeautifulSoup
def prettify(filename):
    """Rewrite *filename* in place as canonicalized, pretty-printed HTML.

    The file is parsed with BeautifulSoup (``lxml`` parser), re-serialized
    with ``soup.prettify()``, and a fixed set of attributes and elements is
    then stripped from the text to reduce the diff (things that mkhtml2 is
    not producing).

    Args:
        filename: Path of the file to canonicalize; it is overwritten.
    """
    # Use an explicit encoding so the result does not depend on the locale.
    # NOTE(review): assumes the generated HTML is UTF-8 -- confirm.
    with open(filename, 'r', encoding='utf-8') as doc:
        soup = BeautifulSoup(doc.read(), 'lxml')

    html = soup.prettify()

    # strip things that mkhtml2 is not producing to reduce the diff
    html = html.replace('a class="link" href', 'a href')
    html = html.replace(' target="_top"', '')
    html = html.replace('summary="Navigation header" ', '')
    # NOTE(review): whitespace inside the two multi-line literals below is
    # significant (they are matched against the prettified text verbatim).
    # The indentation of their continuation lines could not be confirmed
    # from the reviewed copy -- verify against the upstream file.
    html = html.replace(""" <a name="idx">
</a>
""", '')
    html = re.sub(""" <div class="footer">
<hr/>
Generated by GTK-Doc V[.0-9]*
</div>
""", '', html)
    html = re.sub(r'\s*<p>\s*</p>', '', html)
    html = re.sub(r'\s*<div class="titlepage">\s*</div>', '', html)
    html = re.sub(r'\s*<meta content="DocBook[^>]*>', '', html)
    html = re.sub(r'\s*<meta content="GTK-Doc[^>]*>', '', html)

    # Open for writing only once the new content is fully computed: mode 'w'
    # truncates the file, so a failure above must not leave it empty.
    with open(filename, 'w', encoding='utf-8') as doc:
        doc.write(html)
def main(htmldir):
    """Canonicalize every devhelp2 and HTML file directly inside *htmldir*.

    All ``*.devhelp2`` files are processed first, then all ``*.html`` files;
    each match is handed to ``prettify``, which rewrites it in place.

    Args:
        htmldir: Directory whose files should be canonicalized.
    """
    for pattern in ('*.devhelp2', '*.html'):
        for path in glob.glob(os.path.join(htmldir, pattern)):
            prettify(path)
if __name__ == '__main__':
    # Command-line entry point: the first positional argument is the
    # directory of HTML files to canonicalize; extra arguments are ignored.
    cli = argparse.ArgumentParser(
        description='c10e-html - canonicalize html files for diffing')
    cli.add_argument('args', nargs='*', help='HTML_DIR')

    opts = cli.parse_args()
    # nargs='*' accepts an empty list, so the missing-directory case has to
    # be rejected by hand.
    if not opts.args:
        sys.exit('Too few arguments')

    main(opts.args[0])