2 # canonicalize html dirs to ease comparing them
5 # ./tools/c10e-html html
13 from bs4
import BeautifulSoup
# prettify(filename): canonicalize one file in place so that diffs between
# generated html dirs are smaller.
#
# The file is read and parsed with BeautifulSoup using the 'lxml' parser, then
# the prettified markup is post-processed:
#   - 'a class="link" href' is rewritten to 'a href'
#   - the 'summary="Navigation header" ' attribute text is removed
#   - a '<div class="footer">' block mentioning "Generated by GTK-Doc V1.28.1"
#     and an '<a name="idx">' block are passed to str.replace()
#   - <meta> tags whose content starts with "DocBook" or "GTK-Doc" are removed
#     together with any whitespace in front of them
#
# NOTE(review): this view of the function looks incomplete -- the two
# triple-quoted replace() arguments are never closed, their replacement values
# are not shown, and no write of `html` back to `doc` is visible. Confirm
# against the full file before changing anything here.
16 def prettify(filename
):
17 with
open(filename
, 'r') as doc
:
18 soup
= BeautifulSoup(doc
.read(), 'lxml')
# The same path is reopened in 'w' mode, which truncates the file; the parsed
# tree in `soup` is the only remaining copy of the content from here on.
19 with
open(filename
, 'w') as doc
:
20 html
= soup
.prettify()
21 # strip things that mkhtml2 is not producing to reduce the diff
22 html
= html
.replace('a class="link" href', 'a href')
23 html
= html
.replace('summary="Navigation header" ', '')
24 html
= html
.replace(""" <div class="footer">
26 Generated by GTK-Doc V1.28.1
29 html
= html
.replace(""" <a name="idx">
32 html
= re
.sub(r
'\s*<meta content="DocBook[^>]*>', '', html
)
33 html
= re
.sub(r
'\s*<meta content="GTK-Doc[^>]*>', '', html
)
# Enumerate the '*.devhelp2' files directly inside `htmldir` (glob is not
# recursive here).
# NOTE(review): the loop body and the place where `htmldir` is defined are not
# visible in this view -- presumably each file is passed to prettify(); confirm.
38 for filename
in glob
.glob(os
.path
.join(htmldir
, '*.devhelp2')):
# Same enumeration for the '*.html' files in `htmldir`; body likewise not
# visible in this view.
40 for filename
in glob
.glob(os
.path
.join(htmldir
, '*.html')):
# Script entry point: parse the command line and require at least one HTML_DIR.
# NOTE(review): whatever consumes options.args after the check is not visible
# in this view.
44 if __name__
== '__main__':
45 parser
= argparse
.ArgumentParser(
46 description
='c10e-html - canonicalize html files for diffing')
47 parser
.add_argument('args', nargs
='*', help='HTML_DIR')
# nargs='*' accepts zero positionals, so the minimum count is enforced by hand.
49 options
= parser
.parse_args()
50 if len(options
.args
) < 1:
# sys.exit() with a string prints it to stderr and exits with status 1.
51 sys
.exit('Too few arguments')