2 # canonicalize html dirs to ease comparing them
5 # ./tools/c10e-html html
13 from bs4
import BeautifulSoup
# prettify(filename, parser='lxml', fixup=False)
#
# Re-serialize one file in place: its contents are read and parsed with
# BeautifulSoup using `parser`, then the same path is reopened for writing
# and the `soup.prettify()` text is run through the substitutions below.
# NOTE(review): how `fixup` gates the substitutions and where `html` is
# written back to `doc` are not visible in this excerpt -- confirm against
# the complete file.
16 def prettify(filename
, parser
='lxml', fixup
=False):
# Read the whole document and build the parse tree with the requested parser.
17 with
open(filename
, 'r') as doc
:
18 soup
= BeautifulSoup(doc
.read(), parser
)
# Reopen the same path for writing (mode 'w' truncates it) and start from
# the pretty-printed serialization of the parsed tree.
19 with
open(filename
, 'w') as doc
:
20 html
= soup
.prettify()
22 # strip things that mkhtml2 is not producing to reduce the diff
23 html
= html
.replace('a class="link" href', 'a href')
24 html
= html
.replace(' target="_top"', '')
25 html
= html
.replace('summary="Navigation header" ', '')
# NOTE(review): the triple-quoted arguments of the next two calls are cut
# off in this excerpt (the closing quotes and remaining arguments are
# missing), so their exact patterns cannot be documented from here.
26 html
= html
.replace(""" <a name="idx">
29 html
= re
.sub(""" <div class="footer">
31 Generated by GTK-Doc V[.0-9]*
# Regex clean-ups, each also consuming the whitespace in front of the match:
# empty <p> elements, empty <a> anchors whose name is "id-" followed by
# digits/dots, empty <div class="titlepage"> elements, and <meta> tags whose
# content attribute starts with "DocBook" or "GTK-Doc".
34 html
= re
.sub(r
'\s*<p>\s*</p>', '', html
)
35 html
= re
.sub(r
'\s*<a name="id-[.0-9]+">\s*</a>', '', html
)
36 html
= re
.sub(r
'\s*<div class="titlepage">\s*</div>', '', html
)
37 html
= re
.sub(r
'\s*<meta content="DocBook[^>]*>', '', html
)
38 html
= re
.sub(r
'\s*<meta content="GTK-Doc[^>]*>', '', html
)
# Every `*.devhelp2` file directly inside `htmldir` is prettified with the
# 'lxml-xml' parser; afterwards every `*.html` file is prettified with the
# default parser and `fixup=True`.
devhelp_pattern = os.path.join(htmldir, '*.devhelp2')
for filename in glob.glob(devhelp_pattern):
    prettify(filename, parser='lxml-xml')

html_pattern = os.path.join(htmldir, '*.html')
for filename in glob.glob(html_pattern):
    prettify(filename, fixup=True)
if __name__ == '__main__':
    # Script entry point: declare the command line and validate it.
    parser = argparse.ArgumentParser(
        description='c10e-html - canonicalize html files for diffing',
    )
    parser.add_argument('args', help='HTML_DIR', nargs='*')

    options = parser.parse_args()
    # nargs='*' also accepts zero positionals, so an empty list has to be
    # rejected explicitly; sys.exit() with a string prints it to stderr and
    # exits with status 1.
    if not options.args:
        sys.exit('Too few arguments')