c10e-html: strip more stuff
[gtk-doc.git] / gtkdoc / fixxref.py
blobbc733a21bbe48f3c765c94c446d4002fcee65696
1 # -*- python -*-
3 # gtk-doc - GTK DocBook documentation generator.
4 # Copyright (C) 1998 Damon Chaplin
5 # 2007-2016 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Fix cross-references in the HTML documentation."""
24 # Support both Python 2 and 3
25 from __future__ import print_function
27 import logging
28 import os
29 import re
30 import shlex
31 import subprocess
32 import sys
33 import tempfile
35 from . import common, config
# Maps every known entity id to the URL it should link to.
Links = {}

# Link targets we never warn about when they cannot be resolved.
# ReadSections(), MakeGtkDocLink() and ReportBadXRef() add to this set at
# runtime.
NoLinks = {
    'char',
    'double',
    'float',
    'int',
    'long',
    'main',
    'signed',
    'unsigned',
    'va-list',
    'void',
    'GBoxed',
    'GEnum',
    'GFlags',
    'GInterface',
}
def Run(options):
    """Entry point: load the link indices, then rewrite the module's HTML.

    Reads module_dir, html_dir, extra_dir, module and src_lang from
    *options*.
    """
    logging.info('options: %s', str(options.__dict__))

    # 1. collect link targets, 2. learn which symbols are non-public,
    # 3. patch the generated HTML files.
    index_dirs = (options.module_dir, options.html_dir, options.extra_dir)
    LoadIndicies(*index_dirs)
    ReadSections(options.module)
    FixCrossReferences(options.module_dir, options.module, options.src_lang)
# TODO(ensonic): try to refactor so that we get a list of paths and then just
# loop over them.
# - module_dir is by default 'html'
# - html_dir can be set by configure, defaults to $(docdir)
def LoadIndicies(module_dir, html_dir, extra_dirs):
    """Populate Links from every index file we can find.

    Directories are scanned in this order: the GLib doc dir, each entry of
    the GNOME2_PATH environment variable, html_dir, module_dir and finally
    each of extra_dirs.  Later scans overwrite entries from earlier ones.
    """
    # Cache of dirs we already scanned for index files
    dir_cache = {}

    path_prefix = ''
    m = re.search(r'(.*?)/share/gtk-doc/html', html_dir)
    if m:
        path_prefix = m.group(1)
        logging.info('Path prefix: %s', path_prefix)
    prefix_match = r'^' + re.escape(path_prefix) + r'/'

    def outside_prefix(path):
        # Directories that do not share the install prefix need absolute links.
        return re.search(prefix_match, path) is None

    # We scan the directory containing GLib and any directories in GNOME2_PATH
    # first, but these will be overridden by any later scans.
    glib_dir = common.GetModuleDocDir('glib-2.0')
    if glib_dir and os.path.exists(glib_dir):
        # Some predefined link targets to get links into type hierarchies as
        # these have no targets. These are always absolute for now.
        Links['GBoxed'] = glib_dir + '/gobject/gobject-Boxed-Types.html'
        Links['GEnum'] = glib_dir + '/gobject/gobject-Enumeration-and-Flag-Types.html'
        Links['GFlags'] = glib_dir + '/gobject/gobject-Enumeration-and-Flag-Types.html'
        Links['GInterface'] = glib_dir + '/gobject/GTypeModule.html'

        if glib_dir != html_dir:
            logging.info('Scanning GLib directory: %s', glib_dir)
            ScanIndices(glib_dir, outside_prefix(glib_dir), dir_cache)

    path = os.environ.get('GNOME2_PATH')
    if path:
        for gnome_prefix in path.split(':'):
            # os.path.join supplies the separator that plain concatenation
            # dropped for prefixes without a trailing slash ('/opt/gnome').
            gnome_dir = os.path.join(gnome_prefix, 'share', 'gtk-doc', 'html')
            if os.path.exists(gnome_dir) and gnome_dir != html_dir:
                logging.info('Scanning GNOME2_PATH directory: %s', gnome_dir)
                ScanIndices(gnome_dir, outside_prefix(gnome_dir), dir_cache)

    logging.info('Scanning HTML_DIR directory: %s', html_dir)
    ScanIndices(html_dir, False, dir_cache)
    logging.info('Scanning MODULE_DIR directory: %s', module_dir)
    ScanIndices(module_dir, False, dir_cache)

    # check all extra dirs; ScanIndices skips directories it has already seen
    for extra_dir in extra_dirs:
        extra_dir = extra_dir.rstrip('/')
        logging.info('Scanning EXTRA_DIR directory: %s', extra_dir)

        # If the --extra-dir option is not relative and is not sharing the same
        # prefix as the target directory of the docs, we need to use absolute
        # directories for the links
        use_absolute = not extra_dir.startswith('..') and outside_prefix(extra_dir)
        ScanIndices(extra_dir, use_absolute, dir_cache)
def ScanIndices(scan_dir, use_absolute_links, dir_cache):
    """Recursively read every *.devhelp2 index below scan_dir.

    dir_cache records the directories already visited (as keys); a
    directory found in it is skipped.  Compressed index files are not read,
    a hint to decompress them is printed instead.
    """
    if not scan_dir:
        return
    if scan_dir in dir_cache:
        return
    dir_cache[scan_dir] = 1

    logging.info('Scanning index directory: %s, absolute: %d', scan_dir, use_absolute_links)

    # TODO(ensonic): this code is the same as in rebase.py
    if not os.path.isdir(scan_dir):
        logging.info('Cannot open dir "%s"', scan_dir)
        return

    sgml_gz_hint = (' Please fix https://bugs.launchpad.net/ubuntu/+source/gtk-doc/+bug/77138 . For now run:\n'
                    'gunzip %s\n')
    devhelp_gz_hint = ('Please fix https://bugs.launchpad.net/ubuntu/+source/gtk-doc/+bug/1466210 . For now run:\n'
                       'gunzip %s\n')

    nested_dirs = []
    for name in sorted(os.listdir(scan_dir)):
        path = os.path.join(scan_dir, name)
        if os.path.isdir(path):
            # Sub-directories are visited after all files of this directory.
            nested_dirs.append(path)
        elif name.endswith('.devhelp2'):
            # if devhelp-file is good don't read index.sgml
            ReadDevhelp(path, use_absolute_links)
        elif name == "index.sgml.gz":
            # debian/ubuntu started to compress this as index.sgml.gz :/
            if not os.path.exists(os.path.join(scan_dir, 'index.sgml')):
                print(sgml_gz_hint % path)
        elif name.endswith('.devhelp2.gz'):
            # debian/ubuntu started to compress this as *devhelp2.gz :/
            # we could consider supporting: gzip module
            if not os.path.exists(path[:-3]):
                print(devhelp_gz_hint % path)

    # Now recursively scan the subdirectories.
    for nested in nested_dirs:
        ScanIndices(nested, use_absolute_links, dir_cache)
def ReadDevhelp(file, use_absolute_links):
    """Read one devhelp2 index file and record its link targets in Links.

    Every ' link="page#anchor"' attribute found in the file yields an entry
    Links[anchor] = <base dir> + 'page#anchor'.
    """
    # Determine the absolute directory, to be added to links in $file
    # if we need to use an absolute link.
    # $file will be something like /prefix/gnome/share/gtk-doc/html/gtk/$file
    # We want the part up to 'html/.*' since the links in $file include
    # the rest.
    m = re.search(r'(.*\/)(.*?)\/.*?\.devhelp2', file)
    base_dir = "../"
    if use_absolute_links:
        # For uninstalled index files we'd need to map the path to where it
        # will be installed to
        if not file.startswith('./'):
            if m:
                base_dir = m.group(1) + m.group(2) + '/'
            else:
                # Paths with fewer than two '/' do not match the pattern;
                # fall back to the file's own directory instead of crashing.
                base_dir = os.path.join(os.path.dirname(file), '')
    elif m:
        base_dir += m.group(2) + '/'
    else:
        base_dir = ''

    logging.info('Scanning index file=%s, absolute=%d, dir=%s', file, use_absolute_links, base_dir)

    # Use a context manager so the index file is closed once it is parsed.
    with common.open_text(file) as index:
        for line in index:
            m = re.search(r' link="([^#]*)#([^"]*)"', line)
            if m:
                link = m.group(1) + '#' + m.group(2)
                logging.debug('Found id: %s href: %s', m.group(2), link)
                Links[m.group(2)] = base_dir + link
def ReadSections(module):
    """Register non-public symbols from '<module>-sections.txt' in NoLinks.

    Symbols listed in a 'Standard' or 'Private' subsection are added (as
    SGML ids) to NoLinks, so we don't warn on missing links to them.
    """
    # Start outside of any subsection so that a symbol listed before the
    # first <SECTION> tag does not hit an unbound variable.
    subsection = ''
    with common.open_text(module + '-sections.txt') as sections:
        for line in sections:
            if line.startswith('#') or line.strip() == '':
                continue
            if line.startswith('<SECTION>'):
                subsection = ''
                continue
            # A bare '<SUBSECTION>' matches as well and resets the name to ''.
            m = re.search(r'^<SUBSECTION\s*(.*)>', line)
            if m:
                subsection = m.group(1)
                continue
            if line.startswith('</SECTION>'):
                continue
            if re.search(r'^<TITLE>(.*)<\/TITLE>', line):
                continue
            if re.search(r'^<FILE>(.*)<\/FILE>', line):
                continue
            if re.search(r'^<INCLUDE>(.*)<\/INCLUDE>', line):
                continue

            # Anything else is a symbol name.
            if subsection in ('Standard', 'Private'):
                NoLinks.add(common.CreateValidSGMLID(line.strip()))
def FixCrossReferences(module_dir, module, src_lang):
    """Run FixHTMLFile on each .html/.htm file directly inside module_dir."""
    # TODO(ensonic): use glob.glob()?
    for name in sorted(os.listdir(module_dir)):
        path = os.path.join(module_dir, name)
        is_html = name.endswith(('.html', '.htm'))
        if not os.path.isdir(path) and is_html:
            FixHTMLFile(src_lang, module, path)
def FixHTMLFile(src_lang, module, file):
    """Highlight code listings and resolve GTKDOCLINK tags in one HTML file.

    The file is rewritten in place: the result is written to '<file>.new',
    which then replaces the original.
    """
    logging.info('Fixing file: %s', file)

    # Read through a context manager so the handle is closed right away.
    with common.open_text(file) as h:
        content = h.read()

    if config.highlight:
        # FIXME: ideally we'd pass a clue about the example language to the highlighter
        # unfortunately the "language" attribute is not appearing in the html output
        # we could patch the customization to have <code class="xxx"> inside of <pre>
        if config.highlight.endswith('vim'):
            highlighter = HighlightSourceVim
        else:
            highlighter = HighlightSource

        def highlight_listing(m):
            return highlighter(src_lang, m.group(1), m.group(2))
        content = re.sub(
            r'<div class=\"(example-contents|informalexample)\"><pre class=\"programlisting\">(.*?)</pre></div>',
            highlight_listing, content, flags=re.DOTALL)

        # Turn entity-escaped GTKDOCLINK tags back into real tags.  The
        # replacement must not contain backslash escapes such as '\<':
        # re.sub() leaves unknown non-letter escapes alone, which would put
        # literal backslashes into the output and break the matching below.
        content = re.sub(r'\&lt;GTKDOCLINK\s+HREF=\&quot;(.*?)\&quot;\&gt;(.*?)\&lt;/GTKDOCLINK\&gt;',
                         r'<GTKDOCLINK HREF="\1">\2</GTKDOCLINK>', content, flags=re.DOTALL)

        # From the highlighter we get all the functions marked up. Now we can turn them into GTKDOCLINK items
        def link_symbol(m):
            return MakeGtkDocLink(m.group(1), m.group(2), m.group(3))
        content = re.sub(r'(<span class=\"function\">)(.*?)(</span>)', link_symbol, content, flags=re.DOTALL)
        # We can also try the first item in stuff marked up as 'normal'
        content = re.sub(
            r'(<span class=\"normal\">\s*)(.+?)((\s+.+?)?\s*</span>)', link_symbol, content, flags=re.DOTALL)

    lines = content.rstrip().split('\n')

    for i, line in enumerate(lines):
        line_no = i + 1

        def make_xref(m, line_no=line_no):
            return MakeXRef(module, file, line_no, m.group(1), m.group(2))

        # Links are resolved line by line so warnings carry a line number.
        lines[i] = re.sub(r'<GTKDOCLINK\s+HREF="([^"]*)"\s*>(.*?)</GTKDOCLINK\s*>', make_xref, line)
        if 'GTKDOCLINK' in lines[i]:
            # Report the same 1-based line number that MakeXRef receives.
            logging.info('make xref failed for line %d: "%s"', line_no, lines[i])

    new_file = file + '.new'
    content = '\n'.join(lines)
    with common.open_text(new_file, 'w') as h:
        h.write(content)

    os.unlink(file)
    os.rename(new_file, file)
def GetXRef(id):
    """Look up *id* in Links, trying a few spelling variants.

    Returns a tuple (matched_id, href).  When nothing matches the result is
    (id, None).
    """
    candidates = [id]

    # This is a workaround for some inconsistency we have with CreateValidSGMLID
    if ':' in id:
        candidates.append(id.replace(':', '--'))

    # poor mans plural support
    if id.endswith('s'):
        singular = id[:-1]
        candidates.append(singular)
        candidates.append(singular + '-struct')

    candidates.append(id + '-struct')

    # First candidate with a non-empty href wins.
    for tid in candidates:
        href = Links.get(tid)
        if href:
            return (tid, href)

    return (id, None)
def ReportBadXRef(file, line, id, text):
    """Warn once about a link target that could not be resolved.

    No warning is issued for ids already in NoLinks or for links we expect
    to fail (see the individual checks below).  A reported id is added to
    NoLinks so that it is not reported again.
    """
    logging.info('no link for: id=%s, linktext=%s', id, text)

    # don't warn multiple times and also skip blacklisted (ctypes)
    if id in NoLinks:
        return
    # if it's a function, don't warn if it does not contain a "_"
    # (transformed to "-")
    # - gnome coding style would use '_'
    # - will avoid wrong warnings for ansi c functions
    if ' class="function"' in text and '-' not in id:
        return
    # if it's a 'return value', don't warn (implicitly created link)
    if ' class="returnvalue"' in text:
        return
    # if it's a 'type', don't warn if it starts with lowercase
    # - gnome coding style would use CamelCase
    # Slicing instead of indexing keeps an empty id from raising IndexError.
    if ' class="type"' in text and id[:1].islower():
        return
    # don't warn for self links
    if text == id:
        return

    common.LogWarning(file, line, 'no link for: "%s" -> (%s).' % (id, text))
    NoLinks.add(id)
def MakeRelativeXRef(module, href):
    """Strip a leading '../<module>/' from href.

    If it is a link to the same module, removing the path makes it work
    uninstalled.  Any other href is returned unchanged.
    """
    # The module name is literal text, not a pattern: names such as 'gtk+'
    # or 'glib-2.0' contain regex metacharacters and must be escaped.
    m = re.search(r'^\.\./' + re.escape(module) + '/(.*)$', href)
    if m:
        href = m.group(1)
    return href
def MakeXRef(module, file, line, id, text):
    """Return an <a> element linking *text* to *id*, or plain *text*.

    When no target is known for *id* the problem is handed to
    ReportBadXRef and the link text is returned without a link.
    """
    _, href = GetXRef(id)

    if not href:
        ReportBadXRef(file, line, id, text)
        return text

    href = MakeRelativeXRef(module, href)
    logging.info('Fixing link: %s, %s, %s', id, href, text)
    return "<a href=\"%s\">%s</a>" % (href, text)
def MakeGtkDocLink(pre, symbol, post):
    """Wrap *symbol* in a GTKDOCLINK tag, keeping *pre* and *post* around it."""
    sgml_id = common.CreateValidSGMLID(symbol)

    # these are implicitly created links in highlighted sources
    # we don't want warnings for those if the links cannot be resolved.
    NoLinks.add(sgml_id)

    pieces = [pre, '<GTKDOCLINK HREF="', sgml_id, '">', symbol, '</GTKDOCLINK>', post]
    return ''.join(pieces)
def HighlightSource(src_lang, type, source):
    """Highlight *source* with the external tool named by config.highlight.

    The tool's output is normalised to the stylesheet class names used here
    and then passed, together with *type*, to HighlightSourcePostprocess.
    """
    # write source to a temp file
    # FIXME: use .c for now to hint the language to the highlighter
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.c') as f:
        temp_source_file = HighlightSourcePreProcess(f, source)
        highlight_options = config.highlight_options.replace('$SRC_LANG', src_lang)

        logging.info('running %s %s %s', config.highlight, highlight_options, temp_source_file)

        # format source
        command = [config.highlight] + shlex.split(highlight_options) + [temp_source_file]
        raw_output = subprocess.check_output(command)
        highlighted_source = raw_output.decode('utf-8')
        logging.debug('result: [%s]', highlighted_source)

        if config.highlight.endswith('/source-highlight'):
            multiline = re.MULTILINE | re.DOTALL
            highlighted_source = re.sub(r'^<\!-- .*? -->', '', highlighted_source, flags=multiline)
            highlighted_source = re.sub(
                r'<pre><tt>(.*?)</tt></pre>', r'\1', highlighted_source, flags=multiline)
        elif config.highlight.endswith('/highlight'):
            # need to rewrite the stylesheet classes
            class_renames = (
                ('<span class="gtkdoc com">', '<span class="comment">'),
                ('<span class="gtkdoc dir">', '<span class="preproc">'),
                ('<span class="gtkdoc kwd">', '<span class="function">'),
                ('<span class="gtkdoc kwa">', '<span class="keyword">'),
                ('<span class="gtkdoc line">', '<span class="linenum">'),
                ('<span class="gtkdoc num">', '<span class="number">'),
                ('<span class="gtkdoc str">', '<span class="string">'),
                ('<span class="gtkdoc sym">', '<span class="symbol">'),
            )
            for old_tag, new_tag in class_renames:
                highlighted_source = highlighted_source.replace(old_tag, new_tag)
            # maybe also do
            # highlighted_source = re.sub(r'</span>(.+)<span', '</span><span class="normal">\1</span><span')

    return HighlightSourcePostprocess(type, highlighted_source)
def HighlightSourceVim(src_lang, type, source):
    """Highlight *source* by letting vim (config.highlight) convert it to HTML.

    vim writes '<temp file>.html'; that file is read, reduced to the
    contents of its <pre> element, mapped to the stylesheet class names
    used here and passed, together with *type*, to
    HighlightSourcePostprocess.
    """
    # write source to a temp file
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.h') as f:
        temp_source_file = HighlightSourcePreProcess(f, source)
        html_file = temp_source_file + '.html'

        # format source
        # TODO(ensonic): use p.communicate()
        # NOTE(review): src_lang is pasted into a shell command line; this
        # presumes it comes from a trusted command-line option.
        script = "echo 'let html_number_lines=0|let html_use_css=1|let html_use_xhtml=1|e %s|syn on|set syntax=%s|run! plugin/tohtml.vim|run! syntax/2html.vim|w! %s.html|qa!' | " % (
            temp_source_file, src_lang, temp_source_file)
        script += "%s -n -e -u NONE -T xterm >/dev/null" % config.highlight
        try:
            subprocess.check_call([script], shell=True)

            # Close the generated file as soon as it has been read.
            with common.open_text(html_file) as h:
                highlighted_source = h.read()
        finally:
            # remove temp files, also when vim or the read failed
            if os.path.exists(html_file):
                os.unlink(html_file)

    highlighted_source = re.sub(r'.*<pre\b[^>]*>\n', '', highlighted_source, flags=re.DOTALL)
    highlighted_source = re.sub(r'</pre>.*', '', highlighted_source, flags=re.DOTALL)

    # need to rewrite the stylesheet classes
    class_renames = (
        ('<span class="Comment">', '<span class="comment">'),
        ('<span class="PreProc">', '<span class="preproc">'),
        ('<span class="Statement">', '<span class="keyword">'),
        ('<span class="Identifier">', '<span class="function">'),
        ('<span class="Constant">', '<span class="number">'),
        ('<span class="Special">', '<span class="symbol">'),
        ('<span class="Type">', '<span class="type">'),
    )
    for old_tag, new_tag in class_renames:
        highlighted_source = highlighted_source.replace(old_tag, new_tag)

    return HighlightSourcePostprocess(type, highlighted_source)
433 def HighlightSourcePreProcess(f, source):
434 # chop of leading and trailing empty lines, leave leading space in first real line
435 source = source.strip(' ')
436 source = source.strip('\n')
437 source = source.rstrip()
439 # cut common indent
440 m = re.search(r'^(\s+)', source)
441 if m:
442 source = re.sub(r'^' + m.group(1), '', source, flags=re.MULTILINE)
443 # avoid double entity replacement
444 source = source.replace('&lt;', '<')
445 source = source.replace('&gt;', '>')
446 source = source.replace('&amp;', '&')
447 if sys.version_info < (3,):
448 source = source.encode('utf-8')
449 f.write(source)
450 f.flush()
451 return f.name
def HighlightSourcePostprocess(type, highlighted_source):
    """Wrap highlighted code in the two-column listing table.

    *type* becomes the class of the enclosing <div>.  The first table
    column holds the line numbers, the second the highlighted code.
    """
    # chop off leading and trailing empty lines
    code = highlighted_source.strip()

    # turn common urls in comments into links
    code = re.sub(r'<span class="url">(.*?)</span>',
                  r'<span class="url"><a href="\1">\1</a></span>',
                  code, flags=re.DOTALL)

    # we do own line-numbering: one number per line of code
    last_line = code.count('\n') + 1
    numbers = '\n'.join(str(n) for n in range(1, last_line + 1))

    template = """<div class="%s">
<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
<tbody>
<tr>
<td class="listing_lines" align="right"><pre>%s</pre></td>
<td class="listing_code"><pre class="programlisting">%s</pre></td>
</tr>
</tbody>
</table>
</div>
"""
    return template % (type, numbers, code)