mkdb: logging more details for entity expansion
[gtk-doc.git] / gtkdoc / rebase.py
blobd6affe3522ed59e3bc956ee98b7eec549a2b9553
1 # -*- python -*-
3 # gtk-doc - GTK DocBook documentation generator.
4 # Copyright (C) 1998 Damon Chaplin
5 # 2007 David Necas (Yeti)
6 # 2007-2016 Stefan Sauer
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 """
24 The rebase tool rewrites URI references in installed HTML documentation.
25 """
27 from __future__ import print_function
28 from six import iteritems, iterkeys
30 import logging
31 import os
32 import re
34 from . import common
# Module-wide lookup tables.
#   OnlineMap: package => on-line URI seen last for that package
#   LocalMap:  package => local URI seen last for that package
#   RevMap:    URI => package, for every URI seen, so that broken links can
#              be traced back to their package while rebasing
OnlineMap, LocalMap, RevMap = {}, {}, {}
# Bookkeeping of the rewrites performed:
#   original directory => [replacement directory, number of links rewritten]
Mapped = {}
def log(options, *msg):
    """Print a progress message, but only when verbose output is requested.

    Args:
        options: object exposing a truthy/falsy `verbose` attribute.
        *msg: values passed unchanged to print() (space separated).
    """
    if not options.verbose:
        return
    print(*msg)
def run(options):
    """Entry point: scan the known doc directories, then rebase html_dir.

    Args:
        options: parsed command line options. This function reads
            `relative` and `html_dir` and passes the object on to the
            helpers it calls.
    """
    other_dirs = []

    # We scan the directory containing GLib and any directories in GNOME2_PATH
    # first, but these will be overridden by any later scans.
    if "GNOME2_PATH" in os.environ:
        for root in os.environ["GNOME2_PATH"].split(':'):
            if not root:
                # Empty entries (e.g. from "a::b") name no installation prefix.
                continue
            # Join relative components only: an absolute component such as
            # "/share/gtk-doc/html" would make os.path.join() drop `root`.
            doc_dir = os.path.join(root, "share", "gtk-doc", "html")
            if os.path.isdir(doc_dir):
                log(options, "Prepending GNOME2_PATH directory:", doc_dir)
                other_dirs = [doc_dir] + other_dirs

    glib_dir = common.GetModuleDocDir('glib-2.0')
    log(options, "Prepending GLib directory", glib_dir)
    other_dirs = [glib_dir] + other_dirs

    # Scan every collected directory; ScanDirectory itself excludes
    # options.html_dir and recurses into subdirectories.
    for doc_dir in other_dirs:
        ScanDirectory(doc_dir, options)

    if options.relative:
        RelativizeLocalMap(options.html_dir, options)

    RebaseReferences(options.html_dir, options)
    PrintWhatWeHaveDone()
def ScanDirectory(scan_dir, options):
    """Recursively register the documentation found below scan_dir.

    A directory counts as a package when it contains a *.devhelp2 file or
    an index.sgml file; such a directory is registered through AddMap().
    The on-line location from the devhelp2 file takes precedence over the
    one from index.sgml. options.html_dir itself is never scanned.

    Args:
        scan_dir: directory to inspect.
        options: parsed command line options (`html_dir` is read here).
    """
    # log() forwards to print(), so the message has to be formatted here.
    log(options, "Scanning documentation directory %s" % scan_dir)

    if scan_dir == options.html_dir:
        log(options, "Excluding self")
        return

    if not os.path.isdir(scan_dir):
        logging.info('Cannot open dir "%s"', scan_dir)
        return

    subdirs = []
    onlinedir = None
    have_index = False
    for entry in sorted(os.listdir(scan_dir)):
        full_entry = os.path.join(scan_dir, entry)
        if os.path.isdir(full_entry):
            subdirs.append(full_entry)
            continue

        if entry.endswith('.devhelp2'):
            log(options, "Reading index from " + entry)
            o = ReadDevhelp(scan_dir, entry)
            # Prefer this location over possibly stale index.sgml
            if o is not None:
                onlinedir = o
            have_index = True

        # Only fall back to index.sgml while no on-line location is known,
        # so that it cannot override the one read from the devhelp2 file.
        if not onlinedir and entry == "index.sgml":
            log(options, "Reading index from index.sgml")
            onlinedir = ReadIndex(scan_dir, entry)
            have_index = True
        elif entry == "index.sgml.gz" and not os.path.exists(os.path.join(scan_dir, 'index.sgml')):
            # debian/ubuntu started to compress this as index.sgml.gz :/
            print(''' Please fix https://bugs.launchpad.net/ubuntu/+source/gtk-doc/+bug/77138 . For now run:
gunzip %s/%s
''' % (scan_dir, entry))
        elif entry.endswith('.devhelp2.gz') and not os.path.exists(full_entry[:-3]):
            # debian/ubuntu started to compress this as *devhelp2.gz :/
            print('''Please fix https://bugs.launchpad.net/ubuntu/+source/gtk-doc/+bug/1466210 . For now run:
gunzip %s/%s
''' % (scan_dir, entry))
        # we could consider supporting: gzip module

    if have_index:
        AddMap(scan_dir, onlinedir, options)

    # Now recursively scan the subdirectories.
    for subdir in subdirs:
        ScanDirectory(subdir, options)
def ReadDevhelp(dir, file):
    """Extract the on-line base URI from a devhelp2 index file.

    Only the part of the file before the first <chapters or <functions
    element is examined. When several lines carry an online attribute the
    last one wins.

    Args:
        dir: directory containing the index file.
        file: name of the devhelp2 file inside `dir`.

    Returns:
        The value of the online="..." attribute with everything after the
        last '/' removed, or None when no such attribute was found.
    """
    onlinedir = None

    with open(os.path.join(dir, file)) as index:
        for line in index:
            # online must come before chapter/functions
            if '<chapters' in line or '<functions' in line:
                break
            match = re.search(r' online="([^"]*)"', line)
            if match:
                # Remove trailing non-directory component.
                onlinedir = re.sub(r'(.*/).*', r'\1', match.group(1))
    return onlinedir
def ReadIndex(dir, file):
    """Extract the on-line base URI from an index.sgml file.

    Only the lines before the first <ANCHOR element are examined. When
    several <ONLINE href="..."> lines are present the last one wins.

    Args:
        dir: directory containing the index file.
        file: name of the index file inside `dir`.

    Returns:
        The href of the <ONLINE> element with everything after the last
        '/' removed, or None when no such element was found.
    """
    onlinedir = None

    with open(os.path.join(dir, file)) as index:
        for line in index:
            # ONLINE must come before any ANCHORs
            if '<ANCHOR' in line:
                break
            match = re.match(r'''^<ONLINE\s+href\s*=\s*"([^"]+)"\s*>''', line)
            if match:
                # Remove trailing non-directory component.
                onlinedir = re.sub(r'''(.*/).*''', r'\1', match.group(1))
    return onlinedir
def AddMap(dir, onlinedir, options):
    """Record the local and (if known) on-line location of a package.

    The package name is the last path component of `dir`. When `dir` lies
    below options.dest_dir, that staging prefix is removed so that the
    recorded local location is the final installed path.

    Args:
        dir: directory holding the package documentation.
        onlinedir: on-line base URI of the package, or None/'' if unknown.
        options: parsed command line options (`dest_dir` is read here).
    """
    package = os.path.split(dir)[1]

    dest_dir = options.dest_dir
    if dest_dir != '':
        # Compare against "<dest_dir>/" so that only whole path components
        # match, and keep that separating '/' as the new leading slash no
        # matter whether dest_dir was given with a trailing slash or not.
        dest_prefix = dest_dir.rstrip('/') + '/'
        if dir.startswith(dest_prefix):
            dir = dir[len(dest_prefix) - 1:]

    if onlinedir:
        log(options, "On-line location of %s: %s" % (package, onlinedir))
        OnlineMap[package] = onlinedir
        RevMap[onlinedir] = package
    else:
        log(options, "No On-line location for %s found" % package)

    log(options, "Local location of %s: %s" % (package, dir))
    LocalMap[package] = dir
    RevMap[dir] = package
def RelativizeLocalMap(dirname, options):
    """Turn local locations next to `dirname` into relative "../" paths.

    Every LocalMap entry that lies below the parent directory of
    `dirname` (after resolving symlinks in `dirname`) is replaced by a
    path relative to `dirname`, e.g. with dirname "/usr/share/doc/foo" the
    location "/usr/share/doc/bar" becomes "../bar".

    Args:
        dirname: directory whose HTML files are going to be rebased.
        options: parsed command line options, only used for logging.
    """
    dirname = os.path.realpath(dirname)
    # os.path.split() returns a (head, tail) pair; only the parent
    # directory is wanted. The trailing '/' restricts matches to whole
    # path components (and copes with the parent being "/").
    prefix = os.path.split(dirname)[0].rstrip('/') + '/'

    # Iterate over a snapshot because entries are reassigned in the loop.
    for package, location in list(LocalMap.items()):
        if location.startswith(prefix):
            # The remainder has no leading '/', so join() keeps the "..".
            location = os.path.join("..", location[len(prefix):])
            LocalMap[package] = location
            log(options, "Relativizing local location of %s to %s" % (package, location))
def RebaseReferences(dirname, options):
    """Rebase the links of every *.html file directly inside `dirname`.

    Files are processed in sorted name order; subdirectories are not
    descended into.
    """
    html_names = [name for name in sorted(os.listdir(dirname))
                  if name.endswith('.html')]
    for name in html_names:
        RebaseFile(os.path.join(dirname, name), options)
def RebaseFile(filename, options):
    """Rewrite the href of every <a ...> element in one HTML file.

    The rewritten content is first written to "<filename>.new", which then
    replaces the original file.

    Args:
        filename: path of the HTML file to process.
        options: parsed command line options, passed on to RebaseLink().
    """
    log(options, "Fixing file: " + filename)
    regex = re.compile(r'''(<a(?:\s+\w+=(?:"[^"]*"|'[^']*'))*\s+href=")([^"]*)(")''',
                       flags=re.MULTILINE)

    def repl_func(match):
        # Keep the text around the URI untouched, replace only the URI.
        return match.group(1) + RebaseLink(match.group(2), options) + match.group(3)

    # Close both files explicitly so the new content is flushed to disk
    # before the original file is removed and replaced.
    with open(filename) as source:
        contents = source.read()
    processed = regex.sub(repl_func, contents)
    newfilename = filename + '.new'
    with open(newfilename, 'w') as target:
        target.write(processed)
    os.unlink(filename)
    os.rename(newfilename, filename)
def RebaseLink(href, options):
    """Return `href` pointing at the preferred location of its package.

    The directory part of `href` is mapped to a package, either through
    RevMap, through a leading "../<package>/" component or, when
    options.aggressive is set, through its last path component. If the
    package is known, the directory part is replaced by the on-line
    location (when options.online is set and one is known) or else by the
    local location. Each changed directory is counted in Mapped.

    Args:
        href: link target as found in the HTML file.
        options: parsed command line options (`aggressive` and `online`
            are read here).

    Returns:
        The rebased link, or `href` unchanged when it contains no '/' or
        no better location is known.
    """
    match = re.match(r'^(.*/)([^/]*)$', href)
    if not match:
        # No directory part (e.g. "index.html"): nothing to rebase.
        return href

    new_dir = origdir = match.group(1)
    name = match.group(2)

    package = None
    if origdir in RevMap:
        package = RevMap[origdir]
    else:
        match = re.match(r'\.\./([^/]+)/', origdir)
        if match is not None:
            package = match.group(1)
        elif options.aggressive:
            # Guess the package from the last directory component.
            match = re.search(r'''([^/]+)/$''', origdir)
            if match is not None:
                package = match.group(1)

    if package:
        if options.online and package in OnlineMap:
            new_dir = OnlineMap[package]
        elif package in LocalMap:
            new_dir = LocalMap[package]
        href = os.path.join(new_dir, name)
    else:
        log(options, "Can't determine package for '%s'" % href)

    # Remember what was changed for the final summary.
    if new_dir != origdir:
        if origdir in Mapped:
            Mapped[origdir][1] += 1
        else:
            Mapped[origdir] = [new_dir, 1]
    return href
def PrintWhatWeHaveDone():
    """Print one summary line per rewritten directory, sorted by original.

    Each line has the form "<original> -> <replacement> (<count>)".
    """
    for origdir in sorted(Mapped):
        entry = Mapped[origdir]
        count_text = "(%s)" % entry[1]
        print(origdir, "->", entry[0], count_text)