staff-symbol-referencer.cc: Junk redundant functions.
[lilypond.git] / buildscripts / check_texi_refs.py
blob94bd6ecb642476c58ed188cbbd26e624f3a88977
1 #!/usr/bin/env python
3 """
4 check_texi_refs.py
5 Interactive Texinfo cross-references checking and fixing tool
7 """
10 import sys
11 import re
12 import os
13 import optparse
14 import imp
# Name of the build output subdirectory; find_file also probes it, and
# check_ref treats files whose path contains it as generated.
outdir = 'out-www'

# Streams used for progress/log messages and for the user dialogue.
log = sys.stderr
stdout = sys.stdout

# Text stored in the EnvironmentError raised by find_file; handlers compare
# against it to tell "not in include path" apart from other errors.
file_not_found = 'file not found in include path'

warn_not_fixed = '*** Warning: this broken x-ref has not been fixed!\n'

opt_parser = optparse.OptionParser (
    usage='check_texi_refs.py [OPTION]... FILE',
    description=('Check and fix '
                 'cross-references in a collection of Texinfo\n'
                 'documents heavily cross-referenced each other.\n'))

# The boolean switches all share one shape, so register them from a table:
# (short flag, long flag, help text, action, destination, default).
for (short_flag, long_flag, help_text, action, dest, default) in (
        ('-a', '--auto-fix',
         "Automatically fix cross-references whenever it is possible",
         'store_true', 'auto_fix', False),
        ('-b', '--batch',
         "Do not run interactively",
         'store_false', 'interactive', True),
        ('-c', '--check-comments',
         "Also check commented out x-refs",
         'store_true', 'check_comments', False),
        ('-p', '--check-punctuation',
         "Check punctuation after x-refs",
         'store_true', 'check_punctuation', False)):
    opt_parser.add_option (short_flag, long_flag,
                           help=help_text,
                           action=action,
                           dest=dest,
                           default=default)

# The include path starts with the current working directory; every -I
# appends one more directory to it.
opt_parser.add_option ("-I", '--include',
                       help="add DIR to include path",
                       metavar="DIR",
                       action='append',
                       dest='include_path',
                       default=[os.path.abspath (os.getcwd ())])

(options, files) = opt_parser.parse_args ()
class InteractionError (Exception):
    """Raised when the interactive dialogue cannot be completed.

    choose_in_numbered_list raises it once its retry limit is exceeded;
    the main loop catches it and exits with status 3.
    """
# The single positional argument is a Python source file; it is loaded as a
# module and must provide `references_dict' (read by read_manual below).
# Report a missing argument as a usage error instead of an IndexError.
if not files:
    opt_parser.error ("missing FILE argument")
manuals_defs = imp.load_source ('manuals_defs', files[0])

# Filled in later with the result of read_manual for every manual.
manuals = {}
def find_file (name, prior_directory='.'):
    """Return the path of the first existing file called name.

    Candidates are probed in this order: prior_directory, the `outdir'
    subdirectory of prior_directory, every directory of the include path,
    then the `outdir' subdirectory of every include path directory.

    Raise EnvironmentError (1, file_not_found, name) if none of them is
    a regular file.
    """
    search_path = options.include_path
    candidates = [os.path.join (prior_directory, name),
                  os.path.join (prior_directory, outdir, name)]
    # plain include path entries first ...
    candidates += [os.path.join (directory, name)
                   for directory in search_path]
    # ... then their `outdir' subdirectories
    candidates += [os.path.join (directory, outdir, name)
                   for directory in search_path]

    for candidate in candidates:
        if os.path.isfile (candidate):
            return candidate

    raise EnvironmentError (1, file_not_found, name)
# Highest exit status requested so far through set_exit_code.
exit_code = 0

def set_exit_code (n):
    """Raise the module-level exit_code to n; never lower it."""
    global exit_code
    if n > exit_code:
        exit_code = n
# Two flavours of the prompt helpers are defined: real prompts when running
# interactively, and stubs that never ask anything in batch mode.
if options.interactive:
    try:
        # Imported only for its side effect on raw_input; carry on without
        # it when the module is not available.
        import readline
    except ImportError:
        pass

    def yes_prompt (question, default=False, retries=3):
        """Ask a yes/no question and return the answer as a boolean.

        An answer starting with `y' or `n' (any case) returns True or
        False.  An empty answer returns default.  Any other answer is
        rejected and the question is asked again; once `retries' attempts
        have been used up, default is returned.  A negative `retries'
        asks only once.
        """
        d = {True: 'y', False: 'n'}.get (default, False)
        while retries:
            a = raw_input ('%s [default: %s]' % (question, d) + '\n')
            answer = a.lower ()
            if answer.startswith ('y'):
                return True
            if answer.startswith ('n'):
                return False
            if a == '' or retries < 0:
                return default
            stdout.write ("Please answer yes or no.\n")
            retries -= 1
        # Retries exhausted: fall back to the default rather than
        # implicitly returning None.
        return default

    def search_prompt ():
        """Prompt user for a substring to look for in node names.

        If user input is empty, return None; otherwise return a list of
        (manual, node name, file) tuples for every node whose name
        contains the input (compared case-insensitively).  The list is
        empty when nothing matches.
        """
        substring = raw_input ("Enter a substring to search in node names "
                               "(press Enter to skip this x-ref):\n")
        if not substring:
            return None
        substring = substring.lower ()
        matches = []
        for k in manuals:
            matches += [(k, node, manuals[k]['nodes'][node][0])
                        for node in manuals[k]['nodes']
                        if substring in node.lower ()]
        return matches

else:
    def yes_prompt (question, default=False, retries=3):
        """Batch mode: never ask, always return default."""
        return default

    def search_prompt ():
        """Batch mode: never ask, always return None."""
        return None
# A cross-reference macro call.  Group 1 is the macro name, group 2 the
# argument (no comma or backslash allowed in it), group 3 the single
# character following the closing brace (DOTALL lets it be a newline).
ref_re = re.compile (r'@(ref|ruser|rlearning|rprogram|rglos)\{([^,\\]*?)\}(.)',
                     re.DOTALL)
# An @node or @include line.  Group 1 is the command, group 2 its argument.
node_include_re = re.compile (r'(?m)^@(node|include)\s+(.+?)$')

# Any run of whitespace; used to normalize node names.
whitespace_re = re.compile (r'\s+')
# Empty match at the beginning of every line; used to index line starts.
line_start_re = re.compile ('(?m)^')
def which_line (index, newline_indices):
    """Calculate line number of a given string index

    Return the 1-based line number of string index `index', where
    newline_indices is an ordered sequence of the string indices at which
    each line begins.

    The result is the number of line starts that are <= index, but never
    less than 1, so an empty or one-element newline_indices is handled and
    an index on the last line is reported as being on that line.
    """
    import bisect

    # bisect_right counts the entries <= index, which is exactly the
    # 1-based number of the line containing index.
    return max (1, bisect.bisect_right (newline_indices, index))
# Matches either an `@c ' / `@comment ' comment up to and including the end
# of its line (unless the `@' is itself preceded by `@'), or a whole
# `@ignore' ... `@end ignore' block.  The backslash-newline below is a
# line continuation inside the string literal: the pattern continues
# directly after `? ' with `.*?'.
comments_re = re.compile ('(?<!@)(@c(?:omment)? \
.*?\\n|^@ignore\\n.*?\\n@end ignore\\n)', re.M | re.S)
def calc_comments_boundaries (texinfo_doc):
    """Return the (start, end) string indices of each comments_re match.

    The list is in the order the matches occur in texinfo_doc.
    """
    boundaries = []
    for comment in comments_re.finditer (texinfo_doc):
        boundaries.append (comment.span ())
    return boundaries
def is_commented_out (start, end, comments_boundaries):
    """Tell whether the span start..end lies inside one of the comments.

    comments_boundaries is a sequence of (start, end) index pairs, scanned
    in order.  Return True as soon as a pair encloses the span (its start
    strictly before `start', its end at or after `end').  Return False as
    soon as a pair begins at or after `end', or when no pair encloses the
    span.
    """
    for (comment_start, comment_end) in comments_boundaries:
        if comment_start < start and end <= comment_end:
            return True
        if end <= comment_start:
            # this comment begins after the span: stop scanning
            return False
    return False
def read_file (f, d):
    """Read Texinfo file f and record its data in dictionary d.

    d must already hold the 'contents', 'newline_indices',
    'comments_boundaries' and 'nodes' dictionaries.  For f this stores:

      d['contents'][f]             the full text of the file,
      d['newline_indices'][f]      the string index of each line start,
      d['comments_boundaries'][f]  the comment spans, or an empty list
                                   when options.check_comments is set
                                   (so nothing counts as commented out),
      d['nodes'][node name]        a (file name, line number) tuple for
                                   every @node line.

    Every @include'd file that find_file can locate is read recursively
    into the same d; includes that are not found are skipped.  A file that
    is already present in d['contents'] is not read again, which also
    keeps circular includes from recursing without end.
    """
    if f in d['contents']:
        return

    with open (f) as texinfo_file:
        s = texinfo_file.read ()
    # included files are looked up relative to the including file first
    include_dir = os.path.dirname (f)

    d['contents'][f] = s

    d['newline_indices'][f] = [m.end () for m in line_start_re.finditer (s)]
    if options.check_comments:
        d['comments_boundaries'][f] = []
    else:
        d['comments_boundaries'][f] = calc_comments_boundaries (s)

    for m in node_include_re.finditer (s):
        if m.group (1) == 'node':
            line = which_line (m.start (), d['newline_indices'][f])
            d['nodes'][m.group (2)] = (f, line)

        elif m.group (1) == 'include':
            try:
                p = find_file (m.group (2), include_dir)
            except EnvironmentError as e:
                # Only "not in include path" is ignorable; anything else
                # is a real error.
                if e.strerror == file_not_found:
                    continue
                raise
            read_file (p, d)
def read_manual (name):
    """Look for all node names and cross-references in a Texinfo document

    Return a (manual, dictionary) tuple where manual is the cross-reference
    macro name given by manuals_defs.references_dict[name] (or '' if name
    is not a key), and dictionary has the following keys:

      'nodes' is a dictionary of `node name':(file name, line number),

      'contents' is a dictionary of file:`full file contents',

      'newline_indices' is a dictionary of
    file:[list of beginning-of-line string indices],

      'comments_boundaries' is a dictionary of file:[list of (start, end)
    tuples], which contain string indices of start and end of each comment.

    The document is looked up as name.tely, then name.texi.  If neither is
    found, a message is written to standard error and the dictionary is
    returned with all four entries empty.

    Included files that can be found in the include path are processed too.
    """
    d = {
        'nodes': {},
        'contents': {},
        'newline_indices': {},
        'comments_boundaries': {},
    }
    manual = manuals_defs.references_dict.get (name, '')

    f = None
    for extension in ('.tely', '.texi'):
        try:
            f = find_file (name + extension)
        except EnvironmentError as e:
            # Only "not in include path" lets us try the next extension.
            if e.strerror != file_not_found:
                raise
        else:
            break

    if f is None:
        sys.stderr.write (name + '.{texi,tely}: ' +
                          file_not_found + '\n')
        return (manual, d)

    log.write ("Processing manual %s (%s)\n" % (f, manual))
    read_file (f, d)
    return (manual, d)
log.write ("Reading files...\n")

# Map each cross-reference macro name to the data read_manual collected
# for the corresponding manual.
manuals = {}
for manual_name in manuals_defs.references_dict:
    (macro, manual_data) = read_manual (manual_name)
    manuals[macro] = manual_data

# Fixes recorded so far, as (old type, old ref, new type, new ref) tuples.
ref_fixes = set ()
# Counters reported in the final summary line.
bad_refs_count = 0
fixes_count = 0
def add_fix (old_type, old_ref, new_type, new_ref):
    """Record a fix in ref_fixes so that lookup_fix can find it later."""
    fix = (old_type, old_ref, new_type, new_ref)
    ref_fixes.add (fix)
def lookup_fix (r):
    """Return the recorded fixes whose old reference name equals r.

    The result is a list of (new type, new ref) tuples; the old type of
    the recorded fix is not taken into account.
    """
    return [(fix[2], fix[3]) for fix in ref_fixes if fix[1] == r]
def preserve_linebreak (text, linebroken):
    """Keep a line break in or after a replacement reference name.

    Return a (text, suffix) tuple.  If linebroken is false, text is
    returned unchanged with an empty suffix.  Otherwise the first space of
    text is turned into a newline (suffix empty), or, when text has no
    space, text is left alone and the suffix is a newline.
    """
    if not linebroken:
        return (text, '')
    if ' ' not in text:
        return (text, '\n')
    (head, tail) = text.split (' ', 1)
    return (head + '\n' + tail, '')
def choose_in_numbered_list (message, string_list, sep=' ', retries=3):
    """Let the user pick one string out of string_list.

    Empty strings and duplicates are dropped from string_list, keeping the
    order of first occurrence, and the remaining items are shown numbered
    from 1 and joined with sep.  The user may answer with

      - nothing: '' is returned at once,
      - an index number between 1 and the number of items,
      - any text contained in exactly one item.

    An out-of-range number, or text matching no item or several items,
    prints an error and counts as a failed attempt.  Return the chosen
    string; raise InteractionError after `retries' failed attempts.
    """
    # De-duplicate without going through a bare set, so the numbering the
    # user sees follows the caller's order.
    seen = set ()
    choices = []
    for item in string_list:
        if item and item not in seen:
            seen.add (item)
            choices.append (item)

    numbered_list = sep.join (['%d. %s' % (j + 1, choice)
                               for (j, choice) in enumerate (choices)]) + '\n'

    for _attempt in range (retries):
        value = ''
        stdout.write (message +
                      "(press Enter to discard and start a new search)\n")
        answer = raw_input (numbered_list)
        if not answer:
            return ''
        try:
            index = int (answer)
        except ValueError:
            # not a number: treat the answer as a substring of an item
            matches = [choice for choice in choices if answer in choice]
            if not matches:
                stdout.write ("Error: input matches no item in the list\n")
            elif len (matches) > 1:
                stdout.write ("Error: ambiguous input (matches several items "
                              "in the list)\n")
            else:
                value = matches[0]
        else:
            # Reject 0 and negative numbers explicitly: plain indexing
            # would silently pick an item counted from the end.
            if 1 <= index <= len (choices):
                value = choices[index - 1]
            else:
                stdout.write ("Error: index number out of range\n")
        if value:
            return value

    raise InteractionError ("%d retries limit exceeded" % retries)
def check_ref (manual, file, m):
    """Check one cross-reference and return the text that replaces it.

    manual is the key in `manuals' of the manual being checked, file the
    path of the file the reference was found in, and m a ref_re match
    object (group 1: macro name, group 2: node name, group 3: the
    character following the closing brace).

    The reference is reported as bad when it uses the manual's own
    external macro instead of @ref, or when its node name is not known in
    the manual it points to.  Depending on options.auto_fix and
    options.interactive, a fix is then sought, in this order: the same
    node name in another manual, a fix recorded earlier for the same node
    name, and finally a node search driven by the user.  References inside
    comments are not validated.

    Updates the global bad_refs_count, and fixes_count when the node name
    itself is replaced.  Return the (possibly rewritten) macro call
    followed by the character from group 3, plus a newline when a line
    break of the original has to be kept.
    """
    global fixes_count, bad_refs_count
    bad_ref = False
    fixed = True
    ref_type = m.group (1)
    original_name = m.group (2)
    name = whitespace_re.sub (' ', original_name).strip ()
    newline_indices = manuals[manual]['newline_indices'][file]
    line = which_line (m.start (), newline_indices)
    linebroken = '\n' in original_name
    next_char = m.group (3)
    commented_out = is_commented_out (
        m.start (), m.end (), manuals[manual]['comments_boundaries'][file])
    # files below `outdir' are generated: fixing them is of no use
    useful_fix = outdir not in file

    # check punctuation after x-ref
    if options.check_punctuation and next_char not in '.,;:!?':
        stdout.write ("Warning: %s: %d: `%s': x-ref "
                      "not followed by punctuation\n" % (file, line, name))

    # validate xref
    explicit_type = ref_type
    new_name = name

    if (ref_type != 'ref' and ref_type == manual
        and not commented_out and useful_fix):
        bad_ref = True
        stdout.write ("\n%s: %d: `%s': external %s x-ref should be internal\n"
                      % (file, line, name, ref_type))
        if options.auto_fix or yes_prompt ("Fix this?"):
            ref_type = 'ref'

    if ref_type == 'ref':
        explicit_type = manual

    # NOTE(review): a macro name that is not a key of `manuals' raises
    # KeyError here -- confirm this cannot happen with real definitions.
    if name not in manuals[explicit_type]['nodes'] and not commented_out:
        bad_ref = True
        fixed = False
        stdout.write ('\n')
        if ref_type == 'ref':
            stdout.write ("%s: %d: `%s': wrong internal x-ref\n"
                          % (file, line, name))
        else:
            stdout.write ("%s: %d: `%s': wrong external `%s' x-ref\n"
                          % (file, line, name, ref_type))
        # print context
        context_start = newline_indices[max (0, line - 2)]
        context_end = newline_indices[min (line + 3,
                                           len (newline_indices) - 1)]
        stdout.write ('--\n'
                      + manuals[manual]['contents'][file]
                      [context_start:context_end]
                      + '--\n')

        # try to find the reference in other manuals
        found = []
        for k in [k for k in manuals if k != explicit_type]:
            if name in manuals[k]['nodes']:
                if k == manual:
                    # a hit in the current manual wins over any other
                    found = ['ref']
                    stdout.write (" found as internal x-ref\n")
                    break
                else:
                    found.append (k)
                    stdout.write (" found as `%s' x-ref\n" % k)

        if (len (found) == 1
            and (options.auto_fix or yes_prompt ("Fix this x-ref?"))):
            add_fix (ref_type, name, found[0], name)
            ref_type = found[0]
            fixed = True

        elif len (found) > 1 and useful_fix:
            if options.interactive or options.auto_fix:
                stdout.write ("* Several manuals contain this node name, "
                              "cannot determine manual automatically.\n")
            if options.interactive:
                t = choose_in_numbered_list (
                    "Choose manual for this x-ref by "
                    "index number or beginning of name:\n",
                    found)
                if t:
                    add_fix (ref_type, name, t, name)
                    ref_type = t
                    fixed = True

        if not fixed:
            # try to find a fix already made
            found = lookup_fix (name)

            if len (found) == 1:
                stdout.write ("Found one previous fix: %s `%s'\n" % found[0])
                if options.auto_fix or yes_prompt ("Apply this fix?"):
                    ref_type, new_name = found[0]
                    fixed = True

            elif len (found) > 1:
                if options.interactive or options.auto_fix:
                    stdout.write ("* Several previous fixes match "
                                  "this node name, cannot fix automatically.\n")
                if options.interactive:
                    # each choice reads "<type> <node name>"
                    concatenated = choose_in_numbered_list (
                        "Choose new manual "
                        "and x-ref by index number or beginning of name:\n",
                        [''.join ([i[0], ' ', i[1]]) for i in found],
                        sep='\n')
                    if concatenated:
                        ref_type, new_name = concatenated.split (' ', 1)
                        fixed = True

        if not fixed:
            # all previous automatic fixes attempts failed,
            # ask user for substring to look in node names
            while True:
                node_list = search_prompt ()
                if node_list is None:
                    if options.interactive:
                        stdout.write (warn_not_fixed)
                    break
                elif not node_list:
                    stdout.write ("No matched node names.\n")
                else:
                    # each choice reads "<manual> <node name> (in <file>)"
                    concatenated = choose_in_numbered_list (
                        "Choose "
                        "node name and manual for this x-ref by index number "
                        "or beginning of name:\n",
                        [' '.join ([i[0], i[1], '(in %s)' % i[2]])
                         for i in node_list],
                        sep='\n')
                    if concatenated:
                        t, z = concatenated.split (' ', 1)
                        new_name = z.split (' (in ', 1)[0]
                        add_fix (ref_type, name, t, new_name)
                        ref_type = t
                        fixed = True
                        break

    # a reference fixed to point into the current manual is internal
    if fixed and ref_type == manual:
        ref_type = 'ref'
    bad_refs_count += int (bad_ref)
    if bad_ref and not useful_fix:
        stdout.write ("*** Warning: this file is automatically generated, "
                      "please fix the code source instead of generated "
                      "documentation.\n")

    # compute returned string
    if new_name == name:
        # keep the node name exactly as written, line breaks included
        return ('@%s{%s}' % (ref_type, original_name)) + next_char
    else:
        fixes_count += 1
        (ref, n) = preserve_linebreak (new_name, linebroken)
        return ('@%s{%s}' % (ref_type, ref)) + next_char + n
log.write ("Checking cross-references...\n")

# Run check_ref over every cross-reference of every file read, and write a
# file back only if at least one reference in it was changed.
try:
    for key in manuals:
        for texi_file in manuals[key]['contents']:
            original = manuals[key]['contents'][texi_file]
            s = ref_re.sub (lambda m: check_ref (key, texi_file, m),
                            original)
            if s != original:
                # close the file explicitly so the new contents are
                # flushed before moving on
                with open (texi_file, 'w') as out:
                    out.write (s)
except KeyboardInterrupt:
    log.write ("Operation interrupted, exiting.\n")
    sys.exit (2)
except InteractionError as instance:
    log.write ("Operation refused by user: %s\nExiting.\n" % instance)
    sys.exit (3)

log.write ("Done: %d bad x-refs found, fixed %d.\n" %
           (bad_refs_count, fixes_count))