LSR: Update.
[lilypond.git] / buildscripts / check_texi_refs.py
blobdff7e334f1ccc45d9e2ab039ef8b222ef8443a48
1 #!/usr/bin/env python
3 """
4 check_texi_refs.py
5 Interactive Texinfo cross-references checking and fixing tool
7 """
10 import sys
11 import re
12 import os
13 import optparse
14 import imp
# Name of the build output sub-directory that find_file also searches.
outdir = 'out-www'

# Streams used for progress messages (log) and for reports/prompts (stdout).
log = sys.stderr
stdout = sys.stdout

# Error text attached to the EnvironmentError raised by find_file; callers
# compare against it to recognise a "missing file" condition.
file_not_found = 'file not found in include path'

warn_not_fixed = '*** Warning: this broken x-ref has not been fixed!\n'

# Command-line interface: every switch is a plain boolean flag except -I,
# which accumulates directories into the include path.
opt_parser = optparse.OptionParser(
    usage='check_texi_refs.py [OPTION]... FILE',
    description=('Check and fix cross-references in a collection of Texinfo\n'
                 'documents heavily cross-referenced each other.\n'))

opt_parser.add_option(
    '-a', '--auto-fix',
    dest='auto_fix',
    action='store_true',
    default=False,
    help="Automatically fix cross-references whenever it is possible")

opt_parser.add_option(
    '-b', '--batch',
    dest='interactive',
    action='store_false',
    default=True,
    help="Do not run interactively")

opt_parser.add_option(
    '-c', '--check-comments',
    dest='check_comments',
    action='store_true',
    default=False,
    help="Also check commented out x-refs")

opt_parser.add_option(
    '-p', '--check-punctuation',
    dest='check_punctuation',
    action='store_true',
    default=False,
    help="Check punctuation after x-refs")

# The current working directory is always the first include directory.
opt_parser.add_option(
    "-I", '--include',
    dest='include_path',
    action='append',
    metavar="DIR",
    default=[os.path.abspath(os.getcwd())],
    help="add DIR to include path")

(options, files) = opt_parser.parse_args()
class InteractionError(Exception):
    """Raised when an interactive prompt gives up on getting a usable answer."""
if not files:
    # Without this check a missing FILE argument surfaces as a bare
    # IndexError traceback; OptionParser.error prints the usage message
    # and exits instead.
    opt_parser.error('missing FILE argument')

# FILE is loaded as a Python module; the rest of the script reads its
# `references_dict' attribute.
manuals_defs = imp.load_source('manuals_defs', files[0])

# Filled in later, once every manual has been read.
manuals = {}
def find_file(name, prior_directory='.'):
    """Return the first existing path for NAME.

    Candidate locations are tried in this order:

      1. PRIOR_DIRECTORY/NAME
      2. PRIOR_DIRECTORY/<outdir>/NAME
      3. DIR/NAME for every DIR of the include path, in order
      4. DIR/<outdir>/NAME for every DIR of the include path, in order

    Raise EnvironmentError (1, file_not_found, NAME) when none of them
    is an existing regular file.
    """
    candidates = [os.path.join(prior_directory, name),
                  os.path.join(prior_directory, outdir, name)]
    candidates.extend(os.path.join(include_dir, name)
                      for include_dir in options.include_path)
    candidates.extend(os.path.join(include_dir, outdir, name)
                      for include_dir in options.include_path)

    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate

    raise EnvironmentError(1, file_not_found, name)
exit_code = 0


def set_exit_code(n):
    """Raise the module-level exit_code to N; it is never lowered."""
    global exit_code
    if n > exit_code:
        exit_code = n
if options.interactive:
    try:
        # Imported only for its side effect on raw_input (line editing),
        # so its absence is not an error.
        import readline
    except ImportError:
        pass

    def yes_prompt(question, default=False, retries=3):
        """Ask a yes/no QUESTION and return the answer as a boolean.

        An answer starting with `y' or `n' (any case) gives True or False.
        An empty answer gives DEFAULT.  Any other answer is rejected and
        the question is asked again, at most RETRIES times in total; once
        the retries are used up DEFAULT is returned.
        """
        d = {True: 'y', False: 'n'}.get(default, False)
        while retries:
            a = raw_input('%s [default: %s]' % (question, d) + '\n')
            answer = a.lower()
            if answer.startswith('y'):
                return True
            if answer.startswith('n'):
                return False
            if a == '' or retries < 0:
                return default
            stdout.write("Please answer yes or no.\n")
            retries -= 1
        # Retries exhausted: fall back on the default instead of
        # implicitly returning None.
        return default

    def search_prompt():
        """Prompt user for a substring to look for in node names.

        Return None if the user input is empty.  Otherwise return the
        list of (manual, node name, file) tuples whose node name contains
        the substring, compared case-insensitively; the list is empty
        when nothing matches.
        """
        substring = raw_input("Enter a substring to search in node names "
                              "(press Enter to skip this x-ref):\n")
        if not substring:
            return None
        substring = substring.lower()
        matches = []
        for k in manuals:
            nodes = manuals[k]['nodes']
            matches += [(k, node, nodes[node][0])
                        for node in nodes
                        if substring in node.lower()]
        return matches

else:
    # Batch mode: never ask, always take the default / skip the search.
    def yes_prompt(question, default=False, retries=3):
        """Return DEFAULT without prompting."""
        return default

    def search_prompt():
        """Return None without prompting."""
        return None
# Matches @ref{NODE}, @ruser{NODE}, ... and the @...named{NODE,DISPLAY}
# variants.  Group 1 is the macro name without `named'; `last' captures the
# single character following the closing brace (any character, including a
# newline, because of DOTALL).
ref_re = re.compile(
    r'@(ref|ruser|rlearning|rprogram|rglos)'
    r'(?:\{(?P<ref>[^,\\\}]+?)|'
    r'named\{(?P<refname>[^,\\]+?),(?P<display>[^,\\\}]+?))'
    r'\}(?P<last>.)',
    re.DOTALL)

# Matches @node and @include lines; group 1 is the command, group 2 its
# argument.
node_include_re = re.compile(r'(?m)^@(node|include)\s+(.+?)$')

# Any run of whitespace.
whitespace_re = re.compile(r'\s+')

# Zero-width match at the beginning of every line.
line_start_re = re.compile('(?m)^')
def which_line(index, newline_indices):
    """Calculate line number of a given string index

    Return the 1-based line number of string index INDEX, where
    NEWLINE_INDICES is a sorted sequence of the string indices at which
    each line begins (so its first element is normally 0).

    The result is the number of line starts that are <= INDEX, and never
    less than 1, so an empty or single-element NEWLINE_INDICES and an
    INDEX located on the last line are all handled.
    """
    # Imported locally so this function does not depend on an edit of
    # the file-level import list.
    import bisect

    # bisect_right counts the line starts at or before INDEX, which is
    # exactly the 1-based number of the line containing INDEX.
    return max(1, bisect.bisect_right(newline_indices, index))
# Matches one commented-out span: either an `@c ' / `@comment ' command up
# to and including the end of its line (unless the `@' is itself preceded
# by `@'), or a whole `@ignore' ... `@end ignore' block.
comments_re = re.compile(
    r'(?<!@)(@c(?:omment)? .*?\n|^@ignore\n.*?\n@end ignore\n)',
    re.MULTILINE | re.DOTALL)


def calc_comments_boundaries(texinfo_doc):
    """Return the (start, end) string indices of every comment in TEXINFO_DOC.

    The tuples are listed in the order the comments appear in the text.
    """
    boundaries = []
    for comment in comments_re.finditer(texinfo_doc):
        boundaries.append(comment.span())
    return boundaries
def is_commented_out(start, end, comments_boundaries):
    """Tell whether the span START..END lies inside a comment.

    COMMENTS_BOUNDARIES is a sequence of (start, end) index pairs.  The
    scan stops with False at the first comment that begins at or after
    END, so the pairs are expected in increasing order.
    """
    for (comment_start, comment_end) in comments_boundaries:
        if comment_start < start and end <= comment_end:
            return True
        if end <= comment_start:
            return False
    return False
def read_file(f, d):
    """Read Texinfo file F and record what it contains in dictionary D.

    D must already hold the 'contents', 'newline_indices',
    'comments_boundaries' and 'nodes' dictionaries; this function fills:

      D['contents'][F]             full text of F
      D['newline_indices'][F]      string index of the start of each line
      D['comments_boundaries'][F]  (start, end) indices of comments, or
                                   an empty list when commented out
                                   x-refs are to be checked too
      D['nodes'][node name]        (F, line number) for each @node of F

    Every @include that find_file can locate is read recursively, the
    directory of F being searched first.  Includes that cannot be found
    are silently skipped.  A file already present in D['contents'] is
    not read again, which also stops include cycles.
    """
    if f in d['contents']:
        return

    with open(f) as stream:
        s = stream.read()
    include_dir = os.path.dirname(f)

    d['contents'][f] = s

    d['newline_indices'][f] = [m.end() for m in line_start_re.finditer(s)]
    if options.check_comments:
        # Comments are checked like ordinary text, so nothing is
        # considered commented out.
        d['comments_boundaries'][f] = []
    else:
        d['comments_boundaries'][f] = calc_comments_boundaries(s)

    for m in node_include_re.finditer(s):
        if m.group(1) == 'node':
            line = which_line(m.start(), d['newline_indices'][f])
            d['nodes'][m.group(2)] = (f, line)

        elif m.group(1) == 'include':
            try:
                p = find_file(m.group(2), include_dir)
            except EnvironmentError as err:
                if err.strerror == file_not_found:
                    continue
                raise
            read_file(p, d)
def read_manual(name):
    """Look for all node names and cross-references in a Texinfo document

    Return a (manual, dictionary) tuple where manual is the cross-reference
    macro name defined by references_dict[name] (or '' when NAME is not
    in references_dict), and dictionary has the following keys:

      'nodes' is a dictionary of `node name':(file name, line number),

      'contents' is a dictionary of file:`full file contents',

      'newline_indices' is a dictionary of
        file:[list of beginning-of-line string indices],

      'comments_boundaries' is a dictionary of
        file:[list of (start, end) tuples], which contain string indices
        of start and end of each comment.

    The document is looked up as NAME.tely first, then as NAME.texi; when
    neither is found a message is logged and the dictionary is returned
    with all its entries empty.

    Included files that can be found in the include path are processed too.
    """
    d = {}
    d['nodes'] = {}
    d['contents'] = {}
    d['newline_indices'] = {}
    d['comments_boundaries'] = {}
    manual = manuals_defs.references_dict.get(name, '')

    f = None
    for extension in ('.tely', '.texi'):
        try:
            f = find_file(name + extension)
        except EnvironmentError as err:
            # Only a missing file lets us try the next extension.
            if err.strerror != file_not_found:
                raise
        else:
            break

    if f is None:
        log.write(name + '.{texi,tely}: ' + file_not_found + '\n')
        return (manual, d)

    log.write("Processing manual %s (%s)\n" % (f, manual))
    read_file(f, d)
    return (manual, d)
log.write("Reading files...\n")

# read_manual yields (macro name, data) pairs; dict() indexes the data of
# every manual by its cross-reference macro name.
manuals = dict(read_manual(manual_name)
               for manual_name in manuals_defs.references_dict.keys())
# Fixes chosen so far, stored as (old_type, old_ref, new_type, new_ref).
ref_fixes = set()
bad_refs_count = 0
fixes_count = 0


def add_fix(old_type, old_ref, new_type, new_ref):
    """Record that OLD_TYPE x-ref OLD_REF was replaced by NEW_TYPE x-ref NEW_REF."""
    fix = (old_type, old_ref, new_type, new_ref)
    ref_fixes.add(fix)


def lookup_fix(r):
    """Return the (new_type, new_ref) pairs of all recorded fixes of node name R.

    Only the old node name is compared; the old x-ref type is ignored.
    """
    return [(new_type, new_ref)
            for (_old_type, old_ref, new_type, new_ref) in ref_fixes
            if r == old_ref]
def preserve_linebreak(text, linebroken):
    """Put back into TEXT the line break its original version had.

    Return a (text, suffix) tuple.  When LINEBROKEN is false, TEXT is
    returned unchanged with an empty suffix.  Otherwise the first space
    of TEXT becomes a newline, or, when TEXT has no space, the suffix is
    a newline for the caller to append.
    """
    if not linebroken:
        return (text, '')
    if ' ' in text:
        return (text.replace(' ', '\n', 1), '')
    return (text, '\n')
def choose_in_numbered_list(message, string_list, sep=' ', retries=3):
    """Let the user pick one item of STRING_LIST and return it.

    Duplicates and empty strings are dropped from STRING_LIST, and the
    remaining items are shown sorted, numbered from 1 and joined by SEP.
    The user may answer with

      - nothing: '' is returned;
      - an item number between 1 and the number of items;
      - a piece of text contained in exactly one item.

    Any other answer prints an error and the question is asked again.
    InteractionError is raised after RETRIES unusable answers.
    """
    unique = set(string_list)
    unique.discard('')
    # Sorted so that the numbering is the same from one run to the next.
    choices = sorted(unique)
    numbered_list = sep.join(['%d. %s' % (j + 1, choice)
                              for (j, choice) in enumerate(choices)]) + '\n'

    t = retries
    while t > 0:
        value = ''
        stdout.write(message +
                     "(press Enter to discard and start a new search)\n")
        answer = raw_input(numbered_list)
        if not answer:
            return ''
        try:
            number = int(answer)
        except ValueError:
            # Not a number: look the answer up as a substring of the items.
            matching = [choice for choice in choices if answer in choice]
            if not matching:
                stdout.write("Error: input matches no item in the list\n")
            elif len(matching) > 1:
                stdout.write("Error: ambiguous input (matches several items "
                             "in the list)\n")
            else:
                value = matching[0]
        else:
            # Explicit range check: 0 or a negative number must not be
            # allowed to index the list from its end.
            if 1 <= number <= len(choices):
                value = choices[number - 1]
            else:
                stdout.write("Error: index number out of range\n")
        if value:
            return value
        t -= 1
    raise InteractionError("%d retries limit exceeded" % retries)
refs_count = 0


def check_ref(manual, file, m):
    """Check one cross-reference and return the text that replaces it.

    MANUAL is the key in `manuals' of the manual being processed, FILE
    the name of the file the x-ref was found in, and M a match object
    produced by ref_re.

    The x-ref is reported as bad when it uses the external macro of its
    own manual, or when its node name is unknown in the manual it points
    to.  Commented out x-refs are never reported.  For an unknown node,
    fixes are tried in this order: the same node name in another manual,
    a fix recorded earlier for the same node name, and finally a node
    chosen by the user through a substring search.  Depending on the
    options, fixes are applied automatically or after asking the user.

    The returned string is the whole text matched by M, including the
    character following the closing brace, with the macro name and node
    name possibly replaced.

    The global counters refs_count, bad_refs_count and fixes_count are
    updated.
    """
    global fixes_count, bad_refs_count, refs_count
    refs_count += 1
    bad_ref = False
    fixed = True
    ref_type = m.group(1)
    original_name = m.group('ref') or m.group('refname')
    name = whitespace_re.sub(' ', original_name).strip()
    newline_indices = manuals[manual]['newline_indices'][file]
    line = which_line(m.start(), newline_indices)
    linebroken = '\n' in original_name
    original_display_name = m.group('display')
    next_char = m.group('last')
    if original_display_name:  # the xref has an explicit display name
        display_linebroken = '\n' in original_display_name
        display_name = whitespace_re.sub(' ', original_display_name).strip()
    commented_out = is_commented_out(
        m.start(), m.end(), manuals[manual]['comments_boundaries'][file])
    # Files below the output directory are reported but flagged as not
    # worth fixing (see the warning printed further down).
    useful_fix = outdir not in file

    # check punctuation after x-ref
    if options.check_punctuation and next_char not in '.,;:!?':
        stdout.write("Warning: %s: %d: `%s': x-ref "
                     "not followed by punctuation\n" % (file, line, name))

    # validate xref
    explicit_type = ref_type
    new_name = name

    if ref_type != 'ref' and ref_type == manual and not commented_out:
        if useful_fix:
            fixed = False
            bad_ref = True
            stdout.write(
                "\n%s: %d: `%s': external %s x-ref should be internal\n"
                % (file, line, name, ref_type))
            if options.auto_fix or yes_prompt("Fix this?"):
                ref_type = 'ref'

    if ref_type == 'ref':
        explicit_type = manual

    if name not in manuals[explicit_type]['nodes'] and not commented_out:
        bad_ref = True
        fixed = False
        stdout.write('\n')
        # \033 is the ESC character that starts an ANSI colour sequence.
        if ref_type == 'ref':
            stdout.write(
                "\033[1;31m%s: %d: `%s': wrong internal x-ref\033[0m\n"
                % (file, line, name))
        else:
            stdout.write(
                "\033[1;31m%s: %d: `%s': wrong external `%s' x-ref\033[0m\n"
                % (file, line, name, ref_type))
        # print context
        context_start = newline_indices[max(0, line - 2)]
        context_end = newline_indices[min(line + 3,
                                          len(newline_indices) - 1)]
        stdout.write('--\n'
                     + manuals[manual]['contents'][file][context_start:
                                                         context_end]
                     + '--\n')

        # try to find the reference in other manuals
        found = []
        for k in [k for k in manuals if k != explicit_type]:
            if name in manuals[k]['nodes']:
                if k == manual:
                    found = ['ref']
                    stdout.write(
                        "\033[1;32m found as internal x-ref\033[0m\n")
                    break
                else:
                    found.append(k)
                    stdout.write(
                        "\033[1;32m found as `%s' x-ref\033[0m\n" % k)

        if (len(found) == 1
                and (options.auto_fix or yes_prompt("Fix this x-ref?"))):
            add_fix(ref_type, name, found[0], name)
            ref_type = found[0]
            fixed = True

        elif len(found) > 1 and useful_fix:
            if options.interactive or options.auto_fix:
                stdout.write("* Several manuals contain this node name, "
                             "cannot determine manual automatically.\n")
            if options.interactive:
                t = choose_in_numbered_list(
                    "Choose manual for this x-ref by "
                    "index number or beginning of name:\n", found)
                if t:
                    add_fix(ref_type, name, t, name)
                    ref_type = t
                    fixed = True

        if not fixed:
            # try to find a fix already made
            found = lookup_fix(name)

            if len(found) == 1:
                stdout.write("Found one previous fix: %s `%s'\n" % found[0])
                if options.auto_fix or yes_prompt("Apply this fix?"):
                    ref_type, new_name = found[0]
                    fixed = True

            elif len(found) > 1:
                if options.interactive or options.auto_fix:
                    stdout.write("* Several previous fixes match "
                                 "this node name, cannot fix automatically.\n")
                if options.interactive:
                    concatenated = choose_in_numbered_list(
                        "Choose new manual "
                        "and x-ref by index number or beginning of name:\n",
                        [''.join([i[0], ' ', i[1]]) for i in found],
                        sep='\n')
                    if concatenated:
                        ref_type, new_name = concatenated.split(' ', 1)
                        fixed = True

        if not fixed:
            # all previous automatic fixing attempts failed,
            # ask user for substring to look in node names
            while True:
                node_list = search_prompt()
                if node_list is None:
                    if options.interactive:
                        stdout.write(warn_not_fixed)
                    break
                elif not node_list:
                    stdout.write("No matched node names.\n")
                else:
                    concatenated = choose_in_numbered_list(
                        "Choose "
                        "node name and manual for this x-ref by index number "
                        "or beginning of name:\n",
                        [' '.join([i[0], i[1], '(in %s)' % i[2]])
                         for i in node_list],
                        sep='\n')
                    if concatenated:
                        # Items look like `MANUAL NODE NAME (in FILE)'.
                        t, z = concatenated.split(' ', 1)
                        new_name = z.split(' (in ', 1)[0]
                        add_fix(ref_type, name, t, new_name)
                        ref_type = t
                        fixed = True
                        break

    if fixed and ref_type == manual:
        ref_type = 'ref'
    bad_refs_count += int(bad_ref)
    if bad_ref and not useful_fix:
        stdout.write("*** Warning: this file is automatically generated, "
                     "please fix the code source instead of generated "
                     "documentation.\n")

    # compute returned string
    if new_name == name:
        if bad_ref and (options.interactive or options.auto_fix):
            # only the type of the ref was fixed
            fixes_count += int(fixed)
        if original_display_name:
            return ('@%snamed{%s,%s}'
                    % (ref_type, original_name, original_display_name)
                    ) + next_char
        else:
            return ('@%s{%s}' % (ref_type, original_name)) + next_char
    else:
        fixes_count += int(fixed)
        (ref, n) = preserve_linebreak(new_name, linebroken)
        if original_display_name:
            if bad_ref:
                # Only prompt when a user is there to answer; in batch
                # mode the existing display name is kept.
                if options.interactive:
                    stdout.write("Current display name is `%s'\n"
                                 % display_name)
                    display_name = raw_input(
                        "Enter a new display name or press enter to keep "
                        "the existing name:\n") or display_name
                (display_name, n) = preserve_linebreak(display_name,
                                                       display_linebroken)
            else:
                display_name = original_display_name
            return ('@%snamed{%s,%s}' % (ref_type, ref, display_name)) + \
                next_char + n
        else:
            return ('@%s{%s}' % (ref_type, ref)) + next_char + n
log.write("Checking cross-references...\n")

try:
    for key in manuals:
        for file in manuals[key]['contents']:
            original_text = manuals[key]['contents'][file]
            # check_ref is called for each x-ref and returns its
            # (possibly fixed) replacement text.
            s = ref_re.sub(lambda m: check_ref(key, file, m), original_text)
            if s != original_text:
                # Rewrite the file only when a fix changed it; the `with'
                # block closes (and so flushes) the file right away.
                with open(file, 'w') as output:
                    output.write(s)
except KeyboardInterrupt:
    log.write("Operation interrupted, exiting.\n")
    sys.exit(2)
except InteractionError as instance:
    log.write("Operation refused by user: %s\nExiting.\n" % instance)
    sys.exit(3)

# \033 is the ESC character that starts an ANSI colour sequence.
log.write("\033[1;36mDone: %d x-refs found, %d bad x-refs found, "
          "fixed %d.\033[0m\n"
          % (refs_count, bad_refs_count, fixes_count))