Doc-es: update of Programming Interfaces.
[lilypond/mpolesky.git] / scripts / auxiliar / check_texi_refs.py
blob45f1e16993581c02fc6fd5509ceca22ac6730ac5
1 #!/usr/bin/env python
3 """
4 check_texi_refs.py
5 Interactive Texinfo cross-references checking and fixing tool
7 """
10 import sys
11 import re
12 import os
13 import optparse
14 import imp
# Name of the build output sub-directory that is also searched for files.
outdir = "out-www"

# Streams used for progress messages (log) and for reports to the user.
log = sys.stderr
stdout = sys.stdout

# Message carried by the EnvironmentError raised when a file lookup fails;
# handlers compare against it to recognise that specific failure.
file_not_found = "file not found in include path"

# Notice printed when an interactive search is skipped without a fix.
warn_not_fixed = "*** Warning: this broken x-ref has not been fixed!\n"
# Command-line interface: five options plus one positional FILE argument.
opt_parser = optparse.OptionParser(
    usage='check_texi_refs.py [OPTION]... FILE',
    description=('Check and fix '
                 'cross-references in a collection of Texinfo\n'
                 'documents heavily cross-referenced each other.\n'))

opt_parser.add_option(
    '-a', '--auto-fix',
    dest='auto_fix',
    action='store_true',
    default=False,
    help=('Automatically fix cross-references whenever '
          'it is possible'))

# Note the inverted sense: passing --batch clears options.interactive.
opt_parser.add_option(
    '-b', '--batch',
    dest='interactive',
    action='store_false',
    default=True,
    help='Do not run interactively')

opt_parser.add_option(
    '-c', '--check-comments',
    dest='check_comments',
    action='store_true',
    default=False,
    help='Also check commented out x-refs')

opt_parser.add_option(
    '-p', '--check-punctuation',
    dest='check_punctuation',
    action='store_true',
    default=False,
    help='Check punctuation after x-refs')

opt_parser.add_option(
    '-I', '--include',
    dest='include_path',
    action='append',
    default=[],
    metavar='DIR',
    help='add DIR to include path')

(options, files) = opt_parser.parse_args()

# The current working directory is appended after any -I directories.
options.include_path.append(os.path.abspath(os.getcwd()))
class InteractionError(Exception):
    """Signal that an interactive prompt was given up on.

    Raised when the retries limit of a numbered-list prompt is exceeded.
    """
# FILE is loaded as a Python module; the rest of the script reads its
# `references_dict` attribute (manual name -> cross-reference macro name).
if not files:
    # Without this check a missing argument surfaces as a bare IndexError;
    # OptionParser.error prints the usage message and exits with status 2.
    opt_parser.error('missing FILE argument')
manuals_defs = imp.load_source('manuals_defs', files[0])

# Placeholder; replaced once every manual has been read.
manuals = {}
def find_file(name, prior_directory='.'):
    """Return the path of the first existing file called `name`.

    Candidates are tried in this order:

      1. `prior_directory`, then its `outdir` sub-directory;
      2. every directory of the include path;
      3. the `outdir` sub-directory of every include path directory.

    Raises EnvironmentError (1, file_not_found, name) when no candidate
    is an existing file.
    """
    include_dirs = options.include_path
    candidates = [os.path.join(prior_directory, name),
                  os.path.join(prior_directory, outdir, name)]
    candidates.extend(os.path.join(d, name) for d in include_dirs)
    candidates.extend(os.path.join(d, outdir, name) for d in include_dirs)

    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate

    raise EnvironmentError(1, file_not_found, name)
# Highest exit code requested so far.
exit_code = 0


def set_exit_code(n):
    """Raise the module-level exit code to `n` if `n` is larger."""
    global exit_code
    if n > exit_code:
        exit_code = n
# Two implementations of the prompt helpers are provided: real prompts when
# running interactively, and silent stand-ins for batch mode.
if options.interactive:
    try:
        # readline is optional; it is imported only for its side effect of
        # adding line editing to raw_input, so a missing module is ignored.
        import readline
    except ImportError:
        pass

    def yes_prompt(question, default=False, retries=3):
        """Ask a yes/no question on the terminal and return the answer.

        Any answer starting with `y' or `n' (case-insensitive) returns
        True or False.  An empty answer returns `default`.  Any other
        answer is rejected and the question is asked again, at most
        `retries` times; once the retries are used up `default` is
        returned.  A negative `retries` returns `default` on the first
        unrecognised answer.
        """
        d = {True: 'y', False: 'n'}.get(default, False)
        while retries:
            a = raw_input('%s [default: %s]' % (question, d) + '\n')
            answer = a.lower()
            if answer.startswith('y'):
                return True
            if answer.startswith('n'):
                return False
            if a == '' or retries < 0:
                return default
            stdout.write("Please answer yes or no.\n")
            retries -= 1
        # Retries exhausted: fall back to the default instead of
        # implicitly returning None.
        return default

    def search_prompt():
        """Prompt user for a substring to look for in node names.

        Return None if the user input is empty.  Otherwise return a list
        of (manual, node name, file) tuples for every node whose name
        contains the substring, compared case-insensitively; the list is
        empty when nothing matches.
        """
        substring = raw_input("Enter a substring to search in node names "
                              "(press Enter to skip this x-ref):\n")
        if not substring:
            return None
        substring = substring.lower()
        matches = []
        for k in manuals:
            nodes = manuals[k]['nodes']
            matches += [(k, node, nodes[node][0])
                        for node in nodes
                        if substring in node.lower()]
        return matches

else:
    def yes_prompt(question, default=False, retries=3):
        """Batch mode: never ask, always answer with `default`."""
        return default

    def search_prompt():
        """Batch mode: never ask, always skip the search (return None)."""
        return None
# Cross-reference macro.  Group 1 is the macro name.  Either `ref' is set
# (plain form) or `refname' and `display' are set (`...named' form).
# `last' is the single character following the closing brace; DOTALL lets
# that character be a newline.
ref_re = re.compile(
    '@((?:ressay|rgloss|rinternals|rlearning|rslr|rprogram|ruser|ref)|named)'
    '(?:\\{(?P<ref>[^,\\\\\\}]+?)|'
    'named\\{(?P<refname>[^,\\\\]+?),(?P<display>[^,\\\\\\}]+?))'
    '\\}(?P<last>.)',
    re.DOTALL)

# An @node or @include line: group 1 is the command, group 2 its argument.
node_include_re = re.compile(r'(?m)^@(node|include)\s+(.+?)$')

# Any run of whitespace, used to normalise names broken across lines.
whitespace_re = re.compile(r'\s+')

# Zero-width match at the start of every line.
line_start_re = re.compile('(?m)^')
def which_line(index, newline_indices):
    """Calculate line number of a given string index

    Return the 1-based line number of string index `index`, where
    `newline_indices` is a sorted list of the string indices at which
    each line starts.

    The result is the number of line starts that are <= `index`, and is
    never less than 1.  The previous hand-written binary search looped
    forever on a one-element list, and reported the last line one too
    low when `index` was at or after the final line start.
    """
    # Function-scope import so the block does not rely on file-level imports.
    import bisect

    return max(1, bisect.bisect_right(newline_indices, index))
# A comment is either an `@c ' / `@comment ' command not preceded by `@',
# running to the end of its line, or a whole @ignore ... @end ignore block.
comments_re = re.compile(
    '(?<!@)(@c(?:omment)? '
    '.*?\\n|^@ignore\\n.*?\\n@end ignore\\n)',
    re.M | re.S)


def calc_comments_boundaries(texinfo_doc):
    """Return the (start, end) string indices of each comment in the text."""
    return [match.span() for match in comments_re.finditer(texinfo_doc)]
def is_commented_out(start, end, comments_boundaries):
    """Tell whether the span [start, end) lies inside a comment.

    `comments_boundaries` is a sequence of (start, end) index pairs.
    The scan stops at the first comment that begins at or after `end`.
    """
    for bounds in comments_boundaries:
        comment_start, comment_end = bounds[0], bounds[1]
        if comment_start < start and end <= comment_end:
            return True
        if end <= comment_start:
            return False
    return False
def read_file(f, d):
    """Read Texinfo file `f`, and the files it includes, into `d`.

    `d` is a dictionary with the keys 'contents', 'newline_indices',
    'comments_boundaries' and 'nodes'.  For file `f` this stores:

      d['contents'][f]             the full text of the file,
      d['newline_indices'][f]      the string index of every line start,
      d['comments_boundaries'][f]  the (start, end) span of each comment,
                                   or an empty list when commented-out
                                   x-refs are to be checked as well,
      d['nodes'][node name]        (f, line number) for each @node line.

    Each @include line is resolved with find_file, looking first in the
    directory of `f`, and the included file is read recursively.
    Includes that cannot be found are silently skipped.
    """
    # A file that is already recorded needs no second pass; this also
    # keeps mutually-including files from recursing without end.
    if f in d['contents']:
        return

    with open(f) as stream:
        s = stream.read()
    directory = os.path.dirname(f)

    d['contents'][f] = s
    d['newline_indices'][f] = [m.end() for m in line_start_re.finditer(s)]
    if options.check_comments:
        # No boundaries recorded: nothing counts as commented out.
        d['comments_boundaries'][f] = []
    else:
        d['comments_boundaries'][f] = calc_comments_boundaries(s)

    for m in node_include_re.finditer(s):
        if m.group(1) == 'node':
            line = which_line(m.start(), d['newline_indices'][f])
            d['nodes'][m.group(2)] = (f, line)

        elif m.group(1) == 'include':
            try:
                p = find_file(m.group(2), directory)
            except EnvironmentError as err:
                if err.strerror == file_not_found:
                    continue
                raise
            read_file(p, d)
def read_manual(name):
    """Look for all node names and cross-references in a Texinfo document

    The document is NAME.tely or, if that cannot be found, NAME.texi.

    Return a (manual, dictionary) tuple where manual is the cross-reference
    macro name defined by references_dict[name], and dictionary
    has the following keys:

      'nodes' is a dictionary of `node name':(file name, line number),

      'contents' is a dictionary of file:`full file contents',

      'newline_indices' is a dictionary of
    file:[list of beginning-of-line string indices],

      'comments_boundaries' is a dictionary of
    file:[list of (start, end) tuples], which contain string indices
    of start and end of each comment.

    Included files that can be found in the include path are processed too.
    If neither file can be found, a message is written to standard error
    and the dictionary is returned with all four keys empty.
    """
    d = {
        'nodes': {},
        'contents': {},
        'newline_indices': {},
        'comments_boundaries': {},
    }
    manual = manuals_defs.references_dict.get(name, '')

    f = None
    for extension in ('.tely', '.texi'):
        try:
            f = find_file(name + extension)
        except EnvironmentError as err:
            # Only "not found" means "try the next extension".
            if err.strerror != file_not_found:
                raise
        else:
            break

    if f is None:
        sys.stderr.write(name + '.{texi,tely}: ' + file_not_found + '\n')
        return (manual, d)

    log.write("Processing manual %s (%s)\n" % (f, manual))
    read_file(f, d)
    return (manual, d)
log.write("Reading files...\n")

# Each read_manual call yields a (macro name, data) pair; collect the pairs
# into a dictionary keyed by macro name.
manuals = dict(map(read_manual, manuals_defs.references_dict.keys()))
# Every fix applied so far, as (old type, old ref, new type, new ref).
ref_fixes = set()

# Running totals reported at the end of the run.
bad_refs_count = 0
fixes_count = 0


def add_fix(old_type, old_ref, new_type, new_ref):
    """Record a fix so it can be proposed again for the same node name."""
    fix = (old_type, old_ref, new_type, new_ref)
    ref_fixes.add(fix)


def lookup_fix(r):
    """Return the (new type, new ref) pairs recorded for old ref `r`."""
    return [(fix[2], fix[3]) for fix in ref_fixes if fix[1] == r]
def preserve_linebreak(text, linebroken):
    """Put back a line break that the original text contained.

    Return a (text, suffix) pair.  If `linebroken` is false the text is
    unchanged and the suffix is empty.  Otherwise the first space of
    `text` becomes a newline; when there is no space to replace, the
    text is unchanged and the suffix is a newline.
    """
    if not linebroken:
        return (text, '')
    if ' ' not in text:
        return (text, '\n')
    return (text.replace(' ', '\n', 1), '')
def choose_in_numbered_list(message, string_list, sep=' ', retries=3):
    """Let the user pick one string out of `string_list`.

    Duplicates and empty strings are dropped, and the remaining items
    are shown numbered from 1, joined by `sep`.  The user may answer
    with:

      - nothing: '' is returned;
      - an item number;
      - a piece of text contained in exactly one item.

    An invalid answer prints an error and asks again.  The chosen item
    is returned; InteractionError is raised after `retries` invalid
    answers.
    """
    # Sorted so the numbering does not depend on set iteration order.
    choices = sorted(set(string_list) - set(['']))
    numbered_list = sep.join(
        '%d. %s' % (number, item)
        for number, item in enumerate(choices, 1)) + '\n'

    for _attempt in range(retries):
        stdout.write(message +
                     "(press Enter to discard and start a new search)\n")
        answer = raw_input(numbered_list)
        if not answer:
            return ''

        value = ''
        try:
            number = int(answer)
        except ValueError:
            # Not a number: look for the answer inside the item names.
            matches = [item for item in choices if answer in item]
            if not matches:
                stdout.write("Error: input matches no item in the list\n")
            elif len(matches) > 1:
                stdout.write("Error: ambiguous input (matches several items "
                             "in the list)\n")
            else:
                value = matches[0]
        else:
            # Explicit range check: a plain choices[number - 1] would
            # accept 0 and negative numbers, picking from the list's end.
            if 1 <= number <= len(choices):
                value = choices[number - 1]
            else:
                stdout.write("Error: index number out of range\n")

        if value:
            return value

    raise InteractionError("%d retries limit exceeded" % retries)
# Total number of x-refs seen, good or bad.
refs_count = 0


def check_ref(manual, file, m):
    """Check one cross-reference and return its replacement text.

    `manual` is the key in `manuals` of the manual being checked, `file`
    is the path of the file the reference was found in, and `m` is a
    match object of `ref_re`.  The function is meant to be used as the
    replacement callback of ref_re.sub: the returned string is the
    (possibly fixed) x-ref followed by the character that came after it.

    Steps, in order:

      1. optionally warn if the x-ref is not followed by punctuation;
      2. flag an external x-ref that points into its own manual;
      3. if the node does not exist in the target manual, try in turn:
         another manual that has the node, a fix recorded earlier for
         the same node name, an interactive search of node names.

    X-refs inside comments are counted but not validated.  The
    module-level counters refs_count, bad_refs_count and fixes_count
    are updated.
    """
    global fixes_count, bad_refs_count, refs_count
    refs_count += 1
    bad_ref = False
    fixed = True
    ref_type = m.group(1)
    original_name = m.group('ref') or m.group('refname')
    name = whitespace_re.sub(' ', original_name).strip()
    newline_indices = manuals[manual]['newline_indices'][file]
    line = which_line(m.start(), newline_indices)
    linebroken = '\n' in original_name
    original_display_name = m.group('display')
    next_char = m.group('last')
    if original_display_name:  # the xref has an explicit display name
        display_linebroken = '\n' in original_display_name
        display_name = whitespace_re.sub(' ', original_display_name).strip()
    commented_out = is_commented_out(
        m.start(), m.end(), manuals[manual]['comments_boundaries'][file])
    # Files under the output directory are generated: fixing them is moot.
    useful_fix = outdir not in file

    # check punctuation after x-ref
    if options.check_punctuation and next_char not in '.,;:!?':
        stdout.write("Warning: %s: %d: `%s': x-ref "
                     "not followed by punctuation\n" % (file, line, name))

    # validate xref
    explicit_type = ref_type
    new_name = name

    if ref_type != 'ref' and ref_type == manual and not commented_out:
        if useful_fix:
            fixed = False
            bad_ref = True
            stdout.write(
                "\n%s: %d: `%s': external %s x-ref should be internal\n"
                % (file, line, name, ref_type))
            if options.auto_fix or yes_prompt("Fix this?"):
                ref_type = 'ref'

    if ref_type == 'ref':
        explicit_type = manual

    if name not in manuals[explicit_type]['nodes'] and not commented_out:
        bad_ref = True
        fixed = False
        stdout.write('\n')
        # \033 is the ESC character that starts an ANSI colour sequence
        # (`\e' is not a Python escape and would be printed literally).
        if ref_type == 'ref':
            stdout.write(
                "\033[1;31m%s: %d: `%s': wrong internal x-ref\033[0m\n"
                % (file, line, name))
        else:
            stdout.write(
                "\033[1;31m%s: %d: `%s': wrong external `%s' x-ref\033[0m\n"
                % (file, line, name, ref_type))
        # print context
        context_start = newline_indices[max(0, line - 2)]
        context_end = newline_indices[min(line + 3,
                                          len(newline_indices) - 1)]
        stdout.write('--\n'
                     + manuals[manual]['contents'][file][
                         context_start:context_end]
                     + '--\n')

        # try to find the reference in other manuals
        found = []
        for k in manuals:
            if k == explicit_type:
                continue
            if name in manuals[k]['nodes']:
                if k == manual:
                    found = ['ref']
                    stdout.write(
                        "\033[1;32m found as internal x-ref\033[0m\n")
                    break
                else:
                    found.append(k)
                    stdout.write(
                        "\033[1;32m found as `%s' x-ref\033[0m\n" % k)

        if (len(found) == 1
                and (options.auto_fix or yes_prompt("Fix this x-ref?"))):
            add_fix(ref_type, name, found[0], name)
            ref_type = found[0]
            fixed = True

        elif len(found) > 1 and useful_fix:
            if options.interactive or options.auto_fix:
                stdout.write("* Several manuals contain this node name, "
                             "cannot determine manual automatically.\n")
            if options.interactive:
                t = choose_in_numbered_list(
                    "Choose manual for this x-ref by "
                    "index number or beginning of name:\n", found)
                if t:
                    add_fix(ref_type, name, t, name)
                    ref_type = t
                    fixed = True

        if not fixed:
            # try to find a fix already made
            found = lookup_fix(name)

            if len(found) == 1:
                stdout.write("Found one previous fix: %s `%s'\n" % found[0])
                if options.auto_fix or yes_prompt("Apply this fix?"):
                    ref_type, new_name = found[0]
                    fixed = True

            elif len(found) > 1:
                if options.interactive or options.auto_fix:
                    stdout.write("* Several previous fixes match "
                                 "this node name, cannot fix automatically.\n")
                if options.interactive:
                    concatenated = choose_in_numbered_list(
                        "Choose new manual "
                        "and x-ref by index number or beginning of name:\n",
                        [''.join([i[0], ' ', i[1]]) for i in found],
                        sep='\n')
                    if concatenated:
                        ref_type, new_name = concatenated.split(' ', 1)
                        fixed = True

        if not fixed:
            # all previous automatic fixing attempts failed,
            # ask user for substring to look in node names
            while True:
                node_list = search_prompt()
                if node_list is None:
                    if options.interactive:
                        stdout.write(warn_not_fixed)
                    break
                elif not node_list:
                    stdout.write("No matched node names.\n")
                else:
                    concatenated = choose_in_numbered_list(
                        "Choose "
                        "node name and manual for this x-ref by index number "
                        "or beginning of name:\n",
                        [' '.join([i[0], i[1], '(in %s)' % i[2]])
                         for i in node_list],
                        sep='\n')
                    if concatenated:
                        # Items read "<manual> <node name> (in <file>)".
                        t, z = concatenated.split(' ', 1)
                        new_name = z.split(' (in ', 1)[0]
                        add_fix(ref_type, name, t, new_name)
                        ref_type = t
                        fixed = True
                        break

    if fixed and ref_type == manual:
        ref_type = 'ref'
    bad_refs_count += int(bad_ref)
    if bad_ref and not useful_fix:
        stdout.write("*** Warning: this file is automatically generated, "
                     "please fix the code source instead of generated "
                     "documentation.\n")

    # compute returned string
    if new_name == name:
        if bad_ref and (options.interactive or options.auto_fix):
            # only the type of the ref was fixed
            fixes_count += int(fixed)
        if original_display_name:
            return ('@%snamed{%s,%s}'
                    % (ref_type, original_name, original_display_name)
                    ) + next_char
        else:
            return ('@%s{%s}' % (ref_type, original_name)) + next_char
    else:
        fixes_count += int(fixed)
        (ref, n) = preserve_linebreak(new_name, linebroken)
        if original_display_name:
            if bad_ref:
                # Never block on standard input in batch mode.
                if options.interactive:
                    stdout.write("Current display name is `%s'\n"
                                 % display_name)
                    display_name = raw_input(
                        "Enter a new display name or press enter to keep "
                        "the existing name:\n") or display_name
                (display_name, n) = preserve_linebreak(display_name,
                                                       display_linebroken)
            else:
                display_name = original_display_name
            return ('@%snamed{%s,%s}' % (ref_type, ref, display_name)) + \
                next_char + n
        else:
            return ('@%s{%s}' % (ref_type, ref)) + next_char + n
log.write("Checking cross-references...\n")

# Run check_ref over every x-ref of every file of every manual, and write
# back each file whose text was changed by a fix.
try:
    for key in manuals:
        for file in manuals[key]['contents']:
            s = ref_re.sub(lambda m: check_ref(key, file, m),
                           manuals[key]['contents'][file])
            if s != manuals[key]['contents'][file]:
                # Close the file explicitly so the text is flushed to disk.
                with open(file, 'w') as out:
                    out.write(s)
except KeyboardInterrupt:
    log.write("Operation interrupted, exiting.\n")
    sys.exit(2)
except InteractionError as instance:
    log.write("Operation refused by user: %s\nExiting.\n" % instance)
    sys.exit(3)

# \033 is the ESC character that starts an ANSI colour sequence
# (`\e' is not a Python escape and would be printed literally).
log.write("\033[1;36mDone: %d x-refs found, %d bad x-refs found, "
          "fixed %d.\033[0m\n"
          % (refs_count, bad_refs_count, fixes_count))