5 Interactive Texinfo cross-references checking and fixing tool
21 file_not_found
= 'file not found in include path'
23 warn_not_fixed
= '*** Warning: this broken x-ref has not been fixed!\n'
25 opt_parser
= optparse
.OptionParser (usage
='check_texi_refs.py [OPTION]... FILE',
26 description
='''Check and fix \
27 cross-references in a collection of Texinfo
28 documents heavily cross-referenced each other.
31 opt_parser
.add_option ('-a', '--auto-fix',
32 help="Automatically fix cross-references whenever \
38 opt_parser
.add_option ('-b', '--batch',
39 help="Do not run interactively",
44 opt_parser
.add_option ('-c', '--check-comments',
45 help="Also check commented out x-refs",
47 dest
='check_comments',
50 opt_parser
.add_option ('-p', '--check-punctuation',
51 help="Check punctuation after x-refs",
53 dest
='check_punctuation',
56 opt_parser
.add_option ("-I", '--include', help="add DIR to include path",
58 action
='append', dest
='include_path',
59 default
=[os
.path
.abspath (os
.getcwd ())])
61 (options
, files
) = opt_parser
.parse_args ()
63 class InteractionError (Exception):
67 manuals_defs
= imp
.load_source ('manuals_defs', files
[0])
70 def find_file (name
, prior_directory
='.'):
71 p
= os
.path
.join (prior_directory
, name
)
72 out_p
= os
.path
.join (prior_directory
, outdir
, name
)
73 if os
.path
.isfile (p
):
75 elif os
.path
.isfile (out_p
):
78 # looking for file in include_path
79 for d
in options
.include_path
:
80 p
= os
.path
.join (d
, name
)
81 if os
.path
.isfile (p
):
84 # file not found in include_path: looking in `outdir' subdirs
85 for d
in options
.include_path
:
86 p
= os
.path
.join (d
, outdir
, name
)
87 if os
.path
.isfile (p
):
90 raise EnvironmentError (1, file_not_found
, name
)
95 def set_exit_code (n
):
97 exit_code
= max (exit_code
, n
)
100 if options
.interactive
:
106 def yes_prompt (question
, default
=False, retries
=3):
107 d
= {True: 'y', False: 'n'}.get (default
, False)
109 a
= raw_input ('%s [default: %s]' % (question
, d
) + '\n')
110 if a
.lower ().startswith ('y'):
112 if a
.lower ().startswith ('n'):
114 if a
== '' or retries
< 0:
116 stdout
.write ("Please answer yes or no.\n")
119 def search_prompt ():
120 """Prompt user for a substring to look for in node names.
122 If user input is empty or matches no node name, return None,
123 otherwise return a list of (manual, node name, file) tuples.
126 substring
= raw_input ("Enter a substring to search in node names \
127 (press Enter to skip this x-ref):\n")
130 substring
= substring
.lower ()
133 matches
+= [(k
, node
, manuals
[k
]['nodes'][node
][0])
134 for node
in manuals
[k
]['nodes']
135 if substring
in node
.lower ()]
139 def yes_prompt (question
, default
=False, retries
=3):
142 def search_prompt ():
146 ref_re
= re
.compile \
147 ('@(ref|ruser|rlearning|rprogram|rglos)(?:\\{(?P<ref>[^,\\\\\\}]+?)|\
148 named\\{(?P<refname>[^,\\\\]+?),(?P<display>[^,\\\\\\}]+?))\\}(?P<last>.)',
150 node_include_re
= re
.compile (r
'(?m)^@(node|include)\s+(.+?)$')
152 whitespace_re
= re
.compile (r
'\s+')
153 line_start_re
= re
.compile ('(?m)^')
155 def which_line (index
, newline_indices
):
156 """Calculate line number of a given string index
158 Return line number of string index index, where
159 newline_indices is an ordered iterable of all newline indices.
162 sup
= len (newline_indices
) - 1
163 n
= len (newline_indices
)
164 while inf
+ 1 != sup
:
166 if index
>= newline_indices
[m
]:
173 comments_re
= re
.compile ('(?<!@)(@c(?:omment)? \
174 .*?\\n|^@ignore\\n.*?\\n@end ignore\\n)', re
.M | re
.S
)
def calc_comments_boundaries (texinfo_doc):
    """Locate every comment span in a Texinfo document string.

    Return a list of (start, end) string-index tuples, one for each
    match of comments_re found in texinfo_doc.
    """
    boundaries = []
    for comment in comments_re.finditer (texinfo_doc):
        boundaries.append ((comment.start (), comment.end ()))
    return boundaries
180 def is_commented_out (start
, end
, comments_boundaries
):
181 for k
in range (len (comments_boundaries
)):
182 if (start
> comments_boundaries
[k
][0]
183 and end
<= comments_boundaries
[k
][1]):
185 elif end
<= comments_boundaries
[k
][0]:
190 def read_file (f
, d
):
192 base
= os
.path
.basename (f
)
193 dir = os
.path
.dirname (f
)
197 d
['newline_indices'][f
] = [m
.end () for m
in line_start_re
.finditer (s
)]
198 if options
.check_comments
:
199 d
['comments_boundaries'][f
] = []
201 d
['comments_boundaries'][f
] = calc_comments_boundaries (s
)
203 for m
in node_include_re
.finditer (s
):
204 if m
.group (1) == 'node':
205 line
= which_line (m
.start (), d
['newline_indices'][f
])
206 d
['nodes'][m
.group (2)] = (f
, line
)
208 elif m
.group (1) == 'include':
210 p
= find_file (m
.group (2), dir)
211 except EnvironmentError, (errno
, strerror
):
212 if strerror
== file_not_found
:
219 def read_manual (name
):
220 """Look for all node names and cross-references in a Texinfo document
222 Return a (manual, dictionary) tuple where manual is the cross-reference
223 macro name defined by references_dict[name], and dictionary
224 has the following keys:
226 'nodes' is a dictionary of `node name':(file name, line number),
228 'contents' is a dictionary of file:`full file contents',
230 'newline_indices' is a dictionary of
231 file:[list of beginning-of-line string indices],
233 'comments_boundaries' is a list of (start, end) tuples,
234 which contain string indices of start and end of each comment.
236 Included files that can be found in the include path are processed too.
242 d
['newline_indices'] = {}
243 d
['comments_boundaries'] = {}
244 manual
= manuals_defs
.references_dict
.get (name
, '')
246 f
= find_file (name
+ '.tely')
247 except EnvironmentError, (errno
, strerror
):
248 if not strerror
== file_not_found
:
252 f
= find_file (name
+ '.texi')
253 except EnvironmentError, (errno
, strerror
):
254 if strerror
== file_not_found
:
255 sys
.stderr
.write (name
+ '.{texi,tely}: ' +
256 file_not_found
+ '\n')
261 log
.write ("Processing manual %s (%s)\n" % (f
, manual
))
266 log
.write ("Reading files...\n")
268 manuals
= dict ([read_manual (name
)
269 for name
in manuals_defs
.references_dict
.keys ()])
def add_fix (old_type, old_ref, new_type, new_ref):
    """Record a cross-reference fix in ref_fixes.

    The fix is stored as a single
    (old_type, old_ref, new_type, new_ref) tuple.
    """
    fix = (old_type, old_ref, new_type, new_ref)
    ref_fixes.add (fix)
281 for (old_type
, old_ref
, new_type
, new_ref
) in ref_fixes
:
283 found
.append ((new_type
, new_ref
))
287 def preserve_linebreak (text
, linebroken
):
290 text
= text
.replace (' ', '\n', 1)
299 def choose_in_numbered_list (message
, string_list
, sep
=' ', retries
=3):
300 S
= set (string_list
)
302 string_list
= list (S
)
303 numbered_list
= sep
.join ([str (j
+ 1) + '. ' + string_list
[j
]
304 for j
in range (len (string_list
))]) + '\n'
308 stdout
.write (message
+
309 "(press Enter to discard and start a new search)\n")
310 input = raw_input (numbered_list
)
314 value
= string_list
[int (input) - 1]
316 stdout
.write ("Error: index number out of range\n")
318 matches
= [input in v
for v
in string_list
]
319 n
= matches
.count (True)
321 stdout
.write ("Error: input matches no item in the list\n")
323 stdout
.write ("Error: ambiguous input (matches several items \
326 value
= string_list
[matches
.index (True)]
330 raise InteractionError ("%d retries limit exceeded" % retries
)
334 def check_ref (manual
, file, m
):
335 global fixes_count
, bad_refs_count
, refs_count
340 original_name
= m
.group ('ref') or m
.group ('refname')
341 name
= whitespace_re
.sub (' ', original_name
). strip ()
342 newline_indices
= manuals
[manual
]['newline_indices'][file]
343 line
= which_line (m
.start (), newline_indices
)
344 linebroken
= '\n' in original_name
345 original_display_name
= m
.group ('display')
346 next_char
= m
.group ('last')
347 if original_display_name
: # the xref has an explicit display name
348 display_linebroken
= '\n' in original_display_name
349 display_name
= whitespace_re
.sub (' ', original_display_name
). strip ()
350 commented_out
= is_commented_out \
351 (m
.start (), m
.end (), manuals
[manual
]['comments_boundaries'][file])
352 useful_fix
= not outdir
in file
354 # check punctuation after x-ref
355 if options
.check_punctuation
and not next_char
in '.,;:!?':
356 stdout
.write ("Warning: %s: %d: `%s': x-ref \
357 not followed by punctuation\n" % (file, line
, name
))
363 if type != 'ref' and type == manual
and not commented_out
:
367 stdout
.write ("\n%s: %d: `%s': external %s x-ref should be internal\n"
368 % (file, line
, name
, type))
369 if options
.auto_fix
or yes_prompt ("Fix this?"):
373 explicit_type
= manual
375 if not name
in manuals
[explicit_type
]['nodes'] and not commented_out
:
380 stdout
.write ("\e[1;31m%s: %d: `%s': wrong internal x-ref\e[0m\n"
381 % (file, line
, name
))
383 stdout
.write ("\e[1;31m%s: %d: `%s': wrong external `%s' x-ref\e[0m\n"
384 % (file, line
, name
, type))
386 stdout
.write ('--\n' + manuals
[manual
]['contents'][file]
387 [newline_indices
[max (0, line
- 2)]:
388 newline_indices
[min (line
+ 3,
389 len (newline_indices
) - 1)]] +
392 # try to find the reference in other manuals
394 for k
in [k
for k
in manuals
if k
!= explicit_type
]:
395 if name
in manuals
[k
]['nodes']:
398 stdout
.write ("\e[1;32m found as internal x-ref\e[0m\n")
402 stdout
.write ("\e[1;32m found as `%s' x-ref\e[0m\n" % k
)
405 and (options
.auto_fix
or yes_prompt ("Fix this x-ref?"))):
406 add_fix (type, name
, found
[0], name
)
410 elif len (found
) > 1 and useful_fix
:
411 if options
.interactive
or options
.auto_fix
:
412 stdout
.write ("* Several manuals contain this node name, \
413 cannot determine manual automatically.\n")
414 if options
.interactive
:
415 t
= choose_in_numbered_list ("Choose manual for this x-ref by \
416 index number or beginning of name:\n", found
)
418 add_fix (type, name
, t
, name
)
423 # try to find a fix already made
424 found
= lookup_fix (name
)
427 stdout
.write ("Found one previous fix: %s `%s'\n" % found
[0])
428 if options
.auto_fix
or yes_prompt ("Apply this fix?"):
429 type, new_name
= found
[0]
432 elif len (found
) > 1:
433 if options
.interactive
or options
.auto_fix
:
434 stdout
.write ("* Several previous fixes match \
435 this node name, cannot fix automatically.\n")
436 if options
.interactive
:
437 concatened
= choose_in_numbered_list ("Choose new manual \
438 and x-ref by index number or beginning of name:\n", [''.join ([i
[0], ' ', i
[1]])
442 type, new_name
= concatenated
.split (' ', 1)
446 # all previous automatic fixing attempts failed,
447 # ask user for substring to look in node names
449 node_list
= search_prompt ()
450 if node_list
== None:
451 if options
.interactive
:
452 stdout
.write (warn_not_fixed
)
455 stdout
.write ("No matched node names.\n")
457 concatenated
= choose_in_numbered_list ("Choose \
458 node name and manual for this x-ref by index number or beginning of name:\n", \
459 [' '.join ([i
[0], i
[1], '(in %s)' % i
[2]])
463 t
, z
= concatenated
.split (' ', 1)
464 new_name
= z
.split (' (in ', 1)[0]
465 add_fix (type, name
, t
, new_name
)
470 if fixed
and type == manual
:
472 bad_refs_count
+= int (bad_ref
)
473 if bad_ref
and not useful_fix
:
474 stdout
.write ("*** Warning: this file is automatically generated, \
475 please fix the code source instead of generated documentation.\n")
477 # compute returned string
479 if bad_ref
and (options
.interactive
or options
.auto_fix
):
480 # only the type of the ref was fixed
481 fixes_count
+= int (fixed
)
482 if original_display_name
:
483 return ('@%snamed{%s,%s}' % (type, original_name
, original_display_name
)) + next_char
485 return ('@%s{%s}' % (type, original_name
)) + next_char
487 fixes_count
+= int (fixed
)
488 (ref
, n
) = preserve_linebreak (new_name
, linebroken
)
489 if original_display_name
:
491 stdout
.write ("Current display name is `%s'\n")
492 display_name
= raw_input \
493 ("Enter a new display name or press enter to keep the existing name:\n") \
495 (display_name
, n
) = preserve_linebreak (display_name
, display_linebroken
)
497 display_name
= original_display_name
498 return ('@%snamed{%s,%s}' % (type, ref
, display_name
)) + \
501 return ('@%s{%s}' % (type, ref
)) + next_char
+ n
504 log
.write ("Checking cross-references...\n")
508 for file in manuals
[key
]['contents']:
509 s
= ref_re
.sub (lambda m
: check_ref (key
, file, m
),
510 manuals
[key
]['contents'][file])
511 if s
!= manuals
[key
]['contents'][file]:
512 open (file, 'w').write (s
)
513 except KeyboardInterrupt:
514 log
.write ("Operation interrupted, exiting.\n")
516 except InteractionError
, instance
:
517 log
.write ("Operation refused by user: %s\nExiting.\n" % instance
)
520 log
.write ("\e[1;36mDone: %d x-refs found, %d bad x-refs found, fixed %d.\e[0m\n" %
521 (refs_count
, bad_refs_count
, fixes_count
))