scripts/auxiliar/check_texi_refs.py

   1 #!/usr/bin/env python
   2
   3 """
   4 check_texi_refs.py
   5 Interactive Texinfo cross-references checking and fixing tool
   6
   7 """
   8
   9
  10 import sys
  11 import re
  12 import os
  13 import optparse
  14 import imp
  15
  16 outdir = 'out-www'
  17
  18 log = sys.stderr
  19 stdout = sys.stdout
  20
  21 file_not_found = 'file not found in include path'
  22
  23 warn_not_fixed = '*** Warning: this broken x-ref has not been fixed!\n'
  24
  25 opt_parser = optparse.OptionParser (usage='check_texi_refs.py [OPTION]... FILE',
  26                                     description='''Check and fix \
  27 cross-references in a collection of Texinfo
  28 documents heavily cross-referenced each other.
  29 ''')
  30
  31 opt_parser.add_option ('-a', '--auto-fix',
  32                        help="Automatically fix cross-references whenever \
  33 it is possible",
  34                        action='store_true',
  35                        dest='auto_fix',
  36                        default=False)
  37
  38 opt_parser.add_option ('-b', '--batch',
  39                        help="Do not run interactively",
  40                        action='store_false',
  41                        dest='interactive',
  42                        default=True)
  43
  44 opt_parser.add_option ('-c', '--check-comments',
  45                        help="Also check commented out x-refs",
  46                        action='store_true',
  47                        dest='check_comments',
  48                        default=False)
  49
  50 opt_parser.add_option ('-p', '--check-punctuation',
  51                        help="Check punctuation after x-refs",
  52                        action='store_true',
  53                        dest='check_punctuation',
  54                        default=False)
  55
  56 opt_parser.add_option ("-I", '--include', help="add DIR to include path",
  57                        metavar="DIR",
  58                        action='append', dest='include_path',
  59                        default=[])
  60
  61 (options, files) = opt_parser.parse_args ()
  62 options.include_path.append (os.path.abspath (os.getcwd ()))
  63
  64 class InteractionError (Exception):
  65     pass
  66
  67
  68 manuals_defs = imp.load_source ('manuals_defs', files[0])
  69 manuals = {}
  70
  71 def find_file (name, prior_directory='.'):
  72     p = os.path.join (prior_directory, name)
  73     out_p = os.path.join (prior_directory, outdir, name)
  74     if os.path.isfile (p):
  75         return p
  76     elif os.path.isfile (out_p):
  77         return out_p
  78
  79     # looking for file in include_path
  80     for d in options.include_path:
  81         p = os.path.join (d, name)
  82         if os.path.isfile (p):
  83             return p
  84
  85     # file not found in include_path: looking in `outdir' subdirs
  86     for d in options.include_path:
  87         p = os.path.join (d, outdir, name)
  88         if os.path.isfile (p):
  89             return p
  90
  91     raise EnvironmentError (1, file_not_found, name)
  92
  93
  94 exit_code = 0
  95
  96 def set_exit_code (n):
  97     global exit_code
  98     exit_code = max (exit_code, n)
  99
 100
 101 if options.interactive:
 102     try:
 103         import readline
 104     except:
 105         pass
 106
 107     def yes_prompt (question, default=False, retries=3):
 108         d = {True: 'y', False: 'n'}.get (default, False)
 109         while retries:
 110             a = raw_input ('%s [default: %s]' % (question, d) + '\n')
 111             if a.lower ().startswith ('y'):
 112                 return True
 113             if a.lower ().startswith ('n'):
 114                 return False
 115             if a == '' or retries < 0:
 116                 return default
 117             stdout.write ("Please answer yes or no.\n")
 118             retries -= 1
 119
 120     def search_prompt ():
 121         """Prompt user for a substring to look for in node names.
 122
 123 If user input is empty or matches no node name, return None,
 124 otherwise return a list of (manual, node name, file) tuples.
 125
 126 """
 127         substring = raw_input ("Enter a substring to search in node names \
 128 (press Enter to skip this x-ref):\n")
 129         if not substring:
 130             return None
 131         substring = substring.lower ()
 132         matches = []
 133         for k in manuals:
 134             matches += [(k, node, manuals[k]['nodes'][node][0])
 135                         for node in manuals[k]['nodes']
 136                         if substring in node.lower ()]
 137         return matches
 138
 139 else:
 140     def yes_prompt (question, default=False, retries=3):
 141         return default
 142
 143     def search_prompt ():
 144         return None
 145
 146
 147 ref_re = re.compile \
 148     ('@((?:ressay|rgloss|rinternals|rlearning|rslr|rprogram|ruser|ref)|named)(?:\\{(?P<ref>[^,\\\\\\}]+?)|\
 149 named\\{(?P<refname>[^,\\\\]+?),(?P<display>[^,\\\\\\}]+?))\\}(?P<last>.)',
 150      re.DOTALL)
 151 node_include_re = re.compile (r'(?m)^@(node|include)\s+(.+?)$')
 152
 153 whitespace_re = re.compile (r'\s+')
 154 line_start_re = re.compile ('(?m)^')
 155
 156 def which_line (index, newline_indices):
 157     """Calculate line number of a given string index
 158
 159 Return line number of string index index, where
 160 newline_indices is an ordered iterable of all newline indices.
 161 """
 162     inf = 0
 163     sup = len (newline_indices) - 1
 164     n = len (newline_indices)
 165     while inf + 1 != sup:
 166         m = (inf + sup) / 2
 167         if index >= newline_indices [m]:
 168             inf = m
 169         else:
 170             sup = m
 171     return inf + 1
 172
 173
 174 comments_re = re.compile ('(?<!@)(@c(?:omment)? \
 175 .*?\\n|^@ignore\\n.*?\\n@end ignore\\n)', re.M | re.S)
 176
 177 def calc_comments_boundaries (texinfo_doc):
 178     return [(m.start (), m.end ()) for m in comments_re.finditer (texinfo_doc)]
 179
 180
 181 def is_commented_out (start, end, comments_boundaries):
 182     for k in range (len (comments_boundaries)):
 183         if (start > comments_boundaries[k][0]
 184             and end <= comments_boundaries[k][1]):
 185             return True
 186         elif end <= comments_boundaries[k][0]:
 187             return False
 188     return False
 189
 190
 191 def read_file (f, d):
 192     s = open (f).read ()
 193     base = os.path.basename (f)
 194     dir = os.path.dirname (f)
 195
 196     d['contents'][f] = s
 197
 198     d['newline_indices'][f] = [m.end () for m in line_start_re.finditer (s)]
 199     if options.check_comments:
 200         d['comments_boundaries'][f] = []
 201     else:
 202         d['comments_boundaries'][f] = calc_comments_boundaries (s)
 203
 204     for m in node_include_re.finditer (s):
 205         if m.group (1) == 'node':
 206             line = which_line (m.start (), d['newline_indices'][f])
 207             d['nodes'][m.group (2)] = (f, line)
 208
 209         elif m.group (1) == 'include':
 210             try:
 211                 p = find_file (m.group (2), dir)
 212             except EnvironmentError, (errno, strerror):
 213                 if strerror == file_not_found:
 214                     continue
 215                 else:
 216                     raise
 217             read_file (p, d)
 218
 219
 220 def read_manual (name):
 221     """Look for all node names and cross-references in a Texinfo document
 222
 223 Return a (manual, dictionary) tuple where manual is the cross-reference
 224 macro name defined by references_dict[name], and dictionary
 225 has the following keys:
 226
 227   'nodes' is a dictionary of `node name':(file name, line number),
 228
 229   'contents' is a dictionary of file:`full file contents',
 230
 231   'newline_indices' is a dictionary of
 232 file:[list of beginning-of-line string indices],
 233
 234   'comments_boundaries' is a list of (start, end) tuples,
 235 which contain string indices of start and end of each comment.
 236
 237 Included files that can be found in the include path are processed too.
 238
 239 """
 240     d = {}
 241     d['nodes'] = {}
 242     d['contents'] = {}
 243     d['newline_indices'] = {}
 244     d['comments_boundaries'] = {}
 245     manual = manuals_defs.references_dict.get (name, '')
 246     try:
 247         f = find_file (name + '.tely')
 248     except EnvironmentError, (errno, strerror):
 249         if not strerror == file_not_found:
 250             raise
 251         else:
 252             try:
 253                 f = find_file (name + '.texi')
 254             except EnvironmentError, (errno, strerror):
 255                 if strerror == file_not_found:
 256                     sys.stderr.write (name + '.{texi,tely}: ' +
 257                                       file_not_found + '\n')
 258                     return (manual, d)
 259                 else:
 260                     raise
 261
 262     log.write ("Processing manual %s (%s)\n" % (f, manual))
 263     read_file (f, d)
 264     return (manual, d)
 265
 266
 267 log.write ("Reading files...\n")
 268
 269 manuals = dict ([read_manual (name)
 270                  for name in manuals_defs.references_dict.keys ()])
 271
 272 ref_fixes = set ()
 273 bad_refs_count = 0
 274 fixes_count = 0
 275
 276 def add_fix (old_type, old_ref, new_type, new_ref):
 277     ref_fixes.add ((old_type, old_ref, new_type, new_ref))
 278
 279
 280 def lookup_fix (r):
 281     found = []
 282     for (old_type, old_ref, new_type, new_ref) in ref_fixes:
 283         if r == old_ref:
 284             found.append ((new_type, new_ref))
 285     return found
 286
 287
 288 def preserve_linebreak (text, linebroken):
 289     if linebroken:
 290         if ' ' in text:
 291             text = text.replace (' ', '\n', 1)
 292             n = ''
 293         else:
 294             n = '\n'
 295     else:
 296         n = ''
 297     return (text, n)
 298
 299
 300 def choose_in_numbered_list (message, string_list, sep=' ', retries=3):
 301     S = set (string_list)
 302     S.discard ('')
 303     string_list = list (S)
 304     numbered_list = sep.join ([str (j + 1) + '. ' + string_list[j]
 305                                for j in range (len (string_list))]) + '\n'
 306     t = retries
 307     while t > 0:
 308         value = ''
 309         stdout.write (message +
 310                       "(press Enter to discard and start a new search)\n")
 311         input = raw_input (numbered_list)
 312         if not input:
 313             return ''
 314         try:
 315             value = string_list[int (input) - 1]
 316         except IndexError:
 317             stdout.write ("Error: index number out of range\n")
 318         except ValueError:
 319             matches = [input in v for v in string_list]
 320             n = matches.count (True)
 321             if n == 0:
 322                 stdout.write ("Error: input matches no item in the list\n")
 323             elif n > 1:
 324                 stdout.write ("Error: ambiguous input (matches several items \
 325 in the list)\n")
 326             else:
 327                 value = string_list[matches.index (True)]
 328         if value:
 329             return value
 330         t -= 1
 331     raise InteractionError ("%d retries limit exceeded" % retries)
 332
 333 refs_count = 0
 334
 335 def check_ref (manual, file, m):
 336     global fixes_count, bad_refs_count, refs_count
 337     refs_count += 1
 338     bad_ref = False
 339     fixed = True
 340     type = m.group (1)
 341     original_name = m.group ('ref') or m.group ('refname')
 342     name = whitespace_re.sub (' ', original_name). strip ()
 343     newline_indices = manuals[manual]['newline_indices'][file]
 344     line = which_line (m.start (), newline_indices)
 345     linebroken = '\n' in original_name
 346     original_display_name = m.group ('display')
 347     next_char = m.group ('last')
 348     if original_display_name: # the xref has an explicit display name
 349         display_linebroken = '\n' in original_display_name
 350         display_name = whitespace_re.sub (' ', original_display_name). strip ()
 351     commented_out = is_commented_out \
 352         (m.start (), m.end (), manuals[manual]['comments_boundaries'][file])
 353     useful_fix = not outdir in file
 354
 355     # check puncuation after x-ref
 356     if options.check_punctuation and not next_char in '.,;:!?':
 357         stdout.write ("Warning: %s: %d: `%s': x-ref \
 358 not followed by punctuation\n" % (file, line, name))
 359
 360     # validate xref
 361     explicit_type = type
 362     new_name = name
 363
 364     if type != 'ref' and type == manual and not commented_out:
 365         if useful_fix:
 366             fixed = False
 367             bad_ref = True
 368             stdout.write ("\n%s: %d: `%s': external %s x-ref should be internal\n"
 369                           % (file, line, name, type))
 370             if options.auto_fix or yes_prompt ("Fix this?"):
 371                 type = 'ref'
 372
 373     if type == 'ref':
 374         explicit_type = manual
 375
 376     if not name in manuals[explicit_type]['nodes'] and not commented_out:
 377         bad_ref = True
 378         fixed = False
 379         stdout.write ('\n')
 380         if type == 'ref':
 381             stdout.write ("\e[1;31m%s: %d: `%s': wrong internal x-ref\e[0m\n"
 382                           % (file, line, name))
 383         else:
 384             stdout.write ("\e[1;31m%s: %d: `%s': wrong external `%s' x-ref\e[0m\n"
 385                           % (file, line, name, type))
 386         # print context
 387         stdout.write ('--\n' + manuals[manual]['contents'][file]
 388                       [newline_indices[max (0, line - 2)]:
 389                        newline_indices[min (line + 3,
 390                                             len (newline_indices) - 1)]] +
 391                       '--\n')
 392
 393         # try to find the reference in other manuals
 394         found = []
 395         for k in [k for k in manuals if k != explicit_type]:
 396             if name in manuals[k]['nodes']:
 397                 if k == manual:
 398                     found = ['ref']
 399                     stdout.write ("\e[1;32m  found as internal x-ref\e[0m\n")
 400                     break
 401                 else:
 402                     found.append (k)
 403                     stdout.write ("\e[1;32m  found as `%s' x-ref\e[0m\n" % k)
 404
 405         if (len (found) == 1
 406             and (options.auto_fix or yes_prompt ("Fix this x-ref?"))):
 407             add_fix (type, name, found[0], name)
 408             type = found[0]
 409             fixed = True
 410
 411         elif len (found) > 1 and useful_fix:
 412             if options.interactive or options.auto_fix:
 413                 stdout.write ("* Several manuals contain this node name, \
 414 cannot determine manual automatically.\n")
 415             if options.interactive:
 416                 t = choose_in_numbered_list ("Choose manual for this x-ref by \
 417 index number or beginning of name:\n", found)
 418                 if t:
 419                     add_fix (type, name, t, name)
 420                     type = t
 421                     fixed = True
 422
 423         if not fixed:
 424             # try to find a fix already made
 425             found = lookup_fix (name)
 426
 427             if len (found) == 1:
 428                 stdout.write ("Found one previous fix: %s `%s'\n" % found[0])
 429                 if options.auto_fix or yes_prompt ("Apply this fix?"):
 430                     type, new_name = found[0]
 431                     fixed = True
 432
 433             elif len (found) > 1:
 434                 if options.interactive or options.auto_fix:
 435                     stdout.write ("* Several previous fixes match \
 436 this node name, cannot fix automatically.\n")
 437                 if options.interactive:
 438                     concatened = choose_in_numbered_list ("Choose new manual \
 439 and x-ref by index number or beginning of name:\n", [''.join ([i[0], ' ', i[1]])
 440                                                      for i in found],
 441                                                     sep='\n')
 442                     if concatened:
 443                         type, new_name = concatenated.split (' ', 1)
 444                         fixed = True
 445
 446         if not fixed:
 447             # all previous automatic fixing attempts failed,
 448             # ask user for substring to look in node names
 449             while True:
 450                 node_list = search_prompt ()
 451                 if node_list == None:
 452                     if options.interactive:
 453                         stdout.write (warn_not_fixed)
 454                     break
 455                 elif not node_list:
 456                     stdout.write ("No matched node names.\n")
 457                 else:
 458                     concatenated = choose_in_numbered_list ("Choose \
 459 node name and manual for this x-ref by index number or beginning of name:\n", \
 460                             [' '.join ([i[0], i[1], '(in %s)' % i[2]])
 461                              for i in node_list],
 462                                                             sep='\n')
 463                     if concatenated:
 464                         t, z = concatenated.split (' ', 1)
 465                         new_name = z.split (' (in ', 1)[0]
 466                         add_fix (type, name, t, new_name)
 467                         type = t
 468                         fixed = True
 469                         break
 470
 471     if fixed and type == manual:
 472         type = 'ref'
 473     bad_refs_count += int (bad_ref)
 474     if bad_ref and not useful_fix:
 475         stdout.write ("*** Warning: this file is automatically generated, \
 476 please fix the code source instead of generated documentation.\n")
 477
 478     # compute returned string
 479     if new_name == name:
 480         if bad_ref and (options.interactive or options.auto_fix):
 481             # only the type of the ref was fixed
 482             fixes_count += int (fixed)
 483         if original_display_name:
 484             return ('@%snamed{%s,%s}' % (type, original_name, original_display_name)) + next_char
 485         else:
 486             return ('@%s{%s}' % (type, original_name)) + next_char
 487     else:
 488         fixes_count += int (fixed)
 489         (ref, n) = preserve_linebreak (new_name, linebroken)
 490         if original_display_name:
 491             if bad_ref:
 492                 stdout.write ("Current display name is `%s'\n")
 493                 display_name = raw_input \
 494                     ("Enter a new display name or press enter to keep the existing name:\n") \
 495                     or display_name
 496                 (display_name, n) = preserve_linebreak (display_name, display_linebroken)
 497             else:
 498                 display_name = original_display_name
 499             return ('@%snamed{%s,%s}' % (type, ref, display_name)) + \
 500                 next_char + n
 501         else:
 502             return ('@%s{%s}' % (type, ref)) + next_char + n
 503
 504
 505 log.write ("Checking cross-references...\n")
 506
 507 try:
 508     for key in manuals:
 509         for file in manuals[key]['contents']:
 510             s = ref_re.sub (lambda m: check_ref (key, file, m),
 511                             manuals[key]['contents'][file])
 512             if s != manuals[key]['contents'][file]:
 513                 open (file, 'w').write (s)
 514 except KeyboardInterrupt:
 515     log.write ("Operation interrupted, exiting.\n")
 516     sys.exit (2)
 517 except InteractionError, instance:
 518     log.write ("Operation refused by user: %s\nExiting.\n" % instance)
 519     sys.exit (3)
 520
 521 log.write ("\e[1;36mDone: %d x-refs found, %d bad x-refs found, fixed %d.\e[0m\n" %
 522            (refs_count, bad_refs_count, fixes_count))